当前位置: 代码迷 >> 综合 >> 【第02篇】利用POI框架的SAX方式之读取大数据2007版Excel(xlsx)【第2版】
  详细解决方案

【第02篇】利用POI框架的SAX方式之读取大数据2007版Excel(xlsx)【第2版】

热度:66   发布时间:2023-12-08 03:48:30.0

【第1版】地址

https://blog.csdn.net/rainyspring4540/article/details/50747122

针对老早写的POI处理Excel的大数据读取问题,看到好多人关注,感觉自己还是更新一版,毕竟虽然是自己备份,但是如果新手能少走弯路,也算欣慰。下面的版本是我的项目迭代过程中个人认为毕竟稳定和健壮的,算作【第2版】吧,里面修复了【第1版】的很多bug,诸如计算前后单元格差值的函数getLevel,以及识别新行的正则上也略有优化,最后希望,这段代码能为你争取更多的学习时间,而不是懒惰的借口。

package com.fulong.utils.poi;import java.io.File;/* ====================================================================Licensed to the Apache Software Foundation (ASF) under one or morecontributor license agreements.  See the NOTICE file distributed withthis work for additional information regarding copyright ownership.The ASF licenses this file to You under the Apache License, Version 2.0(the "License"); you may not use this file except in compliance withthe License.  You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.
==================================================================== */import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;import org.apache.commons.lang.StringUtils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;import com.fulong.utils.report.tool.MathUtil;/*** XSSF and SAX (Event API) basic example. See {@link XLSX2CSV} for a fuller* example of doing XSLX processing with the XSSF Event code.* 目前函数又个缺陷,便是starElement函数中发现新行的策略有问题*/
public class MyExcel2007ForPaging_high {/*** 代表Excel中必须有值的起始列(A、B、C....AA、AB...)*/private static final String indexC4Data = "A";/*** 存储所有行的值*/public List<List<IndexValue>> dataList = new ArrayList<List<IndexValue>>();/** 临时存储当前行的值*/private List<IndexValue> rowData;private final int startRow;private final int endRow;private int currentRow = 0;private final String filename;public MyExcel2007ForPaging_high(String filename, int startRow, int endRow) throws Exception {if (StringUtils.isBlank(filename))throw new Exception("文件名不能空");this.filename = filename;this.startRow = startRow;this.endRow = endRow;processFirstSheet();}/*** 指定获取第一个sheet* * @param filename* @throws Exception*/private void processFirstSheet() throws Exception {OPCPackage pkg = OPCPackage.open(filename);XSSFReader r = new XSSFReader(pkg);SharedStringsTable sst = r.getSharedStringsTable();XMLReader parser = fetchSheetParser(sst);// To look up the Sheet Name / Sheet Order / rID,// you need to process the core Workbook stream.// Normally it's of the form rId# or rSheet#InputStream sheet1 = r.getSheet("rId1");InputSource sheetSource = new InputSource(sheet1);parser.parse(sheetSource);sheet1.close();pkg.close();}private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");ContentHandler handler = new PagingHandler(sst);parser.setContentHandler(handler);return parser;}/*** See org.xml.sax.helpers.DefaultHandler javadocs*/private class PagingHandler extends DefaultHandler {private SharedStringsTable sst;private String lastContents;private boolean nextIsString;private String index = null;private PagingHandler(SharedStringsTable sst) {this.sst = sst;}/*** 每个单元格开始时的处理*/@Overridepublic void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {// c => cellif (name.equals("c")) {// Print the cell reference// System.out.print(attributes.getValue("r") + " - ");index = attributes.getValue("r");System.out.println(index);if (index.contains("N")) {System.out.println("##" + attributes + "##");}// 这是一个新行if (Pattern.compile("^"+indexC4Data+"[0-9]+$").matcher(index).find()) {// 存储上一行数据if (rowData != null && isAccess() && !rowData.isEmpty()) {dataList.add(rowData);}rowData = new ArrayList<IndexValue>();;// 新行要先清除上一行的数据currentRow++;// 当前行+1// System.out.println(currentRow);}if (isAccess()) {// Figure out if the value is an index in the SSTString cellType = attributes.getValue("t");if (cellType != null && cellType.equals("s")) {nextIsString = true;} else {nextIsString = false;}}}// Clear contents cachelastContents = "";}/*** 每个单元格结束时的处理*/@Overridepublic void endElement(String uri, String localName, String name) throws SAXException {if (isAccess()) {// Process the last contents as required.// Do now, as characters() may be called more than onceif (nextIsString) {int idx = Integer.parseInt(lastContents);lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();nextIsString = false;}// v => contents of a cell// Output after we've seen the string contentsif (name.equals("v")) {// System.out.println(lastContents);rowData.add(new IndexValue(index, lastContents));}}}/*** 目前流的方式值支持  Excel单元格是文本  格式;日期、数字、公式不支持*/@Overridepublic void characters(char[] ch, int start, int length) throws SAXException {if (isAccess()) {lastContents += new String(ch, start, length);}}/*** 如果文档结束后,发现读取的末尾行正处在当前行中,存储下这行* (存在这样一种情况,当待读取的末尾行正好是文档最后一行时,最后一行无法存到集合中,* 因为最后一行没有下一行了,所以不为启动starElement()方法, 当然我们可以通过指定最大列来处理,但不想那么做,扩展性不好)*/@Overridepublic void endDocument() throws SAXException {if (rowData != null && isAccess() && !rowData.isEmpty()) {dataList.add(rowData);System.out.println("--end");}}}private boolean isAccess() {if (currentRow >= startRow && currentRow <= endRow) {return true;}return false;}private class IndexValue {String v_index;String v_value;public IndexValue(String v_index, String v_value) {super();this.v_index = v_index;this.v_value = v_value;}@Overridepublic String toString() {return "IndexValue [v_index=" + v_index + ", v_value=" + v_value + "]";}/*** 去掉数字部分(行信息),直接比较英文部分(列信息),计算前后两个值相距多少空列* @param p* @return*/public int getLevel(IndexValue p) {/*char[] other = p.v_index.replaceAll("[0-9]", "").toCharArray();char[] self = this.v_index.replaceAll("[0-9]", "").toCharArray();if (other.length != self.length)return -1;for (int i = 0; i < other.length; i++) {if (i == other.length - 1) {return self[i] - other[i];} else {if (self[i] != other[i]) {return -1;}}}return -1;*/String other = p.v_index.replaceAll("[0-9]", "");String self = this.v_index.replaceAll("[0-9]", "");return MathUtil.fromNumberSystem26(self)-MathUtil.fromNumberSystem26(other);}}/*** 获取真实的数据(处理空格)* * @return* @throws Exception*/public List<List<String>> getMyDataList() throws Exception {List<List<String>> myDataList = new ArrayList<List<String>>();if (dataList == null || dataList.size() <= 0)return myDataList;/** 是否是最后一行的数据*/boolean islastRow = false;for (int i = 0; i < dataList.size(); i++) {List<IndexValue> i_list = dataList.get(i);List<String> row = new ArrayList<String>();int j = 0;for (; j < i_list.size() - 1; j++) {// 获取当前值,并存储IndexValue current = i_list.get(j);//去掉空格String tempV = current.v_value!=null?current.v_value.trim():current.v_value;row.add(tempV);// 预存下一个IndexValue next = i_list.get(j + 1);// 获取差值int level = next.getLevel(current);/*if(i==2214){System.out.println("--"+i);}*/if (level <= 0){System.err.println("---!!!到达最后一行,行号:"+(i+1)+";level:"+level+"[超出处理范围]");islastRow = true;break;}//将差值补充为null,for (int k = 0; k < level - 1; k++) {row.add(null);}}/** 每行的最后一个值,留在最后插入* 但最后一行除外*/if(!islastRow){row.add(i_list.get(j).v_value);}myDataList.add(row);}return myDataList;}public static void main(String[] args) throws Exception {File file = new File("e:/a.xlsx");System.out.println(new MyExcel2007ForPaging_high(file.getPath(), 1, 50).getMyDataList());}
}

辅助类

package com.fulong.utils.report.tool;import org.apache.commons.lang.StringUtils;public class MathUtil {/*** /// <summary>/// 将指定的自然数转换为26进制表示。映射关系:[1-26] ->[A-Z]。/// </summary>/// <param name="n">自然数(如果无效,则返回空字符串)。</param>/// <returns>26进制表示。</returns>*/public static String toNumberSystem26(int n){String s = "";while (n > 0){int m = n % 26;if (m == 0) m = 26;s = (char)(m + 64) + s;n = (n - m) / 26;}return s;} /*** <summary>将指定的26进制表示转换为自然数。映射关系:[A-Z] ->[1-26]。</summary><param name="s">26进制表示(如果无效,则返回0)。</param><returns>自然数。</returns>*/public static int fromNumberSystem26(String s){if (StringUtils.isBlank(s)) return 0; s = s.toUpperCase();int n = 0;char[] arr = s.toCharArray();for (int i = arr.length - 1, j = 1; i >= 0; i--, j *= 26){char c = arr[i];if (c < 'A' || c > 'Z') return 0;//A的ASCII值为65n += ((int)c - 64) * j;}return n;}public static void main(String[] args) {System.out.println(fromNumberSystem26("aa"));System.out.println(toNumberSystem26(27));}
}

 

  相关解决方案