当前位置: 代码迷 >> 综合 >> 通过文件路径获取文件(pdf/doc/docx/xls/xlsx)并输出文件内容
  详细解决方案

通过文件路径获取文件(pdf/doc/docx/xls/xlsx)并输出文件内容

热度:62   发布时间:2023-12-22 22:12:39.0

最近在开发过程中,遇到了需要把附件中的文本读取出来并存储到数据库中的需求。下面记录一下各种格式文件的文本读取方式,大家按需取用;如有不明白的地方,欢迎留言讨论。

String filePath = “你的文件路径";

注意:路径要精确到文件的后缀名,比如 D:/demo.doc。

// File handle for filePath; the guard further down checks it with isFile()/exists().
File file = new File(filePath);

当文件后缀为doc时(下面各段代码都把读到的文本赋给 fileContent,它是一个需要事先声明的 String 变量):

 if(file.isFile() && file.exists()) {
    if (filePath.endsWith(".doc")) {
    InputStream is = null;WordExtractor re = null;try {
    is = new FileInputStream(new File(filePath));re = new WordExtractor(is);fileContent = re.getText();} catch (IOException e) {
    e.printStackTrace();} finally {
    try {
    if(re != null){
    re.close();}if(is != null){
    is.close();}} catch (IOException e) {
    e.printStackTrace();}}}

当文件后缀为docx时:

// .docx files: open the OOXML package and read its text with XWPFWordExtractor.
if (filePath.endsWith(".docx")) {
    OPCPackage opcPackage = null;
    POIXMLTextExtractor extractor = null;
    try {
        opcPackage = POIXMLDocument.openPackage(filePath);
        extractor = new XWPFWordExtractor(opcPackage);
        fileContent = extractor.getText();
    } catch (Exception e) {
        // Best effort: on failure fileContent keeps whatever value it had before.
        e.printStackTrace();
    } finally {
        try {
            if (extractor != null) {
                extractor.close();
            } else if (opcPackage != null) {
                // The extractor was never created, so nothing else would release the
                // package. revert() closes it without saving; close() is avoided
                // because it is meant to save a package opened for writing.
                opcPackage.revert();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

当文件后缀为pdf时:

if(filePath.endsWith(".pdf")){
    FileInputStream in = null;RandomAccessRead randomAccessRead = null;try {
    in = new FileInputStream(new File(filePath));randomAccessRead = new RandomAccessBufferedFileInputStream(in);PDFParser parser = new PDFParser(randomAccessRead);parser.parse();PDDocument pdDocument = parser.getPDDocument();PDFTextStripper stripper = new PDFTextStripper();fileContent = stripper.getText(pdDocument);} catch (IOException e) {
    e.printStackTrace();} finally {
    try {
    if(randomAccessRead != null){
    randomAccessRead.close();}if(in != null){
    in.close();}} catch (IOException e) {
    e.printStackTrace();}}}

当文件后缀为xls时:

if (filePath.endsWith(".xls")) {
    List<String> listXLS = new ArrayList<>();// 解析excelPOIFSFileSystem fs  = null;// 获取整个excelHSSFWorkbook hb = null;try {
    fs = new POIFSFileSystem(new FileInputStream(filePath));hb = new HSSFWorkbook(fs);// 遍历多个sheet页for(int sheetIndex=0;sheetIndex<hb.getNumberOfSheets();sheetIndex++) {
    HSSFSheet sheet = hb.getSheetAt(sheetIndex);//HSSFSheet sheet = hb.getSheetAt(0);// 获取第一行int firstrow = sheet.getFirstRowNum();// 获取最后一行int lastrow = sheet.getLastRowNum();// 循环行数依次获取列数for (int i = firstrow; i < lastrow + 1; i++) {
    // 获取哪一行iHSSFRow row = sheet.getRow(i);if (row != null) {
    // 获取这一行的第一列int firstcell = row.getFirstCellNum();// 获取这一行的最后一列int lastcell = row.getLastCellNum();//将每一行的每一列数据都存入集合中for (int j = firstcell; j < lastcell; j++) {
    // 获取第j列HSSFCell cell = row.getCell(j);if (cell != null) {
    String cellStr = cell.toString();if(CommonUtils.isNotEmpty(cellStr)){
    listXLS.add(cell.toString());}}}}}fileContent= String.valueOf(listXLS);}} catch (IOException e) {
    e.printStackTrace();} finally {
    try {
    if(hb != null){
    hb.close();}if(fs != null){
    fs.close();}} catch (IOException e) {
    e.printStackTrace();}}}

当文件后缀为xlsx时:

if (filePath.endsWith(".xlsx")) {
    List<String> listXLSX = new ArrayList<>();// 用流的方式先读取到你想要的excel的文件FileInputStream fis = null;// 获取整个excelXSSFWorkbook hb = null;try {
    fis = new FileInputStream(new File(filePath));hb = new XSSFWorkbook(fis);// 遍历表单sheetfor(int sheetIndex=0;sheetIndex<hb.getNumberOfSheets();sheetIndex++) {
    Sheet sheet = hb.getSheetAt(sheetIndex);//Sheet sheet = hb.getSheetAt(0);// 获取第一行int firstrow = sheet.getFirstRowNum();// 获取最后一行int lastrow = sheet.getLastRowNum();// 循环行数依次获取列数for (int i = firstrow; i < lastrow + 1; i++) {
    // 获取哪一行iRow row = sheet.getRow(i);if (row != null) {
    // 获取这一行的第一列int firstcell = row.getFirstCellNum();// 获取这一行的最后一列int lastcell = row.getLastCellNum();// 创建一个集合,用处将每一行的每一列数据都存入集合中for (int j = firstcell; j < lastcell; j++) {
    // 获取第j列Cell cell = row.getCell(j);if (cell != null) {
    String cellStr = cell.toString();if(CommonUtils.isNotEmpty(cellStr)){
    listXLSX.add(cell.toString());}}}}}fileContent = String.valueOf(listXLSX);}} catch (IOException e) {
    e.printStackTrace();} finally {
    try {
    if(hb != null){
    hb.close();}if(fis != null){
    fis.close();}} catch (IOException e) {
    e.printStackTrace();}}}

大家有更好的方法欢迎留言讨论。

  相关解决方案