【JAVA】apache poi excel 文件读取,各种数据类型,不规则excel格式都可以读取

本文将通过实例来介绍apache poi 读取excel的原理,包括各种数据类型的处理,本文提供的代码非常通用,即使不规则的excel文件,也可以读取。

package poi.excel;
 
import java.io.File;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
 
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
 
/**
 * ClassName:ExcelFileParser <br/>
 * Function: TODO ADD FUNCTION. <br/>
 * Reason: TODO ADD REASON. <br/>
 * Date: 2015年12月29日 上午9:26:21 <br/>
 * 
 * @author chiwei
 * @version
 * @since JDK 1.6
 * @see
 */
public class ExcelFileParser {
 
    public static Workbook getWb(String path) {
        try {
            return WorkbookFactory.create(new File(path));
        } catch (Exception e) {
            throw new RuntimeException("读取EXCEL文件出错", e);
        }
    }
 
    public static Sheet getSheet(Workbook wb, int sheetIndex) {
        if (wb == null) {
            throw new RuntimeException("工作簿对象为空");
        }
        int sheetSize = wb.getNumberOfSheets();
        if (sheetIndex < 0 || sheetIndex > sheetSize - 1) {
            throw new RuntimeException("工作表获取错误");
        }
        return wb.getSheetAt(sheetIndex);
    }
 
    public static List<List<String>> getExcelRows(Sheet sheet, int startLine, int endLine) {
        List<List<String>> list = new ArrayList<List<String>>();
        // 如果开始行号和结束行号都是-1的话,则全表读取
        if (startLine == -1)
            startLine = 0;
        if (endLine == -1) {
            endLine = sheet.getLastRowNum() + 1;
        } else {
            endLine += 1;
        }
        for (int i = startLine; i < endLine; i++) {
            Row row = sheet.getRow(i);
            if (row == null) {
                System.out.println("该行为空,直接跳过");
                continue;
            }
            int rowSize = row.getLastCellNum();
            List<String> rowList = new ArrayList<String>();
            for (int j = 0; j < rowSize; j++) {
                Cell cell = row.getCell(j);
                String temp = "";
                if (cell == null) {
                    System.out.println("该列为空,赋值双引号");
                    temp = "NULL";
                } else {
                    int cellType = cell.getCellType();
                    switch (cellType) {
                    case Cell.CELL_TYPE_STRING:
                        temp = cell.getStringCellValue().trim();
                        temp = StringUtils.isEmpty(temp) ? "NULL" : temp;
                        break;
                    case Cell.CELL_TYPE_BOOLEAN:
                        temp = String.valueOf(cell.getBooleanCellValue());
                        break;
                    case Cell.CELL_TYPE_FORMULA:
                        temp = String.valueOf(cell.getCellFormula().trim());
                        break;
                    case Cell.CELL_TYPE_NUMERIC:
                        if (HSSFDateUtil.isCellDateFormatted(cell)) {
                            temp = DateUtil.parseToString(cell.getDateCellValue(),
                                    DateUtil.FORMAT_DATE);
                        } else {
                            temp = new DecimalFormat("#.######").format(cell.getNumericCellValue());
                        }
                        break;
                    case Cell.CELL_TYPE_BLANK:
                        temp = "NULL";
                        break;
                    case Cell.CELL_TYPE_ERROR:
                        temp = "ERROR";
                        break;
                    default:
                        temp = cell.toString().trim();
                        break;
                    }
                }
                rowList.add(temp);
            }
            list.add(rowList);
        }
        return list;
    }
 
    public static void main(String a[]) {
        String path = "D:\\test.xlsx";
        Workbook wb = getWb(path);
        List<List<String>> list = getExcelRows(getSheet(wb, 0), -1, -1);
                List<List<String>> listList = new ArrayList<>();
                List<List<CheckItemIndexInfos>> checkItemIndexInfos = new ArrayList<>();
                 for (List<String> row : list) {
                  / / 过滤掉无用数据
                  row = row.stream().filter(s -> !s.equals("NULL")).collect(Collectors.toList());
                if (row.size() > 0) {
                    listList.add(row);
                }
            }
        for (int i = 0; i < listList .size(); i++) {
            List<String> row = listList .get(i);
            for (int j = 0; j < row.size(); j++) {
                System.out.print(row.get(j) + "\t");
            }
            System.out.println();
        }
    }
 
}

maven 依赖如下:


<dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.11</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.11</version>
        </dependency>

代码流程:

首先根据文件路径去创建一个工作簿对象Workbook

然后通过工作簿对象获取一个工作表对象Sheet

最后对工作表的row进行遍历

注意获取每行的列数,我用的row.getLastCellNum();而不是row.getPhysicalNumberOfCells()

这个方法就可以处理不规则的excel单元格内容了,即某一行3列,某一行4列,5列之类的

然后对行循环内存的单元列也要进行空判断,防止异常

代码提供的方法非常通用,只需传进去一个文件路径,和几个必要的参数即可,有其它需求的话,在此代码上二次开发非常简单

原文链接:https://blog.csdn.net/simonchi/article/details/50433219

推荐阅读更多精彩内容