123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486 |
- package com.simuwang.base.common.util;
- import cn.hutool.core.collection.CollUtil;
- import cn.hutool.core.date.DateUtil;
- import cn.hutool.core.exceptions.ExceptionUtil;
- import cn.hutool.core.lang.Pair;
- import cn.hutool.core.util.StrUtil;
- import com.github.junrar.Archive;
- import com.github.junrar.exception.RarException;
- import com.github.junrar.rarfile.FileHeader;
- import com.simuwang.base.common.conts.DateConst;
- import com.simuwang.base.common.conts.EmailDataDirectionConst;
- import org.apache.commons.io.FileUtils;
- import org.apache.pdfbox.Loader;
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.apache.poi.hssf.usermodel.*;
- import org.apache.poi.ss.usermodel.*;
- import org.apache.poi.xssf.usermodel.XSSFWorkbook;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- import java.io.*;
- import java.nio.file.Files;
- import java.nio.file.Paths;
- import java.text.NumberFormat;
- import java.time.LocalDate;
- import java.time.format.DateTimeFormatter;
- import java.util.*;
- import org.apache.commons.compress.archivers.ArchiveEntry;
- import org.apache.commons.compress.archivers.ArchiveInputStream;
- import org.apache.commons.compress.archivers.ArchiveStreamFactory;
- import org.apache.commons.compress.utils.IOUtils;
- import technology.tabula.ObjectExtractor;
- import technology.tabula.Page;
- import technology.tabula.PageIterator;
- import technology.tabula.RectangularTextContainer;
- import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
- public class ExcelUtil {
- private static final Logger logger = LoggerFactory.getLogger(ExcelUtil.class);
- public static boolean isExcel(String fileName) {
- return StrUtil.isNotBlank(fileName) && (fileName.endsWith("xls") || fileName.endsWith("xlsx") || fileName.endsWith("XLS") || fileName.endsWith("XLSX"));
- }
- public static boolean isPdf(String fileName) {
- return StrUtil.isNotBlank(fileName) && (fileName.endsWith("pdf") || fileName.endsWith("PDF"));
- }
- public static boolean isZip(String fileName) {
- return StrUtil.isNotBlank(fileName) && (fileName.endsWith("zip") || fileName.endsWith("ZIP"));
- }
- public static boolean isHTML(String fileName) {
- return StrUtil.isNotBlank(fileName) && fileName.endsWith("html");
- }
- public static boolean isRAR(String fileName) {
- return StrUtil.isNotBlank(fileName) && (fileName.endsWith("rar") || fileName.endsWith("RAR"));
- }
- public static Sheet getSheet(File file, int sheetIndex) {
- if (file == null || !file.exists()) {
- return null;
- }
- InputStream is = file2InStream(file);
- String[] arr = file.getName().split("\\.");
- String ext = arr[arr.length - 1];
- Sheet sheet = null;
- try {
- sheet = getSheet(is, ext, sheetIndex);
- } catch (Exception e) {
- logger.error(e.getMessage());
- if (e.getMessage().contains("XSSF instead of HSSF")) {
- is = file2InStream(file);
- try {
- sheet = getSheet(is, "xlsx", sheetIndex);
- } catch (IOException e1) {
- e1.printStackTrace();
- }
- } else if (e.getMessage().contains("HSSF instead of XSSF")) {
- is = file2InStream(file);
- try {
- sheet = getSheet(is, "xls", sheetIndex);
- } catch (IOException e1) {
- e1.printStackTrace();
- }
- } else if ("xls".equals(ext) && e.getMessage().contains("left 4 bytes remaining still to be read")) {
- file = changeXls(file);
- is = file2InStream(file);
- try {
- sheet = getSheet(is, "xls", sheetIndex);
- logger.info("文件转换成功!");
- } catch (IOException e1) {
- e1.printStackTrace();
- }
- } else {
- sheet = null;
- }
- }
- return sheet;
- }
- public static Sheet getSheet(InputStream is, String ext, int sheetIndex) throws IOException {
- if (ext == null) {
- ext = "xls";
- }
- ext = ext.toLowerCase();
- Sheet sheet = null;
- if ("xls".equals(ext)) {
- Workbook wb = new HSSFWorkbook(is);
- sheet = getSheet(wb, sheetIndex, null);
- } else if ("xlsx".equals(ext)) {
- Workbook wb = new XSSFWorkbook(is);
- sheet = getSheet(wb, sheetIndex, null);
- }
- return sheet;
- }
- private static Sheet getSheet(Workbook wb, int sheetIndex, String sheetName) throws IOException {
- if (wb != null) {
- Sheet sheet = null;
- if (sheetIndex > -1) {
- sheet = wb.getSheetAt(sheetIndex);
- } else if (StrUtil.isNotBlank(sheetName) && !"null".equals(sheetName.toLowerCase())) {
- sheet = wb.getSheet(sheetName);
- }
- wb.close();
- // 同时会把文件输入流关闭
- return sheet;
- }
- return null;
- }
- public static InputStream file2InStream(File file) {
- if (!file.exists()) {
- return null;
- } else {
- FileInputStream is = null;
- try {
- is = new FileInputStream(file);
- } catch (FileNotFoundException var3) {
- var3.printStackTrace();
- }
- return is;
- }
- }
- public static File changeXls(File file) {
- try {
- jxl.Workbook workbook = jxl.Workbook.getWorkbook(file);
- String fileName = "copy-" + file.getName();
- File file2 = new File(file.getParent() + "/" + fileName);
- if (file2.exists()) {
- return file2;
- }
- jxl.write.WritableWorkbook wwb = jxl.Workbook.createWorkbook(file2, workbook);
- wwb.write();
- wwb.close();
- workbook.close();
- return file2;
- } catch (Exception e) {
- e.printStackTrace();
- return null;
- }
- }
- public static String getCellValue(Cell cell) {
- if (cell == null) {
- return null;
- }
- String cellValue = "";
- switch (cell.getCellTypeEnum()) {
- case STRING:
- cellValue = cell.getStringCellValue();
- break;
- case NUMERIC:
- if (org.apache.poi.ss.usermodel.DateUtil.isCellDateFormatted(cell)) {
- // 如果是日期格式的数字
- Date dateCellValue = cell.getDateCellValue();
- cellValue = DateUtil.format(dateCellValue, DateConst.YYYY_MM_DD);
- } else {
- // 否则是纯数字
- NumberFormat numberFormat = NumberFormat.getNumberInstance();
- numberFormat.setMaximumFractionDigits(15);
- double formulaResult = cell.getNumericCellValue();
- cellValue = numberFormat.format(formulaResult).replaceAll(",", "");
- }
- break;
- case BOOLEAN:
- cellValue = String.valueOf(cell.getBooleanCellValue());
- break;
- case FORMULA:
- // 处理公式结果
- try {
- cellValue = String.valueOf(cell.getNumericCellValue());
- } catch (IllegalStateException e) {
- cellValue = cell.getStringCellValue();
- }
- break;
- case BLANK:
- break;
- case ERROR:
- cellValue = "ERROR: " + cell.getErrorCellValue();
- break;
- default:
- cellValue = "";
- break;
- }
- // 去掉换行符号
- cellValue = StrUtil.isNotBlank(cellValue) ? cellValue.replaceAll("[\\r\\n]+", "").replaceAll(",","").trim() : "";
- return cellValue;
- }
- public static List<String> extractCompressedFiles(String zipFilePath, String destFilePath) {
- List<String> filePathList = CollUtil.newArrayList();
- File destFile = new File(destFilePath);
- if (!destFile.exists()) {
- destFile.mkdirs();
- }
- try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipFilePath));
- ArchiveInputStream ais = new ArchiveStreamFactory().createArchiveInputStream(fis)) {
- ArchiveEntry entry;
- while ((entry = ais.getNextEntry()) != null) {
- File entryFile = new File(destFilePath, entry.getName());
- if (entry.isDirectory()) {
- entryFile.mkdirs();
- } else {
- try (FileOutputStream fos = new FileOutputStream(entryFile)) {
- IOUtils.copy(ais, fos);
- filePathList.add(entryFile.getPath());
- }
- }
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
- return filePathList;
- }
- public static List<String> extractRar(String inputFilePath, String outputDirPath){
- List<String> fileList = new ArrayList<>();
- // 创建Archive对象,用于读取rar压缩文件格式
- try{
- Archive archive = new Archive(new FileInputStream(inputFilePath));
- // 读取压缩文件中的所有子目录或子文件(FileHeader对象)
- List<FileHeader> fileHeaderList = archive.getFileHeaders();
- // 遍历子目录和子文件
- for (FileHeader fd:fileHeaderList) {
- System.out.println(fd.getFileName());
- File f = new File(outputDirPath+"/"+fd.getFileName());
- if(fd.isDirectory()){
- // 创建新子目录
- f.mkdirs();
- }else{
- // 创建新子文件
- f.createNewFile();
- // 获取压缩包中的子文件输出流
- InputStream in = archive.getInputStream(fd);
- // 复制文件输入流至新子文件
- FileUtils.copyInputStreamToFile(in,f);
- fileList.add(f.getAbsolutePath());
- }
- }
- } catch (Exception e) {
- logger.error(e.getMessage(),e);
- }
- return fileList;
- }
- public static void writeDataToSheet(Sheet sheet, Elements rows) {
- int rowSize = rows.size();
- for (int rowNum = 0; rowNum < rowSize; rowNum++) {
- Row sheetRow = sheet.createRow(rowNum);
- Element elementRow = rows.get(rowNum);
- Elements cells = elementRow.select("td");
- if (cells.size() == 0) {
- cells = elementRow.select("th");
- }
- int cellSize = cells.size();
- for (int cellNum = 0; cellNum < cellSize; cellNum++) {
- Cell sheetRowCell = sheetRow.createCell(cellNum);
- sheetRowCell.setCellValue(cells.get(cellNum).text());
- }
- }
- }
- /**
- * 获取优先级高的字段值
- *
- * @param sheetRow 行
- * @param priorityPosition 字段位置
- * @param basePosition 优先级搞的字段位置
- * @return 优先级高的字段值
- */
- public static String getPriorityFieldValue(Row sheetRow, Integer priorityPosition, Integer basePosition) {
- boolean hasPriorityValue = priorityPosition != null && sheetRow.getCell(priorityPosition) != null && StrUtil.isNotBlank(ExcelUtil.getCellValue(sheetRow.getCell(priorityPosition)));
- if (hasPriorityValue) {
- return ExcelUtil.getCellValue(sheetRow.getCell(priorityPosition));
- }
- return basePosition != null && sheetRow.getCell(basePosition) != null ? ExcelUtil.getCellValue(sheetRow.getCell(basePosition)) : null;
- }
- /**
- * 去掉逗号
- *
- * @param numberData 数字型字符串
- * @return 无逗号的数字型字符串
- */
- public static String numberDataStripCommas(String numberData) {
- if (StrUtil.isBlank(numberData)) {
- return null;
- }
- // pdf解析到的值带有",",比如:"10,656,097.37"
- String data = numberData.replaceAll(",", "");
- if (!StringUtil.isNumeric(data)) {
- return null;
- }
- return data;
- }
- public static HSSFWorkbook getHSSFWorkbook(String sheetName, List<String> title, Map<String, List<List<String>>> valueMap, HSSFWorkbook wb) {
- // 第一步,创建一个HSSFWorkbook,对应一个Excel文件
- if (wb == null) {
- wb = new HSSFWorkbook();
- }
- try {
- // 第二步,在workbook中添加一个sheet,对应Excel文件中的sheet
- HSSFSheet sheet = wb.createSheet(sheetName);
- // 第三步,在sheet中添加表头第0行,注意老版本poi对Excel的行数列数有限制
- HSSFRow row = sheet.createRow(0);
- // 第四步,创建单元格,并设置值表头 设置表头居中
- HSSFCellStyle style = wb.createCellStyle();
- style.setAlignment(HorizontalAlignment.LEFT);
- style.setWrapText(true);
- sheet.setColumnWidth(0, 10000);
- sheet.setColumnWidth(1, 10000);
- sheet.setColumnWidth(2, 5000);
- sheet.setColumnWidth(3, 8000);
- sheet.setColumnWidth(4, 8000);
- sheet.setColumnWidth(5, 5000);
- //声明列对象
- HSSFCell cell = null;
- //创建标题
- for (int i = 0; i < title.size(); i++) {
- cell = row.createCell(i);
- cell.setCellValue(title.get(i));
- cell.setCellStyle(style);
- }
- List<List<String>> values = valueMap.get(sheetName);
- //创建内容
- for (int i = 0; i < values.size(); i++) {
- row = sheet.createRow(i + 1);
- for (int j = 0; j < values.get(i).size(); j++) {
- //将内容按顺序赋给对应的列对象
- row.createCell(j).setCellValue(values.get(i).get(j));
- }
- }
- } catch (Exception e) {
- logger.error(e.getMessage(), e);
- }
- return wb;
- }
- public static Sheet getFirstSheet(String filePath) {
- if (StrUtil.isBlank(filePath)) {
- return null;
- }
- try {
- File file = new File(filePath);
- return ExcelUtil.getSheet(file, 0);
- } catch (Exception e) {
- logger.error("获取文件的sheet错误 -> 文件路径:{},堆栈信息为:{}", filePath, ExceptionUtil.stacktraceToString(e));
- }
- return null;
- }
- public static String pdfConvertToExcel(String filePath, String excelFilePath) {
- File savefile = new File(excelFilePath);
- if (!savefile.exists()) {
- if (!savefile.getParentFile().exists()) {
- savefile.getParentFile().mkdirs();
- savefile.getParentFile().setExecutable(true);
- }
- }
- try (OutputStream outputStream = Files.newOutputStream(Paths.get(excelFilePath))) {
- PDDocument document = Loader.loadPDF(new File(filePath));
- PageIterator extract = new ObjectExtractor(document).extract();
- Workbook workbook = new XSSFWorkbook();
- Sheet sheet = workbook.createSheet("Sheet1");
- int preEndRowIndex =0;
- while (extract.hasNext()) {
- Page next = extract.next();
- List<technology.tabula.Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
- for (technology.tabula.Table table : tableList) {
- List<List<RectangularTextContainer>> rows = table.getRows();
- int rowLength = rows.size();
- for (int rowNum = 0; rowNum < rowLength; rowNum++) {
- Row sheetRow = sheet.createRow(rowNum + preEndRowIndex);
- List<RectangularTextContainer> textContainerList = rows.get(rowNum);
- for (int cellNum = 0; cellNum < textContainerList.size(); cellNum++) {
- Cell cell = sheetRow.createCell(cellNum);
- RectangularTextContainer textContainer = textContainerList.get(cellNum);
- if (textContainer != null) {
- cell.setCellValue(textContainer.getText());
- }
- }
- }
- preEndRowIndex = preEndRowIndex + rowLength -1;
- }
- }
- // 将Excel工作簿写入输出流
- workbook.write(outputStream);
- } catch (Exception e) {
- logger.error("解析邮件pdf附件报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
- return null;
- }
- return excelFilePath;
- }
- public static String contentConvertToExcel(String emailContent, String excelFilePath) {
- Elements rows;
- try {
- Document doc = Jsoup.parse(emailContent);
- Element table = doc.select("table").first();
- rows = table.select("tr");
- } catch (Exception e) {
- return null;
- }
- File saveFile = new File(excelFilePath);
- if (!saveFile.exists()) {
- if (!saveFile.getParentFile().exists()) {
- saveFile.getParentFile().mkdirs();
- saveFile.getParentFile().setExecutable(true);
- }
- }
- try (OutputStream outputStream = new FileOutputStream(saveFile)) {
- // 创建一个新的Excel工作簿
- Workbook workbook = new XSSFWorkbook();
- Sheet sheet = workbook.createSheet("Sheet1");
- ExcelUtil.writeDataToSheet(sheet, rows);
- // 将Excel工作簿写入输出流
- workbook.write(outputStream);
- } catch (Exception e) {
- logger.error("邮件正文转换成excel报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
- return null;
- }
- return excelFilePath;
- }
- /**
- * 通过表头所在位置判断是行数据还是列数据
- *
- * @param fieldPositionMap excel中表头所在的位置
- * @return 行方向-1,,列方向-2
- */
- public static Integer detectDataDirection(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
- long count = fieldPositionMap.values().stream().map(Pair::getValue).distinct().count();
- return count == 1 ? EmailDataDirectionConst.COLUMN_DIRECTION_TYPE : EmailDataDirectionConst.ROW_DIRECTION_TYPE;
- }
- // Excel 的起始日期是 1900 年 1 月 1 日
- private static final LocalDate EXCEL_EPOCH_DATE = LocalDate.of(1900, 1, 1);
- public static String convertExcelDateToString(String dateNum) {
- LocalDate startDate = LocalDate.of(1900, 1, 1);
- LocalDate localDate = startDate.plusDays(Math.round(Double.valueOf(dateNum) - 2));
- DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
- return localDate.format(formatter);
- }
- }
|