package com.simuwang.base.common.util; import cn.hutool.core.collection.CollUtil; import cn.hutool.core.date.DateUtil; import cn.hutool.core.exceptions.ExceptionUtil; import cn.hutool.core.lang.Pair; import cn.hutool.core.util.StrUtil; import com.github.junrar.Archive; import com.github.junrar.exception.RarException; import com.github.junrar.rarfile.FileHeader; import com.simuwang.base.common.conts.DateConst; import com.simuwang.base.common.conts.EmailDataDirectionConst; import org.apache.commons.io.FileUtils; import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.poi.hssf.usermodel.*; import org.apache.poi.ss.usermodel.*; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.nio.file.Files; import java.nio.file.Paths; import java.text.NumberFormat; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; import org.apache.commons.compress.archivers.ArchiveEntry; import org.apache.commons.compress.archivers.ArchiveInputStream; import org.apache.commons.compress.archivers.ArchiveStreamFactory; import org.apache.commons.compress.utils.IOUtils; import technology.tabula.ObjectExtractor; import technology.tabula.Page; import technology.tabula.PageIterator; import technology.tabula.RectangularTextContainer; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; public class ExcelUtil { private static final Logger logger = LoggerFactory.getLogger(ExcelUtil.class); public static boolean isExcel(String fileName) { return StrUtil.isNotBlank(fileName) && (fileName.endsWith("xls") || fileName.endsWith("xlsx") || fileName.endsWith("XLS") || fileName.endsWith("XLSX")); } public static boolean isPdf(String fileName) { return StrUtil.isNotBlank(fileName) && (fileName.endsWith("pdf") || fileName.endsWith("PDF")); } public static boolean isZip(String fileName) { return StrUtil.isNotBlank(fileName) && (fileName.endsWith("zip") || fileName.endsWith("ZIP")); } public static boolean isHTML(String fileName) { return StrUtil.isNotBlank(fileName) && fileName.endsWith("html"); } public static boolean isRAR(String fileName) { return StrUtil.isNotBlank(fileName) && (fileName.endsWith("rar") || fileName.endsWith("RAR")); } public static Sheet getSheet(File file, int sheetIndex) { if (file == null || !file.exists()) { return null; } InputStream is = file2InStream(file); String[] arr = file.getName().split("\\."); String ext = arr[arr.length - 1]; Sheet sheet = null; try { sheet = getSheet(is, ext, sheetIndex); } catch (Exception e) { logger.error(e.getMessage()); if (e.getMessage().contains("XSSF instead of HSSF")) { is = file2InStream(file); try { sheet = getSheet(is, "xlsx", sheetIndex); } catch (IOException e1) { e1.printStackTrace(); } } else if (e.getMessage().contains("HSSF instead of XSSF")) { is = file2InStream(file); try { sheet = getSheet(is, "xls", sheetIndex); } catch (IOException e1) { e1.printStackTrace(); } } else if ("xls".equals(ext) && e.getMessage().contains("left 4 bytes remaining still to be read")) { file = changeXls(file); is = file2InStream(file); try { sheet = getSheet(is, "xls", sheetIndex); logger.info("文件转换成功!"); } catch (IOException e1) { e1.printStackTrace(); } } else { sheet = null; } } return sheet; } public static Sheet getSheet(InputStream is, String ext, int sheetIndex) throws IOException { if (ext == null) { ext = "xls"; } ext = ext.toLowerCase(); Sheet sheet = null; if ("xls".equals(ext)) { Workbook wb = new HSSFWorkbook(is); sheet = getSheet(wb, sheetIndex, null); } else if ("xlsx".equals(ext)) { Workbook wb = new XSSFWorkbook(is); sheet = getSheet(wb, sheetIndex, null); } return sheet; } private static Sheet getSheet(Workbook wb, int sheetIndex, String sheetName) throws IOException { if (wb != null) { Sheet sheet = null; if (sheetIndex > -1) { sheet = wb.getSheetAt(sheetIndex); } else if (StrUtil.isNotBlank(sheetName) && !"null".equals(sheetName.toLowerCase())) { sheet = wb.getSheet(sheetName); } wb.close(); // 同时会把文件输入流关闭 return sheet; } return null; } public static InputStream file2InStream(File file) { if (!file.exists()) { return null; } else { FileInputStream is = null; try { is = new FileInputStream(file); } catch (FileNotFoundException var3) { var3.printStackTrace(); } return is; } } public static File changeXls(File file) { try { jxl.Workbook workbook = jxl.Workbook.getWorkbook(file); String fileName = "copy-" + file.getName(); File file2 = new File(file.getParent() + "/" + fileName); if (file2.exists()) { return file2; } jxl.write.WritableWorkbook wwb = jxl.Workbook.createWorkbook(file2, workbook); wwb.write(); wwb.close(); workbook.close(); return file2; } catch (Exception e) { e.printStackTrace(); return null; } } public static String getCellValue(Cell cell) { if (cell == null) { return null; } String cellValue = ""; switch (cell.getCellTypeEnum()) { case STRING: cellValue = cell.getStringCellValue(); break; case NUMERIC: if (org.apache.poi.ss.usermodel.DateUtil.isCellDateFormatted(cell)) { // 如果是日期格式的数字 Date dateCellValue = cell.getDateCellValue(); cellValue = DateUtil.format(dateCellValue, DateConst.YYYY_MM_DD); } else { // 否则是纯数字 NumberFormat numberFormat = NumberFormat.getNumberInstance(); numberFormat.setMaximumFractionDigits(15); double formulaResult = cell.getNumericCellValue(); cellValue = numberFormat.format(formulaResult).replaceAll(",", ""); } break; case BOOLEAN: cellValue = String.valueOf(cell.getBooleanCellValue()); break; case FORMULA: // 处理公式结果 try { cellValue = String.valueOf(cell.getNumericCellValue()); } catch (IllegalStateException e) { cellValue = cell.getStringCellValue(); } break; case BLANK: break; case ERROR: cellValue = "ERROR: " + cell.getErrorCellValue(); break; default: cellValue = ""; break; } // 去掉换行符号 cellValue = StrUtil.isNotBlank(cellValue) ? cellValue.replaceAll("[\\r\\n]+", "").replaceAll(",","").trim() : ""; return cellValue; } public static List extractCompressedFiles(String zipFilePath, String destFilePath) { List filePathList = CollUtil.newArrayList(); File destFile = new File(destFilePath); if (!destFile.exists()) { destFile.mkdirs(); } try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipFilePath)); ArchiveInputStream ais = new ArchiveStreamFactory().createArchiveInputStream(fis)) { ArchiveEntry entry; while ((entry = ais.getNextEntry()) != null) { File entryFile = new File(destFilePath, entry.getName()); if (entry.isDirectory()) { entryFile.mkdirs(); } else { try (FileOutputStream fos = new FileOutputStream(entryFile)) { IOUtils.copy(ais, fos); filePathList.add(entryFile.getPath()); } } } } catch (Exception e) { e.printStackTrace(); } return filePathList; } public static List extractRar(String inputFilePath, String outputDirPath){ List fileList = new ArrayList<>(); // 创建Archive对象,用于读取rar压缩文件格式 try{ Archive archive = new Archive(new FileInputStream(inputFilePath)); // 读取压缩文件中的所有子目录或子文件(FileHeader对象) List fileHeaderList = archive.getFileHeaders(); // 遍历子目录和子文件 for (FileHeader fd:fileHeaderList) { System.out.println(fd.getFileName()); File f = new File(outputDirPath+"/"+fd.getFileName()); if(fd.isDirectory()){ // 创建新子目录 f.mkdirs(); }else{ // 创建新子文件 f.createNewFile(); // 获取压缩包中的子文件输出流 InputStream in = archive.getInputStream(fd); // 复制文件输入流至新子文件 FileUtils.copyInputStreamToFile(in,f); fileList.add(f.getAbsolutePath()); } } } catch (Exception e) { logger.error(e.getMessage(),e); //调用unrar命令解压 try{ unrar(inputFilePath,outputDirPath); File dir = new File(outputDirPath); for (File file : Objects.requireNonNull(dir.listFiles())) { logger.info(file.getAbsolutePath()); fileList.add(file.getAbsolutePath()); } }catch (Exception e1){ logger.error(e.getMessage(),e1); } } return fileList; } /** * 解压RAR文件 * @param rarFilePath RAR文件路径 * @param destDir 目标解压目录 * @throws IOException 如果IO错误或解压失败 * @throws InterruptedException 如果进程被中断 */ public static void unrar(String rarFilePath, String destDir) throws IOException, InterruptedException { // 确保目标目录存在 File destDirFile = new File(destDir); if(!destDirFile.exists()){ destDirFile.mkdirs(); } // 构建解压命令 ProcessBuilder processBuilder = new ProcessBuilder( "unrar", "x", "-o+", rarFilePath, destDir ); processBuilder.redirectErrorStream(true); // 合并标准错误流和标准输出流 // 执行命令 Process process = processBuilder.start(); // 读取命令输出(避免阻塞) try (BufferedReader reader = new BufferedReader( new InputStreamReader(process.getInputStream()))) { String line; while ((line = reader.readLine()) != null) { // 可选:处理输出信息(例如记录日志) System.out.println(line); } } // 等待命令执行完成 int exitCode = process.waitFor(); if (exitCode != 0) { throw new IOException("解压失败,退出码: " + exitCode); } } public static void writeDataToSheet(Sheet sheet, Elements rows) { int rowSize = rows.size(); for (int rowNum = 0; rowNum < rowSize; rowNum++) { Row sheetRow = sheet.createRow(rowNum); Element elementRow = rows.get(rowNum); Elements cells = elementRow.select("td"); if (cells.size() == 0) { cells = elementRow.select("th"); } int cellSize = cells.size(); for (int cellNum = 0; cellNum < cellSize; cellNum++) { Cell sheetRowCell = sheetRow.createCell(cellNum); sheetRowCell.setCellValue(cells.get(cellNum).text()); } } } /** * 获取优先级高的字段值 * * @param sheetRow 行 * @param priorityPosition 字段位置 * @param basePosition 优先级搞的字段位置 * @return 优先级高的字段值 */ public static String getPriorityFieldValue(Row sheetRow, Integer priorityPosition, Integer basePosition) { boolean hasPriorityValue = priorityPosition != null && sheetRow.getCell(priorityPosition) != null && StrUtil.isNotBlank(ExcelUtil.getCellValue(sheetRow.getCell(priorityPosition))); if (hasPriorityValue) { return ExcelUtil.getCellValue(sheetRow.getCell(priorityPosition)); } return basePosition != null && sheetRow.getCell(basePosition) != null ? ExcelUtil.getCellValue(sheetRow.getCell(basePosition)) : null; } /** * 去掉逗号 * * @param numberData 数字型字符串 * @return 无逗号的数字型字符串 */ public static String numberDataStripCommas(String numberData) { if (StrUtil.isBlank(numberData)) { return null; } // pdf解析到的值带有",",比如:"10,656,097.37" String data = numberData.replaceAll(",", ""); if (!StringUtil.isNumeric(data)) { return null; } return data; } public static HSSFWorkbook getHSSFWorkbook(String sheetName, List title, Map>> valueMap, HSSFWorkbook wb) { // 第一步,创建一个HSSFWorkbook,对应一个Excel文件 if (wb == null) { wb = new HSSFWorkbook(); } try { // 第二步,在workbook中添加一个sheet,对应Excel文件中的sheet HSSFSheet sheet = wb.createSheet(sheetName); // 第三步,在sheet中添加表头第0行,注意老版本poi对Excel的行数列数有限制 HSSFRow row = sheet.createRow(0); // 第四步,创建单元格,并设置值表头 设置表头居中 HSSFCellStyle style = wb.createCellStyle(); style.setAlignment(HorizontalAlignment.LEFT); style.setWrapText(true); sheet.setColumnWidth(0, 10000); sheet.setColumnWidth(1, 10000); sheet.setColumnWidth(2, 5000); sheet.setColumnWidth(3, 8000); sheet.setColumnWidth(4, 8000); sheet.setColumnWidth(5, 5000); //声明列对象 HSSFCell cell = null; //创建标题 for (int i = 0; i < title.size(); i++) { cell = row.createCell(i); cell.setCellValue(title.get(i)); cell.setCellStyle(style); } List> values = valueMap.get(sheetName); //创建内容 for (int i = 0; i < values.size(); i++) { row = sheet.createRow(i + 1); for (int j = 0; j < values.get(i).size(); j++) { //将内容按顺序赋给对应的列对象 row.createCell(j).setCellValue(values.get(i).get(j)); } } } catch (Exception e) { logger.error(e.getMessage(), e); } return wb; } public static Sheet getFirstSheet(String filePath) { if (StrUtil.isBlank(filePath)) { return null; } try { File file = new File(filePath); return ExcelUtil.getSheet(file, 0); } catch (Exception e) { logger.error("获取文件的sheet错误 -> 文件路径:{},堆栈信息为:{}", filePath, ExceptionUtil.stacktraceToString(e)); } return null; } public static String pdfConvertToExcel(String filePath, String excelFilePath) { File savefile = new File(excelFilePath); if (!savefile.exists()) { if (!savefile.getParentFile().exists()) { savefile.getParentFile().mkdirs(); savefile.getParentFile().setExecutable(true); } } try (OutputStream outputStream = Files.newOutputStream(Paths.get(excelFilePath))) { PDDocument document = Loader.loadPDF(new File(filePath)); PageIterator extract = new ObjectExtractor(document).extract(); Workbook workbook = new XSSFWorkbook(); Sheet sheet = workbook.createSheet("Sheet1"); int preEndRowIndex =0; while (extract.hasNext()) { Page next = extract.next(); List tableList = new SpreadsheetExtractionAlgorithm().extract(next); for (technology.tabula.Table table : tableList) { List> rows = table.getRows(); int rowLength = rows.size(); for (int rowNum = 0; rowNum < rowLength; rowNum++) { Row sheetRow = sheet.createRow(rowNum + preEndRowIndex); List textContainerList = rows.get(rowNum); for (int cellNum = 0; cellNum < textContainerList.size(); cellNum++) { Cell cell = sheetRow.createCell(cellNum); RectangularTextContainer textContainer = textContainerList.get(cellNum); if (textContainer != null) { cell.setCellValue(textContainer.getText()); } } } preEndRowIndex = preEndRowIndex + rowLength - 1 < 0 ? 0 : preEndRowIndex + rowLength - 1; } } // 将Excel工作簿写入输出流 workbook.write(outputStream); } catch (Exception e) { logger.error("解析邮件pdf附件报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e)); return null; } return excelFilePath; } public static String contentConvertToExcel(String emailContent, String excelFilePath) { Elements rows; try { Document doc = Jsoup.parse(emailContent); Element table = doc.select("table").first(); rows = table.select("tr"); } catch (Exception e) { return null; } File saveFile = new File(excelFilePath); if (!saveFile.exists()) { if (!saveFile.getParentFile().exists()) { saveFile.getParentFile().mkdirs(); saveFile.getParentFile().setExecutable(true); } } try (OutputStream outputStream = new FileOutputStream(saveFile)) { // 创建一个新的Excel工作簿 Workbook workbook = new XSSFWorkbook(); Sheet sheet = workbook.createSheet("Sheet1"); ExcelUtil.writeDataToSheet(sheet, rows); // 将Excel工作簿写入输出流 workbook.write(outputStream); } catch (Exception e) { logger.error("邮件正文转换成excel报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e)); return null; } return excelFilePath; } /** * 通过表头所在位置判断是行数据还是列数据 * * @param fieldPositionMap excel中表头所在的位置 * @return 行方向-1,,列方向-2 */ public static Integer detectDataDirection(Map> fieldPositionMap) { long count = fieldPositionMap.values().stream().map(Pair::getValue).distinct().count(); return count == 1 ? EmailDataDirectionConst.COLUMN_DIRECTION_TYPE : EmailDataDirectionConst.ROW_DIRECTION_TYPE; } // Excel 的起始日期是 1900 年 1 月 1 日 private static final LocalDate EXCEL_EPOCH_DATE = LocalDate.of(1900, 1, 1); public static String convertExcelDateToString(String dateNum) { LocalDate startDate = LocalDate.of(1900, 1, 1); LocalDate localDate = startDate.plusDays(Math.round(Double.valueOf(dateNum) - 2)); DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); return localDate.format(formatter); } }