ExcelUtil.java 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. package com.simuwang.base.common.util;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.date.DateUtil;
  4. import cn.hutool.core.exceptions.ExceptionUtil;
  5. import cn.hutool.core.lang.Pair;
  6. import cn.hutool.core.util.StrUtil;
  7. import com.github.junrar.Archive;
  8. import com.github.junrar.exception.RarException;
  9. import com.github.junrar.rarfile.FileHeader;
  10. import com.simuwang.base.common.conts.DateConst;
  11. import com.simuwang.base.common.conts.EmailDataDirectionConst;
  12. import org.apache.commons.io.FileUtils;
  13. import org.apache.pdfbox.Loader;
  14. import org.apache.pdfbox.pdmodel.PDDocument;
  15. import org.apache.poi.hssf.usermodel.*;
  16. import org.apache.poi.ss.usermodel.*;
  17. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  18. import org.jsoup.Jsoup;
  19. import org.jsoup.nodes.Document;
  20. import org.jsoup.nodes.Element;
  21. import org.jsoup.select.Elements;
  22. import org.slf4j.Logger;
  23. import org.slf4j.LoggerFactory;
  24. import java.io.*;
  25. import java.nio.file.Files;
  26. import java.nio.file.Paths;
  27. import java.text.NumberFormat;
  28. import java.time.LocalDate;
  29. import java.time.format.DateTimeFormatter;
  30. import java.util.*;
  31. import org.apache.commons.compress.archivers.ArchiveEntry;
  32. import org.apache.commons.compress.archivers.ArchiveInputStream;
  33. import org.apache.commons.compress.archivers.ArchiveStreamFactory;
  34. import org.apache.commons.compress.utils.IOUtils;
  35. import technology.tabula.ObjectExtractor;
  36. import technology.tabula.Page;
  37. import technology.tabula.PageIterator;
  38. import technology.tabula.RectangularTextContainer;
  39. import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
  40. public class ExcelUtil {
  41. private static final Logger logger = LoggerFactory.getLogger(ExcelUtil.class);
  42. public static boolean isExcel(String fileName) {
  43. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("xls") || fileName.endsWith("xlsx") || fileName.endsWith("XLS") || fileName.endsWith("XLSX"));
  44. }
  45. public static boolean isPdf(String fileName) {
  46. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("pdf") || fileName.endsWith("PDF"));
  47. }
  48. public static boolean isZip(String fileName) {
  49. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("zip") || fileName.endsWith("ZIP"));
  50. }
  51. public static boolean isHTML(String fileName) {
  52. return StrUtil.isNotBlank(fileName) && fileName.endsWith("html");
  53. }
  54. public static boolean isRAR(String fileName) {
  55. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("rar") || fileName.endsWith("RAR"));
  56. }
  57. public static Sheet getSheet(File file, int sheetIndex) {
  58. if (file == null || !file.exists()) {
  59. return null;
  60. }
  61. InputStream is = file2InStream(file);
  62. String[] arr = file.getName().split("\\.");
  63. String ext = arr[arr.length - 1];
  64. Sheet sheet = null;
  65. try {
  66. sheet = getSheet(is, ext, sheetIndex);
  67. } catch (Exception e) {
  68. logger.error(e.getMessage());
  69. if (e.getMessage().contains("XSSF instead of HSSF")) {
  70. is = file2InStream(file);
  71. try {
  72. sheet = getSheet(is, "xlsx", sheetIndex);
  73. } catch (IOException e1) {
  74. e1.printStackTrace();
  75. }
  76. } else if (e.getMessage().contains("HSSF instead of XSSF")) {
  77. is = file2InStream(file);
  78. try {
  79. sheet = getSheet(is, "xls", sheetIndex);
  80. } catch (IOException e1) {
  81. e1.printStackTrace();
  82. }
  83. } else if ("xls".equals(ext) && e.getMessage().contains("left 4 bytes remaining still to be read")) {
  84. file = changeXls(file);
  85. is = file2InStream(file);
  86. try {
  87. sheet = getSheet(is, "xls", sheetIndex);
  88. logger.info("文件转换成功!");
  89. } catch (IOException e1) {
  90. e1.printStackTrace();
  91. }
  92. } else {
  93. sheet = null;
  94. }
  95. }
  96. return sheet;
  97. }
  98. public static Sheet getSheet(InputStream is, String ext, int sheetIndex) throws IOException {
  99. if (ext == null) {
  100. ext = "xls";
  101. }
  102. ext = ext.toLowerCase();
  103. Sheet sheet = null;
  104. if ("xls".equals(ext)) {
  105. Workbook wb = new HSSFWorkbook(is);
  106. sheet = getSheet(wb, sheetIndex, null);
  107. } else if ("xlsx".equals(ext)) {
  108. Workbook wb = new XSSFWorkbook(is);
  109. sheet = getSheet(wb, sheetIndex, null);
  110. }
  111. return sheet;
  112. }
  113. private static Sheet getSheet(Workbook wb, int sheetIndex, String sheetName) throws IOException {
  114. if (wb != null) {
  115. Sheet sheet = null;
  116. if (sheetIndex > -1) {
  117. sheet = wb.getSheetAt(sheetIndex);
  118. } else if (StrUtil.isNotBlank(sheetName) && !"null".equals(sheetName.toLowerCase())) {
  119. sheet = wb.getSheet(sheetName);
  120. }
  121. wb.close();
  122. // 同时会把文件输入流关闭
  123. return sheet;
  124. }
  125. return null;
  126. }
  127. public static InputStream file2InStream(File file) {
  128. if (!file.exists()) {
  129. return null;
  130. } else {
  131. FileInputStream is = null;
  132. try {
  133. is = new FileInputStream(file);
  134. } catch (FileNotFoundException var3) {
  135. var3.printStackTrace();
  136. }
  137. return is;
  138. }
  139. }
  140. public static File changeXls(File file) {
  141. try {
  142. jxl.Workbook workbook = jxl.Workbook.getWorkbook(file);
  143. String fileName = "copy-" + file.getName();
  144. File file2 = new File(file.getParent() + "/" + fileName);
  145. if (file2.exists()) {
  146. return file2;
  147. }
  148. jxl.write.WritableWorkbook wwb = jxl.Workbook.createWorkbook(file2, workbook);
  149. wwb.write();
  150. wwb.close();
  151. workbook.close();
  152. return file2;
  153. } catch (Exception e) {
  154. e.printStackTrace();
  155. return null;
  156. }
  157. }
  158. public static String getCellValue(Cell cell) {
  159. if (cell == null) {
  160. return null;
  161. }
  162. String cellValue = "";
  163. switch (cell.getCellTypeEnum()) {
  164. case STRING:
  165. cellValue = cell.getStringCellValue();
  166. break;
  167. case NUMERIC:
  168. if (org.apache.poi.ss.usermodel.DateUtil.isCellDateFormatted(cell)) {
  169. // 如果是日期格式的数字
  170. Date dateCellValue = cell.getDateCellValue();
  171. cellValue = DateUtil.format(dateCellValue, DateConst.YYYY_MM_DD);
  172. } else {
  173. // 否则是纯数字
  174. NumberFormat numberFormat = NumberFormat.getNumberInstance();
  175. numberFormat.setMaximumFractionDigits(15);
  176. double formulaResult = cell.getNumericCellValue();
  177. cellValue = numberFormat.format(formulaResult).replaceAll(",", "");
  178. }
  179. break;
  180. case BOOLEAN:
  181. cellValue = String.valueOf(cell.getBooleanCellValue());
  182. break;
  183. case FORMULA:
  184. // 处理公式结果
  185. try {
  186. cellValue = String.valueOf(cell.getNumericCellValue());
  187. } catch (IllegalStateException e) {
  188. cellValue = cell.getStringCellValue();
  189. }
  190. break;
  191. case BLANK:
  192. break;
  193. case ERROR:
  194. cellValue = "ERROR: " + cell.getErrorCellValue();
  195. break;
  196. default:
  197. cellValue = "";
  198. break;
  199. }
  200. // 去掉换行符号
  201. cellValue = StrUtil.isNotBlank(cellValue) ? cellValue.replaceAll("[\\r\\n]+", "").replaceAll(",","").trim() : "";
  202. return cellValue;
  203. }
  204. public static List<String> extractCompressedFiles(String zipFilePath, String destFilePath) {
  205. List<String> filePathList = CollUtil.newArrayList();
  206. File destFile = new File(destFilePath);
  207. if (!destFile.exists()) {
  208. destFile.mkdirs();
  209. }
  210. try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipFilePath));
  211. ArchiveInputStream ais = new ArchiveStreamFactory().createArchiveInputStream(fis)) {
  212. ArchiveEntry entry;
  213. while ((entry = ais.getNextEntry()) != null) {
  214. File entryFile = new File(destFilePath, entry.getName());
  215. if (entry.isDirectory()) {
  216. entryFile.mkdirs();
  217. } else {
  218. try (FileOutputStream fos = new FileOutputStream(entryFile)) {
  219. IOUtils.copy(ais, fos);
  220. filePathList.add(entryFile.getPath());
  221. }
  222. }
  223. }
  224. } catch (Exception e) {
  225. e.printStackTrace();
  226. }
  227. return filePathList;
  228. }
  229. public static List<String> extractRar(String inputFilePath, String outputDirPath){
  230. List<String> fileList = new ArrayList<>();
  231. // 创建Archive对象,用于读取rar压缩文件格式
  232. try{
  233. Archive archive = new Archive(new FileInputStream(inputFilePath));
  234. // 读取压缩文件中的所有子目录或子文件(FileHeader对象)
  235. List<FileHeader> fileHeaderList = archive.getFileHeaders();
  236. // 遍历子目录和子文件
  237. for (FileHeader fd:fileHeaderList) {
  238. System.out.println(fd.getFileName());
  239. File f = new File(outputDirPath+"/"+fd.getFileName());
  240. if(fd.isDirectory()){
  241. // 创建新子目录
  242. f.mkdirs();
  243. }else{
  244. // 创建新子文件
  245. f.createNewFile();
  246. // 获取压缩包中的子文件输出流
  247. InputStream in = archive.getInputStream(fd);
  248. // 复制文件输入流至新子文件
  249. FileUtils.copyInputStreamToFile(in,f);
  250. fileList.add(f.getAbsolutePath());
  251. }
  252. }
  253. } catch (Exception e) {
  254. logger.error(e.getMessage(),e);
  255. //调用unrar命令解压
  256. try{
  257. unrar(inputFilePath,outputDirPath);
  258. File dir = new File(outputDirPath);
  259. for (File file : Objects.requireNonNull(dir.listFiles())) {
  260. logger.info(file.getAbsolutePath());
  261. fileList.add(file.getAbsolutePath());
  262. }
  263. }catch (Exception e1){
  264. logger.error(e.getMessage(),e1);
  265. }
  266. }
  267. return fileList;
  268. }
  269. /**
  270. * 解压RAR文件
  271. * @param rarFilePath RAR文件路径
  272. * @param destDir 目标解压目录
  273. * @throws IOException 如果IO错误或解压失败
  274. * @throws InterruptedException 如果进程被中断
  275. */
  276. public static void unrar(String rarFilePath, String destDir) throws IOException, InterruptedException {
  277. // 确保目标目录存在
  278. File destDirFile = new File(destDir);
  279. if(!destDirFile.exists()){
  280. destDirFile.mkdirs();
  281. }
  282. // 构建解压命令
  283. ProcessBuilder processBuilder = new ProcessBuilder(
  284. "unrar", "x", "-o+", rarFilePath, destDir
  285. );
  286. processBuilder.redirectErrorStream(true); // 合并标准错误流和标准输出流
  287. // 执行命令
  288. Process process = processBuilder.start();
  289. // 读取命令输出(避免阻塞)
  290. try (BufferedReader reader = new BufferedReader(
  291. new InputStreamReader(process.getInputStream()))) {
  292. String line;
  293. while ((line = reader.readLine()) != null) {
  294. // 可选:处理输出信息(例如记录日志)
  295. System.out.println(line);
  296. }
  297. }
  298. // 等待命令执行完成
  299. int exitCode = process.waitFor();
  300. if (exitCode != 0) {
  301. throw new IOException("解压失败,退出码: " + exitCode);
  302. }
  303. }
  304. public static void writeDataToSheet(Sheet sheet, Elements rows) {
  305. int rowSize = rows.size();
  306. for (int rowNum = 0; rowNum < rowSize; rowNum++) {
  307. Row sheetRow = sheet.createRow(rowNum);
  308. Element elementRow = rows.get(rowNum);
  309. Elements cells = elementRow.select("td");
  310. if (cells.size() == 0) {
  311. cells = elementRow.select("th");
  312. }
  313. int cellSize = cells.size();
  314. for (int cellNum = 0; cellNum < cellSize; cellNum++) {
  315. Cell sheetRowCell = sheetRow.createCell(cellNum);
  316. sheetRowCell.setCellValue(cells.get(cellNum).text());
  317. }
  318. }
  319. }
  320. /**
  321. * 获取优先级高的字段值
  322. *
  323. * @param sheetRow 行
  324. * @param priorityPosition 字段位置
  325. * @param basePosition 优先级搞的字段位置
  326. * @return 优先级高的字段值
  327. */
  328. public static String getPriorityFieldValue(Row sheetRow, Integer priorityPosition, Integer basePosition) {
  329. boolean hasPriorityValue = priorityPosition != null && sheetRow.getCell(priorityPosition) != null && StrUtil.isNotBlank(ExcelUtil.getCellValue(sheetRow.getCell(priorityPosition)));
  330. if (hasPriorityValue) {
  331. return ExcelUtil.getCellValue(sheetRow.getCell(priorityPosition));
  332. }
  333. return basePosition != null && sheetRow.getCell(basePosition) != null ? ExcelUtil.getCellValue(sheetRow.getCell(basePosition)) : null;
  334. }
  335. /**
  336. * 去掉逗号
  337. *
  338. * @param numberData 数字型字符串
  339. * @return 无逗号的数字型字符串
  340. */
  341. public static String numberDataStripCommas(String numberData) {
  342. if (StrUtil.isBlank(numberData)) {
  343. return null;
  344. }
  345. // pdf解析到的值带有",",比如:"10,656,097.37"
  346. String data = numberData.replaceAll(",", "");
  347. if (!StringUtil.isNumeric(data)) {
  348. return null;
  349. }
  350. return data;
  351. }
  352. public static HSSFWorkbook getHSSFWorkbook(String sheetName, List<String> title, Map<String, List<List<String>>> valueMap, HSSFWorkbook wb) {
  353. // 第一步,创建一个HSSFWorkbook,对应一个Excel文件
  354. if (wb == null) {
  355. wb = new HSSFWorkbook();
  356. }
  357. try {
  358. // 第二步,在workbook中添加一个sheet,对应Excel文件中的sheet
  359. HSSFSheet sheet = wb.createSheet(sheetName);
  360. // 第三步,在sheet中添加表头第0行,注意老版本poi对Excel的行数列数有限制
  361. HSSFRow row = sheet.createRow(0);
  362. // 第四步,创建单元格,并设置值表头 设置表头居中
  363. HSSFCellStyle style = wb.createCellStyle();
  364. style.setAlignment(HorizontalAlignment.LEFT);
  365. style.setWrapText(true);
  366. sheet.setColumnWidth(0, 10000);
  367. sheet.setColumnWidth(1, 10000);
  368. sheet.setColumnWidth(2, 5000);
  369. sheet.setColumnWidth(3, 8000);
  370. sheet.setColumnWidth(4, 8000);
  371. sheet.setColumnWidth(5, 5000);
  372. //声明列对象
  373. HSSFCell cell = null;
  374. //创建标题
  375. for (int i = 0; i < title.size(); i++) {
  376. cell = row.createCell(i);
  377. cell.setCellValue(title.get(i));
  378. cell.setCellStyle(style);
  379. }
  380. List<List<String>> values = valueMap.get(sheetName);
  381. //创建内容
  382. for (int i = 0; i < values.size(); i++) {
  383. row = sheet.createRow(i + 1);
  384. for (int j = 0; j < values.get(i).size(); j++) {
  385. //将内容按顺序赋给对应的列对象
  386. row.createCell(j).setCellValue(values.get(i).get(j));
  387. }
  388. }
  389. } catch (Exception e) {
  390. logger.error(e.getMessage(), e);
  391. }
  392. return wb;
  393. }
  394. public static Sheet getFirstSheet(String filePath) {
  395. if (StrUtil.isBlank(filePath)) {
  396. return null;
  397. }
  398. try {
  399. File file = new File(filePath);
  400. return ExcelUtil.getSheet(file, 0);
  401. } catch (Exception e) {
  402. logger.error("获取文件的sheet错误 -> 文件路径:{},堆栈信息为:{}", filePath, ExceptionUtil.stacktraceToString(e));
  403. }
  404. return null;
  405. }
  406. public static String pdfConvertToExcel(String filePath, String excelFilePath) {
  407. File savefile = new File(excelFilePath);
  408. if (!savefile.exists()) {
  409. if (!savefile.getParentFile().exists()) {
  410. savefile.getParentFile().mkdirs();
  411. savefile.getParentFile().setExecutable(true);
  412. }
  413. }
  414. try (OutputStream outputStream = Files.newOutputStream(Paths.get(excelFilePath))) {
  415. PDDocument document = Loader.loadPDF(new File(filePath));
  416. PageIterator extract = new ObjectExtractor(document).extract();
  417. Workbook workbook = new XSSFWorkbook();
  418. Sheet sheet = workbook.createSheet("Sheet1");
  419. int preEndRowIndex =0;
  420. while (extract.hasNext()) {
  421. Page next = extract.next();
  422. List<technology.tabula.Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
  423. for (technology.tabula.Table table : tableList) {
  424. List<List<RectangularTextContainer>> rows = table.getRows();
  425. int rowLength = rows.size();
  426. for (int rowNum = 0; rowNum < rowLength; rowNum++) {
  427. Row sheetRow = sheet.createRow(rowNum + preEndRowIndex);
  428. List<RectangularTextContainer> textContainerList = rows.get(rowNum);
  429. for (int cellNum = 0; cellNum < textContainerList.size(); cellNum++) {
  430. Cell cell = sheetRow.createCell(cellNum);
  431. RectangularTextContainer textContainer = textContainerList.get(cellNum);
  432. if (textContainer != null) {
  433. cell.setCellValue(textContainer.getText());
  434. }
  435. }
  436. }
  437. preEndRowIndex = preEndRowIndex + rowLength - 1 < 0 ? 0 : preEndRowIndex + rowLength - 1;
  438. }
  439. }
  440. // 将Excel工作簿写入输出流
  441. workbook.write(outputStream);
  442. } catch (Exception e) {
  443. logger.error("解析邮件pdf附件报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
  444. return null;
  445. }
  446. return excelFilePath;
  447. }
  448. public static String contentConvertToExcel(String emailContent, String excelFilePath) {
  449. Elements rows;
  450. try {
  451. Document doc = Jsoup.parse(emailContent);
  452. Element table = doc.select("table").first();
  453. rows = table.select("tr");
  454. } catch (Exception e) {
  455. return null;
  456. }
  457. File saveFile = new File(excelFilePath);
  458. if (!saveFile.exists()) {
  459. if (!saveFile.getParentFile().exists()) {
  460. saveFile.getParentFile().mkdirs();
  461. saveFile.getParentFile().setExecutable(true);
  462. }
  463. }
  464. try (OutputStream outputStream = new FileOutputStream(saveFile)) {
  465. // 创建一个新的Excel工作簿
  466. Workbook workbook = new XSSFWorkbook();
  467. Sheet sheet = workbook.createSheet("Sheet1");
  468. ExcelUtil.writeDataToSheet(sheet, rows);
  469. // 将Excel工作簿写入输出流
  470. workbook.write(outputStream);
  471. } catch (Exception e) {
  472. logger.error("邮件正文转换成excel报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
  473. return null;
  474. }
  475. return excelFilePath;
  476. }
  477. /**
  478. * 通过表头所在位置判断是行数据还是列数据
  479. *
  480. * @param fieldPositionMap excel中表头所在的位置
  481. * @return 行方向-1,,列方向-2
  482. */
  483. public static Integer detectDataDirection(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  484. long count = fieldPositionMap.values().stream().map(Pair::getValue).distinct().count();
  485. return count == 1 ? EmailDataDirectionConst.COLUMN_DIRECTION_TYPE : EmailDataDirectionConst.ROW_DIRECTION_TYPE;
  486. }
  487. // Excel 的起始日期是 1900 年 1 月 1 日
  488. private static final LocalDate EXCEL_EPOCH_DATE = LocalDate.of(1900, 1, 1);
  489. public static String convertExcelDateToString(String dateNum) {
  490. LocalDate startDate = LocalDate.of(1900, 1, 1);
  491. LocalDate localDate = startDate.plusDays(Math.round(Double.valueOf(dateNum) - 2));
  492. DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
  493. return localDate.format(formatter);
  494. }
  495. }