ExcelUtil.java 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. package com.simuwang.base.common.util;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.date.DateUtil;
  4. import cn.hutool.core.exceptions.ExceptionUtil;
  5. import cn.hutool.core.lang.Pair;
  6. import cn.hutool.core.util.StrUtil;
  7. import com.github.junrar.Archive;
  8. import com.github.junrar.exception.RarException;
  9. import com.github.junrar.rarfile.FileHeader;
  10. import com.simuwang.base.common.conts.DateConst;
  11. import com.simuwang.base.common.conts.EmailDataDirectionConst;
  12. import org.apache.commons.io.FileUtils;
  13. import org.apache.pdfbox.Loader;
  14. import org.apache.pdfbox.pdmodel.PDDocument;
  15. import org.apache.poi.hssf.usermodel.*;
  16. import org.apache.poi.ss.usermodel.*;
  17. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  18. import org.jsoup.Jsoup;
  19. import org.jsoup.nodes.Document;
  20. import org.jsoup.nodes.Element;
  21. import org.jsoup.select.Elements;
  22. import org.slf4j.Logger;
  23. import org.slf4j.LoggerFactory;
  24. import java.io.*;
  25. import java.nio.file.Files;
  26. import java.nio.file.Paths;
  27. import java.text.NumberFormat;
  28. import java.time.LocalDate;
  29. import java.time.format.DateTimeFormatter;
  30. import java.util.*;
  31. import org.apache.commons.compress.archivers.ArchiveEntry;
  32. import org.apache.commons.compress.archivers.ArchiveInputStream;
  33. import org.apache.commons.compress.archivers.ArchiveStreamFactory;
  34. import org.apache.commons.compress.utils.IOUtils;
  35. import technology.tabula.ObjectExtractor;
  36. import technology.tabula.Page;
  37. import technology.tabula.PageIterator;
  38. import technology.tabula.RectangularTextContainer;
  39. import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
  40. public class ExcelUtil {
  41. private static final Logger logger = LoggerFactory.getLogger(ExcelUtil.class);
  42. public static boolean isExcel(String fileName) {
  43. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("xls") || fileName.endsWith("xlsx") || fileName.endsWith("XLS") || fileName.endsWith("XLSX"));
  44. }
  45. public static boolean isPdf(String fileName) {
  46. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("pdf") || fileName.endsWith("PDF"));
  47. }
  48. public static boolean isZip(String fileName) {
  49. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("zip") || fileName.endsWith("ZIP"));
  50. }
  51. public static boolean isHTML(String fileName) {
  52. return StrUtil.isNotBlank(fileName) && fileName.endsWith("html");
  53. }
  54. public static boolean isRAR(String fileName) {
  55. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("rar") || fileName.endsWith("RAR"));
  56. }
  57. public static Sheet getSheet(File file, int sheetIndex) {
  58. if (file == null || !file.exists()) {
  59. return null;
  60. }
  61. InputStream is = file2InStream(file);
  62. String[] arr = file.getName().split("\\.");
  63. String ext = arr[arr.length - 1];
  64. Sheet sheet = null;
  65. try {
  66. sheet = getSheet(is, ext, sheetIndex);
  67. } catch (Exception e) {
  68. logger.error(e.getMessage());
  69. if (e.getMessage().contains("XSSF instead of HSSF")) {
  70. is = file2InStream(file);
  71. try {
  72. sheet = getSheet(is, "xlsx", sheetIndex);
  73. } catch (IOException e1) {
  74. e1.printStackTrace();
  75. }
  76. } else if (e.getMessage().contains("HSSF instead of XSSF")) {
  77. is = file2InStream(file);
  78. try {
  79. sheet = getSheet(is, "xls", sheetIndex);
  80. } catch (IOException e1) {
  81. e1.printStackTrace();
  82. }
  83. } else if ("xls".equals(ext) && e.getMessage().contains("left 4 bytes remaining still to be read")) {
  84. file = changeXls(file);
  85. is = file2InStream(file);
  86. try {
  87. sheet = getSheet(is, "xls", sheetIndex);
  88. logger.info("文件转换成功!");
  89. } catch (IOException e1) {
  90. e1.printStackTrace();
  91. }
  92. } else {
  93. sheet = null;
  94. }
  95. }
  96. return sheet;
  97. }
  98. public static Sheet getSheet(InputStream is, String ext, int sheetIndex) throws IOException {
  99. if (ext == null) {
  100. ext = "xls";
  101. }
  102. ext = ext.toLowerCase();
  103. Sheet sheet = null;
  104. if ("xls".equals(ext)) {
  105. Workbook wb = new HSSFWorkbook(is);
  106. sheet = getSheet(wb, sheetIndex, null);
  107. } else if ("xlsx".equals(ext)) {
  108. Workbook wb = new XSSFWorkbook(is);
  109. sheet = getSheet(wb, sheetIndex, null);
  110. }
  111. return sheet;
  112. }
  113. private static Sheet getSheet(Workbook wb, int sheetIndex, String sheetName) throws IOException {
  114. if (wb != null) {
  115. Sheet sheet = null;
  116. if (sheetIndex > -1) {
  117. sheet = wb.getSheetAt(sheetIndex);
  118. } else if (StrUtil.isNotBlank(sheetName) && !"null".equals(sheetName.toLowerCase())) {
  119. sheet = wb.getSheet(sheetName);
  120. }
  121. wb.close();
  122. // 同时会把文件输入流关闭
  123. return sheet;
  124. }
  125. return null;
  126. }
  127. public static InputStream file2InStream(File file) {
  128. if (!file.exists()) {
  129. return null;
  130. } else {
  131. FileInputStream is = null;
  132. try {
  133. is = new FileInputStream(file);
  134. } catch (FileNotFoundException var3) {
  135. var3.printStackTrace();
  136. }
  137. return is;
  138. }
  139. }
  140. public static File changeXls(File file) {
  141. try {
  142. jxl.Workbook workbook = jxl.Workbook.getWorkbook(file);
  143. String fileName = "copy-" + file.getName();
  144. File file2 = new File(file.getParent() + "/" + fileName);
  145. if (file2.exists()) {
  146. return file2;
  147. }
  148. jxl.write.WritableWorkbook wwb = jxl.Workbook.createWorkbook(file2, workbook);
  149. wwb.write();
  150. wwb.close();
  151. workbook.close();
  152. return file2;
  153. } catch (Exception e) {
  154. e.printStackTrace();
  155. return null;
  156. }
  157. }
  158. public static String getCellValue(Cell cell) {
  159. if (cell == null) {
  160. return null;
  161. }
  162. String cellValue = "";
  163. switch (cell.getCellTypeEnum()) {
  164. case STRING:
  165. cellValue = cell.getStringCellValue();
  166. break;
  167. case NUMERIC:
  168. if (org.apache.poi.ss.usermodel.DateUtil.isCellDateFormatted(cell)) {
  169. // 如果是日期格式的数字
  170. Date dateCellValue = cell.getDateCellValue();
  171. cellValue = DateUtil.format(dateCellValue, DateConst.YYYY_MM_DD);
  172. } else {
  173. // 否则是纯数字
  174. NumberFormat numberFormat = NumberFormat.getNumberInstance();
  175. numberFormat.setMaximumFractionDigits(15);
  176. double formulaResult = cell.getNumericCellValue();
  177. cellValue = numberFormat.format(formulaResult).replaceAll(",", "");
  178. }
  179. break;
  180. case BOOLEAN:
  181. cellValue = String.valueOf(cell.getBooleanCellValue());
  182. break;
  183. case FORMULA:
  184. // 处理公式结果
  185. try {
  186. cellValue = String.valueOf(cell.getNumericCellValue());
  187. } catch (IllegalStateException e) {
  188. cellValue = cell.getStringCellValue();
  189. }
  190. break;
  191. case BLANK:
  192. break;
  193. case ERROR:
  194. cellValue = "ERROR: " + cell.getErrorCellValue();
  195. break;
  196. default:
  197. cellValue = "";
  198. break;
  199. }
  200. // 去掉换行符号
  201. cellValue = StrUtil.isNotBlank(cellValue) ? cellValue.replaceAll("[\\r\\n]+", "").replaceAll(",","").trim() : "";
  202. return cellValue;
  203. }
  204. public static List<String> extractCompressedFiles(String zipFilePath, String destFilePath) {
  205. List<String> filePathList = CollUtil.newArrayList();
  206. File destFile = new File(destFilePath);
  207. if (!destFile.exists()) {
  208. destFile.mkdirs();
  209. }
  210. try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipFilePath));
  211. ArchiveInputStream ais = new ArchiveStreamFactory().createArchiveInputStream(fis)) {
  212. ArchiveEntry entry;
  213. while ((entry = ais.getNextEntry()) != null) {
  214. File entryFile = new File(destFilePath, entry.getName());
  215. if (entry.isDirectory()) {
  216. entryFile.mkdirs();
  217. } else {
  218. try (FileOutputStream fos = new FileOutputStream(entryFile)) {
  219. IOUtils.copy(ais, fos);
  220. filePathList.add(entryFile.getPath());
  221. }
  222. }
  223. }
  224. } catch (Exception e) {
  225. e.printStackTrace();
  226. }
  227. return filePathList;
  228. }
  229. public static List<String> extractRar(String inputFilePath, String outputDirPath){
  230. List<String> fileList = new ArrayList<>();
  231. // 创建Archive对象,用于读取rar压缩文件格式
  232. try{
  233. Archive archive = new Archive(new FileInputStream(inputFilePath));
  234. // 读取压缩文件中的所有子目录或子文件(FileHeader对象)
  235. List<FileHeader> fileHeaderList = archive.getFileHeaders();
  236. // 遍历子目录和子文件
  237. for (FileHeader fd:fileHeaderList) {
  238. System.out.println(fd.getFileName());
  239. File f = new File(outputDirPath+"/"+fd.getFileName());
  240. if(fd.isDirectory()){
  241. // 创建新子目录
  242. f.mkdirs();
  243. }else{
  244. // 创建新子文件
  245. f.createNewFile();
  246. // 获取压缩包中的子文件输出流
  247. InputStream in = archive.getInputStream(fd);
  248. // 复制文件输入流至新子文件
  249. FileUtils.copyInputStreamToFile(in,f);
  250. fileList.add(f.getAbsolutePath());
  251. }
  252. }
  253. } catch (Exception e) {
  254. logger.error(e.getMessage(),e);
  255. }
  256. return fileList;
  257. }
  258. public static void writeDataToSheet(Sheet sheet, Elements rows) {
  259. int rowSize = rows.size();
  260. for (int rowNum = 0; rowNum < rowSize; rowNum++) {
  261. Row sheetRow = sheet.createRow(rowNum);
  262. Element elementRow = rows.get(rowNum);
  263. Elements cells = elementRow.select("td");
  264. if (cells.size() == 0) {
  265. cells = elementRow.select("th");
  266. }
  267. int cellSize = cells.size();
  268. for (int cellNum = 0; cellNum < cellSize; cellNum++) {
  269. Cell sheetRowCell = sheetRow.createCell(cellNum);
  270. sheetRowCell.setCellValue(cells.get(cellNum).text());
  271. }
  272. }
  273. }
  274. /**
  275. * 获取优先级高的字段值
  276. *
  277. * @param sheetRow 行
  278. * @param priorityPosition 字段位置
  279. * @param basePosition 优先级搞的字段位置
  280. * @return 优先级高的字段值
  281. */
  282. public static String getPriorityFieldValue(Row sheetRow, Integer priorityPosition, Integer basePosition) {
  283. boolean hasPriorityValue = priorityPosition != null && sheetRow.getCell(priorityPosition) != null && StrUtil.isNotBlank(ExcelUtil.getCellValue(sheetRow.getCell(priorityPosition)));
  284. if (hasPriorityValue) {
  285. return ExcelUtil.getCellValue(sheetRow.getCell(priorityPosition));
  286. }
  287. return basePosition != null && sheetRow.getCell(basePosition) != null ? ExcelUtil.getCellValue(sheetRow.getCell(basePosition)) : null;
  288. }
  289. /**
  290. * 去掉逗号
  291. *
  292. * @param numberData 数字型字符串
  293. * @return 无逗号的数字型字符串
  294. */
  295. public static String numberDataStripCommas(String numberData) {
  296. if (StrUtil.isBlank(numberData)) {
  297. return null;
  298. }
  299. // pdf解析到的值带有",",比如:"10,656,097.37"
  300. String data = numberData.replaceAll(",", "");
  301. if (!StringUtil.isNumeric(data)) {
  302. return null;
  303. }
  304. return data;
  305. }
  306. public static HSSFWorkbook getHSSFWorkbook(String sheetName, List<String> title, Map<String, List<List<String>>> valueMap, HSSFWorkbook wb) {
  307. // 第一步,创建一个HSSFWorkbook,对应一个Excel文件
  308. if (wb == null) {
  309. wb = new HSSFWorkbook();
  310. }
  311. try {
  312. // 第二步,在workbook中添加一个sheet,对应Excel文件中的sheet
  313. HSSFSheet sheet = wb.createSheet(sheetName);
  314. // 第三步,在sheet中添加表头第0行,注意老版本poi对Excel的行数列数有限制
  315. HSSFRow row = sheet.createRow(0);
  316. // 第四步,创建单元格,并设置值表头 设置表头居中
  317. HSSFCellStyle style = wb.createCellStyle();
  318. style.setAlignment(HorizontalAlignment.LEFT);
  319. style.setWrapText(true);
  320. sheet.setColumnWidth(0, 10000);
  321. sheet.setColumnWidth(1, 10000);
  322. sheet.setColumnWidth(2, 5000);
  323. sheet.setColumnWidth(3, 8000);
  324. sheet.setColumnWidth(4, 8000);
  325. sheet.setColumnWidth(5, 5000);
  326. //声明列对象
  327. HSSFCell cell = null;
  328. //创建标题
  329. for (int i = 0; i < title.size(); i++) {
  330. cell = row.createCell(i);
  331. cell.setCellValue(title.get(i));
  332. cell.setCellStyle(style);
  333. }
  334. List<List<String>> values = valueMap.get(sheetName);
  335. //创建内容
  336. for (int i = 0; i < values.size(); i++) {
  337. row = sheet.createRow(i + 1);
  338. for (int j = 0; j < values.get(i).size(); j++) {
  339. //将内容按顺序赋给对应的列对象
  340. row.createCell(j).setCellValue(values.get(i).get(j));
  341. }
  342. }
  343. } catch (Exception e) {
  344. logger.error(e.getMessage(), e);
  345. }
  346. return wb;
  347. }
  348. public static Sheet getFirstSheet(String filePath) {
  349. if (StrUtil.isBlank(filePath)) {
  350. return null;
  351. }
  352. try {
  353. File file = new File(filePath);
  354. return ExcelUtil.getSheet(file, 0);
  355. } catch (Exception e) {
  356. logger.error("获取文件的sheet错误 -> 文件路径:{},堆栈信息为:{}", filePath, ExceptionUtil.stacktraceToString(e));
  357. }
  358. return null;
  359. }
  360. public static String pdfConvertToExcel(String filePath, String excelFilePath) {
  361. File savefile = new File(excelFilePath);
  362. if (!savefile.exists()) {
  363. if (!savefile.getParentFile().exists()) {
  364. savefile.getParentFile().mkdirs();
  365. savefile.getParentFile().setExecutable(true);
  366. }
  367. }
  368. try (OutputStream outputStream = Files.newOutputStream(Paths.get(excelFilePath))) {
  369. PDDocument document = Loader.loadPDF(new File(filePath));
  370. PageIterator extract = new ObjectExtractor(document).extract();
  371. Workbook workbook = new XSSFWorkbook();
  372. Sheet sheet = workbook.createSheet("Sheet1");
  373. int preEndRowIndex =0;
  374. while (extract.hasNext()) {
  375. Page next = extract.next();
  376. List<technology.tabula.Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
  377. for (technology.tabula.Table table : tableList) {
  378. List<List<RectangularTextContainer>> rows = table.getRows();
  379. int rowLength = rows.size();
  380. for (int rowNum = 0; rowNum < rowLength; rowNum++) {
  381. Row sheetRow = sheet.createRow(rowNum + preEndRowIndex);
  382. List<RectangularTextContainer> textContainerList = rows.get(rowNum);
  383. for (int cellNum = 0; cellNum < textContainerList.size(); cellNum++) {
  384. Cell cell = sheetRow.createCell(cellNum);
  385. RectangularTextContainer textContainer = textContainerList.get(cellNum);
  386. if (textContainer != null) {
  387. cell.setCellValue(textContainer.getText());
  388. }
  389. }
  390. }
  391. preEndRowIndex = preEndRowIndex + rowLength -1;
  392. }
  393. }
  394. // 将Excel工作簿写入输出流
  395. workbook.write(outputStream);
  396. } catch (Exception e) {
  397. logger.error("解析邮件pdf附件报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
  398. return null;
  399. }
  400. return excelFilePath;
  401. }
  402. public static String contentConvertToExcel(String emailContent, String excelFilePath) {
  403. Elements rows;
  404. try {
  405. Document doc = Jsoup.parse(emailContent);
  406. Element table = doc.select("table").first();
  407. rows = table.select("tr");
  408. } catch (Exception e) {
  409. return null;
  410. }
  411. File saveFile = new File(excelFilePath);
  412. if (!saveFile.exists()) {
  413. if (!saveFile.getParentFile().exists()) {
  414. saveFile.getParentFile().mkdirs();
  415. saveFile.getParentFile().setExecutable(true);
  416. }
  417. }
  418. try (OutputStream outputStream = new FileOutputStream(saveFile)) {
  419. // 创建一个新的Excel工作簿
  420. Workbook workbook = new XSSFWorkbook();
  421. Sheet sheet = workbook.createSheet("Sheet1");
  422. ExcelUtil.writeDataToSheet(sheet, rows);
  423. // 将Excel工作簿写入输出流
  424. workbook.write(outputStream);
  425. } catch (Exception e) {
  426. logger.error("邮件正文转换成excel报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
  427. return null;
  428. }
  429. return excelFilePath;
  430. }
  431. /**
  432. * 通过表头所在位置判断是行数据还是列数据
  433. *
  434. * @param fieldPositionMap excel中表头所在的位置
  435. * @return 行方向-1,,列方向-2
  436. */
  437. public static Integer detectDataDirection(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  438. long count = fieldPositionMap.values().stream().map(Pair::getValue).distinct().count();
  439. return count == 1 ? EmailDataDirectionConst.COLUMN_DIRECTION_TYPE : EmailDataDirectionConst.ROW_DIRECTION_TYPE;
  440. }
  441. // Excel 的起始日期是 1900 年 1 月 1 日
  442. private static final LocalDate EXCEL_EPOCH_DATE = LocalDate.of(1900, 1, 1);
  443. public static String convertExcelDateToString(String dateNum) {
  444. LocalDate startDate = LocalDate.of(1900, 1, 1);
  445. LocalDate localDate = startDate.plusDays(Math.round(Double.valueOf(dateNum) - 2));
  446. DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
  447. return localDate.format(formatter);
  448. }
  449. }