NavEmailParser.java 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. package com.simuwang.daq.service;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.date.DateUtil;
  4. import cn.hutool.core.exceptions.ExceptionUtil;
  5. import cn.hutool.core.lang.Pair;
  6. import cn.hutool.core.map.MapUtil;
  7. import cn.hutool.core.util.StrUtil;
  8. import com.simuwang.base.common.conts.DateConst;
  9. import com.simuwang.base.common.conts.EmailFieldConst;
  10. import com.simuwang.base.common.conts.EmailTypeConst;
  11. import com.simuwang.base.common.util.ExcelUtil;
  12. import com.simuwang.base.common.util.StringUtil;
  13. import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
  14. import com.simuwang.base.pojo.dto.EmailFundNavDTO;
  15. import org.apache.pdfbox.Loader;
  16. import org.apache.pdfbox.pdmodel.PDDocument;
  17. import org.apache.poi.ss.usermodel.Cell;
  18. import org.apache.poi.ss.usermodel.Row;
  19. import org.apache.poi.ss.usermodel.Sheet;
  20. import org.apache.poi.ss.usermodel.Workbook;
  21. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  22. import org.jsoup.Jsoup;
  23. import org.jsoup.nodes.Document;
  24. import org.jsoup.nodes.Element;
  25. import org.jsoup.select.Elements;
  26. import org.slf4j.Logger;
  27. import org.slf4j.LoggerFactory;
  28. import org.springframework.beans.factory.annotation.Value;
  29. import org.springframework.stereotype.Component;
  30. import technology.tabula.*;
  31. import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
  32. import java.io.File;
  33. import java.io.FileOutputStream;
  34. import java.io.OutputStream;
  35. import java.nio.file.Files;
  36. import java.nio.file.Paths;
  37. import java.util.*;
  38. import java.util.stream.Collectors;
  39. /**
  40. * @author mozuwen
  41. * @date 2024-09-04
  42. * @description 净值邮件解析器
  43. */
  44. @Component
  45. public class NavEmailParser extends AbstractEmailParser {
  46. private static final Logger log = LoggerFactory.getLogger(AbstractEmailParser.class);
  47. @Value("${email.file.path}")
  48. private String path;
  49. private static final Integer ROW_DIRECTION_TYPE = 1;
  50. private static final Integer COLUMN_DIRECTION_TYPE = 2;
  51. private static final int MAX_COLUMN = 20;
  52. @Override
  53. public boolean isSupport(Integer emailType) {
  54. return EmailTypeConst.NAV_EMAIL_TYPE.equals(emailType);
  55. }
  56. @Override
  57. public List<EmailFundNavDTO> parse(EmailContentInfoDTO emailContentInfoDTO, Map<String, List<String>> emailFieldMap) {
  58. List<EmailFundNavDTO> emailFundNavDTOList = CollUtil.newArrayList();
  59. String emailContent = emailContentInfoDTO.getEmailContent();
  60. // 1.解析邮件正文
  61. if (StrUtil.isNotBlank(emailContent)) {
  62. emailFundNavDTOList = parseEmailContent(emailContentInfoDTO, emailContent, emailFieldMap);
  63. }
  64. // 2.解析邮件excel附件
  65. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isExcel(emailContentInfoDTO.getFileName())) {
  66. List<EmailFundNavDTO> fundNavDTOList = parseExcelFile(emailContentInfoDTO.getFilePath(), emailFieldMap);
  67. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  68. }
  69. // 3.解析邮件pdf附件
  70. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isPdf(emailContentInfoDTO.getFileName())) {
  71. String excelFilePath = path + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "")
  72. + "/" + emailContentInfoDTO.getFileName().replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
  73. List<EmailFundNavDTO> fundNavDTOList = parsePdfFile(emailContentInfoDTO.getFilePath(), excelFilePath, emailFieldMap);
  74. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  75. }
  76. // 4.解析邮件zip,rar附件
  77. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isZip(emailContentInfoDTO.getFileName())) {
  78. List<EmailFundNavDTO> fundNavDTOList = parsePackageFile(emailContentInfoDTO, emailContentInfoDTO.getFileName(), emailContentInfoDTO.getFilePath(), emailFieldMap);
  79. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  80. }
  81. // 校验净值数据格式
  82. if (CollUtil.isNotEmpty(emailFundNavDTOList)) {
  83. emailFundNavDTOList = emailFundNavDTOList.stream().filter(super::dataFormat).collect(Collectors.toList());
  84. }
  85. return emailFundNavDTOList;
  86. }
  87. private List<EmailFundNavDTO> parsePackageFile(EmailContentInfoDTO emailContentInfoDTO, String fileName, String filePath, Map<String, List<String>> emailFieldMap) {
  88. String destPath = filePath.substring(0, filePath.indexOf(fileName)) + fileName.replaceAll(".zip","").replaceAll(".ZIP","") ;
  89. log.info("压缩包地址:{},解压后文件地址:{}", filePath, destPath);
  90. List<String> dir = ExcelUtil.extractCompressedFiles(filePath, destPath);
  91. List<EmailFundNavDTO> emailFundNavDTOList = CollUtil.newArrayList();
  92. for (String zipFilePath : dir) {
  93. emailFundNavDTOList.addAll(parseZipFile(emailContentInfoDTO, zipFilePath, emailFieldMap));
  94. File file = new File(zipFilePath);
  95. if (file.isDirectory()) {
  96. for (String navFilePath : Objects.requireNonNull(file.list())) {
  97. emailFundNavDTOList.addAll(parseZipFile(emailContentInfoDTO, navFilePath, emailFieldMap));
  98. }
  99. }
  100. }
  101. return emailFundNavDTOList;
  102. }
  103. private List<EmailFundNavDTO> parseZipFile(EmailContentInfoDTO emailContentInfoDTO, String zipFilePath, Map<String, List<String>> emailFieldMap) {
  104. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  105. if (ExcelUtil.isPdf(zipFilePath)) {
  106. String excelFilePath = zipFilePath.replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
  107. fundNavDTOList = parsePdfFile(zipFilePath, excelFilePath, emailFieldMap);
  108. }
  109. if (ExcelUtil.isExcel(zipFilePath)) {
  110. fundNavDTOList = parseExcelFile(zipFilePath, emailFieldMap);
  111. }
  112. if (ExcelUtil.isZip(zipFilePath)) {
  113. String name = new File(zipFilePath).getName();
  114. fundNavDTOList = parsePackageFile(emailContentInfoDTO, name, zipFilePath, emailFieldMap);
  115. }
  116. return fundNavDTOList;
  117. }
  118. /**
  119. * 解析邮件pdf附件
  120. *
  121. * @param filePath 邮件pdf附件路径
  122. * @param excelFilePath pdf转excel路径
  123. * @param emailFieldMap 邮件字段识别规则映射表
  124. * @return 解析到的净值数据
  125. */
  126. private List<EmailFundNavDTO> parsePdfFile(String filePath, String excelFilePath, Map<String, List<String>> emailFieldMap) {
  127. File savefile = new File(excelFilePath);
  128. if (!savefile.exists()) {
  129. if (!savefile.getParentFile().exists()) {
  130. savefile.getParentFile().mkdirs();
  131. savefile.getParentFile().setExecutable(true);
  132. }
  133. }
  134. try (OutputStream outputStream = Files.newOutputStream(Paths.get(excelFilePath))) {
  135. PDDocument document = Loader.loadPDF(new File(filePath));
  136. PageIterator extract = new ObjectExtractor(document).extract();
  137. Workbook workbook = new XSSFWorkbook();
  138. Sheet sheet = workbook.createSheet("Sheet1");
  139. while (extract.hasNext()) {
  140. Page next = extract.next();
  141. List<Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
  142. for (Table table : tableList) {
  143. List<List<RectangularTextContainer>> rows = table.getRows();
  144. for (int rowNum = 0; rowNum < rows.size(); rowNum++) {
  145. Row sheetRow = sheet.createRow(rowNum);
  146. List<RectangularTextContainer> textContainerList = rows.get(rowNum);
  147. for (int cellNum = 0; cellNum < textContainerList.size(); cellNum++) {
  148. Cell cell = sheetRow.createCell(cellNum);
  149. RectangularTextContainer textContainer = textContainerList.get(cellNum);
  150. if (textContainer != null) {
  151. cell.setCellValue(textContainer.getText());
  152. }
  153. }
  154. }
  155. }
  156. }
  157. // 将Excel工作簿写入输出流
  158. workbook.write(outputStream);
  159. } catch (Exception e) {
  160. log.error("解析邮件pdf附件报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
  161. }
  162. return parseExcelFile(excelFilePath, emailFieldMap);
  163. }
  164. /**
  165. * 解析邮件excel附件
  166. *
  167. * @param filePath 邮件excel附件地址
  168. * @param emailFieldMap 邮件字段识别规则映射表
  169. * @return 解析到的净值数据
  170. */
  171. private List<EmailFundNavDTO> parseExcelFile(String filePath, Map<String, List<String>> emailFieldMap) {
  172. File file = new File(filePath);
  173. Sheet sheet = ExcelUtil.getSheet(file, 0);
  174. // 1.找到表头所在位置
  175. Map<String, Pair<Integer, Integer>> fieldPositionMap = getFieldPosition(sheet, emailFieldMap);
  176. if (MapUtil.isEmpty(fieldPositionMap)) {
  177. log.warn("找不到文件表头字段 -> 文件:{}", filePath);
  178. return CollUtil.newArrayList();
  179. }
  180. // 2.解析sheet中的净值数据
  181. return parseSheetData(sheet, fieldPositionMap);
  182. }
  183. /**
  184. * 解析邮件正文
  185. *
  186. * @param emailContentInfoDTO 邮件信息
  187. * @param emailContent 正文内容
  188. * @param emailFieldMap 邮件字段识别规则映射表
  189. * @return 解析到的净值数据
  190. */
  191. private List<EmailFundNavDTO> parseEmailContent(EmailContentInfoDTO emailContentInfoDTO, String emailContent, Map<String, List<String>> emailFieldMap) {
  192. Document doc = Jsoup.parse(emailContent);
  193. Element table = doc.select("table").first();
  194. Elements rows = table.select("tr");
  195. String excelFilePath = "/data/file/content/" + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "") + "/"
  196. + emailContentInfoDTO.getFileName().replace(".html", ".xlsx");
  197. File savefile = new File(excelFilePath);
  198. if (!savefile.exists()) {
  199. if (!savefile.getParentFile().exists()) {
  200. savefile.getParentFile().mkdirs();
  201. savefile.getParentFile().setExecutable(true);
  202. }
  203. }
  204. try (OutputStream outputStream = new FileOutputStream(savefile)) {
  205. // 创建一个新的Excel工作簿
  206. Workbook workbook = new XSSFWorkbook();
  207. Sheet sheet = workbook.createSheet("Sheet1");
  208. writeDataToSheet(sheet, rows);
  209. // 将Excel工作簿写入输出流
  210. workbook.write(outputStream);
  211. } catch (Exception e) {
  212. log.error("解析正文报错 -> 邮件主题:{},邮件日期:{},堆栈信息:{}", emailContentInfoDTO.getEmailTitle(), emailContentInfoDTO.getEmailDate(), ExceptionUtil.stacktraceToString(e));
  213. }
  214. return parseExcelFile(excelFilePath, emailFieldMap);
  215. }
  216. private List<EmailFundNavDTO> parseSheetData(Sheet sheet, Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  217. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  218. // 通过表头所在位置判断是行数据还是列数据
  219. Integer dataDirectionType = detectDataDirection(fieldPositionMap);
  220. // 数据起始行,起始列
  221. int initRow = dataDirectionType.equals(ROW_DIRECTION_TYPE) ? fieldPositionMap.values().stream().map(Pair::getKey).max(Integer::compareTo).orElse(0)
  222. : fieldPositionMap.values().stream().map(Pair::getKey).min(Integer::compareTo).orElse(0);
  223. int initColumn = fieldPositionMap.values().stream().map(Pair::getValue).min(Integer::compareTo).orElse(0);
  224. if (dataDirectionType.equals(ROW_DIRECTION_TYPE)) {
  225. // 表头字段-列号映射关系
  226. Map<String, Integer> fieldColumnMap = getFieldRow(fieldPositionMap);
  227. int lastRowNum = sheet.getLastRowNum();
  228. // 遍历可能的数据行
  229. for (int rowNum = initRow + 1; rowNum <= lastRowNum; rowNum++) {
  230. Row sheetRow = sheet.getRow(rowNum);
  231. Optional.ofNullable(readSheetRowData(sheetRow, fieldColumnMap)).ifPresent(fundNavDTOList::addAll);
  232. }
  233. }
  234. if (dataDirectionType.equals(COLUMN_DIRECTION_TYPE)) {
  235. // 表头字段-行号映射关系
  236. Map<Integer, String> fieldRowMap = getRowField(fieldPositionMap);
  237. int lastRow = fieldPositionMap.values().stream().map(Pair::getKey).max(Integer::compareTo).orElse(0);
  238. // 遍历每一列
  239. for (int columnNum = initColumn + 1; columnNum < MAX_COLUMN; columnNum++) {
  240. Map<String, String> fieldValueMap = MapUtil.newHashMap();
  241. for (int rowNum = initRow; rowNum <= lastRow; rowNum++) {
  242. Row row = sheet.getRow(rowNum);
  243. Cell cell = row.getCell(columnNum);
  244. if (cell == null) {
  245. continue;
  246. }
  247. fieldValueMap.put(fieldRowMap.get(rowNum), cell.getStringCellValue());
  248. }
  249. Optional.ofNullable(buildEmailFundNavDTO(fieldValueMap)).ifPresent(fundNavDTOList::add);
  250. }
  251. }
  252. // 兼容净值日期为空的情况
  253. addPriceDateIfMiss(fundNavDTOList, getPriceDateFromSheet(sheet, initRow));
  254. return fundNavDTOList;
  255. }
  256. private void addPriceDateIfMiss(List<EmailFundNavDTO> fundNavDTOList, String priceDate) {
  257. if (fundNavDTOList.stream().map(EmailFundNavDTO::getPriceDate).allMatch(StrUtil::isBlank)) {
  258. fundNavDTOList.forEach(e -> e.setPriceDate(priceDate));
  259. }
  260. }
  261. private String getPriceDateFromSheet(Sheet sheet, Integer maxRowNum) {
  262. Map<Integer, String> priceDateMap = MapUtil.newHashMap();
  263. for (int rowNum = 0; rowNum < maxRowNum; rowNum++) {
  264. Row row = sheet.getRow(rowNum);
  265. if (row == null) {
  266. continue;
  267. }
  268. int lastCellNum = row.getLastCellNum();
  269. for (int columnNum = 0; columnNum < lastCellNum; columnNum++) {
  270. Cell cell = row.getCell(columnNum);
  271. if (cell == null) {
  272. continue;
  273. }
  274. String cellValue = cell.getStringCellValue();
  275. if (StrUtil.isNotBlank(cellValue) && cellValue.contains("截至")) {
  276. int index = cellValue.indexOf("截至");
  277. String date = cellValue.substring(index + 2, index + 2 + 10);
  278. if (StrUtil.isNotBlank(date)) {
  279. date = date.replaceAll("年", "-").replaceAll("月", "-");
  280. }
  281. priceDateMap.put(1, date);
  282. continue;
  283. }
  284. boolean isValidDate = StringUtil.isValidDate(cellValue);
  285. if (isValidDate) {
  286. String date = cellValue.replaceAll("年", "-").replaceAll("月", "-");
  287. priceDateMap.put(2, date);
  288. }
  289. }
  290. }
  291. if (MapUtil.isNotEmpty(priceDateMap)) {
  292. Integer key = priceDateMap.keySet().stream().min(Integer::compareTo).orElse(null);
  293. return priceDateMap.get(key);
  294. }
  295. return null;
  296. }
  297. private EmailFundNavDTO buildEmailFundNavDTO(Map<String, String> fieldValueMap) {
  298. if (MapUtil.isEmpty(fieldValueMap) || fieldValueMap.values().stream().allMatch(StrUtil::isBlank)) {
  299. return null;
  300. }
  301. EmailFundNavDTO fundNavDTO = new EmailFundNavDTO();
  302. fundNavDTO.setFundName(fieldValueMap.get(EmailFieldConst.FUND_NAME));
  303. fundNavDTO.setRegisterNumber(fieldValueMap.get(EmailFieldConst.REGISTER_NUMBER));
  304. fundNavDTO.setPriceDate(fieldValueMap.get(EmailFieldConst.PRICE_DATE));
  305. fundNavDTO.setNav(fieldValueMap.get(EmailFieldConst.NAV));
  306. fundNavDTO.setCumulativeNavWithdrawal(fieldValueMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL));
  307. // pdf解析到的值带有",",比如:"10,656,097.37"
  308. String assetNet = fieldValueMap.get(EmailFieldConst.ASSET_NET);
  309. fundNavDTO.setAssetNet(numberDataHandler(assetNet));
  310. String assetShares = fieldValueMap.get(EmailFieldConst.ASSET_NET);
  311. fundNavDTO.setAssetNet(numberDataHandler(assetShares));
  312. return fundNavDTO;
  313. }
  314. private List<EmailFundNavDTO> readSheetRowData(Row sheetRow, Map<String, Integer> columnFieldMap) {
  315. if (sheetRow == null) {
  316. return null;
  317. }
  318. String nav = columnFieldMap.get(EmailFieldConst.NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.NAV)) != null
  319. ? ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.NAV))) : null;
  320. String cumulativeNavWithdrawal = columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL)) != null ?
  321. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL))) : null;
  322. if (StrUtil.isBlank(nav) && StrUtil.isBlank(cumulativeNavWithdrawal)) {
  323. return null;
  324. }
  325. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  326. EmailFundNavDTO emailFundNavDTO = new EmailFundNavDTO();
  327. String priceDate = columnFieldMap.get(EmailFieldConst.PRICE_DATE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PRICE_DATE)) != null ?
  328. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PRICE_DATE))) : null;
  329. // 份额基金净值文件格式
  330. long parentFiledCount = columnFieldMap.keySet().stream().filter(e -> e.contains("parent")).count();
  331. if (parentFiledCount >= 1) {
  332. Optional.ofNullable(buildParentNav(sheetRow, columnFieldMap, priceDate)).ifPresent(fundNavDTOList::add);
  333. }
  334. // 正常净值文件格式
  335. if (StrUtil.isNotBlank(priceDate) && !priceDate.contains("-")) {
  336. // 处理日期yyyyMMdd格式 -> 转成yyyy-MM-dd
  337. priceDate = DateUtil.format(DateUtil.parse(priceDate, DateConst.YYYYMMDD), DateConst.YYYY_MM_DD);
  338. }
  339. emailFundNavDTO.setPriceDate(priceDate);
  340. String fundName = columnFieldMap.get(EmailFieldConst.FUND_NAME) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.FUND_NAME)).getStringCellValue() != null ?
  341. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.FUND_NAME))) : null;
  342. emailFundNavDTO.setFundName(fundName);
  343. String registerNumber = columnFieldMap.get(EmailFieldConst.REGISTER_NUMBER) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.REGISTER_NUMBER)) != null ?
  344. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.REGISTER_NUMBER))) : null;
  345. emailFundNavDTO.setRegisterNumber(registerNumber);
  346. emailFundNavDTO.setNav(nav);
  347. emailFundNavDTO.setCumulativeNavWithdrawal(cumulativeNavWithdrawal);
  348. String virtualNav = columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV)) != null ?
  349. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV))) : null;
  350. emailFundNavDTO.setVirtualNav(virtualNav);
  351. String assetNet = columnFieldMap.get(EmailFieldConst.ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET)) != null ?
  352. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET))) : null;
  353. emailFundNavDTO.setAssetNet(numberDataHandler(assetNet));
  354. String assetShares = columnFieldMap.get(EmailFieldConst.ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE)) != null ?
  355. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE))) : null;
  356. emailFundNavDTO.setAssetShare(numberDataHandler(assetShares));
  357. fundNavDTOList.add(emailFundNavDTO);
  358. return fundNavDTOList;
  359. }
  360. private String numberDataHandler(String numberData) {
  361. if (StrUtil.isBlank(numberData)) {
  362. return null;
  363. }
  364. // pdf解析到的值带有",",比如:"10,656,097.37"
  365. String data = numberData.replaceAll(",", "");
  366. if (!StringUtil.isNumeric(data)) {
  367. return null;
  368. }
  369. return data;
  370. }
  371. private EmailFundNavDTO buildParentNav(Row sheetRow, Map<String, Integer> columnFieldMap, String priceDate) {
  372. EmailFundNavDTO emailFundNavDTO = new EmailFundNavDTO();
  373. String nav = columnFieldMap.get(EmailFieldConst.PARENT_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_NAV)) != null ?
  374. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_NAV))) : null;
  375. String cumulativeNavWithdrawal = columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL)) != null ?
  376. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL))) : null;
  377. if (StrUtil.isBlank(nav) && StrUtil.isBlank(cumulativeNavWithdrawal)) {
  378. return null;
  379. }
  380. if (StrUtil.isNotBlank(priceDate) && !priceDate.contains("-")) {
  381. // 处理日期yyyyMMdd格式 -> 转成yyyy-MM-dd
  382. priceDate = DateUtil.format(DateUtil.parse(priceDate, DateConst.YYYYMMDD), DateConst.YYYY_MM_DD);
  383. }
  384. emailFundNavDTO.setPriceDate(priceDate);
  385. String fundName = columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME)).getStringCellValue() != null ?
  386. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME))) : null;
  387. emailFundNavDTO.setFundName(fundName);
  388. String registerNumber = columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER)) != null ?
  389. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER))) : null;
  390. emailFundNavDTO.setRegisterNumber(registerNumber);
  391. emailFundNavDTO.setNav(nav);
  392. emailFundNavDTO.setCumulativeNavWithdrawal(cumulativeNavWithdrawal);
  393. String virtualNav = columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV)) != null ?
  394. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV))) : null;
  395. emailFundNavDTO.setVirtualNav(virtualNav);
  396. String assetNet = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET)) != null ?
  397. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET))) : null;
  398. emailFundNavDTO.setAssetNet(numberDataHandler(assetNet));
  399. String assetShares = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE)) != null ?
  400. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE))) : null;
  401. emailFundNavDTO.setAssetNet(numberDataHandler(assetShares));
  402. return emailFundNavDTO;
  403. }
  404. private Map<String, Integer> getFieldRow(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  405. // 考虑日期字段识别逻辑的问题
  406. long rowNumCount = fieldPositionMap.values().stream().map(Pair::getKey).distinct().count();
  407. if (rowNumCount > 1) {
  408. // 存在合并单元格的方式 -> 日期字段所在位置可能会存在错误
  409. fieldPositionMap.remove(EmailFieldConst.PRICE_DATE);
  410. }
  411. Map<String, Integer> fieldRowMap = MapUtil.newHashMap();
  412. for (Map.Entry<String, Pair<Integer, Integer>> fieldPositionEntry : fieldPositionMap.entrySet()) {
  413. String field = fieldPositionEntry.getKey();
  414. Integer column = fieldPositionEntry.getValue().getValue();
  415. fieldRowMap.put(field, column);
  416. }
  417. return fieldRowMap;
  418. }
  419. private Map<Integer, String> getRowField(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  420. Map<Integer, String> fieldRowMap = MapUtil.newHashMap();
  421. for (Map.Entry<String, Pair<Integer, Integer>> fieldPositionEntry : fieldPositionMap.entrySet()) {
  422. String field = fieldPositionEntry.getKey();
  423. Integer column = fieldPositionEntry.getValue().getKey();
  424. fieldRowMap.put(column, field);
  425. }
  426. return fieldRowMap;
  427. }
  428. /**
  429. * 通过表头所在位置判断是行数据还是列数据
  430. *
  431. * @param fieldPositionMap excel中表头所在的位置
  432. * @return 行方向-1,,列方向-2
  433. */
  434. private Integer detectDataDirection(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  435. long count = fieldPositionMap.values().stream().map(Pair::getValue).distinct().count();
  436. return count == 1 ? COLUMN_DIRECTION_TYPE : ROW_DIRECTION_TYPE;
  437. }
  438. /**
  439. * 找出excel中表头所在的位置
  440. *
  441. * @param sheet 表格工作簿
  442. * @param emailFieldMap 邮件字段识别规则映射表
  443. * @return excel中表头所在的位置(行, 列)
  444. */
  445. private Map<String, Pair<Integer, Integer>> getFieldPosition(Sheet sheet, Map<String, List<String>> emailFieldMap) {
  446. Map<String, List<Pair<Integer, Integer>>> tempFieldPositionMap = MapUtil.newHashMap();
  447. int lastRowNum = sheet.getLastRowNum();
  448. for (int rowNum = 0; rowNum < lastRowNum; rowNum++) {
  449. Row sheetRow = sheet.getRow(rowNum);
  450. if (sheetRow == null) {
  451. continue;
  452. }
  453. int lastCellNum = sheetRow.getLastCellNum();
  454. for (int cellNum = 0; cellNum < lastCellNum; cellNum++) {
  455. Cell cell = sheetRow.getCell(cellNum);
  456. if (cell == null) {
  457. continue;
  458. }
  459. String cellValue = ExcelUtil.getCellValue(cell);
  460. String field = fieldMatch(cellValue, emailFieldMap);
  461. if (StrUtil.isNotBlank(field)) {
  462. List<Pair<Integer, Integer>> pairList = tempFieldPositionMap.getOrDefault(field, new ArrayList<>());
  463. pairList.add(Pair.of(rowNum, cellNum));
  464. tempFieldPositionMap.put(field, pairList);
  465. }
  466. }
  467. }
  468. // 判断是不是份额基金净值文件格式(同时存在两个备案编码字段)
  469. return handlerFieldPosition(tempFieldPositionMap);
  470. }
  471. private Map<String, Pair<Integer, Integer>> handlerFieldPosition(Map<String, List<Pair<Integer, Integer>>> tempFieldPositionMap) {
  472. Map<String, Pair<Integer, Integer>> fieldPositionMap = MapUtil.newHashMap();
  473. boolean hasParentField = tempFieldPositionMap.keySet().stream().anyMatch(e -> e.contains("parent"));
  474. for (Map.Entry<String, List<Pair<Integer, Integer>>> entry : tempFieldPositionMap.entrySet()) {
  475. List<Pair<Integer, Integer>> pairList = entry.getValue();
  476. if (pairList.size() == 1) {
  477. fieldPositionMap.put(entry.getKey(), pairList.get(0));
  478. continue;
  479. }
  480. if ((!hasParentField && pairList.size() > 1)) {
  481. fieldPositionMap.put(entry.getKey(), pairList.get(pairList.size() - 1));
  482. continue;
  483. }
  484. if ((hasParentField && pairList.size() > 1)) {
  485. fieldPositionMap.put(entry.getKey(), pairList.get(0));
  486. }
  487. }
  488. return fieldPositionMap;
  489. }
  490. /**
  491. * 判断单元格值是否为表头字段
  492. *
  493. * @param cellValue 单元格值
  494. * @param emailFieldMap 邮件字段识别规则映射表
  495. * @return 表头对应的标识
  496. */
  497. public String fieldMatch(String cellValue, Map<String, List<String>> emailFieldMap) {
  498. if (StrUtil.isBlank(cellValue)) {
  499. return null;
  500. }
  501. String newCellValue = StringUtil.retainChineseCharacters(cellValue);
  502. for (Map.Entry<String, List<String>> fieldEntry : emailFieldMap.entrySet()) {
  503. List<String> fieldList = fieldEntry.getValue();
  504. for (String field : fieldList) {
  505. if (newCellValue.equals(field)) {
  506. return fieldEntry.getKey();
  507. }
  508. }
  509. }
  510. return null;
  511. }
  512. private void writeDataToSheet(Sheet sheet, Elements rows) {
  513. int rowSize = rows.size();
  514. for (int rowNum = 0; rowNum < rowSize; rowNum++) {
  515. Row sheetRow = sheet.createRow(rowNum);
  516. Element elementRow = rows.get(rowNum);
  517. Elements cells = elementRow.select("td");
  518. int cellSize = cells.size();
  519. for (int cellNum = 0; cellNum < cellSize; cellNum++) {
  520. Cell sheetRowCell = sheetRow.createCell(cellNum);
  521. sheetRowCell.setCellValue(cells.get(cellNum).text());
  522. }
  523. }
  524. }
  525. }