NavEmailParser.java 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. package com.simuwang.daq.service;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.date.DateUtil;
  4. import cn.hutool.core.exceptions.ExceptionUtil;
  5. import cn.hutool.core.lang.Pair;
  6. import cn.hutool.core.map.MapUtil;
  7. import cn.hutool.core.util.StrUtil;
  8. import com.simuwang.base.common.conts.DateConst;
  9. import com.simuwang.base.common.conts.EmailFieldConst;
  10. import com.simuwang.base.common.conts.EmailTypeConst;
  11. import com.simuwang.base.common.util.ExcelUtil;
  12. import com.simuwang.base.common.util.StringUtil;
  13. import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
  14. import com.simuwang.base.pojo.dto.EmailFundNavDTO;
  15. import com.simuwang.base.pojo.dto.FieldPositionDTO;
  16. import org.apache.pdfbox.Loader;
  17. import org.apache.pdfbox.pdmodel.PDDocument;
  18. import org.apache.poi.ss.usermodel.Cell;
  19. import org.apache.poi.ss.usermodel.Row;
  20. import org.apache.poi.ss.usermodel.Sheet;
  21. import org.apache.poi.ss.usermodel.Workbook;
  22. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  23. import org.jsoup.Jsoup;
  24. import org.jsoup.nodes.Document;
  25. import org.jsoup.nodes.Element;
  26. import org.jsoup.select.Elements;
  27. import org.slf4j.Logger;
  28. import org.slf4j.LoggerFactory;
  29. import org.springframework.beans.factory.annotation.Value;
  30. import org.springframework.stereotype.Component;
  31. import technology.tabula.*;
  32. import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
  33. import java.io.File;
  34. import java.io.FileOutputStream;
  35. import java.io.OutputStream;
  36. import java.nio.file.Files;
  37. import java.nio.file.Paths;
  38. import java.util.*;
  39. import java.util.stream.Collectors;
  40. /**
  41. * @author mozuwen
  42. * @date 2024-09-04
  43. * @description 净值邮件解析器
  44. */
  45. @Component
  46. public class NavEmailParser extends AbstractEmailParser {
  47. private static final Logger log = LoggerFactory.getLogger(AbstractEmailParser.class);
  48. @Value("${email.file.path}")
  49. private String path;
  50. private final ValuationEmailParser valuationEmailParser;
  51. private static final Integer ROW_DIRECTION_TYPE = 1;
  52. private static final Integer COLUMN_DIRECTION_TYPE = 2;
  53. private static final int MAX_COLUMN = 20;
  54. public NavEmailParser(ValuationEmailParser valuationEmailParser) {
  55. this.valuationEmailParser = valuationEmailParser;
  56. }
  57. @Override
  58. public boolean isSupport(Integer emailType) {
  59. return EmailTypeConst.NAV_EMAIL_TYPE.equals(emailType);
  60. }
  61. @Override
  62. public List<EmailFundNavDTO>
  63. parse(EmailContentInfoDTO emailContentInfoDTO, Map<String, List<String>> emailFieldMap) {
  64. List<EmailFundNavDTO> emailFundNavDTOList = CollUtil.newArrayList();
  65. String emailContent = emailContentInfoDTO.getEmailContent();
  66. // 1.解析邮件正文
  67. if (StrUtil.isNotBlank(emailContent)) {
  68. emailFundNavDTOList = parseEmailContent(emailContentInfoDTO, emailContent, emailFieldMap);
  69. }
  70. // 2.解析邮件excel附件
  71. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isExcel(emailContentInfoDTO.getFileName())) {
  72. List<EmailFundNavDTO> fundNavDTOList = parseExcelFile(emailContentInfoDTO.getFilePath(), emailFieldMap);
  73. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  74. }
  75. // 3.解析邮件pdf附件
  76. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isPdf(emailContentInfoDTO.getFileName())) {
  77. String excelFilePath = path + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "")
  78. + "/" + emailContentInfoDTO.getFileName().replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
  79. List<EmailFundNavDTO> fundNavDTOList = parsePdfFile(emailContentInfoDTO.getFilePath(), excelFilePath, emailFieldMap);
  80. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  81. }
  82. // 4.解析邮件zip,rar附件
  83. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isZip(emailContentInfoDTO.getFileName())) {
  84. List<EmailFundNavDTO> fundNavDTOList = parsePackageFile(emailContentInfoDTO, emailContentInfoDTO.getFileName(), emailContentInfoDTO.getFilePath(), emailFieldMap);
  85. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  86. }
  87. // 校验净值数据格式
  88. if (CollUtil.isNotEmpty(emailFundNavDTOList)) {
  89. emailFundNavDTOList = emailFundNavDTOList.stream().filter(super::dataFormat).collect(Collectors.toList());
  90. }
  91. if (CollUtil.isEmpty(emailFundNavDTOList)) {
  92. // 判断文件名是否包含"估值表"
  93. String fileName = emailContentInfoDTO.getFileName();
  94. if (StrUtil.isNotBlank(fileName) && fileName.contains("估值表")) {
  95. emailFundNavDTOList = valuationEmailParser.parse(emailContentInfoDTO, emailFieldMap);
  96. }
  97. }
  98. return emailFundNavDTOList;
  99. }
  100. private List<EmailFundNavDTO> parsePackageFile(EmailContentInfoDTO emailContentInfoDTO, String fileName, String filePath, Map<String, List<String>> emailFieldMap) {
  101. String destPath = filePath.substring(0, filePath.indexOf(fileName)) + fileName.replaceAll(".zip", "").replaceAll(".ZIP", "");
  102. log.info("压缩包地址:{},解压后文件地址:{}", filePath, destPath);
  103. List<String> dir = ExcelUtil.extractCompressedFiles(filePath, destPath);
  104. List<EmailFundNavDTO> emailFundNavDTOList = CollUtil.newArrayList();
  105. for (String zipFilePath : dir) {
  106. emailFundNavDTOList.addAll(parseZipFile(emailContentInfoDTO, zipFilePath, emailFieldMap));
  107. File file = new File(zipFilePath);
  108. if (file.isDirectory()) {
  109. for (String navFilePath : Objects.requireNonNull(file.list())) {
  110. emailFundNavDTOList.addAll(parseZipFile(emailContentInfoDTO, navFilePath, emailFieldMap));
  111. }
  112. }
  113. }
  114. return emailFundNavDTOList;
  115. }
  116. private List<EmailFundNavDTO> parseZipFile(EmailContentInfoDTO emailContentInfoDTO, String zipFilePath, Map<String, List<String>> emailFieldMap) {
  117. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  118. if (ExcelUtil.isPdf(zipFilePath)) {
  119. String excelFilePath = zipFilePath.replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
  120. fundNavDTOList = parsePdfFile(zipFilePath, excelFilePath, emailFieldMap);
  121. }
  122. if (ExcelUtil.isExcel(zipFilePath)) {
  123. fundNavDTOList = parseExcelFile(zipFilePath, emailFieldMap);
  124. }
  125. if (ExcelUtil.isZip(zipFilePath)) {
  126. String name = new File(zipFilePath).getName();
  127. fundNavDTOList = parsePackageFile(emailContentInfoDTO, name, zipFilePath, emailFieldMap);
  128. }
  129. return fundNavDTOList;
  130. }
  131. /**
  132. * 解析邮件pdf附件
  133. *
  134. * @param filePath 邮件pdf附件路径
  135. * @param excelFilePath pdf转excel路径
  136. * @param emailFieldMap 邮件字段识别规则映射表
  137. * @return 解析到的净值数据
  138. */
  139. private List<EmailFundNavDTO> parsePdfFile(String filePath, String excelFilePath, Map<String, List<String>> emailFieldMap) {
  140. File savefile = new File(excelFilePath);
  141. if (!savefile.exists()) {
  142. if (!savefile.getParentFile().exists()) {
  143. savefile.getParentFile().mkdirs();
  144. savefile.getParentFile().setExecutable(true);
  145. }
  146. }
  147. try (OutputStream outputStream = Files.newOutputStream(Paths.get(excelFilePath))) {
  148. PDDocument document = Loader.loadPDF(new File(filePath));
  149. PageIterator extract = new ObjectExtractor(document).extract();
  150. Workbook workbook = new XSSFWorkbook();
  151. Sheet sheet = workbook.createSheet("Sheet1");
  152. while (extract.hasNext()) {
  153. Page next = extract.next();
  154. List<Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
  155. for (Table table : tableList) {
  156. List<List<RectangularTextContainer>> rows = table.getRows();
  157. for (int rowNum = 0; rowNum < rows.size(); rowNum++) {
  158. Row sheetRow = sheet.createRow(rowNum);
  159. List<RectangularTextContainer> textContainerList = rows.get(rowNum);
  160. for (int cellNum = 0; cellNum < textContainerList.size(); cellNum++) {
  161. Cell cell = sheetRow.createCell(cellNum);
  162. RectangularTextContainer textContainer = textContainerList.get(cellNum);
  163. if (textContainer != null) {
  164. cell.setCellValue(textContainer.getText());
  165. }
  166. }
  167. }
  168. }
  169. }
  170. // 将Excel工作簿写入输出流
  171. workbook.write(outputStream);
  172. } catch (Exception e) {
  173. log.error("解析邮件pdf附件报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
  174. }
  175. return parseExcelFile(excelFilePath, emailFieldMap);
  176. }
  177. /**
  178. * 解析邮件excel附件
  179. *
  180. * @param filePath 邮件excel附件地址
  181. * @param emailFieldMap 邮件字段识别规则映射表
  182. * @return 解析到的净值数据
  183. */
  184. private List<EmailFundNavDTO> parseExcelFile(String filePath, Map<String, List<String>> emailFieldMap) {
  185. File file = new File(filePath);
  186. Sheet sheet = ExcelUtil.getSheet(file, 0);
  187. // 1.找到表头所在位置
  188. Map<String, Pair<Integer, Integer>> fieldPositionMap = getFieldPosition(sheet, emailFieldMap);
  189. if (MapUtil.isEmpty(fieldPositionMap)) {
  190. log.warn("找不到文件表头字段 -> 文件:{}", filePath);
  191. return CollUtil.newArrayList();
  192. }
  193. // 2.解析sheet中的净值数据
  194. return parseSheetData(sheet, fieldPositionMap);
  195. }
  196. /**
  197. * 解析邮件正文
  198. *
  199. * @param emailContentInfoDTO 邮件信息
  200. * @param emailContent 正文内容
  201. * @param emailFieldMap 邮件字段识别规则映射表
  202. * @return 解析到的净值数据
  203. */
  204. private List<EmailFundNavDTO> parseEmailContent(EmailContentInfoDTO emailContentInfoDTO, String emailContent, Map<String, List<String>> emailFieldMap) {
  205. Document doc = Jsoup.parse(emailContent);
  206. Element table = doc.select("table").first();
  207. Elements rows = table.select("tr");
  208. String excelFilePath = path + "/content/" + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "") + "/"
  209. + emailContentInfoDTO.getFileName().replace(".html", ".xlsx");
  210. File saveFile = new File(excelFilePath);
  211. if (!saveFile.exists()) {
  212. if (!saveFile.getParentFile().exists()) {
  213. saveFile.getParentFile().mkdirs();
  214. saveFile.getParentFile().setExecutable(true);
  215. }
  216. }
  217. try (OutputStream outputStream = new FileOutputStream(saveFile)) {
  218. // 创建一个新的Excel工作簿
  219. Workbook workbook = new XSSFWorkbook();
  220. Sheet sheet = workbook.createSheet("Sheet1");
  221. ExcelUtil.writeDataToSheet(sheet, rows);
  222. // 将Excel工作簿写入输出流
  223. workbook.write(outputStream);
  224. } catch (Exception e) {
  225. log.error("解析正文报错 -> 邮件主题:{},邮件日期:{},堆栈信息:{}", emailContentInfoDTO.getEmailTitle(), emailContentInfoDTO.getEmailDate(), ExceptionUtil.stacktraceToString(e));
  226. }
  227. return parseExcelFile(excelFilePath, emailFieldMap);
  228. }
  229. private List<EmailFundNavDTO> parseSheetData(Sheet sheet, Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  230. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  231. // 通过表头所在位置判断是行数据还是列数据
  232. Integer dataDirectionType = detectDataDirection(fieldPositionMap);
  233. // 数据起始行,起始列
  234. int initRow = dataDirectionType.equals(ROW_DIRECTION_TYPE) ? fieldPositionMap.values().stream().map(Pair::getKey).max(Integer::compareTo).orElse(0)
  235. : fieldPositionMap.values().stream().map(Pair::getKey).min(Integer::compareTo).orElse(0);
  236. int initColumn = fieldPositionMap.values().stream().map(Pair::getValue).min(Integer::compareTo).orElse(0);
  237. if (dataDirectionType.equals(ROW_DIRECTION_TYPE)) {
  238. // 表头字段-列号映射关系
  239. Map<String, Integer> fieldColumnMap = getFieldRow(fieldPositionMap);
  240. int lastRowNum = sheet.getLastRowNum();
  241. // 遍历可能的数据行
  242. for (int rowNum = initRow + 1; rowNum <= lastRowNum; rowNum++) {
  243. Row sheetRow = sheet.getRow(rowNum);
  244. Optional.ofNullable(readSheetRowData(sheetRow, fieldColumnMap)).ifPresent(fundNavDTOList::addAll);
  245. }
  246. }
  247. if (dataDirectionType.equals(COLUMN_DIRECTION_TYPE)) {
  248. // 表头字段-行号映射关系
  249. Map<Integer, String> fieldRowMap = getRowField(fieldPositionMap);
  250. int lastRow = fieldPositionMap.values().stream().map(Pair::getKey).max(Integer::compareTo).orElse(0);
  251. // 遍历每一列
  252. for (int columnNum = initColumn + 1; columnNum < MAX_COLUMN; columnNum++) {
  253. Map<String, String> fieldValueMap = MapUtil.newHashMap();
  254. for (int rowNum = initRow; rowNum <= lastRow; rowNum++) {
  255. Row row = sheet.getRow(rowNum);
  256. Cell cell = row.getCell(columnNum);
  257. if (cell == null) {
  258. continue;
  259. }
  260. fieldValueMap.put(fieldRowMap.get(rowNum), ExcelUtil.getCellValue(cell));
  261. }
  262. Optional.ofNullable(buildEmailFundNavDTO(fieldValueMap)).ifPresent(fundNavDTOList::add);
  263. }
  264. }
  265. // 兼容净值日期为空的情况
  266. addPriceDateIfMiss(fundNavDTOList, getPriceDateFromSheet(sheet, initRow));
  267. return fundNavDTOList;
  268. }
  269. private void addPriceDateIfMiss(List<EmailFundNavDTO> fundNavDTOList, String priceDate) {
  270. if (fundNavDTOList.stream().map(EmailFundNavDTO::getPriceDate).allMatch(StrUtil::isBlank)) {
  271. fundNavDTOList.forEach(e -> e.setPriceDate(priceDate));
  272. }
  273. }
  274. private String getPriceDateFromSheet(Sheet sheet, Integer maxRowNum) {
  275. Map<Integer, String> priceDateMap = MapUtil.newHashMap();
  276. for (int rowNum = 0; rowNum < maxRowNum; rowNum++) {
  277. Row row = sheet.getRow(rowNum);
  278. if (row == null) {
  279. continue;
  280. }
  281. int lastCellNum = row.getLastCellNum();
  282. for (int columnNum = 0; columnNum < lastCellNum; columnNum++) {
  283. Cell cell = row.getCell(columnNum);
  284. if (cell == null) {
  285. continue;
  286. }
  287. String cellValue = ExcelUtil.getCellValue(cell);
  288. if (StrUtil.isNotBlank(cellValue) && cellValue.contains("截至")) {
  289. int index = cellValue.indexOf("截至");
  290. String date = cellValue.substring(index + 2, index + 2 + 10);
  291. if (StrUtil.isNotBlank(date)) {
  292. date = date.replaceAll("年", "-").replaceAll("月", "-");
  293. }
  294. priceDateMap.put(1, date);
  295. continue;
  296. }
  297. boolean isValidDate = StringUtil.isValidDate(cellValue);
  298. if (isValidDate) {
  299. String date = cellValue.replaceAll("年", "-").replaceAll("月", "-");
  300. priceDateMap.put(2, date);
  301. }
  302. }
  303. }
  304. if (MapUtil.isNotEmpty(priceDateMap)) {
  305. Integer key = priceDateMap.keySet().stream().min(Integer::compareTo).orElse(null);
  306. return priceDateMap.get(key);
  307. }
  308. return null;
  309. }
  310. private EmailFundNavDTO buildEmailFundNavDTO(Map<String, String> fieldValueMap) {
  311. if (MapUtil.isEmpty(fieldValueMap) || fieldValueMap.values().stream().allMatch(StrUtil::isBlank)) {
  312. return null;
  313. }
  314. EmailFundNavDTO fundNavDTO = new EmailFundNavDTO();
  315. fundNavDTO.setFundName(fieldValueMap.get(EmailFieldConst.FUND_NAME));
  316. fundNavDTO.setRegisterNumber(fieldValueMap.get(EmailFieldConst.REGISTER_NUMBER));
  317. fundNavDTO.setPriceDate(fieldValueMap.get(EmailFieldConst.PRICE_DATE));
  318. fundNavDTO.setNav(fieldValueMap.get(EmailFieldConst.NAV));
  319. fundNavDTO.setCumulativeNavWithdrawal(fieldValueMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL));
  320. // pdf解析到的值带有",",比如:"10,656,097.37"
  321. String assetNet = fieldValueMap.get(EmailFieldConst.ASSET_NET);
  322. fundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
  323. String assetShares = fieldValueMap.get(EmailFieldConst.ASSET_SHARE);
  324. fundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
  325. return fundNavDTO;
  326. }
  327. private List<EmailFundNavDTO> readSheetRowData(Row sheetRow, Map<String, Integer> columnFieldMap) {
  328. if (sheetRow == null) {
  329. return null;
  330. }
  331. String nav = columnFieldMap.get(EmailFieldConst.NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.NAV)) != null
  332. ? ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.NAV))) : null;
  333. String cumulativeNavWithdrawal = columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL)) != null ?
  334. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL))) : null;
  335. String assetNet = columnFieldMap.get(EmailFieldConst.ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET)) != null ?
  336. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET))) : null;
  337. if (StrUtil.isBlank(nav) && StrUtil.isBlank(cumulativeNavWithdrawal) && StrUtil.isBlank(assetNet)) {
  338. return null;
  339. }
  340. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  341. EmailFundNavDTO emailFundNavDTO = new EmailFundNavDTO();
  342. String priceDate = columnFieldMap.get(EmailFieldConst.PRICE_DATE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PRICE_DATE)) != null ?
  343. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PRICE_DATE))) : null;
  344. // 份额基金净值文件格式
  345. long parentFiledCount = columnFieldMap.keySet().stream().filter(e -> e.contains("parent")).count();
  346. if (parentFiledCount >= 1) {
  347. Optional.ofNullable(buildParentNav(sheetRow, columnFieldMap, priceDate)).ifPresent(fundNavDTOList::add);
  348. }
  349. // 正常净值文件格式
  350. if (StrUtil.isNotBlank(priceDate) && !priceDate.contains("-")) {
  351. // 处理日期yyyyMMdd格式 -> 转成yyyy-MM-dd
  352. priceDate = priceDate.replace("年", "").replace("月", "").replace("日", "");
  353. priceDate = DateUtil.format(DateUtil.parse(priceDate, DateConst.YYYYMMDD), DateConst.YYYY_MM_DD);
  354. }
  355. emailFundNavDTO.setPriceDate(priceDate);
  356. String fundName = ExcelUtil.getPriorityFieldValue(sheetRow, columnFieldMap.get(EmailFieldConst.LEVEL_FUND_NAME), columnFieldMap.get(EmailFieldConst.FUND_NAME));
  357. emailFundNavDTO.setFundName(fundName);
  358. String registerNumber = ExcelUtil.getPriorityFieldValue(sheetRow, columnFieldMap.get(EmailFieldConst.LEVEL_REGISTER_NUMBER), columnFieldMap.get(EmailFieldConst.REGISTER_NUMBER));
  359. emailFundNavDTO.setRegisterNumber(registerNumber);
  360. emailFundNavDTO.setNav(nav);
  361. emailFundNavDTO.setCumulativeNavWithdrawal(cumulativeNavWithdrawal);
  362. String virtualNav = columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV)) != null ?
  363. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV))) : null;
  364. emailFundNavDTO.setVirtualNav(virtualNav);
  365. emailFundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
  366. String assetShares = columnFieldMap.get(EmailFieldConst.ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE)) != null ?
  367. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE))) : null;
  368. emailFundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
  369. fundNavDTOList.add(emailFundNavDTO);
  370. return fundNavDTOList;
  371. }
  372. private EmailFundNavDTO buildParentNav(Row sheetRow, Map<String, Integer> columnFieldMap, String priceDate) {
  373. EmailFundNavDTO emailFundNavDTO = new EmailFundNavDTO();
  374. String nav = columnFieldMap.get(EmailFieldConst.PARENT_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_NAV)) != null ?
  375. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_NAV))) : null;
  376. String cumulativeNavWithdrawal = columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL)) != null ?
  377. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL))) : null;
  378. if (StrUtil.isBlank(nav) && StrUtil.isBlank(cumulativeNavWithdrawal)) {
  379. return null;
  380. }
  381. if (StrUtil.isNotBlank(priceDate) && !priceDate.contains("-")) {
  382. // 处理日期yyyyMMdd格式 -> 转成yyyy-MM-dd
  383. priceDate = priceDate.replace("年", "").replace("月", "").replace("日", "");
  384. priceDate = DateUtil.format(DateUtil.parse(priceDate, DateConst.YYYYMMDD), DateConst.YYYY_MM_DD);
  385. }
  386. emailFundNavDTO.setPriceDate(priceDate);
  387. String fundName = columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME)).getStringCellValue() != null ?
  388. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME))) : null;
  389. emailFundNavDTO.setFundName(fundName);
  390. String registerNumber = columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER)) != null ?
  391. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER))) : null;
  392. emailFundNavDTO.setRegisterNumber(registerNumber);
  393. emailFundNavDTO.setNav(nav);
  394. emailFundNavDTO.setCumulativeNavWithdrawal(cumulativeNavWithdrawal);
  395. String virtualNav = columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV)) != null ?
  396. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV))) : null;
  397. emailFundNavDTO.setVirtualNav(virtualNav);
  398. String assetNet = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET)) != null ?
  399. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET))) : null;
  400. emailFundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
  401. String assetShares = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE)) != null ?
  402. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE))) : null;
  403. emailFundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
  404. return emailFundNavDTO;
  405. }
  406. private Map<String, Integer> getFieldRow(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  407. // 考虑日期字段识别逻辑的问题
  408. long rowNumCount = fieldPositionMap.values().stream().map(Pair::getKey).distinct().count();
  409. if (rowNumCount > 1) {
  410. // 存在合并单元格的方式 -> 日期字段所在位置可能会存在错误
  411. fieldPositionMap.remove(EmailFieldConst.PRICE_DATE);
  412. }
  413. Map<String, Integer> fieldRowMap = MapUtil.newHashMap();
  414. for (Map.Entry<String, Pair<Integer, Integer>> fieldPositionEntry : fieldPositionMap.entrySet()) {
  415. String field = fieldPositionEntry.getKey();
  416. Integer column = fieldPositionEntry.getValue().getValue();
  417. fieldRowMap.put(field, column);
  418. }
  419. return fieldRowMap;
  420. }
  421. private Map<Integer, String> getRowField(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  422. Map<Integer, String> fieldRowMap = MapUtil.newHashMap();
  423. for (Map.Entry<String, Pair<Integer, Integer>> fieldPositionEntry : fieldPositionMap.entrySet()) {
  424. String field = fieldPositionEntry.getKey();
  425. Integer column = fieldPositionEntry.getValue().getKey();
  426. fieldRowMap.put(column, field);
  427. }
  428. return fieldRowMap;
  429. }
  430. /**
  431. * 通过表头所在位置判断是行数据还是列数据
  432. *
  433. * @param fieldPositionMap excel中表头所在的位置
  434. * @return 行方向-1,,列方向-2
  435. */
  436. private Integer detectDataDirection(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  437. long count = fieldPositionMap.values().stream().map(Pair::getValue).distinct().count();
  438. return count == 1 ? COLUMN_DIRECTION_TYPE : ROW_DIRECTION_TYPE;
  439. }
  440. /**
  441. * 找出excel中表头所在的位置
  442. *
  443. * @param sheet 表格工作簿
  444. * @param emailFieldMap 邮件字段识别规则映射表
  445. * @return excel中表头所在的位置(行, 列)
  446. */
  447. private Map<String, Pair<Integer, Integer>> getFieldPosition(Sheet sheet, Map<String, List<String>> emailFieldMap) {
  448. Map<String, List<FieldPositionDTO>> tempFieldPositionMap = MapUtil.newHashMap();
  449. int lastRowNum = sheet.getLastRowNum();
  450. for (int rowNum = 0; rowNum <= lastRowNum; rowNum++) {
  451. Row sheetRow = sheet.getRow(rowNum);
  452. if (sheetRow == null) {
  453. continue;
  454. }
  455. int lastCellNum = sheetRow.getLastCellNum();
  456. for (int cellNum = 0; cellNum < lastCellNum; cellNum++) {
  457. Cell cell = sheetRow.getCell(cellNum);
  458. if (cell == null) {
  459. continue;
  460. }
  461. String cellValue = ExcelUtil.getCellValue(cell);
  462. // 移除掉非中文字符
  463. String newCellValue = StringUtil.retainChineseCharacters(cellValue);
  464. String field = fieldMatch(newCellValue, emailFieldMap);
  465. if (StrUtil.isNotBlank(field)) {
  466. List<FieldPositionDTO> fieldPositionDTOList = tempFieldPositionMap.getOrDefault(field, new ArrayList<>());
  467. fieldPositionDTOList.add(new FieldPositionDTO(newCellValue, Pair.of(rowNum, cellNum)));
  468. tempFieldPositionMap.put(field, fieldPositionDTOList);
  469. }
  470. }
  471. }
  472. // 判断是不是份额基金净值文件格式(同时存在两个备案编码字段)
  473. return handlerFieldPosition(tempFieldPositionMap);
  474. }
  475. private Map<String, Pair<Integer, Integer>> handlerFieldPosition(Map<String, List<FieldPositionDTO>> tempFieldPositionMap) {
  476. Map<String, Pair<Integer, Integer>> fieldPositionMap = MapUtil.newHashMap();
  477. boolean hasParentField = tempFieldPositionMap.keySet().stream().anyMatch(e -> e.contains("parent"));
  478. for (Map.Entry<String, List<FieldPositionDTO>> entry : tempFieldPositionMap.entrySet()) {
  479. String field = entry.getKey();
  480. List<FieldPositionDTO> fieldPositionDTOList = entry.getValue();
  481. int size = fieldPositionDTOList.size();
  482. if (size == 1) {
  483. fieldPositionMap.put(field, fieldPositionDTOList.get(0).getPair());
  484. continue;
  485. }
  486. if ((!hasParentField && size > 1)) {
  487. if (EmailFieldConst.REGISTER_NUMBER.equals(field)) {
  488. Pair<Integer, Integer> pair = fieldPositionDTOList.stream()
  489. .filter(e -> !e.getFieldValue().contains("协会") && !e.getFieldValue().contains("备案")).map(FieldPositionDTO::getPair).findFirst().orElse(null);
  490. fieldPositionMap.put(field, pair);
  491. } else {
  492. fieldPositionMap.put(field, fieldPositionDTOList.get(size - 1).getPair());
  493. }
  494. continue;
  495. }
  496. if ((hasParentField && size > 1)) {
  497. fieldPositionMap.put(field, fieldPositionDTOList.get(0).getPair());
  498. }
  499. }
  500. // 母基金缺少代码的情况
  501. if (hasParentField && fieldPositionMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER) == null) {
  502. List<FieldPositionDTO> fieldPositionDTOS = tempFieldPositionMap.get(EmailFieldConst.REGISTER_NUMBER);
  503. Pair<Integer, Integer> parentRegisterNumberPair = fieldPositionDTOS.stream()
  504. .filter(e -> e.getFieldValue().contains("协会") || e.getFieldValue().contains("备案")).map(FieldPositionDTO::getPair).findFirst().orElse(null);
  505. fieldPositionMap.put(EmailFieldConst.PARENT_REGISTER_NUMBER, parentRegisterNumberPair);
  506. }
  507. return fieldPositionMap;
  508. }
  509. /**
  510. * 判断单元格值是否为表头字段
  511. *
  512. * @param cellValue 单元格值
  513. * @param emailFieldMap 邮件字段识别规则映射表
  514. * @return 表头对应的标识
  515. */
  516. public String fieldMatch(String cellValue, Map<String, List<String>> emailFieldMap) {
  517. if (StrUtil.isBlank(cellValue)) {
  518. return null;
  519. }
  520. for (Map.Entry<String, List<String>> fieldEntry : emailFieldMap.entrySet()) {
  521. List<String> fieldList = fieldEntry.getValue();
  522. for (String field : fieldList) {
  523. if (cellValue.equals(field)) {
  524. return fieldEntry.getKey();
  525. }
  526. }
  527. }
  528. return null;
  529. }
  530. }