// NavEmailParser.java (30 KB)
  1. package com.simuwang.daq.service;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.date.DateUtil;
  4. import cn.hutool.core.exceptions.ExceptionUtil;
  5. import cn.hutool.core.lang.Pair;
  6. import cn.hutool.core.map.MapUtil;
  7. import cn.hutool.core.util.StrUtil;
  8. import com.simuwang.base.common.conts.DateConst;
  9. import com.simuwang.base.common.conts.EmailFieldConst;
  10. import com.simuwang.base.common.conts.EmailTypeConst;
  11. import com.simuwang.base.common.util.ExcelUtil;
  12. import com.simuwang.base.common.util.StringUtil;
  13. import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
  14. import com.simuwang.base.pojo.dto.EmailFundNavDTO;
  15. import com.simuwang.base.pojo.dto.FieldPositionDTO;
  16. import org.apache.pdfbox.Loader;
  17. import org.apache.pdfbox.pdmodel.PDDocument;
  18. import org.apache.poi.ss.usermodel.Cell;
  19. import org.apache.poi.ss.usermodel.Row;
  20. import org.apache.poi.ss.usermodel.Sheet;
  21. import org.apache.poi.ss.usermodel.Workbook;
  22. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  23. import org.jsoup.Jsoup;
  24. import org.jsoup.nodes.Document;
  25. import org.jsoup.nodes.Element;
  26. import org.jsoup.select.Elements;
  27. import org.slf4j.Logger;
  28. import org.slf4j.LoggerFactory;
  29. import org.springframework.beans.factory.annotation.Value;
  30. import org.springframework.stereotype.Component;
  31. import technology.tabula.*;
  32. import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
  33. import java.io.File;
  34. import java.io.FileOutputStream;
  35. import java.io.OutputStream;
  36. import java.nio.file.Files;
  37. import java.nio.file.Paths;
  38. import java.util.*;
  39. import java.util.stream.Collectors;
/**
 * @author mozuwen
 * @date 2024-09-04
 * @description Parser for fund NAV (net asset value) emails
 */
  45. @Component
  46. public class NavEmailParser extends AbstractEmailParser {
  47. private static final Logger log = LoggerFactory.getLogger(AbstractEmailParser.class);
  48. @Value("${email.file.path}")
  49. private String path;
  50. private static final Integer ROW_DIRECTION_TYPE = 1;
  51. private static final Integer COLUMN_DIRECTION_TYPE = 2;
  52. private static final int MAX_COLUMN = 20;
  53. @Override
  54. public boolean isSupport(Integer emailType) {
  55. return EmailTypeConst.NAV_EMAIL_TYPE.equals(emailType);
  56. }
  57. @Override
  58. public List<EmailFundNavDTO>
  59. parse(EmailContentInfoDTO emailContentInfoDTO, Map<String, List<String>> emailFieldMap) {
  60. List<EmailFundNavDTO> emailFundNavDTOList = CollUtil.newArrayList();
  61. String emailContent = emailContentInfoDTO.getEmailContent();
  62. // 1.解析邮件正文
  63. if (StrUtil.isNotBlank(emailContent)) {
  64. emailFundNavDTOList = parseEmailContent(emailContentInfoDTO, emailContent, emailFieldMap);
  65. }
  66. // 2.解析邮件excel附件
  67. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isExcel(emailContentInfoDTO.getFileName())) {
  68. List<EmailFundNavDTO> fundNavDTOList = parseExcelFile(emailContentInfoDTO.getFilePath(), emailFieldMap);
  69. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  70. }
  71. // 3.解析邮件pdf附件
  72. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isPdf(emailContentInfoDTO.getFileName())) {
  73. String excelFilePath = path + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "")
  74. + "/" + emailContentInfoDTO.getFileName().replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
  75. List<EmailFundNavDTO> fundNavDTOList = parsePdfFile(emailContentInfoDTO.getFilePath(), excelFilePath, emailFieldMap);
  76. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  77. }
  78. // 4.解析邮件zip,rar附件
  79. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isZip(emailContentInfoDTO.getFileName())) {
  80. List<EmailFundNavDTO> fundNavDTOList = parsePackageFile(emailContentInfoDTO, emailContentInfoDTO.getFileName(), emailContentInfoDTO.getFilePath(), emailFieldMap);
  81. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  82. }
  83. // 校验净值数据格式
  84. if (CollUtil.isNotEmpty(emailFundNavDTOList)) {
  85. emailFundNavDTOList = emailFundNavDTOList.stream().filter(super::dataFormat).collect(Collectors.toList());
  86. }
  87. return emailFundNavDTOList;
  88. }
  89. private List<EmailFundNavDTO> parsePackageFile(EmailContentInfoDTO emailContentInfoDTO, String fileName, String filePath, Map<String, List<String>> emailFieldMap) {
  90. String destPath = filePath.substring(0, filePath.indexOf(fileName)) + fileName.replaceAll(".zip", "").replaceAll(".ZIP", "");
  91. log.info("压缩包地址:{},解压后文件地址:{}", filePath, destPath);
  92. List<String> dir = ExcelUtil.extractCompressedFiles(filePath, destPath);
  93. List<EmailFundNavDTO> emailFundNavDTOList = CollUtil.newArrayList();
  94. for (String zipFilePath : dir) {
  95. emailFundNavDTOList.addAll(parseZipFile(emailContentInfoDTO, zipFilePath, emailFieldMap));
  96. File file = new File(zipFilePath);
  97. if (file.isDirectory()) {
  98. for (String navFilePath : Objects.requireNonNull(file.list())) {
  99. emailFundNavDTOList.addAll(parseZipFile(emailContentInfoDTO, navFilePath, emailFieldMap));
  100. }
  101. }
  102. }
  103. return emailFundNavDTOList;
  104. }
  105. private List<EmailFundNavDTO> parseZipFile(EmailContentInfoDTO emailContentInfoDTO, String zipFilePath, Map<String, List<String>> emailFieldMap) {
  106. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  107. if (ExcelUtil.isPdf(zipFilePath)) {
  108. String excelFilePath = zipFilePath.replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
  109. fundNavDTOList = parsePdfFile(zipFilePath, excelFilePath, emailFieldMap);
  110. }
  111. if (ExcelUtil.isExcel(zipFilePath)) {
  112. fundNavDTOList = parseExcelFile(zipFilePath, emailFieldMap);
  113. }
  114. if (ExcelUtil.isZip(zipFilePath)) {
  115. String name = new File(zipFilePath).getName();
  116. fundNavDTOList = parsePackageFile(emailContentInfoDTO, name, zipFilePath, emailFieldMap);
  117. }
  118. return fundNavDTOList;
  119. }
  120. /**
  121. * 解析邮件pdf附件
  122. *
  123. * @param filePath 邮件pdf附件路径
  124. * @param excelFilePath pdf转excel路径
  125. * @param emailFieldMap 邮件字段识别规则映射表
  126. * @return 解析到的净值数据
  127. */
  128. private List<EmailFundNavDTO> parsePdfFile(String filePath, String excelFilePath, Map<String, List<String>> emailFieldMap) {
  129. File savefile = new File(excelFilePath);
  130. if (!savefile.exists()) {
  131. if (!savefile.getParentFile().exists()) {
  132. savefile.getParentFile().mkdirs();
  133. savefile.getParentFile().setExecutable(true);
  134. }
  135. }
  136. try (OutputStream outputStream = Files.newOutputStream(Paths.get(excelFilePath))) {
  137. PDDocument document = Loader.loadPDF(new File(filePath));
  138. PageIterator extract = new ObjectExtractor(document).extract();
  139. Workbook workbook = new XSSFWorkbook();
  140. Sheet sheet = workbook.createSheet("Sheet1");
  141. while (extract.hasNext()) {
  142. Page next = extract.next();
  143. List<Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
  144. for (Table table : tableList) {
  145. List<List<RectangularTextContainer>> rows = table.getRows();
  146. for (int rowNum = 0; rowNum < rows.size(); rowNum++) {
  147. Row sheetRow = sheet.createRow(rowNum);
  148. List<RectangularTextContainer> textContainerList = rows.get(rowNum);
  149. for (int cellNum = 0; cellNum < textContainerList.size(); cellNum++) {
  150. Cell cell = sheetRow.createCell(cellNum);
  151. RectangularTextContainer textContainer = textContainerList.get(cellNum);
  152. if (textContainer != null) {
  153. cell.setCellValue(textContainer.getText());
  154. }
  155. }
  156. }
  157. }
  158. }
  159. // 将Excel工作簿写入输出流
  160. workbook.write(outputStream);
  161. } catch (Exception e) {
  162. log.error("解析邮件pdf附件报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
  163. }
  164. return parseExcelFile(excelFilePath, emailFieldMap);
  165. }
  166. /**
  167. * 解析邮件excel附件
  168. *
  169. * @param filePath 邮件excel附件地址
  170. * @param emailFieldMap 邮件字段识别规则映射表
  171. * @return 解析到的净值数据
  172. */
  173. private List<EmailFundNavDTO> parseExcelFile(String filePath, Map<String, List<String>> emailFieldMap) {
  174. File file = new File(filePath);
  175. Sheet sheet = ExcelUtil.getSheet(file, 0);
  176. // 1.找到表头所在位置
  177. Map<String, Pair<Integer, Integer>> fieldPositionMap = getFieldPosition(sheet, emailFieldMap);
  178. if (MapUtil.isEmpty(fieldPositionMap)) {
  179. log.warn("找不到文件表头字段 -> 文件:{}", filePath);
  180. return CollUtil.newArrayList();
  181. }
  182. // 2.解析sheet中的净值数据
  183. return parseSheetData(sheet, fieldPositionMap);
  184. }
  185. /**
  186. * 解析邮件正文
  187. *
  188. * @param emailContentInfoDTO 邮件信息
  189. * @param emailContent 正文内容
  190. * @param emailFieldMap 邮件字段识别规则映射表
  191. * @return 解析到的净值数据
  192. */
  193. private List<EmailFundNavDTO> parseEmailContent(EmailContentInfoDTO emailContentInfoDTO, String emailContent, Map<String, List<String>> emailFieldMap) {
  194. Document doc = Jsoup.parse(emailContent);
  195. Element table = doc.select("table").first();
  196. Elements rows = table.select("tr");
  197. String excelFilePath = path + "/content/" + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "") + "/"
  198. + emailContentInfoDTO.getFileName().replace(".html", ".xlsx");
  199. File saveFile = new File(excelFilePath);
  200. if (!saveFile.exists()) {
  201. if (!saveFile.getParentFile().exists()) {
  202. saveFile.getParentFile().mkdirs();
  203. saveFile.getParentFile().setExecutable(true);
  204. }
  205. }
  206. try (OutputStream outputStream = new FileOutputStream(saveFile)) {
  207. // 创建一个新的Excel工作簿
  208. Workbook workbook = new XSSFWorkbook();
  209. Sheet sheet = workbook.createSheet("Sheet1");
  210. ExcelUtil.writeDataToSheet(sheet, rows);
  211. // 将Excel工作簿写入输出流
  212. workbook.write(outputStream);
  213. } catch (Exception e) {
  214. log.error("解析正文报错 -> 邮件主题:{},邮件日期:{},堆栈信息:{}", emailContentInfoDTO.getEmailTitle(), emailContentInfoDTO.getEmailDate(), ExceptionUtil.stacktraceToString(e));
  215. }
  216. return parseExcelFile(excelFilePath, emailFieldMap);
  217. }
  218. private List<EmailFundNavDTO> parseSheetData(Sheet sheet, Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  219. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  220. // 通过表头所在位置判断是行数据还是列数据
  221. Integer dataDirectionType = detectDataDirection(fieldPositionMap);
  222. // 数据起始行,起始列
  223. int initRow = dataDirectionType.equals(ROW_DIRECTION_TYPE) ? fieldPositionMap.values().stream().map(Pair::getKey).max(Integer::compareTo).orElse(0)
  224. : fieldPositionMap.values().stream().map(Pair::getKey).min(Integer::compareTo).orElse(0);
  225. int initColumn = fieldPositionMap.values().stream().map(Pair::getValue).min(Integer::compareTo).orElse(0);
  226. if (dataDirectionType.equals(ROW_DIRECTION_TYPE)) {
  227. // 表头字段-列号映射关系
  228. Map<String, Integer> fieldColumnMap = getFieldRow(fieldPositionMap);
  229. int lastRowNum = sheet.getLastRowNum();
  230. // 遍历可能的数据行
  231. for (int rowNum = initRow + 1; rowNum <= lastRowNum; rowNum++) {
  232. Row sheetRow = sheet.getRow(rowNum);
  233. Optional.ofNullable(readSheetRowData(sheetRow, fieldColumnMap)).ifPresent(fundNavDTOList::addAll);
  234. }
  235. }
  236. if (dataDirectionType.equals(COLUMN_DIRECTION_TYPE)) {
  237. // 表头字段-行号映射关系
  238. Map<Integer, String> fieldRowMap = getRowField(fieldPositionMap);
  239. int lastRow = fieldPositionMap.values().stream().map(Pair::getKey).max(Integer::compareTo).orElse(0);
  240. // 遍历每一列
  241. for (int columnNum = initColumn + 1; columnNum < MAX_COLUMN; columnNum++) {
  242. Map<String, String> fieldValueMap = MapUtil.newHashMap();
  243. for (int rowNum = initRow; rowNum <= lastRow; rowNum++) {
  244. Row row = sheet.getRow(rowNum);
  245. Cell cell = row.getCell(columnNum);
  246. if (cell == null) {
  247. continue;
  248. }
  249. fieldValueMap.put(fieldRowMap.get(rowNum), ExcelUtil.getCellValue(cell));
  250. }
  251. Optional.ofNullable(buildEmailFundNavDTO(fieldValueMap)).ifPresent(fundNavDTOList::add);
  252. }
  253. }
  254. // 兼容净值日期为空的情况
  255. addPriceDateIfMiss(fundNavDTOList, getPriceDateFromSheet(sheet, initRow));
  256. return fundNavDTOList;
  257. }
  258. private void addPriceDateIfMiss(List<EmailFundNavDTO> fundNavDTOList, String priceDate) {
  259. if (fundNavDTOList.stream().map(EmailFundNavDTO::getPriceDate).allMatch(StrUtil::isBlank)) {
  260. fundNavDTOList.forEach(e -> e.setPriceDate(priceDate));
  261. }
  262. }
  263. private String getPriceDateFromSheet(Sheet sheet, Integer maxRowNum) {
  264. Map<Integer, String> priceDateMap = MapUtil.newHashMap();
  265. for (int rowNum = 0; rowNum < maxRowNum; rowNum++) {
  266. Row row = sheet.getRow(rowNum);
  267. if (row == null) {
  268. continue;
  269. }
  270. int lastCellNum = row.getLastCellNum();
  271. for (int columnNum = 0; columnNum < lastCellNum; columnNum++) {
  272. Cell cell = row.getCell(columnNum);
  273. if (cell == null) {
  274. continue;
  275. }
  276. String cellValue = ExcelUtil.getCellValue(cell);
  277. if (StrUtil.isNotBlank(cellValue) && cellValue.contains("截至")) {
  278. int index = cellValue.indexOf("截至");
  279. String date = cellValue.substring(index + 2, index + 2 + 10);
  280. if (StrUtil.isNotBlank(date)) {
  281. date = date.replaceAll("年", "-").replaceAll("月", "-");
  282. }
  283. priceDateMap.put(1, date);
  284. continue;
  285. }
  286. boolean isValidDate = StringUtil.isValidDate(cellValue);
  287. if (isValidDate) {
  288. String date = cellValue.replaceAll("年", "-").replaceAll("月", "-");
  289. priceDateMap.put(2, date);
  290. }
  291. }
  292. }
  293. if (MapUtil.isNotEmpty(priceDateMap)) {
  294. Integer key = priceDateMap.keySet().stream().min(Integer::compareTo).orElse(null);
  295. return priceDateMap.get(key);
  296. }
  297. return null;
  298. }
  299. private EmailFundNavDTO buildEmailFundNavDTO(Map<String, String> fieldValueMap) {
  300. if (MapUtil.isEmpty(fieldValueMap) || fieldValueMap.values().stream().allMatch(StrUtil::isBlank)) {
  301. return null;
  302. }
  303. EmailFundNavDTO fundNavDTO = new EmailFundNavDTO();
  304. fundNavDTO.setFundName(fieldValueMap.get(EmailFieldConst.FUND_NAME));
  305. fundNavDTO.setRegisterNumber(fieldValueMap.get(EmailFieldConst.REGISTER_NUMBER));
  306. fundNavDTO.setPriceDate(fieldValueMap.get(EmailFieldConst.PRICE_DATE));
  307. fundNavDTO.setNav(fieldValueMap.get(EmailFieldConst.NAV));
  308. fundNavDTO.setCumulativeNavWithdrawal(fieldValueMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL));
  309. // pdf解析到的值带有",",比如:"10,656,097.37"
  310. String assetNet = fieldValueMap.get(EmailFieldConst.ASSET_NET);
  311. fundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
  312. String assetShares = fieldValueMap.get(EmailFieldConst.ASSET_NET);
  313. fundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetShares));
  314. return fundNavDTO;
  315. }
  316. private List<EmailFundNavDTO> readSheetRowData(Row sheetRow, Map<String, Integer> columnFieldMap) {
  317. if (sheetRow == null) {
  318. return null;
  319. }
  320. String nav = columnFieldMap.get(EmailFieldConst.NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.NAV)) != null
  321. ? ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.NAV))) : null;
  322. String cumulativeNavWithdrawal = columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL)) != null ?
  323. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL))) : null;
  324. String assetNet = columnFieldMap.get(EmailFieldConst.ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET)) != null ?
  325. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET))) : null;
  326. if (StrUtil.isBlank(nav) && StrUtil.isBlank(cumulativeNavWithdrawal) && StrUtil.isBlank(assetNet)) {
  327. return null;
  328. }
  329. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  330. EmailFundNavDTO emailFundNavDTO = new EmailFundNavDTO();
  331. String priceDate = columnFieldMap.get(EmailFieldConst.PRICE_DATE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PRICE_DATE)) != null ?
  332. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PRICE_DATE))) : null;
  333. // 份额基金净值文件格式
  334. long parentFiledCount = columnFieldMap.keySet().stream().filter(e -> e.contains("parent")).count();
  335. if (parentFiledCount >= 1) {
  336. Optional.ofNullable(buildParentNav(sheetRow, columnFieldMap, priceDate)).ifPresent(fundNavDTOList::add);
  337. }
  338. // 正常净值文件格式
  339. if (StrUtil.isNotBlank(priceDate) && !priceDate.contains("-")) {
  340. // 处理日期yyyyMMdd格式 -> 转成yyyy-MM-dd
  341. priceDate = priceDate.replace("年", "").replace("月", "").replace("日", "");
  342. priceDate = DateUtil.format(DateUtil.parse(priceDate, DateConst.YYYYMMDD), DateConst.YYYY_MM_DD);
  343. }
  344. emailFundNavDTO.setPriceDate(priceDate);
  345. String fundName = ExcelUtil.getPriorityFieldValue(sheetRow, columnFieldMap.get(EmailFieldConst.LEVEL_FUND_NAME), columnFieldMap.get(EmailFieldConst.FUND_NAME));
  346. emailFundNavDTO.setFundName(fundName);
  347. String registerNumber = ExcelUtil.getPriorityFieldValue(sheetRow, columnFieldMap.get(EmailFieldConst.LEVEL_REGISTER_NUMBER), columnFieldMap.get(EmailFieldConst.REGISTER_NUMBER));
  348. emailFundNavDTO.setRegisterNumber(registerNumber);
  349. emailFundNavDTO.setNav(nav);
  350. emailFundNavDTO.setCumulativeNavWithdrawal(cumulativeNavWithdrawal);
  351. String virtualNav = columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV)) != null ?
  352. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV))) : null;
  353. emailFundNavDTO.setVirtualNav(virtualNav);
  354. emailFundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
  355. String assetShares = columnFieldMap.get(EmailFieldConst.ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE)) != null ?
  356. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE))) : null;
  357. emailFundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
  358. fundNavDTOList.add(emailFundNavDTO);
  359. return fundNavDTOList;
  360. }
  361. private EmailFundNavDTO buildParentNav(Row sheetRow, Map<String, Integer> columnFieldMap, String priceDate) {
  362. EmailFundNavDTO emailFundNavDTO = new EmailFundNavDTO();
  363. String nav = columnFieldMap.get(EmailFieldConst.PARENT_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_NAV)) != null ?
  364. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_NAV))) : null;
  365. String cumulativeNavWithdrawal = columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL)) != null ?
  366. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL))) : null;
  367. if (StrUtil.isBlank(nav) && StrUtil.isBlank(cumulativeNavWithdrawal)) {
  368. return null;
  369. }
  370. if (StrUtil.isNotBlank(priceDate) && !priceDate.contains("-")) {
  371. // 处理日期yyyyMMdd格式 -> 转成yyyy-MM-dd
  372. priceDate = priceDate.replace("年", "").replace("月", "").replace("日", "");
  373. priceDate = DateUtil.format(DateUtil.parse(priceDate, DateConst.YYYYMMDD), DateConst.YYYY_MM_DD);
  374. }
  375. emailFundNavDTO.setPriceDate(priceDate);
  376. String fundName = columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME)).getStringCellValue() != null ?
  377. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME))) : null;
  378. emailFundNavDTO.setFundName(fundName);
  379. String registerNumber = columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER)) != null ?
  380. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER))) : null;
  381. emailFundNavDTO.setRegisterNumber(registerNumber);
  382. emailFundNavDTO.setNav(nav);
  383. emailFundNavDTO.setCumulativeNavWithdrawal(cumulativeNavWithdrawal);
  384. String virtualNav = columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV)) != null ?
  385. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV))) : null;
  386. emailFundNavDTO.setVirtualNav(virtualNav);
  387. String assetNet = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET)) != null ?
  388. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET))) : null;
  389. emailFundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
  390. String assetShares = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE)) != null ?
  391. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE))) : null;
  392. emailFundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
  393. return emailFundNavDTO;
  394. }
  395. private Map<String, Integer> getFieldRow(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  396. // 考虑日期字段识别逻辑的问题
  397. long rowNumCount = fieldPositionMap.values().stream().map(Pair::getKey).distinct().count();
  398. if (rowNumCount > 1) {
  399. // 存在合并单元格的方式 -> 日期字段所在位置可能会存在错误
  400. fieldPositionMap.remove(EmailFieldConst.PRICE_DATE);
  401. }
  402. Map<String, Integer> fieldRowMap = MapUtil.newHashMap();
  403. for (Map.Entry<String, Pair<Integer, Integer>> fieldPositionEntry : fieldPositionMap.entrySet()) {
  404. String field = fieldPositionEntry.getKey();
  405. Integer column = fieldPositionEntry.getValue().getValue();
  406. fieldRowMap.put(field, column);
  407. }
  408. return fieldRowMap;
  409. }
  410. private Map<Integer, String> getRowField(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  411. Map<Integer, String> fieldRowMap = MapUtil.newHashMap();
  412. for (Map.Entry<String, Pair<Integer, Integer>> fieldPositionEntry : fieldPositionMap.entrySet()) {
  413. String field = fieldPositionEntry.getKey();
  414. Integer column = fieldPositionEntry.getValue().getKey();
  415. fieldRowMap.put(column, field);
  416. }
  417. return fieldRowMap;
  418. }
  419. /**
  420. * 通过表头所在位置判断是行数据还是列数据
  421. *
  422. * @param fieldPositionMap excel中表头所在的位置
  423. * @return 行方向-1,,列方向-2
  424. */
  425. private Integer detectDataDirection(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  426. long count = fieldPositionMap.values().stream().map(Pair::getValue).distinct().count();
  427. return count == 1 ? COLUMN_DIRECTION_TYPE : ROW_DIRECTION_TYPE;
  428. }
  429. /**
  430. * 找出excel中表头所在的位置
  431. *
  432. * @param sheet 表格工作簿
  433. * @param emailFieldMap 邮件字段识别规则映射表
  434. * @return excel中表头所在的位置(行, 列)
  435. */
  436. private Map<String, Pair<Integer, Integer>> getFieldPosition(Sheet sheet, Map<String, List<String>> emailFieldMap) {
  437. Map<String, List<FieldPositionDTO>> tempFieldPositionMap = MapUtil.newHashMap();
  438. int lastRowNum = sheet.getLastRowNum();
  439. for (int rowNum = 0; rowNum <= lastRowNum; rowNum++) {
  440. Row sheetRow = sheet.getRow(rowNum);
  441. if (sheetRow == null) {
  442. continue;
  443. }
  444. int lastCellNum = sheetRow.getLastCellNum();
  445. for (int cellNum = 0; cellNum < lastCellNum; cellNum++) {
  446. Cell cell = sheetRow.getCell(cellNum);
  447. if (cell == null) {
  448. continue;
  449. }
  450. String cellValue = ExcelUtil.getCellValue(cell);
  451. // 移除掉非中文字符
  452. String newCellValue = StringUtil.retainChineseCharacters(cellValue);
  453. String field = fieldMatch(newCellValue, emailFieldMap);
  454. if (StrUtil.isNotBlank(field)) {
  455. List<FieldPositionDTO> fieldPositionDTOList = tempFieldPositionMap.getOrDefault(field, new ArrayList<>());
  456. fieldPositionDTOList.add(new FieldPositionDTO(newCellValue, Pair.of(rowNum, cellNum)));
  457. tempFieldPositionMap.put(field, fieldPositionDTOList);
  458. }
  459. }
  460. }
  461. // 判断是不是份额基金净值文件格式(同时存在两个备案编码字段)
  462. return handlerFieldPosition(tempFieldPositionMap);
  463. }
  464. private Map<String, Pair<Integer, Integer>> handlerFieldPosition(Map<String, List<FieldPositionDTO>> tempFieldPositionMap) {
  465. Map<String, Pair<Integer, Integer>> fieldPositionMap = MapUtil.newHashMap();
  466. boolean hasParentField = tempFieldPositionMap.keySet().stream().anyMatch(e -> e.contains("parent"));
  467. for (Map.Entry<String, List<FieldPositionDTO>> entry : tempFieldPositionMap.entrySet()) {
  468. String field = entry.getKey();
  469. List<FieldPositionDTO> fieldPositionDTOList = entry.getValue();
  470. int size = fieldPositionDTOList.size();
  471. if (size == 1) {
  472. fieldPositionMap.put(field, fieldPositionDTOList.get(0).getPair());
  473. continue;
  474. }
  475. if ((!hasParentField && size > 1)) {
  476. if (EmailFieldConst.REGISTER_NUMBER.equals(field)) {
  477. Pair<Integer, Integer> pair = fieldPositionDTOList.stream()
  478. .filter(e -> !e.getFieldValue().contains("协会") && !e.getFieldValue().contains("备案")).map(FieldPositionDTO::getPair).findFirst().orElse(null);
  479. fieldPositionMap.put(field, pair);
  480. } else {
  481. fieldPositionMap.put(field, fieldPositionDTOList.get(size - 1).getPair());
  482. }
  483. continue;
  484. }
  485. if ((hasParentField && size > 1)) {
  486. fieldPositionMap.put(field, fieldPositionDTOList.get(0).getPair());
  487. }
  488. }
  489. // 母基金缺少代码的情况
  490. if (hasParentField && fieldPositionMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER) == null) {
  491. List<FieldPositionDTO> fieldPositionDTOS = tempFieldPositionMap.get(EmailFieldConst.REGISTER_NUMBER);
  492. Pair<Integer, Integer> parentRegisterNumberPair = fieldPositionDTOS.stream()
  493. .filter(e -> e.getFieldValue().contains("协会") || e.getFieldValue().contains("备案")).map(FieldPositionDTO::getPair).findFirst().orElse(null);
  494. fieldPositionMap.put(EmailFieldConst.PARENT_REGISTER_NUMBER, parentRegisterNumberPair);
  495. }
  496. return fieldPositionMap;
  497. }
  498. /**
  499. * 判断单元格值是否为表头字段
  500. *
  501. * @param cellValue 单元格值
  502. * @param emailFieldMap 邮件字段识别规则映射表
  503. * @return 表头对应的标识
  504. */
  505. public String fieldMatch(String cellValue, Map<String, List<String>> emailFieldMap) {
  506. if (StrUtil.isBlank(cellValue)) {
  507. return null;
  508. }
  509. for (Map.Entry<String, List<String>> fieldEntry : emailFieldMap.entrySet()) {
  510. List<String> fieldList = fieldEntry.getValue();
  511. for (String field : fieldList) {
  512. if (cellValue.equals(field)) {
  513. return fieldEntry.getKey();
  514. }
  515. }
  516. }
  517. return null;
  518. }
  519. }