|
@@ -26,6 +26,7 @@ import org.jsoup.nodes.Element;
|
|
|
import org.jsoup.select.Elements;
|
|
|
import org.slf4j.Logger;
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
+import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.stereotype.Component;
|
|
|
import technology.tabula.*;
|
|
|
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
|
|
@@ -35,10 +36,7 @@ import java.io.FileOutputStream;
|
|
|
import java.io.OutputStream;
|
|
|
import java.nio.file.Files;
|
|
|
import java.nio.file.Paths;
|
|
|
-import java.util.ArrayList;
|
|
|
-import java.util.List;
|
|
|
-import java.util.Map;
|
|
|
-import java.util.Optional;
|
|
|
+import java.util.*;
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
/**
|
|
@@ -51,6 +49,9 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
|
|
|
private static final Logger log = LoggerFactory.getLogger(AbstractEmailParser.class);
|
|
|
|
|
|
+ @Value("${email.file.path}")
|
|
|
+ private String path;
|
|
|
+
|
|
|
private static final Integer ROW_DIRECTION_TYPE = 1;
|
|
|
private static final Integer COLUMN_DIRECTION_TYPE = 2;
|
|
|
|
|
@@ -76,7 +77,14 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
}
|
|
|
// 3.解析邮件pdf附件
|
|
|
if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isPdf(emailContentInfoDTO.getFileName())) {
|
|
|
- List<EmailFundNavDTO> fundNavDTOList = parsePdfFile(emailContentInfoDTO, emailContentInfoDTO.getFilePath(), emailFieldMap);
|
|
|
+ String excelFilePath = path + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "")
|
|
|
+ + "/" + emailContentInfoDTO.getFileName().replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
|
|
|
+ List<EmailFundNavDTO> fundNavDTOList = parsePdfFile(emailContentInfoDTO.getFilePath(), excelFilePath, emailFieldMap);
|
|
|
+ Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
|
|
|
+ }
|
|
|
+ // 4.解析邮件zip,rar附件
|
|
|
+ if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isZip(emailContentInfoDTO.getFileName())) {
|
|
|
+ List<EmailFundNavDTO> fundNavDTOList = parsePackageFile(emailContentInfoDTO, emailContentInfoDTO.getFileName(), emailContentInfoDTO.getFilePath(), emailFieldMap);
|
|
|
Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
|
|
|
}
|
|
|
|
|
@@ -87,17 +95,48 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
return emailFundNavDTOList;
|
|
|
}
|
|
|
|
|
|
+    /**
+     * Extracts a compressed attachment and parses every supported file found in it.
+     *
+     * <p>The archive is extracted by {@code ExcelUtil.extractCompressedFiles} into a directory that sits next to
+     * the archive and is named after it without its extension. Every returned path is handed to
+     * {@code parseZipFile}; when a returned path is a directory, its direct children are parsed as well.
+     *
+     * @param emailContentInfoDTO email information, passed through to {@code parseZipFile}
+     * @param fileName            archive file name
+     * @param filePath            path of the archive on disk
+     * @param emailFieldMap       email field recognition rule mapping table
+     * @return the parsed NAV data; never {@code null}, empty when nothing could be parsed
+     */
+    private List<EmailFundNavDTO> parsePackageFile(EmailContentInfoDTO emailContentInfoDTO, String fileName, String filePath, Map<String, List<String>> emailFieldMap) {
+        // Strip only the trailing extension, literally. replaceAll(".zip", "") treated "." as a regex wildcard
+        // (so "azip.zip" collapsed to "") and left other archive extensions in place, which made the
+        // extraction directory identical to the archive path.
+        int dotIndex = fileName.lastIndexOf('.');
+        String baseName = dotIndex > 0 ? fileName.substring(0, dotIndex) : fileName;
+
+        // lastIndexOf: the file name may also occur earlier in the path as a directory component.
+        int nameIndex = filePath.lastIndexOf(fileName);
+        String parentPath;
+        if (nameIndex >= 0) {
+            parentPath = filePath.substring(0, nameIndex);
+        } else {
+            // The stored file does not carry the attachment name: fall back to the archive's own directory
+            // instead of failing in substring(0, -1).
+            String parent = new File(filePath).getParent();
+            parentPath = parent == null ? "" : parent + File.separator;
+        }
+        String destPath = parentPath + baseName;
+        log.info("压缩包地址:{},解压后文件地址:{}", filePath, destPath);
+
+        List<EmailFundNavDTO> emailFundNavDTOList = CollUtil.newArrayList();
+        List<String> dir = ExcelUtil.extractCompressedFiles(filePath, destPath);
+        if (dir == null) {
+            return emailFundNavDTOList;
+        }
+        for (String zipFilePath : dir) {
+            Optional.ofNullable(parseZipFile(emailContentInfoDTO, zipFilePath, emailFieldMap)).ifPresent(emailFundNavDTOList::addAll);
+            // listFiles() yields null for a regular file or an unreadable directory.
+            File[] children = new File(zipFilePath).listFiles();
+            if (children == null) {
+                continue;
+            }
+            for (File child : children) {
+                // Unlike list(), listFiles() keeps the parent directory in the path, so the child is resolved
+                // inside the extraction directory rather than against the working directory.
+                Optional.ofNullable(parseZipFile(emailContentInfoDTO, child.getPath(), emailFieldMap)).ifPresent(emailFundNavDTOList::addAll);
+            }
+        }
+        return emailFundNavDTOList;
+    }
|
|
|
+
|
|
|
+    /**
+     * Parses one file extracted from a compressed attachment, dispatching on its type.
+     *
+     * <ul>
+     *   <li>PDF: parsed via {@code parsePdfFile}, with the intermediate .xlsx written next to the PDF</li>
+     *   <li>Excel: parsed via {@code parseExcelFile}</li>
+     *   <li>nested archive: extracted and parsed via {@code parsePackageFile}</li>
+     * </ul>
+     *
+     * @param emailContentInfoDTO email information, only needed for nested archives
+     * @param zipFilePath         path of the extracted file
+     * @param emailFieldMap       email field recognition rule mapping table
+     * @return the parsed NAV data; never {@code null}, empty for unsupported file types
+     */
+    private List<EmailFundNavDTO> parseZipFile(EmailContentInfoDTO emailContentInfoDTO, String zipFilePath, Map<String, List<String>> emailFieldMap) {
+        List<EmailFundNavDTO> fundNavDTOList = null;
+        if (ExcelUtil.isPdf(zipFilePath)) {
+            // Swap only the trailing extension of the file name. replace(".pdf", ".xlsx") also rewrote directory
+            // names containing ".pdf" and missed mixed-case extensions, leaving the Excel path equal to the PDF path.
+            int dotIndex = zipFilePath.lastIndexOf('.');
+            int separatorIndex = Math.max(zipFilePath.lastIndexOf('/'), zipFilePath.lastIndexOf('\\'));
+            String stem = dotIndex > separatorIndex ? zipFilePath.substring(0, dotIndex) : zipFilePath;
+            fundNavDTOList = parsePdfFile(zipFilePath, stem + ".xlsx", emailFieldMap);
+        }
+        if (ExcelUtil.isExcel(zipFilePath)) {
+            fundNavDTOList = parseExcelFile(zipFilePath, emailFieldMap);
+        }
+        if (ExcelUtil.isZip(zipFilePath)) {
+            String name = new File(zipFilePath).getName();
+            fundNavDTOList = parsePackageFile(emailContentInfoDTO, name, zipFilePath, emailFieldMap);
+        }
+        // The delegates may return null; normalise so callers can addAll() the result safely.
+        return fundNavDTOList != null ? fundNavDTOList : CollUtil.newArrayList();
+    }
|
|
|
+
|
|
|
/**
|
|
|
* 解析邮件pdf附件
|
|
|
*
|
|
|
- * @param emailContentInfoDTO 邮件信息
|
|
|
- * @param filePath 邮件excel附件地址
|
|
|
- * @param emailFieldMap 邮件字段识别规则映射表
|
|
|
+ * @param filePath 邮件pdf附件路径
|
|
|
+ * @param excelFilePath pdf转excel路径
|
|
|
+ * @param emailFieldMap 邮件字段识别规则映射表
|
|
|
* @return 解析到的净值数据
|
|
|
*/
|
|
|
- private List<EmailFundNavDTO> parsePdfFile(EmailContentInfoDTO emailContentInfoDTO, String filePath, Map<String, List<String>> emailFieldMap) {
|
|
|
- String excelFilePath = "/data/file/pdf/" + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "")
|
|
|
- + "/" + emailContentInfoDTO.getFileName().replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
|
|
|
+ private List<EmailFundNavDTO> parsePdfFile(String filePath, String excelFilePath, Map<String, List<String>> emailFieldMap) {
|
|
|
File savefile = new File(excelFilePath);
|
|
|
if (!savefile.exists()) {
|
|
|
if (!savefile.getParentFile().exists()) {
|
|
@@ -131,7 +170,7 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
// 将Excel工作簿写入输出流
|
|
|
workbook.write(outputStream);
|
|
|
} catch (Exception e) {
|
|
|
- log.error("解析邮件pdf附件报错 -> 邮件主题:{},邮件日期:{},堆栈信息:{}", emailContentInfoDTO.getEmailTitle(), emailContentInfoDTO.getEmailDate(), ExceptionUtil.stacktraceToString(e));
|
|
|
+ log.error("解析邮件pdf附件报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
|
|
|
}
|
|
|
return parseExcelFile(excelFilePath, emailFieldMap);
|
|
|
}
|
|
@@ -181,7 +220,7 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
// 创建一个新的Excel工作簿
|
|
|
Workbook workbook = new XSSFWorkbook();
|
|
|
Sheet sheet = workbook.createSheet("Sheet1");
|
|
|
- writeDataToSheet(sheet, rows);
|
|
|
+ ExcelUtil.writeDataToSheet(sheet, rows);
|
|
|
// 将Excel工作簿写入输出流
|
|
|
workbook.write(outputStream);
|
|
|
} catch (Exception e) {
|
|
@@ -221,7 +260,7 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
if (cell == null) {
|
|
|
continue;
|
|
|
}
|
|
|
- fieldValueMap.put(fieldRowMap.get(rowNum), cell.getStringCellValue());
|
|
|
+ fieldValueMap.put(fieldRowMap.get(rowNum), ExcelUtil.getCellValue(cell));
|
|
|
}
|
|
|
Optional.ofNullable(buildEmailFundNavDTO(fieldValueMap)).ifPresent(fundNavDTOList::add);
|
|
|
}
|
|
@@ -251,7 +290,7 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
if (cell == null) {
|
|
|
continue;
|
|
|
}
|
|
|
- String cellValue = cell.getStringCellValue();
|
|
|
+ String cellValue = ExcelUtil.getCellValue(cell);
|
|
|
if (StrUtil.isNotBlank(cellValue) && cellValue.contains("截至")) {
|
|
|
int index = cellValue.indexOf("截至");
|
|
|
String date = cellValue.substring(index + 2, index + 2 + 10);
|
|
@@ -288,12 +327,11 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
|
|
|
// pdf解析到的值带有",",比如:"10,656,097.37"
|
|
|
String assetNet = fieldValueMap.get(EmailFieldConst.ASSET_NET);
|
|
|
- assetNet = StrUtil.isNotBlank(assetNet) ? assetNet.replaceAll(",", "") : null;
|
|
|
+ fundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
|
|
|
|
|
|
- fundNavDTO.setAssetNet(assetNet);
|
|
|
-        String assetShares = fieldValueMap.get(EmailFieldConst.ASSET_NET);
-        assetShares = StrUtil.isNotBlank(assetShares) ? assetShares.replaceAll(",", "") : null;
-        fundNavDTO.setAssetShare(assetShares);
+        // Asset share comes from its own field and goes to its own property; reading ASSET_NET and calling
+        // setAssetNet a second time left the share unset.
+        String assetShares = fieldValueMap.get(EmailFieldConst.ASSET_SHARE);
+        fundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
|
|
|
+
|
|
|
return fundNavDTO;
|
|
|
}
|
|
|
|
|
@@ -305,7 +343,9 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
? ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.NAV))) : null;
|
|
|
String cumulativeNavWithdrawal = columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL)) != null ?
|
|
|
ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL))) : null;
|
|
|
- if (StrUtil.isBlank(nav) && StrUtil.isBlank(cumulativeNavWithdrawal)) {
|
|
|
+ String assetNet = columnFieldMap.get(EmailFieldConst.ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET)) != null ?
|
|
|
+ ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET))) : null;
|
|
|
+ if (StrUtil.isBlank(nav) && StrUtil.isBlank(cumulativeNavWithdrawal) && StrUtil.isBlank(assetNet)) {
|
|
|
return null;
|
|
|
}
|
|
|
List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
|
|
@@ -321,30 +361,28 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
// 正常净值文件格式
|
|
|
if (StrUtil.isNotBlank(priceDate) && !priceDate.contains("-")) {
|
|
|
// 处理日期yyyyMMdd格式 -> 转成yyyy-MM-dd
|
|
|
+ priceDate = priceDate.replace("年", "").replace("月", "").replace("日", "");
|
|
|
priceDate = DateUtil.format(DateUtil.parse(priceDate, DateConst.YYYYMMDD), DateConst.YYYY_MM_DD);
|
|
|
}
|
|
|
emailFundNavDTO.setPriceDate(priceDate);
|
|
|
- String fundName = columnFieldMap.get(EmailFieldConst.FUND_NAME) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.FUND_NAME)).getStringCellValue() != null ?
|
|
|
- ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.FUND_NAME))) : null;
|
|
|
+ String fundName = ExcelUtil.getPriorityFieldValue(sheetRow, columnFieldMap.get(EmailFieldConst.LEVEL_FUND_NAME), columnFieldMap.get(EmailFieldConst.FUND_NAME));
|
|
|
emailFundNavDTO.setFundName(fundName);
|
|
|
|
|
|
- String registerNumber = columnFieldMap.get(EmailFieldConst.REGISTER_NUMBER) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.REGISTER_NUMBER)) != null ?
|
|
|
- ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.REGISTER_NUMBER))) : null;
|
|
|
+ String registerNumber = ExcelUtil.getPriorityFieldValue(sheetRow, columnFieldMap.get(EmailFieldConst.LEVEL_REGISTER_NUMBER), columnFieldMap.get(EmailFieldConst.REGISTER_NUMBER));
|
|
|
emailFundNavDTO.setRegisterNumber(registerNumber);
|
|
|
+
|
|
|
emailFundNavDTO.setNav(nav);
|
|
|
emailFundNavDTO.setCumulativeNavWithdrawal(cumulativeNavWithdrawal);
|
|
|
String virtualNav = columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV)) != null ?
|
|
|
ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV))) : null;
|
|
|
emailFundNavDTO.setVirtualNav(virtualNav);
|
|
|
- String assetNet = columnFieldMap.get(EmailFieldConst.ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET)) != null ?
|
|
|
- ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET))) : null;
|
|
|
- // pdf解析到的值带有",",比如:"10,656,097.37"
|
|
|
- assetNet = StrUtil.isNotBlank(assetNet) ? assetNet.replaceAll(",", "") : null;
|
|
|
- emailFundNavDTO.setAssetNet(assetNet);
|
|
|
+
|
|
|
+
|
|
|
+ emailFundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
|
|
|
+
|
|
|
String assetShares = columnFieldMap.get(EmailFieldConst.ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE)) != null ?
|
|
|
ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE))) : null;
|
|
|
- assetShares = StrUtil.isNotBlank(assetShares) ? assetShares.replaceAll(",", "") : null;
|
|
|
- emailFundNavDTO.setAssetShare(assetShares);
|
|
|
+ emailFundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
|
|
|
|
|
|
fundNavDTOList.add(emailFundNavDTO);
|
|
|
return fundNavDTOList;
|
|
@@ -361,6 +399,7 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
}
|
|
|
if (StrUtil.isNotBlank(priceDate) && !priceDate.contains("-")) {
|
|
|
// 处理日期yyyyMMdd格式 -> 转成yyyy-MM-dd
|
|
|
+ priceDate = priceDate.replace("年", "").replace("月", "").replace("日", "");
|
|
|
priceDate = DateUtil.format(DateUtil.parse(priceDate, DateConst.YYYYMMDD), DateConst.YYYY_MM_DD);
|
|
|
}
|
|
|
emailFundNavDTO.setPriceDate(priceDate);
|
|
@@ -378,13 +417,12 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
|
|
|
String assetNet = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET)) != null ?
|
|
|
ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET))) : null;
|
|
|
- assetNet = StrUtil.isNotBlank(assetNet) ? assetNet.replaceAll(",", "") : null;
|
|
|
- emailFundNavDTO.setAssetNet(assetNet);
|
|
|
+ emailFundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
|
|
|
+
|
|
|
String assetShares = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE)) != null ?
|
|
|
ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE))) : null;
|
|
|
- assetShares = StrUtil.isNotBlank(assetShares) ? assetShares.replaceAll(",", "") : null;
|
|
|
+        // Store the parent fund's shares as asset share; setAssetNet here overwrote the net asset value set just before.
+        emailFundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
|
|
|
|
|
|
- emailFundNavDTO.setAssetShare(assetShares);
|
|
|
return emailFundNavDTO;
|
|
|
}
|
|
|
|
|
@@ -435,7 +473,7 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
private Map<String, Pair<Integer, Integer>> getFieldPosition(Sheet sheet, Map<String, List<String>> emailFieldMap) {
|
|
|
Map<String, List<Pair<Integer, Integer>>> tempFieldPositionMap = MapUtil.newHashMap();
|
|
|
int lastRowNum = sheet.getLastRowNum();
|
|
|
- for (int rowNum = 0; rowNum < lastRowNum; rowNum++) {
|
|
|
+ for (int rowNum = 0; rowNum <= lastRowNum; rowNum++) {
|
|
|
Row sheetRow = sheet.getRow(rowNum);
|
|
|
if (sheetRow == null) {
|
|
|
continue;
|
|
@@ -501,19 +539,4 @@ public class NavEmailParser extends AbstractEmailParser {
|
|
|
}
|
|
|
return null;
|
|
|
}
|
|
|
-
|
|
|
- private void writeDataToSheet(Sheet sheet, Elements rows) {
|
|
|
- int rowSize = rows.size();
|
|
|
- for (int rowNum = 0; rowNum < rowSize; rowNum++) {
|
|
|
- Row sheetRow = sheet.createRow(rowNum);
|
|
|
-
|
|
|
- Element elementRow = rows.get(rowNum);
|
|
|
- Elements cells = elementRow.select("td");
|
|
|
- int cellSize = cells.size();
|
|
|
- for (int cellNum = 0; cellNum < cellSize; cellNum++) {
|
|
|
- Cell sheetRowCell = sheetRow.createCell(cellNum);
|
|
|
- sheetRowCell.setCellValue(cells.get(cellNum).text());
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
}
|