Kaynağa Gözat

feat:邮件解析-支持解析邮件zip附件

mozuwen 7 ay önce
ebeveyn
işleme
6aebcc1bdb

+ 14 - 0
service-base/pom.xml

@@ -198,6 +198,20 @@
                 </exclusion>
             </exclusions>
         </dependency>
+
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <version>2.16.1</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-compress</artifactId>
+            <version>1.27.1</version>
+        </dependency>
+
+
     </dependencies>
 
 <!--    <build>-->

+ 37 - 1
service-base/src/main/java/com/simuwang/base/common/util/ExcelUtil.java

@@ -1,5 +1,6 @@
 package com.simuwang.base.common.util;
 
+import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.util.StrUtil;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.ss.usermodel.Cell;
@@ -11,6 +12,11 @@ import org.slf4j.LoggerFactory;
 
 import java.io.*;
 import java.text.NumberFormat;
+import java.util.List;
+import org.apache.commons.compress.archivers.ArchiveEntry;
+import org.apache.commons.compress.archivers.ArchiveInputStream;
+import org.apache.commons.compress.archivers.ArchiveStreamFactory;
+import org.apache.commons.compress.utils.IOUtils;
 
 public class ExcelUtil {
 
@@ -25,7 +31,7 @@ public class ExcelUtil {
     }
 
     public static boolean isZip(String filePath) {
-        return filePath.endsWith("zip") || filePath.endsWith("ZIP");
+        // Compare the suffix case-insensitively so names such as "a.Zip" or "b.Rar" are recognised too;
+        // a null path is reported as "not an archive" instead of throwing.
+        // NOTE(review): Apache Commons Compress does not list RAR among the formats it can read, so
+        // ".rar" attachments will probably fail inside extractCompressedFiles - confirm before relying on it.
+        return filePath != null
+                && (StrUtil.endWithIgnoreCase(filePath, "zip") || StrUtil.endWithIgnoreCase(filePath, "rar"));
     }
 
     public static Sheet getSheet(File file, int sheetIndex) {
@@ -182,4 +188,34 @@ public class ExcelUtil {
         cellValue = StrUtil.isNotBlank(cellValue) ? cellValue.replaceAll("[\\r\\n]+", "") : "";
         return cellValue;
     }
+
+    /**
+     * Extracts the regular files of an archive into a destination directory.
+     * <p>
+     * The archive format is auto-detected by {@link ArchiveStreamFactory}. Entries whose name would
+     * resolve outside of {@code destFilePath} (for example {@code ../../etc/passwd}) are skipped, because
+     * the archive comes from an email attachment and must be treated as untrusted input.
+     * Extraction is best effort: on any failure the error is logged and the files extracted so far are returned.
+     *
+     * @param zipFilePath  path of the archive to extract
+     * @param destFilePath directory to extract into; created when it does not exist
+     * @return absolute paths of the extracted regular files (directories are not listed); never {@code null}
+     */
+    public static List<String> extractCompressedFiles(String zipFilePath, String destFilePath) {
+        List<String> filePathList = CollUtil.newArrayList();
+
+        File destDir = new File(destFilePath);
+        if (!destDir.exists()) {
+            destDir.mkdirs();
+        }
+
+        try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipFilePath));
+             ArchiveInputStream<? extends ArchiveEntry> ais = new ArchiveStreamFactory().createArchiveInputStream(fis)) {
+            // The trailing separator stops "/dest-evil" from passing a prefix check against "/dest".
+            String destRoot = destDir.getCanonicalPath() + File.separator;
+            ArchiveEntry entry;
+            while ((entry = ais.getNextEntry()) != null) {
+                File entryFile = new File(destDir, entry.getName());
+                if (!entryFile.getCanonicalPath().startsWith(destRoot)) {
+                    LoggerFactory.getLogger(ExcelUtil.class)
+                            .warn("Skipping archive entry outside of the target directory -> archive:{}, entry:{}", zipFilePath, entry.getName());
+                    continue;
+                }
+                if (entry.isDirectory()) {
+                    entryFile.mkdirs();
+                    continue;
+                }
+                // Archives are not required to contain explicit directory entries for nested files.
+                File parentDir = entryFile.getParentFile();
+                if (parentDir != null && !parentDir.exists()) {
+                    parentDir.mkdirs();
+                }
+                try (FileOutputStream fos = new FileOutputStream(entryFile)) {
+                    IOUtils.copy(ais, fos);
+                }
+                // Only report the file once it has been written completely.
+                filePathList.add(entryFile.getAbsolutePath());
+            }
+        } catch (Exception e) {
+            LoggerFactory.getLogger(ExcelUtil.class).error("Failed to extract archive -> archive:{}", zipFilePath, e);
+        }
+
+        return filePathList;
+    }
+
 }

+ 4 - 2
service-daq/src/main/java/com/simuwang/daq/service/EmailParseService.java

@@ -261,7 +261,8 @@ public class EmailParseService {
 
     private Integer saveEmailFileInfo(Integer emailId, String fileName, String filePath, Date parseDate) { // Stores one attachment record for an email and returns the record id.
         EmailFileInfoDO emailFileInfoDO = buildEmailFileInfoDO(emailId, fileName, filePath, parseDate);
-        return emailFileInfoMapper.insert(emailFileInfoDO);
+        emailFileInfoMapper.insert(emailFileInfoDO); // The value returned by insert is no longer used as the result.
+        return emailFileInfoDO.getId(); // NOTE(review): presumably the mapper fills in the generated key on insert - confirm the id is populated here.
     }
 
     private EmailFileInfoDO buildEmailFileInfoDO(Integer emailId, String fileName, String filePath, Date parseDate) {
@@ -332,7 +333,8 @@ public class EmailParseService {
         if (emailParseInfoDO == null) {
             return null;
         }
-        return emailParseInfoMapper.insert(emailParseInfoDO);
+        emailParseInfoMapper.insert(emailParseInfoDO);
+        return emailParseInfoDO.getId();
     }
 
     private EmailParseInfoDO buildEmailParseInfo(String emailAddress, String emailDate, String emailTitle, Integer emailType, Integer parseStatus, Date parseDate) {

+ 67 - 25
service-daq/src/main/java/com/simuwang/daq/service/NavEmailParser.java

@@ -35,10 +35,7 @@ import java.io.FileOutputStream;
 import java.io.OutputStream;
 import java.nio.file.Files;
 import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
+import java.util.*;
 import java.util.stream.Collectors;
 
 /**
@@ -76,7 +73,14 @@ public class NavEmailParser extends AbstractEmailParser {
         }
         // 3.解析邮件pdf附件
         if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isPdf(emailContentInfoDTO.getFileName())) {
-            List<EmailFundNavDTO> fundNavDTOList = parsePdfFile(emailContentInfoDTO, emailContentInfoDTO.getFilePath(), emailFieldMap);
+            String excelFilePath = "/data/file/pdf/" + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "")
+                    + "/" + emailContentInfoDTO.getFileName().replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
+            List<EmailFundNavDTO> fundNavDTOList = parsePdfFile(emailContentInfoDTO.getFilePath(), excelFilePath, emailFieldMap);
+            Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
+        }
+        // 4.解析邮件zip,rar附件
+        if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isZip(emailContentInfoDTO.getFileName())) {
+            List<EmailFundNavDTO> fundNavDTOList = parsePackageFile(emailContentInfoDTO, emailContentInfoDTO.getFilePath(), emailFieldMap);
             Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
         }
 
@@ -87,17 +91,46 @@ public class NavEmailParser extends AbstractEmailParser {
         return emailFundNavDTOList;
     }
 
+    /**
+     * Parses an archive attachment: extracts it next to the archive and parses every extracted file.
+     *
+     * @param emailContentInfoDTO email the attachment belongs to (passed through to nested archives)
+     * @param filePath            path of the archive attachment
+     * @param emailFieldMap       email field recognition rule mapping
+     * @return nav data parsed from all files in the archive; empty when nothing could be parsed
+     */
+    private List<EmailFundNavDTO> parsePackageFile(EmailContentInfoDTO emailContentInfoDTO, String filePath, Map<String, List<String>> emailFieldMap) {
+        // Extract into a uniquely named directory beside the archive so repeated or nested archives never collide.
+        String extractDirName = UUID.randomUUID().toString().replaceAll("-", "");
+        String destPath = new File(new File(filePath).getParentFile(), extractDirName).getPath();
+
+        List<EmailFundNavDTO> emailFundNavDTOList = CollUtil.newArrayList();
+        for (String extractedPath : ExcelUtil.extractCompressedFiles(filePath, destPath)) {
+            File extractedFile = new File(extractedPath);
+            if (!extractedFile.isDirectory()) {
+                // The delegated parsers may yield null, which addAll would reject.
+                Optional.ofNullable(parseZipFile(emailContentInfoDTO, extractedPath, emailFieldMap)).ifPresent(emailFundNavDTOList::addAll);
+                continue;
+            }
+            File[] children = extractedFile.listFiles();
+            if (children == null) {
+                continue;
+            }
+            for (File child : children) {
+                // Pass the full path on: a bare file name would be resolved against the working directory.
+                Optional.ofNullable(parseZipFile(emailContentInfoDTO, child.getAbsolutePath(), emailFieldMap)).ifPresent(emailFundNavDTOList::addAll);
+            }
+        }
+        return emailFundNavDTOList;
+    }
+
+    /**
+     * Parses a single file that was extracted from an archive, dispatching on its type
+     * (pdf, excel, or a nested archive). Files of any other type are ignored.
+     *
+     * @param emailContentInfoDTO email the file belongs to (only needed for nested archives)
+     * @param zipFilePath         path of the extracted file
+     * @param emailFieldMap       email field recognition rule mapping
+     * @return parsed nav data; an empty list (never {@code null}) when nothing was parsed
+     */
+    private List<EmailFundNavDTO> parseZipFile(EmailContentInfoDTO emailContentInfoDTO, String zipFilePath, Map<String, List<String>> emailFieldMap) {
+        List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
+        if (ExcelUtil.isPdf(zipFilePath)) {
+            // Swap only the file extension; replacing ".pdf" anywhere in the path would also rewrite
+            // directory names, and a name without ".pdf" would make the converter overwrite the source file.
+            int dotIndex = zipFilePath.lastIndexOf('.');
+            int separatorIndex = Math.max(zipFilePath.lastIndexOf('/'), zipFilePath.lastIndexOf('\\'));
+            String basePath = dotIndex > separatorIndex ? zipFilePath.substring(0, dotIndex) : zipFilePath;
+            fundNavDTOList = parsePdfFile(zipFilePath, basePath + ".xlsx", emailFieldMap);
+        }
+        if (ExcelUtil.isExcel(zipFilePath)) {
+            fundNavDTOList = parseExcelFile(zipFilePath, emailFieldMap);
+        }
+        if (ExcelUtil.isZip(zipFilePath)) {
+            // NOTE(review): nested archives recurse without a depth limit - consider capping for untrusted mail.
+            fundNavDTOList = parsePackageFile(emailContentInfoDTO, zipFilePath, emailFieldMap);
+        }
+        // Callers pass the result straight to addAll, so never hand back null.
+        return fundNavDTOList != null ? fundNavDTOList : CollUtil.newArrayList();
+    }
+
     /**
      * 解析邮件pdf附件
      *
-     * @param emailContentInfoDTO 邮件信息
-     * @param filePath            邮件excel附件地址
-     * @param emailFieldMap       邮件字段识别规则映射表
+     * @param filePath      邮件pdf附件路径
+     * @param excelFilePath pdf转excel路径
+     * @param emailFieldMap 邮件字段识别规则映射表
      * @return 解析到的净值数据
      */
-    private List<EmailFundNavDTO> parsePdfFile(EmailContentInfoDTO emailContentInfoDTO, String filePath, Map<String, List<String>> emailFieldMap) {
-        String excelFilePath = "/data/file/pdf/" + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "")
-                + "/" + emailContentInfoDTO.getFileName().replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
+    private List<EmailFundNavDTO> parsePdfFile(String filePath, String excelFilePath, Map<String, List<String>> emailFieldMap) {
         File savefile = new File(excelFilePath);
         if (!savefile.exists()) {
             if (!savefile.getParentFile().exists()) {
@@ -131,7 +164,7 @@ public class NavEmailParser extends AbstractEmailParser {
             // 将Excel工作簿写入输出流
             workbook.write(outputStream);
         } catch (Exception e) {
-            log.error("解析邮件pdf附件报错 -> 邮件主题:{},邮件日期:{},堆栈信息:{}", emailContentInfoDTO.getEmailTitle(), emailContentInfoDTO.getEmailDate(), ExceptionUtil.stacktraceToString(e));
+            log.error("解析邮件pdf附件报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
         }
         return parseExcelFile(excelFilePath, emailFieldMap);
     }
@@ -288,12 +321,11 @@ public class NavEmailParser extends AbstractEmailParser {
 
         // pdf解析到的值带有",",比如:"10,656,097.37"
         String assetNet = fieldValueMap.get(EmailFieldConst.ASSET_NET);
-        assetNet = StrUtil.isNotBlank(assetNet) ? assetNet.replaceAll(",", "") : null;
+        fundNavDTO.setAssetNet(numberDataHandler(assetNet));
 
-        fundNavDTO.setAssetNet(assetNet);
         String assetShares = fieldValueMap.get(EmailFieldConst.ASSET_NET);
-        assetShares = StrUtil.isNotBlank(assetShares) ? assetShares.replaceAll(",", "") : null;
-        fundNavDTO.setAssetShare(assetShares);
+        fundNavDTO.setAssetNet(numberDataHandler(assetShares));
+
         return fundNavDTO;
     }
 
@@ -336,20 +368,31 @@ public class NavEmailParser extends AbstractEmailParser {
         String virtualNav = columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV)) != null ?
                 ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV))) : null;
         emailFundNavDTO.setVirtualNav(virtualNav);
+
         String assetNet = columnFieldMap.get(EmailFieldConst.ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET)) != null ?
                 ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET))) : null;
-        // pdf解析到的值带有",",比如:"10,656,097.37"
-        assetNet = StrUtil.isNotBlank(assetNet) ? assetNet.replaceAll(",", "") : null;
-        emailFundNavDTO.setAssetNet(assetNet);
+        emailFundNavDTO.setAssetNet(numberDataHandler(assetNet));
+
         String assetShares = columnFieldMap.get(EmailFieldConst.ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE)) != null ?
                 ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE))) : null;
-        assetShares = StrUtil.isNotBlank(assetShares) ? assetShares.replaceAll(",", "") : null;
-        emailFundNavDTO.setAssetShare(assetShares);
+        emailFundNavDTO.setAssetShare(numberDataHandler(assetShares));
 
         fundNavDTOList.add(emailFundNavDTO);
         return fundNavDTOList;
     }
 
+    /**
+     * Cleans up a numeric text value taken from a parsed attachment.
+     * Values coming from pdf files carry thousands separators, e.g. "10,656,097.37"; these are removed.
+     *
+     * @param numberData raw text of the field, may be blank
+     * @return the text without commas, or {@code null} when the input is blank or the
+     *         comma-free text is rejected by {@code StringUtil.isNumeric}
+     */
+    private String numberDataHandler(String numberData) {
+        if (StrUtil.isNotBlank(numberData)) {
+            String withoutCommas = numberData.replaceAll(",", "");
+            // NOTE(review): verify that StringUtil.isNumeric accepts decimal points and signs;
+            // a digits-only check would turn values such as "10656097.37" into null.
+            return StringUtil.isNumeric(withoutCommas) ? withoutCommas : null;
+        }
+        return null;
+    }
+
     private EmailFundNavDTO buildParentNav(Row sheetRow, Map<String, Integer> columnFieldMap, String priceDate) {
         EmailFundNavDTO emailFundNavDTO = new EmailFundNavDTO();
         String nav = columnFieldMap.get(EmailFieldConst.PARENT_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_NAV)) != null ?
@@ -378,13 +421,12 @@ public class NavEmailParser extends AbstractEmailParser {
 
         String assetNet = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET)) != null ?
                 ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET))) : null;
-        assetNet = StrUtil.isNotBlank(assetNet) ? assetNet.replaceAll(",", "") : null;
-        emailFundNavDTO.setAssetNet(assetNet);
+        emailFundNavDTO.setAssetNet(numberDataHandler(assetNet));
+
         String assetShares = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE)) != null ?
                 ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE))) : null;
-        assetShares = StrUtil.isNotBlank(assetShares) ? assetShares.replaceAll(",", "") : null;
+        emailFundNavDTO.setAssetNet(numberDataHandler(assetShares));
 
-        emailFundNavDTO.setAssetShare(assetShares);
         return emailFundNavDTO;
     }
 

+ 2 - 2
service-deploy/src/main/test/java/com/simuwang/datadaq/DataTrusteeApplicationTests.java

@@ -32,8 +32,8 @@ class DataTrusteeApplicationTests {
         emailInfoDTO.setProtocol("imap");
         Map<Integer, List<String>> emailTypeMap = MapUtil.newHashMap();
         emailTypeMap.put(1, List.of("净值"));
-        Date startDate = DateUtil.parse("2024-09-10 15:30:00", DateConst.YYYY_MM_DD_HH_MM_SS);
-        Date endDate = DateUtil.parse("2024-09-10 18:00:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date startDate = DateUtil.parse("2024-09-11 11:13:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date endDate = DateUtil.parse("2024-09-11 12:00:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         try {
             emailParseService.parseEmail(emailInfoDTO, startDate, endDate);
         } catch (Exception e) {