Browse Source

feat:月报的管理人版和协会版识别功能,表新增字段维护

wangzaijun 2 tuần trước cách đây
mục cha
commit
8e39222ec7

+ 48 - 7
mo-daq/src/main/java/com/smppw/modaq/application/components/OCRReportParser.java

@@ -2,10 +2,12 @@ package com.smppw.modaq.application.components;
 
 import cn.hutool.core.io.IORuntimeException;
 import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.util.NumberUtil;
 import cn.hutool.core.util.StrUtil;
 import cn.hutool.http.HttpUtil;
 import cn.hutool.json.JSONObject;
 import cn.hutool.json.JSONUtil;
+import com.smppw.modaq.common.enums.ReportMonthlyType;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.exception.ReportParseException;
 import com.smppw.modaq.domain.dto.report.OCRParseData;
@@ -18,14 +20,49 @@ public class OCRReportParser {
     private final Logger logger = LoggerFactory.getLogger(this.getClass());
 
     private static final Map<String, Object> RESULT_SCHEMA_MAP = MapUtil.newHashMap(8);
+    private static final Map<String, Object> MONTHLY_TYPE_SCHEMA_MAP = MapUtil.newHashMap(8);
 
     static {
         RESULT_SCHEMA_MAP.put("基金名称", "");
         RESULT_SCHEMA_MAP.put("产品代码", "");
         RESULT_SCHEMA_MAP.put("是否有红色印章", "");
         RESULT_SCHEMA_MAP.put("是否有电话", "");
-//        RESULT_SCHEMA_MAP.put("是否有地址", "");
-//        RESULT_SCHEMA_MAP.put("是否有关注我们", "");
+
+        // Manager edition: a non-blank answer for this key makes parseMonthlyType return MANAGER
+        MONTHLY_TYPE_SCHEMA_MAP.put("是否有曲线", "");
+        // Association (AMAC) edition: parseMonthlyType requires both of these keys to be non-blank and numeric
+        MONTHLY_TYPE_SCHEMA_MAP.put("基金净资产", "");
+        MONTHLY_TYPE_SCHEMA_MAP.put("基金份额总额", "");
+    }
+    /**
+     * Asks the OCR service which edition a monthly report is, using the keys of
+     * MONTHLY_TYPE_SCHEMA_MAP as the requested result schema.
+     * <p>
+     * Decision order: a non-blank "是否有曲线" value yields MANAGER; otherwise, when both
+     * "基金净资产" and "基金份额总额" are non-blank and accepted by NumberUtil.isNumber, the
+     * result is AMAC; otherwise the initial value FAILED is returned.
+     * <p>
+     * NOTE(review): any non-blank answer for "是否有曲线" selects MANAGER. If the OCR model
+     * can answer with a negative word and cleanData does not blank it out, the report would
+     * still be classified as MANAGER - confirm against cleanData and real OCR replies.
+     * NOTE(review): both catch branches drop the original exception (only e.getMessage() is
+     * logged, and the IORuntimeException branch logs nothing), so the root cause is not
+     * recoverable from the thrown ReportParseException.
+     *
+     * @param filename  report name, used only in log messages
+     * @param ocrApi    URL of the OCR endpoint passed to parseOcrResult
+     * @param ocrImgUrl image address sent as the "image_url" request parameter
+     * @return MANAGER, AMAC, or FAILED when neither rule matches
+     * @throws ReportParseException with AI_NOT_FOUND when an IORuntimeException occurs, or
+     *                              with SYSTEM_ERROR for any other exception
+     */
+    public ReportMonthlyType parseMonthlyType(String filename, String ocrApi, String ocrImgUrl) throws ReportParseException {
+        Map<String, Object> paramsMap = MapUtil.newHashMap(4);
+        paramsMap.put("image_url", ocrImgUrl);
+        paramsMap.put("result_schema", JSONUtil.toJsonStr(MONTHLY_TYPE_SCHEMA_MAP));
+        ReportMonthlyType res = ReportMonthlyType.FAILED;
+        try {
+            JSONObject jsonObject = this.parseOcrResult(ocrApi, paramsMap);
+            String hasTrend = this.cleanData(jsonObject.getStr("是否有曲线"));
+            String netAsset = this.cleanData(jsonObject.getStr("基金净资产"));
+            String totalShare = this.cleanData(jsonObject.getStr("基金份额总额"));
+            if (StrUtil.isNotBlank(hasTrend)) {
+                res = ReportMonthlyType.MANAGER;
+            } else if (StrUtil.isAllNotBlank(netAsset, totalShare)
+                    && NumberUtil.isNumber(netAsset) && NumberUtil.isNumber(totalShare)) {
+                // This may misclassify, because some manager-edition monthly reports also contain shares and net value
+                res = ReportMonthlyType.AMAC;
+            }
+            return res;
+        } catch (IORuntimeException e) {
+            throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
+        } catch (Exception e) {
+            this.logger.warn("报告{} OCR提取月报类型错误:{}", filename, e.getMessage());
+            throw new ReportParseException(ReportParseStatus.SYSTEM_ERROR);
+        } finally {
+            if (logger.isInfoEnabled()) {
+                this.logger.info("报告{} OCR提取月报类型参数{},OCR提取月报类型结果:{}", filename, paramsMap, res);
+            }
+        }
     }
 
     public OCRParseData parse(String filename, String ocrApi, String ocrImgUrl) throws ReportParseException {
@@ -34,11 +71,7 @@ public class OCRReportParser {
         paramsMap.put("result_schema", JSONUtil.toJsonStr(RESULT_SCHEMA_MAP));
         OCRParseData res = new OCRParseData();
         try {
-            String body = HttpUtil.get(ocrApi, paramsMap);
-            JSONObject jsonResult = JSONUtil.parseObj(body);
-            String content = StrUtil.split(jsonResult.getStr("content"), "```").get(1);
-            String aiParserContent = "{" + StrUtil.subAfter(content, "{", false) + "}";
-            JSONObject jsonObject = JSONUtil.parseObj(aiParserContent);
+            JSONObject jsonObject = this.parseOcrResult(ocrApi, paramsMap);
             String fundName = this.cleanData(jsonObject.getStr("基金名称"));
             String fundCode = this.cleanData(jsonObject.getStr("产品代码"));
             String seals = this.cleanData(jsonObject.getStr("是否有红色印章"));
@@ -68,6 +101,14 @@ public class OCRReportParser {
         }
     }
 
+    private JSONObject parseOcrResult(String ocrApi, Map<String, Object> paramsMap) { // shared by parse and parseMonthlyType: call the OCR API and return the JSON object embedded in its reply
+        String body = HttpUtil.get(ocrApi, paramsMap); // HTTP GET with paramsMap as the request parameters
+        JSONObject jsonResult = JSONUtil.parseObj(body);
+        String content = StrUtil.split(jsonResult.getStr("content"), "```").get(1); // takes the second segment around "```"; presumably the reply wraps its JSON in a code fence - get(1) fails when there are fewer than two segments
+        String aiParserContent = "{" + StrUtil.subAfter(content, "{", false) + "}"; // NOTE(review): the text after the first "{" is re-wrapped in braces; if it still ends with its own "}" this appends a second one - confirm the parser tolerates it
+        return JSONUtil.parseObj(aiParserContent);
+    }
+
     private String cleanData(String text) {
         if (text == null) {
             return null;

+ 13 - 0
mo-daq/src/main/java/com/smppw/modaq/common/conts/Constants.java

@@ -4,4 +4,17 @@ public class Constants {
     public static final long DEFAULT_SERIAL_ID = 999L;
 
     public static final String WATERMARK_REPLACE = "+_+" + System.lineSeparator();
+
+    public static final String ARCHIVE_ZIP = "zip";
+    public static final String ARCHIVE_7Z = "7z";
+    public static final String ARCHIVE_RAR = "rar";
+
+    public static final String FILE_PDF = "pdf";
+    public static final String FILE_PNG = "png";
+    public static final String FILE_JPG = "jpg";
+    public static final String FILE_DOCX = "docx";
+    public static final String FILE_DOC = "doc";
+    public static final String FILE_HTML = "html";
+    public static final String FILE_XLSX = "xlsx";
+    public static final String FILE_XLS = "xls";
 }

+ 29 - 0
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportMonthlyType.java

@@ -0,0 +1,29 @@
+package com.smppw.modaq.common.enums;
+
+import lombok.Getter;
+/**
+ * Edition of a monthly report (manager edition vs. association edition) together with the
+ * outcome of trying to determine it. Each constant carries an Integer code in {@code type};
+ * the Lombok {@code @Getter} on the enum exposes it as {@code getType()}.
+ */
+@Getter
+public enum ReportMonthlyType {
+    /**
+     * Extraction not needed
+     */
+    NO_NEED(-1),
+    /**
+     * Extraction was attempted but did not succeed (code is null)
+     */
+    FAILED(null),
+    /**
+     * Association (AMAC) edition
+     */
+    AMAC(0),
+    /**
+     * Manager edition
+     */
+    MANAGER(1),
+    ;
+    private final Integer type; // code of this constant; null for FAILED
+
+    ReportMonthlyType(Integer type) {
+        this.type = type;
+    }
+}

+ 4 - 7
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParserFileType.java

@@ -1,10 +1,9 @@
 package com.smppw.modaq.common.enums;
 
-import cn.hutool.core.collection.ListUtil;
+import com.smppw.modaq.common.conts.Constants;
 import lombok.Getter;
 
 import java.util.Arrays;
-import java.util.List;
 
 /**
  * @author wangzaijun
@@ -13,9 +12,9 @@ import java.util.List;
  */
 @Getter
 public enum ReportParserFileType {
-    PDF("pdf"),
-    WORD("docx,doc"),
-    IMG("png.jpg,jpeg"),
+    PDF(Constants.FILE_PDF),
+    WORD(Constants.FILE_DOCX),
+    IMG(Constants.FILE_PNG + "," + Constants.FILE_JPG),
 //    EXCEL("xlsx,xls"),
 //    PYTHON("python");
     AI("ai");
@@ -30,6 +29,4 @@ public enum ReportParserFileType {
         return Arrays.stream(ReportParserFileType.values())
                 .filter(e -> e.getSuffix().contains(suffix)).findFirst().orElse(null);
     }
-
-    public static final List<ReportParserFileType> GENERATE_PNG_FILE_TYPES = ListUtil.of(ReportParserFileType.PDF, ReportParserFileType.WORD);
 }

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportType.java

@@ -8,7 +8,7 @@ public enum ReportType {
     OTHER(-2, "其他报告",
             new String[]{"公告", "通知", "告知函", "意见征询函", "说明函", "简报",
                     "清算报告", "邀请函", "观点", "预警", "投研报告", "公示", "回顾",
-                    "风险提示函", "说明", "合同变更", "生效函", "投资报告", "投资者月报", "运行报告"}),
+                    "风险提示函", "说明", "合同变更", "生效函"}),
 
     LETTER(-1, "交易流水确认函",
             new String[]{"确认单", "确认函", "交易确认数据",

+ 8 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportBaseInfoDTO.java

@@ -28,6 +28,12 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
      */
     private String reportType;
     /**
+     * Monthly report type (manager edition or association edition)
+     *
+     * @see com.smppw.modaq.common.enums.ReportMonthlyType
+     */
+    private Integer monthlyType;
+    /**
      * 报告是否用印
      */
     private Boolean withSeals;
@@ -51,6 +57,7 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
         entity.setReportDate(this.reportDate);
         entity.setReportName(this.reportName);
         entity.setReportType(this.reportType);
+        entity.setMonthlyType(this.monthlyType);
         entity.setWithSeals(this.withSeals);
         entity.setWithContacts(this.withContacts);
         this.initEntity(entity);
@@ -64,6 +71,7 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
                 ", reportDate='" + (reportDate == null ? null : DateUtil.formatDate(reportDate)) + '\'' +
                 ", reportName='" + reportName + '\'' +
                 ", reportType='" + reportType + '\'' +
+                ", monthlyType=" + monthlyType +
                 ", withSeals=" + withSeals +
                 ", withContacts=" + withContacts +
                 '}';

+ 6 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/entity/report/ReportBaseInfoDO.java

@@ -28,6 +28,12 @@ public class ReportBaseInfoDO extends BaseReportDO {
      */
     private String reportType;
     /**
+     * Monthly report type (manager edition or association edition)
+     *
+     * @see com.smppw.modaq.common.enums.ReportMonthlyType
+     */
+    private Integer monthlyType;
+    /**
      * 报告是否用印
      */
     private Boolean withSeals;

+ 93 - 25
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -14,9 +14,11 @@ import com.smppw.modaq.application.components.report.parser.ReportParserFactory;
 import com.smppw.modaq.application.components.report.writer.ReportWriter;
 import com.smppw.modaq.application.components.report.writer.ReportWriterFactory;
 import com.smppw.modaq.application.util.EmailUtil;
+import com.smppw.modaq.common.conts.Constants;
 import com.smppw.modaq.common.conts.DateConst;
 import com.smppw.modaq.common.conts.EmailParseStatusConst;
 import com.smppw.modaq.common.conts.EmailTypeConst;
+import com.smppw.modaq.common.enums.ReportMonthlyType;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.enums.ReportParserFileType;
 import com.smppw.modaq.common.enums.ReportType;
@@ -142,7 +144,8 @@ public class EmailParseService {
                 log.warn("未采集到正文或附件");
                 continue;
             }
-            log.info("开始解析邮件数据 -> 邮件主题:{},邮件日期:{}", emailContentInfoDTOList.get(0).getEmailTitle(), emailContentInfoDTOList.get(0).getEmailDate());
+            log.info("开始解析邮件数据 -> 邮件主题:{},邮件日期:{}",
+                    emailContentInfoDTOList.get(0).getEmailTitle(), emailContentInfoDTOList.get(0).getEmailDate());
             Map<EmailContentInfoDTO, List<EmailZipFileDTO>> emailZipFileMap = MapUtil.newHashMap();
             for (EmailContentInfoDTO emailDto : emailContentInfoDTOList) {
                 // 正文不用解压附件
@@ -206,11 +209,11 @@ public class EmailParseService {
         String emailTitle = emailContentInfoDTO.getEmailTitle();
 
         if (ArchiveUtil.isZip(filepath)) {
-            this.handleCompressedFiles(emailTitle, filepath, ".zip", emailType, resultList);
+            this.handleCompressedFiles(emailTitle, filepath, Constants.ARCHIVE_ZIP, emailType, resultList);
         } else if (ArchiveUtil.isRAR(filepath)) {
-            this.handleCompressedFiles(emailTitle, filepath, ".rar", emailType, resultList);
+            this.handleCompressedFiles(emailTitle, filepath, Constants.ARCHIVE_RAR, emailType, resultList);
         } else if (ArchiveUtil.is7z(filepath)) {
-            this.handleCompressedFiles(emailTitle, filepath, ".7z", emailType, resultList);
+            this.handleCompressedFiles(emailTitle, filepath, Constants.ARCHIVE_7Z, emailType, resultList);
         } else {
             // 不是压缩包时
             EmailZipFileDTO dto = new EmailZipFileDTO(emailTitle, emailContentInfoDTO);
@@ -289,7 +292,7 @@ public class EmailParseService {
         List<ParseResult<ReportData>> dataList = ListUtil.list(false);
         for (Map.Entry<EmailContentInfoDTO, List<EmailZipFileDTO>> entry : emailZipFileMap.entrySet()) {
             EmailContentInfoDTO emailDto = entry.getKey();
-            if (emailDto.getFileName() != null && emailDto.getFileName().endsWith(".html")) {
+            if (emailDto.getFileName() != null && emailDto.getFileName().endsWith(Constants.FILE_HTML)) {
                 continue;
             }
             String emailTitle = emailDto.getEmailTitle();
@@ -312,8 +315,8 @@ public class EmailParseService {
             }
             // 如果压缩包里面既有pdf又有其他格式的文件,说明其他格式的文件是不需要解析的
             List<String> exts = dtos.stream().map(EmailZipFileDTO::getExtName).distinct().toList();
-            if (exts.contains("pdf") && exts.size() > 1) {
-                dtos.removeIf(e -> !Objects.equals("pdf", e.getExtName()));
+            if (exts.contains(Constants.FILE_PDF) && exts.size() > 1) {
+                dtos.removeIf(e -> !Objects.equals(Constants.FILE_PDF, e.getExtName()));
             }
             // 移除逻辑
             Iterator<EmailZipFileDTO> removeIterator = dtos.iterator();
@@ -390,7 +393,8 @@ public class EmailParseService {
                         .filter(Objects::nonNull).filter(e -> Objects.equals(true, e.getAiParse())).toList();
                 if (CollUtil.isNotEmpty(aiParaseList)) {
                     for (ReportData data : aiParaseList) {
-                        this.emailFileInfoMapper.updateAiParseByFileId(data.getBaseInfo().getFileId(), data.getAiParse(), data.getAiFileId());
+                        this.emailFileInfoMapper.updateAiParseByFileId(data.getBaseInfo().getFileId(),
+                                data.getAiParse(), data.getAiFileId());
                     }
                 }
                 long failNum = dataList.stream().filter(e -> !Objects.equals(EmailParseStatusConst.SUCCESS, e.getStatus())).count();
@@ -403,13 +407,15 @@ public class EmailParseService {
         }
     }
 
-    private ParseResult<ReportData> parseReportAndHandleResult(String emailTitle, EmailFileInfoDO emailFileInfo, EmailZipFileDTO zipFile) {
+    private ParseResult<ReportData> parseReportAndHandleResult(String emailTitle,
+                                                               EmailFileInfoDO emailFileInfo,
+                                                               EmailZipFileDTO zipFile) {
         Integer emailType = zipFile.getEmailType();
         String fileName = zipFile.getFilename();
         String filepath = zipFile.getFilepath();
         ParseResult<ReportData> result = new ParseResult<>();
         boolean reportFlag = emailType == null || !EmailTypeConst.SUPPORT_EMAIL_TYPES.contains(emailType);
-        if (reportFlag || StrUtil.isBlank(fileName) || fileName.endsWith(".html")) {
+        if (reportFlag || StrUtil.isBlank(fileName) || fileName.endsWith(Constants.FILE_HTML)) {
             result.setStatus(ReportParseStatus.NOT_A_REPORT.getCode());
             result.setMsg(StrUtil.format(ReportParseStatus.NOT_A_REPORT.getMsg(), fileName));
             log.error(result.getMsg());
@@ -424,9 +430,7 @@ public class EmailParseService {
             }
         }
         // 解析器--根据文件后缀获取对应解析器,解析不了就用AI来解析
-        ReportParserFileType fileType;
-        String fileSuffix = StrUtil.subAfter(fileName, ".", true);
-        fileType = ReportParserFileType.getBySuffix(fileSuffix);
+        ReportParserFileType fileType = ReportParserFileType.getBySuffix(zipFile.getExtName());
         // 不支持的格式
         if (fileType == null) {
             result.setStatus(ReportParseStatus.NO_SUPPORT_TEMPLATE.getCode());
@@ -527,8 +531,13 @@ public class EmailParseService {
                     log.info("报告{} AI解析结束!结果是:{}", fileName, reportData);
                 }
             }
-            // ocr信息提取
+            // ocr信息提取(印章、联系人、基金名称和产品代码)
             this.ocrReportData(reportType, reportData, fileName, images);
+            // ocr识别月报是否管理人版或协会版
+            ReportMonthlyType monthlyType = this.extractMonthlyType(reportType, emailTitle, fileName, filepath, images);
+            if (reportData != null && reportData.getBaseInfo() != null) {
+                reportData.getBaseInfo().setMonthlyType(monthlyType.getType());
+            }
             parserWatch.stop();
             if (log.isInfoEnabled()) {
                 log.info("报告{}解析结果为{},耗时{}ms", fileName, reportData, parserWatch.getTotalTimeMillis());
@@ -540,13 +549,70 @@ public class EmailParseService {
     }
 
     /**
+     * Determines the monthly report edition (manager edition or association edition).
+     * <p>
+     * Returns NO_NEED when {@code reportType} is not MONTHLY. Otherwise the first matching
+     * rule wins, checked in this order: keywords in the file name (including a fund code
+     * matched by ReportParseUtils.matchFundCode, which counts as association edition),
+     * keywords in each segment of the file path, keywords in the email subject, and finally
+     * an OCR call on the first image. FAILED is returned when nothing matches and there is
+     * no image.
+     * <p>
+     * NOTE(review): {@code fileName} and {@code emailTitle} are dereferenced without a null
+     * check; an email without a subject would presumably throw here - confirm callers
+     * guarantee both are non-null.
+     * NOTE(review): the path rule skips segments containing "公司及协会版" or "公司和协会版",
+     * while the subject rule only excludes "公司及协会版"; a subject containing
+     * "公司和协会版" is therefore classified as AMAC - confirm this difference is intended.
+     * NOTE(review): an exception raised by OCRReportParser.parseMonthlyType is not caught
+     * here and propagates to the caller.
+     *
+     * @param reportType report type
+     * @param emailTitle email subject
+     * @param fileName   report file name
+     * @param filepath   report file path, split on File.separator for the path rule
+     * @param images     image addresses of the report's first and last pages (mainly used
+     *                   for OCR extraction of key information); only the first one is used here
+     * @return the detected edition, NO_NEED for non-monthly reports, or FAILED
+     */
+    private ReportMonthlyType extractMonthlyType(ReportType reportType, String emailTitle,
+                                                 String fileName, String filepath, List<String> images) {
+        if (ReportType.MONTHLY != reportType) {
+            return ReportMonthlyType.NO_NEED;
+        }
+        // 1. Decide from the report file name
+        if (fileName.contains("协会")) {
+            return ReportMonthlyType.AMAC;
+        }
+        String fundCode = ReportParseUtils.matchFundCode(fileName);
+        if (StrUtil.isNotBlank(fundCode)) {
+            return ReportMonthlyType.AMAC;
+        }
+        if (fileName.contains("管理人") || fileName.contains("公司版")
+                || fileName.contains("投资者月报") || fileName.contains("运行报告")
+                || fileName.contains("投资者报告") || fileName.contains("投资报告")
+                || fileName.contains("投资月报") || fileName.contains("月度简报")) {
+            return ReportMonthlyType.MANAGER;
+        }
+        // 2. Decide from the file path
+        List<String> paths = StrUtil.split(filepath, File.separator);
+        for (String pathSplit : paths) {
+            boolean ncam = !pathSplit.contains("公司及协会版") && !pathSplit.contains("公司和协会版");
+            if (ncam && pathSplit.contains("协会")) {
+                return ReportMonthlyType.AMAC;
+            }
+            if (ncam && (pathSplit.contains("管理人") || pathSplit.contains("公司版"))) {
+                return ReportMonthlyType.MANAGER;
+            }
+        }
+        // 3. Decide from the email subject
+        if ((emailTitle.contains("协会") || emailTitle.contains("信披")) && !emailTitle.contains("公司及协会版")) {
+            return ReportMonthlyType.AMAC;
+        }
+        if (emailTitle.contains("管理人") || emailTitle.contains("公司版")
+                || emailTitle.contains("投资者月报") || emailTitle.contains("运行报告")
+                || emailTitle.contains("投资者报告") || emailTitle.contains("投资报告")
+                || emailTitle.contains("投资月报") || emailTitle.contains("月度简报")) {
+            return ReportMonthlyType.MANAGER;
+        }
+        // 4. OCR: extract keywords such as "curve" and "fund shares"; a curve means manager edition, fund shares mean association edition
+        if (CollUtil.isNotEmpty(images)) {
+            return new OCRReportParser().parseMonthlyType(fileName, this.ocrParserUrl, images.get(0));
+        }
+        return ReportMonthlyType.FAILED;
+    }
+
+    /**
      * ocr 提取信息(包括首页的基金名称或报告日期,尾页的印章或联系人等信息)
      *
      * @param reportData 报告解析结果
      * @param fileName   报告名称
      * @param images     报告的收益和尾页png图片
      */
-    private void ocrReportData(ReportType reportType, ReportData reportData, String fileName, List<String> images) {
+    private void ocrReportData(ReportType reportType, ReportData reportData,
+                               String fileName, List<String> images) {
         if (reportData == null || CollUtil.isEmpty(images)) {
             return;
         }
@@ -659,15 +725,15 @@ public class EmailParseService {
     }
 
     private EmailParseInfoDO buildEmailParseInfo(Integer emailId, String emailAddress,
-                                                 EmailContentInfoDTO emailContentInfoDTO, long totalSize) {
+                                                 EmailContentInfoDTO emailDto, long totalSize) {
         EmailParseInfoDO emailParseInfoDO = new EmailParseInfoDO();
         emailParseInfoDO.setId(emailId);
-        emailParseInfoDO.setSenderEmail(emailContentInfoDTO.getSenderEmail());
+        emailParseInfoDO.setSenderEmail(emailDto.getSenderEmail());
         emailParseInfoDO.setEmail(emailAddress);
-        emailParseInfoDO.setEmailDate(DateUtil.parse(emailContentInfoDTO.getEmailDate(), DateConst.YYYY_MM_DD_HH_MM_SS));
-        emailParseInfoDO.setParseDate(emailContentInfoDTO.getParseDate() == null ? null : DateUtil.parseDate(emailContentInfoDTO.getParseDate()));
-        emailParseInfoDO.setEmailTitle(emailContentInfoDTO.getEmailTitle());
-        emailParseInfoDO.setEmailType(emailContentInfoDTO.getEmailType());
+        emailParseInfoDO.setEmailDate(DateUtil.parse(emailDto.getEmailDate(), DateConst.YYYY_MM_DD_HH_MM_SS));
+        emailParseInfoDO.setParseDate(emailDto.getParseDate() == null ? null : DateUtil.parseDate(emailDto.getParseDate()));
+        emailParseInfoDO.setEmailTitle(emailDto.getEmailTitle());
+        emailParseInfoDO.setEmailType(emailDto.getEmailType());
         emailParseInfoDO.setParseStatus(EmailParseStatusConst.SUCCESS);
         emailParseInfoDO.setAttrSize(totalSize);
         emailParseInfoDO.setIsvalid(1);
@@ -824,15 +890,17 @@ public class EmailParseService {
         String disposition = part.getDisposition();
         String contentType = part.getContentType();
 
-        boolean attachmentFlag = StrUtil.endWithAny(fileName, ".zip", ".rar", ".pdf", ".png", ".jpg", ".docx", ".7z");
+        String[] att_files = new String[]{Constants.ARCHIVE_7Z, Constants.ARCHIVE_RAR,
+                Constants.ARCHIVE_ZIP, Constants.FILE_PDF, Constants.FILE_DOCX, Constants.FILE_JPG, Constants.FILE_PNG};
+        boolean attachmentFlag = StrUtil.endWithAny(fileName, att_files);
         boolean isAttachment = attachmentFlag
                 || Part.ATTACHMENT.equalsIgnoreCase(disposition)
                 || (contentType != null && attachmentMimePrefixes.stream().anyMatch(prefix ->
                 StrUtil.startWithIgnoreCase(contentType, prefix)
         ));
         if (!isAttachment) {
-            log.warn("邮件 {} 未检测到pdf/zip/rar/7z/png/jpg/docx类型的附件 (fileName={}, disposition={}, contentType={})",
-                    subject, fileName, disposition, contentType);
+            log.warn("邮件 {} 未检测到{}类型的附件 (fileName={}, disposition={}, contentType={})",
+                    subject, att_files, fileName, disposition, contentType);
             return;
         }
 
@@ -840,7 +908,7 @@ public class EmailParseService {
         String filePath = path + File.separator + account + File.separator + emailDateStr + File.separator;
         // 压缩包重名时的后面的压缩包会覆盖前面压缩包的问题(不考虑普通文件)
         String emailDate = DateUtil.format(sendDate, DateConst.YYYYMMDDHHMMSS24);
-        String realName = (fileName.endsWith(".zip") || fileName.endsWith(".rar") || fileName.endsWith(".7z")) ? emailDate + fileName : fileName;
+        String realName = ArchiveUtil.isArchive(fileName) ? emailDate + fileName : fileName;
         File saveFile = FileUtil.file(filePath + realName);
         if (!saveFile.exists()) {
             if (!saveFile.getParentFile().exists()) {

+ 9 - 11
mo-daq/src/main/java/com/smppw/modaq/infrastructure/util/ArchiveUtil.java

@@ -3,6 +3,7 @@ package com.smppw.modaq.infrastructure.util;
 import cn.hutool.core.collection.ListUtil;
 import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.util.StrUtil;
+import com.smppw.modaq.common.conts.Constants;
 import com.smppw.modaq.common.enums.ReportType;
 import net.sf.sevenzipjbinding.*;
 import net.sf.sevenzipjbinding.impl.RandomAccessFileInStream;
@@ -52,16 +53,20 @@ public class ArchiveUtil {
             {0x3000, 0x303F}     // 常用标点
     };
 
+    public static boolean isArchive(String fileName) { // true when the name is recognised by isZip, is7z or isRAR
+        return isZip(fileName) || is7z(fileName) || isRAR(fileName);
+    }
+
     public static boolean isZip(String fileName) {
-        return StrUtil.isNotBlank(fileName) && (fileName.endsWith("zip") || fileName.endsWith("ZIP"));
+        return StrUtil.isNotBlank(fileName) && StrUtil.endWithIgnoreCase(fileName, Constants.ARCHIVE_ZIP); // case-insensitive suffix match on "zip" with no leading dot, so any non-blank name ending in those letters qualifies
     }
 
     public static boolean is7z(String fileName) {
-        return StrUtil.isNotBlank(fileName) && (fileName.endsWith("7z") || fileName.endsWith("7Z"));
+        return StrUtil.isNotBlank(fileName) && StrUtil.endWithIgnoreCase(fileName, Constants.ARCHIVE_7Z); // case-insensitive suffix match on "7z" with no leading dot
     }
 
     public static boolean isRAR(String fileName) {
-        return StrUtil.isNotBlank(fileName) && (fileName.endsWith("rar") || fileName.endsWith("RAR"));
+        return StrUtil.isNotBlank(fileName) && StrUtil.endWithIgnoreCase(fileName, Constants.ARCHIVE_RAR); // case-insensitive suffix match on "rar" with no leading dot
     }
 
     public static List<String> extractCompressedFiles(String zipFilePath, String destFilePath) throws IOException {
@@ -328,7 +333,7 @@ public class ArchiveUtil {
                 }
 
                 // 4. 递归处理嵌套 ZIP(深度+1)
-                if (isZipFile(name) && currentDepth < maxDepth) {
+                if (isZip(name) && currentDepth < maxDepth) {
                     File nestedZipFile = entryPath.toFile();
                     decompressZipRecursive(
                             nestedZipFile,
@@ -348,13 +353,6 @@ public class ArchiveUtil {
         }
     }
 
-    /**
-     * 判断文件是否为 ZIP 格式
-     */
-    private static boolean isZipFile(String filepath) {
-        return filepath.toLowerCase().endsWith(".zip");
-    }
-
     public static void main(String[] args) throws Exception {
         String zipFilePath = "D:\\Documents\\新报告解析\\基协报告\\排排网代销-宏锡5月报告(公司及协会版).7z";
         String destFilePath = "D:\\Documents\\新报告解析\\基协报告\\rar";

+ 1 - 1
mo-daq/src/main/resources/application.yml

@@ -17,7 +17,7 @@ server:
 # 日志配置
 logging:
   level:
-    com.smppw.modaq.domain.mapper: debug
+    com.smppw.modaq.domain.mapper: info
   config: classpath:logback.xml
 
 spring:

+ 1 - 1
mo-daq/src/test/java/com/smppw/modaq/MoDaqApplicationTests.java

@@ -38,7 +38,7 @@ public class MoDaqApplicationTests {
     @Test
     public void reportTest() {
         MailboxInfoDTO emailInfoDTO = this.buildMailbox("*@simuwang.com", "*");
-        Date startDate = DateUtil.parse("2025-06-07 14:00:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date startDate = DateUtil.parse("2025-06-07 14:43:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         Date endDate = DateUtil.parse("2025-06-07 17:05:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         try {
             List<String> folderNames = ListUtil.list(false);