3 Commits 48f0faeaf3 ... 4fa1cae6d7

Author SHA1 Message Date
  wangzaijun 4fa1cae6d7 fix:修复ocr解析逻辑问题 1 week ago
  wangzaijun cd4f4948b3 fix:优化报告日期和基金编码解析 1 week ago
  wangzaijun c908157198 fix:打印日志优化 1 week ago

+ 6 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/ReportParseUtils.java

@@ -330,10 +330,16 @@ public final class ReportParseUtils {
         if (StrUtil.isBlank(text)) {
             return null;
         }
+        // 匹配私募基金产品代码
         Matcher matcher = PatternConsts.FUND_CODE_PATTERN.matcher(text);
         if (matcher.find()) {
             return matcher.group();
         }
+        // 匹配公募基金6位备案编码
+        matcher = PatternConsts.PUB_FUND_CODE_PATTERN.matcher(text);
+        if (matcher.find()) {
+            return matcher.group();
+        }
         return null;
     }
 

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AIOtherReportParser.java

@@ -18,7 +18,7 @@ public class AIOtherReportParser extends AbstractAIReportParser<ReportData> {
     @Override
     protected String prompt() { // Prompt text of this parser: asks for fund name, fund manager, product code and report date, with empty strings for anything unrecognised and a comment-free JSON result.
         return """
-                识别文件中的基金名称、基金管理人、产品代码和报告日期,产品代码的正则表达式是`S[A-Z0-9]{5}`
+                识别文件中的基金名称、基金管理人、产品代码和报告日期;
                 如果无法识别就返回空字符,结果用json返回,json中不要注释
                 """;
     }

+ 1 - 0
mo-daq/src/main/java/com/smppw/modaq/common/conts/PatternConsts.java

@@ -31,6 +31,7 @@ public class PatternConsts {
      * 基金编码的正则表达式
      */
     public static final Pattern FUND_CODE_PATTERN = Pattern.compile("S[A-Z0-9]{5}");
+    public static final Pattern PUB_FUND_CODE_PATTERN = Pattern.compile("^[0-9]{6}$"); // Six-digit code; anchored with ^ and $, so even Matcher.find() only succeeds when the whole input (bar a trailing line terminator) is exactly six digits. NOTE(review): confirm callers pass an isolated code, not free text.
 
     /**
      * 分级基金级别正则匹配

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportType.java

@@ -8,7 +8,7 @@ public enum ReportType {
     OTHER(-2, "其他报告",
             new String[]{"公告", "通知", "告知函", "意见征询函", "说明函", "简报",
                     "清算报告", "邀请函", "观点", "预警", "投研报告", "公示", "回顾",
-                    "风险提示函", "说明", "合同变更", "生效函"}),
+                    "风险提示函", "说明", "合同变更", "生效函", "实施情况"}),
 
     LETTER(-1, "交易流水确认函",
             new String[]{"确认单", "确认函", "交易确认数据",

+ 7 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportFundTransactionDTO.java

@@ -239,6 +239,13 @@ public class ReportFundTransactionDTO extends BaseReportDTO<ReportFundTransactio
      */
     private String actualPerformanceShare;
 
+    public ReportFundTransactionDTO() { // No-argument constructor; kept explicit because declaring the fileId constructor suppresses the compiler-generated default one.
+    }
+
+    public ReportFundTransactionDTO(Integer fileId) { // Hands fileId straight to the BaseReportDTO constructor; no other field is set here.
+        super(fileId);
+    }
+
     @Override
     public ReportFundTransactionDO toEntity() {
         ReportFundTransactionDO entity = new ReportFundTransactionDO();

+ 7 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportInvestorInfoDTO.java

@@ -37,6 +37,13 @@ public class ReportInvestorInfoDTO extends BaseReportDTO<ReportInvestorInfoDO> {
      */
     private String tradingAccount;
 
+    public ReportInvestorInfoDTO() { // No-argument constructor; kept explicit because declaring the fileId constructor suppresses the compiler-generated default one.
+    }
+
+    public ReportInvestorInfoDTO(Integer fileId) { // Hands fileId straight to the BaseReportDTO constructor; no other field is set here.
+        super(fileId);
+    }
+
     @Override
     public ReportInvestorInfoDO toEntity() {
         ReportInvestorInfoDO entity = new ReportInvestorInfoDO();

+ 13 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ocr/OCRLetterParseData.java

@@ -49,4 +49,17 @@ public class OCRLetterParseData {
      */
     private String share;
     private String nav;
+
+    @Override
+    public String toString() {
+        // Compact log form: the seven fields below as name='value' pairs, comma-separated, inside braces.
+        return "{" + String.join(", ",
+                "fundName='" + fundName + "'",
+                "fundCode='" + fundCode + "'",
+                "investorName='" + investorName + "'",
+                "holdingDate='" + holdingDate + "'",
+                "amount='" + amount + "'",
+                "share='" + share + "'",
+                "nav='" + nav + "'") + "}";
+    }
 }

+ 70 - 18
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -34,6 +34,7 @@ import com.smppw.modaq.domain.entity.EmailParseInfoDO;
 import com.smppw.modaq.domain.mapper.EmailFileInfoMapper;
 import com.smppw.modaq.domain.mapper.EmailParseInfoMapper;
 import com.smppw.modaq.infrastructure.util.ArchiveUtil;
+import com.smppw.modaq.infrastructure.util.ConvertUtil;
 import com.smppw.modaq.infrastructure.util.PdfUtil;
 import jakarta.mail.*;
 import jakarta.mail.internet.MimeUtility;
@@ -176,9 +177,9 @@ public class EmailParseService {
                 Integer type = EmailUtil.getEmailTypeBySubject(emailTitle + emailFile.getFilename());
                 // 特殊月报
                 if ((Objects.equals(EmailTypeConst.NAV_EMAIL_TYPE, type)
-                          || Objects.equals(EmailTypeConst.REPORT_OTHER_TYPE, type))
+                        || Objects.equals(EmailTypeConst.REPORT_OTHER_TYPE, type))
                         && (ReportParseUtils.containsAny(emailTitle, ReportParseUtils.MANAGER_KEYWORDS)
-                          || emailTitle.contains("定期报告"))) {
+                        || emailTitle.contains("定期报告"))) {
                     type = EmailTypeConst.REPORT_EMAIL_TYPE;
                 }
                 // 其他报告
@@ -600,17 +601,19 @@ public class EmailParseService {
                     result = new ParseResult<>(ReportParseStatus.PARSE_FAIL, null, e.getMessage());
                 }
             }
-            if (log.isInfoEnabled()) {
-                log.info("报告{} 用ocr补充解析结果。补充前的结果是:\n{}", fileName, reportData);
-            }
-            // ocr信息提取(印章、联系人、基金名称和产品代码)
-            this.ocrReportData(reportType, reportData, fileName, images);
-            // 设置月报类型
             if (reportData != null && reportData.getBaseInfo() != null) {
+                // 设置月报类型
                 reportData.getBaseInfo().setMonthlyType(monthlyType.getType());
+                // 当报告日期还是空时设置为今天的前一天
+                if (reportData.getBaseInfo().getReportDate() == null) {
+                    Date date = DateUtil.offsetDay(new Date(), -1);
+                    reportData.getBaseInfo().setReportDate(date);
+                }
             }
+            // ocr信息提取(印章、联系人、基金名称和产品代码)
+            reportData = this.ocrReportData(fileId, reportType, reportData, fileName, images);
             if (log.isInfoEnabled()) {
-                log.info("报告{} 解析耗时{}ms,结果是:\n{}", fileName, (System.currentTimeMillis() - start), reportData);
+                log.info("报告{} 解析耗时{}ms,结果是:{}", fileName, (System.currentTimeMillis() - start), reportData);
             }
         }
         // 保存报告解析结果
@@ -676,13 +679,19 @@ public class EmailParseService {
      * @param fileName   报告名称
      * @param images     报告的收益和尾页png图片
      */
-    private void ocrReportData(ReportType reportType,
-                               ReportData reportData,
-                               String fileName,
-                               List<String> images) {
-        if (reportData == null || CollUtil.isEmpty(images)) {
-            return;
+    private ReportData ocrReportData(Integer fileId,
+                                     ReportType reportType,
+                                     ReportData reportData,
+                                     String fileName,
+                                     List<String> images) {
+        if (CollUtil.isEmpty(images)) {
+            return reportData;
+        }
+        if (log.isInfoEnabled()) {
+            log.info("报告{} 用ocr补充解析结果。补充前的结果是:{}", fileName, reportData);
         }
+        // 当reportData==null时重新构建一个reportData对象
+        reportData = this.buildNvlReportData(fileId, reportType, reportData, fileName);
         // 报告才识别尾页的印章和联系人,确认单不识别尾页
         if (ReportType.LETTER != reportType) {
             OCRParseData parseRes = null;
@@ -735,22 +744,24 @@ public class EmailParseService {
                     log.error("报告{} OCR提取确认单关键信息出错:{}", fileName, e.getMessage());
                 }
                 if (parseRes == null) {
-                    return;
+                    return reportData;
                 }
                 if (letterReportData.getFundInfo() != null) {
                     letterReportData.getFundInfo().setFundName(parseRes.getFundName());
                     letterReportData.getFundInfo().setFundCode(parseRes.getFundCode());
                 }
+                // 投资者信息
                 if (letterReportData.getInvestorInfo() == null) {
-                    letterReportData.setInvestorInfo(new ReportInvestorInfoDTO());
+                    letterReportData.setInvestorInfo(new ReportInvestorInfoDTO(fileId));
                 }
                 letterReportData.getInvestorInfo().setInvestorName(parseRes.getInvestorName());
                 letterReportData.getInvestorInfo().setCertificateNumber(parseRes.getCertificateNumber());
                 letterReportData.getInvestorInfo().setTradingAccount(parseRes.getTradingAccount());
                 letterReportData.getInvestorInfo().setFundAccount(parseRes.getFundAccount());
                 letterReportData.getInvestorInfo().setCertificateType(parseRes.getCertificateType());
+                // 交易流水
                 if (letterReportData.getFundTransaction() == null) {
-                    letterReportData.setFundTransaction(new ReportFundTransactionDTO());
+                    letterReportData.setFundTransaction(new ReportFundTransactionDTO(fileId));
                 }
                 letterReportData.getFundTransaction().setTransactionType(parseRes.getTransactionType());
                 letterReportData.getFundTransaction().setApplyDate(parseRes.getApplyDate());
@@ -761,7 +772,48 @@ public class EmailParseService {
                 letterReportData.getFundTransaction().setShare(parseRes.getShare());
                 letterReportData.getFundTransaction().setNav(parseRes.getNav());
             }
+            return letterReportData;
+        }
+        return reportData;
+    }
+
+    /**
+     * Builds a fallback ReportData when the earlier parse produced none; a non-null input is returned as-is.
+     *
+     * @param fileId     file ID, passed to the new base-info and fund-info DTOs
+     * @param reportType report type; must not be null, selects the concrete ReportData subclass
+     * @param reportData existing parse result, may be null
+     * @param fileName   report file name; used as the report name and to match the report date
+     * @return the given reportData when non-null, otherwise a new non-null instance flagged with setAiParse(true)
+     */
+    private ReportData buildNvlReportData(Integer fileId,
+                                          ReportType reportType,
+                                          ReportData reportData,
+                                          String fileName) {
+        if (reportData != null) {
+            return reportData;
+        }
+        ReportBaseInfoDTO baseInfo = new ReportBaseInfoDTO(fileId);
+        baseInfo.setReportName(fileName);
+        baseInfo.setReportType(reportType.name());
+        String reportDate = ReportParseUtils.matchReportDate(reportType, fileName);
+        baseInfo.setReportDate(ConvertUtil.toDate(reportDate));
+        ReportFundInfoDTO fundInfo = new ReportFundInfoDTO(fileId);
+        if (ReportType.ANNUALLY == reportType) {
+            reportData = new AnnuallyReportData(baseInfo, fundInfo);
+        } else if (ReportType.QUARTERLY == reportType) {
+            reportData = new QuarterlyReportData(baseInfo, fundInfo);
+        } else if (ReportType.MONTHLY == reportType) {
+            reportData = new MonthlyReportData(baseInfo, fundInfo);
+        } else if (ReportType.WEEKLY == reportType) {
+            reportData = new WeeklyReportData(baseInfo, fundInfo);
+        } else if (ReportType.LETTER == reportType) {
+            reportData = new LetterReportData(baseInfo, fundInfo);
+        } else { // OTHER and any type not listed above: use the default container so the call below cannot hit null
+            reportData = new ReportData.DefaultReportData(baseInfo, fundInfo);
+        }
+        reportData.setAiParse(true);
+        return reportData;
+    }
 
     /**

+ 1 - 1
mo-daq/src/test/java/com/smppw/modaq/MoDaqApplicationTests.java

@@ -26,7 +26,7 @@ public class MoDaqApplicationTests {
     @Test
     public void letterTest() {
         MailboxInfoDTO emailInfoDTO = this.buildMailbox("**@simuwang.com", "**");
-        Date startDate = DateUtil.parse("2025-06-18 08:47:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date startDate = DateUtil.parse("2025-06-18 17:00:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         Date endDate = DateUtil.parse("2025-06-18 18:56:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         try {
             List<String> folderNames = ListUtil.list(false);