浏览代码

fix:优化报告日期和基金编码解析

wangzaijun 1 周之前
父节点
当前提交
cd4f4948b3

+ 6 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/ReportParseUtils.java

@@ -330,10 +330,16 @@ public final class ReportParseUtils {
         if (StrUtil.isBlank(text)) {
             return null;
         }
+        // 匹配私募基金产品代码
         Matcher matcher = PatternConsts.FUND_CODE_PATTERN.matcher(text);
         if (matcher.find()) {
             return matcher.group();
         }
+        // 匹配公募基金6位备案编码
+        matcher = PatternConsts.PUB_FUND_CODE_PATTERN.matcher(text);
+        if (matcher.find()) {
+            return matcher.group();
+        }
         return null;
     }
 

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AIOtherReportParser.java

@@ -18,7 +18,7 @@ public class AIOtherReportParser extends AbstractAIReportParser<ReportData> {
     @Override
     protected String prompt() {
         return """
-                识别文件中的基金名称、基金管理人、产品代码和报告日期,产品代码的正则表达式是`S[A-Z0-9]{5}`
+                识别文件中的基金名称、基金管理人、产品代码和报告日期;
                 如果无法识别就返回空字符,结果用json返回,json中不要注释
                 """;
     }

+ 1 - 0
mo-daq/src/main/java/com/smppw/modaq/common/conts/PatternConsts.java

@@ -31,6 +31,7 @@ public class PatternConsts {
      * 基金编码的正则表达式
      */
     public static final Pattern FUND_CODE_PATTERN = Pattern.compile("S[A-Z0-9]{5}");
+    public static final Pattern PUB_FUND_CODE_PATTERN = Pattern.compile("^[0-9]{6}$");
 
     /**
      * 分级基金级别正则匹配

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportType.java

@@ -8,7 +8,7 @@ public enum ReportType {
     OTHER(-2, "其他报告",
             new String[]{"公告", "通知", "告知函", "意见征询函", "说明函", "简报",
                     "清算报告", "邀请函", "观点", "预警", "投研报告", "公示", "回顾",
-                    "风险提示函", "说明", "合同变更", "生效函"}),
+                    "风险提示函", "说明", "合同变更", "生效函", "实施情况"}),
 
     LETTER(-1, "交易流水确认函",
             new String[]{"确认单", "确认函", "交易确认数据",

+ 13 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ocr/OCRLetterParseData.java

@@ -49,4 +49,17 @@ public class OCRLetterParseData {
      */
     private String share;
     private String nav;
+
+    @Override
+    public String toString() {
+        return "{" +
+                "fundName='" + fundName + '\'' +
+                ", fundCode='" + fundCode + '\'' +
+                ", investorName='" + investorName + '\'' +
+                ", holdingDate='" + holdingDate + '\'' +
+                ", amount='" + amount + '\'' +
+                ", share='" + share + '\'' +
+                ", nav='" + nav + '\'' +
+                '}';
+    }
 }

+ 6 - 1
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -600,9 +600,14 @@ public class EmailParseService {
                     result = new ParseResult<>(ReportParseStatus.PARSE_FAIL, null, e.getMessage());
                 }
             }
-            // 设置月报类型
             if (reportData != null && reportData.getBaseInfo() != null) {
+                // 设置月报类型
                 reportData.getBaseInfo().setMonthlyType(monthlyType.getType());
+                // 当报告日期还是空时设置为今天的前一天
+                if (reportData.getBaseInfo().getReportDate() == null) {
+                    Date date = DateUtil.offsetDay(new Date(), -1);
+                    reportData.getBaseInfo().setReportDate(date);
+                }
             }
             // ocr信息提取(印章、联系人、基金名称和产品代码)
             this.ocrReportData(reportType, reportData, fileName, images);