Ver código fonte

fix:ocr识别结果非法数据清理

wangzaijun 3 semanas atrás
pai
commit
2375610ec7

+ 21 - 7
mo-daq/src/main/java/com/smppw/modaq/application/components/OCRReportParser.java

@@ -40,13 +40,13 @@ public class OCRReportParser {
             String content = StrUtil.split(jsonResult.getStr("content"), "```").get(1);
             String aiParserContent = "{" + StrUtil.subAfter(content, "{", false) + "}";
             JSONObject jsonObject = JSONUtil.parseObj(aiParserContent);
-            String fundName = jsonObject.getStr("基金名称");
-            String fundCode = jsonObject.getStr("产品代码");
-            String reportDate = jsonObject.getStr("报告日期");
-            String seals = jsonObject.getStr("是否有红色印章");
-            String phone = jsonObject.getStr("是否有电话");
-            String addr = jsonObject.getStr("是否有地址");
-            String withme = jsonObject.getStr("是否有关注我们");
+            String fundName = this.cleanData(jsonObject.getStr("基金名称"));
+            String fundCode = this.cleanData(jsonObject.getStr("产品代码"));
+            String reportDate = this.cleanData(jsonObject.getStr("报告日期"));
+            String seals = this.cleanData(jsonObject.getStr("是否有红色印章"));
+            String phone = this.cleanData(jsonObject.getStr("是否有电话"));
+            String addr = this.cleanData(jsonObject.getStr("是否有地址"));
+            String withme = this.cleanData(jsonObject.getStr("是否有关注我们"));
             OCRParseData res = new OCRParseData();
             if (StrUtil.isNotBlank(reportDate)) {
                 res.setReportDate(reportDate);
@@ -75,4 +75,18 @@ public class OCRReportParser {
             }
         }
     }
+
+    /**
+     * Normalizes one field value taken from the parsed OCR result.
+     *
+     * @param text raw field value; may be {@code null}
+     * @return the trimmed value, or {@code null} when the input is {@code null},
+     *         is blank after trimming, or equals "无" or "否" after trimming
+     */
+    private String cleanData(String text) {
+        String cleaned = (text == null) ? null : StrUtil.trim(text);
+        // Nothing usable: missing input or only whitespace.
+        if (cleaned == null || StrUtil.isBlank(cleaned)) {
+            return null;
+        }
+        // These two literals are treated the same as an absent value.
+        boolean discarded = "无".equals(cleaned) || "否".equals(cleaned);
+        return discarded ? null : cleaned;
+    }
 }

+ 1 - 1
mo-daq/src/test/java/com/smppw/modaq/MoDaqApplicationTests.java

@@ -39,7 +39,7 @@ public class MoDaqApplicationTests {
     public void reportTest() {
         MailboxInfoDTO emailInfoDTO = this.buildMailbox("***@simuwang.com", "***");
         Date startDate = DateUtil.parse("2025-06-03 17:40:00", DateConst.YYYY_MM_DD_HH_MM_SS);
-        Date endDate = DateUtil.parse("2025-06-03 17:58:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date endDate = DateUtil.parse("2025-06-04 17:58:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         try {
             List<String> folderNames = ListUtil.list(false);
 //            folderNames.add("其他文件夹/报告公告");