Forráskód Böngészése

fix:合并解决冲突

wangzaijun 1 hete
szülő
commit
e6cff25e56

+ 130 - 102
mo-daq/src/main/java/com/smppw/modaq/application/components/OCRReportParser.java

@@ -4,35 +4,39 @@ import cn.hutool.core.exceptions.ExceptionUtil;
 import cn.hutool.core.io.IORuntimeException;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.StrUtil;
-import cn.hutool.http.HttpUtil;
 import cn.hutool.json.JSONObject;
 import cn.hutool.json.JSONUtil;
-import com.smppw.modaq.common.enums.ReportMonthlyType;
+import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
+import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
+import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
+import com.alibaba.dashscope.common.MultiModalMessage;
+import com.alibaba.dashscope.common.Role;
+import com.alibaba.dashscope.exception.NoApiKeyException;
+import com.alibaba.dashscope.exception.UploadFileException;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.exception.ReportParseException;
 import com.smppw.modaq.domain.dto.report.ocr.OCRLetterParseData;
-import com.smppw.modaq.domain.dto.report.ocr.OCRParseData;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.Map;
-import java.util.Objects;
 
 public class OCRReportParser {
     private final Logger logger = LoggerFactory.getLogger(this.getClass());
 
-    public OCRLetterParseData parseLetterData(String filename, String ocrApi, String ocrImgUrl) throws ReportParseException {
-        Map<String, Object> paramsMap = MapUtil.newHashMap(4);
-        paramsMap.put("image_url", ocrImgUrl);
-        paramsMap.put("user_msg", """
+    public OCRLetterParseData parseLetterData(String filename, String ocrImgUrl) throws ReportParseException {
+        String userMsg = """
                 请提取文件中的基金名称、产品代码、投资人姓名、证件类型、证件号码、基金账户、交易账号、业务类型、申请日期、申请金额、申请份额、确认日期、确认金额、确认份额、单位净值。
                 要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息。
                 返回数据格式以json方式输出,格式为:{"基金名称":"","产品代码":"","投资人姓名":"","证件类型":"","证件号码":"","基金账户":"","交易账号":"","业务类型":"","申请日期":"","申请金额":"","申请份额":"","确认日期":"","确认金额":"","确认份额":"","单位净值":""}
-                """);
+                """;
         OCRLetterParseData res = new OCRLetterParseData();
         String objectStr = null;
         try {
-            objectStr = this.parseOcrResult(ocrApi, paramsMap);
+            ocrImgUrl = "/" + ocrImgUrl.replaceAll("\\\\", "/");
+            objectStr = this.parseOcrResult(this.call(ocrImgUrl, userMsg));
             JSONObject jsonObject = JSONUtil.parseObj(objectStr);
             String fundName = this.cleanData(jsonObject.getStr("基金名称"));
             String fundCode = this.cleanData(jsonObject.getStr("产品代码"));
@@ -103,97 +107,129 @@ public class OCRReportParser {
             throw new ReportParseException(ReportParseStatus.SYSTEM_ERROR);
         } finally {
             if (logger.isInfoEnabled()) {
-                this.logger.info("确认单{} OCR识别参数{},OCR识别结果:{},处理后的结果是:{}",
-                        filename, paramsMap, objectStr, res);
+                this.logger.info("确认单{} OCR结果:{},处理后的结果是:{}",
+                        filename, objectStr, res);
             }
         }
     }
 
-    public ReportMonthlyType parseMonthlyType(String filename, String ocrApi, String ocrImgUrl) throws ReportParseException {
-        Map<String, Object> paramsMap = MapUtil.newHashMap(4);
-        paramsMap.put("image_url", ocrImgUrl);
-        paramsMap.put("user_msg", """
-                请帮我判断报告的类型,判断依据是:如果有基金概况和净值月报则为协会版,如果有业绩曲线或者基金概况和净值月报都没有则为管理人版,都不满足是返回null。
-                返回数据格式以json方式输出,格式为:{"报告类型":""}
-                """);
-        ReportMonthlyType res = ReportMonthlyType.FAILED;
-        String objectStr = null;
-        try {
-            objectStr = this.parseOcrResult(ocrApi, paramsMap);
-            JSONObject jsonObject = JSONUtil.parseObj(objectStr);
-            String type = this.cleanData(jsonObject.getStr("报告类型"));
-            if (StrUtil.isNotBlank(type) && Objects.equals("协会版", type)) {
-                res = ReportMonthlyType.AMAC;
-            } else if (StrUtil.isNotBlank(type) && Objects.equals("管理人版", type)) {
-                res = ReportMonthlyType.MANAGER;
-            }
-            return res;
-        } catch (IORuntimeException e) {
-            this.logger.warn("报告{} 解析出错:{}", filename, ReportParseStatus.AI_NOT_FOUND.getMsg());
-            throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
-        } catch (Exception e) {
-            this.logger.warn("报告{} OCR提取月报类型错误:{}", filename, ExceptionUtil.stacktraceToString(e));
-            throw new ReportParseException(ReportParseStatus.SYSTEM_ERROR);
-        } finally {
-            if (logger.isInfoEnabled()) {
-                this.logger.info("报告{} OCR提取月报类型参数{},OCR提取月报类型结果:{},处理后的结果是:{}",
-                        filename, paramsMap, objectStr, res);
-            }
-        }
-    }
+//    public ReportMonthlyType parseMonthlyType(String filename, String ocrApi, String ocrImgUrl) throws ReportParseException {
+//        Map<String, Object> paramsMap = MapUtil.newHashMap(4);
+//        paramsMap.put("image_url", ocrImgUrl);
+//        paramsMap.put("user_msg", """
+//                请帮我判断报告的类型,判断依据是:如果有基金概况和净值月报则为协会版,如果有业绩曲线或者基金概况和净值月报都没有则为管理人版,都不满足是返回null。
+//                返回数据格式以json方式输出,格式为:{"报告类型":""}
+//                """);
+//        ReportMonthlyType res = ReportMonthlyType.FAILED;
+//        String objectStr = null;
+//        try {
+//            objectStr = this.parseOcrResult(ocrApi, paramsMap);
+//            JSONObject jsonObject = JSONUtil.parseObj(objectStr);
+//            String type = this.cleanData(jsonObject.getStr("报告类型"));
+//            if (StrUtil.isNotBlank(type) && Objects.equals("协会版", type)) {
+//                res = ReportMonthlyType.AMAC;
+//            } else if (StrUtil.isNotBlank(type) && Objects.equals("管理人版", type)) {
+//                res = ReportMonthlyType.MANAGER;
+//            }
+//            return res;
+//        } catch (IORuntimeException e) {
+//            this.logger.warn("报告{} 解析出错:{}", filename, ReportParseStatus.AI_NOT_FOUND.getMsg());
+//            throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
+//        } catch (Exception e) {
+//            this.logger.warn("报告{} OCR提取月报类型错误:{}", filename, ExceptionUtil.stacktraceToString(e));
+//            throw new ReportParseException(ReportParseStatus.SYSTEM_ERROR);
+//        } finally {
+//            if (logger.isInfoEnabled()) {
+//                this.logger.info("报告{} OCR提取月报类型参数{},OCR提取月报类型结果:{},处理后的结果是:{}",
+//                        filename, paramsMap, objectStr, res);
+//            }
+//        }
+//    }
+//
+//    public OCRParseData parse(String filename, String ocrApi, String ocrImgUrl) throws ReportParseException {
+//        Map<String, Object> paramsMap = MapUtil.newHashMap(4);
+//        paramsMap.put("image_url", ocrImgUrl);
+//        paramsMap.put("user_msg", """
+//                请提取文件中的基金名称、基金公司、产品代码,并判断是否有红色印章和是否有电话。
+//                要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息。
+//                返回数据格式以json方式输出,格式为:{"基金名称":"","基金公司":"","产品代码":"","是否有红色印章":"","是否有电话":""}
+//                """);
+//        OCRParseData res = new OCRParseData();
+//        String objectStr = null;
+//        try {
+//            objectStr = this.parseOcrResult(ocrApi, paramsMap);
+//            JSONObject jsonObject = JSONUtil.parseObj(objectStr);
+//            String fundName = this.cleanData(jsonObject.getStr("基金名称"));
+//            String fundCode = this.cleanData(jsonObject.getStr("产品代码"));
+//            String companyName = ReportParseUtils.cleaningValue(jsonObject.getStr("基金公司"));
+//            String seals = this.cleanData(jsonObject.getStr("是否有红色印章"));
+//            String phone = this.cleanData(jsonObject.getStr("是否有电话"));
+//            if (StrUtil.isNotBlank(fundName) && (fundName.contains("基金") || fundName.contains("资产管理")) && !fundName.contains("公司")) {
+//                res.setFundName(fundName);
+//            }
+//            if (StrUtil.isNotBlank(companyName) && companyName.contains("有限公司")) {
+//                res.setCompanyName(StrUtil.subBefore(companyName, "有限公司", true) + "有限公司");
+//            }
+//            if (StrUtil.isNotBlank(fundCode)) {
+//                res.setFundCode(ReportParseUtils.matchFundCode(fundCode));
+//            }
+//            if (StrUtil.isNotBlank(seals)) {
+//                res.setWithSeals(true);
+//            }
+//            if (StrUtil.isNotBlank(phone)) {
+//                res.setWithContacts(true);
+//            }
+//            return res;
+//        } catch (IORuntimeException e) {
+//            this.logger.warn("报告{} 解析错误:{}", filename, ReportParseStatus.AI_NOT_FOUND.getMsg());
+//            throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
+//        } catch (Exception e) {
+//            this.logger.warn("报告{} OCR识别错误:{}", filename, ExceptionUtil.stacktraceToString(e));
+//            throw new ReportParseException(ReportParseStatus.SYSTEM_ERROR);
+//        } finally {
+//            if (logger.isInfoEnabled()) {
+//                this.logger.info("报告{} OCR识别参数{},OCR识别结果:{},处理后的结果是:{}",
+//                        filename, paramsMap, objectStr, res);
+//            }
+//        }
+//    }
 
-    public OCRParseData parse(String filename, String ocrApi, String ocrImgUrl) throws ReportParseException {
-        Map<String, Object> paramsMap = MapUtil.newHashMap(4);
-        paramsMap.put("image_url", ocrImgUrl);
-        paramsMap.put("user_msg", """
-                请提取文件中的基金名称、基金公司、产品代码,并判断是否有红色印章和是否有电话。
-                要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息。
-                返回数据格式以json方式输出,格式为:{"基金名称":"","基金公司":"","产品代码":"","是否有红色印章":"","是否有电话":""}
-                """);
-        OCRParseData res = new OCRParseData();
-        String objectStr = null;
-        try {
-            objectStr = this.parseOcrResult(ocrApi, paramsMap);
-            JSONObject jsonObject = JSONUtil.parseObj(objectStr);
-            String fundName = this.cleanData(jsonObject.getStr("基金名称"));
-            String fundCode = this.cleanData(jsonObject.getStr("产品代码"));
-            String companyName = ReportParseUtils.cleaningValue(jsonObject.getStr("基金公司"));
-            String seals = this.cleanData(jsonObject.getStr("是否有红色印章"));
-            String phone = this.cleanData(jsonObject.getStr("是否有电话"));
-            if (StrUtil.isNotBlank(fundName) && (fundName.contains("基金") || fundName.contains("资产管理")) && !fundName.contains("公司")) {
-                res.setFundName(fundName);
-            }
-            if (StrUtil.isNotBlank(companyName) && companyName.contains("有限公司")) {
-                res.setCompanyName(StrUtil.subBefore(companyName, "有限公司", true) + "有限公司");
-            }
-            if (StrUtil.isNotBlank(fundCode)) {
-                res.setFundCode(ReportParseUtils.matchFundCode(fundCode));
-            }
-            if (StrUtil.isNotBlank(seals)) {
-                res.setWithSeals(true);
-            }
-            if (StrUtil.isNotBlank(phone)) {
-                res.setWithContacts(true);
-            }
-            return res;
-        } catch (IORuntimeException e) {
-            this.logger.warn("报告{} 解析错误:{}", filename, ReportParseStatus.AI_NOT_FOUND.getMsg());
-            throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
-        } catch (Exception e) {
-            this.logger.warn("报告{} OCR识别错误:{}", filename, ExceptionUtil.stacktraceToString(e));
-            throw new ReportParseException(ReportParseStatus.SYSTEM_ERROR);
-        } finally {
-            if (logger.isInfoEnabled()) {
-                this.logger.info("报告{} OCR识别参数{},OCR识别结果:{},处理后的结果是:{}",
-                        filename, paramsMap, objectStr, res);
-            }
+    /**
+     * Sends the local image and the prompt to DashScope {@code qwen-vl-ocr-latest} and returns the first choice's "text" entry.
+     * @throws NoApiKeyException if {@code DASHSCOPE_API_KEY} is not set; a key must never be hard-coded in source control
+     */
+    private Object call(String localPath, String userMsg) throws NoApiKeyException, UploadFileException {
+        MultiModalConversation conv = new MultiModalConversation();
+        Map<String, Object> map = MapUtil.newHashMap();
+        map.put("image", "file://" + localPath);
+        // Pixel bounds: larger images are scaled down below max_pixels, smaller ones are scaled up above min_pixels.
+        map.put("max_pixels", "6422528");
+        map.put("min_pixels", "3136");
+        // Enable automatic image rotation correction.
+        map.put("enable_rotate", true);
+        MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
+                .content(Arrays.asList(
+                        map,
+                        // qwen-vl-ocr-latest accepts a custom prompt in "text"; qwen-vl-ocr-1028 uses a fixed prompt and ignores it.
+                        Collections.singletonMap("text", userMsg))).build();
+        String dashscopeApiKey = System.getenv("DASHSCOPE_API_KEY");
+        if (StrUtil.isBlank(dashscopeApiKey)) {
+            throw new NoApiKeyException();
         }
+        MultiModalConversationParam param = MultiModalConversationParam.builder()
+                .apiKey(dashscopeApiKey)
+                .model("qwen-vl-ocr-latest")
+                .message(userMessage)
+                .topP(0.001)
+                .temperature(0.1f)
+                .maxLength(8192)
+                .build();
+        MultiModalConversationResult result = conv.call(param);
+        return result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text");
     }
 
-    private String parseOcrResult(String ocrApi, Map<String, Object> paramsMap) {
-        String body = HttpUtil.get(ocrApi, paramsMap);
-        JSONObject jsonResult = JSONUtil.parseObj(body);
-        String content = StrUtil.split(jsonResult.getStr("content"), "```").get(1);
+    private String parseOcrResult(Object context) { // rebuilds a JSON object string from the text after the first "{" of the reply
+        String content = StrUtil.blankToDefault(StrUtil.subBetween(context.toString(), "```"), context.toString()); // reply may be unfenced
         return "{" + StrUtil.subAfter(content, "{", false) + "}";
     }
 
@@ -208,14 +244,6 @@ public class OCRReportParser {
         if ("无".equals(trim) || "否".equals(trim)) {
             return null;
         }
-        String value = ReportParseUtils.cleaningValue(trim);
-        if (value == null) {
-            return null;
-        }
-        // 识别到多个基金
-        if (value.contains("、") || value.contains(",")) {
-            return value.replaceAll("[、,]", ",");
-        }
-        return value;
+        return trim;
     }
 }

+ 63 - 59
mo-daq/src/main/java/com/smppw/modaq/application/components/ReportParseUtils.java

@@ -349,33 +349,33 @@ public final class ReportParseUtils {
             return null;
         }
         text = StrUtil.trim(text);
-        if (ReportType.QUARTERLY.equals(reportType)) {
-            Matcher matcher = PatternConsts.QUARTERLY_PATTERN.matcher(text);
-            if (matcher.find()) {
-                String year = matcher.group(1);
-                return switch (matcher.group(2)) {
-                    case "一", "1" -> year + "-03-31";
-                    case "二", "2" -> year + "-06-30";
-                    case "三", "3" -> year + "-09-30";
-                    case "四", "4" -> year + "-12-31";
-                    default -> null;
-                };
-            }
-        }
-        if (ReportType.ANNUALLY.equals(reportType)) {
-            Matcher matcher = PatternConsts.ANNUALLY_PATTERN.matcher(text);
-            if (matcher.find()) {
-                return matcher.group(1) + "-12-31";
-            }
-        }
-        if (ReportType.MONTHLY.equals(reportType)) {
-            Matcher matcher = PatternConsts.MONTHLY_PATTERN.matcher(text);
-            if (matcher.find()) {
-                int year = Integer.parseInt(matcher.group(1));
-                int month = Integer.parseInt(matcher.group(2));
-                return formatMonthEnd(year, month);
-            }
-        }
+//        if (ReportType.QUARTERLY.equals(reportType)) {
+//            Matcher matcher = PatternConsts.QUARTERLY_PATTERN.matcher(text);
+//            if (matcher.find()) {
+//                String year = matcher.group(1);
+//                return switch (matcher.group(2)) {
+//                    case "一", "1" -> year + "-03-31";
+//                    case "二", "2" -> year + "-06-30";
+//                    case "三", "3" -> year + "-09-30";
+//                    case "四", "4" -> year + "-12-31";
+//                    default -> null;
+//                };
+//            }
+//        }
+//        if (ReportType.ANNUALLY.equals(reportType)) {
+//            Matcher matcher = PatternConsts.ANNUALLY_PATTERN.matcher(text);
+//            if (matcher.find()) {
+//                return matcher.group(1) + "-12-31";
+//            }
+//        }
+//        if (ReportType.MONTHLY.equals(reportType)) {
+//            Matcher matcher = PatternConsts.MONTHLY_PATTERN.matcher(text);
+//            if (matcher.find()) {
+//                int year = Integer.parseInt(matcher.group(1));
+//                int month = Integer.parseInt(matcher.group(2));
+//                return formatMonthEnd(year, month);
+//            }
+//        }
         // 其他所有场景下都支持的日期匹配规则
         // 先用严格的日期匹配
         Matcher matcher = PatternConsts.STRICT_DAY_PATTERN.matcher(text);
@@ -391,15 +391,15 @@ public final class ReportParseUtils {
             }
             return date;
         }
-        // 其他报告的日期(匹配到日)
-        if (ReportType.OTHER.equals(reportType)) {
-            matcher = PatternConsts.MONTHLY_PATTERN.matcher(text);
-            if (matcher.find()) {
-                int year = Integer.parseInt(matcher.group(1));
-                int month = Integer.parseInt(matcher.group(2));
-                return formatMonthEnd(year, month);
-            }
-        }
+//        // 其他报告的日期(匹配到日)
+//        if (ReportType.OTHER.equals(reportType)) {
+//            matcher = PatternConsts.MONTHLY_PATTERN.matcher(text);
+//            if (matcher.find()) {
+//                int year = Integer.parseInt(matcher.group(1));
+//                int month = Integer.parseInt(matcher.group(2));
+//                return formatMonthEnd(year, month);
+//            }
+//        }
         return null;
     }
 
@@ -415,30 +415,34 @@ public final class ReportParseUtils {
             return null;
         }
         ReportType reportType = null;
-        // 优先确认函、周报、其他观点,然后才匹配定期报告
-        boolean isAmac = Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType);
-        if (!isAmac) {
-            if (Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType)
-                    || StrUtil.containsAny(text, ReportType.LETTER.getPatterns())) {
-                reportType = ReportType.LETTER;
-            } else if (StrUtil.containsAny(text, ReportType.WEEKLY.getPatterns())) {
-                reportType = ReportType.WEEKLY;
-            } else if (StrUtil.containsAny(text, ReportType.OTHER.getPatterns())) {
-                reportType = ReportType.OTHER;
-            }
-            return reportType;
-        }
-        // 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
-        if (StrUtil.containsAny(text, ReportType.QUARTERLY.getPatterns())) {
-            reportType = ReportType.QUARTERLY;
-        } else if (StrUtil.containsAny(text, ReportType.ANNUALLY.getPatterns())) {
-            reportType = ReportType.ANNUALLY;
-        } else if (StrUtil.containsAny(text, ReportType.MONTHLY.getPatterns())) {
-            reportType = ReportType.MONTHLY;
-        } else if (ReportParseUtils.containsAny(text, ReportParseUtils.MANAGER_KEYWORDS) || text.contains("定期报告")) {
-            // 特殊的月报(当季度->年度->月度报告无法识别时)
-            reportType = ReportType.MONTHLY;
+        if (Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType)
+                || StrUtil.containsAny(text, ReportType.LETTER.getPatterns())) {
+            reportType = ReportType.LETTER;
         }
+//        // 优先确认函、周报、其他观点,然后才匹配定期报告
+//        boolean isAmac = Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType);
+//        if (!isAmac) {
+//            if (Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType)
+//                    || StrUtil.containsAny(text, ReportType.LETTER.getPatterns())) {
+//                reportType = ReportType.LETTER;
+//            } else if (StrUtil.containsAny(text, ReportType.WEEKLY.getPatterns())) {
+//                reportType = ReportType.WEEKLY;
+//            } else if (StrUtil.containsAny(text, ReportType.OTHER.getPatterns())) {
+//                reportType = ReportType.OTHER;
+//            }
+//            return reportType;
+//        }
+//        // 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
+//        if (StrUtil.containsAny(text, ReportType.QUARTERLY.getPatterns())) {
+//            reportType = ReportType.QUARTERLY;
+//        } else if (StrUtil.containsAny(text, ReportType.ANNUALLY.getPatterns())) {
+//            reportType = ReportType.ANNUALLY;
+//        } else if (StrUtil.containsAny(text, ReportType.MONTHLY.getPatterns())) {
+//            reportType = ReportType.MONTHLY;
+//        } else if (ReportParseUtils.containsAny(text, ReportParseUtils.MANAGER_KEYWORDS) || text.contains("定期报告")) {
+//            // 特殊的月报(当季度->年度->月度报告无法识别时)
+//            reportType = ReportType.MONTHLY;
+//        }
         return reportType;
     }
 

+ 83 - 83
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AILetterReportParser.java

@@ -1,83 +1,83 @@
-package com.smppw.modaq.application.components.report.parser.ai;
-
-import cn.hutool.core.collection.ListUtil;
-import cn.hutool.core.map.MapUtil;
-import cn.hutool.json.JSONObject;
-import cn.hutool.json.JSONUtil;
-import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
-import com.smppw.modaq.common.enums.ReportParseStatus;
-import com.smppw.modaq.common.exception.ReportParseException;
-import com.smppw.modaq.domain.dto.report.*;
-import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
-import org.springframework.stereotype.Component;
-
-import java.util.List;
-import java.util.Map;
-
-@Component(ReportParserConstant.PARSER_AI_LETTER)
-public class AILetterReportParser extends AbstractAIReportParser<LetterReportData> {
-    public AILetterReportParser(EmailFieldMappingMapper fieldMappingMapper) {
-        super(fieldMappingMapper);
-    }
-
-    @Override
-    protected boolean isSupportAIParse() {
-        return true;
-    }
-
-    @Override
-    @SuppressWarnings("unchecked")
-    protected void handleAiResult(String result) throws ReportParseException {
-        try {
-            JSONObject jsonObject = JSONUtil.parseObj(result);
-            this.allInfoMap.putAll(flattenMap(jsonObject, ListUtil.list(false)));
-            if (this.allInfoMap.containsKey("交易确认明细")) {
-                Object temp = this.allInfoMap.remove("交易确认明细");
-                if (temp instanceof Map<?, ?> map) {
-                    this.allInfoMap.putAll((Map<String, Object>) map);
-                } else if (temp instanceof List<?> list && !list.isEmpty()) {
-                    this.allInfoMap.putAll((Map<String, Object>) list.get(0));
-                }
-            }
-        } catch (Exception e) {
-            throw new ReportParseException(ReportParseStatus.PARSE_HANDLE_FAIL);
-        }
-    }
-
-    @Override
-    protected LetterReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) throws ReportParseException {
-        Integer fileId = reportInfo.getFileId();
-        if (this.logger.isInfoEnabled()) {
-            this.logger.info("文件{} 解析内容是:{}", fileId, this.allInfoMap);
-        }
-        // 投资者信息
-        ReportInvestorInfoDTO investorInfo = this.buildDto(fileId, ReportInvestorInfoDTO.class, this.allInfoMap);
-        // 交易流水
-        ReportFundTransactionDTO fundTransaction = this.buildDto(fileId, ReportFundTransactionDTO.class, this.allInfoMap);
-        // 构建结果数据
-        LetterReportData reportData = new LetterReportData(reportInfo, fundInfo);
-        reportData.setFundTransaction(fundTransaction);
-        reportData.setInvestorInfo(investorInfo);
-        return reportData;
-    }
-
-    @Override
-    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
-        return this.buildDto(params.getFileId(), ReportFundInfoDTO.class, this.allInfoMap);
-    }
-
-    @SuppressWarnings("unchecked")
-    private static Map<String, Object> flattenMap(Map<String, Object> data, List<String> keys) {
-        Map<String, Object> result = MapUtil.newHashMap(16);
-        for (Map.Entry<String, Object> entry : data.entrySet()) {
-            List<String> currKeys = ListUtil.toList(keys);
-            currKeys.add(entry.getKey());
-            if (entry.getValue() instanceof Map<?, ?>) {
-                result.putAll(flattenMap((Map<String, Object>) entry.getValue(), currKeys));
-            } else {
-                result.put(entry.getKey(), entry.getValue());
-            }
-        }
-        return result;
-    }
-}
+//package com.smppw.modaq.application.components.report.parser.ai;
+//
+//import cn.hutool.core.collection.ListUtil;
+//import cn.hutool.core.map.MapUtil;
+//import cn.hutool.json.JSONObject;
+//import cn.hutool.json.JSONUtil;
+//import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
+//import com.smppw.modaq.common.enums.ReportParseStatus;
+//import com.smppw.modaq.common.exception.ReportParseException;
+//import com.smppw.modaq.domain.dto.report.*;
+//import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
+//import org.springframework.stereotype.Component;
+//
+//import java.util.List;
+//import java.util.Map;
+//
+//@Component(ReportParserConstant.PARSER_AI_LETTER)
+//public class AILetterReportParser extends AbstractAIReportParser<LetterReportData> {
+//    public AILetterReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+//        super(fieldMappingMapper);
+//    }
+//
+//    @Override
+//    protected boolean isSupportAIParse() {
+//        return true;
+//    }
+//
+//    @Override
+//    @SuppressWarnings("unchecked")
+//    protected void handleAiResult(String result) throws ReportParseException {
+//        try {
+//            JSONObject jsonObject = JSONUtil.parseObj(result);
+//            this.allInfoMap.putAll(flattenMap(jsonObject, ListUtil.list(false)));
+//            if (this.allInfoMap.containsKey("交易确认明细")) {
+//                Object temp = this.allInfoMap.remove("交易确认明细");
+//                if (temp instanceof Map<?, ?> map) {
+//                    this.allInfoMap.putAll((Map<String, Object>) map);
+//                } else if (temp instanceof List<?> list && !list.isEmpty()) {
+//                    this.allInfoMap.putAll((Map<String, Object>) list.get(0));
+//                }
+//            }
+//        } catch (Exception e) {
+//            throw new ReportParseException(ReportParseStatus.PARSE_HANDLE_FAIL);
+//        }
+//    }
+//
+//    @Override
+//    protected LetterReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) throws ReportParseException {
+//        Integer fileId = reportInfo.getFileId();
+//        if (this.logger.isInfoEnabled()) {
+//            this.logger.info("文件{} 解析内容是:{}", fileId, this.allInfoMap);
+//        }
+//        // 投资者信息
+//        ReportInvestorInfoDTO investorInfo = this.buildDto(fileId, ReportInvestorInfoDTO.class, this.allInfoMap);
+//        // 交易流水
+//        ReportFundTransactionDTO fundTransaction = this.buildDto(fileId, ReportFundTransactionDTO.class, this.allInfoMap);
+//        // 构建结果数据
+//        LetterReportData reportData = new LetterReportData(reportInfo, fundInfo);
+//        reportData.setFundTransaction(fundTransaction);
+//        reportData.setInvestorInfo(investorInfo);
+//        return reportData;
+//    }
+//
+//    @Override
+//    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
+//        return this.buildDto(params.getFileId(), ReportFundInfoDTO.class, this.allInfoMap);
+//    }
+//
+//    @SuppressWarnings("unchecked")
+//    private static Map<String, Object> flattenMap(Map<String, Object> data, List<String> keys) {
+//        Map<String, Object> result = MapUtil.newHashMap(16);
+//        for (Map.Entry<String, Object> entry : data.entrySet()) {
+//            List<String> currKeys = ListUtil.toList(keys);
+//            currKeys.add(entry.getKey());
+//            if (entry.getValue() instanceof Map<?, ?>) {
+//                result.putAll(flattenMap((Map<String, Object>) entry.getValue(), currKeys));
+//            } else {
+//                result.put(entry.getKey(), entry.getValue());
+//            }
+//        }
+//        return result;
+//    }
+//}

+ 168 - 168
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AbstractAIReportParser.java

@@ -1,168 +1,168 @@
-package com.smppw.modaq.application.components.report.parser.ai;
-
-import cn.hutool.core.exceptions.ExceptionUtil;
-import cn.hutool.core.io.IORuntimeException;
-import cn.hutool.core.map.MapUtil;
-import cn.hutool.core.util.StrUtil;
-import cn.hutool.json.JSONObject;
-import cn.hutool.json.JSONUtil;
-import com.smppw.modaq.application.components.report.parser.AbstractReportParser;
-import com.smppw.modaq.common.conts.PatternConsts;
-import com.smppw.modaq.common.enums.ReportParseStatus;
-import com.smppw.modaq.common.exception.ReportParseException;
-import com.smppw.modaq.domain.dto.report.ReportBaseInfoDTO;
-import com.smppw.modaq.domain.dto.report.ReportData;
-import com.smppw.modaq.domain.dto.report.ReportParserParams;
-import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
-import com.smppw.modaq.infrastructure.util.ConvertUtil;
-import org.springframework.beans.factory.annotation.Value;
-
-import java.util.Date;
-import java.util.Map;
-
-/**
- * 抽象的报告AI解析器
- *
- * @param <T> 泛型参数
- */
-public abstract class AbstractAIReportParser<T extends ReportData> extends AbstractReportParser<T> {
-//    @Value("${email.report.ai-parser-url}")
-//    private String aiParserUrl;
-
-    protected String aiFileId;
-
-    protected Map<String, Object> allInfoMap;
-
-    public AbstractAIReportParser(EmailFieldMappingMapper fieldMappingMapper) {
-        super(fieldMappingMapper);
-    }
-
-    @Override
-    public T parse(ReportParserParams params) throws ReportParseException {
-        if (!isSupportAIParse()) {
-            throw new ReportParseException(ReportParseStatus.NO_SUPPORT_AI);
-        }
-        // 初始化
-        this.init();
-        // 解析文件内容,并把文件内容解构到 allInfoMap 对象中
-        this.parseFileContent(params);
-        // 解构话返回解析数据
-        T reportData = this.buildReportData(params, params.getFilename());
-        if (reportData != null) {
-            reportData.setAiFileId(this.aiFileId);
-            reportData.setAiParse(true);
-        }
-        return reportData;
-    }
-
-
-    @Override
-    protected void cleaningReportData(T reportData) {
-        // do something.
-    }
-
-    /**
-     * AI解析的提示词
-     *
-     * @return 返回null就用默认的提示词
-     */
-    protected String prompt() {
-        return null;
-    }
-
-    /**
-     * 报告是否支持ai工具解析
-     *
-     * @return /
-     */
-    protected abstract boolean isSupportAIParse();
-
-    /**
-     * 覆盖报告基本信息的方法,报告日期从报告名称获取失败时从内容获取
-     *
-     * @param params /
-     * @return /
-     */
-    @Override
-    protected ReportBaseInfoDTO buildReportInfo(ReportParserParams params) {
-        ReportBaseInfoDTO reportInfo = super.buildReportInfo(params);
-        if (reportInfo.getReportDate() == null) {
-            Date date = ConvertUtil.toDate(MapUtil.getStr(this.allInfoMap, "报告日期"));
-            reportInfo.setReportDate(date);
-        }
-        return reportInfo;
-    }
-
-    /**
-     * 处理ai解析结果,方便构建结构化对象
-     *
-     * @param result ai解析结果
-     */
-    protected void handleAiResult(String result) throws ReportParseException {
-        try {
-            JSONObject jsonObject = JSONUtil.parseObj(result);
-            this.allInfoMap.putAll(jsonObject);
-        } catch (Exception e) {
-            throw new ReportParseException(ReportParseStatus.PARSE_HANDLE_FAIL);
-        }
-    }
-
-    @Override
-    protected void init() {
-        super.init();
-        // 先初始化为null
-        this.aiFileId = null;
-        this.allInfoMap = MapUtil.newHashMap(128);
-    }
-
-    /**
-     * 移除 JSON 字符串中的注释
-     */
-    protected String removeJsonComments(String json) {
-        return PatternConsts.JSON_COMMENT_PATTERN.matcher(json).replaceAll(mr -> {
-                    // 如果匹配到的是字符串内容(双引号包裹),则保留原内容
-                    if (mr.group(1) != null) {
-                        return mr.group(1);
-                    }
-                    // 否则移除注释(替换为空)
-                    return "";
-                })
-                .replaceAll("(?m)^\\s+", "")  // 移除空行
-                .trim();
-    }
-
-    private void parseFileContent(ReportParserParams params) {
-        String filename = params.getFilename();
-        Map<String, Object> paramsMap = MapUtil.newHashMap(4);
-        paramsMap.put("filepath", params.getFilepath());
-        String prompt = this.prompt();
-        if (StrUtil.isNotBlank(prompt)) {
-            paramsMap.put("user_msg", prompt);
-        }
-        String body = null;
-        try {
-            // todo 用java的AI解析
-//            body = HttpUtil.get(this.aiParserUrl, paramsMap);
-            JSONObject jsonResult = JSONUtil.parseObj(body);
-            this.aiFileId = MapUtil.getStr(jsonResult, "file_id");
-            String content = StrUtil.split(jsonResult.getStr("content"), "```").get(1);
-            String aiParserContent = "{" + StrUtil.subAfter(content, "{", false) + "}";
-            if (StrUtil.isNotBlank(aiParserContent)) {
-                String result = this.removeJsonComments(aiParserContent);
-                this.handleAiResult(result);
-            }
-        } catch (ReportParseException e) {
-            this.logger.warn("{} ai解析失败,解析结果{},错误原因:{}", filename, body, ExceptionUtil.stacktraceToString(e));
-            throw e;
-        } catch (IORuntimeException e) {
-            throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
-        } catch (Exception e) {
-            this.logger.warn("报告{} 在AI解析时报错:{}", filename, ExceptionUtil.stacktraceToString(e));
-            throw new ReportParseException(ReportParseStatus.SYSTEM_ERROR);
-        } finally {
-            if (logger.isInfoEnabled()) {
-                this.logger.info("报告{} AI解析参数{},AI解析结果:{}", filename, paramsMap, body);
-            }
-        }
-    }
-}
+//package com.smppw.modaq.application.components.report.parser.ai;
+//
+//import cn.hutool.core.exceptions.ExceptionUtil;
+//import cn.hutool.core.io.IORuntimeException;
+//import cn.hutool.core.map.MapUtil;
+//import cn.hutool.core.util.StrUtil;
+//import cn.hutool.json.JSONObject;
+//import cn.hutool.json.JSONUtil;
+//import com.smppw.modaq.application.components.report.parser.AbstractReportParser;
+//import com.smppw.modaq.common.conts.PatternConsts;
+//import com.smppw.modaq.common.enums.ReportParseStatus;
+//import com.smppw.modaq.common.exception.ReportParseException;
+//import com.smppw.modaq.domain.dto.report.ReportBaseInfoDTO;
+//import com.smppw.modaq.domain.dto.report.ReportData;
+//import com.smppw.modaq.domain.dto.report.ReportParserParams;
+//import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
+//import com.smppw.modaq.infrastructure.util.ConvertUtil;
+//import org.springframework.beans.factory.annotation.Value;
+//
+//import java.util.Date;
+//import java.util.Map;
+//
+///**
+// * 抽象的报告AI解析器
+// *
+// * @param <T> 泛型参数
+// */
+//public abstract class AbstractAIReportParser<T extends ReportData> extends AbstractReportParser<T> {
+////    @Value("${email.report.ai-parser-url}")
+////    private String aiParserUrl;
+//
+//    protected String aiFileId;
+//
+//    protected Map<String, Object> allInfoMap;
+//
+//    public AbstractAIReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+//        super(fieldMappingMapper);
+//    }
+//
+//    @Override
+//    public T parse(ReportParserParams params) throws ReportParseException {
+//        if (!isSupportAIParse()) {
+//            throw new ReportParseException(ReportParseStatus.NO_SUPPORT_AI);
+//        }
+//        // 初始化
+//        this.init();
+//        // 解析文件内容,并把文件内容解构到 allInfoMap 对象中
+//        this.parseFileContent(params);
+//        // 解构话返回解析数据
+//        T reportData = this.buildReportData(params, params.getFilename());
+//        if (reportData != null) {
+//            reportData.setAiFileId(this.aiFileId);
+//            reportData.setAiParse(true);
+//        }
+//        return reportData;
+//    }
+//
+//
+//    @Override
+//    protected void cleaningReportData(T reportData) {
+//        // do something.
+//    }
+//
+//    /**
+//     * AI解析的提示词
+//     *
+//     * @return 返回null就用默认的提示词
+//     */
+//    protected String prompt() {
+//        return null;
+//    }
+//
+//    /**
+//     * 报告是否支持ai工具解析
+//     *
+//     * @return /
+//     */
+//    protected abstract boolean isSupportAIParse();
+//
+//    /**
+//     * 覆盖报告基本信息的方法,报告日期从报告名称获取失败时从内容获取
+//     *
+//     * @param params /
+//     * @return /
+//     */
+//    @Override
+//    protected ReportBaseInfoDTO buildReportInfo(ReportParserParams params) {
+//        ReportBaseInfoDTO reportInfo = super.buildReportInfo(params);
+//        if (reportInfo.getReportDate() == null) {
+//            Date date = ConvertUtil.toDate(MapUtil.getStr(this.allInfoMap, "报告日期"));
+//            reportInfo.setReportDate(date);
+//        }
+//        return reportInfo;
+//    }
+//
+//    /**
+//     * 处理ai解析结果,方便构建结构化对象
+//     *
+//     * @param result ai解析结果
+//     */
+//    protected void handleAiResult(String result) throws ReportParseException {
+//        try {
+//            JSONObject jsonObject = JSONUtil.parseObj(result);
+//            this.allInfoMap.putAll(jsonObject);
+//        } catch (Exception e) {
+//            throw new ReportParseException(ReportParseStatus.PARSE_HANDLE_FAIL);
+//        }
+//    }
+//
+//    @Override
+//    protected void init() {
+//        super.init();
+//        // 先初始化为null
+//        this.aiFileId = null;
+//        this.allInfoMap = MapUtil.newHashMap(128);
+//    }
+//
+//    /**
+//     * 移除 JSON 字符串中的注释
+//     */
+//    protected String removeJsonComments(String json) {
+//        return PatternConsts.JSON_COMMENT_PATTERN.matcher(json).replaceAll(mr -> {
+//                    // 如果匹配到的是字符串内容(双引号包裹),则保留原内容
+//                    if (mr.group(1) != null) {
+//                        return mr.group(1);
+//                    }
+//                    // 否则移除注释(替换为空)
+//                    return "";
+//                })
+//                .replaceAll("(?m)^\\s+", "")  // 移除空行
+//                .trim();
+//    }
+//
+//    private void parseFileContent(ReportParserParams params) {
+//        String filename = params.getFilename();
+//        Map<String, Object> paramsMap = MapUtil.newHashMap(4);
+//        paramsMap.put("filepath", params.getFilepath());
+//        String prompt = this.prompt();
+//        if (StrUtil.isNotBlank(prompt)) {
+//            paramsMap.put("user_msg", prompt);
+//        }
+//        String body = null;
+//        try {
+//            // todo 用java的AI解析
+////            body = HttpUtil.get(this.aiParserUrl, paramsMap);
+//            JSONObject jsonResult = JSONUtil.parseObj(body);
+//            this.aiFileId = MapUtil.getStr(jsonResult, "file_id");
+//            String content = StrUtil.split(jsonResult.getStr("content"), "```").get(1);
+//            String aiParserContent = "{" + StrUtil.subAfter(content, "{", false) + "}";
+//            if (StrUtil.isNotBlank(aiParserContent)) {
+//                String result = this.removeJsonComments(aiParserContent);
+//                this.handleAiResult(result);
+//            }
+//        } catch (ReportParseException e) {
+//            this.logger.warn("{} ai解析失败,解析结果{},错误原因:{}", filename, body, ExceptionUtil.stacktraceToString(e));
+//            throw e;
+//        } catch (IORuntimeException e) {
+//            throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
+//        } catch (Exception e) {
+//            this.logger.warn("报告{} 在AI解析时报错:{}", filename, ExceptionUtil.stacktraceToString(e));
+//            throw new ReportParseException(ReportParseStatus.SYSTEM_ERROR);
+//        } finally {
+//            if (logger.isInfoEnabled()) {
+//                this.logger.info("报告{} AI解析参数{},AI解析结果:{}", filename, paramsMap, body);
+//            }
+//        }
+//    }
+//}

+ 12 - 12
mo-daq/src/main/java/com/smppw/modaq/application/util/EmailUtil.java

@@ -262,18 +262,18 @@ public class EmailUtil {
         // 1.确认函
         emailTypeMap.put(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE,
                 ListUtil.toList(ReportType.LETTER.getPatterns()));
-        // 2.周报
-        emailTypeMap.put(EmailTypeConst.REPORT_WEEKLY_TYPE,
-                ListUtil.toList(ReportType.WEEKLY.getPatterns()));
-        // 3.其他
-        emailTypeMap.put(EmailTypeConst.REPORT_OTHER_TYPE,
-                ListUtil.toList(ReportType.OTHER.getPatterns()));
-        // 4.定期报告的类型判断
-        List<String> types = ListUtil.list(true);
-        CollUtil.addAll(types, ReportType.QUARTERLY.getPatterns());
-        CollUtil.addAll(types, ReportType.ANNUALLY.getPatterns());
-        CollUtil.addAll(types, ReportType.MONTHLY.getPatterns());
-        emailTypeMap.put(EmailTypeConst.REPORT_EMAIL_TYPE, types);
+//        // 2.周报
+//        emailTypeMap.put(EmailTypeConst.REPORT_WEEKLY_TYPE,
+//                ListUtil.toList(ReportType.WEEKLY.getPatterns()));
+//        // 3.其他
+//        emailTypeMap.put(EmailTypeConst.REPORT_OTHER_TYPE,
+//                ListUtil.toList(ReportType.OTHER.getPatterns()));
+//        // 4.定期报告的类型判断
+//        List<String> types = ListUtil.list(true);
+//        CollUtil.addAll(types, ReportType.QUARTERLY.getPatterns());
+//        CollUtil.addAll(types, ReportType.ANNUALLY.getPatterns());
+//        CollUtil.addAll(types, ReportType.MONTHLY.getPatterns());
+//        emailTypeMap.put(EmailTypeConst.REPORT_EMAIL_TYPE, types);
         return emailTypeMap;
     }
 

+ 75 - 80
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -8,6 +8,7 @@ import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.IdUtil;
 import cn.hutool.core.util.StrUtil;
+import com.smppw.modaq.application.components.OCRReportParser;
 import com.smppw.modaq.application.components.ReportParseUtils;
 import com.smppw.modaq.application.components.report.parser.ReportParser;
 import com.smppw.modaq.application.components.report.parser.ReportParserFactory;
@@ -18,19 +19,13 @@ import com.smppw.modaq.common.conts.Constants;
 import com.smppw.modaq.common.conts.DateConst;
 import com.smppw.modaq.common.conts.EmailParseStatusConst;
 import com.smppw.modaq.common.conts.EmailTypeConst;
-import com.smppw.modaq.common.enums.ReportMonthlyType;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.enums.ReportParserFileType;
 import com.smppw.modaq.common.enums.ReportType;
-import com.smppw.modaq.common.exception.NotSupportReportException;
 import com.smppw.modaq.common.exception.ReportParseException;
 import com.smppw.modaq.domain.dto.*;
 import com.smppw.modaq.domain.dto.report.*;
 import com.smppw.modaq.domain.dto.report.ocr.OCRLetterParseData;
-import com.smppw.modaq.domain.dto.report.ocr.OCRParseData;
-import com.smppw.modaq.domain.dto.report.ParseResult;
-import com.smppw.modaq.domain.dto.report.ReportData;
-import com.smppw.modaq.domain.dto.report.ReportParserParams;
 import com.smppw.modaq.domain.entity.EmailFileInfoDO;
 import com.smppw.modaq.domain.entity.EmailParseInfoDO;
 import com.smppw.modaq.domain.mapper.EmailFileInfoMapper;
@@ -552,16 +547,16 @@ public class EmailParseService {
 //            }
         }
 
-        // ocr识别月报是否管理人版或协会版
-        ReportMonthlyType monthlyType = ReportMonthlyType.NO_NEED;
+//        // ocr识别月报是否管理人版或协会版
+//        ReportMonthlyType monthlyType = ReportMonthlyType.NO_NEED;
 //        if (ReportType.MONTHLY == reportType) {
 //            monthlyType = this.determineReportType(emailTitle, fileName, filepath, images);
 //        }
 //        boolean isAmac = reportType == ReportType.ANNUALLY || reportType == ReportType.QUARTERLY
 //                || (reportType == ReportType.MONTHLY && ReportMonthlyType.AMAC == monthlyType);
         boolean isAmac = false;
-        // 不支持解析的格式文件
-        boolean notSupportFile = false;
+//        // 不支持解析的格式文件
+//        boolean notSupportFile = false;
         // 解析报告
         ReportData reportData = null;
         ReportParserParams params = new ReportParserParams(fileId, fileName, filepath, reportType);
@@ -575,43 +570,43 @@ public class EmailParseService {
         } catch (ReportParseException e) {
             result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
             log.warn("解析失败:{}", result.getMsg());
-            if (e instanceof NotSupportReportException) {
-                notSupportFile = true;
-            }
+//            if (e instanceof NotSupportReportException) {
+//                notSupportFile = true;
+//            }
         } catch (Exception e) {
             log.warn("解析错误:{}", ExceptionUtil.stacktraceToString(e));
             result = new ParseResult<>(ReportParseStatus.PARSE_FAIL, null, e.getMessage());
         } finally {
-            // 如果解析结果是空的就用AI工具解析一次
-            if (reportData == null && !notSupportFile) {
-                if (log.isInfoEnabled()) {
-                    log.info("报告{} 是周报或管理人月报或其他类型或解析失败,用AI解析器解析", fileName);
-                }
-                try {
-                    if (!isAmac && CollUtil.isNotEmpty(images)) {
-                        filepath = images.get(0);
-                    }
-                    params = new ReportParserParams(fileId, fileName, filepath, reportType);
-                    ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, ReportParserFileType.AI);
-                    reportData = instance.parse(params);
-                    result = new ParseResult<>(1, "报告解析成功--AI", reportData);
-                } catch (ReportParseException e) {
-                    result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
-                    log.warn("AI解析失败:{}", result.getMsg());
-                } catch (Exception e) {
-                    log.warn("AI解析错误:{}", ExceptionUtil.stacktraceToString(e));
-                    result = new ParseResult<>(ReportParseStatus.PARSE_FAIL, null, e.getMessage());
-                }
-            }
+//            // 如果解析结果是空的就用AI工具解析一次
+//            if (reportData == null && !notSupportFile) {
+//                if (log.isInfoEnabled()) {
+//                    log.info("报告{} 是周报或管理人月报或其他类型或解析失败,用AI解析器解析", fileName);
+//                }
+//                try {
+//                    if (!isAmac && CollUtil.isNotEmpty(images)) {
+//                        filepath = images.get(0);
+//                    }
+//                    params = new ReportParserParams(fileId, fileName, filepath, reportType);
+//                    ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, ReportParserFileType.AI);
+//                    reportData = instance.parse(params);
+//                    result = new ParseResult<>(1, "报告解析成功--AI", reportData);
+//                } catch (ReportParseException e) {
+//                    result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
+//                    log.warn("AI解析失败:{}", result.getMsg());
+//                } catch (Exception e) {
+//                    log.warn("AI解析错误:{}", ExceptionUtil.stacktraceToString(e));
+//                    result = new ParseResult<>(ReportParseStatus.PARSE_FAIL, null, e.getMessage());
+//                }
+//            }
             if (log.isInfoEnabled()) {
                 log.info("报告{} 用ocr补充解析结果。补充前的结果是:\n{}", fileName, reportData);
             }
-//            // ocr信息提取(印章、联系人、基金名称和产品代码)
-//            this.ocrReportData(reportType, reportData, fileName, images);
-            // 设置月报类型
-            if (reportData != null && reportData.getBaseInfo() != null) {
-                reportData.getBaseInfo().setMonthlyType(monthlyType.getType());
-            }
+            // ocr信息提取(印章、联系人、基金名称和产品代码)
+            this.ocrReportData(reportType, reportData, fileName, images);
+//            // 设置月报类型
+//            if (reportData != null && reportData.getBaseInfo() != null) {
+//                reportData.getBaseInfo().setMonthlyType(monthlyType.getType());
+//            }
             if (log.isInfoEnabled()) {
                 log.info("报告{} 解析耗时{}ms,结果是:\n{}", fileName, (System.currentTimeMillis() - start), reportData);
             }
@@ -688,52 +683,52 @@ public class EmailParseService {
         }
         // 报告才识别尾页的印章和联系人,确认单不识别尾页
         if (ReportType.LETTER != reportType) {
-            OCRParseData parseRes = null;
-            try {
-                // 首页和尾页相等时只读首页
-                String imageUrl = images.size() == 1 ? images.get(0) : images.get(1);
-                parseRes = new OCRReportParser().parse(fileName, this.ocrParserUrl, imageUrl);
-            } catch (Exception e) {
-                log.error("报告{} OCR识别印章和联系人出错:{}", fileName, e.getMessage());
-            }
-            // ocr识别尾页是否包含印章和联系人信息
-            if (parseRes != null) {
-                if (reportData.getBaseInfo() != null) {
-                    reportData.getBaseInfo().setWithSeals(parseRes.getWithSeals());
-                    reportData.getBaseInfo().setWithContacts(parseRes.getWithContacts());
-                    if (fileName.contains("用印") && !Objects.equals(true, reportData.getBaseInfo().getWithSeals())) {
-                        reportData.getBaseInfo().setWithSeals(true);
-                    }
-                }
-            }
-            // 首页和尾页不相等时解析首页的数据
-            if (images.size() != 1) {
-                try {
-                    parseRes = new OCRReportParser().parse(fileName, this.ocrParserUrl, images.get(0));
-                } catch (Exception e) {
-                    log.error("报告{} OCR识别首页基金名称和报告日期出错:{}", fileName, e.getMessage());
-                }
-            }
-            // 用首页识别基金名称、产品代码和基金管理人
-            if (reportData.getFundInfo() != null && parseRes != null) {
-                if (StrUtil.isBlank(reportData.getFundInfo().getFundName())) {
-                    reportData.getFundInfo().setFundName(parseRes.getFundName());
-                }
-                if (StrUtil.isBlank(reportData.getFundInfo().getFundCode())) {
-                    reportData.getFundInfo().setFundCode(parseRes.getFundCode());
-                }
-                if (StrUtil.isBlank(reportData.getFundInfo().getCompanyName())
-                        || !reportData.getFundInfo().getCompanyName().contains("有限公司")) {
-                    reportData.getFundInfo().setCompanyName(parseRes.getCompanyName());
-                }
-            }
+//            OCRParseData parseRes = null;
+//            try {
+//                // 首页和尾页相等时只读首页
+//                String imageUrl = images.size() == 1 ? images.get(0) : images.get(1);
+//                parseRes = new OCRReportParser().parse(fileName, this.ocrParserUrl, imageUrl);
+//            } catch (Exception e) {
+//                log.error("报告{} OCR识别印章和联系人出错:{}", fileName, e.getMessage());
+//            }
+//            // ocr识别尾页是否包含印章和联系人信息
+//            if (parseRes != null) {
+//                if (reportData.getBaseInfo() != null) {
+//                    reportData.getBaseInfo().setWithSeals(parseRes.getWithSeals());
+//                    reportData.getBaseInfo().setWithContacts(parseRes.getWithContacts());
+//                    if (fileName.contains("用印") && !Objects.equals(true, reportData.getBaseInfo().getWithSeals())) {
+//                        reportData.getBaseInfo().setWithSeals(true);
+//                    }
+//                }
+//            }
+//            // 首页和尾页不相等时解析首页的数据
+//            if (images.size() != 1) {
+//                try {
+//                    parseRes = new OCRReportParser().parse(fileName, this.ocrParserUrl, images.get(0));
+//                } catch (Exception e) {
+//                    log.error("报告{} OCR识别首页基金名称和报告日期出错:{}", fileName, e.getMessage());
+//                }
+//            }
+//            // 用首页识别基金名称、产品代码和基金管理人
+//            if (reportData.getFundInfo() != null && parseRes != null) {
+//                if (StrUtil.isBlank(reportData.getFundInfo().getFundName())) {
+//                    reportData.getFundInfo().setFundName(parseRes.getFundName());
+//                }
+//                if (StrUtil.isBlank(reportData.getFundInfo().getFundCode())) {
+//                    reportData.getFundInfo().setFundCode(parseRes.getFundCode());
+//                }
+//                if (StrUtil.isBlank(reportData.getFundInfo().getCompanyName())
+//                        || !reportData.getFundInfo().getCompanyName().contains("有限公司")) {
+//                    reportData.getFundInfo().setCompanyName(parseRes.getCompanyName());
+//                }
+//            }
         } else {
             // 确认单AI解析失败时重新用OCR识别
             LetterReportData letterReportData = (LetterReportData) reportData;
             if (letterReportData.wasFailed()) {
                 OCRLetterParseData parseRes = null;
                 try {
-                    parseRes = new OCRReportParser().parseLetterData(fileName, this.ocrParserUrl, images.get(0));
+                    parseRes = new OCRReportParser().parseLetterData(fileName, images.get(0));
                 } catch (Exception e) {
                     log.error("报告{} OCR提取确认单关键信息出错:{}", fileName, e.getMessage());
                 }

+ 5 - 5
mo-daq/src/main/java/com/smppw/modaq/infrastructure/util/ArchiveUtil.java

@@ -240,12 +240,12 @@ public class ArchiveUtil {
         // 假设正常文本应包含常见停用词(的、是、在等)
         List<String> commonWords = ListUtil.list(false);
         commonWords.add("基金");
-        commonWords.addAll(ListUtil.toList(ReportType.ANNUALLY.getPatterns()));
-        commonWords.addAll(ListUtil.toList(ReportType.QUARTERLY.getPatterns()));
-        commonWords.addAll(ListUtil.toList(ReportType.MONTHLY.getPatterns()));
+//        commonWords.addAll(ListUtil.toList(ReportType.ANNUALLY.getPatterns()));
+//        commonWords.addAll(ListUtil.toList(ReportType.QUARTERLY.getPatterns()));
+//        commonWords.addAll(ListUtil.toList(ReportType.MONTHLY.getPatterns()));
         commonWords.addAll(ListUtil.toList(ReportType.LETTER.getPatterns()));
-        commonWords.addAll(ListUtil.toList(ReportType.WEEKLY.getPatterns()));
-        commonWords.addAll(ListUtil.toList(ReportType.OTHER.getPatterns()));
+//        commonWords.addAll(ListUtil.toList(ReportType.WEEKLY.getPatterns()));
+//        commonWords.addAll(ListUtil.toList(ReportType.OTHER.getPatterns()));
         for (String word : commonWords) {
             if (text.contains(word)) return false;
         }

+ 26 - 26
mo-daq/src/main/java/com/smppw/modaq/infrastructure/util/PdfUtil.java

@@ -20,32 +20,32 @@ import java.util.List;
 
 public class PdfUtil {
 
-    public static void validateDocx(String path) throws Exception {
-        File file = new File(path);
-
-        // 基础检查
-        if (!file.exists()) throw new FileNotFoundException("文件不存在");
-        if (!file.canRead()) throw new IOException("无读取权限");
-        if (file.length() == 0) throw new IOException("文件为空");
-
-        // 文件头检查
-        try (InputStream is = new FileInputStream(file)) {
-            byte[] header = new byte[4];
-            if (is.read(header) < 4) throw new IOException("文件过小");
-            if (header[0] != 0x50 || header[1] != 0x4B) { // PK 头
-                throw new IOException("不是ZIP格式文件");
-            }
-        }
-
-        // 尝试作为ZIP打开
-        try (java.util.zip.ZipFile zip = new java.util.zip.ZipFile(file)) {
-            if (zip.getEntry("[Content_Types].xml") == null) {
-                throw new IOException("缺少[Content_Types].xml");
-            }
-        } catch (IOException e) {
-            throw new IOException("无效的ZIP格式: " + e.getMessage());
-        }
-    }
+//    public static void validateDocx(String path) throws Exception {
+//        File file = new File(path);
+//
+//        // 基础检查
+//        if (!file.exists()) throw new FileNotFoundException("文件不存在");
+//        if (!file.canRead()) throw new IOException("无读取权限");
+//        if (file.length() == 0) throw new IOException("文件为空");
+//
+//        // 文件头检查
+//        try (InputStream is = new FileInputStream(file)) {
+//            byte[] header = new byte[4];
+//            if (is.read(header) < 4) throw new IOException("文件过小");
+//            if (header[0] != 0x50 || header[1] != 0x4B) { // PK 头
+//                throw new IOException("不是ZIP格式文件");
+//            }
+//        }
+//
+//        // 尝试作为ZIP打开
+//        try (java.util.zip.ZipFile zip = new java.util.zip.ZipFile(file)) {
+//            if (zip.getEntry("[Content_Types].xml") == null) {
+//                throw new IOException("缺少[Content_Types].xml");
+//            }
+//        } catch (IOException e) {
+//            throw new IOException("无效的ZIP格式: " + e.getMessage());
+//        }
+//    }
 
 //    public static void convertDocxToPdf(String input, String output) throws Exception {
 //        validateDocx(input);