4 Achegas ffedbc403d ... b32768cf3f

Autor SHA1 Mensaxe Data
  wangzaijun b32768cf3f feat:AI解析观点报告的基本信息就好 hai 5 días
  wangzaijun a80d5d379e feat:报告支持AI hai 5 días
  wangzaijun b8fa6bdb5e 合并 hai 6 días
  wangzaijun a0f10fe7a7 feat:优化解析逻辑当代码无法解析时用AI解析 hai 6 días
Modificáronse 24 ficheiros con 685 adicións e 231 borrados
  1. 24 61
      mo-daq/src/main/java/com/smppw/modaq/application/components/ReportParseUtils.java
  2. 49 9
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/AbstractReportParser.java
  3. 31 23
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ReportParserConstant.java
  4. 2 1
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ReportParserFactory.java
  5. 43 0
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AIAnnuallyReportParser.java
  6. 74 0
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AILetterReportParser.java
  7. 77 0
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AIMonthlyReportParser.java
  8. 43 0
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AIQuarterlyReportParser.java
  9. 113 0
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AbstractAIReportParser.java
  10. 51 0
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AiOtherReportParser.java
  11. 28 82
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/AbstractPDReportParser.java
  12. 2 27
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDLetterReportParser.java
  13. 0 5
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDMonthlyReportParser.java
  14. 0 5
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDQuarterlyReportParser.java
  15. 18 0
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/writer/OtherReportWriter.java
  16. 5 0
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/writer/ReportWriterConstant.java
  17. 6 1
      mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParseStatus.java
  18. 2 1
      mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParserFileType.java
  19. 8 2
      mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportType.java
  20. 20 0
      mo-daq/src/main/java/com/smppw/modaq/common/exception/NotSupportReportException.java
  21. 14 0
      mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/LetterReportData.java
  22. 14 0
      mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportData.java
  23. 7 2
      mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportParserParams.java
  24. 54 12
      mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

+ 24 - 61
mo-daq/src/main/java/com/smppw/modaq/application/components/ReportParseUtils.java

@@ -236,6 +236,28 @@ public final class ReportParseUtils {
     }
 
     /**
+     * Resolves the share-class label of a tiered fund from free text.
+     * <p>
+     * The text is scanned for markers of the form {@code [A-F]级} or {@code 基金[A-F]}.
+     * When several markers are present the last one wins, and its letter is returned
+     * as {@code "<letter>级"}. When no marker is present the parent-fund label
+     * {@code "母基金"} is returned.
+     *
+     * @param text text to scan; {@code null} is treated as containing no marker
+     * @return the share-class label, never {@code null}
+     */
+    public static String matchFundLevel(String text) {
+        if (text == null) {
+            return "母基金";
+        }
+        Matcher matcher = FUND_LEVEL_PATTERN.matcher(text);
+        String lastMatch = null;
+        // Keep iterating so that the final occurrence in the text is the one used.
+        while (matcher.find()) {
+            lastMatch = matcher.group();
+        }
+        if (lastMatch == null) {
+            return "母基金";
+        }
+        // Strip everything except the class letter, then normalise to "<letter>级".
+        return lastMatch.replaceAll("[^A-F]", "") + "级";
+    }
+
+    /**
+     * Share-class marker used by {@link #matchFundLevel(String)}; compiled once because
+     * {@link Pattern} instances are immutable and safe to reuse.
+     */
+    private static final Pattern FUND_LEVEL_PATTERN = Pattern.compile("[A-F]级|基金[A-F]");
+
+    /**
      * 匹配报告日期
      *
      * @param string 文本内容
@@ -303,10 +325,10 @@ public final class ReportParseUtils {
             reportType = ReportType.ANNUALLY;
         } else if (StrUtil.containsAny(string, ReportType.MONTHLY.getPatterns())) {
             reportType = ReportType.MONTHLY;
-//        } else if (StrUtil.containsAny(string, ReportType.WEEKLY.getPatterns())) {
-//            reportType = ReportType.WEEKLY;
         } else if (StrUtil.containsAny(string, ReportType.LETTER.getPatterns())) {
             reportType = ReportType.LETTER;
+        } else if (StrUtil.containsAny(string, ReportType.OTHER.getPatterns())) {
+            reportType = ReportType.OTHER;
         }
         return reportType;
     }
@@ -322,65 +344,6 @@ public final class ReportParseUtils {
         return String.format("%02d", Integer.parseInt(number));
     }
 
-//    public static GenerationResult callWithMessage() throws ApiException, NoApiKeyException, InputRequiredException {
-//        Generation gen = new Generation();
-//        Message systemMsg = Message.builder()
-//                .role(Role.SYSTEM.getValue())
-//                .content("You are a helpful assistant.")
-//                .build();
-//        Message userMsg = Message.builder()
-//                .role(Role.USER.getValue())
-//                .content("你是谁?")
-//                .build();
-//        GenerationParam param = GenerationParam.builder()
-//                // 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
-//                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
-//                // 模型列表:https://help.aliyun.com/zh/model-studio/getting-started/models
-//                .model("qwen-plus")
-//                .messages(Arrays.asList(systemMsg, userMsg))
-//                .resultFormat(GenerationParam.ResultFormat.MESSAGE)
-//                .build();
-//        return gen.call(param);
-//    }
-//
-//    public static void simpleMultiModalConversationCall()
-//            throws ApiException, NoApiKeyException, UploadFileException {
-//        MultiModalConversation conv = new MultiModalConversation();
-//        Map<String, Object> map = new HashMap<>();
-//        map.put("image", "./流水1.jpg");
-//        map.put("max_pixels", "1003520");
-//        map.put("min_pixels", "3136");
-//        MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
-//                .content(Arrays.asList(
-//                        map,
-//                        // 目前为保证模型效果,模型内部会统一使用"Read all the text in the image."作为text的值,用户输入的文本不会生效。
-//                        Collections.singletonMap("text", "Read all the text in the image."))).build();
-//        MultiModalConversationParam param = MultiModalConversationParam.builder()
-//                // 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
-//                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
-//                // 模型列表:https://help.aliyun.com/zh/model-studio/getting-started/models
-//                .model("qwen-vl-ocr")
-//                .message(userMessage)
-//                .build();
-//        MultiModalConversationResult result = conv.call(param);
-//        System.out.println(JsonUtils.toJson(result));
-//    }
-//
-//    public static void main(String[] args) throws IOException {
-////        try {
-////            GenerationResult result = callWithMessage();
-////            System.out.println(result.getOutput().getChoices().get(0).getMessage().getContent());
-////        } catch (ApiException | NoApiKeyException | InputRequiredException e) {
-////            System.err.println("错误信息:"+e.getMessage());
-////            System.out.println("请参考文档:https://help.aliyun.com/zh/model-studio/developer-reference/error-code");
-////        }
-//        try {
-//            simpleMultiModalConversationCall();
-//        } catch (ApiException | NoApiKeyException | UploadFileException e) {
-//            System.out.println(e.getMessage());
-//        }
-//    }
-
     public static void main(String[] args) throws IOException, ReportParseException {
 //        String filepath = "C:\\Users\\Administrator\\Desktop\\tmp\\(1)投资者交易确认函【申购】_【SZF635】佳岳国债增强私募证券投资基金_20250217_任军.pdf";
 //        String filepath = "C:\\Users\\Administrator\\Desktop\\tmp\\CP080A_优美利赢胜价值1号私募投资基金A_20250217_邓辉_申购确认_20250217131352.pdf";

+ 49 - 9
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/AbstractReportParser.java

@@ -8,10 +8,7 @@ import cn.hutool.core.util.StrUtil;
 import com.smppw.modaq.application.components.ReportParseUtils;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.exception.ReportParseException;
-import com.smppw.modaq.domain.dto.report.BaseReportDTO;
-import com.smppw.modaq.domain.dto.report.ReportBaseInfoDTO;
-import com.smppw.modaq.domain.dto.report.ReportData;
-import com.smppw.modaq.domain.dto.report.ReportParserParams;
+import com.smppw.modaq.domain.dto.report.*;
 import com.smppw.modaq.domain.entity.EmailFieldMappingDO;
 import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
 import org.slf4j.Logger;
@@ -43,13 +40,13 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
      * 初始化数据的方法
      */
     protected void init() {
-        List<EmailFieldMappingDO> emailFieldMapping = this.fieldMappingMapper.getEmailFieldMapping(ListUtil.of(3, 4));
-        if (CollUtil.isEmpty(emailFieldMapping)) {
+        List<EmailFieldMappingDO> mapping = this.fieldMappingMapper.getEmailFieldMapping(ListUtil.of(3, 4));
+        if (CollUtil.isEmpty(mapping)) {
             throw new ReportParseException(ReportParseStatus.PARSE_RULE_NO_FUND);
         }
-        for (EmailFieldMappingDO mapping : emailFieldMapping) {
-            String code = mapping.getCode();
-            List<String> names = StrUtil.split(mapping.getName(), ",");
+        for (EmailFieldMappingDO temp : mapping) {
+            String code = temp.getCode();
+            List<String> names = StrUtil.split(temp.getName(), ",");
             for (String name : names) {
                 this.fieldMapper.putIfAbsent(name, code);
             }
@@ -64,6 +61,49 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
     protected abstract void cleaningReportData(T reportData);
 
     /**
+     * 解析报告的其他信息并设置到对象中
+     *
+     * @param reportInfo 报告基本信息
+     * @param fundInfo   报告中基金基本信息
+     * @return /
+     */
+    protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo,
+                                                ReportFundInfoDTO fundInfo) throws ReportParseException;
+
+    /**
+     * 绑定基金基本信息
+     *
+     * @param params /
+     * @return /
+     */
+    protected abstract ReportFundInfoDTO buildFundInfo(ReportParserParams params);
+
+    /**
+     * Builds the complete parse result for one report.
+     * <p>
+     * The steps run in a fixed order: report base info, basic info of the report's
+     * main fund, the parser-specific extra data, and finally data cleaning.
+     *
+     * @param params   parse request parameters
+     * @param filename report file name, attached to the failure raised for unexpected errors
+     * @return the report data returned by {@link #parseExtInfoAndSetData} after cleaning
+     * @throws ReportParseException rethrown unchanged when raised by any step; every other
+     *                              exception is translated to
+     *                              {@link ReportParseStatus#NOT_A_FIXED_FORMAT}
+     */
+    protected T buildReportData(ReportParserParams params, String filename) {
+        try {
+            // Report base info.
+            ReportBaseInfoDTO reportInfo = this.buildReportInfo(params);
+            // Basic info of the main fund described by the report.
+            ReportFundInfoDTO reportFundInfo = this.buildFundInfo(params);
+            // Remaining sections, assembled into the result object.
+            T reportData = this.parseExtInfoAndSetData(reportInfo, reportFundInfo);
+            // Clean the data before handing it back.
+            this.cleaningReportData(reportData);
+            return reportData;
+        } catch (ReportParseException e) {
+            throw e;
+        } catch (Exception e) {
+            // The translated exception below is built from status and file name only, so the
+            // original failure would otherwise be lost; record it before translating.
+            this.logger.warn("报告{} 构建解析结果时出错", filename, e);
+            throw new ReportParseException(ReportParseStatus.NOT_A_FIXED_FORMAT, filename);
+        }
+    }
+
+    /**
      * 构建只有两列表格的dto数据对象
      *
      * @param <DTO>   泛型对象

+ 31 - 23
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ReportParserConstant.java

@@ -14,50 +14,58 @@ import java.util.Map;
 public final class ReportParserConstant {
     public static final Map<ReportType, Map<ReportParserFileType, String>> REPORT_PARSER_BEAN_MAP = MapUtil.newHashMap(8);
 
+    // 其他报告
+    public static final String PARSER_AI_OTHER = "report-parser:ai:other";
+
     // 交易流水确认函解析
     public static final String PARSER_PDF_LETTER = "report-parser:pdf:letter";
+    public static final String PARSER_AI_LETTER = "report-parser:ai:letter";
 
     public static final String PARSER_PDF_MONTHLY = "report-parser:pdf:monthly";
-    //    public static final String PARSER_WORD_MONTHLY = "report-parser:word:monthly";
     public static final String PARSER_EXCEL_MONTHLY = "report-parser:excel:monthly";
-//    public static final String PARSER_PYTHON_MONTHLY = "report-parser:python:monthly";
+    public static final String PARSER_AI_MONTHLY = "report-parser:ai:monthly";
 
     public static final String PARSER_PDF_QUARTERLY = "report-parser:pdf:quarterly";
-//    public static final String PARSER_WORD_QUARTERLY = "report-parser:word:quarterly";
     public static final String PARSER_EXCEL_QUARTERLY = "report-parser:excel:quarterly";
-//    public static final String PARSER_PYTHON_QUARTERLY = "report-parser:python:quarterly";
-//
+    public static final String PARSER_AI_QUARTERLY = "report-parser:ai:quarterly";
+
     public static final String PARSER_PDF_ANNUALLY = "report-parser:pdf:annually";
-//    public static final String PARSER_WORD_ANNUALLY = "report-parser:word:annually";
     public static final String PARSER_EXCEL_ANNUALLY = "report-parser:excel:annually";
-//    public static final String PARSER_PYTHON_ANNUALLY = "report-parser:python:annually";
+    public static final String PARSER_AI_ANNUALLY = "report-parser:ai:annually";
 
     static {
+        // 其他报告解析
+        REPORT_PARSER_BEAN_MAP.put(ReportType.OTHER, Map.of(ReportParserFileType.AI, PARSER_AI_OTHER));
+
         // 交易流水确认函解析
-        REPORT_PARSER_BEAN_MAP.put(ReportType.LETTER, Map.of(ReportParserFileType.PDF, PARSER_PDF_LETTER));
+        REPORT_PARSER_BEAN_MAP.put(ReportType.LETTER,
+                Map.of(
+                        ReportParserFileType.PDF, PARSER_PDF_LETTER,
+                        ReportParserFileType.AI, PARSER_AI_LETTER
+                ));
 
+        // 月度报告
         REPORT_PARSER_BEAN_MAP.put(ReportType.MONTHLY,
-                Map.of(ReportParserFileType.PDF, PARSER_PDF_MONTHLY,
-//                        ReportParserFileType.WORD, PARSER_WORD_MONTHLY,
-                        ReportParserFileType.EXCEL, PARSER_EXCEL_MONTHLY
-
-//                        ReportParserFileType.PYTHON, PARSER_PYTHON_MONTHLY
+                Map.of(
+                        ReportParserFileType.PDF, PARSER_PDF_MONTHLY,
+                        ReportParserFileType.EXCEL, PARSER_EXCEL_MONTHLY,
+                        ReportParserFileType.AI, PARSER_AI_MONTHLY
                 ));
 
+        // 季度报告
         REPORT_PARSER_BEAN_MAP.put(ReportType.QUARTERLY,
-                Map.of(ReportParserFileType.PDF, PARSER_PDF_QUARTERLY,
-//                        ReportParserFileType.WORD, PARSER_WORD_QUARTERLY,
-                        ReportParserFileType.EXCEL, PARSER_EXCEL_QUARTERLY
-
-//                        ReportParserFileType.PYTHON, PARSER_PYTHON_QUARTERLY
+                Map.of(
+                        ReportParserFileType.PDF, PARSER_PDF_QUARTERLY,
+                        ReportParserFileType.EXCEL, PARSER_EXCEL_QUARTERLY,
+                        ReportParserFileType.AI, PARSER_AI_QUARTERLY
                 ));
 
+        // 年度报告
         REPORT_PARSER_BEAN_MAP.put(ReportType.ANNUALLY,
-                Map.of(ReportParserFileType.PDF, PARSER_PDF_ANNUALLY,
-//                        ReportParserFileType.WORD, PARSER_WORD_ANNUALLY,
-                        ReportParserFileType.EXCEL, PARSER_EXCEL_ANNUALLY
-
-//                        ReportParserFileType.PYTHON, PARSER_PYTHON_ANNUALLY
+                Map.of(
+                        ReportParserFileType.PDF, PARSER_PDF_ANNUALLY,
+                        ReportParserFileType.EXCEL, PARSER_EXCEL_ANNUALLY,
+                        ReportParserFileType.AI, PARSER_AI_ANNUALLY
                 ));
     }
 }

+ 2 - 1
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ReportParserFactory.java

@@ -4,6 +4,7 @@ import cn.hutool.core.map.MapUtil;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.enums.ReportParserFileType;
 import com.smppw.modaq.common.enums.ReportType;
+import com.smppw.modaq.common.exception.NotSupportReportException;
 import com.smppw.modaq.common.exception.ReportParseException;
 import com.smppw.modaq.domain.dto.report.ReportData;
 import org.springframework.stereotype.Component;
@@ -23,7 +24,7 @@ public class ReportParserFactory {
         String beanName = ReportParserConstant.REPORT_PARSER_BEAN_MAP.getOrDefault(reportType, MapUtil.empty()).get(reportParserFileType);
         ReportParser<? extends ReportData> reportParser = REPORT_WRITER_MAP.get(beanName);
         if (reportParser == null) {
-            throw new ReportParseException(ReportParseStatus.NO_SUPPORT_TEMPLATE);
+            throw new NotSupportReportException(ReportParseStatus.NO_SUPPORT_TEMPLATE);
         }
         return (ReportParser<T>) reportParser;
     }

+ 43 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AIAnnuallyReportParser.java

@@ -0,0 +1,43 @@
+package com.smppw.modaq.application.components.report.parser.ai;
+
+import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
+import com.smppw.modaq.common.exception.ReportParseException;
+import com.smppw.modaq.domain.dto.report.AnnuallyReportData;
+import com.smppw.modaq.domain.dto.report.ReportBaseInfoDTO;
+import com.smppw.modaq.domain.dto.report.ReportFundInfoDTO;
+import com.smppw.modaq.domain.dto.report.ReportParserParams;
+import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
+import org.springframework.stereotype.Component;
+
+/**
+ * AI parser placeholder for annual reports.
+ * <p>
+ * {@link #isSupportAIParse()} returns {@code false}, so {@code AbstractAIReportParser.parse}
+ * rejects the request with {@code NO_SUPPORT_AI} before any other method of this class is
+ * reached; the remaining overrides are empty stubs.
+ */
+@Component(ReportParserConstant.PARSER_AI_ANNUALLY)
+public class AIAnnuallyReportParser extends AbstractAIReportParser<AnnuallyReportData> {
+    public AIAnnuallyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    /** AI parsing is not enabled for annual reports. */
+    @Override
+    protected boolean isSupportAIParse() {
+        return false;
+    }
+
+    /** Returns an empty prompt; the base class only forwards non-blank prompts. */
+    @Override
+    protected String prompt() {
+        return "";
+    }
+
+    /** Intentionally empty: nothing is extracted from the AI reply. */
+    @Override
+    protected void handleAiResult(String result) throws ReportParseException {
+
+    }
+
+    /** Stub: always returns {@code null}. */
+    @Override
+    protected AnnuallyReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo,
+                                                        ReportFundInfoDTO fundInfo) throws ReportParseException {
+        return null;
+    }
+
+    /** Stub: always returns {@code null}. */
+    @Override
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
+        return null;
+    }
+}

+ 74 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AILetterReportParser.java

@@ -0,0 +1,74 @@
+package com.smppw.modaq.application.components.report.parser.ai;
+
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.map.MapUtil;
+import cn.hutool.json.JSONObject;
+import cn.hutool.json.JSONUtil;
+import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
+import com.smppw.modaq.common.enums.ReportParseStatus;
+import com.smppw.modaq.common.exception.ReportParseException;
+import com.smppw.modaq.domain.dto.report.*;
+import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * AI parser for transaction confirmation letters.
+ * <p>
+ * The JSON returned by the AI service is flattened into a single-level map, from which
+ * the fund info, investor info and fund transaction DTOs are built. No prompt is
+ * overridden here, so the base class sends none and the service default applies.
+ */
+@Component(ReportParserConstant.PARSER_AI_LETTER)
+public class AILetterReportParser extends AbstractAIReportParser<LetterReportData> {
+    public AILetterReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    @Override
+    protected boolean isSupportAIParse() {
+        return true;
+    }
+
+    /**
+     * Parses the AI reply as a JSON object and merges its flattened entries into
+     * {@code allInfoMap}.
+     *
+     * @param result JSON text extracted from the AI reply
+     * @throws ReportParseException with {@code PARSE_HANDLE_FAIL} when the text cannot be
+     *                              parsed or flattened
+     */
+    @Override
+    protected void handleAiResult(String result) throws ReportParseException {
+        try {
+            JSONObject jsonObject = JSONUtil.parseObj(result);
+            this.allInfoMap.putAll(flattenMap(jsonObject));
+        } catch (Exception e) {
+            // The status-only exception drops the cause, so log it here for diagnosis.
+            this.logger.warn("AI解析结果处理失败:{}", result, e);
+            throw new ReportParseException(ReportParseStatus.PARSE_HANDLE_FAIL);
+        }
+    }
+
+    /**
+     * Builds the letter result from the flattened AI data.
+     *
+     * @param reportInfo report base info
+     * @param fundInfo   fund info built by {@link #buildFundInfo}
+     * @return letter data carrying investor info and the fund transaction
+     */
+    @Override
+    protected LetterReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) throws ReportParseException {
+        Integer fileId = reportInfo.getFileId();
+        if (this.logger.isInfoEnabled()) {
+            this.logger.info("文件{} 解析内容是:{}", fileId, this.allInfoMap);
+        }
+        // Investor info.
+        ReportInvestorInfoDTO investorInfo = this.buildDto(fileId, ReportInvestorInfoDTO.class, this.allInfoMap);
+        // Fund transaction record.
+        ReportFundTransactionDTO fundTransaction = this.buildDto(fileId, ReportFundTransactionDTO.class, this.allInfoMap);
+        // Assemble the result.
+        LetterReportData reportData = new LetterReportData(reportInfo, fundInfo);
+        reportData.setFundTransaction(fundTransaction);
+        reportData.setInvestorInfo(investorInfo);
+        return reportData;
+    }
+
+    @Override
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
+        return this.buildDto(params.getFileId(), ReportFundInfoDTO.class, this.allInfoMap);
+    }
+
+    /**
+     * Collapses a nested map into one level, keyed by the leaf key only.
+     * <p>
+     * Leaves that share a key in different branches overwrite each other; which one
+     * survives depends on the iteration order of the maps involved.
+     *
+     * @param data possibly nested map
+     * @return new map holding every non-map value under its own key
+     */
+    @SuppressWarnings("unchecked")
+    private static Map<String, Object> flattenMap(Map<String, Object> data) {
+        Map<String, Object> result = MapUtil.newHashMap(16);
+        for (Map.Entry<String, Object> entry : data.entrySet()) {
+            Object value = entry.getValue();
+            if (value instanceof Map<?, ?>) {
+                // Safe for parsed JSON objects, whose keys are always strings.
+                result.putAll(flattenMap((Map<String, Object>) value));
+            } else {
+                result.put(entry.getKey(), value);
+            }
+        }
+        return result;
+    }
+}

+ 77 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AIMonthlyReportParser.java

@@ -0,0 +1,77 @@
+package com.smppw.modaq.application.components.report.parser.ai;
+
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.json.JSONObject;
+import cn.hutool.json.JSONUtil;
+import com.smppw.modaq.application.components.ReportParseUtils;
+import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
+import com.smppw.modaq.common.enums.ReportParseStatus;
+import com.smppw.modaq.common.exception.ReportParseException;
+import com.smppw.modaq.domain.dto.report.*;
+import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * AI parser for monthly reports.
+ * <p>
+ * The AI reply is expected to be a JSON object whose {@code "基金概况"} entry holds the
+ * fund overview; every other object-valued entry is treated as one net-value table,
+ * and its key is used to derive the share-class level.
+ */
+@Component(ReportParserConstant.PARSER_AI_MONTHLY)
+public class AIMonthlyReportParser extends AbstractAIReportParser<MonthlyReportData> {
+    public AIMonthlyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    @Override
+    protected boolean isSupportAIParse() {
+        return true;
+    }
+
+    @Override
+    protected String prompt() {
+        return "识别文件中的基金概况、净值信息表格数据,要求准确识别金额等小数的位数,去掉金额单位、英文和多余的空格,结果用json返回";
+    }
+
+    /**
+     * Parses the AI reply as a JSON object and copies its top-level entries into
+     * {@code allInfoMap}.
+     *
+     * @param result JSON text extracted from the AI reply
+     * @throws ReportParseException with {@code PARSE_HANDLE_FAIL} when the text is not a
+     *                              JSON object
+     */
+    @Override
+    protected void handleAiResult(String result) throws ReportParseException {
+        try {
+            JSONObject jsonObject = JSONUtil.parseObj(result);
+            this.allInfoMap.putAll(jsonObject);
+        } catch (Exception e) {
+            // The status-only exception drops the cause, so log it here for diagnosis.
+            this.logger.warn("AI解析结果处理失败:{}", result, e);
+            throw new ReportParseException(ReportParseStatus.PARSE_HANDLE_FAIL);
+        }
+    }
+
+    /**
+     * Builds one net-value DTO per remaining object-valued entry of {@code allInfoMap}.
+     * <p>
+     * Entries whose value is not a map (scalars, arrays, null markers) carry no table
+     * and are skipped instead of failing the whole report with a cast error.
+     *
+     * @param reportInfo report base info
+     * @param fundInfo   fund info built by {@link #buildFundInfo}
+     * @return monthly data carrying the collected net-value rows
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    protected MonthlyReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo,
+                                                       ReportFundInfoDTO fundInfo) throws ReportParseException {
+        List<ReportNetReportDTO> dtos = ListUtil.list(true);
+        for (Map.Entry<String, Object> entry : this.allInfoMap.entrySet()) {
+            Object netInfo = entry.getValue();
+            if (!(netInfo instanceof Map<?, ?>)) {
+                continue;
+            }
+            // Safe for parsed JSON objects, whose keys are always strings.
+            Map<String, Object> netInfoMap = (Map<String, Object>) netInfo;
+            ReportNetReportDTO dto = this.buildDto(reportInfo.getFileId(), ReportNetReportDTO.class, netInfoMap);
+            if (dto == null) {
+                continue;
+            }
+            dto.setLevel(ReportParseUtils.matchFundLevel(entry.getKey()));
+            dtos.add(dto);
+        }
+        MonthlyReportData reportData = new MonthlyReportData(reportInfo, fundInfo);
+        reportData.setNetReport(dtos);
+        return reportData;
+    }
+
+    /**
+     * Removes the fund overview from {@code allInfoMap} and converts it to a DTO.
+     * Removing it keeps the overview out of the net-value loop that runs afterwards.
+     *
+     * @param params parse request parameters
+     * @return fund info built from the {@code "基金概况"} entry
+     * @throws ReportParseException with {@code PARSE_FUND_INFO_FAIL} when the entry is
+     *                              missing or is not a JSON object
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
+        Object fundInfo = this.allInfoMap.remove("基金概况");
+        if (!(fundInfo instanceof Map<?, ?>)) {
+            throw new ReportParseException(ReportParseStatus.PARSE_FUND_INFO_FAIL, params.getFilename());
+        }
+        Map<String, Object> fundInfoMap = (Map<String, Object>) fundInfo;
+        return this.buildDto(params.getFileId(), ReportFundInfoDTO.class, fundInfoMap);
+    }
+}

+ 43 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AIQuarterlyReportParser.java

@@ -0,0 +1,43 @@
+package com.smppw.modaq.application.components.report.parser.ai;
+
+import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
+import com.smppw.modaq.common.exception.ReportParseException;
+import com.smppw.modaq.domain.dto.report.QuarterlyReportData;
+import com.smppw.modaq.domain.dto.report.ReportBaseInfoDTO;
+import com.smppw.modaq.domain.dto.report.ReportFundInfoDTO;
+import com.smppw.modaq.domain.dto.report.ReportParserParams;
+import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
+import org.springframework.stereotype.Component;
+
+/**
+ * AI parser placeholder for quarterly reports.
+ * <p>
+ * {@link #isSupportAIParse()} returns {@code false}, so {@code AbstractAIReportParser.parse}
+ * rejects the request with {@code NO_SUPPORT_AI} before any other method of this class is
+ * reached; the remaining overrides are empty stubs.
+ */
+@Component(ReportParserConstant.PARSER_AI_QUARTERLY)
+public class AIQuarterlyReportParser extends AbstractAIReportParser<QuarterlyReportData> {
+    public AIQuarterlyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    /** AI parsing is not enabled for quarterly reports. */
+    @Override
+    protected boolean isSupportAIParse() {
+        return false;
+    }
+
+    /** Returns an empty prompt; the base class only forwards non-blank prompts. */
+    @Override
+    protected String prompt() {
+        return "";
+    }
+
+    /** Intentionally empty: nothing is extracted from the AI reply. */
+    @Override
+    protected void handleAiResult(String result) throws ReportParseException {
+
+    }
+
+    /** Stub: always returns {@code null}. */
+    @Override
+    protected QuarterlyReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo,
+                                                         ReportFundInfoDTO fundInfo) throws ReportParseException {
+        return null;
+    }
+
+    /** Stub: always returns {@code null}. */
+    @Override
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
+        return null;
+    }
+}

+ 113 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AbstractAIReportParser.java

@@ -0,0 +1,113 @@
+package com.smppw.modaq.application.components.report.parser.ai;
+
+import cn.hutool.core.exceptions.ExceptionUtil;
+import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.util.StrUtil;
+import cn.hutool.http.HttpUtil;
+import cn.hutool.json.JSONObject;
+import cn.hutool.json.JSONUtil;
+import com.smppw.modaq.application.components.report.parser.AbstractReportParser;
+import com.smppw.modaq.common.enums.ReportParseStatus;
+import com.smppw.modaq.common.exception.ReportParseException;
+import com.smppw.modaq.domain.dto.report.ReportData;
+import com.smppw.modaq.domain.dto.report.ReportParserParams;
+import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
+import org.springframework.beans.factory.annotation.Value;
+
+import java.util.Map;
+
+/**
+ * Base class for report parsers that delegate extraction to an external AI service.
+ * <p>
+ * {@link #parse} sends the file location and an optional prompt to the service with an
+ * HTTP GET, takes the text between the first pair of code fences in the reply, passes it
+ * to {@link #handleAiResult} and then runs {@code buildReportData} from the parent class.
+ * <p>
+ * NOTE(review): {@code aiFileId} and {@code allInfoMap} hold per-parse state in instance
+ * fields that {@link #init()} resets. The concrete parsers are declared with
+ * {@code @Component}; if those beans are shared between threads, concurrent
+ * {@code parse} calls would overwrite each other's state — confirm how callers
+ * serialize parsing.
+ *
+ * @param <T> type of the report data produced
+ */
+public abstract class AbstractAIReportParser<T extends ReportData> extends AbstractReportParser<T> {
+    /** Endpoint of the AI parsing service. */
+    @Value("${email.report.ai-parser-url}")
+    private String aiParserUrl;
+
+    /** File id reported by the AI service for the current parse. */
+    protected String aiFileId;
+
+    /** Data extracted from the AI reply for the current parse. */
+    protected Map<String, Object> allInfoMap;
+
+    public AbstractAIReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    /**
+     * Parses one report through the AI service.
+     *
+     * @param params parse request parameters (file name, file path, optional AI file id)
+     * @return the built report data, flagged as AI-parsed and carrying the AI file id;
+     *         {@code null} only if the subclass build step returns {@code null}
+     * @throws ReportParseException {@code NO_SUPPORT_AI} when the subclass opts out;
+     *                              {@code AI_NOT_FOUND} when the service call fails or the
+     *                              reply has no fenced block; otherwise whatever
+     *                              {@link #handleAiResult} or the build step raises
+     */
+    @Override
+    public T parse(ReportParserParams params) throws ReportParseException {
+        if (!isSupportAIParse()) {
+            throw new ReportParseException(ReportParseStatus.NO_SUPPORT_AI);
+        }
+        // Reset per-parse state.
+        this.init();
+        String filename = params.getFilename();
+        String filepath = params.getFilepath();
+        Map<String, Object> paramsMap = MapUtil.newHashMap(4);
+        paramsMap.put("filepath", filepath);
+        paramsMap.put("file_id", params.getAiFileId());
+        String prompt = this.prompt();
+        if (StrUtil.isNotBlank(prompt)) {
+            paramsMap.put("user_msg", prompt);
+        }
+        String body = null;
+        try {
+            body = HttpUtil.get(this.aiParserUrl, paramsMap);
+            JSONObject jsonResult = JSONUtil.parseObj(body);
+            this.aiFileId = MapUtil.getStr(jsonResult, "file_id");
+            java.util.List<String> segments = StrUtil.split(jsonResult.getStr("content"), "```");
+            if (segments.size() < 2) {
+                // No fenced block in the reply. Raise the same status the former index
+                // failure produced, but through the branch that logs the raw reply.
+                throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
+            }
+            String content = segments.get(1);
+            // NOTE(review): everything after the first "{" already ends with the object's
+            // own "}", so the appended "}" is extra; kept because the JSON parser has
+            // accepted it so far — confirm whether it is meant to close truncated replies.
+            String aiParserContent = "{" + StrUtil.subAfter(content, "{", false) + "}";
+            // The text always starts with "{", so it is never blank.
+            this.handleAiResult(aiParserContent);
+        } catch (ReportParseException e) {
+            this.logger.warn("{} ai解析失败,解析结果{},错误原因:{}", filename, body, ExceptionUtil.stacktraceToString(e));
+            throw e;
+        } catch (Exception e) {
+            this.logger.warn("报告{} 在AI解析时报错:{}", filename, ExceptionUtil.stacktraceToString(e));
+            throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
+        }
+        T reportData = this.buildReportData(params, filename);
+        if (reportData != null) {
+            reportData.setAiFileId(this.aiFileId);
+            reportData.setAiParse(true);
+        }
+        return reportData;
+    }
+
+
+    /** No cleaning is applied to AI results by default. */
+    @Override
+    protected void cleaningReportData(T reportData) {
+        // do something.
+    }
+
+    /**
+     * Prompt sent to the AI service.
+     *
+     * @return the prompt; {@code null} or blank means no prompt is sent and the service
+     *         default applies
+     */
+    protected String prompt() {
+        return null;
+    }
+
+    /**
+     * Whether this report type may be parsed by the AI service.
+     *
+     * @return {@code true} to allow {@link #parse} to proceed
+     */
+    protected abstract boolean isSupportAIParse();
+
+    /**
+     * Converts the AI reply into the structures the build step reads.
+     *
+     * @param result JSON text extracted from the AI reply
+     * @throws ReportParseException when the reply cannot be processed
+     */
+    protected abstract void handleAiResult(String result) throws ReportParseException;
+
+    /** Runs the parent initialisation, then clears the per-parse AI state. */
+    @Override
+    protected void init() {
+        super.init();
+        this.aiFileId = null;
+        this.allInfoMap = MapUtil.newHashMap(128);
+    }
+}

+ 51 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AiOtherReportParser.java

@@ -0,0 +1,51 @@
+package com.smppw.modaq.application.components.report.parser.ai;
+
+import cn.hutool.json.JSONObject;
+import cn.hutool.json.JSONUtil;
+import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
+import com.smppw.modaq.common.enums.ReportParseStatus;
+import com.smppw.modaq.common.exception.ReportParseException;
+import com.smppw.modaq.domain.dto.report.*;
+import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
+import org.springframework.stereotype.Component;
+
+/**
+ * AI parser for reports of any other format; only the report base info and the fund
+ * basic info are extracted.
+ * <p>
+ * NOTE(review): the result is carried in a {@code MonthlyReportData} although the report
+ * type is "other" — confirm that downstream writers do not treat it as a monthly report.
+ */
+@Component(ReportParserConstant.PARSER_AI_OTHER)
+public class AiOtherReportParser extends AbstractAIReportParser<ReportData> {
+    public AiOtherReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    @Override
+    protected String prompt() {
+        return "识别文件中的基金名称、基金管理人、基金托管人和报告日期,如果无法识别就返回空字符,结果用json返回";
+    }
+
+    @Override
+    protected boolean isSupportAIParse() {
+        return true;
+    }
+
+    /**
+     * Parses the AI reply as a JSON object and copies its top-level entries into
+     * {@code allInfoMap}.
+     *
+     * @param result JSON text extracted from the AI reply
+     * @throws ReportParseException with {@code PARSE_HANDLE_FAIL} when the text is not a
+     *                              JSON object
+     */
+    @Override
+    protected void handleAiResult(String result) throws ReportParseException {
+        try {
+            JSONObject jsonObject = JSONUtil.parseObj(result);
+            this.allInfoMap.putAll(jsonObject);
+        } catch (Exception e) {
+            // The status-only exception drops the cause, so log it here for diagnosis.
+            this.logger.warn("AI解析结果处理失败:{}", result, e);
+            throw new ReportParseException(ReportParseStatus.PARSE_HANDLE_FAIL);
+        }
+    }
+
+    /**
+     * Wraps the two info objects without extracting anything further.
+     *
+     * @param reportInfo report base info
+     * @param fundInfo   fund info built by {@link #buildFundInfo}
+     * @return a result holding only the two info objects
+     */
+    @Override
+    protected ReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo,
+                                                ReportFundInfoDTO fundInfo) throws ReportParseException {
+        return new MonthlyReportData(reportInfo, fundInfo);
+    }
+
+    @Override
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
+        return this.buildDto(params.getFileId(), ReportFundInfoDTO.class, this.allInfoMap);
+    }
+}

+ 28 - 82
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/AbstractPDReportParser.java

@@ -1,24 +1,21 @@
 package com.smppw.modaq.application.components.report.parser.pdf;
 
 import cn.hutool.core.collection.ListUtil;
-import cn.hutool.core.exceptions.ExceptionUtil;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.StrUtil;
-import cn.hutool.http.HttpUtil;
-import cn.hutool.json.JSONObject;
-import cn.hutool.json.JSONUtil;
 import com.smppw.modaq.application.components.CustomPDFTextStripper;
 import com.smppw.modaq.application.components.ReportParseUtils;
 import com.smppw.modaq.application.components.report.parser.AbstractReportParser;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.enums.ReportType;
 import com.smppw.modaq.common.exception.ReportParseException;
-import com.smppw.modaq.domain.dto.report.*;
+import com.smppw.modaq.domain.dto.report.BaseReportLevelDTO;
+import com.smppw.modaq.domain.dto.report.ReportData;
+import com.smppw.modaq.domain.dto.report.ReportParserParams;
 import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
 import org.apache.pdfbox.pdmodel.PDDocument;
-import org.springframework.beans.factory.annotation.Value;
 import technology.tabula.CustomObjectExtractor;
 import technology.tabula.Page;
 import technology.tabula.PageIterator;
@@ -42,15 +39,6 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
      */
     protected List<String> textList;
 
-    @Value("${email.report.ai-parser-url}")
-    private String aiParserUrl;
-
-    protected String aiFileId;
-
-    protected String aiParserContent;
-
-    protected Boolean aiParse = false;
-
     public AbstractPDReportParser(EmailFieldMappingMapper fieldMappingMapper) {
         super(fieldMappingMapper);
     }
@@ -68,9 +56,9 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
             String text = stripper.getText(document);
             this.textList = StrUtil.split(text, System.lineSeparator());
             this.textList.removeIf(StrUtil::isBlank);
-//            if (this.textList.isEmpty()) {
-//                throw new ReportParseException(ReportParseStatus.REPORT_IS_SCAN, filename);
-//            }
+            if (this.textList.isEmpty()) {
+                throw new ReportParseException(ReportParseStatus.REPORT_IS_SCAN, filename);
+            }
             // 解析所有表格(单元格字符去水印)
             List<Table> tables = ListUtil.list(true);
             SpreadsheetExtractionAlgorithm spreadsheetExtractionAlgorithm = new SpreadsheetExtractionAlgorithm();
@@ -84,54 +72,30 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
                 if (i >= 1 && params.getReportType() == ReportType.LETTER) {
                     break;
                 }
-                Integer rows = tableList.stream().map(Table::getRowCount)
-                        .filter(rowCount -> rowCount >= 1).reduce(0, Integer::sum);
-                if (rows >= 1) {
-                    for (Table table : tableList) {
-                        int rowCount = table.getRowCount();
-                        if (rowCount >= 1) {
-                            tables.add(table);
-                        }
-                    }
-                } else if (params.getReportType() == ReportType.LETTER) {
-                    this.aiParse = true;
-                    Map<String, Object> paramsMap = MapUtil.newHashMap(4);
-                    paramsMap.put("filepath", filepath);
-                    paramsMap.put("file_id", params.getAiFileId());
-                    String body = null;
-                    try {
-                        body = HttpUtil.get(this.aiParserUrl, paramsMap);
-                        JSONObject jsonObject = JSONUtil.parseObj(body);
-                        this.aiFileId = MapUtil.getStr(jsonObject, "file_id");
-                        String content = StrUtil.split(jsonObject.getStr("content"), "```").get(1);
-                        this.aiParserContent = "{" + StrUtil.subAfter(content, "{", false) + "}";
-                    } catch (Exception e) {
-                        this.logger.warn("{} ai解析失败,解析结果{},错误原因:{}",
-                                filename, body, ExceptionUtil.stacktraceToString(e));
+                for (Table table : tableList) {
+                    int rowCount = table.getRowCount();
+                    if (rowCount >= 1) {
+                        tables.add(table);
                     }
                 }
                 i++;
             }
-            if (tables.isEmpty() && StrUtil.isBlank(this.aiParserContent)) {
+            if (tables.isEmpty()) {
                 throw new ReportParseException(ReportParseStatus.REPORT_IS_SCAN, filename);
             }
+            Integer rows = tables.stream().map(Table::getRowCount)
+                    .filter(rowCount -> rowCount >= 1).reduce(0, Integer::sum);
+            if (rows < 1) {
+                throw new ReportParseException(ReportParseStatus.NOT_A_FIXED_FORMAT, filename);
+            }
             this.initTableInfo(tables);
         }
-        try {
-            // 报告基本信息
-            ReportBaseInfoDTO reportInfo = this.buildReportInfo(params);
-            // 解析报告中主体基金的基本信息
-            ReportFundInfoDTO reportFundInfo = this.buildFundInfo(params);
-            // 解析其他表格信息并且设置结果字段
-            T reportData = this.parseExtInfoAndSetData(reportInfo, reportFundInfo);
-            // 数据清洗后返回
-            this.cleaningReportData(reportData);
-            return reportData;
-        } catch (ReportParseException e) {
-            throw e;
-        } catch (Exception e) {
-            throw new ReportParseException(ReportParseStatus.NOT_A_FIXED_FORMAT, filename);
+        T reportData = this.buildReportData(params, filename);
+        if (!reportData.wasSuccessful()) {
+            // 抛出异常方便ai解析
+            throw new ReportParseException(ReportParseStatus.PARSE_CORE_INFO_FAIL, filename);
         }
+        return reportData;
     }
 
     /**
@@ -141,23 +105,6 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
      */
     protected abstract void initTableInfo(List<Table> tables);
 
-    /**
-     * 绑定基金基本信息
-     *
-     * @param params /
-     * @return /
-     */
-    protected abstract ReportFundInfoDTO buildFundInfo(ReportParserParams params);
-
-    /**
-     * 解析报告的其他信息并设置到对象中
-     *
-     * @param reportInfo 报告基本信息
-     * @param fundInfo   报告中基金基本信息
-     * @return /
-     */
-    protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo,
-                                                ReportFundInfoDTO fundInfo);
 
     @Override
     protected void cleaningReportData(T reportData) {
@@ -169,9 +116,6 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
         super.init();
         // 先初始化为null
         this.textList = null;
-        this.aiFileId = null;
-        this.aiParserContent = null;
-        this.aiParse = false;
     }
 
     /**
@@ -259,11 +203,13 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
         for (Map.Entry<Integer, List<Table>> entry : spanningPageTableMap.entrySet()) {
             List<Table> spanningPageShareChangeTables = entry.getValue();
             Table master = spanningPageShareChangeTables.get(0);
-            Table slave = spanningPageShareChangeTables.get(1);
-            int rowCount = master.getRowCount();
-            for (int j = 0; j < slave.getRowCount(); j++) {
-                for (int k = 0; k < slave.getColCount(); k++) {
-                    master.add(slave.getCell(j, k), rowCount + j, k);
+            if (spanningPageShareChangeTables.size() == 2) {
+                Table slave = spanningPageShareChangeTables.get(1);
+                int rowCount = master.getRowCount();
+                for (int j = 0; j < slave.getRowCount(); j++) {
+                    for (int k = 0; k < slave.getColCount(); k++) {
+                        master.add(slave.getCell(j, k), rowCount + j, k);
+                    }
                 }
             }
             tables.add(entry.getKey(), master);

+ 2 - 27
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDLetterReportParser.java

@@ -1,11 +1,8 @@
 package com.smppw.modaq.application.components.report.parser.pdf;
 
 import cn.hutool.core.collection.CollUtil;
-import cn.hutool.core.collection.ListUtil;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.StrUtil;
-import cn.hutool.json.JSONObject;
-import cn.hutool.json.JSONUtil;
 import com.smppw.modaq.application.components.ReportParseUtils;
 import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
 import com.smppw.modaq.domain.dto.report.*;
@@ -29,13 +26,6 @@ public class PDLetterReportParser extends AbstractPDReportParser<LetterReportDat
     protected void initTableInfo(List<Table> tables) {
         // 每次重新清空map数据
         this.allInfoMap.clear();
-        if (CollUtil.isEmpty(tables)) {
-            if (StrUtil.isNotBlank(this.aiParserContent)) {
-                JSONObject jsonObject = JSONUtil.parseObj(this.aiParserContent);
-                this.allInfoMap.putAll(flattenMap(jsonObject, ListUtil.list(false)));
-            }
-            return;
-        }
         for (Table table : tables) {
             int rowCount = table.getRowCount();
             int colCount = table.getColCount();
@@ -87,7 +77,7 @@ public class PDLetterReportParser extends AbstractPDReportParser<LetterReportDat
     protected LetterReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) {
         Integer fileId = reportInfo.getFileId();
         if (this.logger.isInfoEnabled()) {
-            this.logger.info("文件{},是否AI解析{}, 解析的内容是:{}", fileId, this.aiParse, this.allInfoMap);
+            this.logger.info("文件{} 解析的内容是:{}", fileId, this.allInfoMap);
         }
         // 投资者信息
         ReportInvestorInfoDTO investorInfo = this.buildDto(fileId, ReportInvestorInfoDTO.class, this.allInfoMap);
@@ -97,25 +87,10 @@ public class PDLetterReportParser extends AbstractPDReportParser<LetterReportDat
         LetterReportData reportData = new LetterReportData(reportInfo, fundInfo);
         reportData.setInvestorInfo(investorInfo);
         reportData.setFundTransaction(fundTransaction);
-        reportData.setAiParse(Objects.equals(true, this.aiParse));
-        reportData.setAiFileId(this.aiFileId);
+        reportData.setAiParse(false);
         return reportData;
     }
 
-    @SuppressWarnings("unchecked")
-    private static Map<String, Object> flattenMap(Map<String, Object> data, List<String> keys) {
-        Map<String, Object> result = MapUtil.newHashMap(16);
-        for (Map.Entry<String, Object> entry : data.entrySet()) {
-            List<String> currKeys = ListUtil.toList(keys);
-            currKeys.add(entry.getKey());
-            if (entry.getValue() instanceof Map<?, ?>) {
-                result.putAll(flattenMap((Map<String, Object>) entry.getValue(), currKeys));
-            } else {
-                result.put(entry.getKey(), entry.getValue());
-            }
-        }
-        return result;
-    }
 
     /**
      * 解析键和值是垂直格式的表格数据

+ 0 - 5
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDMonthlyReportParser.java

@@ -97,9 +97,4 @@ public class PDMonthlyReportParser extends AbstractPDReportParser<MonthlyReportD
         reportData.setNetReport(dtos);
         return reportData;
     }
-
-    @Override
-    protected void cleaningReportData(MonthlyReportData reportData) {
-        // todo 数据清洗
-    }
 }

+ 0 - 5
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDQuarterlyReportParser.java

@@ -187,11 +187,6 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
         return t;
     }
 
-    @Override
-    protected void cleaningReportData(T reportData) {
-        // todo 数据清洗
-    }
-
     /**
      * 构建基金行业配置解析数据
      *

+ 18 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/writer/OtherReportWriter.java

@@ -0,0 +1,18 @@
+package com.smppw.modaq.application.components.report.writer;
+
+import com.smppw.modaq.domain.dto.report.ReportData;
+import com.smppw.modaq.domain.mapper.report.ReportBaseInfoMapper;
+import com.smppw.modaq.domain.mapper.report.ReportFundInfoMapper;
+import org.springframework.stereotype.Component;
+
+@Component(ReportWriterConstant.WRITER_OTHER)
+public class OtherReportWriter extends AbstractReportWriter<ReportData> { // writer bean for "other" reports; adds nothing beyond what AbstractReportWriter does
+    public OtherReportWriter(ReportBaseInfoMapper baseInfoMapper, ReportFundInfoMapper fundInfoMapper) {
+        super(baseInfoMapper, fundInfoMapper); // both mappers are handed to the base class; presumably it persists base/fund info - confirm in AbstractReportWriter
+    }
+
+    @Override
+    protected void writeExtData(ReportData reportData) {
+        // Intentionally empty: there is no extra data to save for this report type
+    }
+}

+ 5 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/writer/ReportWriterConstant.java

@@ -8,12 +8,17 @@ import java.util.Map;
 public final class ReportWriterConstant {
     public static final Map<ReportType, String> REPORT_TYPE_BEAN_MAP = MapUtil.newHashMap(8);
 
+    static final String WRITER_OTHER = "report-writer:other";
+
     static final String WRITER_LETTER = "report-writer:letter";
+
     static final String WRITER_MONTHLY = "report-writer:monthly";
     static final String WRITER_QUARTERLY = "report-writer:quarterly";
     static final String WRITER_ANNUALLY = "report-writer:annually";
 
     static {
+        REPORT_TYPE_BEAN_MAP.put(ReportType.OTHER, WRITER_OTHER);
+
         REPORT_TYPE_BEAN_MAP.put(ReportType.LETTER, WRITER_LETTER);
 
         REPORT_TYPE_BEAN_MAP.put(ReportType.MONTHLY, WRITER_MONTHLY);

+ 6 - 1
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParseStatus.java

@@ -1,8 +1,10 @@
 package com.smppw.modaq.common.enums;
 
 public enum ReportParseStatus implements StatusCode {
+    AI_NOT_FOUND(20009, "AI资源找不到"),
+    NO_SUPPORT_AI(20010, "报告[{}]不支持AI解析"),
     PARSE_FAIL(21000, "定期报告或交易确认单解析错误:{}"),
-    NOT_A_REPORT(21001, "[{}]不是定期报告或交易确认单"),
+    NOT_A_REPORT(21001, "[{}]不是支持的报告格式"),
     REPORT_IS_SCAN(21002, "报告[{}]为扫描件"),
     NO_SUPPORT_TEMPLATE(21003, "报告[{}]是不支持的文件格式"),
     NOT_A_FIXED_FORMAT(21004, "报告[{}]不是基协统一格式"),
@@ -14,6 +16,9 @@ public enum ReportParseStatus implements StatusCode {
     PARSE_ASSET_INFO_FAIL(21014, "报告[{}]没有解析到基金资产配置信息"),
     PARSE_SHARE_INFO_FAIL(21015, "报告[{}]没有解析到基金份额变动信息"),
 
+    PARSE_CORE_INFO_FAIL(21019, "报告[{}]没有解析到关键信息"),
+    PARSE_HANDLE_FAIL(21018, "报告结果在结构化时转换错误"),
+
     PARSE_RULE_NO_FUND(21020, "未设置报告解析规则"),
     ;
     private final int code;

+ 2 - 1
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParserFileType.java

@@ -10,8 +10,9 @@ import java.util.Arrays;
 public enum ReportParserFileType {
     PDF("pdf"),
 //    WORD("docx,doc"),
-    EXCEL("xlsx,xls");
+    EXCEL("xlsx,xls"),
 //    PYTHON("python");
+    AI("ai");
 
     private final String suffix;
 

+ 8 - 2
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportType.java

@@ -4,10 +4,16 @@ import lombok.Getter;
 
 @Getter
 public enum ReportType {
-    OTHER(-2, "其他报告", new String[]{"观点", "分红公告", "周报", "预警公告"}),
-    LETTER(-1, "交易流水确认函", new String[]{"确认单", "确认函", "确认"}),
+    OTHER(-2, "其他报告", new String[]{"公告", "通知", "告知函", "意见征询函", "说明函",
+            "清算报告", "邀请函", "观点", "预警", "复核函", "提醒", "投研报告", "周报", "公示"}),
+
+    LETTER(-1, "交易流水确认函", new String[]{"确认单", "确认函", "交易确认数据",
+            "赎回确认", "申购确认", "分红确认", "确认表", "交易确认", "确认"}),
+
     MONTHLY(0, "月", new String[]{"月", "月度", "月报"}),
+
     QUARTERLY(1, "季", new String[]{"季", "季度", "季报"}),
+
     ANNUALLY(2, "年", new String[]{"年度", "年报"}),
     ;
 

+ 20 - 0
mo-daq/src/main/java/com/smppw/modaq/common/exception/NotSupportReportException.java

@@ -0,0 +1,20 @@
+package com.smppw.modaq.common.exception;
+
+import com.smppw.modaq.common.enums.StatusCode;
+
+/**
+ * Signals an unsupported report format.
+ * <p>
+ * It is a dedicated subtype of {@link ReportParseException} so that a caller can
+ * tell it apart from other parse failures (for example with {@code instanceof})
+ * and leave the parsing method right away.
+ * <p>
+ * The three constructors only forward their arguments to the matching
+ * {@link ReportParseException} constructors: a status code, an explicit
+ * code/message pair, or a status code plus message arguments.
+ */
+public class NotSupportReportException extends ReportParseException {
+    public NotSupportReportException(StatusCode statusCode) {
+        super(statusCode);
+    }
+
+    public NotSupportReportException(Integer code, String msg) {
+        super(code, msg);
+    }
+
+    public NotSupportReportException(StatusCode statusCode, Object... msgs) {
+        super(statusCode, msgs);
+    }
+}

+ 14 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/LetterReportData.java

@@ -1,5 +1,6 @@
 package com.smppw.modaq.domain.dto.report;
 
+import cn.hutool.core.util.StrUtil;
 import com.smppw.modaq.common.enums.ReportType;
 import lombok.Getter;
 import lombok.Setter;
@@ -20,6 +21,19 @@ public class LetterReportData extends ReportData {
     }
 
     @Override
+    public boolean wasSuccessful() { // letter-specific success check layered on top of the base-class check
+        boolean superFlag = super.wasSuccessful();
+        if (!superFlag) {
+            return false; // the base-class check already failed
+        }
+        if (this.investorInfo == null || fundTransaction == null) {
+            return false; // both letter-specific sections must be present
+        }
+        return !StrUtil.isBlank(this.investorInfo.getInvestorName()) // investor name must be non-blank
+                && !StrUtil.isBlank(this.fundTransaction.getFundName()); // and so must the fund name of the transaction
+    }
+
+    @Override
     public String toString() {
         return "{" +
                 super.toString() +

+ 14 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportData.java

@@ -1,5 +1,6 @@
 package com.smppw.modaq.domain.dto.report;
 
+import cn.hutool.core.util.StrUtil;
 import com.smppw.modaq.common.conts.Constants;
 import com.smppw.modaq.common.enums.ReportType;
 import lombok.Getter;
@@ -44,6 +45,19 @@ public abstract class ReportData implements Serializable {
 
     public abstract ReportType getReportType();
 
+    /**
+     * Tells whether the current report file was parsed successfully.
+     * <p>
+     * Both {@code baseInfo} and {@code fundInfo} must be present, and neither the
+     * report name nor the fund name may be blank.
+     *
+     * @return {@code true} only when {@code baseInfo} and {@code fundInfo} are
+     * non-null and both {@code baseInfo.getReportName()} and
+     * {@code fundInfo.getFundName()} are non-blank; {@code false} otherwise
+     */
+    public boolean wasSuccessful() {
+        if (this.baseInfo == null || this.fundInfo == null) {
+            return false;
+        }
+        return !StrUtil.isBlank(this.baseInfo.getReportName())
+                && !StrUtil.isBlank(this.fundInfo.getFundName());
+    }
+
     @Override
     public String toString() {
         return "baseInfo=" + baseInfo +

+ 7 - 2
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportParserParams.java

@@ -27,8 +27,13 @@ public class ReportParserParams {
      * 备案编码
      */
     private String registerNumber;
+    /**
+     * 报告类型
+     */
+    private ReportType reportType;
 
+    /**
+     * 上次传输的ai解析上传文件ID
+     */
     private String aiFileId;
-
-    private ReportType reportType;
 }

+ 54 - 12
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -18,6 +18,7 @@ import com.smppw.modaq.common.conts.EmailTypeConst;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.enums.ReportParserFileType;
 import com.smppw.modaq.common.enums.ReportType;
+import com.smppw.modaq.common.exception.NotSupportReportException;
 import com.smppw.modaq.common.exception.ReportParseException;
 import com.smppw.modaq.domain.dto.EmailContentInfoDTO;
 import com.smppw.modaq.domain.dto.EmailZipFileDTO;
@@ -273,7 +274,8 @@ public class EmailParseService {
                                                                String filepath, Integer emailType, String aiFileId) {
         ParseResult<ReportData> result = new ParseResult<>();
         boolean reportFlag = !Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)
-                && !Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType);
+                && !Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType)
+                && !Objects.equals(EmailTypeConst.REPORT_OTHER_TYPE, emailType);
         if (reportFlag || StrUtil.isBlank(fileName) || fileName.endsWith(".html")) {
             result.setStatus(ReportParseStatus.NOT_A_REPORT.getCode());
             result.setMsg(StrUtil.format(ReportParseStatus.NOT_A_REPORT.getMsg(), fileName));
@@ -290,7 +292,7 @@ public class EmailParseService {
         if (Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType)) {
             reportType = ReportType.LETTER;
         }
-        // 解析器--如果开启python解析则直接调用python接口,否则根据文件后缀获取对应解析器
+        // 解析器--根据文件后缀获取对应解析器,解析不了就用AI来解析
         ReportParserFileType fileType;
         String fileSuffix = StrUtil.subAfter(fileName, ".", true);
         fileType = ReportParserFileType.getBySuffix(fileSuffix);
@@ -308,25 +310,63 @@ public class EmailParseService {
         }
         // 解析报告
         ReportData reportData = null;
+        boolean notSupportFile = false;
         StopWatch parserWatch = new StopWatch();
         parserWatch.start();
         try {
-            ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName)
-                    .filepath(filepath).registerNumber(registerNumber).reportType(reportType).aiFileId(aiFileId).build();
-            ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
-            reportData = instance.parse(params);
-            result.setStatus(1);
-            result.setMsg("报告解析成功");
-            result.setData(reportData);
+            if (StrUtil.isBlank(aiFileId) && reportType != ReportType.OTHER) {
+                ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName).filepath(filepath)
+                        .registerNumber(registerNumber).reportType(reportType).build();
+                ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
+                reportData = instance.parse(params);
+                result.setStatus(1);
+                result.setMsg("报告解析成功");
+                result.setData(reportData);
+            } else {
+                if (reportType == ReportType.OTHER) {
+                    if (log.isInfoEnabled()) {
+                        log.info("报告{} 是其他类型,直接用AI解析器解析", fileName);
+                    }
+                } else {
+                    if (log.isInfoEnabled()) {
+                        log.info("报告{} 是已经存在ai解析记录,上传过文件{},直接跳转到AI解析器进行解析", fileName, fileId);
+                    }
+                }
+            }
         } catch (ReportParseException e) {
-            log.error("解析失败\n{}", StrUtil.format(e.getMsg(), fileName));
+            log.error("解析失败{}", StrUtil.format(e.getMsg(), fileName));
             result.setStatus(e.getCode());
             result.setMsg(StrUtil.format(e.getMsg(), fileName));
+            if (e instanceof NotSupportReportException) {
+                notSupportFile = true;
+            }
         } catch (Exception e) {
-            log.error("解析错误\n{}", ExceptionUtil.stacktraceToString(e));
+            log.error("解析错误{}", ExceptionUtil.stacktraceToString(e));
             result.setStatus(ReportParseStatus.PARSE_FAIL.getCode());
             result.setMsg(StrUtil.format(ReportParseStatus.PARSE_FAIL.getMsg(), e.getMessage()));
         } finally {
+            // 如果解析结果是空的就用AI工具解析一次
+            if (reportData == null && !notSupportFile) {
+                if (log.isInfoEnabled()) {
+                    log.info("报告{} 开始AI解析......", fileName);
+                }
+                ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName).filepath(filepath)
+                        .registerNumber(registerNumber).reportType(reportType).aiFileId(aiFileId).build();
+                ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, ReportParserFileType.AI);
+                try {
+                    reportData = instance.parse(params);
+                    result.setStatus(1);
+                    result.setMsg("报告解析成功");
+                    result.setData(reportData);
+                } catch (Exception e) {
+                    log.error("AI解析错误:{}", ExceptionUtil.stacktraceToString(e));
+                    result.setStatus(ReportParseStatus.PARSE_FAIL.getCode());
+                    result.setMsg(StrUtil.format(ReportParseStatus.PARSE_FAIL.getMsg(), e.getMessage()));
+                }
+                if (log.isInfoEnabled()) {
+                    log.info("报告{} AI解析结束!", fileName);
+                }
+            }
             parserWatch.stop();
             if (log.isInfoEnabled()) {
                 log.info("报告{}解析结果为{},耗时{}ms", fileName, reportData, parserWatch.getTotalTimeMillis());
@@ -418,7 +458,9 @@ public class EmailParseService {
         emailTypeMap.put(EmailTypeConst.REPORT_EMAIL_TYPE,
                 ListUtil.toList("月报", "月度报告", "季报", "季度报告", "年报", "年度报告"));
         emailTypeMap.put(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE,
-                ListUtil.toList("确认单", "确认函", "交易确认数据", "赎回确认", "申购确认", "分红确认", "确认表", "交易确认", "确认"));
+                ListUtil.toList(ReportType.LETTER.getPatterns()));
+        emailTypeMap.put(EmailTypeConst.REPORT_OTHER_TYPE,
+                ListUtil.toList(ReportType.OTHER.getPatterns()));
         return emailTypeMap;
     }