Ver código fonte

feat:优化解析逻辑当代码无法解析时用AI解析

wangzaijun 6 dias atrás
pai
commit
602f035ffb
15 arquivos alterados com 382 adições e 133 exclusões
  1. 43 4
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/AbstractReportParser.java
  2. 15 16
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ReportParserConstant.java
  3. 2 3
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ReportParserFactory.java
  4. 78 0
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AILetterReportParser.java
  5. 90 0
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AbstractAIReportParser.java
  6. 34 67
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/AbstractPDReportParser.java
  7. 8 26
      mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDLetterReportParser.java
  8. 3 0
      mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParseStatus.java
  9. 1 1
      mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParserFileType.java
  10. 13 5
      mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportType.java
  11. 20 0
      mo-daq/src/main/java/com/smppw/modaq/common/exception/NotSupportReportException.java
  12. 13 0
      mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/LetterReportData.java
  13. 13 0
      mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportData.java
  14. 7 2
      mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportParserParams.java
  15. 42 9
      mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

+ 43 - 4
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/AbstractReportParser.java

@@ -8,10 +8,7 @@ import cn.hutool.core.util.StrUtil;
 import com.smppw.modaq.application.components.ReportParseUtils;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.exception.ReportParseException;
-import com.smppw.modaq.domain.dto.report.BaseReportDTO;
-import com.smppw.modaq.domain.dto.report.ReportBaseInfoDTO;
-import com.smppw.modaq.domain.dto.report.ReportData;
-import com.smppw.modaq.domain.dto.report.ReportParserParams;
+import com.smppw.modaq.domain.dto.report.*;
 import com.smppw.modaq.domain.entity.EmailFieldMappingDO;
 import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
 import org.slf4j.Logger;
@@ -64,6 +61,48 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
     protected abstract void cleaningReportData(T reportData);
 
     /**
+     * Parses the remaining information of the report and sets it on the result object.
+     *
+     * @param reportInfo basic information of the report
+     * @param fundInfo   basic information of the fund described in the report
+     * @return the report data object built by the concrete parser
+     * @throws ReportParseException declared so that implementations can signal a parse failure
+     */
+    protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) throws ReportParseException;
+
+    /**
+     * Builds the basic information of the fund for the report being parsed.
+     *
+     * @param params parser request parameters
+     * @return the fund information DTO
+     */
+    protected abstract ReportFundInfoDTO buildFundInfo(ReportParserParams params);
+
+    /**
+     * Builds the complete parse result for a report.
+     * <p>
+     * Runs the parsing steps in a fixed order: {@code buildReportInfo}, {@code buildFundInfo},
+     * {@code parseExtInfoAndSetData} and finally {@code cleaningReportData}.
+     *
+     * @param params   request parameters
+     * @param filename file name, passed to the failure status as its message argument
+     * @return the cleaned report data
+     * @throws ReportParseException rethrown unchanged when a step raises it; any other exception is
+     *                              logged and converted to {@code ReportParseStatus.NOT_A_FIXED_FORMAT}
+     */
+    protected T buildReportData(ReportParserParams params, String filename) {
+        try {
+            // basic information of the report
+            ReportBaseInfoDTO reportInfo = this.buildReportInfo(params);
+            // basic information of the main fund described in the report
+            ReportFundInfoDTO reportFundInfo = this.buildFundInfo(params);
+            // parse the remaining information and fill the result fields
+            T reportData = this.parseExtInfoAndSetData(reportInfo, reportFundInfo);
+            // clean the data before returning it
+            this.cleaningReportData(reportData);
+            return reportData;
+        } catch (ReportParseException e) {
+            throw e;
+        } catch (Exception e) {
+            // The status exception built below does not carry the cause, so record it here;
+            // otherwise the real failure (NPE, class cast, ...) is lost.
+            this.logger.warn("报告{} 构建解析结果数据时报错", filename, e);
+            throw new ReportParseException(ReportParseStatus.NOT_A_FIXED_FORMAT, filename);
+        }
+    }
+
+    /**
      * 构建只有两列表格的dto数据对象
      *
      * @param <DTO>   泛型对象

+ 15 - 16
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ReportParserConstant.java

@@ -16,48 +16,47 @@ public final class ReportParserConstant {
 
     // 交易流水确认函解析
     public static final String PARSER_PDF_LETTER = "report-parser:pdf:letter";
+    public static final String PARSER_AI_LETTER = "report-parser:ai:letter";
 
     public static final String PARSER_PDF_MONTHLY = "report-parser:pdf:monthly";
-    public static final String PARSER_WORD_MONTHLY = "report-parser:word:monthly";
     public static final String PARSER_EXCEL_MONTHLY = "report-parser:excel:monthly";
-    public static final String PARSER_PYTHON_MONTHLY = "report-parser:python:monthly";
+    public static final String PARSER_AI_MONTHLY = "report-parser:ai:monthly";
 
     public static final String PARSER_PDF_QUARTERLY = "report-parser:pdf:quarterly";
-    public static final String PARSER_WORD_QUARTERLY = "report-parser:word:quarterly";
     public static final String PARSER_EXCEL_QUARTERLY = "report-parser:excel:quarterly";
-    public static final String PARSER_PYTHON_QUARTERLY = "report-parser:python:quarterly";
+    public static final String PARSER_AI_QUARTERLY = "report-parser:ai:quarterly";
 
     public static final String PARSER_PDF_ANNUALLY = "report-parser:pdf:annually";
-    public static final String PARSER_WORD_ANNUALLY = "report-parser:word:annually";
     public static final String PARSER_EXCEL_ANNUALLY = "report-parser:excel:annually";
-    public static final String PARSER_PYTHON_ANNUALLY = "report-parser:python:annually";
+    public static final String PARSER_AI_ANNUALLY = "report-parser:ai:annually";
 
     static {
         // 交易流水确认函解析
-        REPORT_PARSER_BEAN_MAP.put(ReportType.LETTER, Map.of(ReportParserFileType.PDF, PARSER_PDF_LETTER));
+        REPORT_PARSER_BEAN_MAP.put(ReportType.LETTER,
+                Map.of(
+                        ReportParserFileType.PDF, PARSER_PDF_LETTER,
+                        ReportParserFileType.AI, PARSER_AI_LETTER
+                ));
 
+        // 月度报告
         REPORT_PARSER_BEAN_MAP.put(ReportType.MONTHLY,
                 Map.of(ReportParserFileType.PDF, PARSER_PDF_MONTHLY,
-                        ReportParserFileType.WORD, PARSER_WORD_MONTHLY,
                         ReportParserFileType.EXCEL, PARSER_EXCEL_MONTHLY,
-
-                        ReportParserFileType.PYTHON, PARSER_PYTHON_MONTHLY
+                        ReportParserFileType.AI, PARSER_AI_MONTHLY
                 ));
 
+        // 季度报告
         REPORT_PARSER_BEAN_MAP.put(ReportType.QUARTERLY,
                 Map.of(ReportParserFileType.PDF, PARSER_PDF_QUARTERLY,
-                        ReportParserFileType.WORD, PARSER_WORD_QUARTERLY,
                         ReportParserFileType.EXCEL, PARSER_EXCEL_QUARTERLY,
-
-                        ReportParserFileType.PYTHON, PARSER_PYTHON_QUARTERLY
+                        ReportParserFileType.AI, PARSER_AI_QUARTERLY
                 ));
 
+        // 年度报告
         REPORT_PARSER_BEAN_MAP.put(ReportType.ANNUALLY,
                 Map.of(ReportParserFileType.PDF, PARSER_PDF_ANNUALLY,
-                        ReportParserFileType.WORD, PARSER_WORD_ANNUALLY,
                         ReportParserFileType.EXCEL, PARSER_EXCEL_ANNUALLY,
-
-                        ReportParserFileType.PYTHON, PARSER_PYTHON_ANNUALLY
+                        ReportParserFileType.AI, PARSER_AI_ANNUALLY
                 ));
     }
 }

+ 2 - 3
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ReportParserFactory.java

@@ -4,6 +4,7 @@ import cn.hutool.core.map.MapUtil;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.enums.ReportParserFileType;
 import com.smppw.modaq.common.enums.ReportType;
+import com.smppw.modaq.common.exception.NotSupportReportException;
 import com.smppw.modaq.common.exception.ReportParseException;
 import com.smppw.modaq.domain.dto.report.ReportData;
 import org.springframework.stereotype.Component;
@@ -12,8 +13,6 @@ import java.util.Map;
 
 @Component
 public class ReportParserFactory {
-    private static final ReportParser<? extends ReportData> DEFAULT = (ReportParser<ReportData>) params -> null;
-
     private static final Map<String, ReportParser<? extends ReportData>> REPORT_WRITER_MAP = MapUtil.newHashMap(32);
 
     public ReportParserFactory(Map<String, ReportParser<? extends ReportData>> components) {
@@ -25,7 +24,7 @@ public class ReportParserFactory {
         String beanName = ReportParserConstant.REPORT_PARSER_BEAN_MAP.getOrDefault(reportType, MapUtil.empty()).get(reportParserFileType);
         ReportParser<? extends ReportData> reportParser = REPORT_WRITER_MAP.get(beanName);
         if (reportParser == null) {
-            throw new ReportParseException(ReportParseStatus.NO_SUPPORT_TEMPLATE);
+            throw new NotSupportReportException(ReportParseStatus.NO_SUPPORT_TEMPLATE);
         }
         return (ReportParser<T>) reportParser;
     }

+ 78 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AILetterReportParser.java

@@ -0,0 +1,78 @@
+package com.smppw.modaq.application.components.report.parser.ai;
+
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.util.StrUtil;
+import cn.hutool.json.JSONObject;
+import cn.hutool.json.JSONUtil;
+import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
+import com.smppw.modaq.common.enums.ReportParseStatus;
+import com.smppw.modaq.common.exception.ReportParseException;
+import com.smppw.modaq.domain.dto.report.*;
+import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Parser for trade confirmation letters that builds its result from the answer of the AI parsing service.
+ * <p>
+ * NOTE(review): the parse state ({@code infoMap}, {@code aiFileId}) lives in instance fields that are
+ * reset on every parse; if one instance of this bean is used by several threads at once the calls
+ * would interfere with each other - confirm that callers never parse concurrently.
+ */
+@Component(ReportParserConstant.PARSER_AI_LETTER)
+public class AILetterReportParser extends AbstractAIReportParser<LetterReportData> {
+    public AILetterReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    /**
+     * Turns the raw answer of the AI service into the flat key/value map used to build the DTOs.
+     * <p>
+     * The answer is read as a JSON object with a {@code file_id} and a {@code content} field; the text
+     * between the first pair of {@code ```} fences in {@code content} is searched for a JSON object,
+     * which is flattened and merged into {@code infoMap}. When the fenced text contains no
+     * <code>{</code> at all, nothing is merged.
+     *
+     * @param result raw response body of the AI service
+     * @throws ReportParseException with {@code PARSE_HANDLE_FAIL} when the answer cannot be converted
+     */
+    @Override
+    protected void handleAiResult(String result) throws ReportParseException {
+        try {
+            JSONObject jsonResult = JSONUtil.parseObj(result);
+            this.aiFileId = MapUtil.getStr(jsonResult, "file_id");
+            // index 1 is the text inside the first ``` fence; a missing fence fails the conversion
+            String content = StrUtil.split(jsonResult.getStr("content"), "```").get(1);
+            int start = content.indexOf('{');
+            if (start < 0) {
+                // no JSON object in the fenced text: nothing to merge
+                return;
+            }
+            int end = content.lastIndexOf('}');
+            if (end < start) {
+                throw new IllegalStateException("unterminated JSON object in AI content");
+            }
+            // Take exactly the outermost {...} span so no stray brace or trailing text reaches the parser.
+            JSONObject jsonObject = JSONUtil.parseObj(content.substring(start, end + 1));
+            this.infoMap.putAll(flattenMap(jsonObject));
+        } catch (Exception e) {
+            // the status exception carries no cause, so log the real reason before converting
+            this.logger.warn("AI解析结果结构化失败", e);
+            throw new ReportParseException(ReportParseStatus.PARSE_HANDLE_FAIL);
+        }
+    }
+
+    /**
+     * Builds the letter result from the flattened AI answer and marks it as AI parsed.
+     *
+     * @param reportInfo basic information of the report
+     * @param fundInfo   basic information of the fund
+     * @return the letter report data carrying investor info, fund transaction and the AI file id
+     */
+    @Override
+    protected LetterReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) throws ReportParseException {
+        Integer fileId = reportInfo.getFileId();
+        if (this.logger.isInfoEnabled()) {
+            this.logger.info("文件{} 解析内容是:{}", fileId, this.infoMap);
+        }
+        // investor information
+        ReportInvestorInfoDTO investorInfo = this.buildDto(fileId, ReportInvestorInfoDTO.class, this.infoMap);
+        // fund transaction record
+        ReportFundTransactionDTO fundTransaction = this.buildDto(fileId, ReportFundTransactionDTO.class, this.infoMap);
+        // assemble the result
+        LetterReportData reportData = new LetterReportData(reportInfo, fundInfo);
+        reportData.setInvestorInfo(investorInfo);
+        reportData.setFundTransaction(fundTransaction);
+        reportData.setAiParse(true);
+        reportData.setAiFileId(this.aiFileId);
+        return reportData;
+    }
+
+    /**
+     * Builds the fund information DTO from the flattened AI answer.
+     *
+     * @param params parser request parameters; only the file id is read here
+     * @return the fund information DTO
+     */
+    @Override
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
+        return this.buildDto(params.getFileId(), ReportFundInfoDTO.class, this.infoMap);
+    }
+
+    /**
+     * Flattens nested maps into a single level keyed by the leaf key only.
+     * <p>
+     * Leaves that share a name under different parents overwrite each other; which one wins depends
+     * on the iteration order of the maps.
+     *
+     * @param data possibly nested map
+     * @return a new map holding only non-map values
+     */
+    @SuppressWarnings("unchecked")
+    private static Map<String, Object> flattenMap(Map<String, Object> data) {
+        Map<String, Object> result = MapUtil.newHashMap(16);
+        for (Map.Entry<String, Object> entry : data.entrySet()) {
+            Object value = entry.getValue();
+            if (value instanceof Map<?, ?>) {
+                result.putAll(flattenMap((Map<String, Object>) value));
+            } else {
+                result.put(entry.getKey(), value);
+            }
+        }
+        return result;
+    }
+}

+ 90 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AbstractAIReportParser.java

@@ -0,0 +1,90 @@
+package com.smppw.modaq.application.components.report.parser.ai;
+
+import cn.hutool.core.exceptions.ExceptionUtil;
+import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.util.StrUtil;
+import cn.hutool.http.HttpUtil;
+import com.smppw.modaq.application.components.report.parser.AbstractReportParser;
+import com.smppw.modaq.common.enums.ReportParseStatus;
+import com.smppw.modaq.common.exception.ReportParseException;
+import com.smppw.modaq.domain.dto.report.ReportData;
+import com.smppw.modaq.domain.dto.report.ReportParserParams;
+import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
+import org.springframework.beans.factory.annotation.Value;
+
+import java.util.Map;
+
+/**
+ * Base class for report parsers that obtain their data from an external AI parsing service.
+ *
+ * @param <T> concrete report data type produced by the parser
+ */
+public abstract class AbstractAIReportParser<T extends ReportData> extends AbstractReportParser<T> {
+    /**
+     * Address of the AI parsing service, injected from {@code email.report.ai-parser-url}.
+     */
+    @Value("${email.report.ai-parser-url}")
+    private String aiParserUrl;
+
+    /**
+     * File id on the AI side; cleared by {@link #init()} and expected to be filled by
+     * {@link #handleAiResult(String)} implementations.
+     */
+    protected String aiFileId;
+
+    /**
+     * Key/value data extracted from the AI answer; replaced by a fresh map on every {@link #init()}.
+     */
+    protected Map<String, Object> infoMap;
+
+    public AbstractAIReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    /**
+     * Resets the per-parse state of this parser.
+     */
+    @Override
+    protected void init() {
+        super.init();
+        this.aiFileId = null;
+        this.infoMap = MapUtil.newHashMap(128);
+    }
+
+    /**
+     * Sends the file to the AI service, lets the subclass digest the answer and builds the result.
+     *
+     * @param params parser request parameters
+     * @return the report data built from the AI answer
+     * @throws ReportParseException rethrown from {@link #handleAiResult(String)}, or
+     *                              {@code AI_NOT_FOUND} for any other failure of the request
+     */
+    @Override
+    public T parse(ReportParserParams params) throws ReportParseException {
+        this.init();
+        final String reportName = params.getFilename();
+        final Map<String, Object> query = this.buildQuery(params);
+        String response = null;
+        try {
+            response = HttpUtil.get(this.aiParserUrl, query);
+            this.handleAiResult(response);
+        } catch (ReportParseException e) {
+            this.logger.warn("{} ai解析失败,解析结果{},错误原因:{}", reportName, response, ExceptionUtil.stacktraceToString(e));
+            throw e;
+        } catch (Exception e) {
+            this.logger.warn("报告{} 在AI解析时报错:{}", reportName, ExceptionUtil.stacktraceToString(e));
+            throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
+        }
+        return this.buildReportData(params, reportName);
+    }
+
+    /**
+     * Collects the query parameters of the AI request: file path, previous AI file id and,
+     * when {@link #prompt()} is not blank, the prompt.
+     */
+    private Map<String, Object> buildQuery(ReportParserParams params) {
+        Map<String, Object> query = MapUtil.newHashMap(4);
+        query.put("filepath", params.getFilepath());
+        query.put("file_id", params.getAiFileId());
+        String userMsg = this.prompt();
+        if (!StrUtil.isBlank(userMsg)) {
+            query.put("user_msg", userMsg);
+        }
+        return query;
+    }
+
+    /**
+     * No cleaning is performed for AI results by default.
+     */
+    @Override
+    protected void cleaningReportData(T reportData) {
+        // do something.
+    }
+
+    /**
+     * Prompt sent to the AI service.
+     *
+     * @return {@code null} (the default) to send no prompt and let the service use its own
+     */
+    protected String prompt() {
+        return null;
+    }
+
+    /**
+     * Digests the raw AI answer so that structured objects can be built from it afterwards.
+     *
+     * @param result raw response body of the AI service
+     * @throws ReportParseException when the answer cannot be handled
+     */
+    protected abstract void handleAiResult(String result) throws ReportParseException;
+}

+ 34 - 67
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/AbstractPDReportParser.java

@@ -1,26 +1,18 @@
 package com.smppw.modaq.application.components.report.parser.pdf;
 
 import cn.hutool.core.collection.ListUtil;
-import cn.hutool.core.exceptions.ExceptionUtil;
-import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.StrUtil;
-import cn.hutool.http.HttpUtil;
-import cn.hutool.json.JSONObject;
-import cn.hutool.json.JSONUtil;
 import com.smppw.modaq.application.components.CustomPDFTextStripper;
 import com.smppw.modaq.application.components.report.parser.AbstractReportParser;
 import com.smppw.modaq.common.conts.Constants;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.exception.ReportParseException;
-import com.smppw.modaq.domain.dto.report.ReportBaseInfoDTO;
 import com.smppw.modaq.domain.dto.report.ReportData;
-import com.smppw.modaq.domain.dto.report.ReportFundInfoDTO;
 import com.smppw.modaq.domain.dto.report.ReportParserParams;
 import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
 import org.apache.pdfbox.pdmodel.PDDocument;
-import org.springframework.beans.factory.annotation.Value;
 import technology.tabula.CustomObjectExtractor;
 import technology.tabula.Page;
 import technology.tabula.PageIterator;
@@ -29,7 +21,6 @@ import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
 
 import java.io.IOException;
 import java.util.List;
-import java.util.Map;
 
 /**
  * @author wangzaijun
@@ -42,14 +33,14 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
      */
     protected List<String> textList;
 
-    @Value("${email.report.ai-parser-url}")
-    private String aiParserUrl;
+//    @Value("${email.report.ai-parser-url}")
+//    private String aiParserUrl;
 
-    protected String aiFileId;
-
-    protected String aiParserContent;
-
-    protected Boolean aiParse = false;
+//    protected String aiFileId;
+//
+//    protected String aiParserContent;
+//
+//    protected Boolean aiParse = false;
 
     public AbstractPDReportParser(EmailFieldMappingMapper fieldMappingMapper) {
         super(fieldMappingMapper);
@@ -69,9 +60,9 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
             String text = stripper.getText(document).replace(Constants.WATERMARK_REPLACE, StrUtil.EMPTY);
             this.textList = StrUtil.split(text, System.lineSeparator());
             this.textList.removeIf(StrUtil::isBlank);
-//            if (this.textList.isEmpty()) {
-//                throw new ReportParseException(ReportParseStatus.REPORT_IS_SCAN, filename);
-//            }
+            if (this.textList.isEmpty()) {
+                throw new ReportParseException(ReportParseStatus.REPORT_IS_SCAN, filename);
+            }
             // 解析所有表格(单元格字符去水印)
             List<Table> tables = ListUtil.list(true);
             SpreadsheetExtractionAlgorithm spreadsheetExtractionAlgorithm = new SpreadsheetExtractionAlgorithm();
@@ -94,43 +85,35 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
                         }
                     }
                 } else {
-                    this.aiParse = true;
-                    Map<String, Object> paramsMap = MapUtil.newHashMap(4);
-                    paramsMap.put("filepath", filepath);
-                    paramsMap.put("file_id", params.getAiFileId());
-                    String body = null;
-                    try {
-                        body = HttpUtil.get(this.aiParserUrl, paramsMap);
-                        JSONObject jsonObject = JSONUtil.parseObj(body);
-                        this.aiFileId = MapUtil.getStr(jsonObject, "file_id");
-                        String content = StrUtil.split(jsonObject.getStr("content"), "```").get(1);
-                        this.aiParserContent = "{" + StrUtil.subAfter(content, "{", false) + "}";
-                    } catch (Exception e) {
-                        this.logger.warn("{} ai解析失败,解析结果{},错误原因:{}", filename, body, ExceptionUtil.stacktraceToString(e));
-                    }
+//                    this.aiParse = true;
+//                    Map<String, Object> paramsMap = MapUtil.newHashMap(4);
+//                    paramsMap.put("filepath", filepath);
+//                    paramsMap.put("file_id", params.getAiFileId());
+//                    String body = null;
+//                    try {
+//                        body = HttpUtil.get(this.aiParserUrl, paramsMap);
+//                        JSONObject jsonObject = JSONUtil.parseObj(body);
+//                        this.aiFileId = MapUtil.getStr(jsonObject, "file_id");
+//                        String content = StrUtil.split(jsonObject.getStr("content"), "```").get(1);
+//                        this.aiParserContent = "{" + StrUtil.subAfter(content, "{", false) + "}";
+//                    } catch (Exception e) {
+//                        this.logger.warn("{} ai解析失败,解析结果{},错误原因:{}", filename, body, ExceptionUtil.stacktraceToString(e));
+//                    }
+                    throw new ReportParseException(ReportParseStatus.NOT_A_FIXED_FORMAT, filename);
                 }
                 i++;
             }
-            if (tables.isEmpty() && StrUtil.isBlank(this.aiParserContent)) {
+            if (tables.isEmpty()) {
                 throw new ReportParseException(ReportParseStatus.REPORT_IS_SCAN, filename);
             }
             this.initTableInfo(tables);
         }
-        try {
-            // 报告基本信息
-            ReportBaseInfoDTO reportInfo = this.buildReportInfo(params);
-            // 解析报告中主体基金的基本信息
-            ReportFundInfoDTO reportFundInfo = this.buildFundInfo(params);
-            // 解析其他表格信息并且设置结果字段
-            T reportData = this.parseExtInfoAndSetData(reportInfo, reportFundInfo);
-            // 数据清洗后返回
-            this.cleaningReportData(reportData);
-            return reportData;
-        } catch (ReportParseException e) {
-            throw e;
-        } catch (Exception e) {
-            throw new ReportParseException(ReportParseStatus.NOT_A_FIXED_FORMAT, filename);
+        T reportData = this.buildReportData(params, filename);
+        if (!reportData.wasSuccessful()) {
+            // 抛出异常方便ai解析
+            throw new ReportParseException(ReportParseStatus.PARSE_CORE_INFO_FAIL, filename);
         }
+        return reportData;
     }
 
     /**
@@ -140,22 +123,6 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
      */
     protected abstract void initTableInfo(List<Table> tables);
 
-    /**
-     * 绑定基金基本信息
-     *
-     * @param params /
-     * @return /
-     */
-    protected abstract ReportFundInfoDTO buildFundInfo(ReportParserParams params);
-
-    /**
-     * 解析报告的其他信息并设置到对象中
-     *
-     * @param reportInfo 报告基本信息
-     * @param fundInfo   报告中基金基本信息
-     * @return /
-     */
-    protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo);
 
     @Override
     protected void cleaningReportData(T reportData) {
@@ -167,9 +134,9 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
         super.init();
         // 先初始化为null
         this.textList = null;
-        this.aiFileId = null;
-        this.aiParserContent = null;
-        this.aiParse = false;
+//        this.aiFileId = null;
+//        this.aiParserContent = null;
+//        this.aiParse = false;
     }
 
 //    /**

+ 8 - 26
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDLetterReportParser.java

@@ -1,11 +1,8 @@
 package com.smppw.modaq.application.components.report.parser.pdf;
 
 import cn.hutool.core.collection.CollUtil;
-import cn.hutool.core.collection.ListUtil;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.StrUtil;
-import cn.hutool.json.JSONObject;
-import cn.hutool.json.JSONUtil;
 import com.smppw.modaq.application.components.ReportParseUtils;
 import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
 import com.smppw.modaq.domain.dto.report.*;
@@ -29,13 +26,13 @@ public class PDLetterReportParser extends AbstractPDReportParser<LetterReportDat
     protected void initTableInfo(List<Table> tables) {
         // 每次重新清空map数据
         this.allInfoMap.clear();
-        if (CollUtil.isEmpty(tables)) {
-            if (StrUtil.isNotBlank(this.aiParserContent)) {
-                JSONObject jsonObject = JSONUtil.parseObj(this.aiParserContent);
-                this.allInfoMap.putAll(flattenMap(jsonObject, ListUtil.list(false)));
-            }
-            return;
-        }
+//        if (CollUtil.isEmpty(tables)) {
+//            if (StrUtil.isNotBlank(this.aiParserContent)) {
+//                JSONObject jsonObject = JSONUtil.parseObj(this.aiParserContent);
+//                this.allInfoMap.putAll(flattenMap(jsonObject, ListUtil.list(false)));
+//            }
+//            return;
+//        }
         for (Table table : tables) {
             int rowCount = table.getRowCount();
             int colCount = table.getColCount();
@@ -97,25 +94,10 @@ public class PDLetterReportParser extends AbstractPDReportParser<LetterReportDat
         LetterReportData reportData = new LetterReportData(reportInfo, fundInfo);
         reportData.setInvestorInfo(investorInfo);
         reportData.setFundTransaction(fundTransaction);
-        reportData.setAiParse(Objects.equals(true, this.aiParse));
-        reportData.setAiFileId(this.aiFileId);
+        reportData.setAiParse(false);
         return reportData;
     }
 
-    @SuppressWarnings("unchecked")
-    private static Map<String, Object> flattenMap(Map<String, Object> data, List<String> keys) {
-        Map<String, Object> result = MapUtil.newHashMap(16);
-        for (Map.Entry<String, Object> entry : data.entrySet()) {
-            List<String> currKeys = ListUtil.toList(keys);
-            currKeys.add(entry.getKey());
-            if (entry.getValue() instanceof Map<?, ?>) {
-                result.putAll(flattenMap((Map<String, Object>) entry.getValue(), currKeys));
-            } else {
-                result.put(entry.getKey(), entry.getValue());
-            }
-        }
-        return result;
-    }
 
     /**
      * 解析键和值是垂直格式的表格数据

+ 3 - 0
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParseStatus.java

@@ -1,6 +1,7 @@
 package com.smppw.modaq.common.enums;
 
 public enum ReportParseStatus implements StatusCode {
+    AI_NOT_FOUND(20009, "AI资源找不到"),
     PARSE_FAIL(21000, "定期报告或交易确认单解析错误:{}"),
     NOT_A_REPORT(21001, "[{}]不是定期报告或交易确认单"),
     REPORT_IS_SCAN(21002, "报告[{}]为扫描件"),
@@ -13,6 +14,8 @@ public enum ReportParseStatus implements StatusCode {
 //    PARSE_INDUSTRY_INFO_FAIL(21013, "报告[{}]没有解析到基金行业配置信息"),
 //    PARSE_ASSET_INFO_FAIL(21014, "报告[{}]没有解析到基金资产配置信息"),
 //    PARSE_SHARE_INFO_FAIL(21015, "报告[{}]没有解析到基金份额变动信息"),
+    PARSE_CORE_INFO_FAIL(21019, "报告[{}]没有解析到关键信息"),
+    PARSE_HANDLE_FAIL(21018, "报告结果在结构化时转换错误"),
 
     PARSE_RULE_NO_FUND(21020, "未设置报告解析规则"),
     ;

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParserFileType.java

@@ -11,7 +11,7 @@ public enum ReportParserFileType {
     PDF("pdf"),
     WORD("docx,doc"),
     EXCEL("xlsx,xls"),
-    PYTHON("python");
+    AI("ai");
 
     private final String suffix;
 

+ 13 - 5
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportType.java

@@ -4,18 +4,26 @@ import lombok.Getter;
 
 @Getter
 public enum ReportType {
-    LETTER(-1, "交易流水确认函", new String[]{"确认单", "确认函", "确认"}),
-    MONTHLY(0, "月", new String[]{"月", "月度", "月报"}),
-    QUARTERLY(1, "季", new String[]{"季", "季度", "季报"}),
-    ANNUALLY(2, "年", new String[]{"年度", "年报"});
+    LETTER(-1, "交易流水确认函", new String[]{"确认单", "确认函", "确认"}, null),
+    MONTHLY(0, "月", new String[]{"月", "月度", "月报"}, null),
+    QUARTERLY(1, "季", new String[]{"季", "季度", "季报"}, null),
+    ANNUALLY(2, "年", new String[]{"年度", "年报"}, null);
 
     private final int type;
     private final String label;
+    /**
+     * 报告类型识别关键字
+     */
     private final String[] patterns;
+    /**
+     * ai解析提示词
+     */
+    private final String prompt;
 
-    ReportType(int type, String label, String[] patterns) {
+    ReportType(int type, String label, String[] patterns, String prompt) {
         this.type = type;
         this.label = label;
         this.patterns = patterns;
+        this.prompt = prompt;
     }
 }

+ 20 - 0
mo-daq/src/main/java/com/smppw/modaq/common/exception/NotSupportReportException.java

@@ -0,0 +1,20 @@
+package com.smppw.modaq.common.exception;
+
+import com.smppw.modaq.common.enums.StatusCode;
+
+/**
+ * Raised for a report format that is not supported. It is a dedicated subtype so that callers
+ * can tell it apart from other parse failures and leave the method directly.
+ */
+public class NotSupportReportException extends ReportParseException {
+    /**
+     * @param code numeric status code
+     * @param msg  status message
+     */
+    public NotSupportReportException(Integer code, String msg) {
+        super(code, msg);
+    }
+
+    /**
+     * @param statusCode status describing the failure
+     */
+    public NotSupportReportException(StatusCode statusCode) {
+        super(statusCode);
+    }
+
+    /**
+     * @param statusCode status describing the failure
+     * @param msgs       arguments passed on to the parent exception together with the status
+     */
+    public NotSupportReportException(StatusCode statusCode, Object... msgs) {
+        super(statusCode, msgs);
+    }
+}

+ 13 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/LetterReportData.java

@@ -1,5 +1,6 @@
 package com.smppw.modaq.domain.dto.report;
 
+import cn.hutool.core.util.StrUtil;
 import com.smppw.modaq.common.enums.ReportType;
 import lombok.Getter;
 import lombok.Setter;
@@ -20,6 +21,18 @@ public class LetterReportData extends ReportData {
     }
 
     @Override
+    public boolean wasSuccessful() {
+        // On top of the parent check, a letter needs both the investor info and the fund
+        // transaction, with a non-blank investor name and a non-blank fund name.
+        return super.wasSuccessful()
+                && this.investorInfo != null
+                && this.fundTransaction != null
+                && StrUtil.isNotBlank(this.investorInfo.getInvestorName())
+                && StrUtil.isNotBlank(this.fundTransaction.getFundName());
+    }
+
+
+    @Override
     public String toString() {
         return "{" +
                 super.toString() +

+ 13 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportData.java

@@ -1,5 +1,6 @@
 package com.smppw.modaq.domain.dto.report;
 
+import cn.hutool.core.util.StrUtil;
 import com.smppw.modaq.common.conts.Constants;
 import com.smppw.modaq.common.enums.ReportType;
 import lombok.Getter;
@@ -44,6 +45,18 @@ public abstract class ReportData implements Serializable {
 
     public abstract ReportType getReportType();
 
+    /**
+     * Tells whether the core fields of this parse result were extracted.
+     *
+     * @return {@code true} only when base info and fund info are both present and the report
+     * name and the fund name are non-blank
+     */
+    public boolean wasSuccessful() {
+        return this.baseInfo != null
+                && this.fundInfo != null
+                && StrUtil.isNotBlank(this.baseInfo.getReportName())
+                && StrUtil.isNotBlank(this.fundInfo.getFundName());
+    }
+
+
     @Override
     public String toString() {
         return "baseInfo=" + baseInfo +

+ 7 - 2
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportParserParams.java

@@ -27,8 +27,13 @@ public class ReportParserParams {
      * 备案编码
      */
     private String registerNumber;
+    /**
+     * 报告类型
+     */
+    private ReportType reportType;
 
+    /**
+     * 上次传输的ai解析上传文件ID
+     */
     private String aiFileId;
-
-    private ReportType reportType;
 }

+ 42 - 9
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -18,6 +18,7 @@ import com.smppw.modaq.common.conts.EmailTypeConst;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.enums.ReportParserFileType;
 import com.smppw.modaq.common.enums.ReportType;
+import com.smppw.modaq.common.exception.NotSupportReportException;
 import com.smppw.modaq.common.exception.ReportParseException;
 import com.smppw.modaq.domain.dto.EmailContentInfoDTO;
 import com.smppw.modaq.domain.dto.EmailZipFileDTO;
@@ -311,25 +312,57 @@ public class EmailParseService {
         }
         // 解析报告
         ReportData reportData = null;
+        boolean notSupportFile = false;
         StopWatch parserWatch = new StopWatch();
         parserWatch.start();
         try {
-            ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName)
-                    .filepath(filepath).registerNumber(registerNumber).reportType(reportType).aiFileId(aiFileId).build();
-            ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
-            reportData = instance.parse(params);
-            result.setStatus(1);
-            result.setMsg("报告解析成功");
-            result.setData(reportData);
+            if (StrUtil.isBlank(aiFileId)) {
+                ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName).filepath(filepath)
+                        .registerNumber(registerNumber).reportType(reportType).build();
+                ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
+                reportData = instance.parse(params);
+                result.setStatus(1);
+                result.setMsg("报告解析成功");
+                result.setData(reportData);
+            } else {
+                if (log.isInfoEnabled()) {
+                    log.info("报告{} 是已经存在ai解析记录,上传过文件{},直接跳转到AI解析器进行解析", fileName, fileId);
+                }
+            }
         } catch (ReportParseException e) {
-            log.error("解析失败\n{}", StrUtil.format(e.getMsg(), fileName));
+            log.error("解析失败{}", StrUtil.format(e.getMsg(), fileName));
             result.setStatus(e.getCode());
             result.setMsg(StrUtil.format(e.getMsg(), fileName));
+            if (e instanceof NotSupportReportException) {
+                notSupportFile = true;
+            }
         } catch (Exception e) {
-            log.error("解析错误\n{}", ExceptionUtil.stacktraceToString(e));
+            log.error("解析错误{}", ExceptionUtil.stacktraceToString(e));
             result.setStatus(ReportParseStatus.PARSE_FAIL.getCode());
             result.setMsg(StrUtil.format(ReportParseStatus.PARSE_FAIL.getMsg(), e.getMessage()));
         } finally {
+            // 如果解析结果是空的就用AI工具解析一次
+            if (reportData == null && !notSupportFile) {
+                if (log.isInfoEnabled()) {
+                    log.info("报告{} 开始AI解析......", fileName);
+                }
+                ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName).filepath(filepath)
+                        .registerNumber(registerNumber).reportType(reportType).aiFileId(aiFileId).build();
+                ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, ReportParserFileType.AI);
+                try {
+                    reportData = instance.parse(params);
+                    result.setStatus(1);
+                    result.setMsg("报告解析成功");
+                    result.setData(reportData);
+                } catch (Exception e) {
+                    log.error("AI解析错误:{}", ExceptionUtil.stacktraceToString(e));
+                    result.setStatus(ReportParseStatus.PARSE_FAIL.getCode());
+                    result.setMsg(StrUtil.format(ReportParseStatus.PARSE_FAIL.getMsg(), e.getMessage()));
+                }
+                if (log.isInfoEnabled()) {
+                    log.info("报告{} AI解析结束!", fileName);
+                }
+            }
             parserWatch.stop();
             if (log.isInfoEnabled()) {
                 log.info("报告{}解析结果为{},耗时{}ms", fileName, reportData, parserWatch.getTotalTimeMillis());