Jelajahi Sumber

fix:上传解析时的报告名称处理问题

wangzaijun 9 jam lalu
induk
melakukan
a0d0bd070f

+ 8 - 2
mo-daq/src/main/java/com/smppw/modaq/application/components/ReportParseUtils.java

@@ -448,9 +448,11 @@ public final class ReportParseUtils {
             if (Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType)
                     || StrUtil.containsAny(text, ReportType.LETTER.getPatterns())) {
                 reportType = ReportType.LETTER;
-            } else if (StrUtil.containsAny(text, ReportType.WEEKLY.getPatterns())) {
+            } else if (Objects.equals(EmailTypeConst.REPORT_WEEKLY_TYPE, emailType)
+                    || StrUtil.containsAny(text, ReportType.WEEKLY.getPatterns())) {
                 reportType = ReportType.WEEKLY;
-            } else if (StrUtil.containsAny(text, ReportType.OTHER.getPatterns())) {
+            } else if (Objects.equals(EmailTypeConst.REPORT_OTHER_TYPE, emailType)
+                    || StrUtil.containsAny(text, ReportType.OTHER.getPatterns())) {
                 reportType = ReportType.OTHER;
             }
             return reportType;
@@ -466,6 +468,10 @@ public final class ReportParseUtils {
             // 特殊的月报(当季度->年度->月度报告无法识别时)
             reportType = ReportType.MONTHLY;
         }
+        // 如果type=3并且没有识别出报告类型则默认为月报来解析
+        if (reportType == null && Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)) {
+            reportType = ReportType.MONTHLY;
+        }
         return reportType;
     }
 

+ 4 - 2
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AbstractAIReportParser.java

@@ -31,7 +31,9 @@ public abstract class AbstractAIReportParser<T extends ReportData> extends Abstr
     private String aiParserUrl;
 
 //    protected String aiFileId;
-
+    /**
+     * 报告AI解析结果,如果有嵌套的数据结构要平铺出来
+     */
     protected Map<String, Object> allInfoMap;
 
     public AbstractAIReportParser(EmailFieldMappingMapper fieldMappingMapper) {
@@ -151,7 +153,7 @@ public abstract class AbstractAIReportParser<T extends ReportData> extends Abstr
                 this.handleAiResult(result);
             }
         } catch (ReportParseException e) {
-            this.logger.warn("{} ai解析失败,解析结果{},错误原因:{}", filename, body, ExceptionUtil.stacktraceToString(e));
+            this.logger.warn("报告{} ai解析失败,解析结果{},错误原因:{}", filename, body, ExceptionUtil.stacktraceToString(e));
             throw e;
         } catch (IORuntimeException e) {
             throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);

+ 10 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/EmailZipFileDTO.java

@@ -34,6 +34,16 @@ public class EmailZipFileDTO {
         this.extName = FileUtil.extName(file);
     }
 
+    public EmailZipFileDTO(String emailTitle, String filename, String filepath, Integer emailType) {
+        File file = FileUtil.file(filepath);
+        this.emailTitle = emailTitle;
+        this.filepath = filepath;
+        this.emailType = emailType;
+        this.filename = filename;
+        this.fileSize = FileUtil.size(file);
+        this.extName = FileUtil.extName(file);
+    }
+
     public EmailZipFileDTO(String emailTitle, EmailContentInfoDTO emailDto) {
         this.emailTitle = emailTitle;
         this.filepath = emailDto.getFilePath();

+ 33 - 37
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -370,7 +370,7 @@ public class EmailParseService {
                     dataList.add(new ParseResult<>(ReportParseStatus.ARCHIVE_FAIL, reportData));
                 }
             } else {
-                dtos.add(new EmailZipFileDTO(emailTitle, reportPath, e.getReportType()));
+                dtos.add(new EmailZipFileDTO(emailTitle, e.getReportName(), reportPath, e.getReportType()));
             }
         }
         // 重新判断类型
@@ -540,42 +540,42 @@ public class EmailParseService {
                                                                EmailZipFileDTO zipFile) {
         Integer fileId = zipFile.getFileId();
         Integer emailType = zipFile.getEmailType();
-        String fileName = zipFile.getFilename();
+        String reportName = zipFile.getFilename();
         String filepath = zipFile.getFilepath();
         ParseResult<ReportData> result = new ParseResult<>();
         boolean reportFlag = emailType == null || !EmailTypeConst.SUPPORT_EMAIL_TYPES.contains(emailType);
-        if (reportFlag || StrUtil.isBlank(fileName) || fileName.endsWith(Constants.FILE_HTML)) {
-            return new ParseResult<>(ReportParseStatus.NOT_A_REPORT, null, fileName);
+        if (reportFlag || StrUtil.isBlank(reportName) || reportName.endsWith(Constants.FILE_HTML)) {
+            return new ParseResult<>(ReportParseStatus.NOT_A_REPORT, null, reportName);
         }
         // 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
-        ReportType reportType = ReportParseUtils.matchReportType(emailType, fileName);
+        ReportType reportType = ReportParseUtils.matchReportType(emailType, reportName);
         if (reportType == null) {
             reportType = ReportParseUtils.matchReportType(emailType, emailTitle);
             if (log.isDebugEnabled()) {
-                log.debug("报告{} 根据邮件主题{} 重新识别的类型是:{}", fileName, emailTitle, reportType);
+                log.debug("报告{} 根据邮件主题{} 重新识别的类型是:{}", reportName, emailTitle, reportType);
             }
         }
         // 解析器--根据文件后缀获取对应解析器,解析不了就用AI来解析
         ReportParserFileType fileType = ReportParserFileType.getBySuffix(zipFile.getExtName());
         // 不支持的格式
         if (fileType == null) {
-            ReportData reportData = this.buildNvlReportData(fileId, reportType, null, fileName);
-            return new ParseResult<>(ReportParseStatus.NO_SUPPORT_TEMPLATE, reportData, fileName);
+            ReportData reportData = this.buildNvlReportData(fileId, reportType, reportName);
+            return new ParseResult<>(ReportParseStatus.NO_SUPPORT_TEMPLATE, reportData, reportName);
         }
         // 不是定期报告的判断逻辑放在不支持的格式下面
         if (reportType == null) {
-            ReportData reportData = this.buildNvlReportData(fileId, ReportType.OTHER, null, fileName);
-            return new ParseResult<>(ReportParseStatus.NOT_A_REPORT, reportData, fileName);
+            ReportData reportData = this.buildNvlReportData(fileId, ReportType.OTHER, reportName);
+            return new ParseResult<>(ReportParseStatus.NOT_A_REPORT, reportData, reportName);
         }
 
         // docx转pdf
         if (Objects.equals(ReportParserFileType.WORD, fileType)) {
             try {
-                String outputFile = FileUtil.getParent(filepath, 1) + File.separator + FileUtil.mainName(fileName) + ".pdf";
+                String outputFile = FileUtil.getParent(filepath, 1) + File.separator + FileUtil.mainName(reportName) + ".pdf";
                 PdfUtil.convertDocxToPdf(filepath, outputFile);
                 filepath = outputFile;
             } catch (Exception e) {
-                log.warn("报告{} 转换为pdf失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
+                log.warn("报告{} 转换为pdf失败:{}", reportName, ExceptionUtil.stacktraceToString(e));
             }
         }
         // 首页和尾页转为png图片,首页用来识别基金名称和基金代码、尾页用来识别印章和联系人
@@ -586,24 +586,24 @@ public class EmailParseService {
                 File outputFile = FileUtil.file(FileUtil.getParent(output, 1));
                 images = PdfUtil.convertFirstAndLastPagesToPng(filepath, outputFile, 300, zipFile.getPdfPwd());
                 if (log.isDebugEnabled()) {
-                    log.debug("报告{} 生成的图片地址是:\n{}", fileName, images);
+                    log.debug("报告{} 生成的图片地址是:\n{}", reportName, images);
                 }
             } catch (Exception e) {
-                log.warn("报告{} 生成图片失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
+                log.warn("报告{} 生成图片失败:{}", reportName, ExceptionUtil.stacktraceToString(e));
             }
         } else if (Objects.equals(ReportParserFileType.IMG, fileType)) {
             try {
                 String outputFile = PdfUtil.compressAndSave(filepath);
                 images.add(outputFile);
             } catch (IOException e) {
-                log.error("报告{} 图片压缩失败,{}", fileName, ExceptionUtil.stacktraceToString(e));
+                log.error("报告{} 图片压缩失败,{}", reportName, ExceptionUtil.stacktraceToString(e));
             }
         }
 
         // ocr识别月报是否管理人版或协会版
         ReportMonthlyType monthlyType = ReportMonthlyType.NO_NEED;
         if (ReportType.MONTHLY == reportType) {
-            monthlyType = this.determineReportType(emailTitle, fileName, filepath, images);
+            monthlyType = this.determineReportType(emailTitle, reportName, filepath, images);
         }
         boolean isAmac = reportType == ReportType.ANNUALLY || reportType == ReportType.QUARTERLY
                 || (reportType == ReportType.MONTHLY && ReportMonthlyType.AMAC == monthlyType);
@@ -611,7 +611,7 @@ public class EmailParseService {
         boolean notSupportFile = false;
         // 解析报告
         ReportData reportData = null;
-        ReportParserParams params = new ReportParserParams(fileId, fileName, filepath, reportType);
+        ReportParserParams params = new ReportParserParams(fileId, reportName, filepath, reportType);
         long start = System.currentTimeMillis();
         try {
             if (isAmac || reportType == ReportType.LETTER) {
@@ -620,7 +620,7 @@ public class EmailParseService {
                 result = new ParseResult<>(1, "报告解析成功", reportData);
             }
         } catch (ReportParseException e) {
-            result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
+            result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), reportName), null);
             log.warn("解析失败:{}", result.getMsg());
             if (e instanceof NotSupportReportException) {
                 notSupportFile = true;
@@ -632,18 +632,18 @@ public class EmailParseService {
             // 如果解析结果是空的就用AI工具解析一次
             if (reportData == null && !notSupportFile) {
                 if (log.isInfoEnabled()) {
-                    log.info("报告{} 是周报或管理人月报或其他类型或解析失败,用AI解析器解析", fileName);
+                    log.info("报告{} 是周报或管理人月报或其他类型或解析失败,用AI解析器解析", reportName);
                 }
                 try {
                     if (!isAmac && CollUtil.isNotEmpty(images)) {
                         filepath = images.get(0);
                     }
-                    params = new ReportParserParams(fileId, fileName, filepath, reportType);
+                    params = new ReportParserParams(fileId, reportName, filepath, reportType);
                     ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, ReportParserFileType.AI);
                     reportData = instance.parse(params);
                     result = new ParseResult<>(1, "报告解析成功--AI", reportData);
                 } catch (ReportParseException e) {
-                    result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
+                    result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), reportName), null);
                     log.warn("AI解析失败:{}", result.getMsg());
                 } catch (Exception e) {
                     log.warn("AI解析错误:{}", ExceptionUtil.stacktraceToString(e));
@@ -651,7 +651,9 @@ public class EmailParseService {
                 }
             }
             // 当reportData==null时重新构建一个reportData对象
-            reportData = this.buildNvlReportData(fileId, reportType, reportData, fileName);
+            if (reportData == null) {
+                reportData = this.buildNvlReportData(fileId, reportType, reportName);
+            }
             if (reportData.getBaseInfo() != null) {
                 // 设置月报类型
                 reportData.getBaseInfo().setMonthlyType(monthlyType.getType());
@@ -661,14 +663,14 @@ public class EmailParseService {
                 }
             }
             // ocr信息提取(印章、联系人、基金名称和产品代码)
-            reportData = this.ocrReportData(fileId, reportType, monthlyType, reportData, fileName, senderEmail, images);
+            reportData = this.ocrReportData(fileId, reportType, monthlyType, reportData, reportName, senderEmail, images);
             result.setData(reportData);
             if (log.isInfoEnabled()) {
-                log.info("报告{} 解析耗时{}ms,结果是:{}", fileName, (System.currentTimeMillis() - start), reportData);
+                log.info("报告{} 解析耗时{}ms,结果是:{}", reportName, (System.currentTimeMillis() - start), reportData);
             }
         }
         // 保存报告解析结果
-        this.saveReportData(reportData, reportType, fileName);
+        this.saveReportData(reportData, reportType, reportName);
         return result;
     }
 
@@ -852,24 +854,18 @@ public class EmailParseService {
      *
      * @param fileId     文件ID
      * @param reportType 报告类型
-     * @param reportData 解析结果对象
-     * @param fileName   报告名称
+     * @param reportName 报告名称
      * @return /
      */
-    private ReportData buildNvlReportData(Integer fileId,
-                                          ReportType reportType,
-                                          ReportData reportData,
-                                          String fileName) {
-        if (reportData != null) {
-            return reportData;
-        }
+    private ReportData buildNvlReportData(Integer fileId, ReportType reportType, String reportName) {
+        ReportData reportData = null;
         if (reportType == null) {
             reportType = ReportType.OTHER;
         }
         ReportBaseInfoDTO baseInfo = new ReportBaseInfoDTO(fileId);
-        baseInfo.setReportName(fileName);
+        baseInfo.setReportName(reportName);
         baseInfo.setReportType(reportType.name());
-        String reportDate = ReportParseUtils.matchReportDate(reportType, fileName);
+        String reportDate = ReportParseUtils.matchReportDate(reportType, reportName);
         baseInfo.setReportDate(ConvertUtil.toDate(reportDate));
         ReportFundInfoDTO fundInfo = new ReportFundInfoDTO(fileId);
         if (ReportType.ANNUALLY == reportType) {
@@ -909,7 +905,7 @@ public class EmailParseService {
         } finally {
             writeWatch.stop();
             if (log.isInfoEnabled()) {
-                log.info("报告{}解析结果保存完成,耗时{}ms", fileName, writeWatch.getTotalTimeMillis());
+                log.info("报告{} 解析结果保存完成,耗时{}ms", fileName, writeWatch.getTotalTimeMillis());
             }
         }
     }

+ 8 - 8
mo-daq/src/main/resources/mapper/EmailFileInfoMapper.xml

@@ -226,14 +226,14 @@
           and id = #{id}
     </update>
 
-    <update id="updateAiParseByFileId">
-        update mo_email_file_info
-        set updatetime = now(),
-            ai_parse = #{aiParse},
-            ai_file_id = #{aiFileId}
-        where isvalid = 1
-          and id = #{fileId}
-    </update>
+<!--    <update id="updateAiParseByFileId">-->
+<!--        update mo_email_file_info-->
+<!--        set updatetime = now(),-->
+<!--            ai_parse = #{aiParse},-->
+<!--            ai_file_id = #{aiFileId}-->
+<!--        where isvalid = 1-->
+<!--          and id = #{fileId}-->
+<!--    </update>-->
 
     <update id="batchUpdateByFileId">
         <foreach collection="entityList" item="entity">