Forráskód Böngészése

feat:新增报告上传解析的接口

wangzaijun 2 hete
szülő
commit
249c9951d3

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/application/api/ParseApi.java

@@ -41,6 +41,6 @@ public class ParseApi {
 
     @PostMapping("upload-parse")
     public ResponseEntity<List<UploadReportResult>> uploadReport(@RequestBody UploadReportParams params) {
-        return null;
+        // Guard against a null result from the service so the response body is always a JSON array.
+        List<UploadReportResult> results = this.service.uploadReport(params);
+        return ResponseEntity.ok(results == null ? List.of() : results);
     }
 }

+ 2 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/OCRReportParser.java

@@ -55,6 +55,7 @@ public class OCRReportParser {
             }
             return res;
         } catch (IORuntimeException e) {
+            this.logger.warn("报告{} 解析出错:{}", filename, ReportParseStatus.AI_NOT_FOUND.getMsg());
             throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
         } catch (Exception e) {
             this.logger.warn("报告{} OCR提取月报类型错误:{}", filename, ExceptionUtil.stacktraceToString(e));
@@ -91,6 +92,7 @@ public class OCRReportParser {
             }
             return res;
         } catch (IORuntimeException e) {
+            this.logger.warn("报告{} 解析错误:{}", filename, ReportParseStatus.AI_NOT_FOUND.getMsg());
             throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
         } catch (Exception e) {
             this.logger.warn("报告{} OCR识别错误:{}", filename, ExceptionUtil.stacktraceToString(e));

+ 10 - 0
mo-daq/src/main/java/com/smppw/modaq/application/service/EmailParseApiService.java

@@ -1,6 +1,8 @@
 package com.smppw.modaq.application.service;
 
 import com.smppw.modaq.domain.dto.MailboxInfoDTO;
+import com.smppw.modaq.domain.dto.UploadReportParams;
+import com.smppw.modaq.domain.dto.UploadReportResult;
 
 import java.util.Date;
 import java.util.List;
@@ -25,6 +27,14 @@ public interface EmailParseApiService {
      */
     void parseEmail(MailboxInfoDTO mailboxInfoDTO, Date startDate, Date endDate, List<String> folderNames, List<Integer> emailTypes);
 
+    /**
+     * Parses uploaded report files.
+     *
+     * @param params upload parameters
+     * @return the parse results for the uploaded reports
+     */
+    List<UploadReportResult> uploadReport(UploadReportParams params);
+
 //    /**
 //     * 重新解析指定邮件
 //     *

+ 6 - 0
mo-daq/src/main/java/com/smppw/modaq/application/service/EmailParseApiServiceImpl.java

@@ -7,6 +7,8 @@ import com.smppw.modaq.application.util.EmailUtil;
 import com.smppw.modaq.common.conts.DateConst;
 import com.smppw.modaq.domain.dto.EmailContentInfoDTO;
 import com.smppw.modaq.domain.dto.MailboxInfoDTO;
+import com.smppw.modaq.domain.dto.UploadReportParams;
+import com.smppw.modaq.domain.dto.UploadReportResult;
 import com.smppw.modaq.domain.entity.EmailFileInfoDO;
 import com.smppw.modaq.domain.entity.EmailParseInfoDO;
 import com.smppw.modaq.domain.entity.MailboxInfoDO;
@@ -93,6 +95,10 @@ public class EmailParseApiServiceImpl implements EmailParseApiService {
         emailParseService.parseEmail(mailboxInfoDTO, startDate, endDate, folderNames, emailTypes);
     }
 
+    /**
+     * Hands the upload-and-parse request over to {@code emailParseService} and returns its answer unchanged.
+     */
+    @Override
+    public List<UploadReportResult> uploadReport(UploadReportParams params) {
+        List<UploadReportResult> results = this.emailParseService.uploadReportResults(params);
+        return results;
+    }
 //    private void endEmailTask(Integer id,Integer taskStatus) {
 //        try{
 //            EmailTaskInfoDO emailTaskInfoDO = new EmailTaskInfoDO();

+ 1 - 0
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParseStatus.java

@@ -2,6 +2,7 @@ package com.smppw.modaq.common.enums;
 
 public enum ReportParseStatus implements StatusCode {
     SYSTEM_ERROR(20001, "系统异常"),
+    ARCHIVE_FAIL(20002, "压缩包解压失败"),
 
     AI_NOT_FOUND(20009, "AI资源找不到"),
     NO_SUPPORT_AI(20010, "报告[{}]不支持AI解析"),

+ 2 - 1
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportParserFileType.java

@@ -1,5 +1,6 @@
 package com.smppw.modaq.common.enums;
 
+import cn.hutool.core.util.StrUtil;
 import com.smppw.modaq.common.conts.Constants;
 import lombok.Getter;
 
@@ -27,6 +28,6 @@ public enum ReportParserFileType {
 
     public static ReportParserFileType getBySuffix(String suffix) {
+        // A blank suffix is "contained" in every string and would wrongly select the first constant,
+        // so a file without an extension is reported as unsupported (null) instead.
+        if (StrUtil.isBlank(suffix)) {
+            return null;
+        }
         return Arrays.stream(ReportParserFileType.values())
-                .filter(e -> e.getSuffix().contains(suffix)).findFirst().orElse(null);
+                .filter(e -> StrUtil.containsIgnoreCase(e.getSuffix(), suffix)).findFirst().orElse(null);
     }
 }

+ 6 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/EmailInfoDTO.java

@@ -34,6 +34,12 @@ public class EmailInfoDTO {
         this.emailFileList = ListUtil.list(false);
     }
 
+    /**
+     * Creates an email info object from a title and its list of files.
+     *
+     * @param emailTitle    email title
+     * @param emailFileList files belonging to the email; when {@code null}, the empty list
+     *                      created by the no-arg constructor is kept
+     */
+    public EmailInfoDTO(String emailTitle, List<EmailZipFileDTO> emailFileList) {
+        this();
+        this.emailTitle = emailTitle;
+        if (emailFileList != null) {
+            this.emailFileList = emailFileList;
+        }
+    }
+
     public EmailInfoDTO(EmailContentInfoDTO emailDto, List<EmailZipFileDTO> emailFileList) {
         this();
         this.emailAddress = emailDto.getEmailAddress();

+ 42 - 1
mo-daq/src/main/java/com/smppw/modaq/domain/dto/UploadReportParams.java

@@ -1,8 +1,13 @@
 package com.smppw.modaq.domain.dto;
 
+import cn.hutool.core.io.FileUtil;
+import cn.hutool.core.util.StrUtil;
+import com.smppw.modaq.application.util.EmailUtil;
+import com.smppw.modaq.common.conts.EmailTypeConst;
 import lombok.Getter;
 import lombok.Setter;
 
+import java.io.File;
 import java.util.List;
 
 @Setter
@@ -12,10 +17,46 @@ public class UploadReportParams {
     private List<ReportInfo> reportInfos;
 
     @Setter
-    @Getter
     public static class ReportInfo {
+        /**
+         * Report name; when blank, the file name taken from the report path is used instead.
+         */
         private String reportName;
+        /**
+         * Report type; when missing or not a supported type, it is derived from the report path.
+         *
+         * @see EmailTypeConst
+         */
         private Integer reportType;
+        /**
+         * Report path, required.
+         */
         private String reportPath;
+        /**
+         * File extension derived from the report path.
+         */
+        private transient String extName;
+        /**
+         * The report path converted to a File object.
+         */
+        private transient File reportFile;
+
+        /**
+         * @return the explicit report name, or the file name of the report path when no name was given
+         */
+        public String getReportName() {
+            if (StrUtil.isNotBlank(this.reportName)) {
+                return this.reportName;
+            }
+            // Resolve here as well so the fallback does not depend on getReportPath() having been called first.
+            this.resolveReportFile();
+            return FileUtil.getName(this.reportFile);
+        }
+
+        /**
+         * @return the explicit report type when it is supported, otherwise the type derived from the report path
+         */
+        public Integer getReportType() {
+            if (this.reportType != null && EmailTypeConst.SUPPORT_EMAIL_TYPES.contains(this.reportType)) {
+                return this.reportType;
+            }
+            return EmailUtil.getEmailTypeBySubject(this.reportPath);
+        }
+
+        /**
+         * @return the report path; the derived file object and extension are refreshed as a side effect
+         */
+        public String getReportPath() {
+            this.resolveReportFile();
+            return reportPath;
+        }
+
+        /**
+         * @return the file extension of the report path
+         */
+        public String getExtName() {
+            this.resolveReportFile();
+            return this.extName;
+        }
+
+        /**
+         * Recomputes the derived file object and extension from the current report path.
+         */
+        private void resolveReportFile() {
+            this.reportFile = FileUtil.file(this.reportPath);
+            this.extName = FileUtil.extName(this.reportFile);
+        }
     }
 }

+ 0 - 9
mo-daq/src/main/java/com/smppw/modaq/domain/dto/UploadReportResult.java

@@ -6,19 +6,10 @@ import lombok.Setter;
 @Setter
 @Getter
 public class UploadReportResult {
-    private String reportName;
-    private Integer reportType;
     private String reportPath;
     private int status;
     private String msg;
 
     public UploadReportResult() {
     }
-
-    public UploadReportResult(UploadReportParams.ReportInfo info) {
-        this.reportName = info.getReportName();
-        this.reportType = info.getReportType();
-        this.reportPath = info.getReportPath();
-        this.status = 0;
-    }
 }

+ 11 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportData.java

@@ -18,6 +18,11 @@ public abstract class ReportData implements Serializable {
     @Serial
     private static final long serialVersionUID = Constants.DEFAULT_SERIAL_ID;
     /**
+     * 报告路径
+     */
+    @Setter
+    private String reportPath;
+    /**
      * 报告基本信息
      */
     private final ReportBaseInfoDTO baseInfo;
@@ -61,4 +66,10 @@ public abstract class ReportData implements Serializable {
                 ", fundInfo=" + fundInfo +
                 ", aiParse=" + aiParse;
     }
+
+    /**
+     * Minimal concrete {@link ReportData} that adds nothing to the abstract base class.
+     */
+    public static class DefaultReportData extends ReportData {
+        /**
+         * @param baseInfo report base information, passed straight to the base class
+         * @param fundInfo report fund information, passed straight to the base class
+         */
+        public DefaultReportData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo) {
+            super(baseInfo, fundInfo);
+        }
+    }
 }

+ 153 - 56
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -24,10 +24,7 @@ import com.smppw.modaq.common.enums.ReportParserFileType;
 import com.smppw.modaq.common.enums.ReportType;
 import com.smppw.modaq.common.exception.NotSupportReportException;
 import com.smppw.modaq.common.exception.ReportParseException;
-import com.smppw.modaq.domain.dto.EmailContentInfoDTO;
-import com.smppw.modaq.domain.dto.EmailInfoDTO;
-import com.smppw.modaq.domain.dto.EmailZipFileDTO;
-import com.smppw.modaq.domain.dto.MailboxInfoDTO;
+import com.smppw.modaq.domain.dto.*;
 import com.smppw.modaq.domain.dto.report.OCRParseData;
 import com.smppw.modaq.domain.dto.report.ParseResult;
 import com.smppw.modaq.domain.dto.report.ReportData;
@@ -53,8 +50,6 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -201,12 +196,8 @@ public class EmailParseService {
         String filepath = emailContentInfoDTO.getFilePath();
         String emailTitle = emailContentInfoDTO.getEmailTitle();
 
-        if (ArchiveUtil.isZip(filepath)) {
-            this.handleCompressedFiles(emailTitle, filepath, Constants.ARCHIVE_ZIP, emailType, resultList);
-        } else if (ArchiveUtil.isRAR(filepath)) {
-            this.handleCompressedFiles(emailTitle, filepath, Constants.ARCHIVE_RAR, emailType, resultList);
-        } else if (ArchiveUtil.is7z(filepath)) {
-            this.handleCompressedFiles(emailTitle, filepath, Constants.ARCHIVE_7Z, emailType, resultList);
+        if (ArchiveUtil.isArchive(filepath)) {
+            this.handleCompressedFiles(emailTitle, filepath, emailType, resultList);
         } else {
             // 不是压缩包时
             EmailZipFileDTO dto = new EmailZipFileDTO(emailTitle, emailContentInfoDTO);
@@ -231,11 +222,21 @@ public class EmailParseService {
         return resultList;
     }
 
-    private void handleCompressedFiles(String emailTitle, String filepath, String extension,
-                                       Integer emailType, List<EmailZipFileDTO> resultList) throws IOException {
+    /**
+     * 解压压缩包并把压缩包里面的所有文件放在resultList中
+     *
+     * @param emailTitle 邮件主题
+     * @param filepath   压缩包路径
+     * @param emailType  邮件解析类型
+     * @param resultList 解压结果列表
+     * @throws IOException /
+     */
+    private void handleCompressedFiles(String emailTitle,
+                                       String filepath,
+                                       Integer emailType,
+                                       List<EmailZipFileDTO> resultList) throws IOException {
         String parent = FileUtil.getParent(filepath, 2);
         String destPath = parent + File.separator + "archive" + File.separator + FileUtil.mainName(filepath);
-
         File destFile = new File(destPath);
         if (!destFile.exists()) {
             if (!destFile.mkdirs()) {
@@ -273,14 +274,99 @@ public class EmailParseService {
         }
     }
 
-    private String getDestinationPath(String filepath, String extension) {
-        Path path = Paths.get(filepath);
-        String fileName = path.getFileName().toString();
-        String baseName = fileName.substring(0, fileName.length() - extension.length());
-        return path.getParent().resolve(baseName).toString();
+    /**
+     * Parses the attachments of one email and persists the outcome.
+     * <p>
+     * The email is marked as failed as soon as a single report fails to parse.
+     *
+     * @param emailKey     uuid of the email
+     * @param emailAddress email address stored with the email record (described as the sender's
+     *                     address here — TODO confirm against the callers)
+     * @param emailInfo    email information, including its attachment files
+     */
+    public void saveRelatedTable(String emailKey, String emailAddress, EmailInfoDTO emailInfo) {
+        // Pre-check the attachments; null means there is nothing to parse.
+        Long totalSize = this.checkEmailFileInfo(emailInfo);
+        if (totalSize == null) {
+            return;
+        }
+        // Parse the reports and save the data.
+        List<ParseResult<ReportData>> dataList = ListUtil.list(true);
+        Integer emailId = this.parseResults(null, emailKey, emailAddress, totalSize, emailInfo, dataList);
+
+        String failReason = null;
+        int emailParseStatus = EmailParseStatusConst.SUCCESS;
+        // A single failed report marks the whole email as failed.
+        if (CollUtil.isNotEmpty(dataList)) {
+            // Record which files were parsed by AI.
+            List<ReportData> aiParsedList = dataList.stream().map(ParseResult::getData)
+                    .filter(Objects::nonNull).filter(e -> Objects.equals(true, e.getAiParse())).toList();
+            if (CollUtil.isNotEmpty(aiParsedList)) {
+                for (ReportData data : aiParsedList) {
+                    this.emailFileInfoMapper.updateAiParseByFileId(data.getBaseInfo().getFileId(),
+                            data.getAiParse(), data.getAiFileId());
+                }
+            }
+            long failNum = dataList.stream().filter(e -> !Objects.equals(EmailParseStatusConst.SUCCESS, e.getStatus())).count();
+            if (failNum > 0) {
+                emailParseStatus = EmailParseStatusConst.FAIL;
+                failReason = dataList.stream().map(ParseResult::getMsg).collect(Collectors.joining(";"));
+            }
+        }
+        if (emailId == null) {
+            // The email record was not saved, so there is no row whose parse status could be updated.
+            return;
+        }
+        this.emailParseInfoMapper.updateParseStatus(emailId, emailParseStatus, failReason);
+    }
 
-    public void saveRelatedTable(String emailKey, String emailAddress, EmailInfoDTO emailInfo) {
+    /**
+     * Parses uploaded report files and returns the parse status of each one.
+     * <p>
+     * Archives are unpacked first. An archive that cannot be unpacked yields a failure entry
+     * for the archive path, and the remaining reports are still processed.
+     *
+     * @param params upload parameters carrying the report paths
+     * @return one entry per parsed file or failed archive; empty (never {@code null}) when
+     * there is nothing to report
+     */
+    public List<UploadReportResult> uploadReportResults(UploadReportParams params) {
+        List<UploadReportResult> resultList = ListUtil.list(false);
+        List<UploadReportParams.ReportInfo> reportInfos = params.getReportInfos();
+        if (CollUtil.isEmpty(reportInfos)) {
+            return resultList;
+        }
+        List<ParseResult<ReportData>> dataList = ListUtil.list(false);
+        List<EmailZipFileDTO> dtos = ListUtil.list(false);
+        for (UploadReportParams.ReportInfo e : reportInfos) {
+            String reportPath = e.getReportPath();
+            if (ArchiveUtil.isArchive(reportPath)) {
+                try {
+                    this.handleCompressedFiles(params.getTitle(), reportPath, e.getReportType(), dtos);
+                } catch (Exception ex) {
+                    log.warn("报告{} 压缩包解压失败:{}", reportPath, ExceptionUtil.stacktraceToString(ex));
+                    ParseResult<ReportData> result = new ParseResult<>();
+                    result.setStatus(ReportParseStatus.ARCHIVE_FAIL.getCode());
+                    result.setMsg(ReportParseStatus.ARCHIVE_FAIL.getMsg());
+                    ReportData reportData = new ReportData.DefaultReportData(null, null);
+                    reportData.setReportPath(reportPath);
+                    // Attach the placeholder so the archive path reaches the response below.
+                    result.setData(reportData);
+                    dataList.add(result);
+                }
+            } else {
+                dtos.add(new EmailZipFileDTO(params.getTitle(), reportPath, e.getReportType()));
+            }
+        }
+        EmailInfoDTO emailInfo = new EmailInfoDTO(params.getTitle(), dtos);
+        // Pre-check the files; null means nothing is left to parse, but archive failures must still be reported.
+        Long totalSize = this.checkEmailFileInfo(emailInfo);
+        if (totalSize != null) {
+            this.parseResults(-1, null, null, totalSize, emailInfo, dataList);
+        }
+        for (ParseResult<ReportData> result : dataList) {
+            ReportData data = result.getData();
+            UploadReportResult temp = new UploadReportResult();
+            temp.setReportPath(data == null ? null : data.getReportPath());
+            temp.setMsg(result.getMsg());
+            temp.setStatus(result.getStatus());
+            resultList.add(temp);
+        }
+        return resultList;
+    }
+
+    /**
+     * 邮件信息前置处理,在解析操作执行之前的过滤逻辑和校验逻辑。返回所有附件大小汇总
+     *
+     * @param emailInfo 邮件信息(包含所有解压后的文件)
+     * @return 所有附件大小汇总,为null说明没有文件需要上传
+     */
+    private Long checkEmailFileInfo(EmailInfoDTO emailInfo) {
         String emailTitle = emailInfo.getEmailTitle();
         List<EmailZipFileDTO> dtos = emailInfo.getEmailFileList();
         // 如果压缩包里面既有pdf又有其他格式的文件,说明其他格式的文件是不需要解析的
@@ -333,57 +419,62 @@ public class EmailParseService {
         }
         if (CollUtil.isEmpty(dtos)) {
             log.info("邮件{} 所有文件都已经解析成功过,不能重复解析了", emailTitle);
-            return;
+            return null;
         }
         if (log.isInfoEnabled()) {
             log.info("邮件{} 还有报告待解析:\n{}", emailTitle, dtos);
         }
-        // 解析并保存数据
-        this.parseAndSave(emailKey, emailAddress, emailInfo, totalSize);
+        return totalSize;
     }
 
-    private void parseAndSave(String emailKey, String emailAddress, EmailInfoDTO emailInfo, long totalSize) {
+    /**
+     * Saves the email record when needed, then parses every attachment and collects the results.
+     *
+     * @param emailId      id of the email record; uploads pass -1, and {@code null} makes this
+     *                     method create the record first
+     * @param emailKey     uuid of the email (mailbox download flow)
+     * @param emailAddress email address stored with the email record (mailbox download flow;
+     *                     whether this is the sender or the recipient is not visible here — TODO confirm)
+     * @param totalSize    total size of all attachments
+     * @param emailInfo    email information, including its attachments
+     * @param resultList   receives one parse result per attachment; each result carries the report path
+     * @return id of the email record, or {@code null} when the record could not be saved
+     */
+    private Integer parseResults(Integer emailId,
+                                 String emailKey,
+                                 String emailAddress,
+                                 long totalSize,
+                                 EmailInfoDTO emailInfo,
+                                 List<ParseResult<ReportData>> resultList) {
         String emailTitle = emailInfo.getEmailTitle();
         List<EmailZipFileDTO> dtos = emailInfo.getEmailFileList();
-        // 保存邮件信息
-        Integer emailType = dtos.get(0).getEmailType();
-        EmailParseInfoDO emailParseInfoDO = this.buildEmailParseInfo(emailAddress, emailType, emailInfo, totalSize);
-        emailParseInfoDO.setEmailKey(emailKey);
-        Integer emailId = this.saveEmailParseInfo(emailParseInfoDO);
         if (emailId == null) {
-            return;
+            // Save the email record.
+            Integer emailType = dtos.get(0).getEmailType();
+            EmailParseInfoDO emailParseInfoDO = this.buildEmailParseInfo(emailAddress, emailType, emailInfo, totalSize);
+            emailParseInfoDO.setEmailKey(emailKey);
+            emailId = this.saveEmailParseInfo(emailParseInfoDO);
+            if (emailId == null) {
+                // As before the refactoring: without a saved email record no attachment is processed.
+                return null;
+            }
         }
         // 解析邮件报告
-        List<ParseResult<ReportData>> dataList = ListUtil.list(false);
         for (EmailZipFileDTO zipFile : dtos) {
             EmailFileInfoDO emailFile = this.saveEmailFileInfo(emailId, zipFile.getFilename(), zipFile.getFilepath());
             // 解析并保存报告
             ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailTitle, emailFile, zipFile);
-            dataList.add(parseResult);
-        }
-
-        String failReason = null;
-        int emailParseStatus = EmailParseStatusConst.SUCCESS;
-        // 报告邮件有一条失败就表示整个邮件解析失败
-        if (CollUtil.isNotEmpty(dataList)) {
-            // ai解析结果
-            List<ReportData> aiParaseList = dataList.stream().map(ParseResult::getData)
-                    .filter(Objects::nonNull).filter(e -> Objects.equals(true, e.getAiParse())).toList();
-            if (CollUtil.isNotEmpty(aiParaseList)) {
-                for (ReportData data : aiParaseList) {
-                    this.emailFileInfoMapper.updateAiParseByFileId(data.getBaseInfo().getFileId(),
-                            data.getAiParse(), data.getAiFileId());
-                }
-            }
-            long failNum = dataList.stream().filter(e -> !Objects.equals(EmailParseStatusConst.SUCCESS, e.getStatus())).count();
-            if (failNum > 0) {
-                emailParseStatus = EmailParseStatusConst.FAIL;
-                failReason = dataList.stream().map(ParseResult::getMsg).collect(Collectors.joining(";"));
+            // A failed parse may carry no data; use a placeholder so the report path can still be returned.
+            if (parseResult.getData() == null) {
+                parseResult.setData(new ReportData.DefaultReportData(null, null));
             }
+            parseResult.getData().setReportPath(zipFile.getFilepath());
+            resultList.add(parseResult);
         }
-        this.emailParseInfoMapper.updateParseStatus(emailId, emailParseStatus, failReason);
+        return emailId;
     }
 
+    /**
+     * 解析报告并保存解析结果
+     *
+     * @param emailTitle    邮件主题
+     * @param emailFileInfo 当前报告信息
+     * @param zipFile       当前报告的路径信息
+     * @return /
+     */
     private ParseResult<ReportData> parseReportAndHandleResult(String emailTitle,
                                                                EmailFileInfoDO emailFileInfo,
                                                                EmailZipFileDTO zipFile) {
@@ -577,7 +668,11 @@ public class EmailParseService {
         }
         // 4.ocr 提取“曲线”、“基金份额”等关键字,如果有曲线则是管理人,如果有基金份额则是协会
         if (CollUtil.isNotEmpty(images)) {
-            return new OCRReportParser().parseMonthlyType(fileName, this.ocrParserUrl, images.get(0));
+            try {
+                return new OCRReportParser().parseMonthlyType(fileName, this.ocrParserUrl, images.get(0));
+            } catch (Exception ignored) {
+                return ReportMonthlyType.FAILED;
+            }
         }
         return ReportMonthlyType.FAILED;
     }
@@ -589,8 +684,10 @@ public class EmailParseService {
      * @param fileName   报告名称
      * @param images     报告的收益和尾页png图片
      */
-    private void ocrReportData(ReportType reportType, ReportData reportData,
-                               String fileName, List<String> images) {
+    private void ocrReportData(ReportType reportType,
+                               ReportData reportData,
+                               String fileName,
+                               List<String> images) {
         if (reportData == null || CollUtil.isEmpty(images)) {
             return;
         }