|
@@ -24,10 +24,7 @@ import com.smppw.modaq.common.enums.ReportParserFileType;
|
|
import com.smppw.modaq.common.enums.ReportType;
|
|
import com.smppw.modaq.common.enums.ReportType;
|
|
import com.smppw.modaq.common.exception.NotSupportReportException;
|
|
import com.smppw.modaq.common.exception.NotSupportReportException;
|
|
import com.smppw.modaq.common.exception.ReportParseException;
|
|
import com.smppw.modaq.common.exception.ReportParseException;
|
|
-import com.smppw.modaq.domain.dto.EmailContentInfoDTO;
|
|
|
|
-import com.smppw.modaq.domain.dto.EmailInfoDTO;
|
|
|
|
-import com.smppw.modaq.domain.dto.EmailZipFileDTO;
|
|
|
|
-import com.smppw.modaq.domain.dto.MailboxInfoDTO;
|
|
|
|
|
|
+import com.smppw.modaq.domain.dto.*;
|
|
import com.smppw.modaq.domain.dto.report.OCRParseData;
|
|
import com.smppw.modaq.domain.dto.report.OCRParseData;
|
|
import com.smppw.modaq.domain.dto.report.ParseResult;
|
|
import com.smppw.modaq.domain.dto.report.ParseResult;
|
|
import com.smppw.modaq.domain.dto.report.ReportData;
|
|
import com.smppw.modaq.domain.dto.report.ReportData;
|
|
@@ -53,8 +50,6 @@ import java.io.File;
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.io.InputStream;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Files;
|
|
-import java.nio.file.Path;
|
|
|
|
-import java.nio.file.Paths;
|
|
|
|
import java.util.*;
|
|
import java.util.*;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
import java.util.regex.Pattern;
|
|
@@ -201,12 +196,8 @@ public class EmailParseService {
|
|
String filepath = emailContentInfoDTO.getFilePath();
|
|
String filepath = emailContentInfoDTO.getFilePath();
|
|
String emailTitle = emailContentInfoDTO.getEmailTitle();
|
|
String emailTitle = emailContentInfoDTO.getEmailTitle();
|
|
|
|
|
|
- if (ArchiveUtil.isZip(filepath)) {
|
|
|
|
- this.handleCompressedFiles(emailTitle, filepath, Constants.ARCHIVE_ZIP, emailType, resultList);
|
|
|
|
- } else if (ArchiveUtil.isRAR(filepath)) {
|
|
|
|
- this.handleCompressedFiles(emailTitle, filepath, Constants.ARCHIVE_RAR, emailType, resultList);
|
|
|
|
- } else if (ArchiveUtil.is7z(filepath)) {
|
|
|
|
- this.handleCompressedFiles(emailTitle, filepath, Constants.ARCHIVE_7Z, emailType, resultList);
|
|
|
|
|
|
+ if (ArchiveUtil.isArchive(filepath)) {
|
|
|
|
+ this.handleCompressedFiles(emailTitle, filepath, emailType, resultList);
|
|
} else {
|
|
} else {
|
|
// 不是压缩包时
|
|
// 不是压缩包时
|
|
EmailZipFileDTO dto = new EmailZipFileDTO(emailTitle, emailContentInfoDTO);
|
|
EmailZipFileDTO dto = new EmailZipFileDTO(emailTitle, emailContentInfoDTO);
|
|
@@ -231,11 +222,21 @@ public class EmailParseService {
|
|
return resultList;
|
|
return resultList;
|
|
}
|
|
}
|
|
|
|
|
|
- private void handleCompressedFiles(String emailTitle, String filepath, String extension,
|
|
|
|
- Integer emailType, List<EmailZipFileDTO> resultList) throws IOException {
|
|
|
|
|
|
+ /**
|
|
|
|
+ * 解压压缩包并把压缩包里面的所有文件放在resultList中
|
|
|
|
+ *
|
|
|
|
+ * @param emailTitle 邮件主题
|
|
|
|
+ * @param filepath 压缩包路径
|
|
|
|
+ * @param emailType 邮件解析类型
|
|
|
|
+ * @param resultList 解压结果列表
|
|
|
|
+ * @throws IOException /
|
|
|
|
+ */
|
|
|
|
+ private void handleCompressedFiles(String emailTitle,
|
|
|
|
+ String filepath,
|
|
|
|
+ Integer emailType,
|
|
|
|
+ List<EmailZipFileDTO> resultList) throws IOException {
|
|
String parent = FileUtil.getParent(filepath, 2);
|
|
String parent = FileUtil.getParent(filepath, 2);
|
|
String destPath = parent + File.separator + "archive" + File.separator + FileUtil.mainName(filepath);
|
|
String destPath = parent + File.separator + "archive" + File.separator + FileUtil.mainName(filepath);
|
|
-
|
|
|
|
File destFile = new File(destPath);
|
|
File destFile = new File(destPath);
|
|
if (!destFile.exists()) {
|
|
if (!destFile.exists()) {
|
|
if (!destFile.mkdirs()) {
|
|
if (!destFile.mkdirs()) {
|
|
@@ -273,14 +274,99 @@ public class EmailParseService {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- private String getDestinationPath(String filepath, String extension) {
|
|
|
|
- Path path = Paths.get(filepath);
|
|
|
|
- String fileName = path.getFileName().toString();
|
|
|
|
- String baseName = fileName.substring(0, fileName.length() - extension.length());
|
|
|
|
- return path.getParent().resolve(baseName).toString();
|
|
|
|
|
|
+    /**
+     * Parses the attachments of one email and persists the outcome.
+     * <p>
+     * Steps: run the attachment pre-check, parse every remaining file, write the AI-parse
+     * flag back for each AI-parsed file, then store the overall parse status of the email.
+     * A single failed report marks the whole email as failed.
+     *
+     * @param emailKey     uuid of the email
+     * @param emailAddress sender address
+     * @param emailInfo    email information
+     */
+    public void saveRelatedTable(String emailKey, String emailAddress, EmailInfoDTO emailInfo) {
+        // Attachment pre-check; null means there is nothing left to parse.
+        Long totalSize = this.checkEmailFileInfo(emailInfo);
+        if (totalSize == null) {
+            return;
+        }
+        // Parse the attachments and save the data.
+        List<ParseResult<ReportData>> dataList = ListUtil.list(true);
+        Integer emailId = this.parseResults(null, emailKey, emailAddress, totalSize, emailInfo, dataList);
+
+        // Write the AI-parse result back for every file that was parsed by AI.
+        for (ParseResult<ReportData> result : dataList) {
+            ReportData data = result.getData();
+            if (data != null && Objects.equals(true, data.getAiParse())) {
+                this.emailFileInfoMapper.updateAiParseByFileId(data.getBaseInfo().getFileId(),
+                        data.getAiParse(), data.getAiFileId());
+            }
+        }
+
+        // Without a persisted email row there is no record whose status could be updated.
+        if (emailId == null) {
+            log.warn("邮件{} 信息保存失败,无法更新解析状态", emailInfo.getEmailTitle());
+            return;
+        }
+
+        // One failed report means the whole email counts as failed.
+        List<ParseResult<ReportData>> failedList = dataList.stream()
+                .filter(e -> !Objects.equals(EmailParseStatusConst.SUCCESS, e.getStatus()))
+                .toList();
+        String failReason = null;
+        int emailParseStatus = EmailParseStatusConst.SUCCESS;
+        if (!failedList.isEmpty()) {
+            emailParseStatus = EmailParseStatusConst.FAIL;
+            // Only failed reports contribute to the reason; null messages are skipped so the
+            // joined text never contains a literal "null".
+            failReason = failedList.stream()
+                    .map(ParseResult::getMsg)
+                    .filter(Objects::nonNull)
+                    .collect(Collectors.joining(";"));
+        }
+        this.emailParseInfoMapper.updateParseStatus(emailId, emailParseStatus, failReason);
     }
|
|
|
|
|
|
- public void saveRelatedTable(String emailKey, String emailAddress, EmailInfoDTO emailInfo) {
|
|
|
|
|
|
+    /**
+     * Parses uploaded report files and returns the parse status of each one.
+     * <p>
+     * Archives are extracted first; an archive that cannot be extracted yields an
+     * {@code ARCHIVE_FAIL} entry for its path instead of aborting the whole upload.
+     *
+     * @param params upload parameters: the title plus the path and type of every uploaded report
+     * @return one entry per processed file or failed archive; {@code null} when the pre-check
+     *         leaves nothing to parse and no archive failed
+     */
+    public List<UploadReportResult> uploadReportResults(UploadReportParams params) {
+        List<ParseResult<ReportData>> dataList = ListUtil.list(false);
+        List<UploadReportParams.ReportInfo> reportInfos = params.getReportInfos();
+        List<EmailZipFileDTO> dtos = ListUtil.list(false);
+        for (UploadReportParams.ReportInfo reportInfo : reportInfos) {
+            String reportPath = reportInfo.getReportPath();
+            if (!ArchiveUtil.isArchive(reportPath)) {
+                dtos.add(new EmailZipFileDTO(params.getTitle(), reportPath, reportInfo.getReportType()));
+                continue;
+            }
+            try {
+                this.handleCompressedFiles(params.getTitle(), reportPath, reportInfo.getReportType(), dtos);
+            } catch (Exception ex) {
+                log.warn("报告{} 压缩包解压失败:{}", reportPath, ExceptionUtil.stacktraceToString(ex));
+                ReportData reportData = new ReportData.DefaultReportData(null, null);
+                reportData.setReportPath(reportPath);
+                ParseResult<ReportData> result = new ParseResult<>();
+                result.setStatus(ReportParseStatus.ARCHIVE_FAIL.getCode());
+                result.setMsg(ReportParseStatus.ARCHIVE_FAIL.getMsg());
+                // Attach the placeholder so the failed archive's path reaches the caller.
+                result.setData(reportData);
+                dataList.add(result);
+            }
+        }
+        EmailInfoDTO emailInfo = new EmailInfoDTO(params.getTitle(), dtos);
+        // Attachment pre-check; null means there is nothing left to parse.
+        Long totalSize = this.checkEmailFileInfo(emailInfo);
+        if (totalSize != null) {
+            // Uploads are not tied to a mailbox email, hence the fixed id -1.
+            this.parseResults(-1, null, null, totalSize, emailInfo, dataList);
+        } else if (dataList.isEmpty()) {
+            return null;
+        }
+        // Archive failures collected above are reported even when nothing else was parsed.
+        List<UploadReportResult> resultList = ListUtil.list(false);
+        for (ParseResult<ReportData> result : dataList) {
+            ReportData data = result.getData();
+            UploadReportResult temp = new UploadReportResult();
+            temp.setReportPath(data == null ? null : data.getReportPath());
+            temp.setMsg(result.getMsg());
+            temp.setStatus(result.getStatus());
+            resultList.add(temp);
+        }
+        return resultList;
+    }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 邮件信息前置处理,在解析操作执行之前的过滤逻辑和校验逻辑。返回所有附件大小汇总
|
|
|
|
+ *
|
|
|
|
+ * @param emailInfo 邮件信息(包含所有解压后的文件)
|
|
|
|
+ * @return 所有附件大小汇总,为null说明没有文件需要上传
|
|
|
|
+ */
|
|
|
|
+ private Long checkEmailFileInfo(EmailInfoDTO emailInfo) {
|
|
String emailTitle = emailInfo.getEmailTitle();
|
|
String emailTitle = emailInfo.getEmailTitle();
|
|
List<EmailZipFileDTO> dtos = emailInfo.getEmailFileList();
|
|
List<EmailZipFileDTO> dtos = emailInfo.getEmailFileList();
|
|
// 如果压缩包里面既有pdf又有其他格式的文件,说明其他格式的文件是不需要解析的
|
|
// 如果压缩包里面既有pdf又有其他格式的文件,说明其他格式的文件是不需要解析的
|
|
@@ -333,57 +419,62 @@ public class EmailParseService {
|
|
}
|
|
}
|
|
if (CollUtil.isEmpty(dtos)) {
|
|
if (CollUtil.isEmpty(dtos)) {
|
|
log.info("邮件{} 所有文件都已经解析成功过,不能重复解析了", emailTitle);
|
|
log.info("邮件{} 所有文件都已经解析成功过,不能重复解析了", emailTitle);
|
|
- return;
|
|
|
|
|
|
+ return null;
|
|
}
|
|
}
|
|
if (log.isInfoEnabled()) {
|
|
if (log.isInfoEnabled()) {
|
|
log.info("邮件{} 还有报告待解析:\n{}", emailTitle, dtos);
|
|
log.info("邮件{} 还有报告待解析:\n{}", emailTitle, dtos);
|
|
}
|
|
}
|
|
- // 解析并保存数据
|
|
|
|
- this.parseAndSave(emailKey, emailAddress, emailInfo, totalSize);
|
|
|
|
|
|
+ return totalSize;
|
|
}
|
|
}
|
|
|
|
|
|
- private void parseAndSave(String emailKey, String emailAddress, EmailInfoDTO emailInfo, long totalSize) {
|
|
|
|
|
|
+ /**
|
|
|
|
+ * 邮件信息保存+附件解析
|
|
|
|
+ *
|
|
|
|
+ * @param emailId 邮件ID,上传解析时一定是-1
|
|
|
|
+ * @param emailKey 邮件uuid(邮箱下载解析时)
|
|
|
|
+ * @param emailAddress 接收人地址(邮箱下载解析时)
|
|
|
|
+ * @param totalSize 所有附件大小汇总
|
|
|
|
+ * @param emailInfo 邮件信息,包含附件
|
|
|
|
+ * @param resultList 解析结果
|
|
|
|
+ * @return 邮件数据ID
|
|
|
|
+ */
|
|
|
|
+ private Integer parseResults(Integer emailId,
|
|
|
|
+ String emailKey,
|
|
|
|
+ String emailAddress,
|
|
|
|
+ long totalSize,
|
|
|
|
+ EmailInfoDTO emailInfo,
|
|
|
|
+ List<ParseResult<ReportData>> resultList) {
|
|
String emailTitle = emailInfo.getEmailTitle();
|
|
String emailTitle = emailInfo.getEmailTitle();
|
|
List<EmailZipFileDTO> dtos = emailInfo.getEmailFileList();
|
|
List<EmailZipFileDTO> dtos = emailInfo.getEmailFileList();
|
|
- // 保存邮件信息
|
|
|
|
- Integer emailType = dtos.get(0).getEmailType();
|
|
|
|
- EmailParseInfoDO emailParseInfoDO = this.buildEmailParseInfo(emailAddress, emailType, emailInfo, totalSize);
|
|
|
|
- emailParseInfoDO.setEmailKey(emailKey);
|
|
|
|
- Integer emailId = this.saveEmailParseInfo(emailParseInfoDO);
|
|
|
|
if (emailId == null) {
|
|
if (emailId == null) {
|
|
- return;
|
|
|
|
|
|
+ // 保存邮件信息
|
|
|
|
+ Integer emailType = dtos.get(0).getEmailType();
|
|
|
|
+ EmailParseInfoDO emailParseInfoDO = this.buildEmailParseInfo(emailAddress, emailType, emailInfo, totalSize);
|
|
|
|
+ emailParseInfoDO.setEmailKey(emailKey);
|
|
|
|
+ emailId = this.saveEmailParseInfo(emailParseInfoDO);
|
|
}
|
|
}
|
|
// 解析邮件报告
|
|
// 解析邮件报告
|
|
- List<ParseResult<ReportData>> dataList = ListUtil.list(false);
|
|
|
|
for (EmailZipFileDTO zipFile : dtos) {
|
|
for (EmailZipFileDTO zipFile : dtos) {
|
|
EmailFileInfoDO emailFile = this.saveEmailFileInfo(emailId, zipFile.getFilename(), zipFile.getFilepath());
|
|
EmailFileInfoDO emailFile = this.saveEmailFileInfo(emailId, zipFile.getFilename(), zipFile.getFilepath());
|
|
// 解析并保存报告
|
|
// 解析并保存报告
|
|
ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailTitle, emailFile, zipFile);
|
|
ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailTitle, emailFile, zipFile);
|
|
- dataList.add(parseResult);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- String failReason = null;
|
|
|
|
- int emailParseStatus = EmailParseStatusConst.SUCCESS;
|
|
|
|
- // 报告邮件有一条失败就表示整个邮件解析失败
|
|
|
|
- if (CollUtil.isNotEmpty(dataList)) {
|
|
|
|
- // ai解析结果
|
|
|
|
- List<ReportData> aiParaseList = dataList.stream().map(ParseResult::getData)
|
|
|
|
- .filter(Objects::nonNull).filter(e -> Objects.equals(true, e.getAiParse())).toList();
|
|
|
|
- if (CollUtil.isNotEmpty(aiParaseList)) {
|
|
|
|
- for (ReportData data : aiParaseList) {
|
|
|
|
- this.emailFileInfoMapper.updateAiParseByFileId(data.getBaseInfo().getFileId(),
|
|
|
|
- data.getAiParse(), data.getAiFileId());
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- long failNum = dataList.stream().filter(e -> !Objects.equals(EmailParseStatusConst.SUCCESS, e.getStatus())).count();
|
|
|
|
- if (failNum > 0) {
|
|
|
|
- emailParseStatus = EmailParseStatusConst.FAIL;
|
|
|
|
- failReason = dataList.stream().map(ParseResult::getMsg).collect(Collectors.joining(";"));
|
|
|
|
|
|
+ if (parseResult.getData() == null) {
|
|
|
|
+ parseResult.setData(new ReportData.DefaultReportData(null, null));
|
|
}
|
|
}
|
|
|
|
+ parseResult.getData().setReportPath(zipFile.getFilepath());
|
|
|
|
+ resultList.add(parseResult);
|
|
}
|
|
}
|
|
- this.emailParseInfoMapper.updateParseStatus(emailId, emailParseStatus, failReason);
|
|
|
|
|
|
+ return emailId;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ /**
|
|
|
|
+ * 解析报告并保存解析结果
|
|
|
|
+ *
|
|
|
|
+ * @param emailTitle 邮件主题
|
|
|
|
+ * @param emailFileInfo 当前报告信息
|
|
|
|
+ * @param zipFile 当前报告的路径信息
|
|
|
|
+ * @return /
|
|
|
|
+ */
|
|
private ParseResult<ReportData> parseReportAndHandleResult(String emailTitle,
|
|
private ParseResult<ReportData> parseReportAndHandleResult(String emailTitle,
|
|
EmailFileInfoDO emailFileInfo,
|
|
EmailFileInfoDO emailFileInfo,
|
|
EmailZipFileDTO zipFile) {
|
|
EmailZipFileDTO zipFile) {
|
|
@@ -577,7 +668,11 @@ public class EmailParseService {
|
|
}
|
|
}
|
|
// 4.ocr 提取“曲线”、“基金份额”等关键字,如果有曲线则是管理人,如果有基金份额则是协会
|
|
// 4.ocr 提取“曲线”、“基金份额”等关键字,如果有曲线则是管理人,如果有基金份额则是协会
|
|
if (CollUtil.isNotEmpty(images)) {
|
|
if (CollUtil.isNotEmpty(images)) {
|
|
- return new OCRReportParser().parseMonthlyType(fileName, this.ocrParserUrl, images.get(0));
|
|
|
|
|
|
+ try {
|
|
|
|
+ return new OCRReportParser().parseMonthlyType(fileName, this.ocrParserUrl, images.get(0));
|
|
|
|
+ } catch (Exception ignored) {
|
|
|
|
+ return ReportMonthlyType.FAILED;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
return ReportMonthlyType.FAILED;
|
|
return ReportMonthlyType.FAILED;
|
|
}
|
|
}
|
|
@@ -589,8 +684,10 @@ public class EmailParseService {
|
|
* @param fileName 报告名称
|
|
* @param fileName 报告名称
|
|
* @param images 报告的收益和尾页png图片
|
|
* @param images 报告的收益和尾页png图片
|
|
*/
|
|
*/
|
|
- private void ocrReportData(ReportType reportType, ReportData reportData,
|
|
|
|
- String fileName, List<String> images) {
|
|
|
|
|
|
+ private void ocrReportData(ReportType reportType,
|
|
|
|
+ ReportData reportData,
|
|
|
|
+ String fileName,
|
|
|
|
+ List<String> images) {
|
|
if (reportData == null || CollUtil.isEmpty(images)) {
|
|
if (reportData == null || CollUtil.isEmpty(images)) {
|
|
return;
|
|
return;
|
|
}
|
|
}
|