|
@@ -7,6 +7,7 @@ import cn.hutool.core.exceptions.ExceptionUtil;
|
|
import cn.hutool.core.io.FileUtil;
|
|
import cn.hutool.core.io.FileUtil;
|
|
import cn.hutool.core.map.MapUtil;
|
|
import cn.hutool.core.map.MapUtil;
|
|
import cn.hutool.core.util.StrUtil;
|
|
import cn.hutool.core.util.StrUtil;
|
|
|
|
+import com.smppw.modaq.application.components.OCRReportParser;
|
|
import com.smppw.modaq.application.components.ReportParseUtils;
|
|
import com.smppw.modaq.application.components.ReportParseUtils;
|
|
import com.smppw.modaq.application.components.report.parser.ReportParser;
|
|
import com.smppw.modaq.application.components.report.parser.ReportParser;
|
|
import com.smppw.modaq.application.components.report.parser.ReportParserFactory;
|
|
import com.smppw.modaq.application.components.report.parser.ReportParserFactory;
|
|
@@ -24,6 +25,7 @@ import com.smppw.modaq.common.exception.ReportParseException;
|
|
import com.smppw.modaq.domain.dto.EmailContentInfoDTO;
|
|
import com.smppw.modaq.domain.dto.EmailContentInfoDTO;
|
|
import com.smppw.modaq.domain.dto.EmailZipFileDTO;
|
|
import com.smppw.modaq.domain.dto.EmailZipFileDTO;
|
|
import com.smppw.modaq.domain.dto.MailboxInfoDTO;
|
|
import com.smppw.modaq.domain.dto.MailboxInfoDTO;
|
|
|
|
+import com.smppw.modaq.domain.dto.report.OCRParseData;
|
|
import com.smppw.modaq.domain.dto.report.ParseResult;
|
|
import com.smppw.modaq.domain.dto.report.ParseResult;
|
|
import com.smppw.modaq.domain.dto.report.ReportData;
|
|
import com.smppw.modaq.domain.dto.report.ReportData;
|
|
import com.smppw.modaq.domain.dto.report.ReportParserParams;
|
|
import com.smppw.modaq.domain.dto.report.ReportParserParams;
|
|
@@ -32,6 +34,7 @@ import com.smppw.modaq.domain.entity.EmailParseInfoDO;
|
|
import com.smppw.modaq.domain.mapper.EmailFileInfoMapper;
|
|
import com.smppw.modaq.domain.mapper.EmailFileInfoMapper;
|
|
import com.smppw.modaq.domain.mapper.EmailParseInfoMapper;
|
|
import com.smppw.modaq.domain.mapper.EmailParseInfoMapper;
|
|
import com.smppw.modaq.infrastructure.util.ArchiveUtil;
|
|
import com.smppw.modaq.infrastructure.util.ArchiveUtil;
|
|
|
|
+import com.smppw.modaq.infrastructure.util.DateUtils;
|
|
import com.smppw.modaq.infrastructure.util.PdfUtil;
|
|
import com.smppw.modaq.infrastructure.util.PdfUtil;
|
|
import jakarta.mail.*;
|
|
import jakarta.mail.*;
|
|
import jakarta.mail.internet.MimeUtility;
|
|
import jakarta.mail.internet.MimeUtility;
|
|
@@ -87,6 +90,8 @@ public class EmailParseService {
|
|
@Value("${email.file.path}")
|
|
@Value("${email.file.path}")
|
|
private String path;
|
|
private String path;
|
|
|
|
|
|
|
|
+ @Value("${email.report.ocr-parser-url}")
|
|
|
|
+ private String ocrParserUrl;
|
|
|
|
|
|
public EmailParseService(EmailParseInfoMapper emailParseInfoMapper,
|
|
public EmailParseService(EmailParseInfoMapper emailParseInfoMapper,
|
|
EmailFileInfoMapper emailFileInfoMapper,
|
|
EmailFileInfoMapper emailFileInfoMapper,
|
|
@@ -205,11 +210,12 @@ public class EmailParseService {
|
|
Integer emailType = emailContentInfoDTO.getEmailType();
|
|
Integer emailType = emailContentInfoDTO.getEmailType();
|
|
String filepath = emailContentInfoDTO.getFilePath();
|
|
String filepath = emailContentInfoDTO.getFilePath();
|
|
String emailTitle = emailContentInfoDTO.getEmailTitle();
|
|
String emailTitle = emailContentInfoDTO.getEmailTitle();
|
|
|
|
+ int fileSize = emailContentInfoDTO.getFileSize();
|
|
|
|
|
|
if (ArchiveUtil.isZip(filepath)) {
|
|
if (ArchiveUtil.isZip(filepath)) {
|
|
- handleCompressedFiles(emailTitle, filepath, ".zip", emailType, resultList);
|
|
|
|
|
|
+ handleCompressedFiles(emailTitle, filepath, ".zip", emailType, fileSize, resultList);
|
|
} else if (ArchiveUtil.isRAR(filepath)) {
|
|
} else if (ArchiveUtil.isRAR(filepath)) {
|
|
- handleCompressedFiles(emailTitle, filepath, ".rar", emailType, resultList);
|
|
|
|
|
|
+ handleCompressedFiles(emailTitle, filepath, ".rar", emailType, fileSize, resultList);
|
|
}
|
|
}
|
|
|
|
|
|
// 文件中的类型判断
|
|
// 文件中的类型判断
|
|
@@ -231,7 +237,7 @@ public class EmailParseService {
|
|
}
|
|
}
|
|
|
|
|
|
private void handleCompressedFiles(String emailTitle, String filepath, String extension,
|
|
private void handleCompressedFiles(String emailTitle, String filepath, String extension,
|
|
- Integer emailType, List<EmailZipFileDTO> resultList) throws IOException {
|
|
|
|
|
|
+ Integer emailType, int fileSize, List<EmailZipFileDTO> resultList) throws IOException {
|
|
String destPath = getDestinationPath(filepath, extension);
|
|
String destPath = getDestinationPath(filepath, extension);
|
|
|
|
|
|
File destFile = new File(destPath);
|
|
File destFile = new File(destPath);
|
|
@@ -259,13 +265,13 @@ public class EmailParseService {
|
|
String[] subDirs = file.list();
|
|
String[] subDirs = file.list();
|
|
if (subDirs != null) {
|
|
if (subDirs != null) {
|
|
for (String subDir : subDirs) {
|
|
for (String subDir : subDirs) {
|
|
- resultList.add(new EmailZipFileDTO(emailTitle, subDir, emailType));
|
|
|
|
|
|
+ resultList.add(new EmailZipFileDTO(emailTitle, subDir, fileSize, emailType));
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
log.warn("目录 {} 下无文件", dir);
|
|
log.warn("目录 {} 下无文件", dir);
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
- resultList.add(new EmailZipFileDTO(emailTitle, dir, emailType));
|
|
|
|
|
|
+ resultList.add(new EmailZipFileDTO(emailTitle, dir, fileSize, emailType));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -292,7 +298,7 @@ public class EmailParseService {
|
|
List<EmailZipFileDTO> dtos = ListUtil.list(false);
|
|
List<EmailZipFileDTO> dtos = ListUtil.list(false);
|
|
List<EmailZipFileDTO> zipFiles = entry.getValue();
|
|
List<EmailZipFileDTO> zipFiles = entry.getValue();
|
|
if (CollUtil.isEmpty(zipFiles)) {
|
|
if (CollUtil.isEmpty(zipFiles)) {
|
|
- dtos.add(new EmailZipFileDTO(emailTitle, emailDto.getFilePath(), emailDto.getFileName(), emailDto.getEmailType()));
|
|
|
|
|
|
+ dtos.add(new EmailZipFileDTO(emailTitle, emailDto));
|
|
} else {
|
|
} else {
|
|
dtos.addAll(zipFiles);
|
|
dtos.addAll(zipFiles);
|
|
}
|
|
}
|
|
@@ -304,25 +310,26 @@ public class EmailParseService {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- // 数据库已存在的数据过滤
|
|
|
|
|
|
+ // 数据库已存在的数据过滤(邮件主题+报告名称+附件大小)
|
|
Iterator<EmailZipFileDTO> iterator = dtos.iterator();
|
|
Iterator<EmailZipFileDTO> iterator = dtos.iterator();
|
|
while (iterator.hasNext()) {
|
|
while (iterator.hasNext()) {
|
|
EmailZipFileDTO dto = iterator.next();
|
|
EmailZipFileDTO dto = iterator.next();
|
|
Integer emailType = dto.getEmailType();
|
|
Integer emailType = dto.getEmailType();
|
|
String filename = dto.getFilename();
|
|
String filename = dto.getFilename();
|
|
|
|
+ int fileSize = dto.getFileSize();
|
|
int count = 0;
|
|
int count = 0;
|
|
if (Objects.equals(emailType, EmailTypeConst.REPORT_LETTER_EMAIL_TYPE)) {
|
|
if (Objects.equals(emailType, EmailTypeConst.REPORT_LETTER_EMAIL_TYPE)) {
|
|
// 确认单
|
|
// 确认单
|
|
count = this.emailFileInfoMapper.getLetterFilenameSuccessCount(emailTitle, filename);
|
|
count = this.emailFileInfoMapper.getLetterFilenameSuccessCount(emailTitle, filename);
|
|
} else if (Objects.equals(emailType, EmailTypeConst.REPORT_EMAIL_TYPE)) {
|
|
} else if (Objects.equals(emailType, EmailTypeConst.REPORT_EMAIL_TYPE)) {
|
|
// 定期报告
|
|
// 定期报告
|
|
- count = this.emailFileInfoMapper.getAmacFilenameSuccessCount(emailTitle, filename);
|
|
|
|
|
|
+ count = this.emailFileInfoMapper.getAmacFilenameSuccessCount(emailTitle, filename, fileSize);
|
|
} else if (Objects.equals(emailType, EmailTypeConst.REPORT_WEEKLY_TYPE)) {
|
|
} else if (Objects.equals(emailType, EmailTypeConst.REPORT_WEEKLY_TYPE)) {
|
|
// 管理人周报
|
|
// 管理人周报
|
|
- count = this.emailFileInfoMapper.getWeeklyFilenameSuccessCount(emailTitle, filename);
|
|
|
|
|
|
+ count = this.emailFileInfoMapper.getWeeklyFilenameSuccessCount(emailTitle, filename, fileSize);
|
|
} else if (Objects.equals(emailType, EmailTypeConst.REPORT_OTHER_TYPE)) {
|
|
} else if (Objects.equals(emailType, EmailTypeConst.REPORT_OTHER_TYPE)) {
|
|
// 其他报告
|
|
// 其他报告
|
|
- count = this.emailFileInfoMapper.getOtherFilenameSuccessCount(emailTitle, filename);
|
|
|
|
|
|
+ count = this.emailFileInfoMapper.getOtherFilenameSuccessCount(emailTitle, filename, fileSize);
|
|
} else {
|
|
} else {
|
|
log.info("邮件{} 类型{} 不支持解析。", emailTitle, emailType);
|
|
log.info("邮件{} 类型{} 不支持解析。", emailTitle, emailType);
|
|
iterator.remove();
|
|
iterator.remove();
|
|
@@ -385,8 +392,6 @@ public class EmailParseService {
|
|
log.error(result.getMsg());
|
|
log.error(result.getMsg());
|
|
return result;
|
|
return result;
|
|
}
|
|
}
|
|
- // 基金代码、备案编码
|
|
|
|
- String registerNumber = ReportParseUtils.matchFundCode(fileName);
|
|
|
|
// 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
|
|
// 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
|
|
ReportType reportType = ReportParseUtils.matchReportType(emailType, fileName);
|
|
ReportType reportType = ReportParseUtils.matchReportType(emailType, fileName);
|
|
if (reportType == null) {
|
|
if (reportType == null) {
|
|
@@ -414,15 +419,18 @@ public class EmailParseService {
|
|
return result;
|
|
return result;
|
|
}
|
|
}
|
|
Integer fileId = emailFileInfo.getId();
|
|
Integer fileId = emailFileInfo.getId();
|
|
- String aiFileId = emailFileInfo.getAiFileId();
|
|
|
|
|
|
|
|
// 首页和尾页转为png图片,首页用来识别基金名称和基金代码、尾页用来识别印章和联系人
|
|
// 首页和尾页转为png图片,首页用来识别基金名称和基金代码、尾页用来识别印章和联系人
|
|
List<String> images = ListUtil.empty();
|
|
List<String> images = ListUtil.empty();
|
|
try {
|
|
try {
|
|
String output = FileUtil.getParent(filepath, 1) + File.separator + "image";
|
|
String output = FileUtil.getParent(filepath, 1) + File.separator + "image";
|
|
images = PdfUtil.convertFirstAndLastPagesToPng(filepath, FileUtil.file(output), 300);
|
|
images = PdfUtil.convertFirstAndLastPagesToPng(filepath, FileUtil.file(output), 300);
|
|
- if (log.isInfoEnabled()) {
|
|
|
|
- log.info("报告[{}] 生成的图片地址是:{}", fileName, images);
|
|
|
|
|
|
+ if (log.isDebugEnabled()) {
|
|
|
|
+ log.debug("报告[{}] 生成的图片地址是:{}", fileName, images);
|
|
|
|
+ }
|
|
|
|
+ // 首页和尾页相等
|
|
|
|
+ if (images.size() == 1) {
|
|
|
|
+ images.add(images.get(0));
|
|
}
|
|
}
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
log.warn("报告[{}] 生成图片失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
log.warn("报告[{}] 生成图片失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
@@ -432,26 +440,19 @@ public class EmailParseService {
|
|
boolean notSupportFile = false;
|
|
boolean notSupportFile = false;
|
|
// 解析报告
|
|
// 解析报告
|
|
ReportData reportData = null;
|
|
ReportData reportData = null;
|
|
|
|
+ ReportParserParams params = new ReportParserParams(fileId, fileName, filepath, reportType);
|
|
StopWatch parserWatch = new StopWatch();
|
|
StopWatch parserWatch = new StopWatch();
|
|
parserWatch.start();
|
|
parserWatch.start();
|
|
try {
|
|
try {
|
|
- if (StrUtil.isBlank(aiFileId) && reportType != ReportType.OTHER && reportType != ReportType.WEEKLY) {
|
|
|
|
- ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName).filepath(filepath)
|
|
|
|
- .registerNumber(registerNumber).reportType(reportType).build();
|
|
|
|
|
|
+ if (reportType != ReportType.OTHER && reportType != ReportType.WEEKLY) {
|
|
ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
|
|
ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
|
|
reportData = instance.parse(params);
|
|
reportData = instance.parse(params);
|
|
result.setStatus(1);
|
|
result.setStatus(1);
|
|
result.setMsg("报告解析成功");
|
|
result.setMsg("报告解析成功");
|
|
result.setData(reportData);
|
|
result.setData(reportData);
|
|
} else {
|
|
} else {
|
|
- if (reportType == ReportType.OTHER || reportType == ReportType.WEEKLY) {
|
|
|
|
- if (log.isInfoEnabled()) {
|
|
|
|
- log.info("报告{} 是周报或其他类型,直接用AI解析器解析", fileName);
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- if (log.isInfoEnabled()) {
|
|
|
|
- log.info("报告{} 是已经存在ai解析记录,上传过文件{},直接跳转到AI解析器进行解析", fileName, aiFileId);
|
|
|
|
- }
|
|
|
|
|
|
+ if (log.isInfoEnabled()) {
|
|
|
|
+ log.info("报告{} 是周报或其他类型,直接用AI解析器解析", fileName);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} catch (ReportParseException e) {
|
|
} catch (ReportParseException e) {
|
|
@@ -478,10 +479,9 @@ public class EmailParseService {
|
|
log.info("报告{} 用首页图片{} 开始AI解析......", fileName, filepath);
|
|
log.info("报告{} 用首页图片{} 开始AI解析......", fileName, filepath);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName).filepath(filepath)
|
|
|
|
- .registerNumber(registerNumber).reportType(reportType).aiFileId(aiFileId).build();
|
|
|
|
- ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, ReportParserFileType.AI);
|
|
|
|
try {
|
|
try {
|
|
|
|
+ params = new ReportParserParams(fileId, fileName, filepath, reportType);
|
|
|
|
+ ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, ReportParserFileType.AI);
|
|
reportData = instance.parse(params);
|
|
reportData = instance.parse(params);
|
|
result.setStatus(1);
|
|
result.setStatus(1);
|
|
result.setMsg("报告解析成功--AI");
|
|
result.setMsg("报告解析成功--AI");
|
|
@@ -499,6 +499,8 @@ public class EmailParseService {
|
|
log.info("报告{} AI解析结束!", fileName);
|
|
log.info("报告{} AI解析结束!", fileName);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+ // ocr信息提取
|
|
|
|
+ this.ocrReportData(reportData, fileName, images);
|
|
parserWatch.stop();
|
|
parserWatch.stop();
|
|
if (log.isInfoEnabled()) {
|
|
if (log.isInfoEnabled()) {
|
|
log.info("报告{}解析结果为{},耗时{}ms", fileName, reportData, parserWatch.getTotalTimeMillis());
|
|
log.info("报告{}解析结果为{},耗时{}ms", fileName, reportData, parserWatch.getTotalTimeMillis());
|
|
@@ -510,6 +512,63 @@ public class EmailParseService {
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
|
|
+ * ocr 提取信息(包括首页的基金名称或报告日期,尾页的印章或联系人等信息)
|
|
|
|
+ *
|
|
|
|
+ * @param reportData 报告解析结果
|
|
|
|
+ * @param fileName 报告名称
|
|
|
|
+ * @param images 报告的首页和尾页png图片
|
|
|
|
+ */
|
|
|
|
+ private void ocrReportData(ReportData reportData, String fileName, List<String> images) {
|
|
|
|
+ if (reportData == null || CollUtil.isEmpty(images) || images.size() != 2) {
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+ OCRParseData parseRes = null;
|
|
|
|
+ try {
|
|
|
|
+ parseRes = new OCRReportParser().parse(fileName, this.ocrParserUrl, images.get(1));
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ log.error("报告{} OCR识别印章和联系人出错:{}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
|
|
+ }
|
|
|
|
+ // ocr识别尾页是否包含印章和联系人信息
|
|
|
|
+ if (parseRes != null) {
|
|
|
|
+ if (reportData.getBaseInfo() != null) {
|
|
|
|
+ Date reportDate = DateUtils.toDate(parseRes.getReportDate());
|
|
|
|
+ if (reportData.getBaseInfo().getReportDate() == null && reportDate != null) {
|
|
|
|
+ reportData.getBaseInfo().setReportDate(reportDate);
|
|
|
|
+ }
|
|
|
|
+ reportData.getBaseInfo().setWithSeals(parseRes.getWithSeals());
|
|
|
|
+ reportData.getBaseInfo().setWithContacts(parseRes.getWithContacts());
|
|
|
|
+ if (fileName.contains("用印") && !Objects.equals(true, reportData.getBaseInfo().getWithSeals())) {
|
|
|
|
+ reportData.getBaseInfo().setWithSeals(true);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ // 用首页识别基金名称、产品代码和报告日期
|
|
|
|
+ if ((reportData.getBaseInfo() != null && reportData.getBaseInfo().getReportDate() == null)
|
|
|
|
+ || (reportData.getFundInfo() != null && StrUtil.isBlank(reportData.getFundInfo().getFundName()))
|
|
|
|
+ || (reportData.getFundInfo() != null && StrUtil.isBlank(reportData.getFundInfo().getFundCode()))) {
|
|
|
|
+ try {
|
|
|
|
+ parseRes = new OCRReportParser().parse(fileName, this.ocrParserUrl, images.get(0));
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ log.error("报告{} OCR识别首页基金名称和报告日期出错:{}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
|
|
+ }
|
|
|
|
+ if (reportData.getBaseInfo() != null && parseRes != null) {
|
|
|
|
+ Date reportDate = DateUtils.toDate(parseRes.getReportDate());
|
|
|
|
+ if (reportDate != null) {
|
|
|
|
+ reportData.getBaseInfo().setReportDate(reportDate);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if (reportData.getFundInfo() != null && parseRes != null) {
|
|
|
|
+ if (StrUtil.isBlank(reportData.getFundInfo().getFundName())) {
|
|
|
|
+ reportData.getFundInfo().setFundName(parseRes.getFundName());
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isBlank(reportData.getFundInfo().getFundCode())) {
|
|
|
|
+ reportData.getFundInfo().setFundCode(parseRes.getFundCode());
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
* 保存报告解析结果
|
|
* 保存报告解析结果
|
|
*
|
|
*
|
|
* @param reportData 报告解析结果
|
|
* @param reportData 报告解析结果
|