|
@@ -370,7 +370,7 @@ public class EmailParseService {
|
|
dataList.add(new ParseResult<>(ReportParseStatus.ARCHIVE_FAIL, reportData));
|
|
dataList.add(new ParseResult<>(ReportParseStatus.ARCHIVE_FAIL, reportData));
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
- dtos.add(new EmailZipFileDTO(emailTitle, reportPath, e.getReportType()));
|
|
|
|
|
|
+ dtos.add(new EmailZipFileDTO(emailTitle, e.getReportName(), reportPath, e.getReportType()));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// 重新判断类型
|
|
// 重新判断类型
|
|
@@ -540,42 +540,42 @@ public class EmailParseService {
|
|
EmailZipFileDTO zipFile) {
|
|
EmailZipFileDTO zipFile) {
|
|
Integer fileId = zipFile.getFileId();
|
|
Integer fileId = zipFile.getFileId();
|
|
Integer emailType = zipFile.getEmailType();
|
|
Integer emailType = zipFile.getEmailType();
|
|
- String fileName = zipFile.getFilename();
|
|
|
|
|
|
+ String reportName = zipFile.getFilename();
|
|
String filepath = zipFile.getFilepath();
|
|
String filepath = zipFile.getFilepath();
|
|
ParseResult<ReportData> result = new ParseResult<>();
|
|
ParseResult<ReportData> result = new ParseResult<>();
|
|
boolean reportFlag = emailType == null || !EmailTypeConst.SUPPORT_EMAIL_TYPES.contains(emailType);
|
|
boolean reportFlag = emailType == null || !EmailTypeConst.SUPPORT_EMAIL_TYPES.contains(emailType);
|
|
- if (reportFlag || StrUtil.isBlank(fileName) || fileName.endsWith(Constants.FILE_HTML)) {
|
|
|
|
- return new ParseResult<>(ReportParseStatus.NOT_A_REPORT, null, fileName);
|
|
|
|
|
|
+ if (reportFlag || StrUtil.isBlank(reportName) || reportName.endsWith(Constants.FILE_HTML)) {
|
|
|
|
+ return new ParseResult<>(ReportParseStatus.NOT_A_REPORT, null, reportName);
|
|
}
|
|
}
|
|
// 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
|
|
// 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
|
|
- ReportType reportType = ReportParseUtils.matchReportType(emailType, fileName);
|
|
|
|
|
|
+ ReportType reportType = ReportParseUtils.matchReportType(emailType, reportName);
|
|
if (reportType == null) {
|
|
if (reportType == null) {
|
|
reportType = ReportParseUtils.matchReportType(emailType, emailTitle);
|
|
reportType = ReportParseUtils.matchReportType(emailType, emailTitle);
|
|
if (log.isDebugEnabled()) {
|
|
if (log.isDebugEnabled()) {
|
|
- log.debug("报告{} 根据邮件主题{} 重新识别的类型是:{}", fileName, emailTitle, reportType);
|
|
|
|
|
|
+ log.debug("报告{} 根据邮件主题{} 重新识别的类型是:{}", reportName, emailTitle, reportType);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// 解析器--根据文件后缀获取对应解析器,解析不了就用AI来解析
|
|
// 解析器--根据文件后缀获取对应解析器,解析不了就用AI来解析
|
|
ReportParserFileType fileType = ReportParserFileType.getBySuffix(zipFile.getExtName());
|
|
ReportParserFileType fileType = ReportParserFileType.getBySuffix(zipFile.getExtName());
|
|
// 不支持的格式
|
|
// 不支持的格式
|
|
if (fileType == null) {
|
|
if (fileType == null) {
|
|
- ReportData reportData = this.buildNvlReportData(fileId, reportType, null, fileName);
|
|
|
|
- return new ParseResult<>(ReportParseStatus.NO_SUPPORT_TEMPLATE, reportData, fileName);
|
|
|
|
|
|
+ ReportData reportData = this.buildNvlReportData(fileId, reportType, reportName);
|
|
|
|
+ return new ParseResult<>(ReportParseStatus.NO_SUPPORT_TEMPLATE, reportData, reportName);
|
|
}
|
|
}
|
|
// 不是定期报告的判断逻辑放在不支持的格式下面
|
|
// 不是定期报告的判断逻辑放在不支持的格式下面
|
|
if (reportType == null) {
|
|
if (reportType == null) {
|
|
- ReportData reportData = this.buildNvlReportData(fileId, ReportType.OTHER, null, fileName);
|
|
|
|
- return new ParseResult<>(ReportParseStatus.NOT_A_REPORT, reportData, fileName);
|
|
|
|
|
|
+ ReportData reportData = this.buildNvlReportData(fileId, ReportType.OTHER, reportName);
|
|
|
|
+ return new ParseResult<>(ReportParseStatus.NOT_A_REPORT, reportData, reportName);
|
|
}
|
|
}
|
|
|
|
|
|
// docx转pdf
|
|
// docx转pdf
|
|
if (Objects.equals(ReportParserFileType.WORD, fileType)) {
|
|
if (Objects.equals(ReportParserFileType.WORD, fileType)) {
|
|
try {
|
|
try {
|
|
- String outputFile = FileUtil.getParent(filepath, 1) + File.separator + FileUtil.mainName(fileName) + ".pdf";
|
|
|
|
|
|
+ String outputFile = FileUtil.getParent(filepath, 1) + File.separator + FileUtil.mainName(reportName) + ".pdf";
|
|
PdfUtil.convertDocxToPdf(filepath, outputFile);
|
|
PdfUtil.convertDocxToPdf(filepath, outputFile);
|
|
filepath = outputFile;
|
|
filepath = outputFile;
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
- log.warn("报告{} 转换为pdf失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
|
|
|
|
+ log.warn("报告{} 转换为pdf失败:{}", reportName, ExceptionUtil.stacktraceToString(e));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// 首页和尾页转为png图片,首页用来识别基金名称和基金代码、尾页用来识别印章和联系人
|
|
// 首页和尾页转为png图片,首页用来识别基金名称和基金代码、尾页用来识别印章和联系人
|
|
@@ -586,24 +586,24 @@ public class EmailParseService {
|
|
File outputFile = FileUtil.file(FileUtil.getParent(output, 1));
|
|
File outputFile = FileUtil.file(FileUtil.getParent(output, 1));
|
|
images = PdfUtil.convertFirstAndLastPagesToPng(filepath, outputFile, 300, zipFile.getPdfPwd());
|
|
images = PdfUtil.convertFirstAndLastPagesToPng(filepath, outputFile, 300, zipFile.getPdfPwd());
|
|
if (log.isDebugEnabled()) {
|
|
if (log.isDebugEnabled()) {
|
|
- log.debug("报告{} 生成的图片地址是:\n{}", fileName, images);
|
|
|
|
|
|
+ log.debug("报告{} 生成的图片地址是:\n{}", reportName, images);
|
|
}
|
|
}
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
- log.warn("报告{} 生成图片失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
|
|
|
|
+ log.warn("报告{} 生成图片失败:{}", reportName, ExceptionUtil.stacktraceToString(e));
|
|
}
|
|
}
|
|
} else if (Objects.equals(ReportParserFileType.IMG, fileType)) {
|
|
} else if (Objects.equals(ReportParserFileType.IMG, fileType)) {
|
|
try {
|
|
try {
|
|
String outputFile = PdfUtil.compressAndSave(filepath);
|
|
String outputFile = PdfUtil.compressAndSave(filepath);
|
|
images.add(outputFile);
|
|
images.add(outputFile);
|
|
} catch (IOException e) {
|
|
} catch (IOException e) {
|
|
- log.error("报告{} 图片压缩失败,{}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
|
|
|
|
+ log.error("报告{} 图片压缩失败,{}", reportName, ExceptionUtil.stacktraceToString(e));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// ocr识别月报是否管理人版或协会版
|
|
// ocr识别月报是否管理人版或协会版
|
|
ReportMonthlyType monthlyType = ReportMonthlyType.NO_NEED;
|
|
ReportMonthlyType monthlyType = ReportMonthlyType.NO_NEED;
|
|
if (ReportType.MONTHLY == reportType) {
|
|
if (ReportType.MONTHLY == reportType) {
|
|
- monthlyType = this.determineReportType(emailTitle, fileName, filepath, images);
|
|
|
|
|
|
+ monthlyType = this.determineReportType(emailTitle, reportName, filepath, images);
|
|
}
|
|
}
|
|
boolean isAmac = reportType == ReportType.ANNUALLY || reportType == ReportType.QUARTERLY
|
|
boolean isAmac = reportType == ReportType.ANNUALLY || reportType == ReportType.QUARTERLY
|
|
|| (reportType == ReportType.MONTHLY && ReportMonthlyType.AMAC == monthlyType);
|
|
|| (reportType == ReportType.MONTHLY && ReportMonthlyType.AMAC == monthlyType);
|
|
@@ -611,7 +611,7 @@ public class EmailParseService {
|
|
boolean notSupportFile = false;
|
|
boolean notSupportFile = false;
|
|
// 解析报告
|
|
// 解析报告
|
|
ReportData reportData = null;
|
|
ReportData reportData = null;
|
|
- ReportParserParams params = new ReportParserParams(fileId, fileName, filepath, reportType);
|
|
|
|
|
|
+ ReportParserParams params = new ReportParserParams(fileId, reportName, filepath, reportType);
|
|
long start = System.currentTimeMillis();
|
|
long start = System.currentTimeMillis();
|
|
try {
|
|
try {
|
|
if (isAmac || reportType == ReportType.LETTER) {
|
|
if (isAmac || reportType == ReportType.LETTER) {
|
|
@@ -620,7 +620,7 @@ public class EmailParseService {
|
|
result = new ParseResult<>(1, "报告解析成功", reportData);
|
|
result = new ParseResult<>(1, "报告解析成功", reportData);
|
|
}
|
|
}
|
|
} catch (ReportParseException e) {
|
|
} catch (ReportParseException e) {
|
|
- result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
|
|
|
|
|
|
+ result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), reportName), null);
|
|
log.warn("解析失败:{}", result.getMsg());
|
|
log.warn("解析失败:{}", result.getMsg());
|
|
if (e instanceof NotSupportReportException) {
|
|
if (e instanceof NotSupportReportException) {
|
|
notSupportFile = true;
|
|
notSupportFile = true;
|
|
@@ -632,18 +632,18 @@ public class EmailParseService {
|
|
// 如果解析结果是空的就用AI工具解析一次
|
|
// 如果解析结果是空的就用AI工具解析一次
|
|
if (reportData == null && !notSupportFile) {
|
|
if (reportData == null && !notSupportFile) {
|
|
if (log.isInfoEnabled()) {
|
|
if (log.isInfoEnabled()) {
|
|
- log.info("报告{} 是周报或管理人月报或其他类型或解析失败,用AI解析器解析", fileName);
|
|
|
|
|
|
+ log.info("报告{} 是周报或管理人月报或其他类型或解析失败,用AI解析器解析", reportName);
|
|
}
|
|
}
|
|
try {
|
|
try {
|
|
if (!isAmac && CollUtil.isNotEmpty(images)) {
|
|
if (!isAmac && CollUtil.isNotEmpty(images)) {
|
|
filepath = images.get(0);
|
|
filepath = images.get(0);
|
|
}
|
|
}
|
|
- params = new ReportParserParams(fileId, fileName, filepath, reportType);
|
|
|
|
|
|
+ params = new ReportParserParams(fileId, reportName, filepath, reportType);
|
|
ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, ReportParserFileType.AI);
|
|
ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, ReportParserFileType.AI);
|
|
reportData = instance.parse(params);
|
|
reportData = instance.parse(params);
|
|
result = new ParseResult<>(1, "报告解析成功--AI", reportData);
|
|
result = new ParseResult<>(1, "报告解析成功--AI", reportData);
|
|
} catch (ReportParseException e) {
|
|
} catch (ReportParseException e) {
|
|
- result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
|
|
|
|
|
|
+ result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), reportName), null);
|
|
log.warn("AI解析失败:{}", result.getMsg());
|
|
log.warn("AI解析失败:{}", result.getMsg());
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
log.warn("AI解析错误:{}", ExceptionUtil.stacktraceToString(e));
|
|
log.warn("AI解析错误:{}", ExceptionUtil.stacktraceToString(e));
|
|
@@ -651,7 +651,9 @@ public class EmailParseService {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// 当reportData==null时重新构建一个reportData对象
|
|
// 当reportData==null时重新构建一个reportData对象
|
|
- reportData = this.buildNvlReportData(fileId, reportType, reportData, fileName);
|
|
|
|
|
|
+ if (reportData == null) {
|
|
|
|
+ reportData = this.buildNvlReportData(fileId, reportType, reportName);
|
|
|
|
+ }
|
|
if (reportData.getBaseInfo() != null) {
|
|
if (reportData.getBaseInfo() != null) {
|
|
// 设置月报类型
|
|
// 设置月报类型
|
|
reportData.getBaseInfo().setMonthlyType(monthlyType.getType());
|
|
reportData.getBaseInfo().setMonthlyType(monthlyType.getType());
|
|
@@ -661,14 +663,14 @@ public class EmailParseService {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// ocr信息提取(印章、联系人、基金名称和产品代码)
|
|
// ocr信息提取(印章、联系人、基金名称和产品代码)
|
|
- reportData = this.ocrReportData(fileId, reportType, monthlyType, reportData, fileName, senderEmail, images);
|
|
|
|
|
|
+ reportData = this.ocrReportData(fileId, reportType, monthlyType, reportData, reportName, senderEmail, images);
|
|
result.setData(reportData);
|
|
result.setData(reportData);
|
|
if (log.isInfoEnabled()) {
|
|
if (log.isInfoEnabled()) {
|
|
- log.info("报告{} 解析耗时{}ms,结果是:{}", fileName, (System.currentTimeMillis() - start), reportData);
|
|
|
|
|
|
+ log.info("报告{} 解析耗时{}ms,结果是:{}", reportName, (System.currentTimeMillis() - start), reportData);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// 保存报告解析结果
|
|
// 保存报告解析结果
|
|
- this.saveReportData(reportData, reportType, fileName);
|
|
|
|
|
|
+ this.saveReportData(reportData, reportType, reportName);
|
|
return result;
|
|
return result;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -852,24 +854,18 @@ public class EmailParseService {
|
|
*
|
|
*
|
|
* @param fileId 文件ID
|
|
* @param fileId 文件ID
|
|
* @param reportType 报告类型
|
|
* @param reportType 报告类型
|
|
- * @param reportData 解析结果对象
|
|
|
|
- * @param fileName 报告名称
|
|
|
|
|
|
+ * @param reportName 报告名称
|
|
* @return /
|
|
* @return /
|
|
*/
|
|
*/
|
|
- private ReportData buildNvlReportData(Integer fileId,
|
|
|
|
- ReportType reportType,
|
|
|
|
- ReportData reportData,
|
|
|
|
- String fileName) {
|
|
|
|
- if (reportData != null) {
|
|
|
|
- return reportData;
|
|
|
|
- }
|
|
|
|
|
|
+ private ReportData buildNvlReportData(Integer fileId, ReportType reportType, String reportName) {
|
|
|
|
+ ReportData reportData = null;
|
|
if (reportType == null) {
|
|
if (reportType == null) {
|
|
reportType = ReportType.OTHER;
|
|
reportType = ReportType.OTHER;
|
|
}
|
|
}
|
|
ReportBaseInfoDTO baseInfo = new ReportBaseInfoDTO(fileId);
|
|
ReportBaseInfoDTO baseInfo = new ReportBaseInfoDTO(fileId);
|
|
- baseInfo.setReportName(fileName);
|
|
|
|
|
|
+ baseInfo.setReportName(reportName);
|
|
baseInfo.setReportType(reportType.name());
|
|
baseInfo.setReportType(reportType.name());
|
|
- String reportDate = ReportParseUtils.matchReportDate(reportType, fileName);
|
|
|
|
|
|
+ String reportDate = ReportParseUtils.matchReportDate(reportType, reportName);
|
|
baseInfo.setReportDate(ConvertUtil.toDate(reportDate));
|
|
baseInfo.setReportDate(ConvertUtil.toDate(reportDate));
|
|
ReportFundInfoDTO fundInfo = new ReportFundInfoDTO(fileId);
|
|
ReportFundInfoDTO fundInfo = new ReportFundInfoDTO(fileId);
|
|
if (ReportType.ANNUALLY == reportType) {
|
|
if (ReportType.ANNUALLY == reportType) {
|
|
@@ -909,7 +905,7 @@ public class EmailParseService {
|
|
} finally {
|
|
} finally {
|
|
writeWatch.stop();
|
|
writeWatch.stop();
|
|
if (log.isInfoEnabled()) {
|
|
if (log.isInfoEnabled()) {
|
|
- log.info("报告{}解析结果保存完成,耗时{}ms", fileName, writeWatch.getTotalTimeMillis());
|
|
|
|
|
|
+ log.info("报告{} 解析结果保存完成,耗时{}ms", fileName, writeWatch.getTotalTimeMillis());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|