|
@@ -69,10 +69,7 @@ public class EmailParseService {
|
|
|
|
|
|
// 常量定义:统一管理关键词
|
|
// 常量定义:统一管理关键词
|
|
private static final Set<String> AMAC_KEYWORDS = Set.of("协会", "信披");
|
|
private static final Set<String> AMAC_KEYWORDS = Set.of("协会", "信披");
|
|
- private static final Set<String> MANAGER_KEYWORDS = Set.of(
|
|
|
|
- "管理人", "公司版", "投资者月报", "运行报告", "月策略",
|
|
|
|
- "投资者报告", "投资报告", "投资月报", "月度简报", "运行月报"
|
|
|
|
- );
|
|
|
|
|
|
+
|
|
private static final Set<String> EXCLUDE_PATH_KEYWORDS = Set.of("公司及协会版", "公司和协会版");
|
|
private static final Set<String> EXCLUDE_PATH_KEYWORDS = Set.of("公司及协会版", "公司和协会版");
|
|
|
|
|
|
// 扩展支持的 MIME 类型
|
|
// 扩展支持的 MIME 类型
|
|
@@ -178,6 +175,16 @@ public class EmailParseService {
|
|
continue;
|
|
continue;
|
|
}
|
|
}
|
|
Integer type = EmailUtil.getEmailTypeBySubject(emailTitle + emailFile.getFilename());
|
|
Integer type = EmailUtil.getEmailTypeBySubject(emailTitle + emailFile.getFilename());
|
|
|
|
+ // 特殊月报
|
|
|
|
+ if (Objects.equals(EmailTypeConst.NAV_EMAIL_TYPE, type)
|
|
|
|
+ && (ReportParseUtils.containsAny(emailTitle, ReportParseUtils.MANAGER_KEYWORDS)
|
|
|
|
+ || emailTitle.contains("定期报告"))) {
|
|
|
|
+ type = EmailTypeConst.REPORT_EMAIL_TYPE;
|
|
|
|
+ }
|
|
|
|
+ // 其他报告
|
|
|
|
+ if (Objects.equals(EmailTypeConst.NAV_EMAIL_TYPE, type)) {
|
|
|
|
+ type = EmailTypeConst.REPORT_OTHER_TYPE;
|
|
|
|
+ }
|
|
emailFile.setEmailType(type);
|
|
emailFile.setEmailType(type);
|
|
}
|
|
}
|
|
|
|
|
|
@@ -519,10 +526,10 @@ public class EmailParseService {
|
|
File outputFile = FileUtil.file(FileUtil.getParent(output, 1));
|
|
File outputFile = FileUtil.file(FileUtil.getParent(output, 1));
|
|
images = PdfUtil.convertFirstAndLastPagesToPng(filepath, outputFile, 300);
|
|
images = PdfUtil.convertFirstAndLastPagesToPng(filepath, outputFile, 300);
|
|
if (log.isDebugEnabled()) {
|
|
if (log.isDebugEnabled()) {
|
|
- log.debug("报告[{}] 生成的图片地址是:\n{}", fileName, images);
|
|
|
|
|
|
+ log.debug("报告{} 生成的图片地址是:\n{}", fileName, images);
|
|
}
|
|
}
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
- log.warn("报告[{}] 生成图片失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
|
|
|
|
+ log.warn("报告{} 生成图片失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
}
|
|
}
|
|
} else if (Objects.equals(ReportParserFileType.IMG, fileType)) {
|
|
} else if (Objects.equals(ReportParserFileType.IMG, fileType)) {
|
|
try {
|
|
try {
|
|
@@ -553,8 +560,8 @@ public class EmailParseService {
|
|
result = new ParseResult<>(1, "报告解析成功", reportData);
|
|
result = new ParseResult<>(1, "报告解析成功", reportData);
|
|
}
|
|
}
|
|
} catch (ReportParseException e) {
|
|
} catch (ReportParseException e) {
|
|
- log.warn("解析失败:{}", StrUtil.format(e.getMsg(), fileName));
|
|
|
|
result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
|
|
result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
|
|
|
|
+ log.warn("解析失败:{}", result.getMsg());
|
|
if (e instanceof NotSupportReportException) {
|
|
if (e instanceof NotSupportReportException) {
|
|
notSupportFile = true;
|
|
notSupportFile = true;
|
|
}
|
|
}
|
|
@@ -565,7 +572,7 @@ public class EmailParseService {
|
|
// 如果解析结果是空的就用AI工具解析一次
|
|
// 如果解析结果是空的就用AI工具解析一次
|
|
if (reportData == null && !notSupportFile) {
|
|
if (reportData == null && !notSupportFile) {
|
|
if (log.isInfoEnabled()) {
|
|
if (log.isInfoEnabled()) {
|
|
- log.info("报告{} 是周报或管理人月报或其他类型,用AI解析器解析", fileName);
|
|
|
|
|
|
+ log.info("报告{} 是周报或管理人月报或其他类型或解析失败,用AI解析器解析", fileName);
|
|
}
|
|
}
|
|
try {
|
|
try {
|
|
if (!isAmac && CollUtil.isNotEmpty(images)) {
|
|
if (!isAmac && CollUtil.isNotEmpty(images)) {
|
|
@@ -576,8 +583,8 @@ public class EmailParseService {
|
|
reportData = instance.parse(params);
|
|
reportData = instance.parse(params);
|
|
result = new ParseResult<>(1, "报告解析成功--AI", reportData);
|
|
result = new ParseResult<>(1, "报告解析成功--AI", reportData);
|
|
} catch (ReportParseException e) {
|
|
} catch (ReportParseException e) {
|
|
- log.warn("AI解析失败:{}", StrUtil.format(e.getMsg(), fileName));
|
|
|
|
result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
|
|
result = new ParseResult<>(e.getCode(), StrUtil.format(e.getMsg(), fileName), null);
|
|
|
|
+ log.warn("AI解析失败:{}", result.getMsg());
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
log.warn("AI解析错误:{}", ExceptionUtil.stacktraceToString(e));
|
|
log.warn("AI解析错误:{}", ExceptionUtil.stacktraceToString(e));
|
|
result = new ParseResult<>(ReportParseStatus.PARSE_FAIL, null, e.getMessage());
|
|
result = new ParseResult<>(ReportParseStatus.PARSE_FAIL, null, e.getMessage());
|
|
@@ -612,10 +619,10 @@ public class EmailParseService {
|
|
public ReportMonthlyType determineReportType(String emailTitle, String fileName,
|
|
public ReportMonthlyType determineReportType(String emailTitle, String fileName,
|
|
String filepath, List<String> images) {
|
|
String filepath, List<String> images) {
|
|
// 1. 优先根据文件名判断
|
|
// 1. 优先根据文件名判断
|
|
- if (containsAny(fileName, AMAC_KEYWORDS)) {
|
|
|
|
|
|
+ if (ReportParseUtils.containsAny(fileName, AMAC_KEYWORDS)) {
|
|
return ReportMonthlyType.AMAC;
|
|
return ReportMonthlyType.AMAC;
|
|
}
|
|
}
|
|
- if (containsAny(fileName, MANAGER_KEYWORDS)) {
|
|
|
|
|
|
+ if (ReportParseUtils.containsAny(fileName, ReportParseUtils.MANAGER_KEYWORDS)) {
|
|
return ReportMonthlyType.MANAGER;
|
|
return ReportMonthlyType.MANAGER;
|
|
}
|
|
}
|
|
if (StrUtil.isNotBlank(ReportParseUtils.matchFundCode(fileName))) {
|
|
if (StrUtil.isNotBlank(ReportParseUtils.matchFundCode(fileName))) {
|
|
@@ -624,21 +631,21 @@ public class EmailParseService {
|
|
// 2. 根据文件路径判断
|
|
// 2. 根据文件路径判断
|
|
List<String> pathSegments = StrUtil.split(filepath, File.separator);
|
|
List<String> pathSegments = StrUtil.split(filepath, File.separator);
|
|
for (String segment : pathSegments) {
|
|
for (String segment : pathSegments) {
|
|
- boolean isExcluded = containsAny(segment, EXCLUDE_PATH_KEYWORDS);
|
|
|
|
- if (!isExcluded && containsAny(segment, AMAC_KEYWORDS)) {
|
|
|
|
|
|
+ boolean isExcluded = ReportParseUtils.containsAny(segment, EXCLUDE_PATH_KEYWORDS);
|
|
|
|
+ if (!isExcluded && ReportParseUtils.containsAny(segment, AMAC_KEYWORDS)) {
|
|
return ReportMonthlyType.AMAC;
|
|
return ReportMonthlyType.AMAC;
|
|
}
|
|
}
|
|
- if (!isExcluded && containsAny(segment, MANAGER_KEYWORDS)) {
|
|
|
|
|
|
+ if (!isExcluded && ReportParseUtils.containsAny(segment, ReportParseUtils.MANAGER_KEYWORDS)) {
|
|
return ReportMonthlyType.MANAGER;
|
|
return ReportMonthlyType.MANAGER;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// 3. 根据邮件主题判断
|
|
// 3. 根据邮件主题判断
|
|
- boolean isAmacEmail = containsAny(emailTitle, AMAC_KEYWORDS)
|
|
|
|
|
|
+ boolean isAmacEmail = ReportParseUtils.containsAny(emailTitle, AMAC_KEYWORDS)
|
|
&& !emailTitle.contains("公司及协会版");
|
|
&& !emailTitle.contains("公司及协会版");
|
|
if (isAmacEmail) {
|
|
if (isAmacEmail) {
|
|
return ReportMonthlyType.AMAC;
|
|
return ReportMonthlyType.AMAC;
|
|
}
|
|
}
|
|
- if (containsAny(emailTitle, MANAGER_KEYWORDS)) {
|
|
|
|
|
|
+ if (ReportParseUtils.containsAny(emailTitle, ReportParseUtils.MANAGER_KEYWORDS)) {
|
|
return ReportMonthlyType.MANAGER;
|
|
return ReportMonthlyType.MANAGER;
|
|
}
|
|
}
|
|
// 4.ocr 提取“曲线”、“基金份额”等关键字,如果有曲线则是管理人,如果有估值日期则是协会
|
|
// 4.ocr 提取“曲线”、“基金份额”等关键字,如果有曲线则是管理人,如果有估值日期则是协会
|
|
@@ -652,14 +659,6 @@ public class EmailParseService {
|
|
return ReportMonthlyType.FAILED;
|
|
return ReportMonthlyType.FAILED;
|
|
}
|
|
}
|
|
|
|
|
|
- // 工具方法:检查字符串是否包含任意关键词
|
|
|
|
- private boolean containsAny(String input, Set<String> keywords) {
|
|
|
|
- if (StrUtil.isBlank(input)) {
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
- return keywords.stream().anyMatch(input::contains);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
/**
|
|
/**
|
|
* ocr 提取信息(包括首页的基金名称或报告日期,尾页的印章或联系人等信息)
|
|
* ocr 提取信息(包括首页的基金名称或报告日期,尾页的印章或联系人等信息)
|
|
*
|
|
*
|
|
@@ -735,7 +734,7 @@ public class EmailParseService {
|
|
ReportWriter<ReportData> instance = this.reportWriterFactory.getInstance(reportType);
|
|
ReportWriter<ReportData> instance = this.reportWriterFactory.getInstance(reportType);
|
|
instance.write(reportData);
|
|
instance.write(reportData);
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
- log.error("报告{}结果保存失败\n{}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
|
|
|
|
+ log.error("报告{} 结果保存失败 {}", fileName, ExceptionUtil.stacktraceToString(e));
|
|
} finally {
|
|
} finally {
|
|
writeWatch.stop();
|
|
writeWatch.stop();
|
|
if (log.isInfoEnabled()) {
|
|
if (log.isInfoEnabled()) {
|
|
@@ -824,10 +823,10 @@ public class EmailParseService {
|
|
|
|
|
|
Map<String, List<EmailContentInfoDTO>> result = MapUtil.newHashMap(128);
|
|
Map<String, List<EmailContentInfoDTO>> result = MapUtil.newHashMap(128);
|
|
try {
|
|
try {
|
|
- if (log.isInfoEnabled()) {
|
|
|
|
|
|
+ if (log.isDebugEnabled()) {
|
|
Folder[] list = store.getDefaultFolder().list("*");
|
|
Folder[] list = store.getDefaultFolder().list("*");
|
|
List<String> names = Arrays.stream(list).map(Folder::getFullName).toList();
|
|
List<String> names = Arrays.stream(list).map(Folder::getFullName).toList();
|
|
- log.info("获取所有邮箱文件夹:{}", names);
|
|
|
|
|
|
+ log.debug("获取所有邮箱文件夹:{}", names);
|
|
}
|
|
}
|
|
|
|
|
|
for (String folderName : folderNames) {
|
|
for (String folderName : folderNames) {
|
|
@@ -910,7 +909,7 @@ public class EmailParseService {
|
|
} else if (content instanceof Part part) {
|
|
} else if (content instanceof Part part) {
|
|
this.rePart(emailAddress, emailTitle, emailDate, part, dtos);
|
|
this.rePart(emailAddress, emailTitle, emailDate, part, dtos);
|
|
} else {
|
|
} else {
|
|
- log.warn("{} 不支持的邮件数据 {}", folderName, emailTitle);
|
|
|
|
|
|
+ log.warn("{} 邮件{} 获取不了附件", folderName, emailTitle);
|
|
}
|
|
}
|
|
if (CollUtil.isEmpty(dtos)) {
|
|
if (CollUtil.isEmpty(dtos)) {
|
|
log.warn("{} 邮件{} 没有获取到附件", folderName, emailTitle);
|
|
log.warn("{} 邮件{} 没有获取到附件", folderName, emailTitle);
|
|
@@ -922,9 +921,9 @@ public class EmailParseService {
|
|
});
|
|
});
|
|
emailMessageMap.put(IdUtil.simpleUUID(), dtos);
|
|
emailMessageMap.put(IdUtil.simpleUUID(), dtos);
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
- log.error("{} 获取邮箱的邮件{} 报错,堆栈信息:{}", folderName, emailTitle, ExceptionUtil.stacktraceToString(e));
|
|
|
|
|
|
+ log.error("{} 邮件{} 下载报错 {}", folderName, emailTitle, ExceptionUtil.stacktraceToString(e));
|
|
} finally {
|
|
} finally {
|
|
- if (log.isInfoEnabled()) {
|
|
|
|
|
|
+ if (CollUtil.isNotEmpty(dtos) && log.isInfoEnabled()) {
|
|
log.info("{} 邮件{} 下载完成,总计耗时{} ms,文件内容如下\n {}", folderName,
|
|
log.info("{} 邮件{} 下载完成,总计耗时{} ms,文件内容如下\n {}", folderName,
|
|
emailTitle, System.currentTimeMillis() - start, dtos);
|
|
emailTitle, System.currentTimeMillis() - start, dtos);
|
|
}
|
|
}
|
|
@@ -953,8 +952,8 @@ public class EmailParseService {
|
|
String disposition = part.getDisposition();
|
|
String disposition = part.getDisposition();
|
|
String contentType = part.getContentType();
|
|
String contentType = part.getContentType();
|
|
|
|
|
|
- String[] att_files = new String[]{Constants.ARCHIVE_7Z, Constants.ARCHIVE_RAR,
|
|
|
|
- Constants.ARCHIVE_ZIP, Constants.FILE_PDF, Constants.FILE_DOCX, Constants.FILE_JPG, Constants.FILE_PNG};
|
|
|
|
|
|
+ String[] att_files = new String[]{Constants.ARCHIVE_7Z, Constants.ARCHIVE_RAR, Constants.ARCHIVE_ZIP,
|
|
|
|
+ Constants.FILE_PDF, Constants.FILE_DOCX, Constants.FILE_JPG, Constants.FILE_PNG};
|
|
boolean attachmentFlag = StrUtil.endWithAny(fileName, att_files);
|
|
boolean attachmentFlag = StrUtil.endWithAny(fileName, att_files);
|
|
boolean isAttachment = attachmentFlag
|
|
boolean isAttachment = attachmentFlag
|
|
|| Part.ATTACHMENT.equalsIgnoreCase(disposition)
|
|
|| Part.ATTACHMENT.equalsIgnoreCase(disposition)
|