Преглед изворни кода

feat:AI解析时除季报和年报外都用第一页转图片后去识别信息

wangzaijun пре 4 недеља
родитељ
комит
66437c4e23

+ 11 - 11
mo-daq/src/main/java/com/smppw/modaq/application/task/ParseSchedulerTask.java

@@ -29,21 +29,21 @@ public class ParseSchedulerTask {
 
     @PostConstruct
     public void executeOnStartup() {
-//        try {
-//            // 定期报告从 其他文件夹/报告公告 文件夹获取邮件
-//            this.emailParseApiService.parseEmail(
-//                    DateUtils.parseDateTime("2025-05-15 11:15:00"),
-//                    DateUtils.parseDateTime("2025-05-15 15:25:00"),
-//                    ListUtil.of("其他文件夹/报告公告"), EmailTypeConst.REPORT_EMAIL_TYPES);
-//        } catch (Exception e) {
-//            logger.error(ExceptionUtil.getMessage(e));
-//        }
+        try {
+            // 定期报告从 其他文件夹/报告公告 文件夹获取邮件
+            this.emailParseApiService.parseEmail(
+                    DateUtil.parseDateTime("2025-05-30 14:53:00"),
+                    DateUtil.parseDateTime("2025-05-50 14:54:00"),
+                    ListUtil.of("其他文件夹/报告公告"), EmailTypeConst.REPORT_EMAIL_TYPES);
+        } catch (Exception e) {
+            logger.error(ExceptionUtil.getMessage(e));
+        }
 //
 //        try {
 //            // 确认函从 INBOX 文件夹获取邮件
 //            this.emailParseApiService.parseEmail(
-//                    DateUtils.parseDateTime("2025-05-15 11:44:00"),
-//                    DateUtils.parseDateTime("2025-05-15 16:32:00"),
+//                    DateUtil.parseDateTime("2025-05-15 11:44:00"),
+//                    DateUtil.parseDateTime("2025-05-15 16:32:00"),
 //                    null, ListUtil.of(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE));
 //        } catch (Exception e) {
 //            logger.error(ExceptionUtil.getMessage(e));

+ 21 - 17
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -32,6 +32,7 @@ import com.smppw.modaq.domain.entity.EmailParseInfoDO;
 import com.smppw.modaq.domain.mapper.EmailFileInfoMapper;
 import com.smppw.modaq.domain.mapper.EmailParseInfoMapper;
 import com.smppw.modaq.infrastructure.util.ArchiveUtil;
+import com.smppw.modaq.infrastructure.util.PdfUtil;
 import jakarta.mail.*;
 import jakarta.mail.internet.MimeUtility;
 import jakarta.mail.search.ComparisonTerm;
@@ -415,21 +416,17 @@ public class EmailParseService {
         Integer fileId = emailFileInfo.getId();
         String aiFileId = emailFileInfo.getAiFileId();
 
-//        // 首页和尾页转为png图片,首页用来识别基金名称和基金代码、尾页用来识别印章和联系人
-//        List<String> images = null;
-//        try {
-//            String output = FileUtil.getParent(filepath, 1) + File.separator + "image";
-//            images = PdfUtil.convertFirstAndLastPagesToPng(filepath, FileUtil.file(output), 300);
-//            if (log.isInfoEnabled()) {
-//                log.info("报告[{}] 生成的图片地址是:{}", fileName, images);
-//            }
-//        } catch (Exception e) {
-//            log.warn("报告[{}] 生成图片失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
-//        }
-//        // todo 用图片来解析报告印章和联系人
-//        if (CollUtil.isNotEmpty(images)) {
-//            // do something ...
-//        }
+        // 首页和尾页转为png图片,首页用来识别基金名称和基金代码、尾页用来识别印章和联系人
+        List<String> images = ListUtil.empty();
+        try {
+            String output = FileUtil.getParent(filepath, 1) + File.separator + "image";
+            images = PdfUtil.convertFirstAndLastPagesToPng(filepath, FileUtil.file(output), 300);
+            if (log.isInfoEnabled()) {
+                log.info("报告[{}] 生成的图片地址是:{}", fileName, images);
+            }
+        } catch (Exception e) {
+            log.warn("报告[{}] 生成图片失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
+        }
 
         // 不支持解析的格式文件
         boolean notSupportFile = false;
@@ -471,8 +468,15 @@ public class EmailParseService {
         } finally {
             // 如果解析结果是空的就用AI工具解析一次
             if (reportData == null && !notSupportFile) {
-                if (log.isInfoEnabled()) {
-                    log.info("报告{} 开始AI解析......", fileName);
+                if (reportType == ReportType.QUARTERLY || reportType == ReportType.ANNUALLY) {
+                    if (log.isInfoEnabled()) {
+                        log.info("报告{} 开始AI解析......", fileName);
+                    }
+                } else if (CollUtil.isNotEmpty(images)) {
+                    filepath = images.get(0);
+                    if (log.isInfoEnabled()) {
+                        log.info("报告{} 用首页图片{} 开始AI解析......", fileName, filepath);
+                    }
                 }
                 ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName).filepath(filepath)
                         .registerNumber(registerNumber).reportType(reportType).aiFileId(aiFileId).build();

+ 3 - 3
mo-daq/src/test/java/com/smppw/modaq/MoDaqApplicationTests.java

@@ -37,9 +37,9 @@ public class MoDaqApplicationTests {
 
     @Test
     public void reportTest() {
-        MailboxInfoDTO emailInfoDTO = this.buildMailbox("**@simuwang.com", "**");
-        Date startDate = DateUtil.parse("2025-05-23 15:50:00", DateConst.YYYY_MM_DD_HH_MM_SS);
-        Date endDate = DateUtil.parse("2025-05-23 15:58:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        MailboxInfoDTO emailInfoDTO = this.buildMailbox("wangzaijun@simuwang.com", "WZJ2twy1314");
+        Date startDate = DateUtil.parse("2025-05-30 16:05:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date endDate = DateUtil.parse("2025-05-30 16:58:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         try {
             List<String> folderNames = ListUtil.list(false);
 //            folderNames.add("其他文件夹/报告公告");