Prechádzať zdrojové kódy

fix:优化ai提示词+压缩文件乱码问题处理

wangzaijun pred 4 dňami
rodič
commit
d1d5797048

+ 7 - 14
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AIOtherReportParser.java

@@ -1,14 +1,13 @@
 package com.smppw.modaq.application.components.report.parser.ai;
 
-import cn.hutool.json.JSONObject;
-import cn.hutool.json.JSONUtil;
 import com.smppw.modaq.application.components.report.parser.ReportParserConstant;
-import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.exception.ReportParseException;
 import com.smppw.modaq.domain.dto.report.*;
 import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
 import org.springframework.stereotype.Component;
 
+import java.util.Objects;
+
 /**
  * 其他格式的报告(只解析报告基本信息、基金基本信息)
  */
@@ -20,7 +19,7 @@ public class AIOtherReportParser extends AbstractAIReportParser<ReportData> {
 
     @Override
     protected String prompt() {
-        return "识别文件中的基金名称、基金管理人、基金托管人和报告日期,如果无法识别就返回空字符,结果用json返回";
+        return "识别文件中的基金名称、基金管理人、基金托管人和报告日期,并且判断文件中是否存在联系人等信息,如果无法识别就返回空字符,结果用json返回";
     }
 
     @Override
@@ -29,18 +28,12 @@ public class AIOtherReportParser extends AbstractAIReportParser<ReportData> {
     }
 
     @Override
-    protected void handleAiResult(String result) throws ReportParseException {
-        try {
-            JSONObject jsonObject = JSONUtil.parseObj(result);
-            this.allInfoMap.putAll(jsonObject);
-        } catch (Exception e) {
-            throw new ReportParseException(ReportParseStatus.PARSE_HANDLE_FAIL);
-        }
-    }
-
-    @Override
     protected ReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo,
                                                 ReportFundInfoDTO fundInfo) throws ReportParseException {
+        Object contact = this.allInfoMap.get("联系人信息");
+        if (Objects.equals("存在", contact)) {
+            reportInfo.setWithContacts(true);
+        }
         return new MonthlyReportData(reportInfo, fundInfo);
     }
 

+ 18 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/report/writer/WeeklyReportWriter.java

@@ -0,0 +1,18 @@
+package com.smppw.modaq.application.components.report.writer;
+
+import com.smppw.modaq.domain.dto.report.WeeklyReportData;
+import com.smppw.modaq.domain.mapper.report.ReportBaseInfoMapper;
+import com.smppw.modaq.domain.mapper.report.ReportFundInfoMapper;
+import org.springframework.stereotype.Component;
+
+@Component(ReportWriterConstant.WRITER_WEEKLY)
+public class WeeklyReportWriter extends AbstractReportWriter<WeeklyReportData> {
+    public WeeklyReportWriter(ReportBaseInfoMapper baseInfoMapper, ReportFundInfoMapper fundInfoMapper) {
+        super(baseInfoMapper, fundInfoMapper);
+    }
+
+    @Override
+    protected void writeExtData(WeeklyReportData reportData) {
+        // 没有数据要保存
+    }
+}

+ 0 - 19
mo-daq/src/main/java/com/smppw/modaq/common/dto/FilenamePathDTO.java

@@ -1,19 +0,0 @@
-package com.smppw.modaq.common.dto;
-
-public class FilenamePathDTO {
-    private final String filename;
-    private final String filepath;
-
-    public FilenamePathDTO(String filename, String filepath) {
-        this.filename = filename;
-        this.filepath = filepath;
-    }
-
-    public String getFilename() {
-        return filename;
-    }
-
-    public String getFilepath() {
-        return filepath;
-    }
-}

+ 2 - 3
mo-daq/src/main/java/com/smppw/modaq/domain/dto/EmailZipFileDTO.java

@@ -6,14 +6,13 @@ import lombok.Getter;
 @Getter
 public class EmailZipFileDTO {
     private final String filename;
-    private final String originalName;
+//    private final String originalName;
     private final String filepath;
     private final Integer emailType;
 
-    public EmailZipFileDTO(String filepath, String originalName, Integer emailType) {
+    public EmailZipFileDTO(String filepath, Integer emailType) {
         this.filepath = filepath;
         this.emailType = emailType;
-        this.originalName = originalName;
         this.filename = FileUtil.getName(filepath);
     }
 }

+ 8 - 4
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportBaseInfoDTO.java

@@ -24,10 +24,14 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
      * 报告类型
      */
     private String reportType;
+//    /**
+//     * 报告是否用印
+//     */
+//    private Boolean withSeals;
     /**
-     * 报告是否用印
+     * 观点报告是否存在联系人信息(可能包含联系电话、地址等敏感信息)
      */
-    private Boolean withSeals;
+    private Boolean withContacts;
 
     public ReportBaseInfoDTO() {
         super();
@@ -44,7 +48,7 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
         entity.setReportDate(this.toDate(this.reportDate));
         entity.setReportName(this.reportName);
         entity.setReportType(this.reportType);
-//        entity.setWithSeals(this.withSeals);
+        entity.setWithContacts(this.withContacts);
         this.initEntity(entity);
         return entity;
     }
@@ -56,7 +60,7 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
                 ", reportDate='" + reportDate + '\'' +
                 ", reportName='" + reportName + '\'' +
                 ", reportType='" + reportType + '\'' +
-                ", withSeals=" + withSeals +
+                ", withContacts=" + withContacts +
                 '}';
     }
 }

+ 4 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/entity/report/ReportBaseInfoDO.java

@@ -31,4 +31,8 @@ public class ReportBaseInfoDO extends BaseReportDO {
 //     * 报告是否用印
 //     */
 //    private Boolean withSeals;
+    /**
+     * 观点报告是否存在联系人信息(可能包含联系电话、地址等敏感信息)
+     */
+    private Boolean withContacts;
 }

+ 6 - 8
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -15,7 +15,6 @@ import com.smppw.modaq.application.util.EmailUtil;
 import com.smppw.modaq.common.conts.DateConst;
 import com.smppw.modaq.common.conts.EmailParseStatusConst;
 import com.smppw.modaq.common.conts.EmailTypeConst;
-import com.smppw.modaq.common.dto.FilenamePathDTO;
 import com.smppw.modaq.common.enums.ReportParseStatus;
 import com.smppw.modaq.common.enums.ReportParserFileType;
 import com.smppw.modaq.common.enums.ReportType;
@@ -176,7 +175,7 @@ public class EmailParseService {
             }
         }
 
-        List<FilenamePathDTO> extractedDirs;
+        List<String> extractedDirs;
         if (ExcelUtil.isZip(filepath)) {
             extractedDirs = ExcelUtil.extractCompressedFiles(filepath, destPath);
         } else if (ExcelUtil.isRAR(filepath)) {
@@ -184,8 +183,7 @@ public class EmailParseService {
         } else {
             return;
         }
-        for (FilenamePathDTO dto : extractedDirs) {
-            String dir = dto.getFilepath();
+        for (String dir : extractedDirs) {
             // 如果邮件类型不满足解析条件则重新根据文件名判断
             if (!Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)
                     && !Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType)) {
@@ -196,13 +194,13 @@ public class EmailParseService {
                 String[] subDirs = file.list();
                 if (subDirs != null) {
                     for (String subDir : subDirs) {
-                        resultList.add(new EmailZipFileDTO(subDir, dto.getFilename(), emailType));
+                        resultList.add(new EmailZipFileDTO(subDir, emailType));
                     }
                 } else {
                     log.warn("目录 {} 下无文件", dir);
                 }
             } else {
-                resultList.add(new EmailZipFileDTO(dir, dto.getFilename(), emailType));
+                resultList.add(new EmailZipFileDTO(dir, emailType));
             }
         }
     }
@@ -234,9 +232,9 @@ public class EmailParseService {
             List<EmailZipFileDTO> zipFiles = entry.getValue();
             if (CollUtil.isNotEmpty(zipFiles)) {
                 for (EmailZipFileDTO zipFile : zipFiles) {
-                    EmailFileInfoDO emailFile = saveEmailFileInfo(emailId, null, zipFile.getOriginalName(), zipFile.getFilepath(), null);
+                    EmailFileInfoDO emailFile = saveEmailFileInfo(emailId, null, zipFile.getFilename(), zipFile.getFilepath(), null);
                     // 解析并保存报告
-                    ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailFile.getId(), zipFile.getOriginalName(),
+                    ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailFile.getId(), zipFile.getFilename(),
                             zipFile.getFilepath(), zipFile.getEmailType(), emailFile.getAiFileId());
                     dataList.add(parseResult);
                 }

+ 65 - 27
mo-daq/src/main/java/com/smppw/modaq/infrastructure/util/ExcelUtil.java

@@ -4,7 +4,6 @@ import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.ListUtil;
 import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.util.StrUtil;
-import com.smppw.modaq.common.dto.FilenamePathDTO;
 import net.sf.sevenzipjbinding.ExtractOperationResult;
 import net.sf.sevenzipjbinding.IInArchive;
 import net.sf.sevenzipjbinding.SevenZip;
@@ -24,10 +23,19 @@ import java.io.*;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.Arrays;
 import java.util.Enumeration;
 import java.util.List;
 
 public class ExcelUtil {
+    // 候选编码列表(按常见顺序排列)
+    private static final List<String> CANDIDATE_ENCODINGS = Arrays.asList(
+            "GBK",      // 中文环境常用
+            "UTF-8",   // 标准编码
+            "GB2312",  // 旧版中文
+            "ISO-8859-1" // 默认回退
+    );
+
     public static boolean isExcel(String fileName) {
         return StrUtil.isNotBlank(fileName) && (fileName.endsWith("xls") || fileName.endsWith("xlsx") || fileName.endsWith("XLS") || fileName.endsWith("XLSX"));
     }
@@ -48,37 +56,40 @@ public class ExcelUtil {
         return StrUtil.isNotBlank(fileName) && (fileName.endsWith("rar") || fileName.endsWith("RAR"));
     }
 
-    public static List<FilenamePathDTO> extractCompressedFiles(String zipFilePath, String destFilePath) throws IOException, ArchiveException {
-        List<FilenamePathDTO> filePathList = CollUtil.newArrayList();
+    public static List<String> extractCompressedFiles(String zipFilePath, String destFilePath) throws IOException, ArchiveException {
+        List<String> filePathList = CollUtil.newArrayList();
 
         File destFile = FileUtil.file(destFilePath);
         if (!destFile.exists()) {
             Files.createDirectories(destFile.toPath());
         }
 
+        String encoding = detectEncoding(zipFilePath);
+        if (encoding == null) {
+            encoding = "GBK";
+        }
+
         try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipFilePath));
-             ArchiveInputStream<? extends ArchiveEntry> ais = new ArchiveStreamFactory().createArchiveInputStream(fis)) {
+             ArchiveInputStream<? extends ArchiveEntry> ais = new ArchiveStreamFactory()
+                     .createArchiveInputStream(ArchiveStreamFactory.detect(fis), fis, encoding)) {
             ArchiveEntry entry;
-            int i = 1;
             while ((entry = ais.getNextEntry()) != null) {
                 String name = entry.getName();
-                if (name.startsWith("__MACOSX/")) {
-                    continue;
-                }
-                String zipFilename = FileUtil.getName(destFilePath);
-                if (name.contains("确认")) {
-                    zipFilename += "_确认单";
-                }
-                String ext = FileUtil.extName(name);
-                name = zipFilename + "_" + i + "." + ext;
-                File entryFile = FileUtil.file(destFilePath, name);
-                i++;
                 if (entry.isDirectory()) {
+                    File entryFile = FileUtil.file(destFilePath, name);
                     Files.createDirectories(entryFile.toPath());
                 } else {
+                    if (name.startsWith("__MACOSX/")) {
+                        continue;
+                    }
+                    String zipFilename = FileUtil.getName(destFilePath);
+                    if (zipFilename.contains("确认")) {
+                        name = "确认单_" + name;
+                    }
+                    File entryFile = FileUtil.file(destFilePath, name);
                     try (FileOutputStream fos = new FileOutputStream(entryFile)) {
                         IOUtils.copy(ais, fos);
-                        filePathList.add(new FilenamePathDTO(entryFile.getName(), entryFile.getPath()));
+                        filePathList.add(entryFile.getPath());
                     }
                 }
             }
@@ -86,7 +97,7 @@ public class ExcelUtil {
             if (e.getMessage() != null
                     && (e.getMessage().contains("split")
                     || e.getMessage().contains("volume"))) {
-                filePathList.addAll(extractSplitZip(zipFilePath, destFilePath));
+                filePathList.addAll(extractSplitZip(zipFilePath, destFilePath, encoding));
             } else {
                 throw e;
             }
@@ -95,10 +106,10 @@ public class ExcelUtil {
         return filePathList;
     }
 
-    public static List<FilenamePathDTO> extractSplitZip(String zipFilePath, String destFilePath) throws IOException {
-        List<FilenamePathDTO> resultList = ListUtil.list(false);
+    public static List<String> extractSplitZip(String zipFilePath, String destFilePath, String encoding) throws IOException {
+        List<String> resultList = ListUtil.list(false);
         File file = new File(zipFilePath);
-        try (ZipFile zipFile = ZipFile.builder().setFile(file).get()) {
+        try (ZipFile zipFile = ZipFile.builder().setFile(file).setCharset(encoding).get()) {
             Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
             while (entries.hasMoreElements()) {
                 ZipArchiveEntry entry = entries.nextElement();
@@ -107,21 +118,21 @@ public class ExcelUtil {
                     Path path = Paths.get(destFilePath, entry.getName());
                     FileUtil.del(path);
                     Files.copy(is, path);
-                    resultList.add(new FilenamePathDTO(entry.getName(), path.toAbsolutePath().toString()));
+                    resultList.add(path.toAbsolutePath().toString());
                 }
             }
         }
         return resultList;
     }
 
-    public static List<FilenamePathDTO> extractRar5(String rarFilePath, String outputDir) throws Exception {
+    public static List<String> extractRar5(String rarFilePath, String outputDir) throws Exception {
         // 初始化 SevenZipJBinding 本地库
         SevenZip.initSevenZipFromPlatformJAR();
 
         RandomAccessFile randomAccessFile = null;
         IInArchive inArchive = null;
 
-        List<FilenamePathDTO> resultList = ListUtil.list(false);
+        List<String> resultList = ListUtil.list(false);
         try {
             // 打开 RAR 文件
             randomAccessFile = new RandomAccessFile(rarFilePath, "r");
@@ -131,7 +142,7 @@ public class ExcelUtil {
             ISimpleInArchive simpleInArchive = inArchive.getSimpleInterface();
             for (ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) {
                 if (!item.isFolder()) {
-                    resultList.add(new FilenamePathDTO(item.getPath(), extractItem(item, outputDir)));
+                    resultList.add(extractItem(item, outputDir));
                 }
             }
         } finally {
@@ -176,11 +187,38 @@ public class ExcelUtil {
         return outputFile.getAbsolutePath();
     }
 
+    // 检测压缩包编码
+    private static String detectEncoding(String zipPath) {
+        for (String encoding : CANDIDATE_ENCODINGS) {
+            try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipPath));
+                 ArchiveInputStream<? extends ArchiveEntry> ais = new ArchiveStreamFactory()
+                         .createArchiveInputStream(ArchiveStreamFactory.detect(fis), fis, encoding)) {
+
+                ArchiveEntry entry = ais.getNextEntry();
+                if (entry == null) continue; // 空压缩包
+
+                String fileName = entry.getName();
+                if (!hasInvalidCharacters(fileName)) {
+                    return encoding; // 找到有效编码
+                }
+            } catch (Exception e) {
+                // 编码不支持或文件错误,继续尝试下一个
+            }
+        }
+        return null;
+    }
+
+    // 检查文件名是否包含无效字符(如替换符)
+    private static boolean hasInvalidCharacters(String fileName) {
+        // 检查常见乱码符号:�或连续问号
+        return fileName.contains("�") || fileName.matches(".*\\?{2,}.*");
+    }
+
     public static void main(String[] args) throws Exception {
         String zipFilePath = "D:\\home\\wwwroot\\mo_report_file\\wangzaijun@simuwang.com\\20250321\\20250321143709排排网确认单.rar";
         String destFilePath = "D:\\home\\wwwroot\\mo_report_file\\wangzaijun@simuwang.com\\20250321";
-        List<FilenamePathDTO> strings = extractRar5(zipFilePath, destFilePath);
-        for (FilenamePathDTO string : strings) {
+        List<String> strings = extractRar5(zipFilePath, destFilePath);
+        for (String string : strings) {
             System.out.println(string);
         }
 //        List<String> fileList = extractCompressedFiles(zipFilePath, destFilePath);

+ 1 - 1
mo-daq/src/test/java/com/smppw/modaq/MoDaqApplicationTests.java

@@ -34,7 +34,7 @@ public class MoDaqApplicationTests {
 
     @Test
     public void reportTest() {
-        MailboxInfoDTO emailInfoDTO = this.buildMailbox("wangzaijun@simuwang.com", "WZJ2twy1314");
+        MailboxInfoDTO emailInfoDTO = this.buildMailbox("**@simuwang.com", "**");
         Date startDate = DateUtil.parse("2025-04-24 08:40:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         Date endDate = DateUtil.parse("2025-04-24 19:42:05", DateConst.YYYY_MM_DD_HH_MM_SS);
         try {