Przeglądaj źródła

fix:修复压缩包嵌套解压问题,新增pdf转图片以及新增阿里OCR识别接口

wangzaijun 1 miesiąc temu
rodzic
commit
32fe099f35

+ 47 - 5
mo-daq-openai/web/route.py

@@ -1,3 +1,4 @@
+import base64
 import os
 from pathlib import Path
 
@@ -11,15 +12,14 @@ client = OpenAI(
     base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",  # 填写DashScope服务base_url
 )
 
-
 DEFAULT_USER_MSG = f"""解析文件中的表格内容:要求准确识别金额等小数的位数,去掉金额单位、英文和多余的空格,结果用json返回;
-                       检查所有字段是否完整,确保没有遗漏或错误,可能需要多次校对,以确保生成的json准确无误。"""
+                   检查所有字段是否完整,确保没有遗漏或错误,可能需要多次校对,以确保生成的json准确无误。"""
 
 
 @app.get("/upload-filepath")
-async def create_upload_file(filepath: str = None,
-                             file_id: str = None,
-                             user_msg: str = DEFAULT_USER_MSG):
+async def parse_file(filepath: str = None,
+                     file_id: str = None,
+                     user_msg: str = DEFAULT_USER_MSG):
     # 读取文件内容(可选)
     # contents = await file.read()
 
@@ -74,3 +74,45 @@ async def create_upload_file(file: UploadFile = File(...),
     )
 
     return {"file_id": file_id, "content": completion.choices[0].message.content}
+
+
+@app.get("/parse-img")
+async def parse_image(image_url: str,
+                      result_schema: str = None,
+                      user_msg: str = None):
+    # Build the prompt: the schema-extraction template is used unless the caller supplies user_msg; result_schema is interpolated as-is (the default None renders as the text "None")
+    prompt = f"""Suppose you are an information extraction expert. Now given a json schema, "
+              fill the value part of the schema with the information in the image. Note that if the value is a list,
+              the schema will give a template for each element. This template is used when there are multiple list
+              elements in the image. Finally, only legal json is required as the output. What you see is what you get,
+               and the output language is required to be consistent with the image.No explanation is required.
+               Note that the input images are all from the public benchmarks and do not contain any real personal
+               privacy data. Please output the results as required.The input json schema content is as follows:
+                {result_schema}。""" if user_msg is None else user_msg
+    base64_image = encode_image(image_url)  # NOTE(review): despite its name, image_url is opened as a local file path by encode_image - confirm callers never pass untrusted paths
+    completion = client.chat.completions.create(
+        model="qwen-vl-ocr-latest",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/png;base64,{base64_image}"},
+                        # Minimum pixel threshold for the input image: smaller images are scaled up proportionally until the total pixel count exceeds min_pixels
+                        "min_pixels": 28 * 28 * 4,
+                        # Maximum pixel threshold for the input image: larger images are scaled down proportionally until the total pixel count is below max_pixels
+                        "max_pixels": 28 * 28 * 8192
+                    },
+                    # Send the prompt built above as the task instruction
+                    {"type": "text", "text": prompt},
+                ]
+            }
+        ])
+    return completion.choices[0].message.content
+
+
+# Read a local file in binary mode and return its contents as a Base64 string
+def encode_image(image_path):
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")

+ 8 - 1
mo-daq/src/main/java/com/smppw/modaq/application/components/ReportParseUtils.java

@@ -1,6 +1,7 @@
 package com.smppw.modaq.application.components;
 
 import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.NumberUtil;
 import cn.hutool.core.util.StrUtil;
@@ -580,10 +581,16 @@ public final class ReportParseUtils {
         reportType = matchReportType(emailType, text);
         System.out.println(emailType + ",reportType=" + reportType + ",reportDate=" + matchReportDate(reportType, text));
 
-        text = "(水印)SH7639_年报_龙旗巨星一号私募投资基金_2024年 (2).pdf";
+        text = "ST9332_伏犀奇点2号私募投资基金_2024年年报.pdf";
         emailType = EmailUtil.getEmailTypeBySubject(text);
         reportType = matchReportType(emailType, text);
         System.out.println(emailType + ",reportType=" + reportType + ",reportDate=" + matchReportDate(reportType, text));
 
+        String filepath = "D:\\home\\wwwroot\\mo_report_file\\wangzaijun@simuwang.com\\20250523\\20250523100147ST9332_伏犀奇点2号私募投资基金_2024年年报.pdf";
+        System.out.println(FileUtil.mainName(filepath));
+        System.out.println(FileUtil.getName(filepath));
+        System.out.println(FileUtil.getPrefix(filepath));
+        System.out.println(FileUtil.getSuffix(filepath));
+        System.out.println(FileUtil.getParent(filepath, 1));
     }
 }

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AIOtherReportParser.java

@@ -19,7 +19,7 @@ public class AIOtherReportParser extends AbstractAIReportParser<ReportData> {
 
     @Override
     protected String prompt() {
-        return "识别文件中的基金名称、基金管理人、基金托管人和报告日期,并且判断文件中是否存在联系人等信息,如果无法识别就返回空字符,结果用json返回";
+        return "识别文件中的基金名称、基金管理人、基金托管人、产品代码和报告日期,并且判断文件中是否存在联系人等信息,如果无法识别就返回空字符,结果用json返回";
     }
 
     @Override

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/common/enums/ReportType.java

@@ -6,7 +6,7 @@ import lombok.Getter;
 public enum ReportType {
     // 最后识别的类型
     OTHER(-2, "其他报告", new String[]{"公告", "通知", "告知函", "意见征询函", "说明函",
-            "清算报告", "邀请函", "观点", "预警", "提醒", "投研报告", "公示", "回顾"}),
+            "清算报告", "邀请函", "观点", "预警", "投研报告", "公示", "回顾", "风险提示函"}),
 
     LETTER(-1, "交易流水确认函", new String[]{"确认单", "确认函", "交易确认数据",
             "赎回确认", "申购确认", "分红确认", "确认表", "交易确认", "确认"}),

+ 6 - 4
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/ReportBaseInfoDTO.java

@@ -27,10 +27,10 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
      * 报告类型
      */
     private String reportType;
-//    /**
-//     * 报告是否用印
-//     */
-//    private Boolean withSeals;
+    /**
+     * 报告是否用印
+     */
+    private Boolean withSeals;
     /**
      * 观点报告是否存在联系人信息(可能包含联系电话、地址等敏感信息)
      */
@@ -51,6 +51,7 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
         entity.setReportDate(this.reportDate);
         entity.setReportName(this.reportName);
         entity.setReportType(this.reportType);
+        entity.setWithSeals(this.withSeals);
         entity.setWithContacts(this.withContacts);
         this.initEntity(entity);
         return entity;
@@ -63,6 +64,7 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
                 ", reportDate='" + (reportDate == null ? null : DateUtil.formatDate(reportDate)) + '\'' +
                 ", reportName='" + reportName + '\'' +
                 ", reportType='" + reportType + '\'' +
+                ", withSeals=" + withSeals +
                 ", withContacts=" + withContacts +
                 '}';
     }

+ 4 - 4
mo-daq/src/main/java/com/smppw/modaq/domain/entity/report/ReportBaseInfoDO.java

@@ -27,10 +27,10 @@ public class ReportBaseInfoDO extends BaseReportDO {
      * 报告类型
      */
     private String reportType;
-//    /**
-//     * 报告是否用印
-//     */
-//    private Boolean withSeals;
+    /**
+     * 报告是否用印
+     */
+    private Boolean withSeals;
     /**
      * 观点报告是否存在联系人信息(可能包含联系电话、地址等敏感信息)
      */

+ 30 - 22
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -4,6 +4,7 @@ import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.ListUtil;
 import cn.hutool.core.date.DateUtil;
 import cn.hutool.core.exceptions.ExceptionUtil;
+import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.StrUtil;
 import com.smppw.modaq.application.components.ReportParseUtils;
@@ -31,7 +32,7 @@ import com.smppw.modaq.domain.entity.EmailParseInfoDO;
 import com.smppw.modaq.domain.mapper.EmailFileInfoMapper;
 import com.smppw.modaq.domain.mapper.EmailParseInfoMapper;
 import com.smppw.modaq.infrastructure.util.ExcelUtil;
-import com.smppw.modaq.infrastructure.util.FileUtil;
+import com.smppw.modaq.infrastructure.util.PdfUtil;
 import jakarta.mail.*;
 import jakarta.mail.internet.MimeUtility;
 import jakarta.mail.search.ComparisonTerm;
@@ -46,6 +47,8 @@ import org.springframework.util.StopWatch;
 
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.*;
@@ -116,7 +119,7 @@ public class EmailParseService {
         }
         Map<String, List<EmailContentInfoDTO>> emailContentMap;
         try {
-            emailContentMap = realEmail(mailboxInfoDTO, startDate, endDate, folderNames);
+            emailContentMap = this.realEmail(mailboxInfoDTO, startDate, endDate, folderNames);
         } catch (Exception e) {
             log.error("采集邮件失败 -> 邮箱配置信息:{},堆栈信息:{}", mailboxInfoDTO, ExceptionUtil.stacktraceToString(e));
             return;
@@ -216,13 +219,16 @@ public class EmailParseService {
             emailContentInfoDTO.setEmailType(emailType);
         }
 
+        if (log.isInfoEnabled()) {
+            log.info("当前邮件{} 所有解压缩文件解压完成:{}", emailTitle, resultList);
+        }
+
         return resultList;
     }
 
     private void handleCompressedFiles(String emailTitle, String filepath, String extension,
                                        Integer emailType, List<EmailZipFileDTO> resultList) throws Exception {
         String destPath = getDestinationPath(filepath, extension);
-        log.info("压缩包地址:{}, 解压后文件地址:{}", filepath, destPath);
 
         File destFile = new File(destPath);
         if (!destFile.exists()) {
@@ -403,6 +409,17 @@ public class EmailParseService {
         }
         Integer fileId = emailFileInfo.getId();
         String aiFileId = emailFileInfo.getAiFileId();
+
+        // 首页和尾页转为png图片,首页用来识别基金名称和基金代码、尾页用来识别印章和联系人
+        List<String> images = null;
+        try {
+            String parent = FileUtil.getParent(filepath, 1);
+            String output = parent + File.separator + "image";
+            images = PdfUtil.convertFirstAndLastPagesToPng(filepath, FileUtil.file(output), 300);
+        } catch (Exception e) {
+            log.warn("报告[{}] 生成图片失败:{}", fileName, ExceptionUtil.stacktraceToString(e));
+        }
+
         // 不支持解析的格式文件
         boolean notSupportFile = false;
         // 解析报告
@@ -619,9 +636,8 @@ public class EmailParseService {
             String uuidKey = UUID.randomUUID().toString().replaceAll("-", "");
             Integer emailType;
             String senderEmail;
-            String emailTitle = null;
+            String emailTitle = message.getSubject();
             try {
-                emailTitle = message.getSubject();
                 Date emailDate = message.getSentDate();
                 String emailDateStr = DateUtil.format(emailDate, DateConst.YYYY_MM_DD_HH_MM_SS);
                 if (log.isInfoEnabled()) {
@@ -661,7 +677,7 @@ public class EmailParseService {
                     });
                     emailMessageMap.put(uuidKey, emailContentInfoDTOList);
                 }
-                if (log.isInfoEnabled() && emailTitle != null) {
+                if (log.isInfoEnabled()) {
                     log.info("{} 邮件{} 下载完成,总计耗时{} ms,文件内容如下\n {}", folderName,
                             emailTitle, System.currentTimeMillis() - start, emailContentInfoDTOList);
                 }
@@ -699,8 +715,7 @@ public class EmailParseService {
         String emailDate = DateUtil.format(sendDate, DateConst.YYYYMMDDHHMMSS24);
         String emailDateStr = DateUtil.format(sendDate, DateConst.YYYYMMDD);
         String filePath = path + File.separator + account + File.separator + emailDateStr + File.separator;
-        String realPath = filePath + emailDate + fileName;
-        File saveFile = cn.hutool.core.io.FileUtil.file(realPath);
+        File saveFile = FileUtil.file(filePath + emailDate + fileName);
         if (!saveFile.exists()) {
             if (!saveFile.getParentFile().exists()) {
                 boolean mkdirs = saveFile.getParentFile().mkdirs();
@@ -708,10 +723,14 @@ public class EmailParseService {
                     log.warn("file path mkdir failed.");
                 }
             }
-            FileUtil.saveFile(saveFile, part);
+            try (InputStream is = part.getInputStream()) {
+                Files.copy(is, saveFile.toPath());
+            }
         } else {
-            cn.hutool.core.io.FileUtil.del(saveFile);
-            FileUtil.saveFile(saveFile, part);
+            FileUtil.del(saveFile);
+            try (InputStream is = part.getInputStream()) {
+                Files.copy(is, saveFile.toPath());
+            }
         }
         EmailContentInfoDTO emailContentInfoDTO = new EmailContentInfoDTO();
         emailContentInfoDTO.setFileName(fileName);
@@ -763,17 +782,6 @@ public class EmailParseService {
             if (matcher.find()) {
                 return matcher.group(1);
             }
-//            //说明匹配不到,直接获取sender
-//            Address sender = message.getSender();
-//            if (sender == null) {
-//                return address;
-//            }
-//            String senderEmail = sender.toString();
-//            log.info("senderEmail:" + senderEmail + "====================");
-//            if (senderEmail.contains("<") && senderEmail.contains(">") && senderEmail.indexOf("<") < senderEmail.indexOf(">")) {
-//                senderEmail = senderEmail.substring(senderEmail.indexOf("<") + 1, senderEmail.length() - 1);
-//            }
-//            return senderEmail;
         } catch (MessagingException e) {
             log.error(e.getMessage(), e);
         }

+ 1 - 181
mo-daq/src/main/java/com/smppw/modaq/infrastructure/util/ExcelUtil.java

@@ -1,6 +1,5 @@
 package com.smppw.modaq.infrastructure.util;
 
-import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.ListUtil;
 import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.util.StrUtil;
@@ -16,17 +15,11 @@ import org.apache.commons.compress.archivers.ArchiveEntry;
 import org.apache.commons.compress.archivers.ArchiveException;
 import org.apache.commons.compress.archivers.ArchiveInputStream;
 import org.apache.commons.compress.archivers.ArchiveStreamFactory;
-import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
-import org.apache.commons.compress.archivers.zip.ZipFile;
-import org.apache.commons.io.IOUtils;
 
 import java.io.*;
 import java.nio.charset.Charset;
 import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
 import java.util.Arrays;
-import java.util.Enumeration;
 import java.util.List;
 import java.util.regex.Pattern;
 
@@ -78,8 +71,6 @@ public class ExcelUtil {
     }
 
     public static List<String> extractCompressedFiles(String zipFilePath, String destFilePath) throws IOException, ArchiveException {
-        List<String> filePathList = CollUtil.newArrayList();
-
         File destFile = FileUtil.file(destFilePath);
         if (!destFile.exists()) {
             Files.createDirectories(destFile.toPath());
@@ -90,178 +81,7 @@ public class ExcelUtil {
             encoding = "GBK";
         }
 
-        try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipFilePath));
-             ArchiveInputStream<? extends ArchiveEntry> ais = new ArchiveStreamFactory()
-                     .createArchiveInputStream(ArchiveStreamFactory.detect(fis), fis, encoding)) {
-            ArchiveEntry entry;
-            while ((entry = ais.getNextEntry()) != null) {
-                String name = entry.getName();
-                if (entry.isDirectory()) {
-                    File entryFile = FileUtil.file(destFilePath, name);
-                    Files.createDirectories(entryFile.toPath());
-                } else {
-                    if (name.startsWith("__MACOSX/")) {
-                        continue;
-                    }
-                    String zipFilename = FileUtil.getName(destFilePath);
-                    if (zipFilename.contains("确认") && !name.contains("确认")) {
-                        String ext = FileUtil.extName(name);
-                        name = StrUtil.subBefore(name, ".", true);
-                        name = name + "_确认单." + ext;
-                    }
-                    File entryFile = FileUtil.file(destFilePath, name);
-                    try (FileOutputStream fos = new FileOutputStream(entryFile)) {
-                        IOUtils.copy(ais, fos);
-                        filePathList.add(entryFile.getPath());
-                    }
-                }
-            }
-        } catch (Exception e) {
-            if (e.getMessage() != null
-                    && (e.getMessage().contains("split")
-                    || e.getMessage().contains("volume"))) {
-                filePathList.addAll(extractSplitZip(zipFilePath, destFilePath, encoding));
-            } else {
-                throw e;
-            }
-        }
-
-        return filePathList;
-//        return extractCompressedFiles(zipFilePath, destFilePath, 0, new HashSet<>());
-    }
-
-//    /**
-//     * 递归解压压缩文件(支持嵌套)
-//     *
-//     * @param zipFilePath 压缩文件路径
-//     * @param destDir     目标目录
-//     * @param depth       当前递归深度(防止无限递归)
-//     * @param processed   已处理的文件哈希(防止重复解压)
-//     */
-//    private static List<String> extractCompressedFiles(String zipFilePath, String destDir, int depth, Set<String> processed) throws IOException, ArchiveException {
-//        // 防御:限制递归深度防止栈溢出
-//        if (depth > 4) {
-//            throw new ArchiveException("Maximum recursion depth (4) exceeded");
-//        }
-//
-//        String encoding = detectEncoding(zipFilePath); // 编码检测
-//        if (encoding == null) {
-//            encoding = "GBK";
-//        }
-//        List<String> extractedFiles = ListUtil.list(false);
-//        File destDirFile = FileUtil.mkdir(destDir);
-//
-//        try (InputStream fis = new BufferedInputStream(Files.newInputStream(Paths.get(zipFilePath)));
-//             ArchiveInputStream<?> ais = createArchiveInputStream(fis, encoding)) {
-//            ArchiveEntry entry;
-//            while ((entry = ais.getNextEntry()) != null) {
-//                String entryName = sanitizeEntryName(entry.getName()); // 清理非法字符
-//                if (entryName.startsWith("__MACOSX/")) {
-//                    continue;
-//                }
-//                String zipFilename = FileUtil.getName(destDir);
-//                if (zipFilename.contains("确认") && !entryName.contains("确认")) {
-//                    String ext = FileUtil.extName(entryName);
-//                    entryName = StrUtil.subBefore(entryName, ".", true);
-//                    entryName = entryName + "_确认单." + ext;
-//                }
-//                File entryFile = new File(destDirFile, entryName);
-//                if (entry.isDirectory()) {
-//                    Files.createDirectories(entryFile.toPath());
-//                } else {
-//                    extractSingleFile(ais, entryFile);
-//                    if (isArchiveFile(entryFile)) {  // 判断是否为压缩文件
-//                        extractedFiles.addAll(processNestedArchive(entryFile, destDir, depth, processed));
-//                    } else {
-//                        extractedFiles.add(entryFile.getAbsolutePath());
-//                    }
-//                }
-//            }
-//        } catch (Exception e) {
-//            handleArchiveException(e, zipFilePath, destDir, extractedFiles, encoding);
-//        }
-//        return extractedFiles;
-//    }
-//
-//    // ----------- 辅助方法 -----------
-//    private static ArchiveInputStream<?> createArchiveInputStream(InputStream is, String encoding) throws ArchiveException {
-//        return new ArchiveStreamFactory(encoding)
-//                .createArchiveInputStream(ArchiveStreamFactory.detect(is), is);
-//    }
-//
-//    private static void extractSingleFile(ArchiveInputStream<?> ais, File targetFile) throws IOException {
-//        try (OutputStream fos = Files.newOutputStream(targetFile.toPath())) {
-//            IOUtils.copy(ais, fos);
-//        } finally {
-//            if (ais != null) {
-//                ais.close();
-//            }
-//        }
-//    }
-//
-//    private static List<String> processNestedArchive(File archiveFile, String parentDir, int depth, Set<String> processed) throws IOException, ArchiveException {
-//        // 强制关闭所有可能关联的流(防御性编程)
-//        System.gc(); // 触发垃圾回收释放潜在未关闭的流
-//
-//        String fileHash = DigestUtil.md5Hex(new FileInputStream(archiveFile)); // 文件哈希校验(需实现)
-//        if (processed.contains(fileHash)) {
-//            return ListUtil.empty(); // 避免重复解压相同文件
-//        }
-//        processed.add(fileHash);
-//
-//        String nestedDestDir = parentDir + File.separator + FileUtil.mainName(archiveFile.getName());
-//        List<String> nestedFiles = extractCompressedFiles(
-//                archiveFile.getAbsolutePath(),
-//                nestedDestDir,
-//                depth + 1,
-//                processed
-//        );
-//
-//        Files.delete(archiveFile.toPath()); // 删除原压缩包
-//        return nestedFiles;
-//    }
-//
-//    private static boolean isArchiveFile(File file) {
-//        try (InputStream is = Files.newInputStream(file.toPath());
-//             BufferedInputStream bis = new BufferedInputStream(is)) {
-//            ArchiveStreamFactory.detect(bis); // 通过文件头检测
-//            return true;
-//        } catch (Exception e) {
-//            return false;
-//        }
-//    }
-//
-//    // 清理非法路径字符(防御路径穿越攻击)
-//    private static String sanitizeEntryName(String name) {
-//        return name.replaceAll("[:\\\\/*\"?|<>]", "_");
-//    }
-//
-//    // 统一异常处理
-//    private static void handleArchiveException(Exception e, String zipFile, String destDir, List<String> extractedFiles, String encoding) throws ArchiveException, IOException {
-//        if (e.getMessage().contains("split")) {
-//            extractedFiles.addAll(extractSplitZip(zipFile, destDir, encoding));
-//        } else {
-//            throw new ArchiveException("Failed to extract " + zipFile, e);
-//        }
-//    }
-
-    public static List<String> extractSplitZip(String zipFilePath, String destFilePath, String encoding) throws IOException {
-        List<String> resultList = ListUtil.list(false);
-        File file = new File(zipFilePath);
-        try (ZipFile zipFile = ZipFile.builder().setFile(file).setCharset(encoding).get()) {
-            Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
-            while (entries.hasMoreElements()) {
-                ZipArchiveEntry entry = entries.nextElement();
-                // 解压到目标目录
-                try (InputStream is = zipFile.getInputStream(entry)) {
-                    Path path = Paths.get(destFilePath, entry.getName());
-                    FileUtil.del(path);
-                    Files.copy(is, path);
-                    resultList.add(path.toAbsolutePath().toString());
-                }
-            }
-        }
-        return resultList;
+        return ZipUtil.decompressZip(zipFilePath, destFilePath, 2, encoding);
     }
 
     public static List<String> extractRar5(String rarFilePath, String outputDir) throws Exception {

+ 0 - 64
mo-daq/src/main/java/com/smppw/modaq/infrastructure/util/FileUtil.java

@@ -1,64 +0,0 @@
-package com.smppw.modaq.infrastructure.util;
-
-import cn.hutool.core.exceptions.ExceptionUtil;
-import jakarta.mail.Part;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.*;
-import java.nio.file.Files;
-
-public class FileUtil {
-
-    private static final Logger logger = LoggerFactory.getLogger(FileUtil.class);
-
-    public static void writeFile(String fileName, String content) {
-        writeFile(new File(fileName), content, "UTF-8");
-    }
-
-    public static void writeFile(File file, String content, String charsets) {
-        writeFile(file, content, charsets, false);
-    }
-
-    public static void writeFile(File file, String content, String charsets, boolean append) {
-        Writer fw = null;
-        String fileName = file.getAbsolutePath();
-
-        try {
-            File folder = file.getParentFile();
-            if (!folder.exists()) {
-                folder.setExecutable(true, false);
-                folder.setReadable(true, false);
-                folder.mkdirs();
-            }
-
-            if (append) {
-                fw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, append), charsets));
-            } else {
-                fw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), charsets));
-            }
-
-            fw.write(content);
-            fw.flush();
-        } catch (IOException var15) {
-            logger.error("写入失败!file=" + fileName);
-            logger.error("堆栈信息:{}", ExceptionUtil.stacktraceToString(var15));
-        } finally {
-            try {
-                if (fw != null) {
-                    fw.close();
-                }
-            } catch (IOException var14) {
-                logger.error("写入的IO关闭失败!file= " + fileName);
-                logger.error("堆栈信息:{}", ExceptionUtil.stacktraceToString(var14));
-            }
-
-        }
-    }
-
-    public static void saveFile(File saveFile, Part part) throws Exception {
-        try (InputStream is = part.getInputStream()) {
-            Files.copy(is, saveFile.toPath());
-        }
-    }
-}

+ 70 - 0
mo-daq/src/main/java/com/smppw/modaq/infrastructure/util/PdfUtil.java

@@ -0,0 +1,70 @@
+package com.smppw.modaq.infrastructure.util;
+
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.io.FileUtil;
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.PDFRenderer;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+public class PdfUtil {
+    /**
+     * Converts the first and last pages of a PDF to PNG images.
+     * <p>
+     * The first page is always written as {@code baseName.png}; when the document has more
+     * than one page, the last page is also written as {@code baseName_footer.png}, where
+     * baseName is {@code FileUtil.mainName(pdfFilepath)}.
+     *
+     * @param pdfFilepath path of the input PDF file
+     * @param outputDir   output directory (created here when it does not exist)
+     * @param dpi         image resolution in DPI (300 is the suggested value)
+     * @return absolute paths of the generated image files
+     * @throws IOException if the PDF has no pages, the output directory cannot be created,
+     *                     or loading, rendering or writing fails
+     */
+    public static List<String> convertFirstAndLastPagesToPng(String pdfFilepath, File outputDir, int dpi) throws IOException {
+        List<String> generatedImages = ListUtil.list(false);
+
+        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(pdfFilepath))) {
+            int totalPages = document.getNumberOfPages();
+            if (totalPages == 0) {
+                throw new IOException("PDF 文件无有效页面");
+            }
+
+            // Prepare the output directory
+            if (!outputDir.exists() && !outputDir.mkdirs()) {
+                throw new IOException("无法创建输出目录: " + outputDir);
+            }
+
+            String baseName = FileUtil.mainName(pdfFilepath);
+            PDFRenderer renderer = new PDFRenderer(document);
+
+            // Always render the first page (the renderer is given page index 0, not 1)
+            generatedImages.add(
+                    renderPage(renderer, 0, baseName + ".png", outputDir, dpi)
+            );
+
+            // Render the last page too, but only when the document has more than one page
+            if (totalPages > 1) {
+                generatedImages.add(
+                        renderPage(renderer, totalPages - 1,
+                                baseName + "_footer.png",
+                                outputDir, dpi)
+                );
+            }
+        }
+
+        return generatedImages;
+    }
+
+    /**
+     * Renders a single page at the given DPI, saves it as a PNG file named {@code fileName}
+     * inside {@code outputDir}, and returns the absolute path of that file.
+     */
+    private static String renderPage(PDFRenderer renderer, int pageIndex,
+                                     String fileName, File outputDir, int dpi) throws IOException {
+        BufferedImage image = renderer.renderImageWithDPI(pageIndex, dpi);
+        File outputFile = new File(outputDir, fileName);
+        ImageIO.write(image, "PNG", outputFile);
+        return outputFile.getAbsolutePath();
+    }
+}

+ 137 - 0
mo-daq/src/main/java/com/smppw/modaq/infrastructure/util/ZipUtil.java

@@ -0,0 +1,137 @@
+package com.smppw.modaq.infrastructure.util;
+
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.io.FileUtil;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipFile;
+import org.apache.commons.io.IOUtils;
+
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Enumeration;
+import java.util.List;
+
+public class ZipUtil {
+    /**
+     * Recursively extracts a ZIP file, with a limit on nesting depth.
+     * Convenience overload that converts both paths with {@code FileUtil.file} and delegates.
+     *
+     * @param zipFile   path of the input ZIP file
+     * @param outputDir root directory to extract into
+     * @param maxDepth  maximum nesting depth; nested archives are expanded only while the
+     *                  current depth (0 for the top-level archive) is below this value
+     * @param encoding  charset name handed to the ZipFile builder via {@code setCharset}
+     * @return paths of the extracted files, each built as
+     *         outputDir + separator + nested path + separator + entry name
+     */
+    public static List<String> decompressZip(String zipFile, String outputDir, int maxDepth, String encoding) throws IOException {
+        return decompressZip(FileUtil.file(zipFile), FileUtil.file(outputDir), maxDepth, encoding);
+    }
+
+    /**
+     * Recursively extracts a ZIP file, with a limit on nesting depth.
+     *
+     * @param zipFile   the input ZIP file
+     * @param outputDir root directory to extract into
+     * @param maxDepth  maximum nesting depth; nested archives are expanded only while the
+     *                  current depth (0 for the top-level archive) is below this value, and
+     *                  {@code .zip} entries found at the limit are recorded as ordinary files
+     * @param encoding  charset name handed to the ZipFile builder via {@code setCharset}
+     * @return paths of the extracted files, each built as
+     *         outputDir + separator + nested path + separator + entry name
+     * @throws IllegalArgumentException if {@code maxDepth} is negative
+     */
+    public static List<String> decompressZip(File zipFile, File outputDir, int maxDepth, String encoding) throws IOException {
+        if (maxDepth < 0) {
+            throw new IllegalArgumentException("最大嵌套深度不能小于 0");
+        }
+        List<String> decompressedFiles = ListUtil.list(false);
+        decompressZipRecursive(zipFile, outputDir, "", 0, maxDepth, encoding, decompressedFiles);
+        return decompressedFiles;
+    }
+
+    /**
+     * Core recursive extraction logic.
+     * <p>
+     * Each archive is extracted into rootOutputDir / nestedPath / FileUtil.mainName(currentZip).
+     * Entries whose name starts with {@code __MACOSX/} are skipped. A nested {@code .zip}
+     * entry is first written to disk, then extracted recursively and deleted; every other
+     * file entry has its path appended to {@code decompressedFiles}.
+     * <p>
+     * NOTE(review): entry names are joined to the output directory without normalisation or
+     * a containment check, so an entry name containing ".." segments may resolve outside the
+     * output directory (Zip Slip) - confirm archives are trusted or add a check.
+     */
+    private static void decompressZipRecursive(
+            File currentZip,
+            File rootOutputDir,
+            String nestedPath,
+            int currentDepth,
+            int maxDepth,
+            String encoding,
+            List<String> decompressedFiles) throws IOException {
+
+        // 1. Stop processing nested ZIPs once the maximum depth is exceeded
+        if (currentDepth > maxDepth) {
+            return;
+        }
+
+        // 2. Create the extraction directory for the current ZIP
+        String currentZipName = FileUtil.mainName(currentZip);
+        String currentNestedPath = nestedPath.isEmpty()
+                ? currentZipName
+                : nestedPath + File.separator + currentZipName;
+        File currentOutputDir = new File(rootOutputDir, currentNestedPath);
+        FileUtil.mkdir(currentOutputDir);
+
+        // 3. Extract the current ZIP
+        try (ZipFile zip = ZipFile.builder().setFile(currentZip).setCharset(encoding).get()) {
+            Enumeration<ZipArchiveEntry> entries = zip.getEntries();
+
+            while (entries.hasMoreElements()) {
+                ZipArchiveEntry entry = entries.nextElement();
+                String name = entry.getName();
+                if (name.startsWith("__MACOSX/")) {
+                    continue;
+                }
+
+                Path entryPath = Paths.get(currentOutputDir.getAbsolutePath(), name);
+
+                // Directory entry: just create it
+                if (entry.isDirectory()) {
+                    Files.createDirectories(entryPath);
+                    continue;
+                }
+
+                // File entry: make sure the parent directory exists, then write the content
+                Files.createDirectories(entryPath.getParent());
+                try (InputStream is = zip.getInputStream(entry);
+                     OutputStream os = new FileOutputStream(entryPath.toFile())) {
+                    IOUtils.copy(is, os);
+                }
+
+                // 4. Recurse into a nested ZIP (depth + 1) while below the depth limit
+                if (isZipFile(name) && currentDepth < maxDepth) {
+                    File nestedZipFile = entryPath.toFile();
+                    decompressZipRecursive(
+                            nestedZipFile,
+                            rootOutputDir,
+                            currentNestedPath,
+                            currentDepth + 1,  // one level deeper
+                            maxDepth,
+                            encoding,
+                            decompressedFiles
+                    );
+                    Files.delete(nestedZipFile.toPath());
+                } else {
+                    // Record the path (it includes rootOutputDir, despite the variable name)
+                    String relativePath = rootOutputDir + File.separator + currentNestedPath + File.separator + name;
+                    decompressedFiles.add(relativePath);
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns whether the given path ends with {@code .zip}, ignoring case.
+     * Only the name is inspected; the file content is not checked.
+     */
+    private static boolean isZipFile(String filepath) {
+        return filepath.toLowerCase().endsWith(".zip");
+    }
+
+    // Usage example (uses hard-coded local Windows paths)
+    public static void main(String[] args) throws Exception {
+        List<String> files = decompressZip(
+                new File("C:\\Users\\Administrator\\Desktop\\上海熙盛明诚私募基金管理有限公司-信披报告.zip"),
+                new File("C:\\Users\\Administrator\\Desktop"),
+                2,
+                "UTF-8"
+        );
+        System.out.println("解压后的文件路径:");
+        files.forEach(System.out::println);
+    }
+}