Sfoglia il codice sorgente

feat:支持有密码的PDF解析

wangzaijun 1 settimana fa
parent
commit
555cd406af

+ 17 - 0
mo-daq/src/main/java/com/smppw/modaq/application/components/ReportParseUtils.java

@@ -325,6 +325,23 @@ public final class ReportParseUtils {
     }
     }
 
 
     /**
     /**
+     * 匹配邮件正文是否有pdf的解密密码
+     *
+     * @param text 邮件正文
+     * @return 返回密码
+     */
+    public static String matchPdfPwd(String text) {
+        if (StrUtil.isBlank(text)) {
+            return null;
+        }
+        Matcher matcher = PatternConsts.PDF_PWD_PATTERN.matcher(text);
+        if (matcher.find()) {
+            return matcher.group();
+        }
+        return null;
+    }
+
+    /**
      * 匹配基金代码
      * 匹配基金代码
      *
      *
      * @param text 字符串
      * @param text 字符串

+ 2 - 0
mo-daq/src/main/java/com/smppw/modaq/common/conts/PatternConsts.java

@@ -38,6 +38,8 @@ public class PatternConsts {
      */
      */
     public static final Pattern FUND_LEVEL_PATTERN = Pattern.compile("[A-F]级|基金[A-F]");
     public static final Pattern FUND_LEVEL_PATTERN = Pattern.compile("[A-F]级|基金[A-F]");
 
 
+    public static final Pattern PDF_PWD_PATTERN = Pattern.compile("密码[::][A-Z]{1,10}");
+
     // 正则表达式匹配单行和多行注释
     // 正则表达式匹配单行和多行注释
     public static final Pattern JSON_COMMENT_PATTERN = Pattern.compile(
     public static final Pattern JSON_COMMENT_PATTERN = Pattern.compile(
             "(\"(?:\\\\\"|[^\"])*?\")" +  // 匹配双引号内的内容(避免匹配字符串内的注释符号)
             "(\"(?:\\\\\"|[^\"])*?\")" +  // 匹配双引号内的内容(避免匹配字符串内的注释符号)

+ 5 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/EmailInfoDTO.java

@@ -26,6 +26,10 @@ public class EmailInfoDTO {
      */
      */
     private String senderEmail;
     private String senderEmail;
     /**
     /**
+     * 邮件正文(主要用来解析密码或者判断报告类型)
+     */
+    private String emailContent;
+    /**
      * 当前邮件的所有附件信息(如果是压缩包则只记录解压后的文件)
      * 当前邮件的所有附件信息(如果是压缩包则只记录解压后的文件)
      */
      */
     private List<EmailZipFileDTO> emailFileList;
     private List<EmailZipFileDTO> emailFileList;
@@ -46,6 +50,7 @@ public class EmailInfoDTO {
         this.emailTitle = emailDto.getEmailTitle();
         this.emailTitle = emailDto.getEmailTitle();
         this.emailDate = emailDto.getEmailDate();
         this.emailDate = emailDto.getEmailDate();
         this.senderEmail = emailDto.getSenderEmail();
         this.senderEmail = emailDto.getSenderEmail();
+        this.emailContent = emailDto.getEmailContent();
         this.emailFileList = emailFileList;
         this.emailFileList = emailFileList;
     }
     }
 }
 }

+ 6 - 9
mo-daq/src/main/java/com/smppw/modaq/domain/dto/EmailZipFileDTO.java

@@ -1,6 +1,7 @@
 package com.smppw.modaq.domain.dto;
 package com.smppw.modaq.domain.dto;
 
 
 import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.io.FileUtil;
+import com.smppw.modaq.application.components.ReportParseUtils;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
 
 
@@ -12,6 +13,9 @@ public class EmailZipFileDTO {
     private final String filename;
     private final String filename;
     private final String filepath;
     private final String filepath;
     private final long fileSize;
     private final long fileSize;
+    // pdf文件的密码(需要密码时)
+    @Setter
+    private String pdfPwd;
     @Setter
     @Setter
     private Integer emailType;
     private Integer emailType;
 
 
@@ -27,21 +31,13 @@ public class EmailZipFileDTO {
         this.extName = FileUtil.extName(file);
         this.extName = FileUtil.extName(file);
     }
     }
 
 
-//    public EmailZipFileDTO(String emailTitle, String filepath, String filename, int fileSize, Integer emailType) {
-//        this.emailTitle = emailTitle;
-//        this.filepath = filepath;
-//        this.emailType = emailType;
-//        this.filename = filename;
-//        this.fileSize = fileSize;
-//        this.extName = FileUtil.extName(filepath);
-//    }
-
     public EmailZipFileDTO(String emailTitle, EmailContentInfoDTO emailDto) {
     public EmailZipFileDTO(String emailTitle, EmailContentInfoDTO emailDto) {
         this.emailTitle = emailTitle;
         this.emailTitle = emailTitle;
         this.filepath = emailDto.getFilePath();
         this.filepath = emailDto.getFilePath();
         this.emailType = emailDto.getEmailType();
         this.emailType = emailDto.getEmailType();
         this.filename = emailDto.getFileName();
         this.filename = emailDto.getFileName();
         this.fileSize = emailDto.getFileSize();
         this.fileSize = emailDto.getFileSize();
+        this.pdfPwd = ReportParseUtils.matchPdfPwd(emailDto.getEmailContent());
         this.extName = FileUtil.extName(filepath);
         this.extName = FileUtil.extName(filepath);
     }
     }
 
 
@@ -50,6 +46,7 @@ public class EmailZipFileDTO {
         this.filepath = uploadReportInfo.getReportPath();
         this.filepath = uploadReportInfo.getReportPath();
         this.emailType = uploadReportInfo.getReportType();
         this.emailType = uploadReportInfo.getReportType();
         this.filename = uploadReportInfo.getReportName();
         this.filename = uploadReportInfo.getReportName();
+        this.pdfPwd = uploadReportInfo.getPdfPwd();
         this.fileSize = FileUtil.size(FileUtil.file(this.filepath));
         this.fileSize = FileUtil.size(FileUtil.file(this.filepath));
         this.extName = FileUtil.extName(filepath);
         this.extName = FileUtil.extName(filepath);
     }
     }

+ 5 - 0
mo-daq/src/main/java/com/smppw/modaq/domain/dto/UploadReportParams.java

@@ -34,6 +34,11 @@ public class UploadReportParams {
          */
          */
         private Integer reportType;
         private Integer reportType;
         /**
         /**
+         * 如果pdf有密码,可以传密码过来
+         */
+        @Getter
+        private String pdfPwd;
+        /**
          * 报告路径,必传
          * 报告路径,必传
          */
          */
         private String reportPath;
         private String reportPath;

+ 15 - 11
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -529,7 +529,7 @@ public class EmailParseService {
             try {
             try {
                 String output = filepath.replaceAll("archive|original", "image");
                 String output = filepath.replaceAll("archive|original", "image");
                 File outputFile = FileUtil.file(FileUtil.getParent(output, 1));
                 File outputFile = FileUtil.file(FileUtil.getParent(output, 1));
-                images = PdfUtil.convertFirstAndLastPagesToPng(filepath, outputFile, 300);
+                images = PdfUtil.convertFirstAndLastPagesToPng(filepath, outputFile, 300, zipFile.getPdfPwd());
                 if (log.isDebugEnabled()) {
                 if (log.isDebugEnabled()) {
                     log.debug("报告{} 生成的图片地址是:\n{}", fileName, images);
                     log.debug("报告{} 生成的图片地址是:\n{}", fileName, images);
                 }
                 }
@@ -995,13 +995,13 @@ public class EmailParseService {
                 if (log.isInfoEnabled()) {
                 if (log.isInfoEnabled()) {
                     log.info("{} 邮件{} 基本信息获取完成,开始下载附件!邮件日期:{}", folderName, emailTitle, emailDateStr);
                     log.info("{} 邮件{} 基本信息获取完成,开始下载附件!邮件日期:{}", folderName, emailTitle, emailDateStr);
                 }
                 }
-                Object content = message.getContent();
 
 
-                if (content instanceof Multipart multipart) {
-                    this.reMultipart(emailAddress, emailTitle, emailDate, multipart, dtos);
-                } else if (content instanceof Part part) {
-                    this.rePart(emailAddress, emailTitle, emailDate, part, dtos);
+                Object messageContent = message.getContent();
+                String content;
+                if (messageContent instanceof Multipart multipart) {
+                    content = this.reMultipart(emailAddress, emailTitle, emailDate, multipart, dtos);
                 } else {
                 } else {
+                    content = null;
                     log.warn("{} 邮件{} 获取不了附件", folderName, emailTitle);
                     log.warn("{} 邮件{} 获取不了附件", folderName, emailTitle);
                 }
                 }
                 if (CollUtil.isEmpty(dtos)) {
                 if (CollUtil.isEmpty(dtos)) {
@@ -1011,6 +1011,7 @@ public class EmailParseService {
                 dtos.forEach(e -> {
                 dtos.forEach(e -> {
                     e.setEmailType(emailType);
                     e.setEmailType(emailType);
                     e.setSenderEmail(senderEmail);
                     e.setSenderEmail(senderEmail);
+                    e.setEmailContent(content);
                 });
                 });
                 emailMessageMap.put(IdUtil.simpleUUID(), dtos);
                 emailMessageMap.put(IdUtil.simpleUUID(), dtos);
             } catch (Exception e) {
             } catch (Exception e) {
@@ -1095,23 +1096,26 @@ public class EmailParseService {
         return FileUtil.file(filePath + realName);
         return FileUtil.file(filePath + realName);
     }
     }
 
 
-    private void reMultipart(String account, String subject, Date emailDate, Multipart multipart,
+    private String reMultipart(String account, String subject, Date emailDate, Multipart multipart,
                              List<EmailContentInfoDTO> emailContentInfoDTOList) throws Exception {
                              List<EmailContentInfoDTO> emailContentInfoDTOList) throws Exception {
+        String content = null;
         for (int i = 0; i < multipart.getCount(); i++) {
         for (int i = 0; i < multipart.getCount(); i++) {
             Part bodyPart = multipart.getBodyPart(i);
             Part bodyPart = multipart.getBodyPart(i);
-            Object content = bodyPart.getContent();
-            if (content instanceof String) {
+            Object bodyPartContent = bodyPart.getContent();
+            if (bodyPartContent instanceof String) {
                 if (log.isDebugEnabled()) {
                 if (log.isDebugEnabled()) {
-                    log.debug("邮件{} 获取的正文不做解析,内容是 {}", subject, content);
+                    log.debug("邮件{} 获取的正文不做解析,内容是 {}", subject, bodyPartContent);
                 }
                 }
+                content = bodyPartContent.toString();
                 continue;
                 continue;
             }
             }
-            if (content instanceof Multipart mp) {
+            if (bodyPartContent instanceof Multipart mp) {
                 this.reMultipart(account, subject, emailDate, mp, emailContentInfoDTOList);
                 this.reMultipart(account, subject, emailDate, mp, emailContentInfoDTOList);
             } else {
             } else {
                 this.rePart(account, subject, emailDate, bodyPart, emailContentInfoDTOList);
                 this.rePart(account, subject, emailDate, bodyPart, emailContentInfoDTOList);
             }
             }
         }
         }
+        return content;
     }
     }
 
 
     private String getSenderEmail(Message message) {
     private String getSenderEmail(Message message) {

+ 5 - 2
mo-daq/src/main/java/com/smppw/modaq/infrastructure/util/PdfUtil.java

@@ -67,10 +67,13 @@ public class PdfUtil {
      * @param dpi         图片分辨率(默认建议 300)
      * @param dpi         图片分辨率(默认建议 300)
      * @return 生成的图片文件列表
      * @return 生成的图片文件列表
      */
      */
-    public static List<String> convertFirstAndLastPagesToPng(String pdfFilepath, File outputDir, int dpi) throws IOException {
+    public static List<String> convertFirstAndLastPagesToPng(String pdfFilepath,
+                                                             File outputDir,
+                                                             int dpi,
+                                                             String password) throws IOException {
         List<String> generatedImages = ListUtil.list(false);
         List<String> generatedImages = ListUtil.list(false);
 
 
-        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(pdfFilepath))) {
+        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(pdfFilepath), password)) {
             int totalPages = document.getNumberOfPages();
             int totalPages = document.getNumberOfPages();
             if (totalPages == 0) {
             if (totalPages == 0) {
                 throw new IOException("PDF 文件无有效页面");
                 throw new IOException("PDF 文件无有效页面");

+ 3 - 3
mo-daq/src/test/java/com/smppw/modaq/MoDaqApplicationTests.java

@@ -41,9 +41,9 @@ public class MoDaqApplicationTests {
 
 
     @Test
     @Test
     public void reportTest() {
     public void reportTest() {
-        MailboxInfoDTO emailInfoDTO = this.buildMailbox("*@simuwang.com", "*");
-        Date startDate = DateUtil.parse("2025-06-19 08:47:00", DateConst.YYYY_MM_DD_HH_MM_SS);
-        Date endDate = DateUtil.parse("2025-06-19 13:57:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        MailboxInfoDTO emailInfoDTO = this.buildMailbox("wangzaijun@simuwang.com", "WZJ2twy1314");
+        Date startDate = DateUtil.parse("2025-06-19 17:47:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date endDate = DateUtil.parse("2025-06-19 17:57:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         try {
         try {
             List<String> folderNames = ListUtil.list(false);
             List<String> folderNames = ListUtil.list(false);
 //            folderNames.add("其他文件夹/报告公告");
 //            folderNames.add("其他文件夹/报告公告");