浏览代码

feat:报告解析优化重构+java解析支持开始

wangzaijun 7 月之前
父节点
当前提交
05f14139f3
共有 35 个文件被更改,包括 937 次插入1084 次删除
  1. 34 0
      service-base/src/main/java/com/simuwang/base/common/enums/ReportParserFileType.java
  2. 5 0
      service-base/src/main/java/com/simuwang/base/config/DaqProperties.java
  3. 4 0
      service-base/src/main/java/com/simuwang/base/pojo/dos/report/ReportFinancialIndicatorsDO.java
  4. 11 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportData.java
  5. 6 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportFinancialIndicatorsDTO.java
  6. 26 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportParserParams.java
  7. 0 184
      service-daq/src/main/java/com/simuwang/daq/components/AbstractReportParser.java
  8. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/CustomPDFTextStripper.java
  9. 0 18
      service-daq/src/main/java/com/simuwang/daq/components/ReportParser.java
  10. 20 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParser.java
  11. 69 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParserConstant.java
  12. 26 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParserFactory.java
  13. 292 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/AbstractReportParser.java
  14. 33 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDAnnuallyReportParser.java
  15. 26 128
      service-daq/src/main/java/com/simuwang/daq/components/PDMonthlyReportParser.java
  16. 33 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDQuarterlyReportParser.java
  17. 22 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/AbstractPyReportParser.java
  18. 14 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/PythonMonthlyReportParser.java
  19. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/AbstractReportWriter.java
  20. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/AnnuallyReportWriter.java
  21. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/MonthlyReportWriter.java
  22. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/QuarterlyReportWriter.java
  23. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriter.java
  24. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriterConstant.java
  25. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriterFactory.java
  26. 0 43
      service-daq/src/main/java/com/simuwang/daq/dto/MonthlyReportNavInfo.java
  27. 0 13
      service-daq/src/main/java/com/simuwang/daq/dto/ReportExtInfo.java
  28. 0 18
      service-daq/src/main/java/com/simuwang/daq/dto/ReportFileType.java
  29. 0 291
      service-daq/src/main/java/com/simuwang/daq/dto/ReportFundInfo.java
  30. 0 54
      service-daq/src/main/java/com/simuwang/daq/dto/ReportInfo.java
  31. 42 62
      service-daq/src/main/java/com/simuwang/daq/service/EmailParseService.java
  32. 3 4
      service-daq/src/main/java/com/simuwang/daq/service/ReportParseService.java
  33. 261 249
      service-daq/src/main/java/com/simuwang/daq/utils/ReportParseUtil.java
  34. 0 12
      service-deploy/pom.xml
  35. 2 0
      service-deploy/src/main/resources/application.yml

+ 34 - 0
service-base/src/main/java/com/simuwang/base/common/enums/ReportParserFileType.java

@@ -0,0 +1,34 @@
+package com.simuwang.base.common.enums;
+
+import cn.hutool.core.util.StrUtil;
+
+import java.util.Arrays;
+
+/**
+ * File format of a report to be parsed. Besides the real file extensions there
+ * is a {@code PYTHON} entry, used when parsing is delegated to the python interface.
+ *
+ * @author wangzaijun
+ * @date 2024/9/29 10:57
+ */
+public enum ReportParserFileType {
+    PDF("pdf"),
+    DOCX("docx"),
+    DOC("doc"),
+    XLSX("xlsx"),
+    XLS("xls"),
+    PYTHON("python");
+
+    /** Lower-case suffix this type is looked up by. */
+    private final String suffix;
+
+    ReportParserFileType(String suffix) {
+        this.suffix = suffix;
+    }
+
+    /**
+     * Finds the type whose suffix equals the given one, ignoring case, so that
+     * {@code "PDF"} and {@code "pdf"} resolve to the same constant.
+     *
+     * @param suffix suffix to look up, may be {@code null}
+     * @return the matching type, or {@code null} when nothing matches
+     */
+    public static ReportParserFileType getBySuffix(String suffix) {
+        return Arrays.stream(ReportParserFileType.values())
+                .filter(e -> StrUtil.equalsIgnoreCase(e.getSuffix(), suffix)).findFirst().orElse(null);
+    }
+
+    /**
+     * @return the suffix associated with this type
+     */
+    public String getSuffix() {
+        return suffix;
+    }
+}

+ 5 - 0
service-base/src/main/java/com/simuwang/base/config/DaqProperties.java

@@ -32,6 +32,11 @@ public class DaqProperties {
      */
     private String tokenSecret;
     /**
+     * 是否开启python的报告解析功能,开启后报告全部用python接口来解析
+     * 当开启时要配置python解析地址
+     */
+    private Boolean enablePyParser = Boolean.FALSE;
+    /**
      * 报告解析的python接口地址
      */
     private String pyBaseUrl = "http://localhost:8080";

+ 4 - 0
service-base/src/main/java/com/simuwang/base/pojo/dos/report/ReportFinancialIndicatorsDO.java

@@ -19,4 +19,8 @@ public class ReportFinancialIndicatorsDO extends BaseReportDO {
     private BigDecimal nav;
     private BigDecimal profit;
     private BigDecimal realizedIncome;
+    /**
+     * 期末可供分配利润
+     */
+    private BigDecimal undistributedProfit;
 }

+ 11 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportData.java

@@ -4,10 +4,21 @@ import com.simuwang.base.common.enums.ReportType;
 import lombok.Getter;
 import lombok.Setter;
 
+/**
+ * @author wangzaijun
+ * @date 2024/9/29 9:32
+ * @description 报告解析结果对象
+ */
 @Setter
 @Getter
 public abstract class ReportData {
+    /**
+     * 报告基本信息
+     */
     private ReportBaseInfoDTO baseInfo;
+    /**
+     * 报告包含的基金基本信息
+     */
     private ReportFundInfoDTO fundInfo;
 
     public abstract ReportType getReportType();

+ 6 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportFinancialIndicatorsDTO.java

@@ -20,6 +20,10 @@ public class ReportFinancialIndicatorsDTO extends BaseReportDTO<ReportFinancialI
     private BigDecimal nav;
     private BigDecimal profit;
     private BigDecimal realizedIncome;
+    /**
+     * 期末可供分配利润
+     */
+    private BigDecimal undistributedProfit;
 
     @Override
     public ReportFinancialIndicatorsDO toEntity() {
@@ -31,6 +35,7 @@ public class ReportFinancialIndicatorsDTO extends BaseReportDTO<ReportFinancialI
         entity.setNav(this.nav);
         entity.setProfit(this.profit);
         entity.setRealizedIncome(this.realizedIncome);
+        entity.setUndistributedProfit(this.undistributedProfit);
         return entity;
     }
 
@@ -43,6 +48,7 @@ public class ReportFinancialIndicatorsDTO extends BaseReportDTO<ReportFinancialI
                 ", fundAssetSize=" + fundAssetSize +
                 ", nav=" + nav +
                 ", profit=" + profit +
+                ", undistributedProfit=" + undistributedProfit +
                 ", realizedIncome=" + realizedIncome +
                 '}';
     }

+ 26 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportParserParams.java

@@ -0,0 +1,26 @@
+package com.simuwang.base.pojo.dto.report;
+
+import lombok.*;
+
+@Getter
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class ReportParserParams {
+    /**
+     * 文件id
+     * 报告解析表的关联字段
+     */
+    private Integer fileId;
+    /**
+     * 文件名称
+     * 优先从这个名称里先获取基金备案编码,没有就不获取
+     */
+    private String filename;
+    /**
+     * 文件路径
+     */
+    private String filepath;
+
+    private String registerNumber;
+}

+ 0 - 184
service-daq/src/main/java/com/simuwang/daq/components/AbstractReportParser.java

@@ -1,184 +0,0 @@
-package com.simuwang.daq.components;
-
-import cn.hutool.core.exceptions.ExceptionUtil;
-import cn.hutool.core.map.MapUtil;
-import cn.hutool.core.util.StrUtil;
-import com.simuwang.daq.dto.ReportExtInfo;
-import com.simuwang.daq.dto.ReportFundInfo;
-import com.simuwang.daq.dto.ReportInfo;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.springframework.util.StopWatch;
-
-import java.io.IOException;
-import java.util.*;
-import java.util.concurrent.TimeUnit;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-public abstract class AbstractReportParser<EXT extends ReportExtInfo> implements ReportParser {
-    protected final Logger logger = LoggerFactory.getLogger(this.getClass());
-    protected String filepath;
-    protected Map<String, List<String>> watermarkListMap;
-
-    @Override
-    public void parse(Integer fileId, String filepath, String watermarkName) {
-        StopWatch watch = new StopWatch();
-        watch.start();
-        if (this.logger.isInfoEnabled()) {
-            this.logger.info("报告{} 开始解析!", filepath);
-        }
-        this.filepath = filepath;
-        this.watermarkListMap = this.generateWatermarkMap(watermarkName);
-        ReportInfo reportInfo = null;
-        ReportFundInfo reportFundInfo = null;
-        List<EXT> exts = null;
-        try {
-            this.initParse();
-            reportInfo = this.parseReportInfo(fileId);
-            reportFundInfo = this.parseBaseInfo();
-            exts = this.parseExtInfo();
-        } catch (Exception e) {
-            this.logger.error("报告{} 解析错误\n{}", filepath, ExceptionUtil.stacktraceToString(e));
-        }
-        this.saveResult(reportInfo, reportFundInfo, exts);
-        watch.stop();
-        if (this.logger.isInfoEnabled()) {
-            this.logger.info("报告{} 解析结束!耗时:{}s", filepath, watch.getTotalTime(TimeUnit.SECONDS));
-        }
-    }
-
-    protected abstract void initParse() throws IOException;
-
-    protected abstract ReportInfo parseReportInfo(Integer fileId);
-
-    protected abstract ReportFundInfo parseBaseInfo();
-
-    protected abstract List<EXT> parseExtInfo();
-
-    protected abstract void saveResult(ReportInfo reportInfo, ReportFundInfo reportFundInfo, List<EXT> exts);
-
-    private Map<String, List<String>> generateWatermarkMap(String watermarkName) {
-        Map<String, List<String>> result = MapUtil.newHashMap(32);
-        // 生成水印列表
-        String text = watermarkName;
-        text = text.replaceAll("[()]", ""); // 移除括号
-        List<String> textList = new ArrayList<>(new HashSet<>(convertStringToList(text)));
-        Collections.reverse(textList);
-        StringBuilder sb = new StringBuilder(textList.size());
-        for (String ch : textList) {
-            sb.append(ch);
-        }
-        String joinedText = sb.toString();
-
-        // 基本水印列表
-        List<String> wkList = new ArrayList<>();
-        for (String ch : textList) {
-            wkList.add(ch + "\r\n");
-            wkList.add("\r\n" + ch);
-        }
-
-        // 查找数字
-        List<String> matches = findDigits(watermarkName);
-        if (!matches.isEmpty()) {
-            for (String match : matches) {
-                wkList.add("\r\n" + match);
-                wkList.add(match + "\r\n");
-            }
-        }
-        wkList.add("-");
-        wkList.add("【");
-        wkList.add("】");
-        wkList.add("\r");
-        wkList.add("\n");
-        wkList.add("\r\n");
-
-        String noNumberText = removeDigits(joinedText);
-
-        // 生成不同字段的水印列表
-        result.put("report_name", new ArrayList<>(wkList));
-        result.get("report_name").addAll(convertStringToList("有限公司"));
-
-        result.put("less", new ArrayList<>(wkList));
-
-        result.put("more", new ArrayList<>(wkList));
-        result.get("more").addAll(convertStringToList(noNumberText));
-
-        result.put("leverage", new ArrayList<>(wkList));
-        result.get("leverage").addAll(convertStringToList(removeKeywords(noNumberText, "基金资产")));
-
-        result.put("base_info", new ArrayList<>(wkList));
-        result.get("base_info").addAll(convertStringToList(removeKeywords(text, "基", "金", "投资", "管理", "有", "份", "融", "资", "产", "本", "号", "收益", "策略", "期")));
-
-        result.put("industry", new ArrayList<>(wkList));
-        result.get("industry").addAll(convertStringToList(removeKeywords(noNumberText, "基金融公产")));
-
-        result.put("market_value", new ArrayList<>(Collections.singletonList("\n")));
-        return result;
-    }
-
-    private List<String> findDigits(String text) {
-        List<String> digits = new ArrayList<>();
-        Pattern pattern = Pattern.compile("\\d");
-        Matcher matcher = pattern.matcher(text);
-        while (matcher.find()) {
-            digits.add(matcher.group());
-        }
-        return digits;
-    }
-
-    private String removeDigits(String text) {
-        return text.replaceAll("\\d", "");
-    }
-
-    private String removeKeywords(String text, String... keywords) {
-        for (String keyword : keywords) {
-            text = text.replaceAll(keyword, "");
-        }
-        return text;
-    }
-
-    private List<String> convertStringToList(String text) {
-        List<String> charList = new ArrayList<>();
-        for (char c : text.toCharArray()) {
-            charList.add(c + "");
-        }
-        return charList;
-    }
-
-    protected String processString(List<String> wmList, String string) {
-        if (StrUtil.isBlank(string)) {
-            return null;
-        }
-        // 生成正则表达式模式
-        String pat = String.join("|", wmList);
-        // 使用正则表达式移除wmList中的元素
-        string = removeMatches(string, pat);
-        // 替换中文括号为英文括号
-        string = string.replace("(", "(").replace(")", ")");
-        // 移除空格
-        string = string.replace(" ", "");
-        // 如果字符串以括号开头,则移除第一个字符
-        if (startsWithParenthesis(string)) {
-            string = string.substring(1);
-        }
-
-        return string;
-    }
-
-    private String removeMatches(String input, String pattern) {
-        // 编译正则表达式
-        Pattern compiledPattern = Pattern.compile(pattern);
-        // 创建Matcher对象
-        Matcher matcher = compiledPattern.matcher(input);
-        // 使用replaceAll方法替换所有匹配到的字符为空字符串
-        return matcher.replaceAll("");
-    }
-
-    private boolean startsWithParenthesis(String input) {
-        // 匹配以括号开头的字符串
-        Pattern pattern = Pattern.compile("^[()].*");
-        Matcher matcher = pattern.matcher(input);
-        return matcher.find();
-    }
-}

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/CustomPDFTextStripper.java

@@ -32,7 +32,7 @@ public class CustomPDFTextStripper extends PDFTextStripper {
                 if (width > watermarkWidth[0]) {
                     watermarkWidth[0] = width;
                 }
-                newTexts.add("++");
+                newTexts.add("+_+");
             }
         }
         if (CollUtil.isNotEmpty(newTexts)) {

+ 0 - 18
service-daq/src/main/java/com/simuwang/daq/components/ReportParser.java

@@ -1,18 +0,0 @@
-package com.simuwang.daq.components;
-
-/**
- * @author wangzaijun
- * @date 2024/9/9 19:18
- * @description 报告模板解析器,计划支持pdf、word等
- */
-public interface ReportParser {
-    /**
-     * 报告模板解析接口
-     * 扩展支持月报、季报和年报,解析文件格式支持pdf、word和excel
-     *
-     * @param fileId        文件id
-     * @param filepath      文件路径
-     * @param watermarkName 生成水印
-     */
-    void parse(Integer fileId, String filepath, String watermarkName);
-}

+ 20 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParser.java

@@ -0,0 +1,20 @@
+package com.simuwang.daq.components.report.parser;
+
+import com.simuwang.base.pojo.dto.report.ReportData;
+import com.simuwang.base.pojo.dto.report.ReportParserParams;
+
+/**
+ * @author wangzaijun
+ * @date 2024/9/9 19:18
+ * @description 报告模板解析器,计划支持pdf、word等
+ */
+public interface ReportParser<T extends ReportData> {
+    /**
+     * 报告模板解析接口
+     * 扩展支持月报、季报和年报,解析文件格式支持pdf、word和excel
+     *
+     * @param params 解析请求参数
+     * @return 解析结果
+     */
+    T parse(ReportParserParams params);
+}

+ 69 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParserConstant.java

@@ -0,0 +1,69 @@
+package com.simuwang.daq.components.report.parser;
+
+import cn.hutool.core.map.MapUtil;
+import com.simuwang.base.common.enums.ReportParserFileType;
+import com.simuwang.base.common.enums.ReportType;
+
+import java.util.Map;
+
+/**
+ * @author wangzaijun
+ * @date 2024/9/29 13:39
+ * @description 报告解析的bean名称关系配置
+ */
+public final class ReportParserConstant {
+    public static final Map<ReportType, Map<ReportParserFileType, String>> REPORT_PARSER_BEAN_MAP = MapUtil.newHashMap(8);
+
+    public static final String PARSER_PDF_MONTHLY = "report-parser:pdf:monthly";
+    public static final String PARSER_DOC_MONTHLY = "report-parser:doc:monthly";
+    public static final String PARSER_DOCX_MONTHLY = "report-parser:docx:monthly";
+    public static final String PARSER_XLSX_MONTHLY = "report-parser:xlsx:monthly";
+    public static final String PARSER_XLS_MONTHLY = "report-parser:xls:monthly";
+    public static final String PARSER_PYTHON_MONTHLY = "report-parser:python:monthly";
+
+    public static final String PARSER_PDF_QUARTERLY = "report-parser:pdf:quarterly";
+    public static final String PARSER_DOC_QUARTERLY = "report-parser:doc:quarterly";
+    public static final String PARSER_DOCX_QUARTERLY = "report-parser:docx:quarterly";
+    public static final String PARSER_XLSX_QUARTERLY = "report-parser:xlsx:quarterly";
+    public static final String PARSER_XLS_QUARTERLY = "report-parser:xls:quarterly";
+    public static final String PARSER_PYTHON_QUARTERLY = "report-parser:python:quarterly";
+
+    public static final String PARSER_PDF_ANNUALLY = "report-parser:pdf:annually";
+    public static final String PARSER_DOC_ANNUALLY = "report-parser:doc:annually";
+    public static final String PARSER_DOCX_ANNUALLY = "report-parser:docx:annually";
+    public static final String PARSER_XLSX_ANNUALLY = "report-parser:xlsx:annually";
+    public static final String PARSER_XLS_ANNUALLY = "report-parser:xls:annually";
+    public static final String PARSER_PYTHON_ANNUALLY = "report-parser:python:annually";
+
+    static {
+        REPORT_PARSER_BEAN_MAP.put(ReportType.MONTHLY,
+                Map.of(ReportParserFileType.PDF, PARSER_PDF_MONTHLY,
+                        ReportParserFileType.DOC, PARSER_DOC_MONTHLY,
+                        ReportParserFileType.DOCX, PARSER_DOCX_MONTHLY,
+                        ReportParserFileType.XLSX, PARSER_XLSX_MONTHLY,
+                        ReportParserFileType.XLS, PARSER_XLS_MONTHLY,
+
+                        ReportParserFileType.PYTHON, PARSER_PYTHON_MONTHLY
+                ));
+
+        REPORT_PARSER_BEAN_MAP.put(ReportType.QUARTERLY,
+                Map.of(ReportParserFileType.PDF, PARSER_PDF_QUARTERLY,
+                        ReportParserFileType.DOC, PARSER_DOC_QUARTERLY,
+                        ReportParserFileType.DOCX, PARSER_DOCX_QUARTERLY,
+                        ReportParserFileType.XLSX, PARSER_XLSX_QUARTERLY,
+                        ReportParserFileType.XLS, PARSER_XLS_QUARTERLY,
+
+                        ReportParserFileType.PYTHON, PARSER_PYTHON_QUARTERLY
+                ));
+
+        REPORT_PARSER_BEAN_MAP.put(ReportType.ANNUALLY,
+                Map.of(ReportParserFileType.PDF, PARSER_PDF_ANNUALLY,
+                        ReportParserFileType.DOC, PARSER_DOC_ANNUALLY,
+                        ReportParserFileType.DOCX, PARSER_DOCX_ANNUALLY,
+                        ReportParserFileType.XLSX, PARSER_XLSX_ANNUALLY,
+                        ReportParserFileType.XLS, PARSER_XLS_ANNUALLY,
+
+                        ReportParserFileType.PYTHON, PARSER_PYTHON_ANNUALLY
+                ));
+    }
+}

+ 26 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParserFactory.java

@@ -0,0 +1,26 @@
+package com.simuwang.daq.components.report.parser;
+
+import cn.hutool.core.map.MapUtil;
+import com.simuwang.base.common.enums.ReportParserFileType;
+import com.simuwang.base.common.enums.ReportType;
+import com.simuwang.base.pojo.dto.report.ReportData;
+import org.springframework.stereotype.Component;
+
+import java.util.Map;
+
+/**
+ * Resolves the {@link ReportParser} registered for a report type / file type pair,
+ * using the bean names declared in {@link ReportParserConstant#REPORT_PARSER_BEAN_MAP}.
+ */
+@Component
+public class ReportParserFactory {
+    /**
+     * Fallback returned when no parser is registered; its parse result is always {@code null}.
+     */
+    private static final ReportParser<? extends ReportData> DEFAULT = (ReportParser<ReportData>) params -> null;
+
+    /**
+     * Parsers keyed by the names supplied to the constructor.
+     * Kept per instance so that constructing the factory never mutates static state.
+     */
+    private final Map<String, ReportParser<? extends ReportData>> parserMap = MapUtil.newHashMap(8);
+
+    /**
+     * @param components parsers keyed by name; {@code null} is treated as empty
+     */
+    public ReportParserFactory(Map<String, ReportParser<? extends ReportData>> components) {
+        if (components != null) {
+            this.parserMap.putAll(components);
+        }
+    }
+
+    /**
+     * Looks up the parser for the given report type and file type.
+     *
+     * @param reportType           report type, may be {@code null}
+     * @param reportParserFileType file type, may be {@code null}
+     * @return the registered parser, or the no-op default when either argument is
+     * {@code null} or no parser is registered for the pair
+     */
+    @SuppressWarnings("unchecked")
+    public <T extends ReportData> ReportParser<T> getInstance(ReportType reportType, ReportParserFileType reportParserFileType) {
+        // The per-type maps are created with Map.of(...), whose get(null) throws
+        // NullPointerException, so null arguments must be answered before the lookup.
+        if (reportType == null || reportParserFileType == null) {
+            return (ReportParser<T>) DEFAULT;
+        }
+        String beanName = ReportParserConstant.REPORT_PARSER_BEAN_MAP.getOrDefault(reportType, MapUtil.empty()).get(reportParserFileType);
+        return (ReportParser<T>) this.parserMap.getOrDefault(beanName, DEFAULT);
+    }
+}

+ 292 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/AbstractReportParser.java

@@ -0,0 +1,292 @@
+package com.simuwang.daq.components.report.parser.pdf;
+
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.exceptions.ExceptionUtil;
+import com.simuwang.base.mapper.EmailFieldMappingMapper;
+import com.simuwang.base.pojo.dos.EmailFieldMappingDO;
+import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
+import com.simuwang.base.pojo.dto.report.ReportData;
+import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
+import com.simuwang.base.pojo.dto.report.ReportParserParams;
+import com.simuwang.daq.components.report.parser.ReportParser;
+import com.smppw.common.pojo.ValueLabelVO;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.util.StopWatch;
+
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+public abstract class AbstractReportParser<T extends ReportData> implements ReportParser<T> {
+    protected final Logger logger = LoggerFactory.getLogger(this.getClass());
+
+    private final EmailFieldMappingMapper fieldMappingMapper;
+    /**
+     * 字段匹配规则
+     */
+    protected List<ValueLabelVO> fieldMapper;
+
+    public AbstractReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        this.fieldMappingMapper = fieldMappingMapper;
+    }
+
+    @Override
+    public T parse(ReportParserParams params) {
+        StopWatch watch = new StopWatch();
+        watch.start();
+        String filepath = params.getFilepath();
+        try {
+            if (this.logger.isInfoEnabled()) {
+                this.logger.info("报告{} 开始解析!", filepath);
+            }
+            List<EmailFieldMappingDO> emailFieldMapping = this.fieldMappingMapper.getEmailFieldMapping();
+            if (CollUtil.isNotEmpty(emailFieldMapping)) {
+                this.fieldMapper = emailFieldMapping.stream().map(e -> new ValueLabelVO(e.getCode(), e.getName())).collect(Collectors.toList());
+            }
+            String reportName = this.initAndGetReportName(params);
+            ReportBaseInfoDTO reportInfo = this.buildReportInfo(params, reportName);
+            ReportFundInfoDTO reportFundInfo = this.parseBaseInfo(params);
+            return this.parseExtInfoAndSetData(reportInfo, reportFundInfo, params);
+        } catch (Exception e) {
+            this.logger.error("报告{} 解析错误\n{}", filepath, ExceptionUtil.stacktraceToString(e));
+        } finally {
+            watch.stop();
+            if (this.logger.isInfoEnabled()) {
+                this.logger.info("报告{} 解析结束!耗时:{}s", filepath, watch.getTotalTime(TimeUnit.SECONDS));
+            }
+        }
+        return null;
+    }
+
+    protected abstract String initAndGetReportName(ReportParserParams params) throws IOException;
+
+    private ReportBaseInfoDTO buildReportInfo(ReportParserParams params, String reportName) {
+        ReportBaseInfoDTO reportInfo = new ReportBaseInfoDTO();
+        reportInfo.setFileId(params.getFileId());
+        reportInfo.setReportName(reportName);
+        reportInfo.setReportType(this.matchReportType(reportName));
+        reportInfo.setReportDate(this.matchReportDate(reportName));
+        return reportInfo;
+    }
+
+    protected abstract ReportFundInfoDTO parseBaseInfo(ReportParserParams params);
+
+    protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo, ReportParserParams params);
+
+//    protected abstract List<EXT> parseExtInfo();
+
+//    protected abstract void saveResult(ReportInfo reportInfo, ReportFundInfo reportFundInfo, List<EXT> exts);
+
+//    private Map<String, List<String>> generateWatermarkMap(String watermarkName) {
+//        Map<String, List<String>> result = MapUtil.newHashMap(32);
+//        // 生成水印列表
+//        String text = watermarkName;
+//        text = text.replaceAll("[()]", ""); // 移除括号
+//        List<String> textList = new ArrayList<>(new HashSet<>(convertStringToList(text)));
+//        Collections.reverse(textList);
+//        StringBuilder sb = new StringBuilder(textList.size());
+//        for (String ch : textList) {
+//            sb.append(ch);
+//        }
+//        String joinedText = sb.toString();
+//
+//        // 基本水印列表
+//        List<String> wkList = new ArrayList<>();
+//        for (String ch : textList) {
+//            wkList.add(ch + "\r\n");
+//            wkList.add("\r\n" + ch);
+//        }
+//
+//        // 查找数字
+//        List<String> matches = findDigits(watermarkName);
+//        if (!matches.isEmpty()) {
+//            for (String match : matches) {
+//                wkList.add("\r\n" + match);
+//                wkList.add(match + "\r\n");
+//            }
+//        }
+//        wkList.add("-");
+//        wkList.add("【");
+//        wkList.add("】");
+//        wkList.add("\r");
+//        wkList.add("\n");
+//        wkList.add("\r\n");
+//
+//        String noNumberText = removeDigits(joinedText);
+//
+//        // 生成不同字段的水印列表
+//        result.put("report_name", new ArrayList<>(wkList));
+//        result.get("report_name").addAll(convertStringToList("有限公司"));
+//
+//        result.put("less", new ArrayList<>(wkList));
+//
+//        result.put("more", new ArrayList<>(wkList));
+//        result.get("more").addAll(convertStringToList(noNumberText));
+//
+//        result.put("leverage", new ArrayList<>(wkList));
+//        result.get("leverage").addAll(convertStringToList(removeKeywords(noNumberText, "基金资产")));
+//
+//        result.put("base_info", new ArrayList<>(wkList));
+//        result.get("base_info").addAll(convertStringToList(removeKeywords(text, "基", "金", "投资", "管理", "有", "份", "融", "资", "产", "本", "号", "收益", "策略", "期")));
+//
+//        result.put("industry", new ArrayList<>(wkList));
+//        result.get("industry").addAll(convertStringToList(removeKeywords(noNumberText, "基金融公产")));
+//
+//        result.put("market_value", new ArrayList<>(Collections.singletonList("\n")));
+//        return result;
+//    }
+
+//    private List<String> findDigits(String text) {
+//        List<String> digits = new ArrayList<>();
+//        Pattern pattern = Pattern.compile("\\d");
+//        Matcher matcher = pattern.matcher(text);
+//        while (matcher.find()) {
+//            digits.add(matcher.group());
+//        }
+//        return digits;
+//    }
+//
+//    private String removeDigits(String text) {
+//        return text.replaceAll("\\d", "");
+//    }
+//
+//    private String removeKeywords(String text, String... keywords) {
+//        for (String keyword : keywords) {
+//            text = text.replaceAll(keyword, "");
+//        }
+//        return text;
+//    }
+//
+//    private List<String> convertStringToList(String text) {
+//        List<String> charList = new ArrayList<>();
+//        for (char c : text.toCharArray()) {
+//            charList.add(c + "");
+//        }
+//        return charList;
+//    }
+
+//    protected String processString(List<String> wmList, String string) {
+//        if (StrUtil.isBlank(string)) {
+//            return null;
+//        }
+//        // 生成正则表达式模式
+//        String pat = String.join("|", wmList);
+//        // 使用正则表达式移除wmList中的元素
+//        string = removeMatches(string, pat);
+//        // 替换中文括号为英文括号
+//        string = string.replace("(", "(").replace(")", ")");
+//        // 移除空格
+//        string = string.replace(" ", "");
+//        // 如果字符串以括号开头,则移除第一个字符
+//        if (startsWithParenthesis(string)) {
+//            string = string.substring(1);
+//        }
+//
+//        return string;
+//    }
+
+//    private String removeMatches(String input, String pattern) {
+//        // 编译正则表达式
+//        Pattern compiledPattern = Pattern.compile(pattern);
+//        // 创建Matcher对象
+//        Matcher matcher = compiledPattern.matcher(input);
+//        // 使用replaceAll方法替换所有匹配到的字符为空字符串
+//        return matcher.replaceAll("");
+//    }
+//
+//    private boolean startsWithParenthesis(String input) {
+//        // 匹配以括号开头的字符串
+//        Pattern pattern = Pattern.compile("^[()].*");
+//        Matcher matcher = pattern.matcher(input);
+//        return matcher.find();
+//    }
+
+    /**
+     * 匹配报告日期
+     *
+     * @param string 文本内容
+     * @return 报告日期
+     */
+    private String matchReportDate(String string) {
+        if (string == null) {
+            return null;
+        }
+
+        // 编译正则表达式模式
+        Pattern pat1 = Pattern.compile("(2\\d{3}).*([一二三四1234])季度");  // 2023年XXX3季度
+        Pattern pat2 = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");  // 2023-12-31
+        Pattern pat3 = Pattern.compile("(2\\d{3})年年度");  // 2023年年度
+        Pattern pat4 = Pattern.compile("(\\d{4})年(\\d{1,2})月");  // 2023年12月
+
+        // 创建Matcher对象
+        Matcher matcher1 = pat1.matcher(string);
+        Matcher matcher2 = pat2.matcher(string);
+        Matcher matcher3 = pat3.matcher(string);
+        Matcher matcher4 = pat4.matcher(string);
+
+        // 尝试匹配
+        if (matcher1.find()) {
+            String year = matcher1.group(1);
+            String quarter = matcher1.group(2);
+            return switch (quarter) {
+                case "一", "1" -> year + "-03-31";
+                case "二", "2" -> year + "-06-30";
+                case "三", "3" -> year + "-09-30";
+                case "四", "4" -> year + "-12-31";
+                default -> null;
+            };
+        } else if (matcher2.find()) {
+            return matcher2.group();
+        } else if (matcher3.find()) {
+            return matcher3.group(1) + "-12-31";
+        } else if (matcher4.find()) {
+            String year = matcher4.group(1);
+            String month = matcher4.group(2);
+            int lastDayOfMonth = getLastDayOfMonth(Integer.parseInt(year), Integer.parseInt(month));
+            return year + "-" + padZero(month) + "-" + padZero(lastDayOfMonth + "");
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * 匹配报告类型,如“季度”、“年度”
+     *
+     * @param string 输入字符串
+     * @return 匹配到的报告类型子字符串,如果没有匹配到则返回null
+     */
+    private String matchReportType(String string) {
+        if (string == null) {
+            return null;
+        }
+
+        // 编译正则表达式模式
+        Pattern pattern = Pattern.compile("月|季度|年度");
+
+        // 创建Matcher对象
+        Matcher matcher = pattern.matcher(string);
+
+        // 尝试匹配
+        if (matcher.find()) {
+            return matcher.group();
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * Returns the number of days in the given month.
+     *
+     * @param year  calendar year
+     * @param month month of the year, 1-based
+     * @return the last day-of-month of that month
+     */
+    private int getLastDayOfMonth(int year, int month) {
+        Calendar calendar = Calendar.getInstance();
+        // Pin the day to the 1st. Calendar.getInstance() carries today's day-of-month, and
+        // a lenient calendar rolls e.g. "September 31" over into October, which would make
+        // getActualMaximum report the length of the wrong month.
+        calendar.set(year, month - 1, 1); // Calendar.MONTH is zero-based
+        return calendar.getActualMaximum(Calendar.DAY_OF_MONTH);
+    }
+
+    private String padZero(String number) {
+        return String.format("%02d", Integer.parseInt(number));
+    }
+}

+ 33 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDAnnuallyReportParser.java

@@ -0,0 +1,33 @@
+package com.simuwang.daq.components.report.parser.pdf;
+
+import com.simuwang.base.mapper.EmailFieldMappingMapper;
+import com.simuwang.base.pojo.dto.report.AnnuallyReportData;
+import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
+import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
+import com.simuwang.base.pojo.dto.report.ReportParserParams;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
+import org.springframework.stereotype.Component;
+
+import java.io.IOException;
+
+/**
+ * Annual-report parser in the pdf parser package, registered under the bean name
+ * {@link ReportParserConstant#PARSER_PDF_ANNUALLY}.
+ * <p>
+ * Placeholder implementation: every hook below currently returns {@code null}.
+ */
+@Component(ReportParserConstant.PARSER_PDF_ANNUALLY)
+public class PDAnnuallyReportParser extends AbstractReportParser<AnnuallyReportData> {
+    /**
+     * @param fieldMappingMapper mapper handed straight to the base class constructor
+     */
+    public PDAnnuallyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    /** Not implemented yet; always returns {@code null}. */
+    @Override
+    protected String initAndGetReportName(ReportParserParams params) throws IOException {
+        return null;
+    }
+
+    /** Not implemented yet; always returns {@code null}. */
+    @Override
+    protected ReportFundInfoDTO parseBaseInfo(ReportParserParams params) {
+        return null;
+    }
+
+    /** Not implemented yet; always returns {@code null}. */
+    @Override
+    protected AnnuallyReportData parseExtInfoAndSetData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo, ReportParserParams params) {
+        return null;
+    }
+}

+ 26 - 128
service-daq/src/main/java/com/simuwang/daq/components/PDMonthlyReportParser.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components;
+package com.simuwang.daq.components.report.parser.pdf;
 
 import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.ListUtil;
@@ -7,10 +7,9 @@ import cn.hutool.core.util.ReflectUtil;
 import cn.hutool.core.util.StrUtil;
 import com.simuwang.base.common.exception.APIException;
 import com.simuwang.base.mapper.EmailFieldMappingMapper;
-import com.simuwang.base.pojo.dos.EmailFieldMappingDO;
-import com.simuwang.daq.dto.MonthlyReportNavInfo;
-import com.simuwang.daq.dto.ReportFundInfo;
-import com.simuwang.daq.dto.ReportInfo;
+import com.simuwang.base.pojo.dto.report.*;
+import com.simuwang.daq.components.CustomPDFTextStripper;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
 import com.smppw.common.pojo.ValueLabelVO;
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
@@ -20,7 +19,6 @@ import technology.tabula.*;
 import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
 
 import java.io.IOException;
-import java.util.Calendar;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Matcher;
@@ -32,31 +30,28 @@ import java.util.stream.Collectors;
  * @date 2024/9/11 16:19
  * @description pdf格式的月报解析
  */
-@Component("monthly-report:pdf")
-public class PDMonthlyReportParser extends AbstractReportParser<MonthlyReportNavInfo> {
+@Component(ReportParserConstant.PARSER_PDF_MONTHLY)
+public class PDMonthlyReportParser extends AbstractReportParser<MonthlyReportData> {
     private final List<Table> extNavTables = ListUtil.list(true);
-    private final EmailFieldMappingMapper fieldMappingMapper;
-    private String reportName = null;
     private Table baseInfoTable = null;
-    private List<ValueLabelVO> fieldMapper = null;
 
     public PDMonthlyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
-        this.fieldMappingMapper = fieldMappingMapper;
+        super(fieldMappingMapper);
     }
 
     @Override
-    protected void initParse() throws IOException {
-        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(this.filepath))) {
+    protected String initAndGetReportName(ReportParserParams params) throws IOException {
+        String reportName = null;
+        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(params.getFilepath()))) {
             CustomPDFTextStripper stripper = new CustomPDFTextStripper();
             stripper.setSortByPosition(true);
             String text = stripper.getText(document);
-            text = text.replace("++\r\n", "").replace("++", "");
+            text = text.replace("+_+\r\n", "").replace("+_+", "");
             List<String> textList = StrUtil.split(text, "\r\n");
             if (CollUtil.isNotEmpty(textList)) {
-                List<String> wkList = this.watermarkListMap.get("report_name");
-                String name = this.processString(wkList, textList.get(0));
-                this.reportName = this.matchReportName(name);
-                if (StrUtil.isBlank(this.reportName)) {
+                String name = textList.get(0);
+                reportName = this.matchReportName(name);
+                if (StrUtil.isBlank(reportName)) {
                     throw new APIException("未匹配到报告名称");
                 }
             }
@@ -77,24 +72,11 @@ public class PDMonthlyReportParser extends AbstractReportParser<MonthlyReportNav
                 }
             }
         }
-        List<EmailFieldMappingDO> emailFieldMapping = this.fieldMappingMapper.getEmailFieldMapping();
-        if (CollUtil.isNotEmpty(emailFieldMapping)) {
-            this.fieldMapper = emailFieldMapping.stream().map(e -> new ValueLabelVO(e.getCode(), e.getName())).collect(Collectors.toList());
-        }
-    }
-
-    @Override
-    protected ReportInfo parseReportInfo(Integer fileId) {
-        ReportInfo reportInfo = new ReportInfo();
-        reportInfo.setFileId(fileId);
-        reportInfo.setReportName(this.reportName);
-        reportInfo.setReportType(this.matchReportType(this.reportName));
-        reportInfo.setReportDate(this.matchReportDate(this.reportName));
-        return reportInfo;
+        return reportName;
     }
 
     @Override
-    protected ReportFundInfo parseBaseInfo() {
+    protected ReportFundInfoDTO parseBaseInfo(ReportParserParams params) {
         Table baseInfoTable = this.baseInfoTable;
         if (baseInfoTable == null) {
             throw new APIException("未解析到基本信息表格");
@@ -107,14 +89,18 @@ public class PDMonthlyReportParser extends AbstractReportParser<MonthlyReportNav
             }
         }
         // 匹配字段清洗字段
-        ReportFundInfo reportFundInfo = new ReportFundInfo();
+        ReportFundInfoDTO reportFundInfo = new ReportFundInfoDTO();
         this.buildInfo(baseInfoMap, reportFundInfo);
         return reportFundInfo;
     }
 
     @Override
-    protected List<MonthlyReportNavInfo> parseExtInfo() {
-        List<MonthlyReportNavInfo> exts = ListUtil.list(false);
+    protected MonthlyReportData parseExtInfoAndSetData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo, ReportParserParams params) {
+        MonthlyReportData reportData = new MonthlyReportData();
+        reportData.setBaseInfo(baseInfo);
+        reportData.setFundInfo(fundInfo);
+
+        List<ReportNetReportDTO> exts = ListUtil.list(false);
         List<Table> extNavTables = this.extNavTables;
         for (Table extNavTable : extNavTables) {
             Map<String, Object> extInfoMap = MapUtil.newHashMap(16);
@@ -123,11 +109,12 @@ public class PDMonthlyReportParser extends AbstractReportParser<MonthlyReportNav
                 String value = extNavTable.getCell(1, i).getText();
                 extInfoMap.put(key, value);
             }
-            MonthlyReportNavInfo navInfo = new MonthlyReportNavInfo();
+            ReportNetReportDTO navInfo = new ReportNetReportDTO();
             buildInfo(extInfoMap, navInfo);
             exts.add(navInfo);
         }
-        return exts;
+        reportData.setNetReport(exts);
+        return reportData;
     }
 
     private void buildInfo(Map<String, Object> extInfoMap, Object info) {
@@ -166,84 +153,6 @@ public class PDMonthlyReportParser extends AbstractReportParser<MonthlyReportNav
         }
     }
 
-    @Override
-    protected void saveResult(ReportInfo reportInfo, ReportFundInfo reportFundInfo, List<MonthlyReportNavInfo> exts) {
-        System.out.println("保存数据!");
-    }
-
-    /**
-     * 匹配报告日期
-     *
-     * @param string 文本内容
-     * @return 报告日期
-     */
-    private String matchReportDate(String string) {
-        if (string == null) {
-            return null;
-        }
-
-        // 编译正则表达式模式
-        Pattern pat1 = Pattern.compile("(2\\d{3}).*([一二三四1234])季度");  // 2023年XXX3季度
-        Pattern pat2 = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");  // 2023-12-31
-        Pattern pat3 = Pattern.compile("(2\\d{3})年年度");  // 2023年年度
-        Pattern pat4 = Pattern.compile("(\\d{4})年(\\d{1,2})月");  // 2023年12月
-
-        // 创建Matcher对象
-        Matcher matcher1 = pat1.matcher(string);
-        Matcher matcher2 = pat2.matcher(string);
-        Matcher matcher3 = pat3.matcher(string);
-        Matcher matcher4 = pat4.matcher(string);
-
-        // 尝试匹配
-        if (matcher1.find()) {
-            String year = matcher1.group(1);
-            String quarter = matcher1.group(2);
-            return switch (quarter) {
-                case "一", "1" -> year + "-03-31";
-                case "二", "2" -> year + "-06-30";
-                case "三", "3" -> year + "-09-30";
-                case "四", "4" -> year + "-12-31";
-                default -> null;
-            };
-        } else if (matcher2.find()) {
-            return matcher2.group();
-        } else if (matcher3.find()) {
-            return matcher3.group(1) + "-12-31";
-        } else if (matcher4.find()) {
-            String year = matcher4.group(1);
-            String month = matcher4.group(2);
-            int lastDayOfMonth = getLastDayOfMonth(Integer.parseInt(year), Integer.parseInt(month));
-            return year + "-" + padZero(month) + "-" + padZero(lastDayOfMonth + "");
-        } else {
-            return null;
-        }
-    }
-
-    /**
-     * 匹配报告类型,如“季度”、“年度”
-     *
-     * @param string 输入字符串
-     * @return 匹配到的报告类型子字符串,如果没有匹配到则返回null
-     */
-    private String matchReportType(String string) {
-        if (string == null) {
-            return null;
-        }
-
-        // 编译正则表达式模式
-        Pattern pattern = Pattern.compile("月|季度|年度");
-
-        // 创建Matcher对象
-        Matcher matcher = pattern.matcher(string);
-
-        // 尝试匹配
-        if (matcher.find()) {
-            return matcher.group();
-        } else {
-            return null;
-        }
-    }
-
     private String matchReportName(String text) {
         if (StrUtil.isBlank(text)) {
             return null;
@@ -271,15 +180,4 @@ public class PDMonthlyReportParser extends AbstractReportParser<MonthlyReportNav
         }
         return reportName.replace("(", "(").replace(")", ")");
     }
-
-    private int getLastDayOfMonth(int year, int month) {
-        Calendar calendar = Calendar.getInstance();
-        calendar.set(Calendar.YEAR, year);
-        calendar.set(Calendar.MONTH, month - 1); // Calendar.MONTH 是从0开始的
-        return calendar.getActualMaximum(Calendar.DAY_OF_MONTH);
-    }
-
-    private String padZero(String number) {
-        return String.format("%02d", Integer.parseInt(number));
-    }
 }

+ 33 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDQuarterlyReportParser.java

@@ -0,0 +1,33 @@
+package com.simuwang.daq.components.report.parser.pdf;
+
+import com.simuwang.base.mapper.EmailFieldMappingMapper;
+import com.simuwang.base.pojo.dto.report.QuarterlyReportData;
+import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
+import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
+import com.simuwang.base.pojo.dto.report.ReportParserParams;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
+import org.springframework.stereotype.Component;
+
+import java.io.IOException;
+
+/**
+ * Quarterly-report parser in the pdf parser package, registered under the bean name
+ * {@link ReportParserConstant#PARSER_PDF_QUARTERLY}.
+ * <p>
+ * Placeholder implementation: every hook below currently returns {@code null}.
+ */
+@Component(ReportParserConstant.PARSER_PDF_QUARTERLY)
+public class PDQuarterlyReportParser extends AbstractReportParser<QuarterlyReportData> {
+    /**
+     * @param fieldMappingMapper mapper handed straight to the base class constructor
+     */
+    public PDQuarterlyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    /** Not implemented yet; always returns {@code null}. */
+    @Override
+    protected String initAndGetReportName(ReportParserParams params) throws IOException {
+        return null;
+    }
+
+    /** Not implemented yet; always returns {@code null}. */
+    @Override
+    protected ReportFundInfoDTO parseBaseInfo(ReportParserParams params) {
+        return null;
+    }
+
+    /** Not implemented yet; always returns {@code null}. */
+    @Override
+    protected QuarterlyReportData parseExtInfoAndSetData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo, ReportParserParams params) {
+        return null;
+    }
+}

+ 22 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/AbstractPyReportParser.java

@@ -0,0 +1,22 @@
+package com.simuwang.daq.components.report.parser.py;
+
+import com.simuwang.base.config.DaqProperties;
+import com.simuwang.base.mapper.FundInfoMapper;
+import com.simuwang.base.pojo.dto.report.ReportData;
+import com.simuwang.base.pojo.dto.report.ReportParserParams;
+import com.simuwang.daq.components.report.parser.ReportParser;
+
+/**
+ * Base class for the report parsers in the {@code py} package.
+ * <p>
+ * Placeholder implementation: the injected collaborators are stored but not read anywhere in
+ * this class yet, and {@link #parse(ReportParserParams)} always returns {@code null}.
+ *
+ * @param <T> concrete report data type produced by the parser
+ */
+public abstract class AbstractPyReportParser<T extends ReportData> implements ReportParser<T> {
+    // NOTE(review): presumably intended for calling the Python parsing service - confirm once implemented
+    private final DaqProperties properties;
+    private final FundInfoMapper fundInfoMapper;
+
+    /**
+     * @param properties     configuration object kept for later use
+     * @param fundInfoMapper fund mapper kept for later use
+     */
+    public AbstractPyReportParser(DaqProperties properties, FundInfoMapper fundInfoMapper) {
+        this.properties = properties;
+        this.fundInfoMapper = fundInfoMapper;
+    }
+
+    /** Not implemented yet; always returns {@code null}. */
+    @Override
+    public T parse(ReportParserParams params) {
+        return null;
+    }
+}

+ 14 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/PythonMonthlyReportParser.java

@@ -0,0 +1,14 @@
+package com.simuwang.daq.components.report.parser.py;
+
+import com.simuwang.base.config.DaqProperties;
+import com.simuwang.base.mapper.FundInfoMapper;
+import com.simuwang.base.pojo.dto.report.MonthlyReportData;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
+import org.springframework.stereotype.Component;
+
+/**
+ * Monthly-report parser of the {@code py} package, registered under the bean name
+ * {@link ReportParserConstant#PARSER_PYTHON_MONTHLY}. It adds nothing of its own and relies
+ * entirely on {@link AbstractPyReportParser}.
+ */
+@Component(ReportParserConstant.PARSER_PYTHON_MONTHLY)
+public class PythonMonthlyReportParser extends AbstractPyReportParser<MonthlyReportData> {
+    /**
+     * @param properties     configuration object passed to the base class
+     * @param fundInfoMapper fund mapper passed to the base class
+     */
+    public PythonMonthlyReportParser(DaqProperties properties, FundInfoMapper fundInfoMapper) {
+        super(properties, fundInfoMapper);
+    }
+}

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/AbstractReportWriter.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 import cn.hutool.core.exceptions.ExceptionUtil;
 import com.simuwang.base.mapper.report.ReportBaseInfoMapper;

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/AnnuallyReportWriter.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 import com.simuwang.base.mapper.report.*;
 import com.simuwang.base.pojo.dto.report.AnnuallyReportData;

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/MonthlyReportWriter.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 import cn.hutool.core.collection.CollUtil;
 import com.simuwang.base.mapper.report.ReportBaseInfoMapper;

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/QuarterlyReportWriter.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 import cn.hutool.core.collection.CollUtil;
 import com.simuwang.base.mapper.report.*;

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriter.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 import com.simuwang.base.pojo.dto.report.ReportData;
 

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriterConstant.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 import cn.hutool.core.map.MapUtil;
 import com.simuwang.base.common.enums.ReportType;

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriterFactory.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 import cn.hutool.core.map.MapUtil;
 import com.simuwang.base.common.enums.ReportType;

+ 0 - 43
service-daq/src/main/java/com/simuwang/daq/dto/MonthlyReportNavInfo.java

@@ -1,43 +0,0 @@
-package com.simuwang.daq.dto;
-
-public class MonthlyReportNavInfo extends ReportExtInfo {
-    private String valuationDate;
-
-    private String nav;
-
-    private String endTotalShares;
-
-    private String fundAssetSize;
-
-    public String getValuationDate() {
-        return valuationDate;
-    }
-
-    public void setValuationDate(String valuationDate) {
-        this.valuationDate = valuationDate;
-    }
-
-    public String getNav() {
-        return nav;
-    }
-
-    public void setNav(String nav) {
-        this.nav = nav;
-    }
-
-    public String getEndTotalShares() {
-        return endTotalShares;
-    }
-
-    public void setEndTotalShares(String endTotalShares) {
-        this.endTotalShares = endTotalShares;
-    }
-
-    public String getFundAssetSize() {
-        return fundAssetSize;
-    }
-
-    public void setFundAssetSize(String fundAssetSize) {
-        this.fundAssetSize = fundAssetSize;
-    }
-}

+ 0 - 13
service-daq/src/main/java/com/simuwang/daq/dto/ReportExtInfo.java

@@ -1,13 +0,0 @@
-package com.simuwang.daq.dto;
-
-public class ReportExtInfo {
-    private Integer fileId;
-
-    public Integer getFileId() {
-        return fileId;
-    }
-
-    public void setFileId(Integer fileId) {
-        this.fileId = fileId;
-    }
-}

+ 0 - 18
service-daq/src/main/java/com/simuwang/daq/dto/ReportFileType.java

@@ -1,18 +0,0 @@
-package com.simuwang.daq.dto;
-
-public enum ReportFileType {
-    PDF("pdf"),
-    DOCX("docx"),
-    DOC("doc"),
-    XLSX("xlsx");
-
-    private final String suffix;
-
-    ReportFileType(String suffix) {
-        this.suffix = suffix;
-    }
-
-    public String getSuffix() {
-        return suffix;
-    }
-}

+ 0 - 291
service-daq/src/main/java/com/simuwang/daq/dto/ReportFundInfo.java

@@ -1,291 +0,0 @@
-package com.simuwang.daq.dto;
-
-/**
- * @author wangzaijun
- * @date 2024/9/12 15:34
- * @description 报告解析的基金信息
- */
-public class ReportFundInfo {
-    private String fundName;
-    private String cFundName;
-    /**
-     * 是否分级基金
-     */
-    private Integer istiered;
-    /**
-     * 备案编码
-     */
-    private String registerNumber;
-    private String trustName;
-    private String custodianName;
-    private String advisorName;
-    /**
-     * 运作方式 开放式或封闭式
-     */
-    private String operationType;
-    private String fundType;
-    /**
-     * 成立日期
-     */
-    private String inceptionDate;
-    private String sharePerAsset;
-    private String investmentObjective;
-    private String fundStrategyDescription;
-    private String secondaryBenchmark;
-    private String riskReturnDesc;
-    private String realizedIncome;
-    private String profit;
-    private String fundAssetSize;
-    private String nav;
-    private String initTotalShares;
-    private String subscription;
-    private String redemption;
-    private String split;
-    /**
-     * 杠杆信息描述
-     */
-    private String leverageNote;
-    /**
-     * 杠杆比例
-     */
-    private String leverage;
-    private String remark;
-    private String industryTrend;
-    private String fundManager;
-    /**
-     * 是否托管复核
-     */
-    private String reviewed;
-
-    public String getFundName() {
-        return fundName;
-    }
-
-    public void setFundName(String fundName) {
-        this.fundName = fundName;
-    }
-
-    public String getcFundName() {
-        return cFundName;
-    }
-
-    public void setcFundName(String cFundName) {
-        this.cFundName = cFundName;
-    }
-
-    public Integer getIstiered() {
-        return istiered;
-    }
-
-    public void setIstiered(Integer istiered) {
-        this.istiered = istiered;
-    }
-
-    public String getRegisterNumber() {
-        return registerNumber;
-    }
-
-    public void setRegisterNumber(String registerNumber) {
-        this.registerNumber = registerNumber;
-    }
-
-    public String getTrustName() {
-        return trustName;
-    }
-
-    public void setTrustName(String trustName) {
-        this.trustName = trustName;
-    }
-
-    public String getCustodianName() {
-        return custodianName;
-    }
-
-    public void setCustodianName(String custodianName) {
-        this.custodianName = custodianName;
-    }
-
-    public String getAdvisorName() {
-        return advisorName;
-    }
-
-    public void setAdvisorName(String advisorName) {
-        this.advisorName = advisorName;
-    }
-
-    public String getOperationType() {
-        return operationType;
-    }
-
-    public void setOperationType(String operationType) {
-        this.operationType = operationType;
-    }
-
-    public String getFundType() {
-        return fundType;
-    }
-
-    public void setFundType(String fundType) {
-        this.fundType = fundType;
-    }
-
-    public String getInceptionDate() {
-        return inceptionDate;
-    }
-
-    public void setInceptionDate(String inceptionDate) {
-        this.inceptionDate = inceptionDate;
-    }
-
-    public String getSharePerAsset() {
-        return sharePerAsset;
-    }
-
-    public void setSharePerAsset(String sharePerAsset) {
-        this.sharePerAsset = sharePerAsset;
-    }
-
-    public String getInvestmentObjective() {
-        return investmentObjective;
-    }
-
-    public void setInvestmentObjective(String investmentObjective) {
-        this.investmentObjective = investmentObjective;
-    }
-
-    public String getFundStrategyDescription() {
-        return fundStrategyDescription;
-    }
-
-    public void setFundStrategyDescription(String fundStrategyDescription) {
-        this.fundStrategyDescription = fundStrategyDescription;
-    }
-
-    public String getSecondaryBenchmark() {
-        return secondaryBenchmark;
-    }
-
-    public void setSecondaryBenchmark(String secondaryBenchmark) {
-        this.secondaryBenchmark = secondaryBenchmark;
-    }
-
-    public String getRiskReturnDesc() {
-        return riskReturnDesc;
-    }
-
-    public void setRiskReturnDesc(String riskReturnDesc) {
-        this.riskReturnDesc = riskReturnDesc;
-    }
-
-    public String getRealizedIncome() {
-        return realizedIncome;
-    }
-
-    public void setRealizedIncome(String realizedIncome) {
-        this.realizedIncome = realizedIncome;
-    }
-
-    public String getProfit() {
-        return profit;
-    }
-
-    public void setProfit(String profit) {
-        this.profit = profit;
-    }
-
-    public String getFundAssetSize() {
-        return fundAssetSize;
-    }
-
-    public void setFundAssetSize(String fundAssetSize) {
-        this.fundAssetSize = fundAssetSize;
-    }
-
-    public String getNav() {
-        return nav;
-    }
-
-    public void setNav(String nav) {
-        this.nav = nav;
-    }
-
-    public String getInitTotalShares() {
-        return initTotalShares;
-    }
-
-    public void setInitTotalShares(String initTotalShares) {
-        this.initTotalShares = initTotalShares;
-    }
-
-    public String getSubscription() {
-        return subscription;
-    }
-
-    public void setSubscription(String subscription) {
-        this.subscription = subscription;
-    }
-
-    public String getRedemption() {
-        return redemption;
-    }
-
-    public void setRedemption(String redemption) {
-        this.redemption = redemption;
-    }
-
-    public String getSplit() {
-        return split;
-    }
-
-    public void setSplit(String split) {
-        this.split = split;
-    }
-
-    public String getLeverageNote() {
-        return leverageNote;
-    }
-
-    public void setLeverageNote(String leverageNote) {
-        this.leverageNote = leverageNote;
-    }
-
-    public String getLeverage() {
-        return leverage;
-    }
-
-    public void setLeverage(String leverage) {
-        this.leverage = leverage;
-    }
-
-    public String getRemark() {
-        return remark;
-    }
-
-    public void setRemark(String remark) {
-        this.remark = remark;
-    }
-
-    public String getIndustryTrend() {
-        return industryTrend;
-    }
-
-    public void setIndustryTrend(String industryTrend) {
-        this.industryTrend = industryTrend;
-    }
-
-    public String getFundManager() {
-        return fundManager;
-    }
-
-    public void setFundManager(String fundManager) {
-        this.fundManager = fundManager;
-    }
-
-    public String getReviewed() {
-        return reviewed;
-    }
-
-    public void setReviewed(String reviewed) {
-        this.reviewed = reviewed;
-    }
-}

+ 0 - 54
service-daq/src/main/java/com/simuwang/daq/dto/ReportInfo.java

@@ -1,54 +0,0 @@
-package com.simuwang.daq.dto;
-
-/**
- * @author wangzaijun
- * @date 2024/9/11 17:57
- * @description 报告基本信息
- */
-public class ReportInfo {
-    private Integer fileId;
-    /**
-     * 报告名称
-     */
-    private String reportName;
-    /**
-     * 报告类型(月、季、年)
-     */
-    private String reportType;
-    /**
-     * 报告日期
-     */
-    private String reportDate;
-
-    public Integer getFileId() {
-        return fileId;
-    }
-
-    public void setFileId(Integer fileId) {
-        this.fileId = fileId;
-    }
-
-    public String getReportName() {
-        return reportName;
-    }
-
-    public void setReportName(String reportName) {
-        this.reportName = reportName;
-    }
-
-    public String getReportType() {
-        return reportType;
-    }
-
-    public void setReportType(String reportType) {
-        this.reportType = reportType;
-    }
-
-    public String getReportDate() {
-        return reportDate;
-    }
-
-    public void setReportDate(String reportDate) {
-        this.reportDate = reportDate;
-    }
-}

+ 42 - 62
service-daq/src/main/java/com/simuwang/daq/service/EmailParseService.java

@@ -8,9 +8,9 @@ import cn.hutool.core.date.DateUtil;
 import cn.hutool.core.exceptions.ExceptionUtil;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.StrUtil;
-import cn.hutool.http.HttpUtil;
-import cn.hutool.json.JSONUtil;
 import com.simuwang.base.common.conts.*;
+import com.simuwang.base.common.enums.ReportParserFileType;
+import com.simuwang.base.common.enums.ReportType;
 import com.simuwang.base.common.util.EmailUtil;
 import com.simuwang.base.common.util.FileUtil;
 import com.simuwang.base.config.DaqProperties;
@@ -20,11 +20,12 @@ import com.simuwang.base.pojo.dos.*;
 import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
 import com.simuwang.base.pojo.dto.EmailFundNavDTO;
 import com.simuwang.base.pojo.dto.MailboxInfoDTO;
-import com.simuwang.base.pojo.dto.report.PythonResult;
 import com.simuwang.base.pojo.dto.report.ReportData;
+import com.simuwang.base.pojo.dto.report.ReportParserParams;
 import com.simuwang.base.pojo.valuation.CmValuationTableAttribute;
-import com.simuwang.daq.components.PythonReportConverter;
-import com.simuwang.daq.components.writer.ReportWriterFactory;
+import com.simuwang.daq.components.report.parser.ReportParser;
+import com.simuwang.daq.components.report.parser.ReportParserFactory;
+import com.simuwang.daq.components.report.writer.ReportWriterFactory;
 import jakarta.mail.*;
 import jakarta.mail.internet.MimeMultipart;
 import jakarta.mail.search.ComparisonTerm;
@@ -51,10 +52,8 @@ import java.util.stream.Collectors;
 @Service
 public class EmailParseService {
 
+    public static final int stepSize = 10000;
     private static final Logger log = LoggerFactory.getLogger(EmailParseService.class);
-
-    private final String pyBaseUrl;
-
     private final EmailTypeRuleMapper emailTypeRuleMapper;
     private final EmailRuleConfig emailRuleConfig;
     private final EmailFieldMappingMapper emailFieldMapper;
@@ -73,13 +72,16 @@ public class EmailParseService {
 
     @Value("${email.file.path}")
     private String path;
+
+    @Autowired
+    private DaqProperties properties;
+
+    /* 报告解析和入库的方法 */
     @Autowired
-    private FundInfoMapper fundInfoMapper;
+    private ReportParserFactory reportParserFactory;
     @Autowired
     private ReportWriterFactory reportWriterFactory;
 
-    public static final int stepSize = 10000;
-
     public EmailParseService(EmailTypeRuleMapper emailTypeRuleMapper, EmailRuleConfig emailRuleConfig,
                              EmailFieldMappingMapper emailFieldMapper, EmailParserFactory emailParserFactory,
                              EmailParseInfoMapper emailParseInfoMapper, EmailFileInfoMapper emailFileInfoMapper,
@@ -101,7 +103,7 @@ public class EmailParseService {
         this.fundService = fundService;
         this.fundAliasMapper = fundAliasMapper;
 
-        this.pyBaseUrl = properties.getPyBaseUrl();
+        this.properties = properties;
         this.valuationTableMapper = valuationTableMapper;
         this.valuationTableAttributeMapper = valuationTableAttributeMapper;
         this.fundPositionDetailMapper = fundPositionDetailMapper;
@@ -183,8 +185,8 @@ public class EmailParseService {
             if (CollUtil.isEmpty(fundNavDTOList) && !Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)) {
                 continue;
             }
-            // python接口解析结果
-            ReportData data = this.requestPyAndResult(fileId, emailContentInfoDTO);
+            // 解析结果(可以从python获取或者自行解析)
+            ReportData data = this.parseReportAndHandleResult(fileId, emailContentInfoDTO);
             if (data != null) {
                 // 保存报告解析数据
                 this.reportWriterFactory.getInstance(data.getReportType()).write(data);
@@ -340,57 +342,35 @@ public class EmailParseService {
         }).collect(Collectors.toList());
     }
 
-    private ReportData requestPyAndResult(int fileId, EmailContentInfoDTO emailContentInfoDTO) {
+    private ReportData parseReportAndHandleResult(int fileId, EmailContentInfoDTO emailContentInfoDTO) {
         String fileName = emailContentInfoDTO.getFileName();
         Integer emailType = emailContentInfoDTO.getEmailType();
-        ReportData reportData = null;
-        if (Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)) {
-            if (StrUtil.isBlank(fileName)) {
-                return null;
-            }
-            Pattern pattern = Pattern.compile("S(?:[A-Z]{0}[0-9]{5}|[A-Z][0-9]{4}|[A-Z]{2}[0-9]{3}|[A-Z]{3}[0-9]{2})");
-            Matcher matcher = pattern.matcher(fileName);
-            String registerNumber = null;
-            if (matcher.find()) {
-                registerNumber = matcher.group();
-            }
-            int type = 0;
-            if (fileName.contains("季报")) {
-                type = 1;
-            } else if (fileName.contains("年报")) {
-                type = 2;
-            }
-            String api = "/api/v1/parse/amac_report";
-            Map<String, Object> params = MapUtil.newHashMap(16);
-            params.put("file_id", fileId);
-            params.put("file_path", emailContentInfoDTO.getFilePath());
-            params.put("register_number", registerNumber);
-            params.put("file_type", type);
-            params.put("file_name", fileName);
-            if (StrUtil.isNotBlank(registerNumber)) {
-                FundAndCompanyInfoDO info = this.fundInfoMapper.queryFundAndTrustByRegisterNumber(registerNumber);
-                if (info != null) {
-                    params.put("fund_name", info.getFundName());
-                    params.put("trust_name", info.getCompanyName());
-                }
-            }
-            long millis = System.currentTimeMillis();
-            try {
-                String body = HttpUtil.post(this.pyBaseUrl + api, JSONUtil.toJsonStr(params));
-                PythonResult<?> result = PythonReportConverter.convert(JSONUtil.parseObj(body), type);
-                reportData = result.getData();
-                if (log.isInfoEnabled()) {
-                    log.info("报告{}结果为:\n{}", params, reportData);
-                }
-            } catch (Exception e) {
-                log.error("请求python的报告解析接口报错\n{}", ExceptionUtil.stacktraceToString(e));
-            } finally {
-                if (log.isInfoEnabled()) {
-                    log.info("当前报告{}解析完成,总计耗时{}ms", params, (System.currentTimeMillis() - millis));
-                }
-            }
+        if (!Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType) || StrUtil.isBlank(fileName)) {
+            return null;
+        }
+        Pattern pattern = Pattern.compile("S(?:[A-Z]{0}[0-9]{5}|[A-Z][0-9]{4}|[A-Z]{2}[0-9]{3}|[A-Z]{3}[0-9]{2})");
+        Matcher matcher = pattern.matcher(fileName);
+        String registerNumber = null;
+        if (matcher.find()) {
+            registerNumber = matcher.group();
+        }
+        ReportType type = ReportType.MONTHLY;
+        if (fileName.contains(ReportType.QUARTERLY.getLabel())) {
+            type = ReportType.QUARTERLY;
+        } else if (fileName.contains(ReportType.ANNUALLY.getLabel())) {
+            type = ReportType.ANNUALLY;
+        }
+        ReportParserFileType fileType;
+        if (Objects.equals(Boolean.TRUE, this.properties.getEnablePyParser())) {
+            fileType = ReportParserFileType.PYTHON;
+        } else {
+            String fileSuffix = StrUtil.subAfter(fileName, ".", true);
+            fileType = ReportParserFileType.getBySuffix(fileSuffix);
         }
-        return reportData;
+        ReportParser<ReportData> instance = this.reportParserFactory.getInstance(type, fileType);
+        ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName)
+                .filepath(emailContentInfoDTO.getFilePath()).registerNumber(registerNumber).build();
+        return instance.parse(params);
     }
 
     private void saveNavAndAssetNet(Integer fileId, List<EmailFundNavDTO> fundNavDTOList, Date parseDate) {

+ 3 - 4
service-daq/src/main/java/com/simuwang/daq/service/ReportParseService.java

@@ -1,6 +1,6 @@
 package com.simuwang.daq.service;
 
-import com.simuwang.daq.components.ReportParser;
+import com.simuwang.daq.components.report.parser.ReportParser;
 import org.springframework.stereotype.Service;
 
 @Service
@@ -13,8 +13,7 @@ public class ReportParseService {
 
     public void parse() {
 //        this.parser.parse(1, "D:\\Documents\\workspace\\idea\\smppw\\data-daq\\service-daq\\src\\main\\java\\com\\simuwang\\daq\\utils\\12931.pdf", "幻方量化1000指数专享1号5期私募证券投资基金宁波幻方量化投资管理合伙企业(有限合伙)");
-        this.parser.parse(1,
-                "D:\\Documents\\workspace\\idea\\smppw\\data-daq\\service-daq\\src\\main\\java\\com\\simuwang\\daq\\utils\\12931.pdf",
-                "古曲泉发一号私募证券投资基金上海古曲私募基金管理有限公司");
+//        this.parser.parse(1, ,
+//                "D:\\Documents\\workspace\\idea\\smppw\\data-daq\\service-daq\\src\\main\\java\\com\\simuwang\\daq\\utils\\12931.pdf");
     }
 }

+ 261 - 249
service-daq/src/main/java/com/simuwang/daq/utils/ReportParseUtil.java

@@ -1,50 +1,62 @@
-package com.simuwang.daq.utils;
-
-import cn.hutool.core.map.MapUtil;
-import cn.hutool.core.util.StrUtil;
-import cn.hutool.http.HttpUtil;
-import cn.hutool.json.JSONObject;
-import cn.hutool.json.JSONUtil;
-import com.simuwang.base.pojo.dto.report.PythonResult;
-import com.simuwang.daq.components.PythonReportConverter;
-import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDResources;
-import org.apache.pdfbox.pdmodel.common.PDStream;
-import org.apache.pdfbox.pdmodel.graphics.PDXObject;
-import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
-
-import java.io.IOException;
-import java.util.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-public class ReportParseUtil {
-    public static void main(String[] args) throws IOException {
-        String fileName = "SJM970_排排精选进取一号私募证券投资基金_2022年第4季度报告.pdf";
-        Pattern pattern = Pattern.compile("S(?:[A-Z]{0}[0-9]{5}|[A-Z][0-9]{4}|[A-Z]{2}[0-9]{3}|[A-Z]{3}[0-9]{2})");
-        Matcher matcher = pattern.matcher(fileName);
-        String registerNumber = null;
-        if (matcher.find()) {
-            registerNumber = matcher.group();
-        }
-
-        int type = 1;
-        String baseUrl = "http://192.168.0.81:8088";
-        String api = "/api/v1/parse/amac_report";
-        Map<String, Object> params = MapUtil.newHashMap(16);
-        params.put("file_id", 111112);
-        params.put("file_path", "E:/workproject/fastparse/src/fastparse/static/reports/quarterly_report/13445.pdf");
-        params.put("register_number", registerNumber);
-        params.put("file_type", type);
-        params.put("file_name", fileName);
-        params.put("fund_name", null);
-        params.put("trust_name", null);
-        String body = HttpUtil.post(baseUrl + api, JSONUtil.toJsonStr(params));
-        JSONObject obj = JSONUtil.parseObj(body);
-        PythonResult<?> result = PythonReportConverter.convert(obj, type);
-        System.out.println(result);
-
+//package com.simuwang.daq.utils;
+//
+//import cn.hutool.core.collection.ListUtil;
+//import cn.hutool.core.map.MapUtil;
+//import cn.hutool.core.util.ReflectUtil;
+//import cn.hutool.core.util.StrUtil;
+//import cn.hutool.http.HttpUtil;
+//import cn.hutool.json.JSONObject;
+//import cn.hutool.json.JSONUtil;
+//import com.simuwang.base.pojo.dto.report.PythonResult;
+//import com.simuwang.daq.components.CustomPDFTextStripper;
+//import com.simuwang.daq.components.PythonReportConverter;
+//import com.simuwang.daq.dto.ReportFundInfo;
+//import com.smppw.common.pojo.ValueLabelVO;
+//import org.apache.pdfbox.Loader;
+//import org.apache.pdfbox.cos.COSName;
+//import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
+//import org.apache.pdfbox.pdmodel.PDDocument;
+//import org.apache.pdfbox.pdmodel.PDPage;
+//import org.apache.pdfbox.pdmodel.PDResources;
+//import org.apache.pdfbox.pdmodel.common.PDStream;
+//import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+//import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+//import org.apache.pdfbox.text.PDFTextStripper;
+//import technology.tabula.*;
+//import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
+//
+//import java.io.IOException;
+//import java.util.*;
+//import java.util.regex.Matcher;
+//import java.util.regex.Pattern;
+//import java.util.stream.Collectors;
+//
+//public class ReportParseUtil {
+//    public static void main(String[] args) throws IOException {
+////        String fileName = "SJM970_排排精选进取一号私募证券投资基金_2022年第4季度报告.pdf";
+////        Pattern pattern = Pattern.compile("S(?:[A-Z]{0}[0-9]{5}|[A-Z][0-9]{4}|[A-Z]{2}[0-9]{3}|[A-Z]{3}[0-9]{2})");
+////        Matcher matcher = pattern.matcher(fileName);
+////        String registerNumber = null;
+////        if (matcher.find()) {
+////            registerNumber = matcher.group();
+////        }
+////
+////        int type = 1;
+////        String baseUrl = "http://192.168.0.81:8088";
+////        String api = "/api/v1/parse/amac_report";
+////        Map<String, Object> params = MapUtil.newHashMap(16);
+////        params.put("file_id", 111112);
+////        params.put("file_path", "E:/workproject/fastparse/src/fastparse/static/reports/quarterly_report/13445.pdf");
+////        params.put("register_number", registerNumber);
+////        params.put("file_type", type);
+////        params.put("file_name", fileName);
+////        params.put("fund_name", null);
+////        params.put("trust_name", null);
+////        String body = HttpUtil.post(baseUrl + api, JSONUtil.toJsonStr(params));
+////        JSONObject obj = JSONUtil.parseObj(body);
+////        PythonResult<?> result = PythonReportConverter.convert(obj, type);
+////        System.out.println(result);
+//
 //        List<ValueLabelVO> fieldMapper = ListUtil.list(false);
 //        fieldMapper.add(new ValueLabelVO("fundName", "基金名称"));
 //        fieldMapper.add(new ValueLabelVO("registerNumber", "基金编码"));
@@ -137,220 +149,220 @@ public class ReportParseUtil {
 //                }
 //            }
 //        }
-    }
-
-    /**
-     * 找图片水印
-     *
-     * @param page
-     * @return
-     * @throws IOException
-     */
-    public static Map<COSName, PDImageXObject> findImageWatermark(PDPage page) throws IOException {
-        Map<COSName, PDImageXObject> watermarkMap = MapUtil.newHashMap();
-        PDResources resources = page.getResources();
-        Iterable<COSName> xObjectNames = resources.getXObjectNames();
-        for (COSName xObjectName : xObjectNames) {
-            PDXObject xObject = resources.getXObject(xObjectName);
-            PDStream stream = xObject.getStream();
-            PDImageXObject imageXObject = null;
-            try {
-                imageXObject = new PDImageXObject(stream, resources);
-            } catch (Exception e) {
-                e.printStackTrace();
-            }
-            if (imageXObject != null) {
-                watermarkMap.put(xObjectName, imageXObject);
-            }
-        }
-        return watermarkMap;
-    }
-
-    private static Map<String, List<String>> generateWatermarkListMap(String fundName, String trustName, String registerNumber) {
-        Map<String, List<String>> result = MapUtil.newHashMap(32);
-        // 生成水印列表
-
-        fundName = StrUtil.isNotBlank(fundName) ? fundName : "私募证券投资基金";
-        trustName = StrUtil.isNotBlank(trustName) ? trustName : "资产管理有限公司";
-        registerNumber = StrUtil.isNotBlank(registerNumber) ? registerNumber : "";
-        String text = fundName + trustName + registerNumber;
-        text = text.replaceAll("[()]", ""); // 移除括号
-        List<String> textList = new ArrayList<>(new HashSet<>(convertStringToList(text)));
-        Collections.reverse(textList);
-        StringBuilder sb = new StringBuilder(textList.size());
-        for (String ch : textList) {
-            sb.append(ch);
-        }
-        String joinedText = sb.toString();
-
-        // 基本水印列表
-        List<String> wkList = new ArrayList<>();
-        for (String ch : textList) {
-            wkList.add(ch + "\r\n");
-            wkList.add("\r\n" + ch);
-        }
-
-        // 查找数字
-        List<String> matches = findDigits(fundName);
-        if (!matches.isEmpty()) {
-            for (String match : matches) {
-                wkList.add("\r\n" + match);
-                wkList.add(match + "\r\n");
-            }
-        }
-        wkList.add("-");
-        wkList.add("【");
-        wkList.add("】");
-        wkList.add("\r");
-        wkList.add("\r\n");
-
-        String noNumberText = removeDigits(joinedText);
-
-        // 生成不同字段的水印列表
-        result.put("report_name", new ArrayList<>(wkList));
-        result.get("report_name").addAll(convertStringToList("有限公司"));
-
-        result.put("less", new ArrayList<>(wkList));
-
-        result.put("more", new ArrayList<>(wkList));
-        result.get("more").addAll(convertStringToList(noNumberText));
-
-        result.put("leverage", new ArrayList<>(wkList));
-        result.get("leverage").addAll(convertStringToList(removeKeywords(noNumberText, "基金资产")));
-
-        result.put("base_info", new ArrayList<>(wkList));
-        result.get("base_info").addAll(convertStringToList(removeKeywords(text, "基", "金", "投资", "管理", "有", "份", "融", "资", "产", "本", "号", "收益", "策略", "期")));
-
-        result.put("industry", new ArrayList<>(wkList));
-        result.get("industry").addAll(convertStringToList(removeKeywords(noNumberText, "基金融公产")));
-
-        result.put("market_value", new ArrayList<>(Collections.singletonList("\n")));
-        return result;
-    }
-
-    private static List<String> findDigits(String text) {
-        List<String> digits = new ArrayList<>();
-        Pattern pattern = Pattern.compile("\\d");
-        Matcher matcher = pattern.matcher(text);
-        while (matcher.find()) {
-            digits.add(matcher.group());
-        }
-        return digits;
-    }
-
-    private static String removeDigits(String text) {
-        return text.replaceAll("\\d", "");
-    }
-
-    private static String removeKeywords(String text, String... keywords) {
-        for (String keyword : keywords) {
-            text = text.replaceAll(keyword, "");
-        }
-        return text;
-    }
-
-    private static List<String> convertStringToList(String text) {
-        List<String> charList = new ArrayList<>();
-        for (char c : text.toCharArray()) {
-            charList.add(c + "");
-        }
-        return charList;
-    }
-
-    public static String processString(List<String> wmList, String string) {
-        // 生成正则表达式模式
-        String pat = String.join("|", wmList);
-        // 使用正则表达式移除wmList中的元素
-        string = removeMatches(string, pat);
-        // 替换中文括号为英文括号
-        string = string.replace("(", "(").replace(")", ")");
-        // 移除空格
-        string = string.replace(" ", "");
-        // 如果字符串以括号开头,则移除第一个字符
-        if (startsWithParenthesis(string)) {
-            string = string.substring(1);
-        }
-
-        return string;
-    }
-
-    private static String removeMatches(String input, String pattern) {
-        // 编译正则表达式
-        Pattern compiledPattern = Pattern.compile(pattern);
-        // 创建Matcher对象
-        Matcher matcher = compiledPattern.matcher(input);
-        // 使用replaceAll方法替换所有匹配到的字符为空字符串
-        return matcher.replaceAll("");
-    }
-
-    private static boolean startsWithParenthesis(String input) {
-        // 匹配以括号开头的字符串
-        Pattern pattern = Pattern.compile("^[()].*");
-        Matcher matcher = pattern.matcher(input);
-        return matcher.find();
-    }
-
-//    public static void removeTextWatermark(PDPage page) throws IOException {
+//    }
+//
+//    /**
+//     * 找图片水印
+//     *
+//     * @param page
+//     * @return
+//     * @throws IOException
+//     */
+//    public static Map<COSName, PDImageXObject> findImageWatermark(PDPage page) throws IOException {
+//        Map<COSName, PDImageXObject> watermarkMap = MapUtil.newHashMap();
 //        PDResources resources = page.getResources();
-////        if (StrUtil.isAllBlank(fundName, trustName)) {
-////            return;
-////        }
-//        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
-//        stripper.setSortByPosition(true);
-//        stripper.addRegion("watermark", new Rectangle2D.Float(0, 0, page.getMediaBox().getWidth(), page.getMediaBox().getHeight()));
-//        stripper.extractRegions(page);
+//        Iterable<COSName> xObjectNames = resources.getXObjectNames();
+//        for (COSName xObjectName : xObjectNames) {
+//            PDXObject xObject = resources.getXObject(xObjectName);
+//            PDStream stream = xObject.getStream();
+//            PDImageXObject imageXObject = null;
+//            try {
+//                imageXObject = new PDImageXObject(stream, resources);
+//            } catch (Exception e) {
+//                e.printStackTrace();
+//            }
+//            if (imageXObject != null) {
+//                watermarkMap.put(xObjectName, imageXObject);
+//            }
+//        }
+//        return watermarkMap;
+//    }
 //
-//        PDFStreamEngine engine = new PDFTextStripper();
-//        engine.addOperator(new SetMatrix(stripper));
+//    private static Map<String, List<String>> generateWatermarkListMap(String fundName, String trustName, String registerNumber) {
+//        Map<String, List<String>> result = MapUtil.newHashMap(32);
+//        // 生成水印列表
 //
-//    }
+//        fundName = StrUtil.isNotBlank(fundName) ? fundName : "私募证券投资基金";
+//        trustName = StrUtil.isNotBlank(trustName) ? trustName : "资产管理有限公司";
+//        registerNumber = StrUtil.isNotBlank(registerNumber) ? registerNumber : "";
+//        String text = fundName + trustName + registerNumber;
+//        text = text.replaceAll("[()]", ""); // 移除括号
+//        List<String> textList = new ArrayList<>(new HashSet<>(convertStringToList(text)));
+//        Collections.reverse(textList);
+//        StringBuilder sb = new StringBuilder(textList.size());
+//        for (String ch : textList) {
+//            sb.append(ch);
+//        }
+//        String joinedText = sb.toString();
 //
-//    private static void processResources(PDResources resources) throws IOException {
-//        for (COSName name : resources.getXObjectNames()) {
-//            PDXObject xobject = resources.getXObject(name);
-//            if (xobject instanceof PDFormXObject) {
-//                PDFormXObject formXObject = (PDFormXObject) xobject;
-//                writeTokensToStream(formXObject.getContentStream(),
-//                        createTokensWithoutText(formXObject));
-//                processResources(formXObject.getResources());
-//            }
+//        // 基本水印列表
+//        List<String> wkList = new ArrayList<>();
+//        for (String ch : textList) {
+//            wkList.add(ch + "\r\n");
+//            wkList.add("\r\n" + ch);
 //        }
-//        for (COSName name : resources.getPatternNames()) {
-//            PDAbstractPattern pattern = resources.getPattern(name);
-//            if (pattern instanceof PDTilingPattern) {
-//                PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
-//                writeTokensToStream(tilingPattern.getContentStream(),
-//                        createTokensWithoutText(tilingPattern));
-//                processResources(tilingPattern.getResources());
+//
+//        // 查找数字
+//        List<String> matches = findDigits(fundName);
+//        if (!matches.isEmpty()) {
+//            for (String match : matches) {
+//                wkList.add("\r\n" + match);
+//                wkList.add(match + "\r\n");
 //            }
 //        }
+//        wkList.add("-");
+//        wkList.add("【");
+//        wkList.add("】");
+//        wkList.add("\r");
+//        wkList.add("\r\n");
+//
+//        String noNumberText = removeDigits(joinedText);
+//
+//        // 生成不同字段的水印列表
+//        result.put("report_name", new ArrayList<>(wkList));
+//        result.get("report_name").addAll(convertStringToList("有限公司"));
+//
+//        result.put("less", new ArrayList<>(wkList));
+//
+//        result.put("more", new ArrayList<>(wkList));
+//        result.get("more").addAll(convertStringToList(noNumberText));
+//
+//        result.put("leverage", new ArrayList<>(wkList));
+//        result.get("leverage").addAll(convertStringToList(removeKeywords(noNumberText, "基金资产")));
+//
+//        result.put("base_info", new ArrayList<>(wkList));
+//        result.get("base_info").addAll(convertStringToList(removeKeywords(text, "基", "金", "投资", "管理", "有", "份", "融", "资", "产", "本", "号", "收益", "策略", "期")));
+//
+//        result.put("industry", new ArrayList<>(wkList));
+//        result.get("industry").addAll(convertStringToList(removeKeywords(noNumberText, "基金融公产")));
+//
+//        result.put("market_value", new ArrayList<>(Collections.singletonList("\n")));
+//        return result;
 //    }
 //
-//    private static void writeTokensToStream(PDStream newContents, List<Object> newTokens) throws IOException {
-//        try (OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE)) {
-//            ContentStreamWriter writer = new ContentStreamWriter(out);
-//            writer.writeTokens(newTokens);
+//    private static List<String> findDigits(String text) {
+//        List<String> digits = new ArrayList<>();
+//        Pattern pattern = Pattern.compile("\\d");
+//        Matcher matcher = pattern.matcher(text);
+//        while (matcher.find()) {
+//            digits.add(matcher.group());
 //        }
+//        return digits;
 //    }
 //
-//    private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException {
-//        PDFStreamParser parser = new PDFStreamParser(contentStream);
-//        Object token = parser.parseNextToken();
-//        List<Object> newTokens = new ArrayList<>();
-//        while (token != null) {
-//            if (token instanceof Operator op) {
-//                String opName = op.getName();
-//                if (OperatorName.SET_MATRIX.equals(opName)) {
-//                    // remove the argument to this operator
-//                    newTokens.remove(newTokens.size() - 1);
+//    private static String removeDigits(String text) {
+//        return text.replaceAll("\\d", "");
+//    }
 //
-//                    token = parser.parseNextToken();
-//                    continue;
-//                }
-//            }
-//            newTokens.add(token);
-//            token = parser.parseNextToken();
+//    private static String removeKeywords(String text, String... keywords) {
+//        for (String keyword : keywords) {
+//            text = text.replaceAll(keyword, "");
+//        }
+//        return text;
+//    }
+//
+//    private static List<String> convertStringToList(String text) {
+//        List<String> charList = new ArrayList<>();
+//        for (char c : text.toCharArray()) {
+//            charList.add(c + "");
+//        }
+//        return charList;
+//    }
+//
+//    public static String processString(List<String> wmList, String string) {
+//        // 生成正则表达式模式
+//        String pat = String.join("|", wmList);
+//        // 使用正则表达式移除wmList中的元素
+//        string = removeMatches(string, pat);
+//        // 替换中文括号为英文括号
+//        string = string.replace("(", "(").replace(")", ")");
+//        // 移除空格
+//        string = string.replace(" ", "");
+//        // 如果字符串以括号开头,则移除第一个字符
+//        if (startsWithParenthesis(string)) {
+//            string = string.substring(1);
 //        }
-//        return newTokens;
+//
+//        return string;
+//    }
+//
+//    private static String removeMatches(String input, String pattern) {
+//        // 编译正则表达式
+//        Pattern compiledPattern = Pattern.compile(pattern);
+//        // 创建Matcher对象
+//        Matcher matcher = compiledPattern.matcher(input);
+//        // 使用replaceAll方法替换所有匹配到的字符为空字符串
+//        return matcher.replaceAll("");
 //    }
-}
+//
+//    private static boolean startsWithParenthesis(String input) {
+//        // 匹配以括号开头的字符串
+//        Pattern pattern = Pattern.compile("^[()].*");
+//        Matcher matcher = pattern.matcher(input);
+//        return matcher.find();
+//    }
+//
+////    public static void removeTextWatermark(PDPage page) throws IOException {
+////        PDResources resources = page.getResources();
+//////        if (StrUtil.isAllBlank(fundName, trustName)) {
+//////            return;
+//////        }
+////        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
+////        stripper.setSortByPosition(true);
+////        stripper.addRegion("watermark", new Rectangle2D.Float(0, 0, page.getMediaBox().getWidth(), page.getMediaBox().getHeight()));
+////        stripper.extractRegions(page);
+////
+////        PDFStreamEngine engine = new PDFTextStripper();
+////        engine.addOperator(new SetMatrix(stripper));
+////
+////    }
+////
+////    private static void processResources(PDResources resources) throws IOException {
+////        for (COSName name : resources.getXObjectNames()) {
+////            PDXObject xobject = resources.getXObject(name);
+////            if (xobject instanceof PDFormXObject) {
+////                PDFormXObject formXObject = (PDFormXObject) xobject;
+////                writeTokensToStream(formXObject.getContentStream(),
+////                        createTokensWithoutText(formXObject));
+////                processResources(formXObject.getResources());
+////            }
+////        }
+////        for (COSName name : resources.getPatternNames()) {
+////            PDAbstractPattern pattern = resources.getPattern(name);
+////            if (pattern instanceof PDTilingPattern) {
+////                PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
+////                writeTokensToStream(tilingPattern.getContentStream(),
+////                        createTokensWithoutText(tilingPattern));
+////                processResources(tilingPattern.getResources());
+////            }
+////        }
+////    }
+////
+////    private static void writeTokensToStream(PDStream newContents, List<Object> newTokens) throws IOException {
+////        try (OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE)) {
+////            ContentStreamWriter writer = new ContentStreamWriter(out);
+////            writer.writeTokens(newTokens);
+////        }
+////    }
+////
+////    private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException {
+////        PDFStreamParser parser = new PDFStreamParser(contentStream);
+////        Object token = parser.parseNextToken();
+////        List<Object> newTokens = new ArrayList<>();
+////        while (token != null) {
+////            if (token instanceof Operator op) {
+////                String opName = op.getName();
+////                if (OperatorName.SET_MATRIX.equals(opName)) {
+////                    // remove the argument to this operator
+////                    newTokens.remove(newTokens.size() - 1);
+////
+////                    token = parser.parseNextToken();
+////                    continue;
+////                }
+////            }
+////            newTokens.add(token);
+////            token = parser.parseNextToken();
+////        }
+////        return newTokens;
+////    }
+//}

+ 0 - 12
service-deploy/pom.xml

@@ -58,18 +58,6 @@
                 </exclusion>
             </exclusions>
         </dependency>
-
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-devtools</artifactId>
-            <scope>runtime</scope>
-            <optional>true</optional>
-        </dependency>
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-configuration-processor</artifactId>
-            <optional>true</optional>
-        </dependency>
     </dependencies>
 
     <build>

+ 2 - 0
service-deploy/src/main/resources/application.yml

@@ -81,6 +81,8 @@ simuwang:
   # token过期时间,单位:分钟
   token-expire: 1440
   token-secret: qwertyuiopasdfghjklzxcvbnm1234567890qwertyuiopasdfghjklzxcvbnm12
+  # 是否开启python的报告解析功能,开启后报告全部用python接口来解析;当开启时要配置如下python解析地址
+  enable-py-parser: false
   py-base-url: "http://192.168.1.224:8088"
   # rsa 公钥私钥配置
   security-rsa: