wangzaijun 6 kuukautta sitten
vanhempi
commit
9affd13580

+ 5 - 4
service-daq/src/main/java/com/simuwang/daq/components/ReportParseUtils.java

@@ -2,7 +2,6 @@ package com.simuwang.daq.components;
 
 import cn.hutool.core.collection.ListUtil;
 import cn.hutool.core.util.StrUtil;
-import com.simuwang.base.common.enums.ReportType;
 
 import java.util.Calendar;
 import java.util.List;
@@ -50,7 +49,7 @@ public final class ReportParseUtils {
     }
 
     /**
-     * 匹配分级基金名称
+     * 匹配分级基金名称(并且把母基金追加到第一行)
      *
      * @param text 文本内容
      * @return /
@@ -68,12 +67,14 @@ public final class ReportParseUtils {
             matches.add(matcher.group());
         }
         // 提取字母并按字母顺序排序
-        return matches.stream()
+        List<String> levels = matches.stream()
                 .map(s -> s.replaceAll("[^A-F]", ""))
                 .distinct()
                 .sorted()
                 .map(letter -> letter + "级")
                 .collect(Collectors.toList());
+        levels.add(0, "母基金");
+        return levels;
     }
 
     /**
@@ -140,7 +141,7 @@ public final class ReportParseUtils {
             return null;
         }
         // 所有报告的正则识别方式
-        String patterns = ReportType.getAllPatterns();
+        String patterns = "年度|年报|季度|季报|季|月度|月报|月|年";
         // 编译正则表达式模式
         Pattern pattern = Pattern.compile(patterns);
         // 创建Matcher对象

+ 0 - 118
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/AbstractPDReportParser.java

@@ -21,12 +21,10 @@ import technology.tabula.Table;
 import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
 
 import java.io.IOException;
-import java.util.Calendar;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.function.Function;
-import java.util.stream.Collectors;
 
 /**
  * @author wangzaijun
@@ -194,7 +192,6 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
         }
         // 分级基金匹配
         List<String> levels = ReportParseUtils.matchTieredFund(String.join(",", this.textList));
-        levels.add(0, "母基金");
         for (int i = 0; i < infos.size(); i++) {
             DTO dto = this.buildDto(fileId, clazz, infos.get(i));
             if (dto == null) {
@@ -225,119 +222,4 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
         }
         return null;
     }
-
-//    /**
-//     * 匹配分级基金名称
-//     *
-//     * @param text 文本内容
-//     * @return /
-//     */
-//    protected List<String> matchTieredFund(String text) {
-//        List<String> matches = ListUtil.list(false);
-//        if (StrUtil.isBlank(text)) {
-//            return matches;
-//        }
-//        // 使用正则表达式查找匹配项
-//        Pattern pattern = Pattern.compile("[A-F]级|基金[A-F]");
-//        Matcher matcher = pattern.matcher(text);
-//        // 收集所有匹配项
-//        while (matcher.find()) {
-//            matches.add(matcher.group());
-//        }
-//        // 提取字母并按字母顺序排序
-//        return matches.stream()
-//                .map(s -> s.replaceAll("[^A-F]", ""))
-//                .distinct()
-//                .sorted()
-//                .map(letter -> letter + "级")
-//                .collect(Collectors.toList());
-//    }
-//
-//    /**
-//     * 匹配报告日期
-//     *
-//     * @param string 文本内容
-//     * @return 报告日期
-//     */
-//    private String matchReportDate(String string) {
-//        if (string == null) {
-//            return null;
-//        }
-//        // 编译正则表达式模式
-//        Pattern pat1 = Pattern.compile("(2\\d{3}).*([一二三四1234])季度");  // 2023年XXX3季度
-//        Pattern pat2 = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");  // 2023-12-31
-//        Pattern pat3 = Pattern.compile("(2\\d{3})年年度");  // 2023年年度
-//        Pattern pat4 = Pattern.compile("(\\d{4})年(\\d{1,2})月");  // 2023年12月
-//        Pattern pat5 = Pattern.compile("\\d{4}\\d{2}\\d{2}");  // 20231231
-//        Pattern pat6 = Pattern.compile("(2\\d{3})年度");  // 2023年度
-//        // 创建Matcher对象
-//        Matcher matcher1 = pat1.matcher(string);
-//        Matcher matcher2 = pat2.matcher(string);
-//        Matcher matcher3 = pat3.matcher(string);
-//        Matcher matcher4 = pat4.matcher(string);
-//        Matcher matcher5 = pat5.matcher(string);
-//        Matcher matcher6 = pat6.matcher(string);
-//        // 尝试匹配
-//        if (matcher1.find()) {
-//            String year = matcher1.group(1);
-//            String quarter = matcher1.group(2);
-//            return switch (quarter) {
-//                case "一", "1" -> year + "-03-31";
-//                case "二", "2" -> year + "-06-30";
-//                case "三", "3" -> year + "-09-30";
-//                case "四", "4" -> year + "-12-31";
-//                default -> null;
-//            };
-//        } else if (matcher2.find()) {
-//            return matcher2.group();
-//        } else if (matcher5.find()) {
-//            return matcher5.group();
-//        } else if (matcher3.find()) {
-//            return matcher3.group(1) + "-12-31";
-//        } else if (matcher6.find()) {
-//            return matcher6.group(1) + "-12-31";
-//        } else if (matcher4.find()) {
-//            String year = matcher4.group(1);
-//            String month = matcher4.group(2);
-//            int lastDayOfMonth = getLastDayOfMonth(Integer.parseInt(year), Integer.parseInt(month));
-//            return year + "-" + padZero(month) + "-" + padZero(lastDayOfMonth + "");
-//        } else {
-//            return null;
-//        }
-//    }
-//
-//    /**
-//     * 匹配报告类型,如“季度”、“年度”
-//     *
-//     * @param string 输入字符串
-//     * @return 匹配到的报告类型子字符串,如果没有匹配到则返回null
-//     */
-//    private String matchReportType(String string) {
-//        if (string == null) {
-//            return null;
-//        }
-//        // 所有报告的正则识别方式
-//        String patterns = ReportType.getAllPatterns();
-//        // 编译正则表达式模式
-//        Pattern pattern = Pattern.compile(patterns);
-//        // 创建Matcher对象
-//        Matcher matcher = pattern.matcher(string);
-//        // 尝试匹配
-//        if (matcher.find()) {
-//            return matcher.group();
-//        } else {
-//            return null;
-//        }
-//    }
-//
-//    private int getLastDayOfMonth(int year, int month) {
-//        Calendar calendar = Calendar.getInstance();
-//        calendar.set(Calendar.YEAR, year);
-//        calendar.set(Calendar.MONTH, month - 1); // Calendar.MONTH 是从0开始的
-//        return calendar.getActualMaximum(Calendar.DAY_OF_MONTH);
-//    }
-//
-//    private String padZero(String number) {
-//        return String.format("%02d", Integer.parseInt(number));
-//    }
 }

+ 0 - 1
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDAnnuallyReportParser.java

@@ -112,7 +112,6 @@ public class PDAnnuallyReportParser extends PDQuarterlyReportParser<AnnuallyRepo
         List<ReportFinancialIndicatorsDTO> dtos = ListUtil.list(false);
         // 分级基金
         List<String> levels = ReportParseUtils.matchTieredFund(String.join(",", this.textList));
-        levels.add(0, "母基金");
         // 假设这里可能存在分级基金,不存在表格跨页
         for (int k = 0; k < this.financialIndicatorsTables.size(); k++) {
             Table table = this.financialIndicatorsTables.get(k);