|
@@ -10,10 +10,9 @@ import com.smppw.modaq.domain.dto.report.ReportAssetAllocationDTO;
|
|
|
import jakarta.mail.internet.MimeUtility;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
-import java.util.Calendar;
|
|
|
-import java.util.List;
|
|
|
-import java.util.Map;
|
|
|
-import java.util.Objects;
|
|
|
+import java.time.YearMonth;
|
|
|
+import java.util.*;
|
|
|
+import java.util.function.Function;
|
|
|
import java.util.regex.Matcher;
|
|
|
import java.util.regex.Pattern;
|
|
|
import java.util.stream.Collectors;
|
|
@@ -37,6 +36,51 @@ public final class ReportParseUtils {
|
|
|
*/
|
|
|
public static final Map<String, String> ASSET_ALLOCATION_TYPE_MAPPER = MapUtil.newHashMap(32, true);
|
|
|
|
|
|
+ // 预编译所有正则表达式(线程安全)
|
|
|
+ private static final Map<Pattern, Function<Matcher, String>> PATTERNS = new LinkedHashMap<>() {{
|
|
|
+ // 季度报告(最高优先级)
|
|
|
+ put(Pattern.compile("(20\\d{2})[^\\d]*([一二三四1234])季"), matcher -> {
|
|
|
+ String year = matcher.group(1);
|
|
|
+ return switch (matcher.group(2)) {
|
|
|
+ case "一", "1" -> year + "-03-31";
|
|
|
+ case "二", "2" -> year + "-06-30";
|
|
|
+ case "三", "3" -> year + "-09-30";
|
|
|
+ case "四", "4" -> year + "-12-31";
|
|
|
+ default -> null;
|
|
|
+ };
|
|
|
+ });
|
|
|
+
|
|
|
+ // 明确日期格式(次优先级)
|
|
|
+ put(Pattern.compile("\\d{4}-\\d{2}-\\d{2}"), Matcher::group);
|
|
|
+
|
|
|
+ // 年度报告(合并相似正则)
|
|
|
+ put(Pattern.compile("(20\\d{2})年(度|年度)"), matcher -> matcher.group(1) + "-12-31");
|
|
|
+
|
|
|
+ // 月份相关格式(统一处理)
|
|
|
+ put(Pattern.compile("(20\\d{2})年[_-]?(\\d{1,2})月"), matcher -> {
|
|
|
+ int year = Integer.parseInt(matcher.group(1));
|
|
|
+ int month = Integer.parseInt(matcher.group(2));
|
|
|
+ return formatMonthEnd(year, month);
|
|
|
+ });
|
|
|
+
|
|
|
+ // 紧凑格式(如202312月)
|
|
|
+ put(Pattern.compile("(\\d{4})(\\d{2})月"), matcher -> {
|
|
|
+ int year = Integer.parseInt(matcher.group(1));
|
|
|
+ int month = Integer.parseInt(matcher.group(2));
|
|
|
+ return formatMonthEnd(year, month);
|
|
|
+ });
|
|
|
+
|
|
|
+ // 纯数字格式(最低优先级)
|
|
|
+ put(Pattern.compile("(?<!\\d)\\d{4}(0[1-9]|1[0-2])(?!\\d)"), matcher -> {
|
|
|
+ String group = matcher.group();
|
|
|
+ int year = Integer.parseInt(group.substring(0, 4));
|
|
|
+ int month = Integer.parseInt(group.substring(4, 6));
|
|
|
+ return formatMonthEnd(year, month);
|
|
|
+ });
|
|
|
+ put(Pattern.compile("(?<!\\d)\\d{4}(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])(?!\\d)"), Matcher::group);
|
|
|
+ }};
|
|
|
+
|
|
|
+
|
|
|
static {
|
|
|
// 财务指标
|
|
|
FINANCIAL_INDICATORS_COLUMN_NAMES.add("期末基金净资产");
|
|
@@ -316,74 +360,19 @@ public final class ReportParseUtils {
|
|
|
/**
|
|
|
* 匹配报告日期
|
|
|
*
|
|
|
- * @param string 文本内容
|
|
|
+ * @param text 文本内容
|
|
|
* @return 报告日期
|
|
|
*/
|
|
|
- public static String matchReportDate(String string) {
|
|
|
- if (string == null) {
|
|
|
- return null;
|
|
|
- }
|
|
|
- // 编译正则表达式模式
|
|
|
- Pattern pat1 = Pattern.compile("(2\\d{3}).*([一二三四1234])季"); // 2023年XXX3季(度\报)
|
|
|
- Pattern pat2 = Pattern.compile("\\d{4}-\\d{2}-\\d{2}"); // 2023-12-31
|
|
|
- Pattern pat3 = Pattern.compile("(2\\d{3})年年度"); // 2023年年度
|
|
|
- Pattern pat6 = Pattern.compile("(2\\d{3})年度"); // 2023年度
|
|
|
- Pattern pat4 = Pattern.compile("(\\d{4})年(\\d{1,2})月"); // 2023年12月
|
|
|
- Pattern pat7 = Pattern.compile("(\\d{4})年_(\\d{1,2})月"); // 2023年_12月
|
|
|
- Pattern pat8 = Pattern.compile("(\\d{4})(\\d{2})月"); // 202312月
|
|
|
- Pattern pat5 = Pattern.compile("(?<!\\d)\\d{4}(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])(?!\\d)"); // 20231231
|
|
|
- // 创建Matcher对象
|
|
|
- Matcher matcher1 = pat1.matcher(string);
|
|
|
- Matcher matcher2 = pat2.matcher(string);
|
|
|
- Matcher matcher3 = pat3.matcher(string);
|
|
|
- Matcher matcher6 = pat6.matcher(string);
|
|
|
- Matcher matcher4 = pat4.matcher(string);
|
|
|
- Matcher matcher7 = pat7.matcher(string);
|
|
|
- Matcher matcher8 = pat8.matcher(string);
|
|
|
- Matcher matcher5 = pat5.matcher(string);
|
|
|
- // 尝试匹配
|
|
|
- if (matcher1.find()) {
|
|
|
- String year = matcher1.group(1);
|
|
|
- String quarter = matcher1.group(2);
|
|
|
- return switch (quarter) {
|
|
|
- case "一", "1" -> year + "-03-31";
|
|
|
- case "二", "2" -> year + "-06-30";
|
|
|
- case "三", "3" -> year + "-09-30";
|
|
|
- case "四", "4" -> year + "-12-31";
|
|
|
- default -> null;
|
|
|
- };
|
|
|
- } else if (matcher2.find()) {
|
|
|
- return matcher2.group();
|
|
|
- } else if (matcher3.find()) {
|
|
|
- return matcher3.group(1) + "-12-31";
|
|
|
- } else if (matcher6.find()) {
|
|
|
- return matcher6.group(1) + "-12-31";
|
|
|
- } else {
|
|
|
- // 格式4和格式7优先,不满足时才用格式5,都不满足返回null
|
|
|
- boolean m4 = matcher4.find();
|
|
|
- boolean m7 = matcher7.find();
|
|
|
- boolean m8 = matcher8.find();
|
|
|
- if (m4 || m7 || m8) {
|
|
|
- String year;
|
|
|
- String month;
|
|
|
- if (m8) {
|
|
|
- year = matcher8.group(1);
|
|
|
- month = matcher8.group(2);
|
|
|
- } else if (m4) {
|
|
|
- year = matcher4.group(1);
|
|
|
- month = matcher4.group(2);
|
|
|
- } else {
|
|
|
- year = matcher7.group(1);
|
|
|
- month = matcher7.group(2);
|
|
|
- }
|
|
|
- int lastDayOfMonth = getLastDayOfMonth(Integer.parseInt(year), Integer.parseInt(month));
|
|
|
- return year + "-" + padZero(month) + "-" + padZero(lastDayOfMonth + "");
|
|
|
- } else if (matcher5.find()) {
|
|
|
- return matcher5.group();
|
|
|
- } else {
|
|
|
- return null;
|
|
|
- }
|
|
|
- }
|
|
|
+ public static String matchReportDate(String text) {
|
|
|
+ return Optional.ofNullable(text)
|
|
|
+ .flatMap(str -> PATTERNS.entrySet().stream()
|
|
|
+ .map(entry -> {
|
|
|
+ Matcher matcher = entry.getKey().matcher(str);
|
|
|
+ return matcher.find() ? entry.getValue().apply(matcher) : null;
|
|
|
+ })
|
|
|
+ .filter(result -> result != null)
|
|
|
+ .findFirst())
|
|
|
+ .orElse(null);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -436,6 +425,18 @@ public final class ReportParseUtils {
|
|
|
return String.format("%02d", Integer.parseInt(number));
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * 计算指定月份的最后一天
|
|
|
+ */
|
|
|
+ private static String formatMonthEnd(int year, int month) {
|
|
|
+ try {
|
|
|
+ YearMonth ym = YearMonth.of(year, month);
|
|
|
+ return String.format("%d-%02d-%02d", year, month, ym.lengthOfMonth());
|
|
|
+ } catch (Exception e) { // 处理非法月份(如month=13)
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
public static void main(String[] args) throws IOException, ReportParseException {
|
|
|
String s = """
|
|
|
=?utf-8?b?5oGS5aSp5Y2D6LGh5LqM5pyf56eB5Yuf6K+B5Yi45oqV6LWE5Z+66YeRLeaBkg==?=
|