|
@@ -21,12 +21,10 @@ import technology.tabula.Table;
|
|
|
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
-import java.util.Calendar;
|
|
|
import java.util.HashMap;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
import java.util.function.Function;
|
|
|
-import java.util.stream.Collectors;
|
|
|
|
|
|
/**
|
|
|
* @author wangzaijun
|
|
@@ -194,7 +192,6 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
|
|
|
}
|
|
|
// 分级基金匹配
|
|
|
List<String> levels = ReportParseUtils.matchTieredFund(String.join(",", this.textList));
|
|
|
- levels.add(0, "母基金");
|
|
|
for (int i = 0; i < infos.size(); i++) {
|
|
|
DTO dto = this.buildDto(fileId, clazz, infos.get(i));
|
|
|
if (dto == null) {
|
|
@@ -225,119 +222,4 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
|
|
|
}
|
|
|
return null;
|
|
|
}
|
|
|
-
|
|
|
-// /**
|
|
|
-// * 匹配分级基金名称
|
|
|
-// *
|
|
|
-// * @param text 文本内容
|
|
|
-// * @return /
|
|
|
-// */
|
|
|
-// protected List<String> matchTieredFund(String text) {
|
|
|
-// List<String> matches = ListUtil.list(false);
|
|
|
-// if (StrUtil.isBlank(text)) {
|
|
|
-// return matches;
|
|
|
-// }
|
|
|
-// // 使用正则表达式查找匹配项
|
|
|
-// Pattern pattern = Pattern.compile("[A-F]级|基金[A-F]");
|
|
|
-// Matcher matcher = pattern.matcher(text);
|
|
|
-// // 收集所有匹配项
|
|
|
-// while (matcher.find()) {
|
|
|
-// matches.add(matcher.group());
|
|
|
-// }
|
|
|
-// // 提取字母并按字母顺序排序
|
|
|
-// return matches.stream()
|
|
|
-// .map(s -> s.replaceAll("[^A-F]", ""))
|
|
|
-// .distinct()
|
|
|
-// .sorted()
|
|
|
-// .map(letter -> letter + "级")
|
|
|
-// .collect(Collectors.toList());
|
|
|
-// }
|
|
|
-//
|
|
|
-// /**
|
|
|
-// * 匹配报告日期
|
|
|
-// *
|
|
|
-// * @param string 文本内容
|
|
|
-// * @return 报告日期
|
|
|
-// */
|
|
|
-// private String matchReportDate(String string) {
|
|
|
-// if (string == null) {
|
|
|
-// return null;
|
|
|
-// }
|
|
|
-// // 编译正则表达式模式
|
|
|
-// Pattern pat1 = Pattern.compile("(2\\d{3}).*([一二三四1234])季度"); // 2023年XXX3季度
|
|
|
-// Pattern pat2 = Pattern.compile("\\d{4}-\\d{2}-\\d{2}"); // 2023-12-31
|
|
|
-// Pattern pat3 = Pattern.compile("(2\\d{3})年年度"); // 2023年年度
|
|
|
-// Pattern pat4 = Pattern.compile("(\\d{4})年(\\d{1,2})月"); // 2023年12月
|
|
|
-// Pattern pat5 = Pattern.compile("\\d{4}\\d{2}\\d{2}"); // 20231231
|
|
|
-// Pattern pat6 = Pattern.compile("(2\\d{3})年度"); // 2023年度
|
|
|
-// // 创建Matcher对象
|
|
|
-// Matcher matcher1 = pat1.matcher(string);
|
|
|
-// Matcher matcher2 = pat2.matcher(string);
|
|
|
-// Matcher matcher3 = pat3.matcher(string);
|
|
|
-// Matcher matcher4 = pat4.matcher(string);
|
|
|
-// Matcher matcher5 = pat5.matcher(string);
|
|
|
-// Matcher matcher6 = pat6.matcher(string);
|
|
|
-// // 尝试匹配
|
|
|
-// if (matcher1.find()) {
|
|
|
-// String year = matcher1.group(1);
|
|
|
-// String quarter = matcher1.group(2);
|
|
|
-// return switch (quarter) {
|
|
|
-// case "一", "1" -> year + "-03-31";
|
|
|
-// case "二", "2" -> year + "-06-30";
|
|
|
-// case "三", "3" -> year + "-09-30";
|
|
|
-// case "四", "4" -> year + "-12-31";
|
|
|
-// default -> null;
|
|
|
-// };
|
|
|
-// } else if (matcher2.find()) {
|
|
|
-// return matcher2.group();
|
|
|
-// } else if (matcher5.find()) {
|
|
|
-// return matcher5.group();
|
|
|
-// } else if (matcher3.find()) {
|
|
|
-// return matcher3.group(1) + "-12-31";
|
|
|
-// } else if (matcher6.find()) {
|
|
|
-// return matcher6.group(1) + "-12-31";
|
|
|
-// } else if (matcher4.find()) {
|
|
|
-// String year = matcher4.group(1);
|
|
|
-// String month = matcher4.group(2);
|
|
|
-// int lastDayOfMonth = getLastDayOfMonth(Integer.parseInt(year), Integer.parseInt(month));
|
|
|
-// return year + "-" + padZero(month) + "-" + padZero(lastDayOfMonth + "");
|
|
|
-// } else {
|
|
|
-// return null;
|
|
|
-// }
|
|
|
-// }
|
|
|
-//
|
|
|
-// /**
|
|
|
-// * 匹配报告类型,如“季度”、“年度”
|
|
|
-// *
|
|
|
-// * @param string 输入字符串
|
|
|
-// * @return 匹配到的报告类型子字符串,如果没有匹配到则返回null
|
|
|
-// */
|
|
|
-// private String matchReportType(String string) {
|
|
|
-// if (string == null) {
|
|
|
-// return null;
|
|
|
-// }
|
|
|
-// // 所有报告的正则识别方式
|
|
|
-// String patterns = ReportType.getAllPatterns();
|
|
|
-// // 编译正则表达式模式
|
|
|
-// Pattern pattern = Pattern.compile(patterns);
|
|
|
-// // 创建Matcher对象
|
|
|
-// Matcher matcher = pattern.matcher(string);
|
|
|
-// // 尝试匹配
|
|
|
-// if (matcher.find()) {
|
|
|
-// return matcher.group();
|
|
|
-// } else {
|
|
|
-// return null;
|
|
|
-// }
|
|
|
-// }
|
|
|
-//
|
|
|
-// private int getLastDayOfMonth(int year, int month) {
|
|
|
-// Calendar calendar = Calendar.getInstance();
|
|
|
-// calendar.set(Calendar.YEAR, year);
|
|
|
-// calendar.set(Calendar.MONTH, month - 1); // Calendar.MONTH 是从0开始的
|
|
|
-// return calendar.getActualMaximum(Calendar.DAY_OF_MONTH);
|
|
|
-// }
|
|
|
-//
|
|
|
-// private String padZero(String number) {
|
|
|
-// return String.format("%02d", Integer.parseInt(number));
|
|
|
-// }
|
|
|
}
|