|
@@ -10,7 +10,6 @@ import cn.hutool.json.JSONUtil;
|
|
import com.smppw.modaq.application.components.CustomPDFTextStripper;
|
|
import com.smppw.modaq.application.components.CustomPDFTextStripper;
|
|
import com.smppw.modaq.application.components.ReportParseUtils;
|
|
import com.smppw.modaq.application.components.ReportParseUtils;
|
|
import com.smppw.modaq.application.components.report.parser.AbstractReportParser;
|
|
import com.smppw.modaq.application.components.report.parser.AbstractReportParser;
|
|
-import com.smppw.modaq.common.conts.Constants;
|
|
|
|
import com.smppw.modaq.common.enums.ReportParseStatus;
|
|
import com.smppw.modaq.common.enums.ReportParseStatus;
|
|
import com.smppw.modaq.common.enums.ReportType;
|
|
import com.smppw.modaq.common.enums.ReportType;
|
|
import com.smppw.modaq.common.exception.ReportParseException;
|
|
import com.smppw.modaq.common.exception.ReportParseException;
|
|
@@ -65,9 +64,8 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
|
|
// 解析报告和表格
|
|
// 解析报告和表格
|
|
try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(filepath))) {
|
|
try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(filepath))) {
|
|
// 识别所有文字(去水印后的)
|
|
// 识别所有文字(去水印后的)
|
|
- CustomPDFTextStripper stripper = new CustomPDFTextStripper();
|
|
|
|
- stripper.setSortByPosition(true);
|
|
|
|
- String text = stripper.getText(document).replace(Constants.WATERMARK_REPLACE, StrUtil.EMPTY);
|
|
|
|
|
|
+ CustomPDFTextStripper stripper = new CustomPDFTextStripper(true, StrUtil.EMPTY);
|
|
|
|
+ String text = stripper.getText(document);
|
|
this.textList = StrUtil.split(text, System.lineSeparator());
|
|
this.textList = StrUtil.split(text, System.lineSeparator());
|
|
this.textList.removeIf(StrUtil::isBlank);
|
|
this.textList.removeIf(StrUtil::isBlank);
|
|
if (this.textList.isEmpty()) {
|
|
if (this.textList.isEmpty()) {
|
|
@@ -188,15 +186,10 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
|
|
List<DTO> dtos = ListUtil.list(true);
|
|
List<DTO> dtos = ListUtil.list(true);
|
|
// 信息表格字段和值映射
|
|
// 信息表格字段和值映射
|
|
List<Map<String, Object>> infos = ListUtil.list(true);
|
|
List<Map<String, Object>> infos = ListUtil.list(true);
|
|
- Map<String, Object> infoMap = null;
|
|
|
|
for (Table table : tables) {
|
|
for (Table table : tables) {
|
|
|
|
+ Map<String, Object> infoMap = MapUtil.newHashMap(16);
|
|
Map<String, Object> temp = function.apply(table);
|
|
Map<String, Object> temp = function.apply(table);
|
|
for (String key : temp.keySet()) {
|
|
for (String key : temp.keySet()) {
|
|
- // 如果infoMap为null,先声明然后放在infos中
|
|
|
|
- if (infoMap == null) {
|
|
|
|
- infoMap = MapUtil.newHashMap(16);
|
|
|
|
- infos.add(infoMap);
|
|
|
|
- }
|
|
|
|
// 如果infoMap中包含了该key时,先放infos中然后重新声明新map对象
|
|
// 如果infoMap中包含了该key时,先放infos中然后重新声明新map对象
|
|
if (infoMap.containsKey(key)) {
|
|
if (infoMap.containsKey(key)) {
|
|
infos.add(new HashMap<>(infoMap));
|
|
infos.add(new HashMap<>(infoMap));
|
|
@@ -205,6 +198,7 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
|
|
infoMap.put(key, temp.get(key));
|
|
infoMap.put(key, temp.get(key));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+ infos.add(infoMap);
|
|
}
|
|
}
|
|
// 分级基金匹配
|
|
// 分级基金匹配
|
|
List<String> levels = ReportParseUtils.matchTieredFund(String.join(",", this.textList));
|
|
List<String> levels = ReportParseUtils.matchTieredFund(String.join(",", this.textList));
|
|
@@ -213,7 +207,9 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
|
|
if (dto == null) {
|
|
if (dto == null) {
|
|
continue;
|
|
continue;
|
|
}
|
|
}
|
|
- dto.setLevel(levels.get(i));
|
|
|
|
|
|
+ if (levels.size() > i) {
|
|
|
|
+ dto.setLevel(levels.get(i));
|
|
|
|
+ }
|
|
dtos.add(dto);
|
|
dtos.add(dto);
|
|
}
|
|
}
|
|
return dtos;
|
|
return dtos;
|