|
@@ -1,15 +1,12 @@
|
|
package com.smppw.modaq.application.components.report.parser.pdf;
|
|
package com.smppw.modaq.application.components.report.parser.pdf;
|
|
|
|
|
|
import cn.hutool.core.collection.ListUtil;
|
|
import cn.hutool.core.collection.ListUtil;
|
|
-import cn.hutool.core.map.MapUtil;
|
|
|
|
import cn.hutool.core.util.StrUtil;
|
|
import cn.hutool.core.util.StrUtil;
|
|
import com.smppw.modaq.application.components.CustomPDFTextStripper;
|
|
import com.smppw.modaq.application.components.CustomPDFTextStripper;
|
|
-import com.smppw.modaq.application.components.ReportParseUtils;
|
|
|
|
import com.smppw.modaq.application.components.report.parser.AbstractReportParser;
|
|
import com.smppw.modaq.application.components.report.parser.AbstractReportParser;
|
|
import com.smppw.modaq.common.enums.ReportParseStatus;
|
|
import com.smppw.modaq.common.enums.ReportParseStatus;
|
|
import com.smppw.modaq.common.enums.ReportType;
|
|
import com.smppw.modaq.common.enums.ReportType;
|
|
import com.smppw.modaq.common.exception.ReportParseException;
|
|
import com.smppw.modaq.common.exception.ReportParseException;
|
|
-import com.smppw.modaq.domain.dto.report.BaseReportLevelDTO;
|
|
|
|
import com.smppw.modaq.domain.dto.report.ReportData;
|
|
import com.smppw.modaq.domain.dto.report.ReportData;
|
|
import com.smppw.modaq.domain.dto.report.ReportParserParams;
|
|
import com.smppw.modaq.domain.dto.report.ReportParserParams;
|
|
import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
|
|
import com.smppw.modaq.domain.mapper.EmailFieldMappingMapper;
|
|
@@ -23,10 +20,7 @@ import technology.tabula.Table;
|
|
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
|
|
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
|
|
|
|
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
-import java.util.HashMap;
|
|
|
|
import java.util.List;
|
|
import java.util.List;
|
|
-import java.util.Map;
|
|
|
|
-import java.util.function.Function;
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
* @author wangzaijun
|
|
* @author wangzaijun
|
|
@@ -118,101 +112,101 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
|
|
this.textList = null;
|
|
this.textList = null;
|
|
}
|
|
}
|
|
|
|
|
|
- /**
|
|
|
|
- * 构建只有两列表格的dto数据对象,如果有分级基金时(并且一个表格可能跨页)
|
|
|
|
- *
|
|
|
|
- * @param <DTO> 泛型对象
|
|
|
|
- * @param fileId 文件id
|
|
|
|
- * @param tables 表格
|
|
|
|
- * @param clazz 泛型对象
|
|
|
|
- * @param function 表格转换的函数
|
|
|
|
- * @return /
|
|
|
|
- */
|
|
|
|
- protected <DTO extends BaseReportLevelDTO<?>> List<DTO> buildLevelDto(Integer fileId,
|
|
|
|
- List<Table> tables,
|
|
|
|
- Class<DTO> clazz,
|
|
|
|
- Function<Table, Map<String, Object>> function) {
|
|
|
|
- List<DTO> dtos = ListUtil.list(true);
|
|
|
|
- // 信息表格字段和值映射
|
|
|
|
- List<Map<String, Object>> infos = ListUtil.list(true);
|
|
|
|
- for (Table table : tables) {
|
|
|
|
- Map<String, Object> infoMap = MapUtil.newHashMap(16);
|
|
|
|
- Map<String, Object> temp = function.apply(table);
|
|
|
|
- for (String key : temp.keySet()) {
|
|
|
|
- // 如果infoMap中包含了该key时,先放infos中然后重新声明新map对象
|
|
|
|
- if (infoMap.containsKey(key)) {
|
|
|
|
- infos.add(new HashMap<>(infoMap));
|
|
|
|
- infoMap = MapUtil.newHashMap(16);
|
|
|
|
- } else {
|
|
|
|
- infoMap.put(key, temp.get(key));
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- infos.add(infoMap);
|
|
|
|
- }
|
|
|
|
- // 分级基金匹配
|
|
|
|
- List<String> levels = ReportParseUtils.matchTieredFund(String.join(",", this.textList));
|
|
|
|
- for (int i = 0; i < infos.size(); i++) {
|
|
|
|
- DTO dto = this.buildDto(fileId, clazz, infos.get(i));
|
|
|
|
- if (dto == null) {
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
- if (levels.size() > i) {
|
|
|
|
- dto.setLevel(levels.get(i));
|
|
|
|
- }
|
|
|
|
- dtos.add(dto);
|
|
|
|
- }
|
|
|
|
- return dtos;
|
|
|
|
- }
|
|
|
|
|
|
+// /**
|
|
|
|
+// * 构建只有两列表格的dto数据对象,如果有分级基金时(并且一个表格可能跨页)
|
|
|
|
+// *
|
|
|
|
+// * @param <DTO> 泛型对象
|
|
|
|
+// * @param fileId 文件id
|
|
|
|
+// * @param tables 表格
|
|
|
|
+// * @param clazz 泛型对象
|
|
|
|
+// * @param function 表格转换的函数
|
|
|
|
+// * @return /
|
|
|
|
+// */
|
|
|
|
+// protected <DTO extends BaseReportLevelDTO<?>> List<DTO> buildLevelDto(Integer fileId,
|
|
|
|
+// List<Table> tables,
|
|
|
|
+// Class<DTO> clazz,
|
|
|
|
+// Function<Table, Map<String, Object>> function) {
|
|
|
|
+// List<DTO> dtos = ListUtil.list(true);
|
|
|
|
+// // 信息表格字段和值映射
|
|
|
|
+// List<Map<String, Object>> infos = ListUtil.list(true);
|
|
|
|
+// for (Table table : tables) {
|
|
|
|
+// Map<String, Object> infoMap = MapUtil.newHashMap(16);
|
|
|
|
+// Map<String, Object> temp = function.apply(table);
|
|
|
|
+// for (String key : temp.keySet()) {
|
|
|
|
+// // 如果infoMap中包含了该key时,先放infos中然后重新声明新map对象
|
|
|
|
+// if (infoMap.containsKey(key)) {
|
|
|
|
+// infos.add(new HashMap<>(infoMap));
|
|
|
|
+// infoMap = MapUtil.newHashMap(16);
|
|
|
|
+// } else {
|
|
|
|
+// infoMap.put(key, temp.get(key));
|
|
|
|
+// }
|
|
|
|
+// }
|
|
|
|
+// infos.add(infoMap);
|
|
|
|
+// }
|
|
|
|
+// // 分级基金匹配
|
|
|
|
+// List<String> levels = ReportParseUtils.matchTieredFund(String.join(",", this.textList));
|
|
|
|
+// for (int i = 0; i < infos.size(); i++) {
|
|
|
|
+// DTO dto = this.buildDto(fileId, clazz, infos.get(i));
|
|
|
|
+// if (dto == null) {
|
|
|
|
+// continue;
|
|
|
|
+// }
|
|
|
|
+// if (levels.size() > i) {
|
|
|
|
+// dto.setLevel(levels.get(i));
|
|
|
|
+// }
|
|
|
|
+// dtos.add(dto);
|
|
|
|
+// }
|
|
|
|
+// return dtos;
|
|
|
|
+// }
|
|
|
|
|
|
- /**
|
|
|
|
- * 判断表格是否需要合并并且把需要合并的表格放在一个索引对应的map中(主要处理有分级基金数据表格,不处理可能会把数据绑定到错误的分级基金中)
|
|
|
|
- *
|
|
|
|
- * @param table 待判断的表格
|
|
|
|
- * @param rowCount 判断依据(一个完整的表格有多少行)
|
|
|
|
- * @param index 当前完整表格所在的索引位置
|
|
|
|
- * @param tables 不需要合并的表格集合
|
|
|
|
- * @param spanningPageTableMap 需要合并的表格数据
|
|
|
|
- * @return /
|
|
|
|
- */
|
|
|
|
- protected int splitTables(Table table, int rowCount, int index,
|
|
|
|
- List<Table> tables, Map<Integer, List<Table>> spanningPageTableMap) {
|
|
|
|
- if (table.getRowCount() == rowCount) {
|
|
|
|
- index++;
|
|
|
|
- tables.add(table);
|
|
|
|
- } else {
|
|
|
|
- List<Table> tempList = spanningPageTableMap.getOrDefault(index, ListUtil.list(true));
|
|
|
|
- tempList.add(table);
|
|
|
|
- spanningPageTableMap.putIfAbsent(index, tempList);
|
|
|
|
- // 一个表格最多跨两页,所以一个表格最多被分成2部分
|
|
|
|
- if (tempList.size() == 2) {
|
|
|
|
- index++;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return index;
|
|
|
|
- }
|
|
|
|
|
|
+// /**
|
|
|
|
+// * 判断表格是否需要合并并且把需要合并的表格放在一个索引对应的map中(主要处理有分级基金数据表格,不处理可能会把数据绑定到错误的分级基金中)
|
|
|
|
+// *
|
|
|
|
+// * @param table 待判断的表格
|
|
|
|
+// * @param rowCount 判断依据(一个完整的表格有多少行)
|
|
|
|
+// * @param index 当前完整表格所在的索引位置
|
|
|
|
+// * @param tables 不需要合并的表格集合
|
|
|
|
+// * @param spanningPageTableMap 需要合并的表格数据
|
|
|
|
+// * @return /
|
|
|
|
+// */
|
|
|
|
+// protected int splitTables(Table table, int rowCount, int index,
|
|
|
|
+// List<Table> tables, Map<Integer, List<Table>> spanningPageTableMap) {
|
|
|
|
+// if (table.getRowCount() == rowCount) {
|
|
|
|
+// index++;
|
|
|
|
+// tables.add(table);
|
|
|
|
+// } else {
|
|
|
|
+// List<Table> tempList = spanningPageTableMap.getOrDefault(index, ListUtil.list(true));
|
|
|
|
+// tempList.add(table);
|
|
|
|
+// spanningPageTableMap.putIfAbsent(index, tempList);
|
|
|
|
+// // 一个表格最多跨两页,所以一个表格最多被分成2部分
|
|
|
|
+// if (tempList.size() == 2) {
|
|
|
|
+// index++;
|
|
|
|
+// }
|
|
|
|
+// }
|
|
|
|
+// return index;
|
|
|
|
+// }
|
|
|
|
|
|
- /**
|
|
|
|
- * 把跨页的表格合并为一个并且插入到数据集合中的特定位置
|
|
|
|
- *
|
|
|
|
- * @param tables 数据集合
|
|
|
|
- * @param spanningPageTableMap 跨页的表格对象
|
|
|
|
- */
|
|
|
|
- protected void handleSpanningPageTables(List<Table> tables,
|
|
|
|
- Map<Integer, List<Table>> spanningPageTableMap) {
|
|
|
|
- // 跨页的表格数据处理
|
|
|
|
- for (Map.Entry<Integer, List<Table>> entry : spanningPageTableMap.entrySet()) {
|
|
|
|
- List<Table> spanningPageShareChangeTables = entry.getValue();
|
|
|
|
- Table master = spanningPageShareChangeTables.get(0);
|
|
|
|
- if (spanningPageShareChangeTables.size() == 2) {
|
|
|
|
- Table slave = spanningPageShareChangeTables.get(1);
|
|
|
|
- int rowCount = master.getRowCount();
|
|
|
|
- for (int j = 0; j < slave.getRowCount(); j++) {
|
|
|
|
- for (int k = 0; k < slave.getColCount(); k++) {
|
|
|
|
- master.add(slave.getCell(j, k), rowCount + j, k);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- tables.add(entry.getKey(), master);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+// /**
|
|
|
|
+// * 把跨页的表格合并为一个并且插入到数据集合中的特定位置
|
|
|
|
+// *
|
|
|
|
+// * @param tables 数据集合
|
|
|
|
+// * @param spanningPageTableMap 跨页的表格对象
|
|
|
|
+// */
|
|
|
|
+// protected void handleSpanningPageTables(List<Table> tables,
|
|
|
|
+// Map<Integer, List<Table>> spanningPageTableMap) {
|
|
|
|
+// // 跨页的表格数据处理
|
|
|
|
+// for (Map.Entry<Integer, List<Table>> entry : spanningPageTableMap.entrySet()) {
|
|
|
|
+// List<Table> spanningPageShareChangeTables = entry.getValue();
|
|
|
|
+// Table master = spanningPageShareChangeTables.get(0);
|
|
|
|
+// if (spanningPageShareChangeTables.size() == 2) {
|
|
|
|
+// Table slave = spanningPageShareChangeTables.get(1);
|
|
|
|
+// int rowCount = master.getRowCount();
|
|
|
|
+// for (int j = 0; j < slave.getRowCount(); j++) {
|
|
|
|
+// for (int k = 0; k < slave.getColCount(); k++) {
|
|
|
|
+// master.add(slave.getCell(j, k), rowCount + j, k);
|
|
|
|
+// }
|
|
|
|
+// }
|
|
|
|
+// }
|
|
|
|
+// tables.add(entry.getKey(), master);
|
|
|
|
+// }
|
|
|
|
+// }
|
|
}
|
|
}
|