浏览代码

feat:excel报告识别的季报解析逻辑

wangzaijun 6 月之前
父节点
当前提交
d0f2f2ea22

+ 67 - 3
service-base/src/main/java/com/simuwang/base/pojo/dto/report/CustomExcelTable.java

@@ -8,10 +8,74 @@ import java.io.Serializable;
 /**
  * @author wangzaijun
  * @date 2024/10/14 8:52
- * @description 自定义传递的表格对象,设置表格表格、列数(如果为0则不限制列数)、开始列索引和行数(包含表头,如果为0则不限制行数)
- * key 表示这个表格属于什么信息的表格
+ * @description 自定义传递的表格对象
  */
-public record CustomExcelTable(String key, String title, int colCount, int startCol, int rowCount) implements Serializable {
+public class CustomExcelTable implements Serializable {
     @Serial
     private static final long serialVersionUID = Constants.DEFAULT_SERIAL_ID;
+    /**
+     * Table identifier, used by the logic that decides where the parsed data goes; must not be null.
+     */
+    private final String key;
+    /**
+     * Title used to recognise the table (matched with equals or contains); must not be null.
+     */
+    private final String title;
+    /**
+     * Number of columns to recognise (must be greater than 0).
+     */
+    private final int colCount;
+    /**
+     * Number of rows to recognise (the title row is excluded, the header row is included).
+     * A value of 0 or less passed to the constructor means "no row limit" and is stored as
+     * Integer.MAX_VALUE.
+     */
+    private final int rowCount;
+    /**
+     * Column at which recognition starts; defaults to 1 (the second column) and is never below 1.
+     */
+    private final int startCol;
+
+    public CustomExcelTable(String key, String title, int colCount) {
+        this(key, title, colCount, 0); // rowCount 0 = no row limit
+    }
+
+    public CustomExcelTable(String key, String title, int colCount, int rowCount) {
+        this(key, title, colCount, rowCount, 1); // startCol 1 = default start column
+    }
+
+    public CustomExcelTable(String key, String title, int colCount, int rowCount, int startCol) {
+        if (key == null) {
+            throw new NullPointerException("table key is null.");
+        }
+        if (title == null) {
+            throw new NullPointerException("table title is null.");
+        }
+        if (colCount <= 0) {
+            throw new IndexOutOfBoundsException("table col index out of."); // NOTE(review): raised for a non-positive column count, not for an index access - confirm type and message are intended
+        }
+        this.key = key;
+        this.title = title;
+        this.colCount = colCount;
+        this.rowCount = rowCount <= 0 ? Integer.MAX_VALUE : rowCount; // non-positive means unlimited
+        this.startCol = Math.max(startCol, 1); // values below 1 are clamped to 1
+    }
+
+    public String getKey() {
+        return key;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public int getColCount() {
+        return colCount;
+    }
+
+    public int getStartCol() {
+        return startCol;
+    }
+
+    public int getRowCount() {
+        return rowCount;
+    }
 }

+ 5 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportParseStatus.java

@@ -8,7 +8,12 @@ public enum ReportParseStatus implements StatusCode {
     REPORT_IS_SCAN(21002, "报告[{}] 为扫描件"),
     NO_SUPPORT_TEMPLATE(21003, "报告[{}] 是不支持的文件格式"),
     NOT_A_FIXED_FORMAT(21004, "报告[{}] 不是基协统一格式"),
+
     PARSE_FUND_INFO_FAIL(21010, "报告[{}] 没有解析到基金基本信息"),
+    PARSE_NAV_INFO_FAIL(21011, "报告[{}] 没有解析到基金净值信息"),
+    PARSE_FINANCIAL_INFO_FAIL(21012, "报告[{}] 没有解析到基金财务指标信息"),
+    PARSE_INDUSTRY_INFO_FAIL(21013, "报告[{}] 没有解析到基金行业配置信息"),
+    PARSE_ASSET_INFO_FAIL(21014, "报告[{}] 没有解析到基金资产配置信息"),
     ;
     private final int code;
     private final String msg;

+ 25 - 13
service-base/src/main/java/com/simuwang/base/pojo/dto/report/SimpleTable.java

@@ -25,7 +25,7 @@ public class SimpleTable implements Serializable {
     /**
      * 表格数据行
      */
-    private final List<List<String>> rows;
+    private final List<List<String>> tables;
     /**
      * 表格列数
      */
@@ -39,14 +39,14 @@ public class SimpleTable implements Serializable {
         this.tableKey = tableKey;
         this.title = title;
         this.colCount = colCount;
-        this.rows = ListUtil.list(true);
+        this.tables = ListUtil.list(true);
     }
 
     public void addRow(List<String> row) {
-        rows.add(row);
+        tables.add(row);
         this.rowCount++;
         if (this.colCount == 0) {
-            this.colCount = this.rows.stream().map(List::size).max(Comparator.naturalOrder()).orElse(0);
+            this.colCount = this.tables.stream().map(List::size).max(Comparator.naturalOrder()).orElse(0);
         }
     }
 
@@ -66,8 +66,19 @@ public class SimpleTable implements Serializable {
         return title;
     }
 
-    public List<List<String>> getRows() {
-        return rows;
+    public List<List<String>> getTables() {
+        int subRows = this.rowCount - this.tables.size(); // rows still missing compared with rowCount
+        if (subRows <= 0) {
+            return this.tables; // nothing to pad; the internal list itself is returned
+        }
+        for (int i = 0; i < subRows; i++) { // append one padding row per missing row
+            List<String> row = ListUtil.list(true);
+            for (int j = 0; j < this.colCount; j++) {
+                row.add(null); // padding rows hold colCount null cells
+            }
+            this.tables.add(row); // NOTE: this getter mutates the internal row list
+        }
+        return this.tables;
     }
 
     /**
@@ -76,7 +87,7 @@ public class SimpleTable implements Serializable {
      * @return /
      */
     public Iterator<List<String>> iterator() {
-        return new TableIterator(rows);
+        return new TableIterator(this.getTables());
     }
 
     /**
@@ -87,6 +98,7 @@ public class SimpleTable implements Serializable {
      * @return 单元格内容
      */
     public String getCell(int row, int column) {
+        List<List<String>> rows = this.getTables();
         if (row < 0 || row >= rows.size() || column < 0 || column >= rows.get(row).size()) {
             throw new IndexOutOfBoundsException("Invalid row or column index");
         }
@@ -97,7 +109,7 @@ public class SimpleTable implements Serializable {
     public String toString() {
         return "SimpleTable{" +
                 "title='" + title + '\'' +
-                ", rows=" + rows +
+                ", tables=" + tables +
                 '}';
     }
 
@@ -105,21 +117,21 @@ public class SimpleTable implements Serializable {
      * 内部迭代器类
      */
     private static class TableIterator implements Iterator<List<String>> {
-        private final List<List<String>> rows;
+        private final List<List<String>> tables;
         private int currentIndex = 0;
 
-        public TableIterator(List<List<String>> rows) {
-            this.rows = rows;
+        public TableIterator(List<List<String>> tables) {
+            this.tables = tables;
         }
 
         @Override
         public boolean hasNext() {
-            return currentIndex < rows.size();
+            return currentIndex < tables.size();
         }
 
         @Override
         public List<String> next() {
-            return rows.get(currentIndex++);
+            return tables.get(currentIndex++);
         }
     }
 }

+ 6 - 6
service-daq/src/main/java/com/simuwang/daq/components/CustomExcelMultiSheetListener.java

@@ -47,20 +47,20 @@ public class CustomExcelMultiSheetListener extends AnalysisEventListener<LinkedH
         String title = ReportParseUtils.cleaningValue(row.get(1));
         if (title != null) {
             for (CustomExcelTable customExcelTable : customExcelTables) {
-                String tableTitle = customExcelTable.title();
+                String tableTitle = customExcelTable.getTitle();
                 if (title.equals(tableTitle) || title.contains(tableTitle)) {
-                    this.table = new SimpleTable(customExcelTable.key(), title, customExcelTable.colCount());
+                    this.table = new SimpleTable(customExcelTable.getKey(), title, customExcelTable.getColCount());
                     this.customExcelTable = customExcelTable;
                     this.tables.add(this.table);
                     return;
                 }
             }
         }
-        int rowCount = this.customExcelTable.rowCount() <= 0 ? Integer.MAX_VALUE : this.customExcelTable.rowCount();
-        if (this.table != null && this.table.getRowCount() < rowCount) {
+        // 表格不为空 并且 识别的表格行数比配置的表格行数小,则把当前行追加到表格的行中
+        if (this.table != null && this.table.getRowCount() < this.customExcelTable.getRowCount()) {
             List<String> tableRow = ListUtil.list(true);
-            int colCount = this.table.getColCount() <= 0 ? row.size() : this.table.getColCount() + this.customExcelTable.startCol();
-            for (int i = this.customExcelTable.startCol(); i < colCount; i++) {
+            int colCount = this.table.getColCount() <= 0 ? row.size() : this.table.getColCount() + this.customExcelTable.getStartCol();
+            for (int i = this.customExcelTable.getStartCol(); i < colCount; i++) {
                 tableRow.add(ReportParseUtils.cleaningValue(row.get(i)));
             }
             this.table.addRow(tableRow);

+ 67 - 0
service-daq/src/main/java/com/simuwang/daq/components/ReportParseUtils.java

@@ -12,6 +12,73 @@ import java.util.stream.Collectors;
 
 public final class ReportParseUtils {
     /**
+     * Table column names used for industry allocation.
+     */
+    public static final List<String> INDUSTRY_COLUMN_NAMES = ListUtil.list(false);
+    /**
+     * Table column names used for share changes.
+     */
+    public static final List<String> SHARE_CHANGE_COLUMN_NAMES = ListUtil.list(false);
+    /**
+     * Column names used to recognise the main financial indicators.
+     */
+    public static final List<String> FINANCIAL_INDICATORS_COLUMN_NAMES = ListUtil.list(false);
+
+    static {
+        // Financial indicators
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("期末基金净资产");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("报告期期末单位净值");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("本期利润");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("本期已实现收益");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("期末可供分配利润");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("期末可供分配基金份额利润");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("基金份额累计净值增长率");
+
+        // China Securities Regulatory Commission (CSRC) industry standard
+        INDUSTRY_COLUMN_NAMES.add("农、林、牧、渔业");
+        INDUSTRY_COLUMN_NAMES.add("采矿业");
+        INDUSTRY_COLUMN_NAMES.add("制造业");
+        INDUSTRY_COLUMN_NAMES.add("电力、热力、燃气及水生产和供应业");
+        INDUSTRY_COLUMN_NAMES.add("建筑业");
+        INDUSTRY_COLUMN_NAMES.add("批发和零售业");
+        INDUSTRY_COLUMN_NAMES.add("交通运输、仓储和邮政业");
+        INDUSTRY_COLUMN_NAMES.add("住宿和餐饮业");
+        INDUSTRY_COLUMN_NAMES.add("信息传输、软件和信息技术服务业");
+        INDUSTRY_COLUMN_NAMES.add("金融业");
+        INDUSTRY_COLUMN_NAMES.add("房地产业");
+        INDUSTRY_COLUMN_NAMES.add("租赁和商务服务业");
+        INDUSTRY_COLUMN_NAMES.add("科学研究和技术服务业");
+        INDUSTRY_COLUMN_NAMES.add("水利、环境和公共设施管理业");
+        INDUSTRY_COLUMN_NAMES.add("居民服务、修理和其他服务业");
+        INDUSTRY_COLUMN_NAMES.add("教育");
+        INDUSTRY_COLUMN_NAMES.add("卫生和社会工作");
+        INDUSTRY_COLUMN_NAMES.add("文化、体育和娱乐业");
+        INDUSTRY_COLUMN_NAMES.add("综合");
+
+        INDUSTRY_COLUMN_NAMES.add("港股通");
+
+        // The entries below follow the international standard
+        INDUSTRY_COLUMN_NAMES.add("能源");
+        INDUSTRY_COLUMN_NAMES.add("原材料");
+        INDUSTRY_COLUMN_NAMES.add("工业");
+        INDUSTRY_COLUMN_NAMES.add("非日常生活消费品");
+        INDUSTRY_COLUMN_NAMES.add("日常消费品");
+        INDUSTRY_COLUMN_NAMES.add("医疗保健");
+        INDUSTRY_COLUMN_NAMES.add("金融");
+        INDUSTRY_COLUMN_NAMES.add("信息技术");
+        INDUSTRY_COLUMN_NAMES.add("通讯服务");
+        INDUSTRY_COLUMN_NAMES.add("公用事业");
+        INDUSTRY_COLUMN_NAMES.add("房地产");
+
+        // Columns used to recognise the share-change table
+        SHARE_CHANGE_COLUMN_NAMES.add("报告期期初基金份额总额");
+        SHARE_CHANGE_COLUMN_NAMES.add("减:报告期期间基金总赎回份额");
+        SHARE_CHANGE_COLUMN_NAMES.add("期末基金总份额/期末基金实缴总额");
+        SHARE_CHANGE_COLUMN_NAMES.add("报告期期间基金拆分变动份额");
+        SHARE_CHANGE_COLUMN_NAMES.add("报告期期间基金总申购份额");
+    }
+
+    /**
      * 数据清洗,替换圆括号,包含中文或英文的圆括号
      *
      * @param value /

+ 21 - 31
service-daq/src/main/java/com/simuwang/daq/components/report/parser/AbstractReportParser.java

@@ -6,6 +6,7 @@ import cn.hutool.core.util.ReflectUtil;
 import cn.hutool.core.util.StrUtil;
 import com.simuwang.base.mapper.EmailFieldMappingMapper;
 import com.simuwang.base.pojo.dos.EmailFieldMappingDO;
+import com.simuwang.base.pojo.dto.report.BaseReportDTO;
 import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
 import com.simuwang.base.pojo.dto.report.ReportData;
 import com.simuwang.base.pojo.dto.report.ReportParserParams;
@@ -61,6 +62,26 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
     protected abstract void cleaningReportData(T reportData);
 
     /**
+     * Builds a DTO data object for a table that has only two columns.
+     *
+     * @param <DTO>   the DTO type
+     * @param fileId  file id, set on the new DTO
+     * @param clazz   the DTO class; it is instantiated through its no-argument constructor
+     * @param infoMap name/value data extracted from the table, handed to buildInfo to fill the DTO
+     * @return the populated DTO, or null when instantiation or population throws any exception
+     */
+    protected <DTO extends BaseReportDTO<?>> DTO buildDto(Integer fileId, Class<DTO> clazz, Map<String, Object> infoMap) {
+        try {
+            DTO dto = clazz.getDeclaredConstructor().newInstance();
+            dto.setFileId(fileId);
+            this.buildInfo(infoMap, dto);
+            return dto;
+        } catch (Exception ignored) { // any exception is swallowed here; the method then returns null
+        }
+        return null;
+    }
+
+    /**
      * 对象字段设置
      *
      * @param extInfoMap 名称与值的对应关系
@@ -100,35 +121,4 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
         reportInfo.setReportDate(ReportParseUtils.matchReportDate(reportName));
         return reportInfo;
     }
-
-//    protected String cleaningValue(Object value) {
-//        return this.cleaningValue(value, true);
-//    }
-//
-//    /**
-//     * 数据简单清洗,并全部转为字符串类型
-//     *
-//     * @param value              待清洗的数据
-//     * @param replaceParentheses 是否替换圆括号
-//     * @return /
-//     */
-//    protected String cleaningValue(Object value, boolean replaceParentheses) {
-//        String fieldValue = StrUtil.toStringOrNull(value);
-//        if (!StrUtil.isNullOrUndefined(fieldValue)) {
-//            // 特殊字符替换,空格替换为空字符
-//            fieldValue = fieldValue
-//                    .replace("\r", StrUtil.EMPTY)
-//                    .replace(";", ";")
-//                    .replaceAll(" ", StrUtil.EMPTY);
-//            if (replaceParentheses) {
-//                // 正则表达式匹配中文括号及其内容,并替换为空字符串
-//                fieldValue = Pattern.compile("[(|(][^)]*[)|)]").matcher(fieldValue).replaceAll(StrUtil.EMPTY);
-//            }
-//        }
-//        // 如果仅有 “-” 该字段值为null
-//        if (Objects.equals("-", fieldValue)) {
-//            fieldValue = null;
-//        }
-//        return StrUtil.isBlank(fieldValue) ? null : fieldValue;
-//    }
 }

+ 50 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/excel/AbstractExcelReportParser.java

@@ -1,16 +1,22 @@
 package com.simuwang.daq.components.report.parser.excel;
 
+import cn.hutool.core.collection.ListUtil;
 import cn.hutool.core.exceptions.ExceptionUtil;
+import cn.hutool.core.util.StrUtil;
 import com.alibaba.excel.EasyExcel;
 import com.alibaba.excel.read.builder.ExcelReaderBuilder;
 import com.simuwang.base.common.exception.ReportParseException;
 import com.simuwang.base.mapper.EmailFieldMappingMapper;
 import com.simuwang.base.pojo.dto.report.*;
 import com.simuwang.daq.components.CustomExcelMultiSheetListener;
+import com.simuwang.daq.components.ReportParseUtils;
 import com.simuwang.daq.components.report.parser.AbstractReportParser;
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
 
 public abstract class AbstractExcelReportParser<T extends ReportData> extends AbstractReportParser<T> {
     public AbstractExcelReportParser(EmailFieldMappingMapper fieldMappingMapper) {
@@ -58,8 +64,21 @@ public abstract class AbstractExcelReportParser<T extends ReportData> extends Ab
         // cleaning.
     }
 
+    /**
+     * Each subclass supplies its own table-parsing configuration.
+     *
+     * @return the table configurations of the subclass
+     */
     protected abstract List<CustomExcelTable> customExcelTables();
 
+    /**
+     * Parses the remaining data and sets it on the report result object.
+     *
+     * @param reportInfo     basic information of the report
+     * @param reportFundInfo basic fund information of the report
+     * @param tables         all parsed tables
+     * @return the report result object
+     */
     protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO reportFundInfo, List<SimpleTable> tables);
 
     /**
@@ -69,4 +88,35 @@ public abstract class AbstractExcelReportParser<T extends ReportData> extends Ab
      * @return /
      */
     protected abstract ReportFundInfoDTO buildFundInfo(ReportParserParams params, List<SimpleTable> tables);
+
+    /**
+     * Builds the data of the current fund and of its tiered (level) funds; the tiered-fund
+     * tables may contain no data. Tables whose matched level is blank, or whose DTO cannot be
+     * built (buildDto returned null), are skipped.
+     *
+     * @param fileId   file id
+     * @param tables   data tables
+     * @param clazz    DTO class object
+     * @param function mapping from a table to the name/value map used to fill the DTO
+     * @param <DTO>    the DTO type
+     * @return the DTOs that could be built, each with its level set
+     */
+    protected <DTO extends BaseReportLevelDTO<?>> List<DTO> buildLevelDto(Integer fileId, List<SimpleTable> tables, Class<DTO> clazz,
+                                                                          Function<SimpleTable, Map<String, Object>> function) {
+        String titles = tables.stream().map(SimpleTable::getTitle).collect(Collectors.joining(",")); // all table titles joined with commas
+        List<String> levels = ReportParseUtils.matchTieredFund(titles);
+        List<DTO> dtos = ListUtil.list(true);
+        for (int i = 0; i < tables.size(); i++) {
+            String level = levels.get(i); // NOTE(review): assumes matchTieredFund yields at least one entry per table, in table order - confirm, otherwise this throws IndexOutOfBoundsException
+            if (StrUtil.isBlank(level)) {
+                continue;
+            }
+            Map<String, Object> infoMap = function.apply(tables.get(i));
+            DTO dto = this.buildDto(fileId, clazz, infoMap);
+            if (dto == null) {
+                continue;
+            }
+            dto.setLevel(level);
+            dtos.add(dto);
+        }
+        return dtos;
+    }
 }

+ 38 - 5
service-daq/src/main/java/com/simuwang/daq/components/report/parser/excel/ExcelMonthlyReportParser.java

@@ -1,12 +1,16 @@
 package com.simuwang.daq.components.report.parser.excel;
 
 import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.map.MapUtil;
+import com.simuwang.base.common.exception.ReportParseException;
 import com.simuwang.base.mapper.EmailFieldMappingMapper;
 import com.simuwang.base.pojo.dto.report.*;
 import com.simuwang.daq.components.report.parser.ReportParserConstant;
 import org.springframework.stereotype.Component;
 
 import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
 
 @Component(ReportParserConstant.PARSER_EXCEL_MONTHLY)
 public class ExcelMonthlyReportParser extends AbstractExcelReportParser<MonthlyReportData> {
@@ -17,21 +21,50 @@ public class ExcelMonthlyReportParser extends AbstractExcelReportParser<MonthlyR
     @Override
     protected List<CustomExcelTable> customExcelTables() {
         List<CustomExcelTable> customExcelTables = ListUtil.list(true);
-        customExcelTables.add(new CustomExcelTable("fundInfo", "基金概况", 4, 1, 0));
-        customExcelTables.add(new CustomExcelTable("netReport", "净值月报", 5, 1, 2));
-        customExcelTables.add(new CustomExcelTable("netReport", "级基金净值表", 5, 1, 2));
+        customExcelTables.add(new CustomExcelTable("fundInfo", "基金概况", 4));
+        customExcelTables.add(new CustomExcelTable("netReport", "净值月报", 5, 2));
+        customExcelTables.add(new CustomExcelTable("netReport", "级基金净值表", 5, 2));
         return customExcelTables;
     }
 
     @Override
     protected MonthlyReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO reportFundInfo, List<SimpleTable> tables) {
-        return null;
+        MonthlyReportData reportData = new MonthlyReportData(reportInfo, reportFundInfo);
+        // 取分级基金和当前母基金
+        List<SimpleTable> netNavTables = tables.stream().filter(e -> "netReport".equals(e.getTableKey())).collect(Collectors.toList());
+        // 母基金和分级基金的净值
+        List<ReportNetReportDTO> dtos = this.buildLevelDto(reportInfo.getFileId(), netNavTables,
+                ReportNetReportDTO.class, t -> {
+                    Map<String, Object> extInfoMap = MapUtil.newHashMap(16);
+                    for (int i = 0; i < t.getColCount(); i++) {
+                        String key = t.getCell(0, i);
+                        String value = t.getCell(1, i);
+                        extInfoMap.put(key, value);
+                    }
+                    return extInfoMap;
+                });
+        reportData.setNetReport(dtos);
+        return reportData;
     }
 
     @Override
     protected ReportFundInfoDTO buildFundInfo(ReportParserParams params, List<SimpleTable> tables) {
         SimpleTable fundInfoTable = tables.stream().filter(e -> "fundInfo".equals(e.getTableKey())).findFirst().orElse(null);
-        return null;
+        if (fundInfoTable == null) {
+            throw new ReportParseException(ReportParseStatus.PARSE_FUND_INFO_FAIL, params.getFilename());
+        }
+        // 月报的基金基本信息是四列的表格
+        Map<String, Object> baseInfoMap = MapUtil.newHashMap(32);
+        for (int i = 0; i < fundInfoTable.getTables().size(); i++) {
+            @SuppressWarnings("all")
+            List<String> row = fundInfoTable.getTables().get(i);
+            for (int j = 0; j < 2; j++) {
+                baseInfoMap.put(row.get(j * 2), row.get(j * 2 + 1));
+            }
+        }
+        ReportFundInfoDTO dto = new ReportFundInfoDTO(params.getFileId());
+        this.buildInfo(baseInfoMap, dto);
+        return dto;
     }
 
     @Override

+ 97 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/excel/ExcelQuarterlyReportParser.java

@@ -0,0 +1,97 @@
+package com.simuwang.daq.components.report.parser.excel;
+
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.map.MapUtil;
+import com.simuwang.base.common.exception.ReportParseException;
+import com.simuwang.base.mapper.EmailFieldMappingMapper;
+import com.simuwang.base.pojo.dto.report.*;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+@Component(ReportParserConstant.PARSER_EXCEL_QUARTERLY)
+public class ExcelQuarterlyReportParser extends AbstractExcelReportParser<QuarterlyReportData> { // Excel parser producing QuarterlyReportData
+    public ExcelQuarterlyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    @Override
+    protected List<CustomExcelTable> customExcelTables() {
+        List<CustomExcelTable> customExcelTables = ListUtil.list(true); // constructor args below: key, title, colCount[, rowCount]
+        customExcelTables.add(new CustomExcelTable("fundInfo", "基金基本情况", 2));
+        customExcelTables.add(new CustomExcelTable("financialIndicators", "主要财务指标", 5, 6));
+        customExcelTables.add(new CustomExcelTable("financialIndicators", "级基金主要财务指标", 5, 6));
+        customExcelTables.add(new CustomExcelTable("assetAllocation", "期末基金资产组合情况", 3));
+        customExcelTables.add(new CustomExcelTable("investmentIndustry", "报告期末按行业分类的股票投资组合", 4));
+        customExcelTables.add(new CustomExcelTable("investmentIndustry", "报告期末按行业分类的港股通投资股票投资组合", 3));
+        customExcelTables.add(new CustomExcelTable("shareChange", "基金份额变动情况", 3, 6));
+        customExcelTables.add(new CustomExcelTable("shareChange", "级基金份额变动情况", 3, 6));
+        return customExcelTables;
+    }
+
+    @Override
+    protected QuarterlyReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO reportFundInfo, List<SimpleTable> tables) {
+        Integer fileId = reportInfo.getFileId();
+        String reportName = reportInfo.getReportName();
+        // Main financial indicators
+        List<ReportFinancialIndicatorsDTO> financialIndicators = this.buildFinancialIndicatorsInfo(fileId, tables);
+        // Asset allocation
+        List<ReportAssetAllocationDTO> assetAllocations = this.buildAssetAllocationInfo(fileId, reportName, tables);
+        // Industry allocation
+        List<ReportInvestmentIndustryDTO> investmentIndustries = this.buildInvestmentIndustryInfo(fileId, tables);
+        // Share changes
+        List<ReportShareChangeDTO> shareChanges = this.buildShareChangeInfo(fileId, tables);
+        // Build the returned structure
+        QuarterlyReportData reportData = new QuarterlyReportData(reportInfo, reportFundInfo);
+        reportData.setFinancialIndicators(financialIndicators);
+        reportData.setAssetAllocation(assetAllocations);
+        reportData.setInvestmentIndustry(investmentIndustries);
+        reportData.setShareChange(shareChanges);
+        return reportData;
+    }
+
+    @Override
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params, List<SimpleTable> tables) {
+        SimpleTable fundInfoTable = tables.stream().filter(e -> "fundInfo".equals(e.getTableKey())).findFirst().orElse(null);
+        if (fundInfoTable == null) {
+            throw new ReportParseException(ReportParseStatus.PARSE_FUND_INFO_FAIL, params.getFilename());
+        }
+        // In quarterly and annual reports the basic fund information is a two-column table
+        Map<String, Object> baseInfoMap = MapUtil.newHashMap(32);
+        for (int i = 0; i < fundInfoTable.getTables().size(); i++) {
+            List<String> cols = fundInfoTable.getTables().get(i);
+            for (int j = 0; j < 1; j++) { // runs exactly once: column 0 is the key, column 1 the value
+                baseInfoMap.put(cols.get(j), cols.get(j + 1));
+            }
+        }
+        ReportFundInfoDTO dto = new ReportFundInfoDTO(params.getFileId());
+        this.buildInfo(baseInfoMap, dto);
+        return dto;
+    }
+
+    private List<ReportShareChangeDTO> buildShareChangeInfo(Integer fileId, List<SimpleTable> tables) {
+        List<SimpleTable> simpleTables = tables.stream().filter(e -> "shareChange".equals(e.getTableKey())).collect(Collectors.toList()); // filtered tables are not used yet
+        return null; // stub in this revision: always returns null
+    }
+
+    private List<ReportFinancialIndicatorsDTO> buildFinancialIndicatorsInfo(Integer fileId, List<SimpleTable> tables) {
+        List<SimpleTable> simpleTables = tables.stream().filter(e -> "financialIndicators".equals(e.getTableKey())).collect(Collectors.toList()); // filtered tables are not used yet
+        return null; // stub in this revision: always returns null
+    }
+
+    private List<ReportInvestmentIndustryDTO> buildInvestmentIndustryInfo(Integer fileId, List<SimpleTable> tables) {
+        List<SimpleTable> simpleTables = tables.stream().filter(e -> "investmentIndustry".equals(e.getTableKey())).collect(Collectors.toList()); // filtered tables are not used yet
+        return null; // stub in this revision: always returns null
+    }
+
+    private List<ReportAssetAllocationDTO> buildAssetAllocationInfo(Integer fileId, String filename, List<SimpleTable> tables) {
+        SimpleTable assetAllocationTable = tables.stream().filter(e -> "assetAllocation".equals(e.getTableKey())).findFirst().orElse(null);
+        if (assetAllocationTable == null) {
+            throw new ReportParseException(ReportParseStatus.PARSE_ASSET_INFO_FAIL, filename); // a missing asset-allocation table is treated as a parse failure
+        }
+        return null; // stub in this revision: the table is located but not parsed yet
+    }
+}

+ 0 - 20
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/AbstractPDReportParser.java

@@ -186,24 +186,4 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
         }
         return dtos;
     }
-
-    /**
-     * 构建只有两列表格的dto数据对象
-     *
-     * @param <DTO>   泛型对象
-     * @param fileId  文件id
-     * @param clazz   泛型对象
-     * @param infoMap 表格转换的函数
-     * @return /
-     */
-    private <DTO extends BaseReportDTO<?>> DTO buildDto(Integer fileId, Class<DTO> clazz, Map<String, Object> infoMap) {
-        try {
-            DTO dto = clazz.getDeclaredConstructor().newInstance();
-            dto.setFileId(fileId);
-            this.buildInfo(infoMap, dto);
-            return dto;
-        } catch (Exception ignored) {
-        }
-        return null;
-    }
 }

+ 5 - 5
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDAnnuallyReportParser.java

@@ -48,25 +48,25 @@ public class PDAnnuallyReportParser extends PDQuarterlyReportParser<AnnuallyRepo
             }
             // 用表格的第一列的数据判断是否主要财务指标数据
             List<String> texts = this.getTableColTexts(table, 0);
-            if (CollUtil.containsAny(texts, FINANCIAL_INDICATORS_COLUMN_NAMES)) {
+            if (CollUtil.containsAny(texts, ReportParseUtils.FINANCIAL_INDICATORS_COLUMN_NAMES)) {
                 this.financialIndicatorsTables.add(table);
                 continue;
             }
             int colCount = table.getColCount();
             if (colCount == 2) {
                 // 用表格的第一列的数据判断是否份额变动记录
-                if (CollUtil.containsAny(texts, SHARE_CHANGE_COLUMN_NAMES)) {
+                if (CollUtil.containsAny(texts, ReportParseUtils.SHARE_CHANGE_COLUMN_NAMES)) {
                     this.shareChangeTables.add(table);
                 }
             } else if (colCount == 4) {
                 // 用表格的第二列的数据判断是否行业配置数据(内地)
                 texts = this.getTableColTexts(table, 1);
-                if (CollUtil.containsAny(texts, INDUSTRY_COLUMN_NAMES)) {
+                if (CollUtil.containsAny(texts, ReportParseUtils.INDUSTRY_COLUMN_NAMES)) {
                     this.investmentIndustryTables.add(table);
                 }
             } else if (colCount == 3) {
                 // 用表格的第一列的数据判断是否行业配置数据(港股通)
-                if (CollUtil.containsAny(texts, INDUSTRY_COLUMN_NAMES)) {
+                if (CollUtil.containsAny(texts, ReportParseUtils.INDUSTRY_COLUMN_NAMES)) {
                     this.investmentIndustryTables.add(table);
                     continue;
                 }
@@ -122,7 +122,7 @@ public class PDAnnuallyReportParser extends PDQuarterlyReportParser<AnnuallyRepo
                 infoMap.put("年度", year);
                 for (int i = 0; i < table.getRowCount(); i++) {
                     String columnName = ReportParseUtils.cleaningValue(table.getCell(i, 0).getText());
-                    if (!CollUtil.contains(FINANCIAL_INDICATORS_COLUMN_NAMES, columnName)) {
+                    if (!CollUtil.contains(ReportParseUtils.FINANCIAL_INDICATORS_COLUMN_NAMES, columnName)) {
                         continue;
                     }
                     String value = ReportParseUtils.cleaningValue(table.getCell(i, j).getText());

+ 3 - 61
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDQuarterlyReportParser.java

@@ -26,64 +26,6 @@ import java.util.function.Function;
  */
 @Component(ReportParserConstant.PARSER_PDF_QUARTERLY)
 public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends AbstractPDReportParser<T> {
-    protected static final List<String> INDUSTRY_COLUMN_NAMES = ListUtil.list(false);
-    protected static final List<String> SHARE_CHANGE_COLUMN_NAMES = ListUtil.list(false);
-    protected static final List<String> FINANCIAL_INDICATORS_COLUMN_NAMES = ListUtil.list(false);
-
-    static {
-        // 财务指标
-        FINANCIAL_INDICATORS_COLUMN_NAMES.add("期末基金净资产");
-        FINANCIAL_INDICATORS_COLUMN_NAMES.add("报告期期末单位净值");
-        FINANCIAL_INDICATORS_COLUMN_NAMES.add("本期利润");
-        FINANCIAL_INDICATORS_COLUMN_NAMES.add("本期已实现收益");
-        FINANCIAL_INDICATORS_COLUMN_NAMES.add("期末可供分配利润");
-        FINANCIAL_INDICATORS_COLUMN_NAMES.add("期末可供分配基金份额利润");
-        FINANCIAL_INDICATORS_COLUMN_NAMES.add("基金份额累计净值增长率");
-
-        // 中国证监会行业标准
-        INDUSTRY_COLUMN_NAMES.add("农、林、牧、渔业");
-        INDUSTRY_COLUMN_NAMES.add("采矿业");
-        INDUSTRY_COLUMN_NAMES.add("制造业");
-        INDUSTRY_COLUMN_NAMES.add("电力、热力、燃气及水生产和供应业");
-        INDUSTRY_COLUMN_NAMES.add("建筑业");
-        INDUSTRY_COLUMN_NAMES.add("批发和零售业");
-        INDUSTRY_COLUMN_NAMES.add("交通运输、仓储和邮政业");
-        INDUSTRY_COLUMN_NAMES.add("住宿和餐饮业");
-        INDUSTRY_COLUMN_NAMES.add("信息传输、软件和信息技术服务业");
-        INDUSTRY_COLUMN_NAMES.add("金融业");
-        INDUSTRY_COLUMN_NAMES.add("房地产业");
-        INDUSTRY_COLUMN_NAMES.add("租赁和商务服务业");
-        INDUSTRY_COLUMN_NAMES.add("科学研究和技术服务业");
-        INDUSTRY_COLUMN_NAMES.add("水利、环境和公共设施管理业");
-        INDUSTRY_COLUMN_NAMES.add("居民服务、修理和其他服务业");
-        INDUSTRY_COLUMN_NAMES.add("教育");
-        INDUSTRY_COLUMN_NAMES.add("卫生和社会工作");
-        INDUSTRY_COLUMN_NAMES.add("文化、体育和娱乐业");
-        INDUSTRY_COLUMN_NAMES.add("综合");
-
-        INDUSTRY_COLUMN_NAMES.add("港股通");
-
-        // 以下为国际标准
-        INDUSTRY_COLUMN_NAMES.add("能源");
-        INDUSTRY_COLUMN_NAMES.add("原材料");
-        INDUSTRY_COLUMN_NAMES.add("工业");
-        INDUSTRY_COLUMN_NAMES.add("非日常生活消费品");
-        INDUSTRY_COLUMN_NAMES.add("日常消费品");
-        INDUSTRY_COLUMN_NAMES.add("医疗保健");
-        INDUSTRY_COLUMN_NAMES.add("金融");
-        INDUSTRY_COLUMN_NAMES.add("信息技术");
-        INDUSTRY_COLUMN_NAMES.add("通讯服务");
-        INDUSTRY_COLUMN_NAMES.add("公用事业");
-        INDUSTRY_COLUMN_NAMES.add("房地产");
-
-        // 份额变动表格识别列
-        SHARE_CHANGE_COLUMN_NAMES.add("报告期期初基金份额总额");
-        SHARE_CHANGE_COLUMN_NAMES.add("减:报告期期间基金总赎回份额");
-        SHARE_CHANGE_COLUMN_NAMES.add("期末基金总份额/期末基金实缴总额");
-        SHARE_CHANGE_COLUMN_NAMES.add("报告期期间基金拆分变动份额");
-        SHARE_CHANGE_COLUMN_NAMES.add("报告期期间基金总申购份额");
-    }
-
     protected List<Table> financialIndicatorsTables;
     protected List<Table> shareChangeTables;
     protected List<Table> assetAllocationTables;
@@ -116,9 +58,9 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
                 // 用表格的第一列的数据判断是否份额变动记录
                 List<String> texts = this.getTableColTexts(table, 0);
                 // 主要财务指标或份额变动
-                if (CollUtil.containsAny(texts, SHARE_CHANGE_COLUMN_NAMES)) {
+                if (CollUtil.containsAny(texts, ReportParseUtils.SHARE_CHANGE_COLUMN_NAMES)) {
                     this.shareChangeTables.add(table);
-                } else if (CollUtil.containsAny(texts, FINANCIAL_INDICATORS_COLUMN_NAMES)) {
+                } else if (CollUtil.containsAny(texts, ReportParseUtils.FINANCIAL_INDICATORS_COLUMN_NAMES)) {
                     this.financialIndicatorsTables.add(table);
                 }
             } else if (colCount == 4) {
@@ -127,7 +69,7 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
             } else if (colCount == 3) {
                 // 用表格的第一列单元格判断是否资产配置表
                 List<String> texts = this.getTableColTexts(table, 0);
-                if (CollUtil.containsAny(texts, INDUSTRY_COLUMN_NAMES)) {
+                if (CollUtil.containsAny(texts, ReportParseUtils.INDUSTRY_COLUMN_NAMES)) {
                     this.investmentIndustryTables.add(table);
                 } else {
                     this.assetAllocationTables.add(table);

+ 3 - 3
service-daq/src/main/java/com/simuwang/daq/utils/ExcelReportParseUtil.java

@@ -14,9 +14,9 @@ public class ExcelReportParseUtil {
 
     public static void main(String[] args) {
         List<CustomExcelTable> customExcelTables = ListUtil.list(true);
-        customExcelTables.add(new CustomExcelTable("fundInfo", "基金概况", 4, 1, 0));
-        customExcelTables.add(new CustomExcelTable("netReport", "级基金净值表", 5, 1, 2));
-        customExcelTables.add(new CustomExcelTable("netReport", "净值月报", 5, 1, 2));
+        customExcelTables.add(new CustomExcelTable("fundInfo", "基金概况", 4));
+        customExcelTables.add(new CustomExcelTable("netReport", "级基金净值表", 5, 2));
+        customExcelTables.add(new CustomExcelTable("netReport", "净值月报", 5, 2));
 
         CustomExcelMultiSheetListener readListener = new CustomExcelMultiSheetListener();
         ExcelReaderBuilder readerBuilder = EasyExcel.read(filepath);