Selaa lähdekoodia

fix:修复资产配置表格大类识别错误问题

wangzaijun 6 kuukautta sitten
vanhempi
commit
5a4b9c3bbf

+ 3 - 1
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDAnnuallyReportParser.java

@@ -11,6 +11,7 @@ import technology.tabula.Table;
 
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.function.Function;
 
 /**
@@ -71,7 +72,8 @@ public class PDAnnuallyReportParser extends PDQuarterlyReportParser<AnnuallyRepo
                 }
                 // 资产配置表格识别(兼容跨页的表格)获取表格中第二列的所有文字,判断所有文字中包含"股权投资"等字符串
                 texts = this.getTableColTexts(table, 1);
-                if (CollUtil.containsAny(texts, ListUtil.of("股权投资", "股票投资", "债券投资", "另类投资", "其他资产", "其他融资总额"))) {
+                Set<String> keys = ASSET_ALLOCATION_TYPE_MAPPER.keySet();
+                if (CollUtil.containsAny(texts, keys)) {
                     this.assetAllocationTables.add(table);
                 }
             }

+ 63 - 14
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDQuarterlyReportParser.java

@@ -12,10 +12,7 @@ import technology.tabula.RectangularTextContainer;
 import technology.tabula.Table;
 
 import java.awt.geom.Rectangle2D;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
+import java.util.*;
 import java.util.function.Function;
 
 /**
@@ -28,6 +25,8 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
     protected static final List<String> INDUSTRY_COLUMN_NAMES = ListUtil.list(false);
     protected static final List<String> SHARE_CHANGE_COLUMN_NAMES = ListUtil.list(false);
     protected static final List<String> FINANCIAL_INDICATORS_COLUMN_NAMES = ListUtil.list(false);
+    // 资产配置明细和大类关系映射
+    protected static final Map<String, String> ASSET_ALLOCATION_TYPE_MAPPER = MapUtil.newHashMap(32, true);
 
     static {
         // 财务指标
@@ -81,6 +80,59 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
         SHARE_CHANGE_COLUMN_NAMES.add("期末基金总份额/期末基金实缴总额");
         SHARE_CHANGE_COLUMN_NAMES.add("报告期期间基金拆分变动份额");
         SHARE_CHANGE_COLUMN_NAMES.add("报告期期间基金总申购份额");
+
+        // Asset allocation: maps each asset-detail row label (key) to its asset category (value).
+        // The key set is also used to recognise asset-allocation tables (second-column texts are checked against it).
+        // NOTE(review): buildAssetAllocationInfo resolves the category with an exact Map.get(detail);
+        // confirm cleaningValue yields exactly these key spellings (colon / parenthesis variants), otherwise the category is null.
+        // Cash-type assets
+        ASSET_ALLOCATION_TYPE_MAPPER.put("银行存款", "现金类资产");
+        // Equity investment in domestic unlisted, non-quoted companies
+        ASSET_ALLOCATION_TYPE_MAPPER.put("股权投资", "境内未上市、未挂牌公司股权投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其中:优先股", "境内未上市、未挂牌公司股权投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其他股权类投资", "境内未上市、未挂牌公司股权投资");
+        // Private-placement investment in listed companies
+        ASSET_ALLOCATION_TYPE_MAPPER.put("上市公司定向增发投资", "上市公司定向增发投资");
+        // NEEQ ("New Third Board") investment
+        ASSET_ALLOCATION_TYPE_MAPPER.put("新三板挂牌企业投资", "新三板投资");
+        // Domestic securities investment scale
+        ASSET_ALLOCATION_TYPE_MAPPER.put("结算备付金", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("存出保证金", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("股票投资", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("债券投资", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其中:银行间市场债券", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其中:利率债", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其中:信用债", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("资产支持证券", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("基金投资(公募基金)", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其中:货币基金", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("期货及衍生品交易保证金", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("买入返售金融资产", "境内证券投资规模");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其他证券类标的", "境内证券投资规模");
+        // Asset-management plan investment
+        ASSET_ALLOCATION_TYPE_MAPPER.put("商业银行理财产品投资", "资管计划投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("信托计划投资", "资管计划投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("基金公司及其子公司资产管理计划投资", "资管计划投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("保险资产管理计划投资", "资管计划投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("证券公司及其子公司资产管理计划投资", "资管计划投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("期货公司及其子公司资产管理计划投资", "资管计划投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("私募基金产品投资", "资管计划投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("未在协会备案的合伙企业份额", "资管计划投资");
+        // Alternative investment
+        ASSET_ALLOCATION_TYPE_MAPPER.put("另类投资", "另类投资");
+        // Domestic debt-type investment
+        ASSET_ALLOCATION_TYPE_MAPPER.put("银行委托贷款规模", "境内债权类投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("信托贷款", "境内债权类投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("应收账款投资", "境内债权类投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("各类受(收)益权投资", "境内债权类投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("票据(承兑汇票等)投资", "境内债权类投资");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其他债权投资", "境内债权类投资");
+        // Overseas investment
+        ASSET_ALLOCATION_TYPE_MAPPER.put("境外投资", "境外投资");
+        // Other assets
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其他资产", "其他资产");
+        // Fund liabilities
+        ASSET_ALLOCATION_TYPE_MAPPER.put("债券回购总额", "基金负债情况");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("融资、融券总额", "基金负债情况");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其中:融券总额", "基金负债情况");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("银行借款总额", "基金负债情况");
+        ASSET_ALLOCATION_TYPE_MAPPER.put("其他融资总额", "基金负债情况");
     }
 
     protected List<Table> financialIndicatorsTables;
@@ -129,7 +181,11 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
                 if (CollUtil.containsAny(texts, INDUSTRY_COLUMN_NAMES)) {
                     this.investmentIndustryTables.add(table);
                 } else {
-                    this.assetAllocationTables.add(table);
+                    texts = this.getTableColTexts(table, 1);
+                    Set<String> keys = ASSET_ALLOCATION_TYPE_MAPPER.keySet();
+                    if (CollUtil.containsAny(texts, keys)) {
+                        this.assetAllocationTables.add(table);
+                    }
                 }
             }
         }
@@ -258,24 +314,17 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
      */
     private List<ReportAssetAllocationDTO> buildAssetAllocationInfo(Integer fileId) {
         List<ReportAssetAllocationDTO> dtos = ListUtil.list(false);
-        String assetType = null;
         for (Table table : this.assetAllocationTables) {
             // 按行遍历
             for (@SuppressWarnings("all") List<RectangularTextContainer> row : table.getRows()) {
                 // x坐标升序(防止部分行乱序问题)
                 row.sort(Comparator.comparing(Rectangle2D.Float::getX));
-                // 大类
-                String type = this.cleaningValue(row.get(0).getText());
-                if (StrUtil.isNotBlank(type)) {
-                    assetType = type;
-                }
                 // 金额、市值,有时是 “备注#金额”的格式
                 String marketValueAndRemark = this.cleaningValue(row.get(2).getText());
-                if (StrUtil.isBlank(assetType)) {
-                    continue;
-                }
                 // 资产明细
                 String detail = this.cleaningValue(row.get(1).getText(), false);
+                // 大类
+                String assetType = ASSET_ALLOCATION_TYPE_MAPPER.get(detail);
                 if (StrUtil.contains(marketValueAndRemark, "#")) {
                     // 有#表示有备注,而且可能有多个,多个用分号分隔的.
                     List<String> marketValueAndRemarks = StrUtil.split(marketValueAndRemark, ";");