Selaa lähdekoodia

fix:报告解析的错误优化

wangzaijun 6 kuukautta sitten
vanhempi
commit
41a33a7032

+ 10 - 10
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportParseStatus.java

@@ -4,17 +4,17 @@ import com.smppw.common.pojo.enums.status.StatusCode;
 
 public enum ReportParseStatus implements StatusCode {
     PARSE_FAIL(21000, "定期报告解析错误:{}"),
-    NOT_A_REPORT(21001, "[{}] 不是定期报告"),
-    REPORT_IS_SCAN(21002, "报告[{}] 为扫描件"),
-    NO_SUPPORT_TEMPLATE(21003, "报告[{}] 是不支持的文件格式"),
-    NOT_A_FIXED_FORMAT(21004, "报告[{}] 不是基协统一格式"),
+    NOT_A_REPORT(21001, "[{}]不是定期报告"),
+    REPORT_IS_SCAN(21002, "报告[{}]为扫描件"),
+    NO_SUPPORT_TEMPLATE(21003, "报告[{}]是不支持的文件格式"),
+    NOT_A_FIXED_FORMAT(21004, "报告[{}]不是基协统一格式"),
 
-    PARSE_FUND_INFO_FAIL(21010, "报告[{}] 没有解析到基金基本信息"),
-    PARSE_NAV_INFO_FAIL(21011, "报告[{}] 没有解析到基金净值信息"),
-    PARSE_FINANCIAL_INFO_FAIL(21012, "报告[{}] 没有解析到基金财务指标信息"),
-    PARSE_INDUSTRY_INFO_FAIL(21013, "报告[{}] 没有解析到基金行业配置信息"),
-    PARSE_ASSET_INFO_FAIL(21014, "报告[{}] 没有解析到基金资产配置信息"),
-    PARSE_SHARE_INFO_FAIL(21015, "报告[{}] 没有解析到基金份额变动信息"),
+    PARSE_FUND_INFO_FAIL(21010, "报告[{}]没有解析到基金基本信息"),
+    PARSE_NAV_INFO_FAIL(21011, "报告[{}]没有解析到基金净值信息"),
+    PARSE_FINANCIAL_INFO_FAIL(21012, "报告[{}]没有解析到基金财务指标信息"),
+    PARSE_INDUSTRY_INFO_FAIL(21013, "报告[{}]没有解析到基金行业配置信息"),
+    PARSE_ASSET_INFO_FAIL(21014, "报告[{}]没有解析到基金资产配置信息"),
+    PARSE_SHARE_INFO_FAIL(21015, "报告[{}]没有解析到基金份额变动信息"),
     ;
     private final int code;
     private final String msg;

+ 45 - 35
service-daq/src/main/java/com/simuwang/daq/service/EmailParseService.java

@@ -28,7 +28,9 @@ import com.simuwang.base.pojo.dto.report.ReportData;
 import com.simuwang.base.pojo.dto.report.ReportParseStatus;
 import com.simuwang.base.pojo.dto.report.ReportParserParams;
 import com.simuwang.base.pojo.valuation.CmValuationTableAttribute;
-import com.simuwang.base.pojo.vo.*;
+import com.simuwang.base.pojo.vo.EmailParseDataViewVO;
+import com.simuwang.base.pojo.vo.EmailParseFailAnalysisVO;
+import com.simuwang.base.pojo.vo.NameValueVO;
 import com.simuwang.daq.components.report.parser.ReportParser;
 import com.simuwang.daq.components.report.parser.ReportParserFactory;
 import com.simuwang.daq.components.report.writer.ReportWriter;
@@ -394,11 +396,18 @@ public class EmailParseService {
             registerNumber = matcher.group();
         }
         // 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
-        ReportType reportType = ReportType.MONTHLY;
+        ReportType reportType = null;
         if (StrUtil.containsAny(fileName, ReportType.QUARTERLY.getPatterns())) {
             reportType = ReportType.QUARTERLY;
         } else if (StrUtil.containsAny(fileName, ReportType.ANNUALLY.getPatterns())) {
             reportType = ReportType.ANNUALLY;
+        } else if (StrUtil.containsAny(fileName, ReportType.MONTHLY.getPatterns())) {
+            reportType = ReportType.MONTHLY;
+        }
+        if (reportType == null) {
+            result.setStatus(ReportParseStatus.NOT_A_REPORT.getCode());
+            result.setMsg(StrUtil.format(ReportParseStatus.NOT_A_REPORT.getMsg(), fileName));
+            return result;
         }
         // 解析器--如果开启python解析则直接调用python接口,否则根据文件后缀获取对应解析器
         ReportParserFileType fileType;
@@ -414,12 +423,11 @@ public class EmailParseService {
             return result;
         }
         // 解析报告
-        ReportParserParams params = null;
         ReportData reportData = null;
         StopWatch parserWatch = new StopWatch();
         parserWatch.start();
         try {
-            params = ReportParserParams.builder().fileId(fileId).filename(fileName)
+            ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName)
                     .filepath(emailContentInfoDTO.getFilePath()).registerNumber(registerNumber).build();
             ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
             reportData = instance.parse(params);
@@ -427,17 +435,17 @@ public class EmailParseService {
             result.setMsg("报告解析成功");
             result.setData(reportData);
         } catch (ReportParseException e) {
-            log.error("报告{}解析失败\n{}", params, e.getMsg());
+            log.error("解析失败\n{}", e.getMsg());
             result.setStatus(e.getCode());
             result.setMsg(e.getMsg());
         } catch (Exception e) {
-            log.error("报告{}解析失败\n{}", params, ExceptionUtil.stacktraceToString(e));
+            log.error("解析失败\n{}", ExceptionUtil.stacktraceToString(e));
             result.setStatus(ReportParseStatus.PARSE_FAIL.getCode());
             result.setMsg(StrUtil.format(ReportParseStatus.PARSE_FAIL.getMsg(), e.getMessage()));
         } finally {
             parserWatch.stop();
             if (log.isInfoEnabled()) {
-                log.info("报告{}解析结果为{},耗时{}ms", params, reportData, parserWatch.getTotalTimeMillis());
+                log.info("报告{}解析结果为{},耗时{}ms", fileName, reportData, parserWatch.getTotalTimeMillis());
             }
         }
         // 保存报告解析结果
@@ -448,11 +456,11 @@ public class EmailParseService {
                 ReportWriter<ReportData> instance = this.reportWriterFactory.getInstance(reportType);
                 instance.write(reportData);
             } catch (Exception e) {
-                log.error("报告{}结果保存失败\n{}", params, ExceptionUtil.stacktraceToString(e));
+                log.error("报告{}结果保存失败\n{}", fileName, ExceptionUtil.stacktraceToString(e));
             } finally {
                 writeWatch.stop();
                 if (log.isInfoEnabled()) {
-                    log.info("报告{}解析结果保存完成,耗时{}ms", params, writeWatch.getTotalTimeMillis());
+                    log.info("报告{}解析结果保存完成,耗时{}ms", fileName, writeWatch.getTotalTimeMillis());
                 }
             }
         }
@@ -851,12 +859,12 @@ public class EmailParseService {
                 // 2.邮件只有正文
                 if (content instanceof String) {
                     EmailContentInfoDTO emailContentInfoDTO = new EmailContentInfoDTO();
-                    try{
+                    try {
                         //获取邮件编码
                         String contentType = message.getContentType();
-                        contentType = contentType.substring(contentType.indexOf("charset=")+8,contentType.length());
-                        emailContentInfoDTO.setEmailContent(new String(content.toString().getBytes(contentType),"UTF-8"));
-                    }catch (Exception e){
+                        contentType = contentType.substring(contentType.indexOf("charset=") + 8, contentType.length());
+                        emailContentInfoDTO.setEmailContent(new String(content.toString().getBytes(contentType), "UTF-8"));
+                    } catch (Exception e) {
                         emailContentInfoDTO.setEmailContent(content.toString());
                     }
                     emailContentInfoDTO.setEmailDate(emailDateStr);
@@ -912,20 +920,21 @@ public class EmailParseService {
         }
         return null;
     }
+
     public List<NameValueVO> searchEmailCount(DataboardQuery databoardQuery) {
         List<Map<String, Object>> dataList = emailParseInfoMapper.searchEmailDataBoard(databoardQuery);
         List<NameValueVO> result = new ArrayList<>();
         Long total = 0L;
-        for(Map<String, Object> data : dataList){
+        for (Map<String, Object> data : dataList) {
             NameValueVO vo = new NameValueVO();
-            if(1 == ((Integer)data.get("parse_status")).intValue()){
+            if (1 == ((Integer) data.get("parse_status")).intValue()) {
                 vo.setValue((Long) data.get("total"));
                 vo.setName("解析成功");
-            }else{
+            } else {
                 vo.setValue((Long) data.get("total"));
                 vo.setName("解析失败");
             }
-            total+=(Long)data.get("total");
+            total += (Long) data.get("total");
             result.add(vo);
         }
         NameValueVO vo = new NameValueVO();
@@ -940,17 +949,17 @@ public class EmailParseService {
         List<Map<String, Object>> dataList = emailParseInfoMapper.searchEmailTypeCount(databoardQuery);
         List<NameValueVO> result = new ArrayList<>();
         Integer total = 0;
-        for(Map<String, Object> data : dataList){
+        for (Map<String, Object> data : dataList) {
             NameValueVO vo = new NameValueVO();
             Integer emailType = (Integer) data.get("email_type");
-            Long totalType = (Long)data.get("total");
-            if(1 == emailType){
+            Long totalType = (Long) data.get("total");
+            if (1 == emailType) {
                 vo.setName("净值规模");
                 vo.setValue(totalType);
-            }else if(2 == emailType){
+            } else if (2 == emailType) {
                 vo.setName("估值表");
                 vo.setValue(totalType);
-            }else{
+            } else {
                 vo.setName("定期报告");
                 vo.setValue(totalType);
             }
@@ -965,22 +974,22 @@ public class EmailParseService {
         List<NameValueVO> navNameValueVOS = new ArrayList<>();
         NameValueVO pdfNoDataVO = new NameValueVO();
         databoardQuery.setEmailType(1);
-        Long pdfNoData = emailParseInfoMapper.countpdfNoData(databoardQuery,"无法从PDF文件中获取到数据");
+        Long pdfNoData = emailParseInfoMapper.countpdfNoData(databoardQuery, "无法从PDF文件中获取到数据");
         pdfNoDataVO.setName("无法从PDF文件中获取到数据");
         pdfNoDataVO.setValue(pdfNoData);
         navNameValueVOS.add(pdfNoDataVO);
         NameValueVO priceDateMissVO = new NameValueVO();
-        Long priceDateMiss = emailParseInfoMapper.countpdfNoData(databoardQuery,"缺少净值日期");
+        Long priceDateMiss = emailParseInfoMapper.countpdfNoData(databoardQuery, "缺少净值日期");
         priceDateMissVO.setValue(priceDateMiss);
         priceDateMissVO.setName("缺少净值日期");
         navNameValueVOS.add(priceDateMissVO);
         NameValueVO navMissVO = new NameValueVO();
-        Long navMiss = emailParseInfoMapper.countpdfNoData(databoardQuery,"单位净值和累计净值和资产净值均缺失");
+        Long navMiss = emailParseInfoMapper.countpdfNoData(databoardQuery, "单位净值和累计净值和资产净值均缺失");
         navMissVO.setName("单位净值和累计净值和资产净值均缺失");
         navMissVO.setValue(navMiss);
         navNameValueVOS.add(navMissVO);
         NameValueVO fundNameNumberMissVO = new NameValueVO();
-        Long fundNameNumberMiss = emailParseInfoMapper.countpdfNoData(databoardQuery,"基金名称和备案编码均缺失");
+        Long fundNameNumberMiss = emailParseInfoMapper.countpdfNoData(databoardQuery, "基金名称和备案编码均缺失");
         fundNameNumberMissVO.setName("基金名称和备案编码均缺失");
         fundNameNumberMissVO.setValue(fundNameNumberMiss);
         navNameValueVOS.add(fundNameNumberMissVO);
@@ -989,27 +998,27 @@ public class EmailParseService {
         databoardQuery.setEmailType(2);
         List<NameValueVO> valuationNameValueVOS = new ArrayList<>();
         NameValueVO fileTypeErrorVO = new NameValueVO();
-        Long fileTypeError = emailParseInfoMapper.countpdfNoData(databoardQuery,"文件格式错误");
+        Long fileTypeError = emailParseInfoMapper.countpdfNoData(databoardQuery, "文件格式错误");
         fileTypeErrorVO.setName("文件格式错误");
         fileTypeErrorVO.setValue(fileTypeError);
         valuationNameValueVOS.add(fileTypeErrorVO);
         NameValueVO columnMissVO = new NameValueVO();
-        Long columnMiss = emailParseInfoMapper.countpdfNoData(databoardQuery,"无市值列或无数量列");
+        Long columnMiss = emailParseInfoMapper.countpdfNoData(databoardQuery, "无市值列或无数量列");
         columnMissVO.setName("无市值列或无数量列");
         columnMissVO.setValue(columnMiss);
         valuationNameValueVOS.add(columnMissVO);
         NameValueVO numbericMissVO = new NameValueVO();
-        Long numbericMiss = emailParseInfoMapper.countpdfNoData(databoardQuery,"非数值数据");
+        Long numbericMiss = emailParseInfoMapper.countpdfNoData(databoardQuery, "非数值数据");
         numbericMissVO.setName("非数值数据");
         numbericMissVO.setValue(numbericMiss);
         valuationNameValueVOS.add(numbericMissVO);
         NameValueVO noDataVO = new NameValueVO();
-        Long noData = emailParseInfoMapper.countpdfNoData(databoardQuery,"无数据");
+        Long noData = emailParseInfoMapper.countpdfNoData(databoardQuery, "无数据");
         noDataVO.setValue(noData);
         noDataVO.setName("无数据");
         valuationNameValueVOS.add(noDataVO);
         NameValueVO templateErrorVO = new NameValueVO();
-        Long templateError = emailParseInfoMapper.countpdfNoData(databoardQuery,"模板不支持");
+        Long templateError = emailParseInfoMapper.countpdfNoData(databoardQuery, "模板不支持");
         templateErrorVO.setValue(templateError);
         templateErrorVO.setName("模板不支持");
         valuationNameValueVOS.add(templateErrorVO);
@@ -1018,22 +1027,22 @@ public class EmailParseService {
         databoardQuery.setEmailType(3);
         List<NameValueVO> reportNameValueVOS = new ArrayList<>();
         NameValueVO scannedFileVO = new NameValueVO();
-        Long scannedFile = emailParseInfoMapper.countpdfNoData(databoardQuery,"报告为扫描件");
+        Long scannedFile = emailParseInfoMapper.countpdfNoData(databoardQuery, "报告为扫描件");
         scannedFileVO.setName("报告为扫描件");
         scannedFileVO.setValue(scannedFile);
         reportNameValueVOS.add(scannedFileVO);
         NameValueVO errorAmacFileTypeVO = new NameValueVO();
-        Long errorAmacFileType = emailParseInfoMapper.countpdfNoData(databoardQuery,"报告不是基协统一格式");
+        Long errorAmacFileType = emailParseInfoMapper.countpdfNoData(databoardQuery, "报告不是基协统一格式");
         errorAmacFileTypeVO.setName("报告不是基协统一格式");
         errorAmacFileTypeVO.setValue(errorAmacFileType);
         reportNameValueVOS.add(errorAmacFileTypeVO);
         NameValueVO watermarkFileErrorVO = new NameValueVO();
-        Long watermarkFileError = emailParseInfoMapper.countpdfNoData(databoardQuery,"报告水印干扰导致部分没有解析");
+        Long watermarkFileError = emailParseInfoMapper.countpdfNoData(databoardQuery, "报告水印干扰导致部分没有解析");
         watermarkFileErrorVO.setName("报告水印干扰导致部分没有解析");
         watermarkFileErrorVO.setValue(watermarkFileError);
         reportNameValueVOS.add(watermarkFileErrorVO);
         NameValueVO noReportVO = new NameValueVO();
-        Long noReport = emailParseInfoMapper.countpdfNoData(databoardQuery,"报告不是定期报告");
+        Long noReport = emailParseInfoMapper.countpdfNoData(databoardQuery, "报告不是定期报告");
         noReportVO.setName("报告不是定期报告");
         noReportVO.setValue(noReport);
         reportNameValueVOS.add(noReportVO);
@@ -1056,6 +1065,7 @@ public class EmailParseService {
         dataViewVO.setCompanyNum(companyInformationMapper.countCompanyTotal());
         return dataViewVO;
     }
+
     private Message[] getEmailMessage(Folder folder, String protocol, Date startDate) {
         try {
             if (protocol.contains("imap")) {