|
@@ -10,7 +10,8 @@ import cn.hutool.json.JSONUtil;
|
|
import com.smppw.modaq.common.enums.ReportMonthlyType;
|
|
import com.smppw.modaq.common.enums.ReportMonthlyType;
|
|
import com.smppw.modaq.common.enums.ReportParseStatus;
|
|
import com.smppw.modaq.common.enums.ReportParseStatus;
|
|
import com.smppw.modaq.common.exception.ReportParseException;
|
|
import com.smppw.modaq.common.exception.ReportParseException;
|
|
-import com.smppw.modaq.domain.dto.report.OCRParseData;
|
|
|
|
|
|
+import com.smppw.modaq.domain.dto.report.ocr.OCRLetterParseData;
|
|
|
|
+import com.smppw.modaq.domain.dto.report.ocr.OCRParseData;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
@@ -20,6 +21,94 @@ import java.util.Objects;
|
|
public class OCRReportParser {
|
|
public class OCRReportParser {
|
|
private final Logger logger = LoggerFactory.getLogger(this.getClass());
|
|
private final Logger logger = LoggerFactory.getLogger(this.getClass());
|
|
|
|
|
|
|
|
+ public OCRLetterParseData parseLetterData(String filename, String ocrApi, String ocrImgUrl) throws ReportParseException {
|
|
|
|
+ Map<String, Object> paramsMap = MapUtil.newHashMap(4);
|
|
|
|
+ paramsMap.put("image_url", ocrImgUrl);
|
|
|
|
+ paramsMap.put("user_msg", """
|
|
|
|
+ 请提取文件中的基金名称、产品代码、投资人姓名、证件类型、证件号码、基金账户、交易账号、业务类型、申请日期、申请金额、申请份额、确认日期、确认金额、确认份额、单位净值。
|
|
|
|
+ 要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息。
|
|
|
|
+ 返回数据格式以json方式输出,格式为:{"基金名称":"","产品代码":"","投资人姓名":"","证件类型":"","证件号码":"","基金账户":"","交易账号":"","业务类型":"","申请日期":"","申请金额":"","申请份额":"","确认日期":"","确认金额":"","确认份额":"","单位净值":""}
|
|
|
|
+ """);
|
|
|
|
+ OCRLetterParseData res = new OCRLetterParseData();
|
|
|
|
+ String objectStr = null;
|
|
|
|
+ try {
|
|
|
|
+ objectStr = this.parseOcrResult(ocrApi, paramsMap);
|
|
|
|
+ JSONObject jsonObject = JSONUtil.parseObj(objectStr);
|
|
|
|
+ String fundName = this.cleanData(jsonObject.getStr("基金名称"));
|
|
|
|
+ String fundCode = this.cleanData(jsonObject.getStr("产品代码"));
|
|
|
|
+ String investorName = this.cleanData(jsonObject.getStr("投资人姓名"));
|
|
|
|
+ String certificateType = this.cleanData(jsonObject.getStr("证件类型"));
|
|
|
|
+ String certificateNumber = this.cleanData(jsonObject.getStr("证件号码"));
|
|
|
|
+ String fundAccount = this.cleanData(jsonObject.getStr("基金账户"));
|
|
|
|
+ String tradingAccount = this.cleanData(jsonObject.getStr("交易账号"));
|
|
|
|
+ String transactionType = this.cleanData(jsonObject.getStr("业务类型"));
|
|
|
|
+ String applyDate = this.cleanData(jsonObject.getStr("申请日期"));
|
|
|
|
+ String applyAmount = this.cleanData(jsonObject.getStr("申请金额"));
|
|
|
|
+ String applyShare = this.cleanData(jsonObject.getStr("申请份额"));
|
|
|
|
+ String holdingDate = this.cleanData(jsonObject.getStr("确认日期"));
|
|
|
|
+ String amount = this.cleanData(jsonObject.getStr("确认金额"));
|
|
|
|
+ String share = this.cleanData(jsonObject.getStr("确认份额"));
|
|
|
|
+ String nav = this.cleanData(jsonObject.getStr("单位净值"));
|
|
|
|
+ if (StrUtil.isNotBlank(fundName) && (fundName.contains("基金") || fundName.contains("资产管理")) && !fundName.contains("公司")) {
|
|
|
|
+ res.setFundName(fundName);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(fundCode)) {
|
|
|
|
+ res.setFundCode(ReportParseUtils.matchFundCode(fundCode));
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(investorName)) {
|
|
|
|
+ res.setInvestorName(investorName);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(certificateType)) {
|
|
|
|
+ res.setCertificateType(certificateType);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(certificateNumber)) {
|
|
|
|
+ res.setCertificateNumber(certificateNumber);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(fundAccount)) {
|
|
|
|
+ res.setFundAccount(fundAccount);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(tradingAccount)) {
|
|
|
|
+ res.setTradingAccount(tradingAccount);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(transactionType)) {
|
|
|
|
+ res.setTransactionType(transactionType);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(applyDate)) {
|
|
|
|
+ res.setApplyDate(applyDate);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(applyAmount)) {
|
|
|
|
+ res.setApplyAmount(applyAmount);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(applyShare)) {
|
|
|
|
+ res.setApplyShare(applyShare);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(holdingDate)) {
|
|
|
|
+ res.setHoldingDate(holdingDate);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(amount)) {
|
|
|
|
+ res.setAmount(amount);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(share)) {
|
|
|
|
+ res.setShare(share);
|
|
|
|
+ }
|
|
|
|
+ if (StrUtil.isNotBlank(nav)) {
|
|
|
|
+ res.setNav(nav);
|
|
|
|
+ }
|
|
|
|
+ return res;
|
|
|
|
+ } catch (IORuntimeException e) {
|
|
|
|
+ this.logger.warn("确认单{} OCR解析错误:{}", filename, ReportParseStatus.AI_NOT_FOUND.getMsg());
|
|
|
|
+ throw new ReportParseException(ReportParseStatus.AI_NOT_FOUND);
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ this.logger.warn("确认单{} OCR识别错误:{}", filename, ExceptionUtil.stacktraceToString(e));
|
|
|
|
+ throw new ReportParseException(ReportParseStatus.SYSTEM_ERROR);
|
|
|
|
+ } finally {
|
|
|
|
+ if (logger.isInfoEnabled()) {
|
|
|
|
+ this.logger.info("确认单{} OCR识别参数{},OCR识别结果:{},处理后的结果是:{}",
|
|
|
|
+ filename, paramsMap, objectStr, res);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
public ReportMonthlyType parseMonthlyType(String filename, String ocrApi, String ocrImgUrl) throws ReportParseException {
|
|
public ReportMonthlyType parseMonthlyType(String filename, String ocrApi, String ocrImgUrl) throws ReportParseException {
|
|
Map<String, Object> paramsMap = MapUtil.newHashMap(4);
|
|
Map<String, Object> paramsMap = MapUtil.newHashMap(4);
|
|
paramsMap.put("image_url", ocrImgUrl);
|
|
paramsMap.put("image_url", ocrImgUrl);
|
|
@@ -59,7 +148,7 @@ public class OCRReportParser {
|
|
paramsMap.put("user_msg", """
|
|
paramsMap.put("user_msg", """
|
|
请提取文件中的基金名称、基金公司、产品代码,并判断是否有红色印章和是否有电话。
|
|
请提取文件中的基金名称、基金公司、产品代码,并判断是否有红色印章和是否有电话。
|
|
要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息。
|
|
要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息。
|
|
- 返回数据格式以json方式输出,格式为:{"基金名称":"","基金公司":"产品代码":"","是否有红色印章":"","是否有电话":""}
|
|
|
|
|
|
+ 返回数据格式以json方式输出,格式为:{"基金名称":"","基金公司":"","产品代码":"","是否有红色印章":"","是否有电话":""}
|
|
""");
|
|
""");
|
|
OCRParseData res = new OCRParseData();
|
|
OCRParseData res = new OCRParseData();
|
|
String objectStr = null;
|
|
String objectStr = null;
|