Pārlūkot izejas kodu

fix:一些优化

wangzaijun 1 dienu atpakaļ
vecāks
revīzija
c878e92994

+ 28 - 28
mo-daq-openai/web/route.py

@@ -158,34 +158,34 @@ async def parse_file(filepath: str = None,
     return {"file_id": file_id, "content": completion.choices[0].message.content}
 
 
-@app.post("/upload-file")
-async def create_upload_file(file: UploadFile = File(...),
-                             file_id: str = None,
-                             user_msg: str = DEFAULT_USER_MSG):
-    if file_id is None:
-        # 读取文件内容(可选)
-        contents = await file.read()
-
-        # 这里可以对文件进行进一步处理,比如保存到服务器上
-        with open(f"./uploads/{file.filename}", "wb") as f:
-            f.write(contents)
-
-        file_object = client.files.create(file=Path(f"./uploads/{file.filename}"), purpose="file-extract")
-        file_id = file_object.id
-
-    # 初始化messages列表
-    completion = client.chat.completions.create(
-        model="qwen-long",
-        temperature=0.1,
-        presence_penalty=1,
-        messages=[
-            {'role': 'system', 'content': 'You are a helpful assistant.'},
-            {'role': 'system', 'content': f'fileid://{file_id}'},
-            {'role': 'user', 'content': user_msg}
-        ],
-    )
-
-    return {"file_id": file_id, "content": completion.choices[0].message.content}
+# @app.post("/upload-file")
+# async def create_upload_file(file: UploadFile = File(...),
+#                              file_id: str = None,
+#                              user_msg: str = DEFAULT_USER_MSG):
+#     if file_id is None:
+#         # 读取文件内容(可选)
+#         contents = await file.read()
+#
+#         # 这里可以对文件进行进一步处理,比如保存到服务器上
+#         with open(f"./uploads/{file.filename}", "wb") as f:
+#             f.write(contents)
+#
+#         file_object = client.files.create(file=Path(f"./uploads/{file.filename}"), purpose="file-extract")
+#         file_id = file_object.id
+#
+#     # 初始化messages列表
+#     completion = client.chat.completions.create(
+#         model="qwen-long",
+#         temperature=0.1,
+#         presence_penalty=1,
+#         messages=[
+#             {'role': 'system', 'content': 'You are a helpful assistant.'},
+#             {'role': 'system', 'content': f'fileid://{file_id}'},
+#             {'role': 'user', 'content': user_msg}
+#         ],
+#     )
+#
+#     return {"file_id": file_id, "content": completion.choices[0].message.content}
 
 
 @app.get("/parse-img")

+ 38 - 31
mo-daq/pom.xml

@@ -52,10 +52,10 @@
             <artifactId>spring-boot-starter-aop</artifactId>
         </dependency>
 
-<!--        <dependency>-->
-<!--            <groupId>org.springframework.boot</groupId>-->
-<!--            <artifactId>spring-boot-starter-quartz</artifactId>-->
-<!--        </dependency>-->
+        <!--        <dependency>-->
+        <!--            <groupId>org.springframework.boot</groupId>-->
+        <!--            <artifactId>spring-boot-starter-quartz</artifactId>-->
+        <!--        </dependency>-->
 
         <dependency>
             <groupId>org.springframework.boot</groupId>
@@ -98,11 +98,11 @@
             <artifactId>commons-compress</artifactId>
             <version>1.27.1</version>
         </dependency>
-<!--        <dependency>-->
-<!--            <groupId>com.github.junrar</groupId>-->
-<!--            <artifactId>junrar</artifactId>-->
-<!--            <version>7.5.1</version>-->
-<!--        </dependency>-->
+        <!--        <dependency>-->
+        <!--            <groupId>com.github.junrar</groupId>-->
+        <!--            <artifactId>junrar</artifactId>-->
+        <!--            <version>7.5.1</version>-->
+        <!--        </dependency>-->
         <dependency>
             <groupId>net.sf.sevenzipjbinding</groupId>
             <artifactId>sevenzipjbinding</artifactId>
@@ -163,33 +163,40 @@
             <version>11.5.3</version>
         </dependency>
 
-<!--        &lt;!&ndash; 通义千问 ai &ndash;&gt;-->
+        <!-- 通义千问 ai -->
 <!--        <dependency>-->
 <!--            <groupId>com.alibaba</groupId>-->
 <!--            <artifactId>dashscope-sdk-java</artifactId>-->
-<!--            <version>2.18.2</version>-->
-<!--        </dependency>-->
-<!--        <dependency>-->
-<!--            <groupId>com.squareup.okio</groupId>-->
-<!--            <artifactId>okio</artifactId>-->
-<!--            <version>3.6.0</version>-->
-<!--        </dependency>-->
-<!--        <dependency>-->
-<!--            <groupId>com.squareup.okhttp3</groupId>-->
-<!--            <artifactId>logging-interceptor</artifactId>-->
-<!--            <version>4.12.0</version>-->
-<!--        </dependency>-->
-<!--        <dependency>-->
-<!--            <groupId>com.squareup.okhttp3</groupId>-->
-<!--            <artifactId>okhttp-sse</artifactId>-->
-<!--            <version>4.12.0</version>-->
-<!--        </dependency>-->
-<!--        <dependency>-->
-<!--            <groupId>com.squareup.okhttp3</groupId>-->
-<!--            <artifactId>okhttp</artifactId>-->
-<!--            <version>4.12.0</version>-->
+<!--            <version>2.20.3</version>-->
 <!--        </dependency>-->
 
+        <!--        &lt;!&ndash; 通义千问 ai &ndash;&gt;-->
+        <!--        <dependency>-->
+        <!--            <groupId>com.alibaba</groupId>-->
+        <!--            <artifactId>dashscope-sdk-java</artifactId>-->
+        <!--            <version>2.18.2</version>-->
+        <!--        </dependency>-->
+        <!--        <dependency>-->
+        <!--            <groupId>com.squareup.okio</groupId>-->
+        <!--            <artifactId>okio</artifactId>-->
+        <!--            <version>3.6.0</version>-->
+        <!--        </dependency>-->
+        <!--        <dependency>-->
+        <!--            <groupId>com.squareup.okhttp3</groupId>-->
+        <!--            <artifactId>logging-interceptor</artifactId>-->
+        <!--            <version>4.12.0</version>-->
+        <!--        </dependency>-->
+        <!--        <dependency>-->
+        <!--            <groupId>com.squareup.okhttp3</groupId>-->
+        <!--            <artifactId>okhttp-sse</artifactId>-->
+        <!--            <version>4.12.0</version>-->
+        <!--        </dependency>-->
+        <!--        <dependency>-->
+        <!--            <groupId>com.squareup.okhttp3</groupId>-->
+        <!--            <artifactId>okhttp</artifactId>-->
+        <!--            <version>4.12.0</version>-->
+        <!--        </dependency>-->
+
         <dependency>
             <groupId>com.zaxxer</groupId>
             <artifactId>HikariCP</artifactId>

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/ai/AILetterReportParser.java

@@ -56,7 +56,7 @@ public class AILetterReportParser extends AbstractAIReportParser<LetterReportDat
         ReportFundTransactionDTO fundTransaction = this.buildDto(fileId, ReportFundTransactionDTO.class, this.allInfoMap);
         // 构建结果数据
         LetterReportData reportData = new LetterReportData(reportInfo, fundInfo);
-        reportData.setFundTransaction(fundTransaction);
+        reportData.setTransaction(fundTransaction);
         reportData.setInvestorInfo(investorInfo);
         return reportData;
     }

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDLetterReportParser.java

@@ -86,7 +86,7 @@ public class PDLetterReportParser extends AbstractPDReportParser<LetterReportDat
         // 构建结果数据
         LetterReportData reportData = new LetterReportData(reportInfo, fundInfo);
         reportData.setInvestorInfo(investorInfo);
-        reportData.setFundTransaction(fundTransaction);
+        reportData.setTransaction(fundTransaction);
         reportData.setAiParse(false);
         return reportData;
     }

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/application/components/report/writer/LetterReportWriter.java

@@ -27,7 +27,7 @@ public class LetterReportWriter extends AbstractReportWriter<LetterReportData> {
         if (investorInfo != null) {
             this.investorInfoMapper.insert(investorInfo.toEntity());
         }
-        ReportFundTransactionDTO fundTransaction = reportData.getFundTransaction();
+        ReportFundTransactionDTO fundTransaction = reportData.getTransaction();
         if (fundTransaction != null) {
             this.fundTransactionMapper.insert(fundTransaction.toEntity());
         }

+ 6 - 6
mo-daq/src/main/java/com/smppw/modaq/domain/dto/report/LetterReportData.java

@@ -8,7 +8,7 @@ import lombok.Setter;
 @Getter
 public class LetterReportData extends ReportData {
     private ReportInvestorInfoDTO investorInfo;
-    private ReportFundTransactionDTO fundTransaction;
+    private ReportFundTransactionDTO transaction;
 
     public LetterReportData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo) {
         super(baseInfo, fundInfo);
@@ -16,15 +16,15 @@ public class LetterReportData extends ReportData {
 
     @Override
     public boolean wasFailed() {
-        boolean superFlag = !super.wasFailed();
-        if (!superFlag) {
+        boolean superFlag = super.wasFailed();
+        if (superFlag) {
             return true;
         }
-        if (this.investorInfo == null || fundTransaction == null) {
+        if (this.investorInfo == null || transaction == null) {
             return true;
         }
         return StrUtil.isBlank(this.investorInfo.getInvestorName())
-                || StrUtil.isBlank(this.fundTransaction.getFundName());
+                || StrUtil.isAllBlank(this.transaction.getHoldingDate(), this.transaction.getTransactionType());
     }
 
     @Override
@@ -32,7 +32,7 @@ public class LetterReportData extends ReportData {
         return "{" +
                 super.toString() +
                 ", investorInfo=" + investorInfo +
-                ", fundTransaction=" + fundTransaction +
+                ", transaction=" + transaction +
                 '}';
     }
 }

+ 67 - 61
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -281,9 +281,12 @@ public class EmailParseService {
                                        String filepath,
                                        Integer emailType,
                                        List<EmailZipFileDTO> resultList) throws IOException {
-        String parent = FileUtil.getParent(filepath, 2);
-        String destPath = parent + File.separator + "archive" + File.separator + FileUtil.mainName(filepath);
-        File destFile = new File(destPath);
+        if (!ArchiveUtil.isArchive(filepath)) {
+            return;
+        }
+        String output = filepath.replaceAll("original", "archive");
+        String destPath = FileUtil.getParent(output, 1) + File.separator + FileUtil.mainName(output);
+        File destFile = FileUtil.file(destPath);
         if (!destFile.exists()) {
             if (!destFile.mkdirs()) {
                 throw new IOException("无法创建目标目录: " + destPath);
@@ -435,11 +438,10 @@ public class EmailParseService {
     /**
      * 邮件信息前置处理,在解析操作执行之前的过滤逻辑和校验逻辑。返回所有附件大小汇总
      *
-     * @param emailInfo 邮件信息(包含所有解压后的文件)
+     * @param emailTitle email title (subject) of the message being checked
+     * @param dtos       attachment files of the email (all files after extraction)
      */
-    private void checkEmailFileInfo(EmailInfoDTO emailInfo) {
-        String emailTitle = emailInfo.getEmailTitle();
-        List<EmailZipFileDTO> dtos = emailInfo.getEmailFileList();
+    private void checkEmailFileInfo(String emailTitle, List<EmailZipFileDTO> dtos) {
         // 如果压缩包里面既有pdf又有其他格式的文件,说明其他格式的文件是不需要解析的
         List<String> exts = dtos.stream().map(EmailZipFileDTO::getExtName).distinct().toList();
         if (exts.contains(Constants.FILE_PDF) && exts.size() > 1) {
@@ -506,18 +508,17 @@ public class EmailParseService {
     private void parseResults(EmailInfoDTO emailInfo,
                               List<ParseResult<ReportData>> resultList) {
         String emailTitle = emailInfo.getEmailTitle();
-        String senderEmail = emailInfo.getSenderEmail();
-        List<EmailZipFileDTO> dtos = emailInfo.getEmailFileList();
+        List<EmailZipFileDTO> dtos = ListUtil.toList(emailInfo.getEmailFileList());
         if (CollUtil.isEmpty(dtos)) {
             return;
         }
         // 附件文件检查
-        this.checkEmailFileInfo(emailInfo);
+        this.checkEmailFileInfo(emailTitle, dtos);
         // 解析邮件报告
         for (EmailZipFileDTO zipFile : dtos) {
 //            EmailFileInfoDO emailFile = this.saveEmailFileInfo(emailId, zipFile.getFilename(), zipFile.getFilepath());
             // 解析并保存报告
-            ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailTitle, senderEmail, zipFile);
+            ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailTitle, emailInfo.getSenderEmail(), zipFile);
             if (!Objects.equals(1, parseResult.getStatus())) {
                 log.error(parseResult.getMsg());
             }
@@ -649,17 +650,19 @@ public class EmailParseService {
                     result = new ParseResult<>(ReportParseStatus.PARSE_FAIL, null, e.getMessage());
                 }
             }
-            if (reportData != null && reportData.getBaseInfo() != null) {
+            // 当reportData==null时重新构建一个reportData对象
+            reportData = this.buildNvlReportData(fileId, reportType, reportData, fileName);
+            if (reportData.getBaseInfo() != null) {
                 // 设置月报类型
                 reportData.getBaseInfo().setMonthlyType(monthlyType.getType());
                 // 当报告日期还是空时设置为今天的前一天
                 if (reportData.getBaseInfo().getReportDate() == null) {
-                    Date date = DateUtil.offsetDay(new Date(), -1);
-                    reportData.getBaseInfo().setReportDate(date);
+                    reportData.getBaseInfo().setReportDate(DateUtil.offsetDay(new Date(), -1));
                 }
             }
             // ocr信息提取(印章、联系人、基金名称和产品代码)
             reportData = this.ocrReportData(fileId, reportType, reportData, fileName, senderEmail, images);
+            result.setData(reportData);
             if (log.isInfoEnabled()) {
                 log.info("报告{} 解析耗时{}ms,结果是:{}", fileName, (System.currentTimeMillis() - start), reportData);
             }
@@ -736,13 +739,11 @@ public class EmailParseService {
         if (CollUtil.isEmpty(images)) {
             return reportData;
         }
-        if (log.isInfoEnabled()) {
-            log.info("报告{} 用ocr补充解析结果。补充前的结果是:{}", fileName, reportData);
-        }
-        // 当reportData==null时重新构建一个reportData对象
-        reportData = this.buildNvlReportData(fileId, reportType, reportData, fileName);
         // 报告才识别尾页的印章和联系人,确认单不识别尾页
         if (ReportType.LETTER != reportType) {
+            if (log.isInfoEnabled()) {
+                log.info("报告{} 用ocr补充解析结果。补充前的结果是:{}", fileName, reportData);
+            }
             OCRParseData parseRes = null;
             try {
                 // 首页和尾页相等时只读首页
@@ -784,48 +785,54 @@ public class EmailParseService {
                     reportData.getFundInfo().setCompanyName(parseRes.getCompanyName());
                 }
             }
-        } else {
-            // 确认单AI解析失败时重新用OCR识别
-            LetterReportData letterReportData = (LetterReportData) reportData;
-            if (letterReportData.wasFailed()) {
-                OCRLetterParseData parseRes = null;
-                try {
-                    parseRes = new OCRReportParser().parseLetterData(fileName, this.ocrParserUrl, images.get(0));
-                } catch (Exception e) {
-                    log.error("报告{} OCR提取确认单关键信息出错:{}", fileName, e.getMessage());
-                }
-                if (parseRes == null) {
-                    return reportData;
-                }
-                if (letterReportData.getFundInfo() != null) {
-                    letterReportData.getFundInfo().setFundName(parseRes.getFundName());
-                    letterReportData.getFundInfo().setFundCode(parseRes.getFundCode());
-                }
-                // 投资者信息
-                if (letterReportData.getInvestorInfo() == null) {
-                    letterReportData.setInvestorInfo(new ReportInvestorInfoDTO(fileId));
-                }
-                letterReportData.getInvestorInfo().setInvestorName(parseRes.getInvestorName());
-                letterReportData.getInvestorInfo().setCertificateNumber(parseRes.getCertificateNumber());
-                letterReportData.getInvestorInfo().setTradingAccount(parseRes.getTradingAccount());
-                letterReportData.getInvestorInfo().setFundAccount(parseRes.getFundAccount());
-                letterReportData.getInvestorInfo().setCertificateType(parseRes.getCertificateType());
-                // 交易流水
-                if (letterReportData.getFundTransaction() == null) {
-                    letterReportData.setFundTransaction(new ReportFundTransactionDTO(fileId));
-                }
-                letterReportData.getFundTransaction().setTransactionType(parseRes.getTransactionType());
-                letterReportData.getFundTransaction().setApplyDate(parseRes.getApplyDate());
-                letterReportData.getFundTransaction().setApplyShare(parseRes.getApplyShare());
-                letterReportData.getFundTransaction().setApplyAmount(parseRes.getApplyAmount());
-                letterReportData.getFundTransaction().setHoldingDate(parseRes.getHoldingDate());
-                letterReportData.getFundTransaction().setAmount(parseRes.getAmount());
-                letterReportData.getFundTransaction().setShare(parseRes.getShare());
-                letterReportData.getFundTransaction().setNav(parseRes.getNav());
-            }
-            return letterReportData;
+            reportData.setAiParse(true);
+            return reportData;
         }
-        return reportData;
+        // 确认单AI解析失败时重新用OCR识别
+        if (!reportData.wasFailed()) {
+            return reportData;
+        }
+        if (log.isInfoEnabled()) {
+            log.info("确认单报告{} 用ocr补充解析结果。补充前的结果是:{}", fileName, reportData);
+        }
+        LetterReportData letterReportData = (LetterReportData) reportData;
+        OCRLetterParseData parseRes = null;
+        try {
+            parseRes = new OCRReportParser().parseLetterData(fileName, this.ocrParserUrl, images.get(0));
+        } catch (Exception e) {
+            log.error("确认单报告{} OCR提取确认单关键信息出错:{}", fileName, e.getMessage());
+        }
+        if (parseRes == null) {
+            return reportData;
+        }
+        if (letterReportData.getFundInfo() != null) {
+            letterReportData.getFundInfo().setFundName(parseRes.getFundName());
+            letterReportData.getFundInfo().setFundCode(parseRes.getFundCode());
+        }
+        // 投资者信息
+        if (letterReportData.getInvestorInfo() == null) {
+            letterReportData.setInvestorInfo(new ReportInvestorInfoDTO(fileId));
+        }
+        letterReportData.getInvestorInfo().setInvestorName(parseRes.getInvestorName());
+        letterReportData.getInvestorInfo().setCertificateNumber(parseRes.getCertificateNumber());
+        letterReportData.getInvestorInfo().setTradingAccount(parseRes.getTradingAccount());
+        letterReportData.getInvestorInfo().setFundAccount(parseRes.getFundAccount());
+        letterReportData.getInvestorInfo().setCertificateType(parseRes.getCertificateType());
+        // 交易流水
+        if (letterReportData.getTransaction() == null) {
+            letterReportData.setTransaction(new ReportFundTransactionDTO(fileId));
+        }
+        letterReportData.getTransaction().setTransactionType(parseRes.getTransactionType());
+        letterReportData.getTransaction().setApplyDate(parseRes.getApplyDate());
+        letterReportData.getTransaction().setApplyShare(parseRes.getApplyShare());
+        letterReportData.getTransaction().setApplyAmount(parseRes.getApplyAmount());
+        letterReportData.getTransaction().setHoldingDate(parseRes.getHoldingDate());
+        letterReportData.getTransaction().setAmount(parseRes.getAmount());
+        letterReportData.getTransaction().setShare(parseRes.getShare());
+        letterReportData.getTransaction().setNav(parseRes.getNav());
+
+        letterReportData.setAiParse(true);
+        return letterReportData;
     }
 
     /**
@@ -863,7 +870,6 @@ public class EmailParseService {
         } else if (ReportType.LETTER == reportType) {
             reportData = new LetterReportData(baseInfo, fundInfo);
         }
-        reportData.setAiParse(true);
         return reportData;
     }
 

+ 51 - 0
mo-daq/src/main/java/com/smppw/modaq/infrastructure/components/AliyunAIRPC.java

@@ -0,0 +1,51 @@
+//package com.smppw.modaq.infrastructure.components;
+//
+//import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
+//import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
+//import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
+//import com.alibaba.dashscope.common.MultiModalMessage;
+//import com.alibaba.dashscope.common.Role;
+//import com.alibaba.dashscope.exception.NoApiKeyException;
+//import com.alibaba.dashscope.exception.UploadFileException;
+//import cn.hutool.core.map.MapUtil;
+//import cn.hutool.core.util.StrUtil;
+//
+//import java.util.Arrays;
+//import java.util.Collections;
+//import java.util.Map;
+//
+//public class AliyunAIRPC {
+//    private Object callOCR(String localPath, String userMsg) throws NoApiKeyException, UploadFileException {
+//        String filePath = "file://" + localPath;
+//        MultiModalConversation conv = new MultiModalConversation();
+//        Map<String, Object> map = MapUtil.newHashMap();
+//        map.put("image", filePath);
+//        // 输入图像的最大像素阈值,超过该值图像会按原比例缩小,直到总像素低于max_pixels
+//        map.put("max_pixels", "6422528");
+//        // 输入图像的最小像素阈值,小于该值图像会按原比例放大,直到总像素大于min_pixels
+//        map.put("min_pixels", "3136");
+//        // 开启图像自动转正功能
+//        map.put("enable_rotate", true);
+//        MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
+//                .content(Arrays.asList(
+//                        map,
+//                        // qwen-vl-ocr-latest未设置内置任务时,支持在以下text字段中传入Prompt,若未传入则使用默认的Prompt:Please output only the text content from the image without any additional descriptions or formatting.
+//                        // 如调用qwen-vl-ocr-1028,模型会使用固定Prompt:Read all the text in the image.,不支持用户在text中传入自定义Prompt
+//                        Collections.singletonMap("text", userMsg))).build();
+//        String dashscopeApiKey = System.getenv("DASHSCOPE_API_KEY");
+//        if (StrUtil.isBlank(dashscopeApiKey)) {
+//            dashscopeApiKey = "sk-7f1caa54f94047db91d4e36f7ee811c5";
+//        }
+//        MultiModalConversationParam param = MultiModalConversationParam.builder()
+//                // 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
+//                .apiKey(dashscopeApiKey)
+//                .model("qwen-vl-ocr-latest")
+//                .message(userMessage)
+//                .topP(0.001)
+//                .temperature(0.1f)
+//                .maxLength(8192)
+//                .build();
+//        MultiModalConversationResult result = conv.call(param);
+//        return result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text");
+//    }
+//}