Bläddra i källkod

fix:报告解析优化

wangzaijun 2 månader sedan
förälder
incheckning
1a54e0db8b

+ 9 - 2
mo-daq-openai/web/route.py

@@ -12,10 +12,13 @@ client = OpenAI(
 )
 
 
+DEFAULT_USER_MSG = "解析文件中的表格内容:要求准确识别金额等小数的位数,日期用yyyy-MM-dd的格式返回,去掉金额单位、英文和多余的空格,结果用字典返回"
+
+
 @app.get("/upload-filepath")
 async def create_upload_file(filepath: str = None,
                              file_id: str = None,
-                             user_msg: str = '请准确提取文件中的表格内容,要求用字典返回并去掉金额单位、英文和空格'):
+                             user_msg: str = DEFAULT_USER_MSG):
     # Read the file contents (optional)
     # contents = await file.read()
 
@@ -30,6 +33,8 @@ async def create_upload_file(filepath: str = None,
     # Initialize the messages list
     completion = client.chat.completions.create(
         model="qwen-long",
+        temperature=0.1,
+        presence_penalty=1,
         messages=[
             {'role': 'system', 'content': 'You are a helpful assistant.'},
             {'role': 'system', 'content': f'fileid://{file_id}'},
@@ -43,7 +48,7 @@ async def create_upload_file(filepath: str = None,
 @app.post("/upload-file")
 async def create_upload_file(file: UploadFile = File(...),
                              file_id: str = None,
-                             user_msg: str = '请准确提取文件中的表格内容,要求用字典返回并去掉金额单位、英文和空格'):
+                             user_msg: str = DEFAULT_USER_MSG):
     if file_id is None:
         # Read the file contents (optional)
         contents = await file.read()
@@ -58,6 +63,8 @@ async def create_upload_file(file: UploadFile = File(...),
     # Initialize the messages list
     completion = client.chat.completions.create(
         model="qwen-long",
+        temperature=0.1,
+        presence_penalty=1,
         messages=[
             {'role': 'system', 'content': 'You are a helpful assistant.'},
             {'role': 'system', 'content': f'fileid://{file_id}'},

+ 11 - 3
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/AbstractPDReportParser.java

@@ -61,9 +61,6 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
 
     @Override
     public T parse(ReportParserParams params) throws IOException, ReportParseException {
-        // 先初始化为null
-        this.fundInfoTable = null;
-        this.textList = null;
         // Initialize
         this.init();
         String filename = params.getFilename();
@@ -169,6 +166,17 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
         // cleaning.
     }
 
+    @Override
+    protected void init() {
+        super.init();
+        // Reset this parser's fields to null / false after the superclass init; parse() calls init() first
+        this.fundInfoTable = null;
+        this.textList = null;
+        this.aiFileId = null;
+        this.aiParserContent = null;
+        this.aiParse = false;
+    }
+
 //    /**
 //     * 构建只有两列表格的dto数据对象,如果有分级基金时(并且一个表格可能跨页)
 //     *

+ 2 - 2
mo-daq/src/test/java/com/smppw/modaq/MoDaqApplicationTests.java

@@ -35,8 +35,8 @@ public class MoDaqApplicationTests {
     @Test
     public void reportTest() {
         MailboxInfoDTO emailInfoDTO = this.buildMailbox("xx@simuwang.com", "**");
-        Date startDate = DateUtil.parse("2025-02-25 14:56:00", DateConst.YYYY_MM_DD_HH_MM_SS);
-        Date endDate = DateUtil.parse("2025-02-25 14:58:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date startDate = DateUtil.parse("2025-02-27 09:18:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date endDate = DateUtil.parse("2025-02-27 09:58:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         try {
             emailParseService.parseEmail(emailInfoDTO, startDate, endDate);
         } catch (Exception e) {