|
@@ -3,20 +3,11 @@ package com.smppw.modaq.application.components;
|
|
|
import cn.hutool.core.collection.ListUtil;
|
|
|
import cn.hutool.core.map.MapUtil;
|
|
|
import cn.hutool.core.util.StrUtil;
|
|
|
-import com.smppw.modaq.common.conts.Constants;
|
|
|
import com.smppw.modaq.common.conts.EmailTypeConst;
|
|
|
-import com.smppw.modaq.common.enums.ReportParseStatus;
|
|
|
import com.smppw.modaq.common.enums.ReportType;
|
|
|
import com.smppw.modaq.common.exception.ReportParseException;
|
|
|
import com.smppw.modaq.domain.dto.report.ReportAssetAllocationDTO;
|
|
|
-import org.apache.pdfbox.Loader;
|
|
|
-import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
|
|
|
-import org.apache.pdfbox.pdmodel.PDDocument;
|
|
|
-import technology.tabula.CustomObjectExtractor;
|
|
|
-import technology.tabula.Page;
|
|
|
-import technology.tabula.PageIterator;
|
|
|
-import technology.tabula.Table;
|
|
|
-import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
|
|
|
+import jakarta.mail.internet.MimeUtility;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
import java.util.Calendar;
|
|
@@ -440,53 +431,13 @@ public final class ReportParseUtils {
|
|
|
}
|
|
|
|
|
|
public static void main(String[] args) throws IOException, ReportParseException {
|
|
|
-// String filepath = "C:\\Users\\Administrator\\Desktop\\tmp\\(1)投资者交易确认函【申购】_【SZF635】佳岳国债增强私募证券投资基金_20250217_任军.pdf";
|
|
|
-// String filepath = "C:\\Users\\Administrator\\Desktop\\tmp\\CP080A_优美利赢胜价值1号私募投资基金A_20250217_邓辉_申购确认_20250217131352.pdf";
|
|
|
-// String filepath = "C:\\Users\\Administrator\\Desktop\\tmp\\宁水德远国债宝私募证券投资基金_青国平(S21002741743)_申购_20241213_基金交易确认单a2604e57e9a12108.sign.pdf";
|
|
|
-// String filepath = "C:\\Users\\Administrator\\Desktop\\tmp\\基金分红交易确认函_SJH876_2025-02-12_戴羽晨_202502130107720842.pdf";
|
|
|
- String filepath = "C:\\Users\\Administrator\\Desktop\\tmp\\钧富如风7号私募证券投资基金_陈小明_20250221_073544980_申购确认单.pdf";
|
|
|
-// String filepath = "C:\\Users\\Administrator\\Desktop\\tmp\\SZN224_君之健睿泰私募证券投资基金_郑为民_20250214_申购确认_20250217102704.pdf";
|
|
|
-
|
|
|
-// String aiParserUtl = "http://localhost:8088/upload-filepath";
|
|
|
-//
|
|
|
-// Map<String, Object> params = MapUtil.newHashMap(4);
|
|
|
-// params.put("filepath", filepath);
|
|
|
-// String body = HttpUtil.get(aiParserUtl, params);
|
|
|
-//
|
|
|
-// String content = "{" +
|
|
|
-// StrUtil.subAfter(body, "{", false)
|
|
|
-// .replaceAll("\\\\", "")
|
|
|
-// .replaceAll("n", "")
|
|
|
-// .replaceAll(" ", "") +
|
|
|
-// "}";
|
|
|
-// System.out.println(content);
|
|
|
-
|
|
|
- List<String> textList;
|
|
|
- // 解析报告和表格
|
|
|
- try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(filepath))) {
|
|
|
- // 识别所有文字(去水印后的)
|
|
|
- CustomPDFTextStripper stripper = new CustomPDFTextStripper(true, "");
|
|
|
- String text = stripper.getText(document).replace(Constants.WATERMARK_REPLACE, StrUtil.EMPTY);
|
|
|
- textList = StrUtil.split(text, System.lineSeparator());
|
|
|
- textList.removeIf(StrUtil::isBlank);
|
|
|
- if (textList.isEmpty()) {
|
|
|
- throw new ReportParseException(ReportParseStatus.REPORT_IS_SCAN, "");
|
|
|
- }
|
|
|
- // 解析所有表格(单元格字符去水印)
|
|
|
- List<Table> tables = ListUtil.list(true);
|
|
|
-// BasicExtractionAlgorithm extractionAlgorithm = new BasicExtractionAlgorithm();
|
|
|
- SpreadsheetExtractionAlgorithm spreadsheetExtractionAlgorithm = new SpreadsheetExtractionAlgorithm();
|
|
|
- // 自定义表格提取工具,去除单元格中的水印文字
|
|
|
- PageIterator pageIterator = new CustomObjectExtractor(document).extract();
|
|
|
- while (pageIterator.hasNext()) {
|
|
|
- Page page = pageIterator.next();
|
|
|
- List<Table> tablesList = spreadsheetExtractionAlgorithm.extract(page);
|
|
|
- tables.addAll(tablesList);
|
|
|
- }
|
|
|
- if (tables.isEmpty()) {
|
|
|
- throw new ReportParseException(ReportParseStatus.REPORT_IS_SCAN, "");
|
|
|
- }
|
|
|
-// this.initTableInfo(tables);
|
|
|
- }
|
|
|
+ String s = """
|
|
|
+ =?utf-8?b?5Y2D6LGh5Y2T6LaKMuWPt+S4reivgTUwMOaMh+aVsOWinuW8uuengeWLn+ivgQ==?=
|
|
|
+ =?utf-8?b?5Yi45oqV6LWE5Z+66YeRLeWNg+ixoeWNk+i2ijLlj7fkuK3or4E1MDDmjIfmlbA=?=
|
|
|
+ =?utf-8?b?5aKe5by656eB5Yuf6K+B5Yi45oqV6LWE5Z+66YeRMjAyNeW5tDTmnIjmnIjluqY=?=
|
|
|
+ =?utf-8?b?5oql5ZGKLTIwMjUwNTEyLnBkZg==?=
|
|
|
+ """;
|
|
|
+ String s1 = MimeUtility.decodeText(s);
|
|
|
+ System.out.println(s1);
|
|
|
}
|
|
|
}
|