PdfUtil.java 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. package com.smppw.modaq.infrastructure.util;
  2. import cn.hutool.core.collection.ListUtil;
  3. import cn.hutool.core.io.FileUtil;
  4. import org.apache.pdfbox.Loader;
  5. import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
  6. import org.apache.pdfbox.pdmodel.PDDocument;
  7. import org.apache.pdfbox.rendering.PDFRenderer;
  8. //import org.docx4j.Docx4J;
  9. //import org.docx4j.openpackaging.packages.OpcPackage;
  10. //import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
  11. import javax.imageio.ImageIO;
  12. import java.awt.*;
  13. import java.awt.image.BufferedImage;
  14. import java.io.*;
  15. import java.nio.file.Files;
  16. import java.nio.file.StandardCopyOption;
  17. import java.util.List;
  18. public class PdfUtil {
  19. // public static void validateDocx(String path) throws Exception {
  20. // File file = new File(path);
  21. //
  22. // // 基础检查
  23. // if (!file.exists()) throw new FileNotFoundException("文件不存在");
  24. // if (!file.canRead()) throw new IOException("无读取权限");
  25. // if (file.length() == 0) throw new IOException("文件为空");
  26. //
  27. // // 文件头检查
  28. // try (InputStream is = new FileInputStream(file)) {
  29. // byte[] header = new byte[4];
  30. // if (is.read(header) < 4) throw new IOException("文件过小");
  31. // if (header[0] != 0x50 || header[1] != 0x4B) { // PK 头
  32. // throw new IOException("不是ZIP格式文件");
  33. // }
  34. // }
  35. //
  36. // // 尝试作为ZIP打开
  37. // try (java.util.zip.ZipFile zip = new java.util.zip.ZipFile(file)) {
  38. // if (zip.getEntry("[Content_Types].xml") == null) {
  39. // throw new IOException("缺少[Content_Types].xml");
  40. // }
  41. // } catch (IOException e) {
  42. // throw new IOException("无效的ZIP格式: " + e.getMessage());
  43. // }
  44. // }
  45. // public static void convertDocxToPdf(String input, String output) throws Exception {
  46. // validateDocx(input);
  47. // try (OutputStream os = new FileOutputStream(output)) {
  48. // OpcPackage opc = OpcPackage.load(new File(input));
  49. // if (opc instanceof WordprocessingMLPackage) {
  50. // Docx4J.toPDF((WordprocessingMLPackage) opc, os);
  51. // } else {
  52. // throw new Exception("不是WordprocessingML文档");
  53. // }
  54. // }
  55. // }
  56. /**
  57. * 将 PDF 的首页和尾页转换为 PNG 图片
  58. *
  59. * @param pdfFilepath 输入的 PDF 文件
  60. * @param outputDir 输出目录
  61. * @param dpi 图片分辨率(默认建议 300)
  62. * @return 生成的图片文件列表
  63. */
  64. public static List<String> convertFirstAndLastPagesToPng(String pdfFilepath, File outputDir, int dpi) throws IOException {
  65. List<String> generatedImages = ListUtil.list(false);
  66. try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(pdfFilepath))) {
  67. int totalPages = document.getNumberOfPages();
  68. if (totalPages == 0) {
  69. throw new IOException("PDF 文件无有效页面");
  70. }
  71. // 准备输出目录
  72. if (!outputDir.exists() && !outputDir.mkdirs()) {
  73. throw new IOException("无法创建输出目录: " + outputDir);
  74. }
  75. String baseName = FileUtil.mainName(pdfFilepath);
  76. PDFRenderer renderer = new PDFRenderer(document);
  77. // 始终处理首页(页码从1开始)
  78. String firstImg = renderPage(renderer, 0, baseName + ".png", outputDir, dpi);
  79. generatedImages.add(compressAndSave(firstImg));
  80. // 处理尾页(当总页数 > 1 时)
  81. if (totalPages > 1) {
  82. String lastImg = renderPage(renderer, totalPages - 1,
  83. baseName + "_footer.png",
  84. outputDir, dpi);
  85. generatedImages.add(compressAndSave(lastImg));
  86. }
  87. }
  88. return generatedImages;
  89. }
  90. /**
  91. * 渲染单页并保存为图片
  92. */
  93. private static String renderPage(PDFRenderer renderer, int pageIndex,
  94. String fileName, File outputDir, int dpi) throws IOException {
  95. BufferedImage image = renderer.renderImageWithDPI(pageIndex, dpi);
  96. File outputFile = new File(outputDir, fileName);
  97. ImageIO.write(image, "PNG", outputFile);
  98. return outputFile.getAbsolutePath();
  99. }
  100. /**
  101. * 检查图片分辨率是否超过限制
  102. *
  103. * @param image 图片对象
  104. * @param maxSize 最大允许尺寸(单边像素数)
  105. * @return 是否超出限制
  106. */
  107. private static boolean isResolutionExceeded(BufferedImage image, int maxSize) {
  108. return image.getWidth() > maxSize || image.getHeight() > maxSize;
  109. }
  110. // /**
  111. // * 压缩图片并转换为Base64
  112. // *
  113. // * @param inputFile 输入图片文件
  114. // * @param maxSize 最大允许尺寸(单边像素数)
  115. // * @param quality 压缩质量 (0.0-1.0)
  116. // * @param format 输出格式 ("jpg", "png"等)
  117. // * @return Base64编码的图片数据
  118. // */
  119. // public static String compressAndConvertToBase64(File inputFile, int maxSize, float quality, String format)
  120. // throws IOException {
  121. //
  122. // // 读取原始图片
  123. // BufferedImage originalImage = ImageIO.read(inputFile);
  124. //
  125. // // 检查分辨率是否超出限制
  126. // if (!isResolutionExceeded(originalImage, maxSize)) {
  127. // System.out.println("图片尺寸符合要求,无需压缩");
  128. // }
  129. //
  130. // // 计算新尺寸(保持宽高比)
  131. // int originalWidth = originalImage.getWidth();
  132. // int originalHeight = originalImage.getHeight();
  133. // double ratio = (double) maxSize / Math.max(originalWidth, originalHeight);
  134. // int newWidth = (int) (originalWidth * ratio);
  135. // int newHeight = (int) (originalHeight * ratio);
  136. //
  137. // // 创建缩放后的图片
  138. // Image scaledImage = originalImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);
  139. // BufferedImage outputImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
  140. //
  141. // // 绘制缩放后的图片
  142. // Graphics2D g2d = outputImage.createGraphics();
  143. // g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
  144. // g2d.drawImage(scaledImage, 0, 0, null);
  145. // g2d.dispose();
  146. //
  147. // // 转换为Base64
  148. // ByteArrayOutputStream baos = new ByteArrayOutputStream();
  149. // ImageIO.write(outputImage, format, baos);
  150. // byte[] imageBytes = baos.toByteArray();
  151. //
  152. // return Base64.getEncoder().encodeToString(imageBytes);
  153. // }
  154. public static String compressAndSave(String inputFile) throws IOException {
  155. return compressAndSave(FileUtil.file(inputFile));
  156. }
  157. public static String compressAndSave(File inputFile) throws IOException {
  158. int maxSize = 8192;
  159. String format = FileUtil.extName(inputFile);
  160. return compressAndSave(inputFile, null, maxSize, format);
  161. }
  162. /**
  163. * 压缩图片并保存到文件
  164. *
  165. * @param inputFile 输入文件
  166. * @param outputFile 输出文件
  167. * @param maxSize 最大尺寸
  168. * @param format 输出格式
  169. */
  170. public static String compressAndSave(File inputFile, File outputFile, int maxSize, String format)
  171. throws IOException {
  172. BufferedImage originalImage = ImageIO.read(inputFile);
  173. if (!isResolutionExceeded(originalImage, maxSize)) {
  174. if (outputFile != null) {
  175. // 直接复制文件
  176. Files.copy(inputFile.toPath(), outputFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
  177. return outputFile.getAbsolutePath();
  178. }
  179. return inputFile.getAbsolutePath();
  180. }
  181. if (outputFile == null) {
  182. outputFile = inputFile;
  183. }
  184. int originalWidth = originalImage.getWidth();
  185. int originalHeight = originalImage.getHeight();
  186. double ratio = (double) maxSize / Math.max(originalWidth, originalHeight);
  187. int newWidth = (int) (originalWidth * ratio);
  188. int newHeight = (int) (originalHeight * ratio);
  189. Image scaledImage = originalImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);
  190. BufferedImage outputImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
  191. Graphics2D g2d = outputImage.createGraphics();
  192. g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
  193. g2d.drawImage(scaledImage, 0, 0, null);
  194. g2d.dispose();
  195. // 保存到文件
  196. ImageIO.write(outputImage, format, outputFile);
  197. return outputFile.getAbsolutePath();
  198. }
  199. public static void main(String[] args) {
  200. try {
  201. // 示例用法
  202. File inputFile = new File("D:\\home\\wwwroot\\daq_report_file\\wangzaijun@simuwang.com\\20250605\\image\\泓湖泓福积极配置2期私募证券投资基金-周报-20250530.png");
  203. int maxSize = 8192; // OpenAI限制
  204. // 1. 检查图片是否超出限制
  205. BufferedImage image = ImageIO.read(inputFile);
  206. if (isResolutionExceeded(image, maxSize)) {
  207. System.out.println("图片超出尺寸限制,需要压缩");
  208. System.out.println("原始尺寸: " + image.getWidth() + "x" + image.getHeight());
  209. // // 2. 压缩并转换为Base64
  210. // String base64Image = compressAndConvertToBase64(inputFile, maxSize, 0.85f, "png");
  211. // System.out.println("Base64 数据长度: " + base64Image.length());
  212. // System.out.println("Base64 前缀: " + base64Image.substring(0, 50) + "...");
  213. // 3. 压缩并保存到文件
  214. File outputFile = new File("D:\\home\\wwwroot\\daq_report_file\\wangzaijun@simuwang.com\\20250605\\image\\泓湖泓福积极配置2期私募证券投资基金-周报-20250530.png");
  215. String output = compressAndSave(inputFile, outputFile, maxSize, "png");
  216. System.out.println("图片已压缩保存到: " + output);
  217. // 验证压缩后尺寸
  218. BufferedImage compressedImage = ImageIO.read(outputFile);
  219. System.out.println("压缩后尺寸: " + compressedImage.getWidth() + "x" + compressedImage.getHeight());
  220. } else {
  221. System.out.println("图片尺寸符合要求");
  222. }
  223. } catch (IOException e) {
  224. e.printStackTrace();
  225. }
  226. }
  227. }