1
0

CustomPDFTextStripper.java 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. package com.simuwang.daq.components;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.collection.ListUtil;
  4. import org.apache.pdfbox.text.PDFTextStripper;
  5. import org.apache.pdfbox.text.TextPosition;
  6. import org.apache.pdfbox.util.Matrix;
  7. import java.io.IOException;
  8. import java.util.List;
  9. /**
  10. * @author wangzaijun
  11. * @date 2024/9/12 14:00
  12. * @description 自定义的文本去水印方法,发现水印基本是旋转文字并且比报告内其他文字都大
  13. */
  14. public class CustomPDFTextStripper extends PDFTextStripper {
  15. private final float[] watermarkWidth = {0f};
  16. @Override
  17. protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
  18. List<String> newTexts = ListUtil.list(false);
  19. for (TextPosition textPosition : textPositions) {
  20. Matrix textMatrix = textPosition.getTextMatrix();
  21. float col = textMatrix.getValue(0, 1);
  22. float width = textPosition.getWidth();
  23. if (col == 0.) {
  24. if (width < watermarkWidth[0]) {
  25. newTexts.add(textPosition.getUnicode());
  26. }
  27. } else {
  28. if (width > watermarkWidth[0]) {
  29. watermarkWidth[0] = width;
  30. }
  31. newTexts.add("++");
  32. }
  33. }
  34. if (CollUtil.isNotEmpty(newTexts)) {
  35. super.writeString(String.join("", newTexts));
  36. }
  37. }
  38. }