123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600 |
- package com.smppw.modaq.domain.service;
- import cn.hutool.core.collection.CollUtil;
- import cn.hutool.core.collection.ListUtil;
- import cn.hutool.core.date.DateUtil;
- import cn.hutool.core.exceptions.ExceptionUtil;
- import cn.hutool.core.map.MapUtil;
- import cn.hutool.core.util.StrUtil;
- import com.smppw.modaq.application.components.ReportParseUtils;
- import com.smppw.modaq.application.components.report.parser.ReportParser;
- import com.smppw.modaq.application.components.report.parser.ReportParserFactory;
- import com.smppw.modaq.application.components.report.writer.ReportWriter;
- import com.smppw.modaq.application.components.report.writer.ReportWriterFactory;
- import com.smppw.modaq.application.util.EmailUtil;
- import com.smppw.modaq.common.conts.DateConst;
- import com.smppw.modaq.common.conts.EmailParseStatusConst;
- import com.smppw.modaq.common.conts.EmailTypeConst;
- import com.smppw.modaq.common.enums.ReportParseStatus;
- import com.smppw.modaq.common.enums.ReportParserFileType;
- import com.smppw.modaq.common.enums.ReportType;
- import com.smppw.modaq.common.exception.ReportParseException;
- import com.smppw.modaq.domain.dto.EmailContentInfoDTO;
- import com.smppw.modaq.domain.dto.EmailZipFileDTO;
- import com.smppw.modaq.domain.dto.MailboxInfoDTO;
- import com.smppw.modaq.domain.dto.report.ParseResult;
- import com.smppw.modaq.domain.dto.report.ReportData;
- import com.smppw.modaq.domain.dto.report.ReportParserParams;
- import com.smppw.modaq.domain.entity.EmailFileInfoDO;
- import com.smppw.modaq.domain.entity.EmailParseInfoDO;
- import com.smppw.modaq.domain.mapper.EmailFileInfoMapper;
- import com.smppw.modaq.domain.mapper.EmailParseInfoMapper;
- import com.smppw.modaq.infrastructure.util.ExcelUtil;
- import com.smppw.modaq.infrastructure.util.FileUtil;
- import jakarta.mail.*;
- import jakarta.mail.internet.MimeUtility;
- import jakarta.mail.search.ComparisonTerm;
- import jakarta.mail.search.ReceivedDateTerm;
- import jakarta.mail.search.SearchTerm;
- import org.apache.commons.compress.archivers.ArchiveException;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- import org.springframework.beans.factory.annotation.Value;
- import org.springframework.stereotype.Service;
- import org.springframework.util.StopWatch;
- import java.io.File;
- import java.io.IOException;
- import java.nio.file.Path;
- import java.nio.file.Paths;
- import java.util.*;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import java.util.stream.Collectors;
- /**
- * @author mozuwen
- * @date 2024-09-04
- * @description 邮件解析服务
- */
- @Service
- public class EmailParseService {
private static final Logger log = LoggerFactory.getLogger(EmailParseService.class);

/** Mapper used to insert and update the per-email parse records. */
private final EmailParseInfoMapper emailParseInfoMapper;
/** Mapper used to insert and update the per-file records that belong to an email. */
private final EmailFileInfoMapper emailFileInfoMapper;
/* Factories that hand out the report parser and the report writer. */
private final ReportParserFactory reportParserFactory;
private final ReportWriterFactory reportWriterFactory;

/** Base directory for downloaded attachments, bound from the {@code email.file.path} property. */
@Value("${email.file.path}")
private String path;

/**
 * Creates the service with its collaborators.
 *
 * @param emailParseInfoMapper mapper for email parse records
 * @param emailFileInfoMapper  mapper for email file records
 * @param reportParserFactory  factory that supplies report parsers
 * @param reportWriterFactory  factory that supplies report writers
 */
public EmailParseService(EmailParseInfoMapper emailParseInfoMapper,
                         EmailFileInfoMapper emailFileInfoMapper,
                         ReportParserFactory reportParserFactory,
                         ReportWriterFactory reportWriterFactory) {
    this.reportWriterFactory = reportWriterFactory;
    this.reportParserFactory = reportParserFactory;
    this.emailFileInfoMapper = emailFileInfoMapper;
    this.emailParseInfoMapper = emailParseInfoMapper;
}
- /**
- * 解析指定邮箱指定时间范围内的邮件
- *
- * @param mailboxInfoDTO 邮箱配置信息
- * @param startDate 邮件起始日期(yyyy-MM-dd HH:mm:ss)
- * @param endDate 邮件截止日期(yyyy-MM-dd HH:mm:ss, 为null,将解析邮件日期小于等于startDate的当天邮件)
- */
- public void parseEmail(MailboxInfoDTO mailboxInfoDTO, Date startDate, Date endDate) {
- log.info("开始邮件解析 -> 邮箱信息:{},开始时间:{},结束时间:{}", mailboxInfoDTO, DateUtil.format(startDate,
- DateConst.YYYY_MM_DD_HH_MM_SS), DateUtil.format(endDate, DateConst.YYYY_MM_DD_HH_MM_SS));
- // 邮件类型配置
- Map<Integer, List<String>> emailTypeMap = getEmailType();
- // 邮件字段识别映射表
- // Map<String, List<String>> emailFieldMap = getEmailFieldMapping();
- Map<String, List<EmailContentInfoDTO>> emailContentMap;
- try {
- emailContentMap = realEmail(mailboxInfoDTO, emailTypeMap, startDate, endDate);
- } catch (Exception e) {
- log.info("采集邮件失败 -> 邮箱配置信息:{},堆栈信息:{}", mailboxInfoDTO, ExceptionUtil.stacktraceToString(e));
- return;
- }
- if (MapUtil.isEmpty(emailContentMap)) {
- log.info("未采集到邮件 -> 邮箱配置信息:{},开始时间:{},结束时间:{}", mailboxInfoDTO,
- DateUtil.format(startDate, DateConst.YYYY_MM_DD_HH_MM_SS), DateUtil.format(endDate, DateConst.YYYY_MM_DD_HH_MM_SS));
- return;
- }
- for (Map.Entry<String, List<EmailContentInfoDTO>> emailEntry : emailContentMap.entrySet()) {
- List<EmailContentInfoDTO> emailContentInfoDTOList = emailEntry.getValue();
- if (CollUtil.isEmpty(emailContentInfoDTOList)) {
- log.warn("未采集到正文或附件");
- continue;
- }
- log.info("开始解析邮件数据 -> 邮件主题:{},邮件日期:{}", emailContentInfoDTOList.get(0).getEmailTitle(), emailContentInfoDTOList.get(0).getEmailDate());
- Map<EmailContentInfoDTO, List<EmailZipFileDTO>> emailZipFileMap = MapUtil.newHashMap();
- Iterator<EmailContentInfoDTO> iterator = emailContentInfoDTOList.iterator();
- while (iterator.hasNext()) {
- EmailContentInfoDTO emailContentInfoDTO = iterator.next();
- // 正文不用解压附件
- if (emailContentInfoDTO.getFileName() != null && emailContentInfoDTO.getFileName().endsWith(".html")) {
- emailZipFileMap.put(emailContentInfoDTO, ListUtil.empty());
- continue;
- }
- try {
- List<EmailZipFileDTO> fundNavDTOList = parseZipEmail(emailContentInfoDTO);
- emailZipFileMap.put(emailContentInfoDTO, fundNavDTOList);
- } catch (IOException | ArchiveException e) {
- log.error("压缩包解压失败:{}", ExceptionUtil.stacktraceToString(e));
- EmailParseInfoDO fail = buildEmailParseInfo(null, mailboxInfoDTO.getAccount(), emailContentInfoDTO);
- fail.setFailReason("压缩包解压失败");
- fail.setParseStatus(EmailParseStatusConst.FAIL);
- fail.setEmailKey(emailEntry.getKey());
- this.emailParseInfoMapper.insert(fail);
- iterator.remove();
- } catch (Exception e) {
- log.error("堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
- }
- }
- // 保存相关信息 -> 邮件信息表,邮件文件表,邮件净值表,邮件规模表,基金净值表
- saveRelatedTable(emailEntry.getKey(), mailboxInfoDTO.getAccount(), emailZipFileMap);
- log.info("结束邮件解析 -> 邮箱信息:{},开始时间:{},结束时间:{}", mailboxInfoDTO,
- DateUtil.format(startDate, DateConst.YYYY_MM_DD_HH_MM_SS), DateUtil.format(endDate, DateConst.YYYY_MM_DD_HH_MM_SS));
- }
- }
- public List<EmailZipFileDTO> parseZipEmail(EmailContentInfoDTO emailContentInfoDTO) throws Exception {
- List<EmailZipFileDTO> resultList = ListUtil.list(false);
- Integer emailType = emailContentInfoDTO.getEmailType();
- String filepath = emailContentInfoDTO.getFilePath();
- if (ExcelUtil.isZip(filepath)) {
- handleCompressedFiles(filepath, ".zip", emailType, resultList);
- } else if (ExcelUtil.isRAR(filepath)) {
- handleCompressedFiles(filepath, ".rar", emailType, resultList);
- }
- return resultList;
- }
- private void handleCompressedFiles(String filepath, String extension, Integer emailType, List<EmailZipFileDTO> resultList) throws Exception {
- String destPath = getDestinationPath(filepath, extension);
- log.info("压缩包地址:{}, 解压后文件地址:{}", filepath, destPath);
- File destFile = new File(destPath);
- if (!destFile.exists()) {
- if (!destFile.mkdirs()) {
- throw new IOException("无法创建目标目录: " + destPath);
- }
- }
- List<String> extractedDirs;
- if (ExcelUtil.isZip(filepath)) {
- extractedDirs = ExcelUtil.extractCompressedFiles(filepath, destPath);
- } else if (ExcelUtil.isRAR(filepath)) {
- extractedDirs = ExcelUtil.extractRar5(filepath, destPath);
- } else {
- return;
- }
- for (String dir : extractedDirs) {
- // 如果邮件类型不满足解析条件则重新根据文件名判断
- if (!Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)
- && !Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType)) {
- emailType = EmailUtil.getEmailTypeBySubject(dir, this.getEmailType());
- }
- File file = new File(dir);
- if (file.isDirectory()) {
- String[] subDirs = file.list();
- if (subDirs != null) {
- for (String subDir : subDirs) {
- resultList.add(new EmailZipFileDTO(subDir, emailType));
- }
- } else {
- log.warn("目录 {} 下无文件", dir);
- }
- } else {
- resultList.add(new EmailZipFileDTO(dir, emailType));
- }
- }
- }
- private String getDestinationPath(String filepath, String extension) {
- Path path = Paths.get(filepath);
- String fileName = path.getFileName().toString();
- String baseName = fileName.substring(0, fileName.length() - extension.length());
- return path.getParent().resolve(baseName).toString();
- }
- public void saveRelatedTable(String emailKey, String emailAddress,
- Map<EmailContentInfoDTO, List<EmailZipFileDTO>> emailZipFileMap) {
- // python 报告解析接口结果
- List<ParseResult<ReportData>> dataList = ListUtil.list(false);
- for (Map.Entry<EmailContentInfoDTO, List<EmailZipFileDTO>> entry : emailZipFileMap.entrySet()) {
- EmailContentInfoDTO emailContentInfoDTO = entry.getKey();
- if (emailContentInfoDTO.getFileName() != null && emailContentInfoDTO.getFileName().endsWith(".html")) {
- continue;
- }
- Integer emailId = emailContentInfoDTO.getEmailId();
- EmailParseInfoDO emailParseInfoDO = buildEmailParseInfo(emailId, emailAddress, emailContentInfoDTO);
- emailParseInfoDO.setEmailKey(emailKey);
- emailId = saveEmailParseInfo(emailParseInfoDO);
- if (emailId == null) {
- continue;
- }
- List<EmailZipFileDTO> zipFiles = entry.getValue();
- if (CollUtil.isNotEmpty(zipFiles)) {
- for (EmailZipFileDTO zipFile : zipFiles) {
- EmailFileInfoDO emailFile = saveEmailFileInfo(emailId, null, zipFile.getFilename(), zipFile.getFilepath(), null);
- // 解析结果(可以从python获取或者自行解析)并保存报告
- ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailFile.getId(), zipFile.getFilename(),
- zipFile.getFilepath(), zipFile.getEmailType(), emailFile.getAiFileId());
- dataList.add(parseResult);
- }
- } else {
- String fileName = emailContentInfoDTO.getFileName();
- EmailFileInfoDO emailFile = saveEmailFileInfo(emailId, emailContentInfoDTO.getFileId(), fileName,
- emailContentInfoDTO.getFilePath(), emailContentInfoDTO.getAiFileId());
- // 解析结果(可以从python获取或者自行解析)并保存报告
- ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailFile.getId(), fileName,
- emailContentInfoDTO.getFilePath(), emailContentInfoDTO.getEmailType(), emailFile.getAiFileId());
- dataList.add(parseResult);
- }
- String failReason = null;
- int emailParseStatus = EmailParseStatusConst.SUCCESS;
- // 报告邮件有一条失败就表示整个邮件解析失败
- if (CollUtil.isNotEmpty(dataList)) {
- // ai解析结果
- List<ReportData> aiParaseList = dataList.stream().map(ParseResult::getData)
- .filter(Objects::nonNull).filter(e -> Objects.equals(true, e.getAiParse())).toList();
- if (CollUtil.isNotEmpty(aiParaseList)) {
- for (ReportData data : aiParaseList) {
- this.emailFileInfoMapper.updateAiParseByFileId(data.getBaseInfo().getFileId(), data.getAiParse(), data.getAiFileId());
- }
- }
- long failNum = dataList.stream().filter(e -> !Objects.equals(EmailParseStatusConst.SUCCESS, e.getStatus())).count();
- if (failNum > 0) {
- emailParseStatus = EmailParseStatusConst.FAIL;
- failReason = dataList.stream().map(ParseResult::getMsg).collect(Collectors.joining(";"));
- }
- }
- emailParseInfoMapper.updateParseStatus(emailId, emailParseStatus, failReason);
- }
- }
- private ParseResult<ReportData> parseReportAndHandleResult(int fileId, String fileName,
- String filepath, Integer emailType, String aiFileId) {
- ParseResult<ReportData> result = new ParseResult<>();
- boolean reportFlag = !Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)
- && !Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType);
- if (reportFlag || StrUtil.isBlank(fileName) || fileName.endsWith(".html")) {
- result.setStatus(ReportParseStatus.NOT_A_REPORT.getCode());
- result.setMsg(StrUtil.format(ReportParseStatus.NOT_A_REPORT.getMsg(), fileName));
- return result;
- }
- Pattern pattern = Pattern.compile("[A-Z0-9]{6}");
- Matcher matcher = pattern.matcher(fileName);
- String registerNumber = null;
- if (matcher.find()) {
- registerNumber = matcher.group();
- }
- // 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
- ReportType reportType = ReportParseUtils.matchReportType(fileName);
- if (Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType)) {
- reportType = ReportType.LETTER;
- }
- // 解析器--如果开启python解析则直接调用python接口,否则根据文件后缀获取对应解析器
- ReportParserFileType fileType;
- String fileSuffix = StrUtil.subAfter(fileName, ".", true);
- fileType = ReportParserFileType.getBySuffix(fileSuffix);
- // 不支持的格式
- if (fileType == null) {
- result.setStatus(ReportParseStatus.NO_SUPPORT_TEMPLATE.getCode());
- result.setMsg(StrUtil.format(ReportParseStatus.NO_SUPPORT_TEMPLATE.getMsg(), fileName));
- return result;
- }
- // 不是定期报告的判断逻辑放在不支持的格式下面
- if (reportType == null) {
- result.setStatus(ReportParseStatus.NOT_A_REPORT.getCode());
- result.setMsg(StrUtil.format(ReportParseStatus.NOT_A_REPORT.getMsg(), fileName));
- return result;
- }
- // 解析报告
- ReportData reportData = null;
- StopWatch parserWatch = new StopWatch();
- parserWatch.start();
- try {
- ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName)
- .filepath(filepath).registerNumber(registerNumber).reportType(reportType).aiFileId(aiFileId).build();
- ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
- reportData = instance.parse(params);
- result.setStatus(1);
- result.setMsg("报告解析成功");
- result.setData(reportData);
- } catch (ReportParseException e) {
- log.error("解析失败\n{}", StrUtil.format(e.getMsg(), fileName));
- result.setStatus(e.getCode());
- result.setMsg(StrUtil.format(e.getMsg(), fileName));
- } catch (Exception e) {
- log.error("解析错误\n{}", ExceptionUtil.stacktraceToString(e));
- result.setStatus(ReportParseStatus.PARSE_FAIL.getCode());
- result.setMsg(StrUtil.format(ReportParseStatus.PARSE_FAIL.getMsg(), e.getMessage()));
- } finally {
- parserWatch.stop();
- if (log.isInfoEnabled()) {
- log.info("报告{}解析结果为{},耗时{}ms", fileName, reportData, parserWatch.getTotalTimeMillis());
- }
- }
- // 保存报告解析结果
- if (reportData != null) {
- StopWatch writeWatch = new StopWatch();
- writeWatch.start();
- try {
- ReportWriter<ReportData> instance = this.reportWriterFactory.getInstance(reportType);
- instance.write(reportData);
- } catch (Exception e) {
- log.error("报告{}结果保存失败\n{}", fileName, ExceptionUtil.stacktraceToString(e));
- } finally {
- writeWatch.stop();
- if (log.isInfoEnabled()) {
- log.info("报告{}解析结果保存完成,耗时{}ms", fileName, writeWatch.getTotalTimeMillis());
- }
- }
- }
- return result;
- }
- private EmailFileInfoDO saveEmailFileInfo(Integer emailId, Integer fileId, String fileName, String filePath, String aiFileId) {
- EmailFileInfoDO emailFileInfoDO = buildEmailFileInfoDO(emailId, fileId, fileName, filePath);
- emailFileInfoDO.setAiFileId(aiFileId);
- if (emailFileInfoDO.getId() != null) {
- emailFileInfoMapper.updateTimeById(fileId, new Date());
- return emailFileInfoDO;
- }
- emailFileInfoMapper.insert(emailFileInfoDO);
- return emailFileInfoDO;
- }
- private EmailFileInfoDO buildEmailFileInfoDO(Integer emailId, Integer fileId, String fileName, String filePath) {
- EmailFileInfoDO emailFileInfoDO = new EmailFileInfoDO();
- emailFileInfoDO.setId(fileId);
- emailFileInfoDO.setEmailId(emailId);
- emailFileInfoDO.setFileName(fileName);
- emailFileInfoDO.setFilePath(filePath);
- emailFileInfoDO.setIsvalid(1);
- emailFileInfoDO.setCreatorId(0);
- emailFileInfoDO.setCreateTime(new Date());
- emailFileInfoDO.setUpdaterId(0);
- emailFileInfoDO.setUpdateTime(new Date());
- return emailFileInfoDO;
- }
- private Integer saveEmailParseInfo(EmailParseInfoDO emailParseInfoDO) {
- if (emailParseInfoDO == null) {
- return null;
- }
- // 重新邮件功能 -> 修改解析时间和更新时间
- if (emailParseInfoDO.getId() != null) {
- emailParseInfoMapper.updateParseTime(emailParseInfoDO.getId(), emailParseInfoDO.getParseDate());
- return emailParseInfoDO.getId();
- }
- // // 根据邮件发送人、邮件地址、邮箱日期、主题找到是否已经存在的记录(不管是否成功),已存在就不解析了
- // EmailParseInfoDO temp = this.emailParseInfoMapper.searchEmail(emailParseInfoDO);
- // if (temp != null) {
- // return null;
- // }
- emailParseInfoMapper.insert(emailParseInfoDO);
- return emailParseInfoDO.getId();
- }
- private EmailParseInfoDO buildEmailParseInfo(Integer emailId, String emailAddress, EmailContentInfoDTO emailContentInfoDTO) {
- EmailParseInfoDO emailParseInfoDO = new EmailParseInfoDO();
- emailParseInfoDO.setId(emailId);
- emailParseInfoDO.setSenderEmail(emailContentInfoDTO.getSenderEmail());
- emailParseInfoDO.setEmail(emailAddress);
- emailParseInfoDO.setEmailDate(DateUtil.parse(emailContentInfoDTO.getEmailDate(), DateConst.YYYY_MM_DD_HH_MM_SS));
- emailParseInfoDO.setParseDate(emailContentInfoDTO.getParseDate() == null ? null : DateUtil.parseDate(emailContentInfoDTO.getParseDate()));
- emailParseInfoDO.setEmailTitle(emailContentInfoDTO.getEmailTitle());
- emailParseInfoDO.setEmailType(emailContentInfoDTO.getEmailType());
- emailParseInfoDO.setParseStatus(EmailParseStatusConst.SUCCESS);
- emailParseInfoDO.setIsvalid(1);
- emailParseInfoDO.setCreatorId(0);
- emailParseInfoDO.setCreateTime(new Date());
- emailParseInfoDO.setUpdaterId(0);
- emailParseInfoDO.setUpdateTime(new Date());
- return emailParseInfoDO;
- }
- public Map<Integer, List<String>> getEmailType() {
- Map<Integer, List<String>> emailTypeMap = MapUtil.newHashMap(3, true);
- emailTypeMap.put(EmailTypeConst.REPORT_EMAIL_TYPE,
- ListUtil.toList("月报", "月度报告", "季报", "季度报告", "年报", "年度报告"));
- emailTypeMap.put(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE,
- ListUtil.toList("确认单", "确认函", "交易确认数据", "赎回确认", "申购确认", "分红确认", "确认表", "交易确认", "确认"));
- return emailTypeMap;
- }
- /**
- * 读取邮件
- *
- * @param mailboxInfoDTO 邮箱配置信息
- * @param emailTypeMap 邮件类型识别规则映射表
- * @param startDate 邮件起始日期
- * @param endDate 邮件截止日期(为null,将解析邮件日期小于等于startDate的当天邮件)
- * @return 读取到的邮件信息
- * @throws Exception 异常信息
- */
- private Map<String, List<EmailContentInfoDTO>> realEmail(MailboxInfoDTO mailboxInfoDTO,
- Map<Integer, List<String>> emailTypeMap, Date startDate, Date endDate) throws Exception {
- Store store = EmailUtil.getStoreNew(mailboxInfoDTO);
- if (store == null) {
- return MapUtil.newHashMap();
- }
- // 默认读取收件箱的邮件
- Folder folder = store.getFolder("INBOX");
- folder.open(Folder.READ_ONLY);
- Message[] messages = getEmailMessage(folder, mailboxInfoDTO.getProtocol(), startDate);
- if (messages == null || messages.length == 0) {
- log.info("获取不到邮件 -> 邮箱信息:{},开始时间:{},结束时间:{}", mailboxInfoDTO, startDate, endDate);
- return MapUtil.newHashMap();
- }
- Map<String, List<EmailContentInfoDTO>> emailMessageMap = MapUtil.newHashMap();
- for (Message message : messages) {
- long start = System.currentTimeMillis();
- List<EmailContentInfoDTO> emailContentInfoDTOList = CollUtil.newArrayList();
- String uuidKey = UUID.randomUUID().toString().replaceAll("-", "");
- Integer emailType;
- String senderEmail;
- String emailTitle = null;
- try {
- Date emailDate = message.getSentDate();
- boolean isNotParseConditionSatisfied = emailDate == null || (endDate != null && emailDate.compareTo(endDate) > 0) || (startDate != null && emailDate.compareTo(startDate) < 0);
- if (isNotParseConditionSatisfied) {
- continue;
- }
- emailTitle = message.getSubject();
- senderEmail = getSenderEmail(message);
- emailType = EmailUtil.getEmailTypeBySubject(emailTitle, emailTypeMap);
- String emailDateStr = DateUtil.format(emailDate, DateConst.YYYY_MM_DD_HH_MM_SS);
- if (emailType == null) {
- log.info("邮件不满足解析条件 -> 邮件主题:{},邮件日期:{}", emailTitle, emailDateStr);
- continue;
- }
- log.info("邮件{} 基本信息获取完成,开始下载附件!邮件日期:{}", emailTitle, emailDateStr);
- Object content = message.getContent();
- if (content instanceof Multipart multipart) {
- this.reMultipart(mailboxInfoDTO.getAccount(), emailTitle, emailDate, multipart, emailContentInfoDTOList);
- } else if (content instanceof Part part) {
- this.rePart(mailboxInfoDTO.getAccount(), emailTitle, emailDate, part, emailContentInfoDTOList);
- } else {
- log.warn("不支持的邮件数据 {}", emailTitle);
- }
- if (CollUtil.isNotEmpty(emailContentInfoDTOList)) {
- emailContentInfoDTOList.forEach(e -> {
- e.setEmailType(emailType);
- e.setSenderEmail(senderEmail);
- });
- emailMessageMap.put(uuidKey, emailContentInfoDTOList);
- }
- } catch (Exception e) {
- log.error("获取邮箱的邮件{} 报错,堆栈信息:{}", emailTitle, ExceptionUtil.stacktraceToString(e));
- } finally {
- if (log.isInfoEnabled()) {
- log.info("邮件{} 下载完成,总计耗时{} ms", emailTitle, System.currentTimeMillis() - start);
- }
- }
- }
- folder.close(false);
- store.close();
- return emailMessageMap;
- }
- private void rePart(String account, String subject, Date sendDate, Part part, List<EmailContentInfoDTO> emailContentInfoDTOList) throws Exception {
- String disposition = part.getDisposition();
- if (disposition != null && (disposition.equals(Part.ATTACHMENT) || disposition.equals(Part.INLINE))) {
- String emailDate = DateUtil.format(sendDate, DateConst.YYYYMMDDHHMMSS24);
- String emailDateStr = DateUtil.format(sendDate, DateConst.YYYYMMDD);
- String filePath = path + File.separator + account + File.separator + emailDateStr + File.separator;
- EmailContentInfoDTO emailContentInfoDTO = new EmailContentInfoDTO();
- String fileName = MimeUtility.decodeText(part.getFileName());
- emailContentInfoDTO.setFileName(fileName);
- emailContentInfoDTO.setFileSize((long) part.getSize());
- String realPath = filePath + emailDate + fileName;
- File saveFile = cn.hutool.core.io.FileUtil.file(realPath);
- if (!saveFile.exists()) {
- if (!saveFile.getParentFile().exists()) {
- boolean mkdirs = saveFile.getParentFile().mkdirs();
- if (!mkdirs) {
- log.warn("file path mkdir failed.");
- }
- }
- FileUtil.saveFile(saveFile, part);
- } else {
- cn.hutool.core.io.FileUtil.del(saveFile);
- FileUtil.saveFile(saveFile, part);
- }
- emailContentInfoDTO.setFilePath(realPath);
- emailContentInfoDTO.setEmailAddress(account);
- emailContentInfoDTO.setEmailTitle(subject);
- emailContentInfoDTO.setEmailDate(DateUtil.format(sendDate, DateConst.YYYY_MM_DD_HH_MM_SS));
- emailContentInfoDTOList.add(emailContentInfoDTO);
- }
- }
- private void reMultipart(String account, String subject, Date emailDate, Multipart multipart, List<EmailContentInfoDTO> emailContentInfoDTOList) throws Exception {
- for (int i = 0; i < multipart.getCount(); i++) {
- Part bodyPart = multipart.getBodyPart(i);
- if (bodyPart.getContent() instanceof Multipart mp) {
- this.reMultipart(account, subject, emailDate, mp, emailContentInfoDTOList);
- } else {
- this.rePart(account, subject, emailDate, bodyPart, emailContentInfoDTOList);
- }
- }
- }
- private String getSenderEmail(Message message) {
- Address[] senderAddress;
- try {
- senderAddress = message.getFrom();
- if (senderAddress == null || senderAddress.length == 0) {
- return null;
- }
- // 此时的address是含有编码(MIME编码方式)后的文本和实际的邮件地址
- String address = "";
- for (Address from : senderAddress) {
- if (StrUtil.isNotBlank(from.toString())) {
- address = from.toString();
- break;
- }
- }
- // 正则表达式匹配邮件地址
- Pattern pattern = Pattern.compile("<(\\S+)>");
- Matcher matcher = pattern.matcher(address);
- if (matcher.find()) {
- return matcher.group(1);
- }
- // //说明匹配不到,直接获取sender
- // Address sender = message.getSender();
- // if (sender == null) {
- // return address;
- // }
- // String senderEmail = sender.toString();
- // log.info("senderEmail:" + senderEmail + "====================");
- // if (senderEmail.contains("<") && senderEmail.contains(">") && senderEmail.indexOf("<") < senderEmail.indexOf(">")) {
- // senderEmail = senderEmail.substring(senderEmail.indexOf("<") + 1, senderEmail.length() - 1);
- // }
- // return senderEmail;
- } catch (MessagingException e) {
- log.error(e.getMessage(), e);
- }
- return null;
- }
- private Message[] getEmailMessage(Folder folder, String protocol, Date startDate) {
- try {
- if (protocol.contains("imap")) {
- // 获取邮件日期大于等于startDate的邮件(搜索条件只支持按天)
- SearchTerm startDateTerm = new ReceivedDateTerm(ComparisonTerm.GE, startDate);
- return folder.search(startDateTerm);
- } else {
- return folder.getMessages();
- }
- } catch (MessagingException e) {
- throw new RuntimeException(e);
- }
- }
- }
|