MimeMessageParser.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.mail2.jakarta.util;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.mail2.jakarta.activation.InputStreamDataSource;
import jakarta.activation.DataSource;
import jakarta.mail.Address;
import jakarta.mail.Message;
import jakarta.mail.MessagingException;
import jakarta.mail.Multipart;
import jakarta.mail.Part;
import jakarta.mail.internet.ContentType;
import jakarta.mail.internet.InternetAddress;
import jakarta.mail.internet.MimeBodyPart;
import jakarta.mail.internet.MimeMessage;
import jakarta.mail.internet.MimePart;
import jakarta.mail.internet.MimeUtility;
import jakarta.mail.internet.ParseException;
/**
 * Parses a MimeMessage and stores the individual parts such as plain text, HTML text and attachments.
 * <p>
 * An instance is populated by calling {@link #parse()}; until then the content getters return {@code null} and the attachment collections are empty.
 * </p>
 *
 * @since 1.3
 */
public class MimeMessageParser {

    /** The MimeMessage to convert. */
    private final MimeMessage mimeMessage;

    /** Plain mail content from MimeMessage. */
    private String plainContent;

    /** HTML mail content from MimeMessage. */
    private String htmlContent;

    /** List of attachments of MimeMessage. */
    private final List<DataSource> attachmentList;

    /** Attachments stored by their content-id. */
    private final Map<String, DataSource> cidMap;

    /** Is this a Multipart email. */
    private boolean isMultiPart;

    /**
     * Constructs an instance with the MimeMessage to be extracted.
     *
     * @param mimeMessage the message to parse
     */
    public MimeMessageParser(final MimeMessage mimeMessage) {
        this.attachmentList = new ArrayList<>();
        this.cidMap = new HashMap<>();
        this.mimeMessage = mimeMessage;
    }

    /**
     * Converts a possibly {@code null} recipient array into a list.
     *
     * @param recipients the recipients, may be {@code null}
     * @return a fixed-size list view of the array, or a new empty list if the array is {@code null}
     */
    private List<Address> asList(final Address[] recipients) {
        return recipients != null ? Arrays.asList(recipients) : new ArrayList<>();
    }

    /**
     * Parses the MimePart to create a DataSource.
     * <p>
     * The returned DataSource wraps the input stream obtained from the part's own data source, together with the base MIME type and the decoded name.
     * </p>
     *
     * @param parent the parent multi-part (not used by this implementation)
     * @param part   the current part to be processed
     * @return the DataSource
     * @throws MessagingException creating the DataSource failed
     * @throws IOException        error getting InputStream or unsupported encoding
     */
    @SuppressWarnings("resource") // Caller closes InputStream
    protected DataSource createDataSource(final Multipart parent, final MimePart part) throws MessagingException, IOException {
        final DataSource dataSource = part.getDataHandler().getDataSource();
        final String contentType = getBaseMimeType(dataSource.getContentType());
        final String dataSourceName = getDataSourceName(part, dataSource);
        return new InputStreamDataSource(dataSource.getInputStream(), contentType, dataSourceName);
    }

    /**
     * Finds an attachment using its content-id.
     * <p>
     * The content-id must be stripped of any angle brackets, i.e. {@code part1} instead of {@code <part1>}.
     * </p>
     *
     * @param cid the content-id of the attachment
     * @return the corresponding datasource or null if nothing was found
     * @since 1.3.4
     */
    public DataSource findAttachmentByCid(final String cid) {
        return cidMap.get(cid);
    }

    /**
     * Finds an attachment using its name; the comparison ignores case.
     *
     * @param name the name of the attachment, may be {@code null}
     * @return the corresponding datasource or null if nothing was found or {@code name} is {@code null}
     */
    public DataSource findAttachmentByName(final String name) {
        if (name == null) {
            // A null name can never match; avoid the NullPointerException on name.equalsIgnoreCase(...).
            return null;
        }
        for (final DataSource dataSource : getAttachmentList()) {
            if (name.equalsIgnoreCase(dataSource.getName())) {
                return dataSource;
            }
        }
        return null;
    }

    /**
     * Gets the attachment list.
     *
     * @return Returns the attachment list; this is the parser's internal list, not a copy.
     */
    public List<DataSource> getAttachmentList() {
        return attachmentList;
    }

    /**
     * Gets the MIME type without any parameters.
     * <p>
     * Everything from the first {@code ';'} on is dropped and surrounding whitespace is removed, so {@code "text/plain ; charset=UTF-8"} yields
     * {@code "text/plain"}.
     * </p>
     *
     * @param fullMimeType the mime type from the mail API
     * @return the real mime type
     */
    private String getBaseMimeType(final String fullMimeType) {
        final int pos = fullMimeType.indexOf(';');
        final String base = pos < 0 ? fullMimeType : fullMimeType.substring(0, pos);
        // Whitespace is permitted around the ';' separator and must not leak into the base type.
        return base.trim();
    }

    /**
     * Gets the BCC Address list.
     *
     * @return the 'BCC' recipients of the message
     * @throws MessagingException determining the recipients failed
     */
    public List<Address> getBcc() throws MessagingException {
        return asList(mimeMessage.getRecipients(Message.RecipientType.BCC));
    }

    /**
     * Gets the CC Address list.
     *
     * @return the 'CC' recipients of the message
     * @throws MessagingException determining the recipients failed
     */
    public List<Address> getCc() throws MessagingException {
        return asList(mimeMessage.getRecipients(Message.RecipientType.CC));
    }

    /**
     * Returns a collection of all content-ids in the parsed message.
     * <p>
     * The content-ids are stripped of any angle brackets, i.e. {@code part1} instead of {@code <part1>}.
     * </p>
     *
     * @return the unmodifiable collection of content ids.
     * @since 1.3.4
     */
    public Collection<String> getContentIds() {
        return Collections.unmodifiableSet(cidMap.keySet());
    }

    /**
     * Determines the name of the data source if it is not already set.
     * <p>
     * The data source's own name is preferred; the part's file name is the fallback. A non-empty result is MIME-decoded before it is returned.
     * </p>
     *
     * @param part       the mail part
     * @param dataSource the data source
     * @return the name of the data source or {@code null} if no name can be determined
     * @throws MessagingException           accessing the part failed
     * @throws UnsupportedEncodingException decoding the text failed
     */
    protected String getDataSourceName(final Part part, final DataSource dataSource) throws MessagingException, UnsupportedEncodingException {
        String result = dataSource.getName();
        if (isEmpty(result)) {
            result = part.getFileName();
        }
        if (!isEmpty(result)) {
            result = MimeUtility.decodeText(result);
        } else {
            result = null;
        }
        return result;
    }

    /**
     * Gets the FROM field.
     *
     * @return the first FROM address of the message, or {@code null} if there is none
     * @throws MessagingException parsing the mime message failed
     */
    public String getFrom() throws MessagingException {
        return firstAddress(mimeMessage.getFrom());
    }

    /**
     * Gets the htmlContent if any.
     *
     * @return Returns the htmlContent if any
     */
    public String getHtmlContent() {
        return htmlContent;
    }

    /**
     * Gets the MimeMessage.
     *
     * @return Returns the mimeMessage.
     */
    public MimeMessage getMimeMessage() {
        return mimeMessage;
    }

    /**
     * Gets the plain content if any.
     *
     * @return Returns the plainContent if any
     */
    public String getPlainContent() {
        return plainContent;
    }

    /**
     * Gets the 'replyTo' address of the email.
     *
     * @return the first 'replyTo' address of the email, or {@code null} if there is none
     * @throws MessagingException parsing the mime message failed
     */
    public String getReplyTo() throws MessagingException {
        return firstAddress(mimeMessage.getReplyTo());
    }

    /**
     * Gets the MIME message subject.
     *
     * @return the MIME message subject.
     * @throws MessagingException parsing the mime message failed.
     */
    public String getSubject() throws MessagingException {
        return mimeMessage.getSubject();
    }

    /**
     * Gets the MIME message 'to' list.
     *
     * @return the 'to' recipients of the message.
     * @throws MessagingException determining the recipients failed
     */
    public List<Address> getTo() throws MessagingException {
        return asList(mimeMessage.getRecipients(Message.RecipientType.TO));
    }

    /**
     * Tests if attachments are present.
     *
     * @return true if attachments are present.
     */
    public boolean hasAttachments() {
        return !attachmentList.isEmpty();
    }

    /**
     * Tests if HTML content is present.
     *
     * @return true if HTML content is present.
     */
    public boolean hasHtmlContent() {
        return htmlContent != null;
    }

    /**
     * Tests if plain content is present.
     *
     * @return true if a plain content is present.
     */
    public boolean hasPlainContent() {
        return plainContent != null;
    }

    /**
     * Extracts the mail address of the first entry of an address array.
     * <p>
     * Shared by {@link #getFrom()} and {@link #getReplyTo()}; the first entry is cast to {@link InternetAddress}.
     * </p>
     *
     * @param addresses the addresses, may be {@code null} or empty
     * @return the address string of the first entry, or {@code null} if there is no entry
     */
    private String firstAddress(final Address[] addresses) {
        if (isEmpty(addresses)) {
            return null;
        }
        return ((InternetAddress) addresses[0]).getAddress();
    }

    /**
     * Tests whether an array is {@code null} or has no elements.
     *
     * @param array the array to test
     * @return {@code true} if the array is {@code null} or empty
     */
    private boolean isEmpty(final Object[] array) {
        return array == null || array.length == 0;
    }

    /**
     * Tests whether a string is {@code null} or has length zero.
     *
     * @param result the string to test
     * @return {@code true} if the string is {@code null} or empty
     */
    private boolean isEmpty(final String result) {
        return result == null || result.isEmpty();
    }

    /**
     * Tests whether the MimePart contains an object of the given mime type.
     *
     * @param part     the current MimePart
     * @param mimeType the mime type to check
     * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise
     * @throws MessagingException parsing the MimeMessage failed
     */
    private boolean isMimeType(final MimePart part, final String mimeType) throws MessagingException {
        // Do not use part.isMimeType(String) as it is broken for MimeBodyPart
        // and does not really check the actual content type.
        try {
            return new ContentType(part.getDataHandler().getContentType()).match(mimeType);
        } catch (final ParseException ex) {
            // The content type could not be parsed: fall back to a plain case-insensitive string comparison.
            return part.getContentType().equalsIgnoreCase(mimeType);
        }
    }

    /**
     * Tests whether this is multipart.
     *
     * @return Returns the isMultiPart.
     */
    public boolean isMultipart() {
        return isMultiPart;
    }

    /**
     * Does the actual extraction.
     *
     * @return this instance
     * @throws MessagingException parsing the mime message failed
     * @throws IOException        parsing the mime message failed
     */
    public MimeMessageParser parse() throws MessagingException, IOException {
        parse(null, mimeMessage);
        return this;
    }

    /**
     * Extracts the content of a MimeMessage recursively.
     * <p>
     * The first {@code text/plain} and the first {@code text/html} part that are not marked as attachment become the plain and HTML content. A
     * {@code multipart/*} part is descended into. Every other part, including further text parts once the corresponding content is already set, is
     * stored as an attachment and additionally registered under its content-id when it has one.
     * </p>
     *
     * @param parent the parent multi-part
     * @param part   the current MimePart
     * @throws MessagingException parsing the MimeMessage failed
     * @throws IOException        parsing the MimeMessage failed
     */
    protected void parse(final Multipart parent, final MimePart part) throws MessagingException, IOException {
        if (isMimeType(part, "text/plain") && plainContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
            plainContent = (String) part.getContent();
        } else if (isMimeType(part, "text/html") && htmlContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
            htmlContent = (String) part.getContent();
        } else if (isMimeType(part, "multipart/*")) {
            isMultiPart = true;
            final Multipart multipart = (Multipart) part.getContent();
            final int count = multipart.getCount();
            // iterate over all MimeBodyPart
            for (int i = 0; i < count; i++) {
                parse(multipart, (MimeBodyPart) multipart.getBodyPart(i));
            }
        } else {
            final String cid = stripContentId(part.getContentID());
            final DataSource dataSource = createDataSource(parent, part);
            if (cid != null) {
                cidMap.put(cid, dataSource);
            }
            attachmentList.add(dataSource);
        }
    }

    /**
     * Strips the content id of leading and trailing whitespace and of all angle brackets.
     *
     * @param contentId the string to strip, may be {@code null}
     * @return a stripped version of the content id, or {@code null} if the input is {@code null}
     */
    private String stripContentId(final String contentId) {
        // Literal replacement: no regular expression is needed to drop two fixed characters.
        return contentId == null ? null : contentId.trim().replace("<", "").replace(">", "");
    }
}