View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.mail2.javax.util;
18  
19  import java.io.IOException;
20  import java.io.UnsupportedEncodingException;
21  import java.util.ArrayList;
22  import java.util.Arrays;
23  import java.util.Collection;
24  import java.util.Collections;
25  import java.util.HashMap;
26  import java.util.List;
27  import java.util.Map;
28  
29  import javax.activation.DataSource;
30  import javax.mail.Address;
31  import javax.mail.Message;
32  import javax.mail.MessagingException;
33  import javax.mail.Multipart;
34  import javax.mail.Part;
35  import javax.mail.internet.ContentType;
36  import javax.mail.internet.InternetAddress;
37  import javax.mail.internet.MimeBodyPart;
38  import javax.mail.internet.MimeMessage;
39  import javax.mail.internet.MimePart;
40  import javax.mail.internet.MimeUtility;
41  import javax.mail.internet.ParseException;
42  
43  import org.apache.commons.mail2.javax.activation.InputStreamDataSource;
44  
45  /**
46   * Parses a MimeMessage and stores the individual parts such a plain text, HTML text and attachments.
47   *
48   * @since 1.3
49   */
50  public class MimeMessageParser {
51  
52      /** The MimeMessage to convert. */
53      private final MimeMessage mimeMessage;
54  
55      /** Plain mail content from MimeMessage. */
56      private String plainContent;
57  
58      /** HTML mail content from MimeMessage. */
59      private String htmlContent;
60  
61      /** List of attachments of MimeMessage. */
62      private final List<DataSource> attachmentList;
63  
64      /** Attachments stored by their content-id. */
65      private final Map<String, DataSource> cidMap;
66  
67      /** Is this a Multipart email. */
68      private boolean isMultiPart;
69  
70      /**
71       * Constructs an instance with the MimeMessage to be extracted.
72       *
73       * @param mimeMessage the message to parse
74       */
75      public MimeMessageParser(final MimeMessage mimeMessage) {
76          this.attachmentList = new ArrayList<>();
77          this.cidMap = new HashMap<>();
78          this.mimeMessage = mimeMessage;
79          this.isMultiPart = false;
80      }
81  
82      private List<Address> asList(final Address[] recipients) {
83          return recipients != null ? Arrays.asList(recipients) : new ArrayList<>();
84      }
85  
86      /**
87       * Parses the MimePart to create a DataSource.
88       *
89       * @param parent the parent multi-part
90       * @param part   the current part to be processed
91       * @return the DataSource
92       * @throws MessagingException creating the DataSource failed
93       * @throws IOException        error getting InputStream or unsupported encoding
94       */
95      @SuppressWarnings("resource") // Caller closes InputStream
96      protected DataSource createDataSource(final Multipart parent, final MimePart part) throws MessagingException, IOException {
97          final DataSource dataSource = part.getDataHandler().getDataSource();
98          final String contentType = getBaseMimeType(dataSource.getContentType());
99          final String dataSourceName = getDataSourceName(part, dataSource);
100         return new InputStreamDataSource(dataSource.getInputStream(), contentType, dataSourceName);
101     }
102 
103     /**
104      * Find an attachment using its content-id.
105      * <p>
106      * The content-id must be stripped of any angle brackets, i.e. "part1" instead of "&lt;part1&gt;".
107      * </p>
108      *
109      * @param cid the content-id of the attachment
110      * @return the corresponding datasource or null if nothing was found
111      * @since 1.3.4
112      */
113     public DataSource findAttachmentByCid(final String cid) {
114         return cidMap.get(cid);
115     }
116 
117     /**
118      * Find an attachment using its name.
119      *
120      * @param name the name of the attachment
121      * @return the corresponding datasource or null if nothing was found
122      */
123     public DataSource findAttachmentByName(final String name) {
124         for (final DataSource dataSource : getAttachmentList()) {
125             if (name.equalsIgnoreCase(dataSource.getName())) {
126                 return dataSource;
127             }
128         }
129         return null;
130     }
131 
132     /**
133      * Gets the attachment list.
134      *
135      * @return Returns the attachment list.
136      */
137     public List<DataSource> getAttachmentList() {
138         return attachmentList;
139     }
140 
141     /**
142      * Gets the MIME type.
143      *
144      * @param fullMimeType the mime type from the mail API
145      * @return the real mime type
146      */
147     private String getBaseMimeType(final String fullMimeType) {
148         final int pos = fullMimeType.indexOf(';');
149         return pos < 0 ? fullMimeType : fullMimeType.substring(0, pos);
150     }
151 
152     /**
153      * Gets the BCC Address list.
154      *
155      * @return the 'BCC' recipients of the message
156      * @throws MessagingException determining the recipients failed
157      */
158     public List<Address> getBcc() throws MessagingException {
159         return asList(mimeMessage.getRecipients(Message.RecipientType.BCC));
160     }
161 
162     /**
163      * Gets the CC Address list.
164      *
165      * @return the 'CC' recipients of the message
166      * @throws MessagingException determining the recipients failed
167      */
168     public List<Address> getCc() throws MessagingException {
169         return asList(mimeMessage.getRecipients(Message.RecipientType.CC));
170     }
171 
172     /**
173      * Returns a collection of all content-ids in the parsed message.
174      * <p>
175      * The content-ids are stripped of any angle brackets, i.e. "part1" instead of "&lt;part1&gt;".
176      * </p>
177      *
178      * @return the collection of content ids.
179      * @since 1.3.4
180      */
181     public Collection<String> getContentIds() {
182         return Collections.unmodifiableSet(cidMap.keySet());
183     }
184 
185     /**
186      * Determines the name of the data source if it is not already set.
187      *
188      * @param part       the mail part
189      * @param dataSource the data source
190      * @return the name of the data source or {@code null} if no name can be determined
191      * @throws MessagingException           accessing the part failed
192      * @throws UnsupportedEncodingException decoding the text failed
193      */
194     protected String getDataSourceName(final Part part, final DataSource dataSource) throws MessagingException, UnsupportedEncodingException {
195         String result = dataSource.getName();
196         if (isEmpty(result)) {
197             result = part.getFileName();
198         }
199         if (!isEmpty(result)) {
200             result = MimeUtility.decodeText(result);
201         } else {
202             result = null;
203         }
204         return result;
205     }
206 
207     /**
208      * Gets the FROM field.
209      *
210      * @return the FROM field of the message
211      * @throws MessagingException parsing the mime message failed
212      */
213     public String getFrom() throws MessagingException {
214         final Address[] addresses = mimeMessage.getFrom();
215         if (isEmpty(addresses)) {
216             return null;
217         }
218         return ((InternetAddress) addresses[0]).getAddress();
219     }
220 
221     /**
222      * Gets the htmlContent if any.
223      *
224      * @return Returns the htmlContent if any
225      */
226     public String getHtmlContent() {
227         return htmlContent;
228     }
229 
230     /**
231      * Gets the MimeMessage.
232      *
233      * @return Returns the mimeMessage.
234      */
235     public MimeMessage getMimeMessage() {
236         return mimeMessage;
237     }
238 
239     /**
240      * Gets the plain content if any.
241      *
242      * @return Returns the plainContent if any
243      */
244     public String getPlainContent() {
245         return plainContent;
246     }
247 
248     /**
249      * Gets the 'replyTo' address of the email.
250      *
251      * @return the 'replyTo' address of the email
252      * @throws MessagingException parsing the mime message failed
253      */
254     public String getReplyTo() throws MessagingException {
255         final Address[] addresses = mimeMessage.getReplyTo();
256         if (isEmpty(addresses)) {
257             return null;
258         }
259         return ((InternetAddress) addresses[0]).getAddress();
260     }
261 
262     /**
263      * Gets the MIME message subject.
264      *
265      * @return the MIME message subject.
266      * @throws MessagingException parsing the mime message failed.
267      */
268     public String getSubject() throws MessagingException {
269         return mimeMessage.getSubject();
270     }
271 
272     /**
273      * Gets the MIME message 'to' list.
274      *
275      * @return the 'to' recipients of the message.
276      * @throws MessagingException determining the recipients failed
277      */
278     public List<Address> getTo() throws MessagingException {
279         return asList(mimeMessage.getRecipients(Message.RecipientType.TO));
280     }
281 
282     /**
283      * Tests if attachments are present.
284      *
285      * @return true if attachments are present.
286      */
287     public boolean hasAttachments() {
288         return !attachmentList.isEmpty();
289     }
290 
291     /**
292      * Tests is HTML content is present.
293      *
294      * @return true if HTML content is present.
295      */
296     public boolean hasHtmlContent() {
297         return htmlContent != null;
298     }
299 
300     /**
301      * Tests is plain content is present.
302      *
303      * @return true if a plain content is present.
304      */
305     public boolean hasPlainContent() {
306         return plainContent != null;
307     }
308 
309     private boolean isEmpty(final Object[] array) {
310         return array == null || array.length == 0;
311     }
312 
313     private boolean isEmpty(final String result) {
314         return result == null || result.isEmpty();
315     }
316 
317     /**
318      * Tests whether the MimePart contains an object of the given mime type.
319      *
320      * @param part     the current MimePart
321      * @param mimeType the mime type to check
322      * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise
323      * @throws MessagingException parsing the MimeMessage failed
324      */
325     private boolean isMimeType(final MimePart part, final String mimeType) throws MessagingException {
326         // Do not use part.isMimeType(String) as it is broken for MimeBodyPart
327         // and does not really check the actual content type.
328         try {
329             return new ContentType(part.getDataHandler().getContentType()).match(mimeType);
330         } catch (final ParseException ex) {
331             return part.getContentType().equalsIgnoreCase(mimeType);
332         }
333     }
334 
335     /**
336      * Tests whether this is multipart.
337      *
338      * @return Returns the isMultiPart.
339      */
340     public boolean isMultipart() {
341         return isMultiPart;
342     }
343 
344     /**
345      * Does the actual extraction.
346      *
347      * @return this instance
348      * @throws MessagingException parsing the mime message failed
349      * @throws IOException        parsing the mime message failed
350      */
351     public MimeMessageParser parse() throws MessagingException, IOException {
352         parse(null, mimeMessage);
353         return this;
354     }
355 
356     /**
357      * Extracts the content of a MimeMessage recursively.
358      *
359      * @param parent the parent multi-part
360      * @param part   the current MimePart
361      * @throws MessagingException parsing the MimeMessage failed
362      * @throws IOException        parsing the MimeMessage failed
363      */
364     protected void parse(final Multipart parent, final MimePart part) throws MessagingException, IOException {
365         if (isMimeType(part, "text/plain") && plainContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
366             plainContent = (String) part.getContent();
367         } else if (isMimeType(part, "text/html") && htmlContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
368             htmlContent = (String) part.getContent();
369         } else if (isMimeType(part, "multipart/*")) {
370             isMultiPart = true;
371             final Multipart multipart = (Multipart) part.getContent();
372             final int count = multipart.getCount();
373             // iterate over all MimeBodyPart
374             for (int i = 0; i < count; i++) {
375                 parse(multipart, (MimeBodyPart) multipart.getBodyPart(i));
376             }
377         } else {
378             final String cid = stripContentId(part.getContentID());
379             final DataSource dataSource = createDataSource(parent, part);
380             if (cid != null) {
381                 cidMap.put(cid, dataSource);
382             }
383             attachmentList.add(dataSource);
384         }
385     }
386 
387     /**
388      * Strips the content id of any whitespace and angle brackets.
389      *
390      * @param contentId the string to strip
391      * @return a stripped version of the content id
392      */
393     private String stripContentId(final String contentId) {
394         return contentId == null ? null : contentId.trim().replaceAll("[\\<\\>]", "");
395     }
396 }