001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.mail2.javax.util;
018
019import java.io.IOException;
020import java.io.UnsupportedEncodingException;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.Collections;
025import java.util.HashMap;
026import java.util.List;
027import java.util.Map;
028
029import javax.activation.DataSource;
030import javax.mail.Address;
031import javax.mail.Message;
032import javax.mail.MessagingException;
033import javax.mail.Multipart;
034import javax.mail.Part;
035import javax.mail.internet.ContentType;
036import javax.mail.internet.InternetAddress;
037import javax.mail.internet.MimeBodyPart;
038import javax.mail.internet.MimeMessage;
039import javax.mail.internet.MimePart;
040import javax.mail.internet.MimeUtility;
041import javax.mail.internet.ParseException;
042
043import org.apache.commons.mail2.javax.activation.InputStreamDataSource;
044
045/**
046 * Parses a MimeMessage and stores the individual parts such a plain text, HTML text and attachments.
047 *
048 * @since 1.3
049 */
050public class MimeMessageParser {
051
052    /** The MimeMessage to convert. */
053    private final MimeMessage mimeMessage;
054
055    /** Plain mail content from MimeMessage. */
056    private String plainContent;
057
058    /** HTML mail content from MimeMessage. */
059    private String htmlContent;
060
061    /** List of attachments of MimeMessage. */
062    private final List<DataSource> attachmentList;
063
064    /** Attachments stored by their content-id. */
065    private final Map<String, DataSource> cidMap;
066
067    /** Is this a Multipart email. */
068    private boolean isMultiPart;
069
070    /**
071     * Constructs an instance with the MimeMessage to be extracted.
072     *
073     * @param mimeMessage the message to parse
074     */
075    public MimeMessageParser(final MimeMessage mimeMessage) {
076        this.attachmentList = new ArrayList<>();
077        this.cidMap = new HashMap<>();
078        this.mimeMessage = mimeMessage;
079        this.isMultiPart = false;
080    }
081
082    private List<Address> asList(final Address[] recipients) {
083        return recipients != null ? Arrays.asList(recipients) : new ArrayList<>();
084    }
085
086    /**
087     * Parses the MimePart to create a DataSource.
088     *
089     * @param parent the parent multi-part
090     * @param part   the current part to be processed
091     * @return the DataSource
092     * @throws MessagingException creating the DataSource failed
093     * @throws IOException        error getting InputStream or unsupported encoding
094     */
095    @SuppressWarnings("resource") // Caller closes InputStream
096    protected DataSource createDataSource(final Multipart parent, final MimePart part) throws MessagingException, IOException {
097        final DataSource dataSource = part.getDataHandler().getDataSource();
098        final String contentType = getBaseMimeType(dataSource.getContentType());
099        final String dataSourceName = getDataSourceName(part, dataSource);
100        return new InputStreamDataSource(dataSource.getInputStream(), contentType, dataSourceName);
101    }
102
103    /**
104     * Find an attachment using its content-id.
105     * <p>
106     * The content-id must be stripped of any angle brackets, i.e. "part1" instead of "&lt;part1&gt;".
107     * </p>
108     *
109     * @param cid the content-id of the attachment
110     * @return the corresponding datasource or null if nothing was found
111     * @since 1.3.4
112     */
113    public DataSource findAttachmentByCid(final String cid) {
114        return cidMap.get(cid);
115    }
116
117    /**
118     * Find an attachment using its name.
119     *
120     * @param name the name of the attachment
121     * @return the corresponding datasource or null if nothing was found
122     */
123    public DataSource findAttachmentByName(final String name) {
124        for (final DataSource dataSource : getAttachmentList()) {
125            if (name.equalsIgnoreCase(dataSource.getName())) {
126                return dataSource;
127            }
128        }
129        return null;
130    }
131
132    /**
133     * Gets the attachment list.
134     *
135     * @return Returns the attachment list.
136     */
137    public List<DataSource> getAttachmentList() {
138        return attachmentList;
139    }
140
141    /**
142     * Gets the MIME type.
143     *
144     * @param fullMimeType the mime type from the mail API
145     * @return the real mime type
146     */
147    private String getBaseMimeType(final String fullMimeType) {
148        final int pos = fullMimeType.indexOf(';');
149        return pos < 0 ? fullMimeType : fullMimeType.substring(0, pos);
150    }
151
152    /**
153     * Gets the BCC Address list.
154     *
155     * @return the 'BCC' recipients of the message
156     * @throws MessagingException determining the recipients failed
157     */
158    public List<Address> getBcc() throws MessagingException {
159        return asList(mimeMessage.getRecipients(Message.RecipientType.BCC));
160    }
161
162    /**
163     * Gets the CC Address list.
164     *
165     * @return the 'CC' recipients of the message
166     * @throws MessagingException determining the recipients failed
167     */
168    public List<Address> getCc() throws MessagingException {
169        return asList(mimeMessage.getRecipients(Message.RecipientType.CC));
170    }
171
172    /**
173     * Returns a collection of all content-ids in the parsed message.
174     * <p>
175     * The content-ids are stripped of any angle brackets, i.e. "part1" instead of "&lt;part1&gt;".
176     * </p>
177     *
178     * @return the collection of content ids.
179     * @since 1.3.4
180     */
181    public Collection<String> getContentIds() {
182        return Collections.unmodifiableSet(cidMap.keySet());
183    }
184
185    /**
186     * Determines the name of the data source if it is not already set.
187     *
188     * @param part       the mail part
189     * @param dataSource the data source
190     * @return the name of the data source or {@code null} if no name can be determined
191     * @throws MessagingException           accessing the part failed
192     * @throws UnsupportedEncodingException decoding the text failed
193     */
194    protected String getDataSourceName(final Part part, final DataSource dataSource) throws MessagingException, UnsupportedEncodingException {
195        String result = dataSource.getName();
196        if (isEmpty(result)) {
197            result = part.getFileName();
198        }
199        if (!isEmpty(result)) {
200            result = MimeUtility.decodeText(result);
201        } else {
202            result = null;
203        }
204        return result;
205    }
206
207    /**
208     * Gets the FROM field.
209     *
210     * @return the FROM field of the message
211     * @throws MessagingException parsing the mime message failed
212     */
213    public String getFrom() throws MessagingException {
214        final Address[] addresses = mimeMessage.getFrom();
215        if (isEmpty(addresses)) {
216            return null;
217        }
218        return ((InternetAddress) addresses[0]).getAddress();
219    }
220
221    /**
222     * Gets the htmlContent if any.
223     *
224     * @return Returns the htmlContent if any
225     */
226    public String getHtmlContent() {
227        return htmlContent;
228    }
229
230    /**
231     * Gets the MimeMessage.
232     *
233     * @return Returns the mimeMessage.
234     */
235    public MimeMessage getMimeMessage() {
236        return mimeMessage;
237    }
238
239    /**
240     * Gets the plain content if any.
241     *
242     * @return Returns the plainContent if any
243     */
244    public String getPlainContent() {
245        return plainContent;
246    }
247
248    /**
249     * Gets the 'replyTo' address of the email.
250     *
251     * @return the 'replyTo' address of the email
252     * @throws MessagingException parsing the mime message failed
253     */
254    public String getReplyTo() throws MessagingException {
255        final Address[] addresses = mimeMessage.getReplyTo();
256        if (isEmpty(addresses)) {
257            return null;
258        }
259        return ((InternetAddress) addresses[0]).getAddress();
260    }
261
262    /**
263     * Gets the MIME message subject.
264     *
265     * @return the MIME message subject.
266     * @throws MessagingException parsing the mime message failed.
267     */
268    public String getSubject() throws MessagingException {
269        return mimeMessage.getSubject();
270    }
271
272    /**
273     * Gets the MIME message 'to' list.
274     *
275     * @return the 'to' recipients of the message.
276     * @throws MessagingException determining the recipients failed
277     */
278    public List<Address> getTo() throws MessagingException {
279        return asList(mimeMessage.getRecipients(Message.RecipientType.TO));
280    }
281
282    /**
283     * Tests if attachments are present.
284     *
285     * @return true if attachments are present.
286     */
287    public boolean hasAttachments() {
288        return !attachmentList.isEmpty();
289    }
290
291    /**
292     * Tests is HTML content is present.
293     *
294     * @return true if HTML content is present.
295     */
296    public boolean hasHtmlContent() {
297        return htmlContent != null;
298    }
299
300    /**
301     * Tests is plain content is present.
302     *
303     * @return true if a plain content is present.
304     */
305    public boolean hasPlainContent() {
306        return plainContent != null;
307    }
308
309    private boolean isEmpty(final Object[] array) {
310        return array == null || array.length == 0;
311    }
312
313    private boolean isEmpty(final String result) {
314        return result == null || result.isEmpty();
315    }
316
317    /**
318     * Tests whether the MimePart contains an object of the given mime type.
319     *
320     * @param part     the current MimePart
321     * @param mimeType the mime type to check
322     * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise
323     * @throws MessagingException parsing the MimeMessage failed
324     */
325    private boolean isMimeType(final MimePart part, final String mimeType) throws MessagingException {
326        // Do not use part.isMimeType(String) as it is broken for MimeBodyPart
327        // and does not really check the actual content type.
328        try {
329            return new ContentType(part.getDataHandler().getContentType()).match(mimeType);
330        } catch (final ParseException ex) {
331            return part.getContentType().equalsIgnoreCase(mimeType);
332        }
333    }
334
335    /**
336     * Tests whether this is multipart.
337     *
338     * @return Returns the isMultiPart.
339     */
340    public boolean isMultipart() {
341        return isMultiPart;
342    }
343
344    /**
345     * Does the actual extraction.
346     *
347     * @return this instance
348     * @throws MessagingException parsing the mime message failed
349     * @throws IOException        parsing the mime message failed
350     */
351    public MimeMessageParser parse() throws MessagingException, IOException {
352        parse(null, mimeMessage);
353        return this;
354    }
355
356    /**
357     * Extracts the content of a MimeMessage recursively.
358     *
359     * @param parent the parent multi-part
360     * @param part   the current MimePart
361     * @throws MessagingException parsing the MimeMessage failed
362     * @throws IOException        parsing the MimeMessage failed
363     */
364    protected void parse(final Multipart parent, final MimePart part) throws MessagingException, IOException {
365        if (isMimeType(part, "text/plain") && plainContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
366            plainContent = (String) part.getContent();
367        } else if (isMimeType(part, "text/html") && htmlContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) {
368            htmlContent = (String) part.getContent();
369        } else if (isMimeType(part, "multipart/*")) {
370            isMultiPart = true;
371            final Multipart multipart = (Multipart) part.getContent();
372            final int count = multipart.getCount();
373            // iterate over all MimeBodyPart
374            for (int i = 0; i < count; i++) {
375                parse(multipart, (MimeBodyPart) multipart.getBodyPart(i));
376            }
377        } else {
378            final String cid = stripContentId(part.getContentID());
379            final DataSource dataSource = createDataSource(parent, part);
380            if (cid != null) {
381                cidMap.put(cid, dataSource);
382            }
383            attachmentList.add(dataSource);
384        }
385    }
386
387    /**
388     * Strips the content id of any whitespace and angle brackets.
389     *
390     * @param contentId the string to strip
391     * @return a stripped version of the content id
392     */
393    private String stripContentId(final String contentId) {
394        return contentId == null ? null : contentId.trim().replaceAll("[\\<\\>]", "");
395    }
396}