001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.mail2.jakarta.util; 018 019import java.io.IOException; 020import java.io.UnsupportedEncodingException; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.Collections; 025import java.util.HashMap; 026import java.util.List; 027import java.util.Map; 028 029import org.apache.commons.mail2.jakarta.activation.InputStreamDataSource; 030 031import jakarta.activation.DataSource; 032import jakarta.mail.Address; 033import jakarta.mail.Message; 034import jakarta.mail.MessagingException; 035import jakarta.mail.Multipart; 036import jakarta.mail.Part; 037import jakarta.mail.internet.ContentType; 038import jakarta.mail.internet.InternetAddress; 039import jakarta.mail.internet.MimeBodyPart; 040import jakarta.mail.internet.MimeMessage; 041import jakarta.mail.internet.MimePart; 042import jakarta.mail.internet.MimeUtility; 043import jakarta.mail.internet.ParseException; 044 045/** 046 * Parses a MimeMessage and stores the individual parts such a plain text, HTML text and attachments. 047 * 048 * @since 1.3 049 */ 050public class MimeMessageParser { 051 052 /** The MimeMessage to convert. */ 053 private final MimeMessage mimeMessage; 054 055 /** Plain mail content from MimeMessage. */ 056 private String plainContent; 057 058 /** HTML mail content from MimeMessage. */ 059 private String htmlContent; 060 061 /** List of attachments of MimeMessage. */ 062 private final List<DataSource> attachmentList; 063 064 /** Attachments stored by their content-id. */ 065 private final Map<String, DataSource> cidMap; 066 067 /** Is this a Multipart email. */ 068 private boolean isMultiPart; 069 070 /** 071 * Constructs an instance with the MimeMessage to be extracted. 072 * 073 * @param mimeMessage the message to parse 074 */ 075 public MimeMessageParser(final MimeMessage mimeMessage) { 076 this.attachmentList = new ArrayList<>(); 077 this.cidMap = new HashMap<>(); 078 this.mimeMessage = mimeMessage; 079 this.isMultiPart = false; 080 } 081 082 private List<Address> asList(final Address[] recipients) { 083 return recipients != null ? Arrays.asList(recipients) : new ArrayList<>(); 084 } 085 086 /** 087 * Parses the MimePart to create a DataSource. 088 * 089 * @param parent the parent multi-part 090 * @param part the current part to be processed 091 * @return the DataSource 092 * @throws MessagingException creating the DataSource failed 093 * @throws IOException error getting InputStream or unsupported encoding 094 */ 095 @SuppressWarnings("resource") // Caller closes InputStream 096 protected DataSource createDataSource(final Multipart parent, final MimePart part) throws MessagingException, IOException { 097 final DataSource dataSource = part.getDataHandler().getDataSource(); 098 final String contentType = getBaseMimeType(dataSource.getContentType()); 099 final String dataSourceName = getDataSourceName(part, dataSource); 100 return new InputStreamDataSource(dataSource.getInputStream(), contentType, dataSourceName); 101 } 102 103 /** 104 * Find an attachment using its content-id. 105 * <p> 106 * The content-id must be stripped of any angle brackets, i.e. "part1" instead of "<part1>". 107 * </p> 108 * 109 * @param cid the content-id of the attachment 110 * @return the corresponding datasource or null if nothing was found 111 * @since 1.3.4 112 */ 113 public DataSource findAttachmentByCid(final String cid) { 114 return cidMap.get(cid); 115 } 116 117 /** 118 * Find an attachment using its name. 119 * 120 * @param name the name of the attachment 121 * @return the corresponding datasource or null if nothing was found 122 */ 123 public DataSource findAttachmentByName(final String name) { 124 for (final DataSource dataSource : getAttachmentList()) { 125 if (name.equalsIgnoreCase(dataSource.getName())) { 126 return dataSource; 127 } 128 } 129 return null; 130 } 131 132 /** 133 * Gets the attachment list. 134 * 135 * @return Returns the attachment list. 136 */ 137 public List<DataSource> getAttachmentList() { 138 return attachmentList; 139 } 140 141 /** 142 * Gets the MIME type. 143 * 144 * @param fullMimeType the mime type from the mail API 145 * @return the real mime type 146 */ 147 private String getBaseMimeType(final String fullMimeType) { 148 final int pos = fullMimeType.indexOf(';'); 149 return pos < 0 ? fullMimeType : fullMimeType.substring(0, pos); 150 } 151 152 /** 153 * Gets the BCC Address list. 154 * 155 * @return the 'BCC' recipients of the message 156 * @throws MessagingException determining the recipients failed 157 */ 158 public List<Address> getBcc() throws MessagingException { 159 return asList(mimeMessage.getRecipients(Message.RecipientType.BCC)); 160 } 161 162 /** 163 * Gets the CC Address list. 164 * 165 * @return the 'CC' recipients of the message 166 * @throws MessagingException determining the recipients failed 167 */ 168 public List<Address> getCc() throws MessagingException { 169 return asList(mimeMessage.getRecipients(Message.RecipientType.CC)); 170 } 171 172 /** 173 * Returns a collection of all content-ids in the parsed message. 174 * <p> 175 * The content-ids are stripped of any angle brackets, i.e. "part1" instead of "<part1>". 176 * </p> 177 * 178 * @return the collection of content ids. 179 * @since 1.3.4 180 */ 181 public Collection<String> getContentIds() { 182 return Collections.unmodifiableSet(cidMap.keySet()); 183 } 184 185 /** 186 * Determines the name of the data source if it is not already set. 187 * 188 * @param part the mail part 189 * @param dataSource the data source 190 * @return the name of the data source or {@code null} if no name can be determined 191 * @throws MessagingException accessing the part failed 192 * @throws UnsupportedEncodingException decoding the text failed 193 */ 194 protected String getDataSourceName(final Part part, final DataSource dataSource) throws MessagingException, UnsupportedEncodingException { 195 String result = dataSource.getName(); 196 if (isEmpty(result)) { 197 result = part.getFileName(); 198 } 199 if (!isEmpty(result)) { 200 result = MimeUtility.decodeText(result); 201 } else { 202 result = null; 203 } 204 return result; 205 } 206 207 /** 208 * Gets the FROM field. 209 * 210 * @return the FROM field of the message 211 * @throws MessagingException parsing the mime message failed 212 */ 213 public String getFrom() throws MessagingException { 214 final Address[] addresses = mimeMessage.getFrom(); 215 if (isEmpty(addresses)) { 216 return null; 217 } 218 return ((InternetAddress) addresses[0]).getAddress(); 219 } 220 221 /** 222 * Gets the htmlContent if any. 223 * 224 * @return Returns the htmlContent if any 225 */ 226 public String getHtmlContent() { 227 return htmlContent; 228 } 229 230 /** 231 * Gets the MimeMessage. 232 * 233 * @return Returns the mimeMessage. 234 */ 235 public MimeMessage getMimeMessage() { 236 return mimeMessage; 237 } 238 239 /** 240 * Gets the plain content if any. 241 * 242 * @return Returns the plainContent if any 243 */ 244 public String getPlainContent() { 245 return plainContent; 246 } 247 248 /** 249 * Gets the 'replyTo' address of the email. 250 * 251 * @return the 'replyTo' address of the email 252 * @throws MessagingException parsing the mime message failed 253 */ 254 public String getReplyTo() throws MessagingException { 255 final Address[] addresses = mimeMessage.getReplyTo(); 256 if (isEmpty(addresses)) { 257 return null; 258 } 259 return ((InternetAddress) addresses[0]).getAddress(); 260 } 261 262 /** 263 * Gets the MIME message subject. 264 * 265 * @return the MIME message subject. 266 * @throws MessagingException parsing the mime message failed. 267 */ 268 public String getSubject() throws MessagingException { 269 return mimeMessage.getSubject(); 270 } 271 272 /** 273 * Gets the MIME message 'to' list. 274 * 275 * @return the 'to' recipients of the message. 276 * @throws MessagingException determining the recipients failed 277 */ 278 public List<Address> getTo() throws MessagingException { 279 return asList(mimeMessage.getRecipients(Message.RecipientType.TO)); 280 } 281 282 /** 283 * Tests if attachments are present. 284 * 285 * @return true if attachments are present. 286 */ 287 public boolean hasAttachments() { 288 return !attachmentList.isEmpty(); 289 } 290 291 /** 292 * Tests is HTML content is present. 293 * 294 * @return true if HTML content is present. 295 */ 296 public boolean hasHtmlContent() { 297 return htmlContent != null; 298 } 299 300 /** 301 * Tests is plain content is present. 302 * 303 * @return true if a plain content is present. 304 */ 305 public boolean hasPlainContent() { 306 return plainContent != null; 307 } 308 309 private boolean isEmpty(final Object[] array) { 310 return array == null || array.length == 0; 311 } 312 313 private boolean isEmpty(final String result) { 314 return result == null || result.isEmpty(); 315 } 316 317 /** 318 * Tests whether the MimePart contains an object of the given mime type. 319 * 320 * @param part the current MimePart 321 * @param mimeType the mime type to check 322 * @return {@code true} if the MimePart matches the given mime type, {@code false} otherwise 323 * @throws MessagingException parsing the MimeMessage failed 324 */ 325 private boolean isMimeType(final MimePart part, final String mimeType) throws MessagingException { 326 // Do not use part.isMimeType(String) as it is broken for MimeBodyPart 327 // and does not really check the actual content type. 328 try { 329 return new ContentType(part.getDataHandler().getContentType()).match(mimeType); 330 } catch (final ParseException ex) { 331 return part.getContentType().equalsIgnoreCase(mimeType); 332 } 333 } 334 335 /** 336 * Tests whether this is multipart. 337 * 338 * @return Returns the isMultiPart. 339 */ 340 public boolean isMultipart() { 341 return isMultiPart; 342 } 343 344 /** 345 * Does the actual extraction. 346 * 347 * @return this instance 348 * @throws MessagingException parsing the mime message failed 349 * @throws IOException parsing the mime message failed 350 */ 351 public MimeMessageParser parse() throws MessagingException, IOException { 352 parse(null, mimeMessage); 353 return this; 354 } 355 356 /** 357 * Extracts the content of a MimeMessage recursively. 358 * 359 * @param parent the parent multi-part 360 * @param part the current MimePart 361 * @throws MessagingException parsing the MimeMessage failed 362 * @throws IOException parsing the MimeMessage failed 363 */ 364 protected void parse(final Multipart parent, final MimePart part) throws MessagingException, IOException { 365 if (isMimeType(part, "text/plain") && plainContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) { 366 plainContent = (String) part.getContent(); 367 } else if (isMimeType(part, "text/html") && htmlContent == null && !Part.ATTACHMENT.equalsIgnoreCase(part.getDisposition())) { 368 htmlContent = (String) part.getContent(); 369 } else if (isMimeType(part, "multipart/*")) { 370 isMultiPart = true; 371 final Multipart multipart = (Multipart) part.getContent(); 372 final int count = multipart.getCount(); 373 // iterate over all MimeBodyPart 374 for (int i = 0; i < count; i++) { 375 parse(multipart, (MimeBodyPart) multipart.getBodyPart(i)); 376 } 377 } else { 378 final String cid = stripContentId(part.getContentID()); 379 final DataSource dataSource = createDataSource(parent, part); 380 if (cid != null) { 381 cidMap.put(cid, dataSource); 382 } 383 attachmentList.add(dataSource); 384 } 385 } 386 387 /** 388 * Strips the content id of any whitespace and angle brackets. 389 * 390 * @param contentId the string to strip 391 * @return a stripped version of the content id 392 */ 393 private String stripContentId(final String contentId) { 394 return contentId == null ? null : contentId.trim().replaceAll("[\\<\\>]", ""); 395 } 396}