View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.mail2.jakarta;
18  
19  import java.io.IOException;
20  import java.util.HashMap;
21  import java.util.Map;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.mail2.core.EmailException;
26  import org.apache.commons.mail2.core.EmailUtils;
27  
28  import jakarta.activation.DataSource;
29  
30  /**
31   * <p>
32   * Small wrapper class on top of HtmlEmail which encapsulates the required logic to retrieve images that are contained in "&lt;img src=../&gt;" elements in the
33   * HTML code. This is done by replacing all img-src-elements with "cid:"-entries and embedding images in the email.
34   * </p>
35   * <p>
36   * For local files the class tries to either load them via an absolute path or - if available - use a relative path starting from a base directory. For files
37   * that are not found locally, the implementation tries to download the element and link it in.
38   * </p>
39   * <p>
40   * The image loading is done by an instance of {@code DataSourceResolver} which has to be provided by the caller.
41   * </p>
42   *
43   * @since 1.3
44   */
45  public class ImageHtmlEmail extends HtmlEmail {
46      // Regular Expression to find all <IMG SRC="..."> entries in an HTML
47      // document.It needs to cater for various things, like more whitespaces
48      // including newlines on any place, HTML is not case sensitive and there
49      // can be arbitrary text between "IMG" and "SRC" like IDs and other things.
50  
51      /** Regexp for extracting {@code <img>} tags */
52      public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
53  
54      /** Regexp for extracting {@code <script>} tags */
55      public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
56  
57      // this pattern looks for the HTML image tag which indicates embedded images,
58      // the grouping is necessary to allow to replace the element with the CID
59  
60      /** Pattern for extracting {@code <img>} tags */
61      private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);
62  
63      /** Pattern for extracting {@code <script>} tags */
64      private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);
65  
66      /** Resolve the images and script resources to a DataSource */
67      private DataSourceResolver dataSourceResolver;
68  
69      /**
70       * Constructs a new instance.
71       */
72      public ImageHtmlEmail() {
73          // empty
74      }
75  
76      /**
77       * Does the work of actually building the MimeMessage.
78       *
79       * @see org.apache.commons.mail2.jakarta.HtmlEmail#buildMimeMessage()
80       * @throws EmailException building the MimeMessage failed
81       */
82      @Override
83      public void buildMimeMessage() throws EmailException {
84          try {
85              // embed all the matching image and script resources within the email
86              String temp = replacePattern(getHtml(), IMG_PATTERN);
87              temp = replacePattern(temp, SCRIPT_PATTERN);
88              setHtmlMsg(temp);
89              super.buildMimeMessage();
90          } catch (final IOException e) {
91              throw new EmailException("Building the MimeMessage failed", e);
92          }
93      }
94  
95      /**
96       * Gets the data source resolver.
97       *
98       * @return the resolver
99       */
100     public DataSourceResolver getDataSourceResolver() {
101         return dataSourceResolver;
102     }
103 
104     /**
105      * Replace the regexp matching resource locations with "cid:..." references.
106      *
107      * @param htmlMessage the HTML message to analyze
108      * @param pattern     the regular expression to find resources
109      * @return the HTML message containing "cid" references
110      * @throws EmailException creating the email failed
111      * @throws IOException    resolving the resources failed
112      */
113     private String replacePattern(final String htmlMessage, final Pattern pattern) throws EmailException, IOException {
114         DataSource dataSource;
115         final StringBuffer stringBuffer = new StringBuffer();
116 
117         // maps "cid" --> name
118         final Map<String, String> cidCache = new HashMap<>();
119 
120         // maps "name" --> dataSource
121         final Map<String, DataSource> dataSourceCache = new HashMap<>();
122 
123         // in the String, replace all "img src" with a CID and embed the related
124         // image file if we find it.
125         final Matcher matcher = pattern.matcher(htmlMessage);
126 
127         // the matcher returns all instances one by one
128         while (matcher.find()) {
129             // in the RegEx we have the <src> element as second "group"
130             final String resourceLocation = matcher.group(2);
131 
132             // avoid loading the same data source more than once
133             if (dataSourceCache.get(resourceLocation) == null) {
134                 // in lenient mode we might get a 'null' data source if the resource was not found
135                 dataSource = getDataSourceResolver().resolve(resourceLocation);
136 
137                 if (dataSource != null) {
138                     dataSourceCache.put(resourceLocation, dataSource);
139                 }
140             } else {
141                 dataSource = dataSourceCache.get(resourceLocation);
142             }
143 
144             if (dataSource != null) {
145                 String name = dataSource.getName();
146                 if (EmailUtils.isEmpty(name)) {
147                     name = resourceLocation;
148                 }
149 
150                 String cid = cidCache.get(name);
151 
152                 if (cid == null) {
153                     cid = embed(dataSource, name);
154                     cidCache.put(name, cid);
155                 }
156 
157                 // if we embedded something, then we need to replace the URL with
158                 // the CID, otherwise the Matcher takes care of adding the
159                 // non-replaced text afterwards, so no else is necessary here!
160                 matcher.appendReplacement(stringBuffer, Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3)));
161             }
162         }
163 
164         // append the remaining items...
165         matcher.appendTail(stringBuffer);
166 
167         cidCache.clear();
168         dataSourceCache.clear();
169 
170         return stringBuffer.toString();
171     }
172 
173     /**
174      * Sets the data source resolver.
175      *
176      * @param dataSourceResolver the resolver
177      */
178     public void setDataSourceResolver(final DataSourceResolver dataSourceResolver) {
179         this.dataSourceResolver = dataSourceResolver;
180     }
181 }