/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.mail2.jakarta;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.mail2.core.EmailException;
import org.apache.commons.mail2.core.EmailUtils;

import jakarta.activation.DataSource;

/**
 * <p>
 * Small wrapper class on top of HtmlEmail which encapsulates the required logic to retrieve images that are contained in {@code <img src=../>} elements in
 * the HTML code. This is done by replacing all img-src-elements with "cid:"-entries and embedding images in the email.
 * </p>
 * <p>
 * For local files the class tries to either load them via an absolute path or - if available - use a relative path starting from a base directory. For files
 * that are not found locally, the implementation tries to download the element and link it in.
 * </p>
 * <p>
 * The image loading is done by an instance of {@code DataSourceResolver} which has to be provided by the caller.
 * </p>
 *
 * @since 1.3
 */
public class ImageHtmlEmail extends HtmlEmail {
    // Regular Expression to find all <IMG SRC="..."> entries in an HTML
    // document. It needs to cater for various things, like more whitespaces
    // including newlines on any place, HTML is not case sensitive and there
    // can be arbitrary text between "IMG" and "SRC" like IDs and other things.

    /** Regexp for extracting the {@code src} attribute of {@code <img>} tags; group 2 is the resource location. */
    public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";

    /** Regexp for extracting the {@code src} attribute of {@code <script>} tags; group 2 is the resource location. */
    public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";

    // These patterns look for the HTML tags which reference embeddable resources;
    // the grouping is necessary to allow replacing the location with the CID.

    /** Pattern for extracting {@code <img>} tags */
    private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);

    /** Pattern for extracting {@code <script>} tags */
    private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);

    /** Resolves the image and script resources to a DataSource; {@code null} until set by the caller. */
    private DataSourceResolver dataSourceResolver;

    /**
     * Constructs a new instance.
     */
    public ImageHtmlEmail() {
        // empty
    }

    /**
     * Does the work of actually building the MimeMessage.
     * <p>
     * All image and script locations matched in the HTML body are replaced by "cid:" references to embedded resources before the parent implementation is
     * invoked. If {@code getHtml()} returns {@code null} there is nothing to rewrite and the call is delegated unchanged.
     * </p>
     *
     * @see org.apache.commons.mail2.jakarta.HtmlEmail#buildMimeMessage()
     * @throws EmailException        building the MimeMessage failed
     * @throws IllegalStateException a resource has to be resolved but no data source resolver was set
     */
    @Override
    public void buildMimeMessage() throws EmailException {
        try {
            final String html = getHtml();
            // Pattern.matcher(null) would fail with a bare NullPointerException, so skip the rewrite
            // and let the parent decide how to handle a message without HTML content.
            if (html != null) {
                // embed all the matching image and script resources within the email
                final String withImages = replacePattern(html, IMG_PATTERN);
                setHtmlMsg(replacePattern(withImages, SCRIPT_PATTERN));
            }
            super.buildMimeMessage();
        } catch (final IOException e) {
            throw new EmailException("Building the MimeMessage failed", e);
        }
    }

    /**
     * Gets the data source resolver.
     *
     * @return the resolver, or {@code null} if none has been set
     */
    public DataSourceResolver getDataSourceResolver() {
        return dataSourceResolver;
    }

    /**
     * Replace the regexp matching resource locations with "cid:..." references.
     * <p>
     * The pattern must define three groups: the text up to and including the opening quote, the resource location, and the closing quote. Matches whose
     * resource cannot be resolved are left untouched.
     * </p>
     *
     * @param htmlMessage the HTML message to analyze
     * @param pattern     the regular expression to find resources
     * @return the HTML message containing "cid" references
     * @throws EmailException creating the email failed
     * @throws IOException    resolving the resources failed
     */
    private String replacePattern(final String htmlMessage, final Pattern pattern) throws EmailException, IOException {
        // StringBuffer is kept on purpose: Matcher.appendReplacement accepts a StringBuilder only from Java 9 on.
        final StringBuffer result = new StringBuffer();

        // maps resource name --> "cid"
        final Map<String, String> cidCache = new HashMap<>();

        // maps resource location --> dataSource
        final Map<String, DataSource> dataSourceCache = new HashMap<>();

        final Matcher matcher = pattern.matcher(htmlMessage);

        // the matcher returns all instances one by one
        while (matcher.find()) {
            // in the RegEx the resource location is the second "group"
            final String resourceLocation = matcher.group(2);
            final DataSource dataSource = resolveDataSource(resourceLocation, dataSourceCache);

            if (dataSource == null) {
                // nothing to embed: the Matcher copies the unmodified text along with the
                // next replacement or with appendTail(), so there is nothing to do here
                continue;
            }

            String name = dataSource.getName();
            if (EmailUtils.isEmpty(name)) {
                name = resourceLocation;
            }

            // NOTE(review): the CID is cached by name, so two different locations whose data sources
            // report the same name end up sharing one CID - confirm this is intended.
            String cid = cidCache.get(name);
            if (cid == null) {
                cid = embed(dataSource, name);
                cidCache.put(name, cid);
            }

            // quoteReplacement keeps '$' and '\' in the surrounding markup literal
            matcher.appendReplacement(result, Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3)));
        }

        // append the remaining items...
        matcher.appendTail(result);

        return result.toString();
    }

    /**
     * Resolves a resource location to a data source, loading each location at most once per cache.
     *
     * @param resourceLocation the location found in the HTML
     * @param dataSourceCache  the already resolved data sources, keyed by location; updated on a successful lookup
     * @return the data source, or {@code null} if the resolver did not find the resource (lenient mode)
     * @throws IOException           resolving the resource failed
     * @throws IllegalStateException no data source resolver was set
     */
    private DataSource resolveDataSource(final String resourceLocation, final Map<String, DataSource> dataSourceCache) throws IOException {
        final DataSource cached = dataSourceCache.get(resourceLocation);
        if (cached != null) {
            return cached;
        }

        final DataSourceResolver resolver = getDataSourceResolver();
        if (resolver == null) {
            // fail with a meaningful message instead of a bare NullPointerException
            throw new IllegalStateException("No DataSourceResolver set, unable to resolve resource: " + resourceLocation);
        }

        // in lenient mode we might get a 'null' data source if the resource was not found
        final DataSource resolved = resolver.resolve(resourceLocation);
        if (resolved != null) {
            dataSourceCache.put(resourceLocation, resolved);
        }
        return resolved;
    }

    /**
     * Sets the data source resolver.
     *
     * @param dataSourceResolver the resolver
     */
    public void setDataSourceResolver(final DataSourceResolver dataSourceResolver) {
        this.dataSourceResolver = dataSourceResolver;
    }
}