001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.mail2.javax; 018 019import java.io.IOException; 020import java.util.HashMap; 021import java.util.Map; 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024 025import javax.activation.DataSource; 026 027import org.apache.commons.mail2.core.EmailException; 028import org.apache.commons.mail2.core.EmailUtils; 029 030/** 031 * <p> 032 * Small wrapper class on top of HtmlEmail which encapsulates the required logic to retrieve images that are contained in "<img src=../>" elements in the 033 * HTML code. This is done by replacing all img-src-elements with "cid:"-entries and embedding images in the email. 034 * </p> 035 * <p> 036 * For local files the class tries to either load them via an absolute path or - if available - use a relative path starting from a base directory. For files 037 * that are not found locally, the implementation tries to download the element and link it in. 038 * </p> 039 * <p> 040 * The image loading is done by an instance of {@code DataSourceResolver} which has to be provided by the caller. 041 * </p> 042 * 043 * @since 1.3 044 */ 045public class ImageHtmlEmail extends HtmlEmail { 046 // Regular Expression to find all <IMG SRC="..."> entries in an HTML 047 // document.It needs to cater for various things, like more whitespaces 048 // including newlines on any place, HTML is not case sensitive and there 049 // can be arbitrary text between "IMG" and "SRC" like IDs and other things. 050 051 /** Regexp for extracting {@code <img>} tags */ 052 public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])"; 053 054 /** Regexp for extracting {@code <script>} tags */ 055 public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])"; 056 057 // this pattern looks for the HTML image tag which indicates embedded images, 058 // the grouping is necessary to allow to replace the element with the CID 059 060 /** Pattern for extracting {@code <img>} tags */ 061 private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC); 062 063 /** Pattern for extracting {@code <script>} tags */ 064 private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC); 065 066 /** Resolve the images and script resources to a DataSource */ 067 private DataSourceResolver dataSourceResolver; 068 069 /** 070 * Constructs a new instance. 071 */ 072 public ImageHtmlEmail() { 073 // empty 074 } 075 076 /** 077 * Does the work of actually building the MimeMessage. 078 * 079 * @see org.apache.commons.mail2.javax.HtmlEmail#buildMimeMessage() 080 * @throws EmailException building the MimeMessage failed 081 */ 082 @Override 083 public void buildMimeMessage() throws EmailException { 084 try { 085 // embed all the matching image and script resources within the email 086 String temp = replacePattern(getHtml(), IMG_PATTERN); 087 temp = replacePattern(temp, SCRIPT_PATTERN); 088 setHtmlMsg(temp); 089 super.buildMimeMessage(); 090 } catch (final IOException e) { 091 throw new EmailException("Building the MimeMessage failed", e); 092 } 093 } 094 095 /** 096 * Gets the data source resolver. 097 * 098 * @return the resolver 099 */ 100 public DataSourceResolver getDataSourceResolver() { 101 return dataSourceResolver; 102 } 103 104 /** 105 * Replace the regexp matching resource locations with "cid:..." references. 106 * 107 * @param htmlMessage the HTML message to analyze 108 * @param pattern the regular expression to find resources 109 * @return the HTML message containing "cid" references 110 * @throws EmailException creating the email failed 111 * @throws IOException resolving the resources failed 112 */ 113 private String replacePattern(final String htmlMessage, final Pattern pattern) throws EmailException, IOException { 114 DataSource dataSource; 115 final StringBuffer stringBuffer = new StringBuffer(); 116 117 // maps "cid" --> name 118 final Map<String, String> cidCache = new HashMap<>(); 119 120 // maps "name" --> dataSource 121 final Map<String, DataSource> dataSourceCache = new HashMap<>(); 122 123 // in the String, replace all "img src" with a CID and embed the related 124 // image file if we find it. 125 final Matcher matcher = pattern.matcher(htmlMessage); 126 127 // the matcher returns all instances one by one 128 while (matcher.find()) { 129 // in the RegEx we have the <src> element as second "group" 130 final String resourceLocation = matcher.group(2); 131 132 // avoid loading the same data source more than once 133 if (dataSourceCache.get(resourceLocation) == null) { 134 // in lenient mode we might get a 'null' data source if the resource was not found 135 dataSource = getDataSourceResolver().resolve(resourceLocation); 136 137 if (dataSource != null) { 138 dataSourceCache.put(resourceLocation, dataSource); 139 } 140 } else { 141 dataSource = dataSourceCache.get(resourceLocation); 142 } 143 144 if (dataSource != null) { 145 String name = dataSource.getName(); 146 if (EmailUtils.isEmpty(name)) { 147 name = resourceLocation; 148 } 149 150 String cid = cidCache.get(name); 151 152 if (cid == null) { 153 cid = embed(dataSource, name); 154 cidCache.put(name, cid); 155 } 156 157 // if we embedded something, then we need to replace the URL with 158 // the CID, otherwise the Matcher takes care of adding the 159 // non-replaced text afterwards, so no else is necessary here! 160 matcher.appendReplacement(stringBuffer, Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3))); 161 } 162 } 163 164 // append the remaining items... 165 matcher.appendTail(stringBuffer); 166 167 cidCache.clear(); 168 dataSourceCache.clear(); 169 170 return stringBuffer.toString(); 171 } 172 173 /** 174 * Sets the data source resolver. 175 * 176 * @param dataSourceResolver the resolver 177 */ 178 public void setDataSourceResolver(final DataSourceResolver dataSourceResolver) { 179 this.dataSourceResolver = dataSourceResolver; 180 } 181}