001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.mail2.jakarta;
018
019import java.io.IOException;
020import java.util.HashMap;
021import java.util.Map;
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import org.apache.commons.mail2.core.EmailException;
026import org.apache.commons.mail2.core.EmailUtils;
027
028import jakarta.activation.DataSource;
029
030/**
031 * <p>
032 * Small wrapper class on top of HtmlEmail which encapsulates the required logic to retrieve images that are contained in "&lt;img src=../&gt;" elements in the
033 * HTML code. This is done by replacing all img-src-elements with "cid:"-entries and embedding images in the email.
034 * </p>
035 * <p>
036 * For local files the class tries to either load them via an absolute path or - if available - use a relative path starting from a base directory. For files
037 * that are not found locally, the implementation tries to download the element and link it in.
038 * </p>
039 * <p>
040 * The image loading is done by an instance of {@code DataSourceResolver} which has to be provided by the caller.
041 * </p>
042 *
043 * @since 1.3
044 */
045public class ImageHtmlEmail extends HtmlEmail {
046    // Regular Expression to find all <IMG SRC="..."> entries in an HTML
047    // document.It needs to cater for various things, like more whitespaces
048    // including newlines on any place, HTML is not case sensitive and there
049    // can be arbitrary text between "IMG" and "SRC" like IDs and other things.
050
051    /** Regexp for extracting {@code <img>} tags */
052    public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
053
054    /** Regexp for extracting {@code <script>} tags */
055    public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
056
057    // this pattern looks for the HTML image tag which indicates embedded images,
058    // the grouping is necessary to allow to replace the element with the CID
059
060    /** Pattern for extracting {@code <img>} tags */
061    private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);
062
063    /** Pattern for extracting {@code <script>} tags */
064    private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);
065
066    /** Resolve the images and script resources to a DataSource */
067    private DataSourceResolver dataSourceResolver;
068
069    /**
070     * Constructs a new instance.
071     */
072    public ImageHtmlEmail() {
073        // empty
074    }
075
076    /**
077     * Does the work of actually building the MimeMessage.
078     *
079     * @see org.apache.commons.mail2.jakarta.HtmlEmail#buildMimeMessage()
080     * @throws EmailException building the MimeMessage failed
081     */
082    @Override
083    public void buildMimeMessage() throws EmailException {
084        try {
085            // embed all the matching image and script resources within the email
086            String temp = replacePattern(getHtml(), IMG_PATTERN);
087            temp = replacePattern(temp, SCRIPT_PATTERN);
088            setHtmlMsg(temp);
089            super.buildMimeMessage();
090        } catch (final IOException e) {
091            throw new EmailException("Building the MimeMessage failed", e);
092        }
093    }
094
095    /**
096     * Gets the data source resolver.
097     *
098     * @return the resolver
099     */
100    public DataSourceResolver getDataSourceResolver() {
101        return dataSourceResolver;
102    }
103
104    /**
105     * Replace the regexp matching resource locations with "cid:..." references.
106     *
107     * @param htmlMessage the HTML message to analyze
108     * @param pattern     the regular expression to find resources
109     * @return the HTML message containing "cid" references
110     * @throws EmailException creating the email failed
111     * @throws IOException    resolving the resources failed
112     */
113    private String replacePattern(final String htmlMessage, final Pattern pattern) throws EmailException, IOException {
114        DataSource dataSource;
115        final StringBuffer stringBuffer = new StringBuffer();
116
117        // maps "cid" --> name
118        final Map<String, String> cidCache = new HashMap<>();
119
120        // maps "name" --> dataSource
121        final Map<String, DataSource> dataSourceCache = new HashMap<>();
122
123        // in the String, replace all "img src" with a CID and embed the related
124        // image file if we find it.
125        final Matcher matcher = pattern.matcher(htmlMessage);
126
127        // the matcher returns all instances one by one
128        while (matcher.find()) {
129            // in the RegEx we have the <src> element as second "group"
130            final String resourceLocation = matcher.group(2);
131
132            // avoid loading the same data source more than once
133            if (dataSourceCache.get(resourceLocation) == null) {
134                // in lenient mode we might get a 'null' data source if the resource was not found
135                dataSource = getDataSourceResolver().resolve(resourceLocation);
136
137                if (dataSource != null) {
138                    dataSourceCache.put(resourceLocation, dataSource);
139                }
140            } else {
141                dataSource = dataSourceCache.get(resourceLocation);
142            }
143
144            if (dataSource != null) {
145                String name = dataSource.getName();
146                if (EmailUtils.isEmpty(name)) {
147                    name = resourceLocation;
148                }
149
150                String cid = cidCache.get(name);
151
152                if (cid == null) {
153                    cid = embed(dataSource, name);
154                    cidCache.put(name, cid);
155                }
156
157                // if we embedded something, then we need to replace the URL with
158                // the CID, otherwise the Matcher takes care of adding the
159                // non-replaced text afterwards, so no else is necessary here!
160                matcher.appendReplacement(stringBuffer, Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3)));
161            }
162        }
163
164        // append the remaining items...
165        matcher.appendTail(stringBuffer);
166
167        cidCache.clear();
168        dataSourceCache.clear();
169
170        return stringBuffer.toString();
171    }
172
173    /**
174     * Sets the data source resolver.
175     *
176     * @param dataSourceResolver the resolver
177     */
178    public void setDataSourceResolver(final DataSourceResolver dataSourceResolver) {
179        this.dataSourceResolver = dataSourceResolver;
180    }
181}