View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.net;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.charset.Charset;
22  import java.nio.charset.UnsupportedCharsetException;
23  import java.util.Objects;
24  
25  import org.apache.commons.codec.DecoderException;
26  import org.apache.commons.codec.EncoderException;
27  import org.apache.commons.codec.binary.StringUtils;
28  
29  /**
30   * Implements methods common to all codecs defined in RFC 1522.
31   * <p>
32   * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
33   * encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which
34   * is unlikely to confuse existing message handling software.
35   * </p>
36   * <p>
37   * This class is immutable and thread-safe.
38   * </p>
39   *
40   * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
41   *          Message Header Extensions for Non-ASCII Text</a>
42   * @since 1.3
43   */
44  abstract class RFC1522Codec {
45  
46      /** Separator. */
47      protected static final char SEP = '?';
48  
49      /** Prefix. */
50      protected static final String POSTFIX = "?=";
51  
52      /** Postfix. */
53      protected static final String PREFIX = "=?";
54  
55      /**
56       * The default Charset used for string decoding and encoding.
57       */
58      protected final Charset charset;
59  
60      RFC1522Codec(final Charset charset) {
61          this.charset = Objects.requireNonNull(charset, "charset");
62      }
63  
64      /**
65       * Applies an RFC 1522 compliant decoding scheme to the given string of text.
66       * <p>
67       * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
68       * {@link #doDecoding(byte[])}  method of a concrete class to perform the specific decoding.
69       * </p>
70       *
71       * @param text
72       *            a string to decode
73       * @return A new decoded String or {@code null} if the input is {@code null}.
74       * @throws DecoderException
75       *             thrown if there is an error condition during the decoding process.
76       * @throws UnsupportedEncodingException
77       *             thrown if charset specified in the "encoded-word" header is not supported
78       */
79      protected String decodeText(final String text) throws DecoderException, UnsupportedEncodingException {
80          if (text == null) {
81              return null;
82          }
83          if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
84              throw new DecoderException("RFC 1522 violation: malformed encoded content");
85          }
86          final int terminator = text.length() - 2;
87          int from = 2;
88          int to = text.indexOf(SEP, from);
89          if (to == terminator) {
90              throw new DecoderException("RFC 1522 violation: charset token not found");
91          }
92          final String charset = text.substring(from, to);
93          if (charset.isEmpty()) {
94              throw new DecoderException("RFC 1522 violation: charset not specified");
95          }
96          from = to + 1;
97          to = text.indexOf(SEP, from);
98          if (to == terminator) {
99              throw new DecoderException("RFC 1522 violation: encoding token not found");
100         }
101         final String encoding = text.substring(from, to);
102         if (!getEncoding().equalsIgnoreCase(encoding)) {
103             throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
104         }
105         from = to + 1;
106         to = text.indexOf(SEP, from);
107         byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
108         data = doDecoding(data);
109         return new String(data, charset);
110     }
111 
112     /**
113      * Decodes an array of bytes using the defined encoding scheme.
114      *
115      * @param bytes
116      *            Data to be decoded
117      * @return a byte array that contains decoded data
118      * @throws DecoderException
119      *             A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
120      */
121     protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
122 
123     /**
124      * Encodes an array of bytes using the defined encoding scheme.
125      *
126      * @param bytes
127      *            Data to be encoded
128      * @return A byte array containing the encoded data
129      * @throws EncoderException
130      *             thrown if the Encoder encounters a failure condition during the encoding process.
131      */
132     protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
133 
134     /**
135      * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
136      * <p>
137      * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
138      * {@link #doEncoding(byte[])}  method of a concrete class to perform the specific encoding.
139      * </p>
140      *
141      * @param text
142      *            a string to encode
143      * @param charset
144      *            a charset to be used
145      * @return RFC 1522 compliant "encoded-word"
146      * @throws EncoderException
147      *             thrown if there is an error condition during the Encoding process.
148      * @see Charset
149      */
150     protected String encodeText(final String text, final Charset charset) throws EncoderException {
151         if (text == null) {
152             return null;
153         }
154         final StringBuilder buffer = new StringBuilder();
155         buffer.append(PREFIX);
156         buffer.append(charset);
157         buffer.append(SEP);
158         buffer.append(getEncoding());
159         buffer.append(SEP);
160         buffer.append(StringUtils.newStringUsAscii(doEncoding(text.getBytes(charset))));
161         buffer.append(POSTFIX);
162         return buffer.toString();
163     }
164 
165     /**
166      * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
167      * <p>
168      * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
169      * {@link #doEncoding(byte[])}  method of a concrete class to perform the specific encoding.
170      * </p>
171      *
172      * @param text
173      *            a string to encode
174      * @param charsetName
175      *            the charset to use
176      * @return RFC 1522 compliant "encoded-word"
177      * @throws EncoderException
178      *             thrown if there is an error condition during the Encoding process.
179      * @throws UnsupportedCharsetException
180      *             if charset is not available
181      * @see Charset
182      */
183     protected String encodeText(final String text, final String charsetName) throws EncoderException {
184         if (text == null) {
185             // Don't attempt charsetName conversion.
186             return null;
187         }
188         return encodeText(text, Charset.forName(charsetName));
189     }
190 
191     /**
192      * Gets the default Charset name used for string decoding and encoding.
193      *
194      * @return the default Charset name
195      * @since 1.7
196      */
197     public Charset getCharset() {
198         return charset;
199     }
200 
201     /**
202      * Gets the default Charset name used for string decoding and encoding.
203      *
204      * @return the default Charset name
205      */
206     public String getDefaultCharset() {
207         return charset.name();
208     }
209 
210     /**
211      * Returns the codec name (referred to as encoding in the RFC 1522).
212      *
213      * @return name of the codec.
214      */
215     protected abstract String getEncoding();
216 }