1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.codec.net; 19 20 import java.io.UnsupportedEncodingException; 21 import java.nio.charset.Charset; 22 import java.nio.charset.UnsupportedCharsetException; 23 import java.util.Objects; 24 25 import org.apache.commons.codec.DecoderException; 26 import org.apache.commons.codec.EncoderException; 27 import org.apache.commons.codec.binary.StringUtils; 28 29 /** 30 * Implements methods common to all codecs defined in RFC 1522. 31 * <p> 32 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the 33 * encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which 34 * is unlikely to confuse existing message handling software. 35 * </p> 36 * <p> 37 * This class is immutable and thread-safe. 38 * </p> 39 * 40 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: 41 * Message Header Extensions for Non-ASCII Text</a> 42 * @since 1.3 43 */ 44 abstract class RFC1522Codec { 45 46 /** Separator. */ 47 protected static final char SEP = '?'; 48 49 /** Prefix. */ 50 protected static final String POSTFIX = "?="; 51 52 /** Postfix. */ 53 protected static final String PREFIX = "=?"; 54 55 /** 56 * The default Charset used for string decoding and encoding. 57 */ 58 protected final Charset charset; 59 60 RFC1522Codec(final Charset charset) { 61 this.charset = Objects.requireNonNull(charset, "charset"); 62 } 63 64 /** 65 * Applies an RFC 1522 compliant decoding scheme to the given string of text. 66 * <p> 67 * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes 68 * {@link #doDecoding(byte[])} method of a concrete class to perform the specific decoding. 69 * </p> 70 * 71 * @param text 72 * a string to decode 73 * @return A new decoded String or {@code null} if the input is {@code null}. 74 * @throws DecoderException 75 * thrown if there is an error condition during the decoding process. 76 * @throws UnsupportedEncodingException 77 * thrown if charset specified in the "encoded-word" header is not supported 78 */ 79 protected String decodeText(final String text) throws DecoderException, UnsupportedEncodingException { 80 if (text == null) { 81 return null; 82 } 83 if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) { 84 throw new DecoderException("RFC 1522 violation: malformed encoded content"); 85 } 86 final int terminator = text.length() - 2; 87 int from = 2; 88 int to = text.indexOf(SEP, from); 89 if (to == terminator) { 90 throw new DecoderException("RFC 1522 violation: charset token not found"); 91 } 92 final String charset = text.substring(from, to); 93 if (charset.isEmpty()) { 94 throw new DecoderException("RFC 1522 violation: charset not specified"); 95 } 96 from = to + 1; 97 to = text.indexOf(SEP, from); 98 if (to == terminator) { 99 throw new DecoderException("RFC 1522 violation: encoding token not found"); 100 } 101 final String encoding = text.substring(from, to); 102 if (!getEncoding().equalsIgnoreCase(encoding)) { 103 throw new DecoderException("This codec cannot decode " + encoding + " encoded content"); 104 } 105 from = to + 1; 106 to = text.indexOf(SEP, from); 107 byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to)); 108 data = doDecoding(data); 109 return new String(data, charset); 110 } 111 112 /** 113 * Decodes an array of bytes using the defined encoding scheme. 114 * 115 * @param bytes 116 * Data to be decoded 117 * @return a byte array that contains decoded data 118 * @throws DecoderException 119 * A decoder exception is thrown if a Decoder encounters a failure condition during the decode process. 120 */ 121 protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException; 122 123 /** 124 * Encodes an array of bytes using the defined encoding scheme. 125 * 126 * @param bytes 127 * Data to be encoded 128 * @return A byte array containing the encoded data 129 * @throws EncoderException 130 * thrown if the Encoder encounters a failure condition during the encoding process. 131 */ 132 protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException; 133 134 /** 135 * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset. 136 * <p> 137 * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes 138 * {@link #doEncoding(byte[])} method of a concrete class to perform the specific encoding. 139 * </p> 140 * 141 * @param text 142 * a string to encode 143 * @param charset 144 * a charset to be used 145 * @return RFC 1522 compliant "encoded-word" 146 * @throws EncoderException 147 * thrown if there is an error condition during the Encoding process. 148 * @see Charset 149 */ 150 protected String encodeText(final String text, final Charset charset) throws EncoderException { 151 if (text == null) { 152 return null; 153 } 154 final StringBuilder buffer = new StringBuilder(); 155 buffer.append(PREFIX); 156 buffer.append(charset); 157 buffer.append(SEP); 158 buffer.append(getEncoding()); 159 buffer.append(SEP); 160 buffer.append(StringUtils.newStringUsAscii(doEncoding(text.getBytes(charset)))); 161 buffer.append(POSTFIX); 162 return buffer.toString(); 163 } 164 165 /** 166 * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset. 167 * <p> 168 * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes 169 * {@link #doEncoding(byte[])} method of a concrete class to perform the specific encoding. 170 * </p> 171 * 172 * @param text 173 * a string to encode 174 * @param charsetName 175 * the charset to use 176 * @return RFC 1522 compliant "encoded-word" 177 * @throws EncoderException 178 * thrown if there is an error condition during the Encoding process. 179 * @throws UnsupportedCharsetException 180 * if charset is not available 181 * @see Charset 182 */ 183 protected String encodeText(final String text, final String charsetName) throws EncoderException { 184 if (text == null) { 185 // Don't attempt charsetName conversion. 186 return null; 187 } 188 return encodeText(text, Charset.forName(charsetName)); 189 } 190 191 /** 192 * Gets the default Charset name used for string decoding and encoding. 193 * 194 * @return the default Charset name 195 * @since 1.7 196 */ 197 public Charset getCharset() { 198 return charset; 199 } 200 201 /** 202 * Gets the default Charset name used for string decoding and encoding. 203 * 204 * @return the default Charset name 205 */ 206 public String getDefaultCharset() { 207 return charset.name(); 208 } 209 210 /** 211 * Returns the codec name (referred to as encoding in the RFC 1522). 212 * 213 * @return name of the codec. 214 */ 215 protected abstract String getEncoding(); 216 }