001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.net; 019 020import java.io.ByteArrayOutputStream; 021import java.io.UnsupportedEncodingException; 022import java.util.BitSet; 023 024import org.apache.commons.codec.BinaryDecoder; 025import org.apache.commons.codec.BinaryEncoder; 026import org.apache.commons.codec.CharEncoding; 027import org.apache.commons.codec.DecoderException; 028import org.apache.commons.codec.EncoderException; 029import org.apache.commons.codec.StringDecoder; 030import org.apache.commons.codec.StringEncoder; 031import org.apache.commons.codec.binary.StringUtils; 032 033/** 034 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding. 035 * <p> 036 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and 037 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below 038 * 1.4 rely on the platform's default charset encoding. 039 * </p> 040 * <p> 041 * This class is thread-safe as of 1.11 042 * </p> 043 * 044 * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a> 045 * of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a> 046 * 047 * @since 1.2 048 */ 049public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { 050 051 /** 052 * Release 1.5 made this field final. 053 */ 054 protected static final byte ESCAPE_CHAR = '%'; 055 056 /** 057 * BitSet of www-form-url safe characters. 058 * This is a copy of the internal BitSet which is now used for the conversion. 059 * Changes to this field are ignored. 060 * @deprecated 1.11 Will be removed in 2.0 (CODEC-230) 061 */ 062 @Deprecated 063 protected static final BitSet WWW_FORM_URL; 064 065 private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256); 066 067 // Static initializer for www_form_url 068 static { 069 // alpha characters 070 for (int i = 'a'; i <= 'z'; i++) { 071 WWW_FORM_URL_SAFE.set(i); 072 } 073 for (int i = 'A'; i <= 'Z'; i++) { 074 WWW_FORM_URL_SAFE.set(i); 075 } 076 // numeric characters 077 for (int i = '0'; i <= '9'; i++) { 078 WWW_FORM_URL_SAFE.set(i); 079 } 080 // special chars 081 WWW_FORM_URL_SAFE.set('-'); 082 WWW_FORM_URL_SAFE.set('_'); 083 WWW_FORM_URL_SAFE.set('.'); 084 WWW_FORM_URL_SAFE.set('*'); 085 // blank to be replaced with + 086 WWW_FORM_URL_SAFE.set(' '); 087 088 // Create a copy in case anyone (ab)uses it 089 WWW_FORM_URL = (BitSet) WWW_FORM_URL_SAFE.clone(); 090 } 091 092 /** 093 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted 094 * back to their original representation. 095 * 096 * @param bytes 097 * array of URL safe characters 098 * @return array of original bytes 099 * @throws DecoderException 100 * Thrown if URL decoding is unsuccessful 101 */ 102 public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException { 103 if (bytes == null) { 104 return null; 105 } 106 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 107 for (int i = 0; i < bytes.length; i++) { 108 final int b = bytes[i]; 109 if (b == '+') { 110 buffer.write(' '); 111 } else if (b == ESCAPE_CHAR) { 112 try { 113 final int u = Utils.digit16(bytes[++i]); 114 final int l = Utils.digit16(bytes[++i]); 115 buffer.write((char) ((u << 4) + l)); 116 } catch (final ArrayIndexOutOfBoundsException e) { 117 throw new DecoderException("Invalid URL encoding: ", e); 118 } 119 } else { 120 buffer.write(b); 121 } 122 } 123 return buffer.toByteArray(); 124 } 125 126 /** 127 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 128 * 129 * @param urlsafe 130 * bitset of characters deemed URL safe 131 * @param bytes 132 * array of bytes to convert to URL safe characters 133 * @return array of bytes containing URL safe characters 134 */ 135 public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) { 136 if (bytes == null) { 137 return null; 138 } 139 if (urlsafe == null) { 140 urlsafe = WWW_FORM_URL_SAFE; 141 } 142 143 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 144 for (final byte c : bytes) { 145 int b = c; 146 if (b < 0) { 147 b = 256 + b; 148 } 149 if (urlsafe.get(b)) { 150 if (b == ' ') { 151 b = '+'; 152 } 153 buffer.write(b); 154 } else { 155 buffer.write(ESCAPE_CHAR); 156 final char hex1 = Utils.hexDigit(b >> 4); 157 final char hex2 = Utils.hexDigit(b); 158 buffer.write(hex1); 159 buffer.write(hex2); 160 } 161 } 162 return buffer.toByteArray(); 163 } 164 165 /** 166 * The default charset used for string decoding and encoding. 167 * 168 * @deprecated TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126) 169 */ 170 @Deprecated 171 protected volatile String charset; // added volatile: see CODEC-232 172 173 /** 174 * Default constructor. 175 */ 176 public URLCodec() { 177 this(CharEncoding.UTF_8); 178 } 179 180 /** 181 * Constructor which allows for the selection of a default charset. 182 * 183 * @param charset the default string charset to use. 184 */ 185 public URLCodec(final String charset) { 186 this.charset = charset; 187 } 188 189 /** 190 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted 191 * back to their original representation. 192 * 193 * @param bytes 194 * array of URL safe characters 195 * @return array of original bytes 196 * @throws DecoderException 197 * Thrown if URL decoding is unsuccessful 198 */ 199 @Override 200 public byte[] decode(final byte[] bytes) throws DecoderException { 201 return decodeUrl(bytes); 202 } 203 204 /** 205 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original 206 * representation. 207 * 208 * @param obj 209 * URL safe object to convert into its original form 210 * @return original object 211 * @throws DecoderException 212 * Thrown if the argument is not a {@code String} or {@code byte[]}. Thrown if a failure 213 * condition is encountered during the decode process. 214 */ 215 @Override 216 public Object decode(final Object obj) throws DecoderException { 217 if (obj == null) { 218 return null; 219 } 220 if (obj instanceof byte[]) { 221 return decode((byte[]) obj); 222 } 223 if (obj instanceof String) { 224 return decode((String) obj); 225 } 226 throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded"); 227 } 228 229 /** 230 * Decodes a URL safe string into its original form using the default string charset. Escaped characters are 231 * converted back to their original representation. 232 * 233 * @param str 234 * URL safe string to convert into its original form 235 * @return original string 236 * @throws DecoderException 237 * Thrown if URL decoding is unsuccessful 238 * @see #getDefaultCharset() 239 */ 240 @Override 241 public String decode(final String str) throws DecoderException { 242 if (str == null) { 243 return null; 244 } 245 try { 246 return decode(str, getDefaultCharset()); 247 } catch (final UnsupportedEncodingException e) { 248 throw new DecoderException(e.getMessage(), e); 249 } 250 } 251 252 /** 253 * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted 254 * back to their original representation. 255 * 256 * @param str 257 * URL safe string to convert into its original form 258 * @param charsetName 259 * the original string charset 260 * @return original string 261 * @throws DecoderException 262 * Thrown if URL decoding is unsuccessful 263 * @throws UnsupportedEncodingException 264 * Thrown if charset is not supported 265 */ 266 public String decode(final String str, final String charsetName) 267 throws DecoderException, UnsupportedEncodingException { 268 if (str == null) { 269 return null; 270 } 271 return new String(decode(StringUtils.getBytesUsAscii(str)), charsetName); 272 } 273 274 /** 275 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 276 * 277 * @param bytes 278 * array of bytes to convert to URL safe characters 279 * @return array of bytes containing URL safe characters 280 */ 281 @Override 282 public byte[] encode(final byte[] bytes) { 283 return encodeUrl(WWW_FORM_URL_SAFE, bytes); 284 } 285 286 /** 287 * Encodes an object into its URL safe form. Unsafe characters are escaped. 288 * 289 * @param obj 290 * string to convert to a URL safe form 291 * @return URL safe object 292 * @throws EncoderException 293 * Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful 294 */ 295 @Override 296 public Object encode(final Object obj) throws EncoderException { 297 if (obj == null) { 298 return null; 299 } 300 if (obj instanceof byte[]) { 301 return encode((byte[]) obj); 302 } 303 if (obj instanceof String) { 304 return encode((String) obj); 305 } 306 throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded"); 307 } 308 309 /** 310 * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped. 311 * 312 * @param str 313 * string to convert to a URL safe form 314 * @return URL safe string 315 * @throws EncoderException 316 * Thrown if URL encoding is unsuccessful 317 * 318 * @see #getDefaultCharset() 319 */ 320 @Override 321 public String encode(final String str) throws EncoderException { 322 if (str == null) { 323 return null; 324 } 325 try { 326 return encode(str, getDefaultCharset()); 327 } catch (final UnsupportedEncodingException e) { 328 throw new EncoderException(e.getMessage(), e); 329 } 330 } 331 332 /** 333 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped. 334 * 335 * @param str 336 * string to convert to a URL safe form 337 * @param charsetName 338 * the charset for str 339 * @return URL safe string 340 * @throws UnsupportedEncodingException 341 * Thrown if charset is not supported 342 */ 343 public String encode(final String str, final String charsetName) throws UnsupportedEncodingException { 344 if (str == null) { 345 return null; 346 } 347 return StringUtils.newStringUsAscii(encode(str.getBytes(charsetName))); 348 } 349 350 /** 351 * The default charset used for string decoding and encoding. 352 * 353 * @return the default string charset. 354 */ 355 public String getDefaultCharset() { 356 return this.charset; 357 } 358 359 /** 360 * The {@code String} encoding used for decoding and encoding. 361 * 362 * @return Returns the encoding. 363 * 364 * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0. 365 */ 366 @Deprecated 367 public String getEncoding() { 368 return this.charset; 369 } 370 371}