/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.nio.ByteBuffer;
import java.util.BitSet;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

/**
 * Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification. For extensibility, an array of
 * special US-ASCII characters can be specified in order to perform proper URI encoding for the different parts
 * of the URI.
 * <p>
 * This class is immutable: every field is final and is assigned only during construction. The internal
 * {@link BitSet} is not thread-safe by itself, but it is written only by the constructors and only read
 * (via {@link BitSet#get(int)}) by the encoding methods afterwards.
 * </p>
 * <p>
 * Note: when {@code plusForSpace} is enabled, a literal {@code '+'} in the input is <em>not</em> escaped unless it
 * is listed in the always-encode characters, so such a {@code '+'} is decoded back as a space.
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc3986#section-2.1">Percent-Encoding</a>
 * @since 1.12
 */
public class PercentCodec implements BinaryEncoder, BinaryDecoder {

    /**
     * The escape character used by the Percent-Encoding in order to introduce an encoded character.
     */
    private static final byte ESCAPE_CHAR = '%';

    /**
     * The bit set used to store the characters that should always be encoded. It is populated by the
     * constructors only and always contains {@code '%'}.
     */
    private final BitSet alwaysEncodeChars = new BitSet();

    /**
     * The flag defining if the space character should be encoded as '+'.
     */
    private final boolean plusForSpace;

    /**
     * The minimum code of the bytes stored in the bit set, used to avoid look-ups for bytes outside the range.
     */
    private final int alwaysEncodeCharsMin;

    /**
     * The maximum code of the bytes stored in the bit set, used to avoid look-ups for bytes outside the range.
     */
    private final int alwaysEncodeCharsMax;

    /**
     * Constructs a Percent codec that will encode all the non US-ASCII characters using the Percent-Encoding
     * while it will not encode any of the US-ASCII characters, except for character '%' that is used as escape
     * character for Percent-Encoding. The space character is not replaced by '+'.
     */
    public PercentCodec() {
        this(null, false);
    }

    /**
     * Constructs a Percent codec by specifying the characters that belong to US-ASCII that should
     * always be encoded. The rest US-ASCII characters will not be encoded, except for character '%' that
     * is used as escape character for Percent-Encoding.
     *
     * @param alwaysEncodeChars the unsafe characters that should always be encoded; may be {@code null},
     *                          in which case only '%' is always encoded
     * @param plusForSpace      the flag defining if the space character should be encoded as '+'
     * @throws IllegalArgumentException if {@code alwaysEncodeChars} contains a negative (non US-ASCII) byte
     */
    public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) {
        this.plusForSpace = plusForSpace;
        insertAlwaysEncodeChars(alwaysEncodeChars);
        // The bit set always holds at least '%', so nextSetBit(0) is never -1 and length() is never 0.
        this.alwaysEncodeCharsMin = this.alwaysEncodeChars.nextSetBit(0);
        this.alwaysEncodeCharsMax = this.alwaysEncodeChars.length() - 1;
    }

    /**
     * Tells whether a byte has to be written as a percent escape: every non US-ASCII byte, plus every
     * US-ASCII byte registered in the always-encode set.
     *
     * @param c the byte to test
     * @return {@code true} if the byte must be percent-encoded
     */
    private boolean canEncode(final byte c) {
        // The short-circuit guarantees that the bit set is only queried with non-negative indices.
        return !isAsciiChar(c) || inAlwaysEncodeCharsRange(c) && alwaysEncodeChars.get(c);
    }

    /**
     * Tells whether the array holds at least one space character.
     *
     * @param bytes the bytes to scan
     * @return {@code true} if a space is present
     */
    private boolean containsSpace(final byte[] bytes) {
        for (final byte b : bytes) {
            if (b == ' ') {
                return true;
            }
        }
        return false;
    }

    /**
     * Decodes bytes encoded with Percent-Encoding based on RFC 3986. Every {@code %XY} escape is replaced by the
     * byte with hexadecimal value {@code XY}; if this codec uses '+' for space, every '+' is replaced by a space.
     * All other bytes are copied unchanged.
     *
     * @param bytes the percent-encoded bytes, may be {@code null}
     * @return the decoded bytes, or {@code null} if the input is {@code null}
     * @throws DecoderException if an escape sequence is truncated at the end of the input (the cause is the
     *                          underlying {@link ArrayIndexOutOfBoundsException}), or if {@code Utils.digit16}
     *                          rejects one of the two escape digits
     */
    @Override
    public byte[] decode(final byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }
        final ByteBuffer buffer = ByteBuffer.allocate(expectedDecodingBytes(bytes));
        for (int i = 0; i < bytes.length; i++) {
            final byte b = bytes[i];
            if (b == ESCAPE_CHAR) {
                try {
                    final int u = Utils.digit16(bytes[++i]);
                    final int l = Utils.digit16(bytes[++i]);
                    buffer.put((byte) ((u << 4) + l));
                } catch (final ArrayIndexOutOfBoundsException e) {
                    // Fewer than two bytes follow the '%'; the original exception is kept as the cause.
                    throw new DecoderException("Invalid percent decoding: ", e);
                }
            } else if (plusForSpace && b == '+') {
                buffer.put((byte) ' ');
            } else {
                buffer.put(b);
            }
        }
        return buffer.array();
    }

    /**
     * Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding.
     *
     * @param obj the object to decode
     * @return the decoding result byte[] as Object, or {@code null} if the input is {@code null}
     * @throws DecoderException if the object is not a byte array, or if decoding the byte array fails
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj == null) {
            return null;
        }
        if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        }
        throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent decoded");
    }

    /**
     * Writes the encoded form of the input into a new array of exactly {@code expectedLength} bytes.
     *
     * @param bytes          the bytes to encode
     * @param expectedLength the length of the encoded output, as computed by {@link #expectedEncodingBytes(byte[])}
     * @param willEncode     {@code false} when no byte needs a percent escape, so that only the space to '+'
     *                       substitution has to be applied
     * @return the encoded bytes
     */
    private byte[] doEncode(final byte[] bytes, final int expectedLength, final boolean willEncode) {
        final ByteBuffer buffer = ByteBuffer.allocate(expectedLength);
        for (final byte b : bytes) {
            if (willEncode && canEncode(b)) {
                buffer.put(ESCAPE_CHAR);
                // Mask each nibble explicitly: b may be negative, and the shift sign-extends.
                buffer.put((byte) Utils.hexDigit((b >> 4) & 0xF));
                buffer.put((byte) Utils.hexDigit(b & 0xF));
            } else if (plusForSpace && b == ' ') {
                buffer.put((byte) '+');
            } else {
                buffer.put(b);
            }
        }
        return buffer.array();
    }

    /**
     * Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well as the
     * US-ASCII characters that are configured to be always encoded. If this codec uses '+' for space, spaces
     * that are not configured to be always encoded are replaced by '+'.
     *
     * @param bytes the bytes to encode, may be {@code null}
     * @return the encoded bytes, {@code null} if the input is {@code null}, or the input array itself (not a
     *         copy) when nothing has to be changed
     * @throws EncoderException declared by the {@link BinaryEncoder} contract; not thrown by this implementation
     */
    @Override
    public byte[] encode(final byte[] bytes) throws EncoderException {
        if (bytes == null) {
            return null;
        }
        final int expectedEncodingBytes = expectedEncodingBytes(bytes);
        // A differing length means that at least one byte expands to a three-byte escape.
        final boolean willEncode = expectedEncodingBytes != bytes.length;
        if (willEncode || plusForSpace && containsSpace(bytes)) {
            return doEncode(bytes, expectedEncodingBytes, willEncode);
        }
        return bytes;
    }

    /**
     * Encodes an object using the Percent-Encoding. Only byte[] objects are accepted.
     *
     * @param obj the object to encode
     * @return the encoding result byte[] as Object, or {@code null} if the input is {@code null}
     * @throws EncoderException if the object is not a byte array
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj == null) {
            return null;
        }
        if (obj instanceof byte[]) {
            return encode((byte[]) obj);
        }
        throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent encoded");
    }

    /**
     * Computes the number of bytes the decoded output will hold: each escape sequence (three input bytes)
     * and each other input byte counts as one output byte.
     *
     * @param bytes the percent-encoded bytes
     * @return the decoded length
     */
    private int expectedDecodingBytes(final byte[] bytes) {
        int byteCount = 0;
        for (int i = 0; i < bytes.length;) {
            final byte b = bytes[i];
            i += b == ESCAPE_CHAR ? 3 : 1;
            byteCount++;
        }
        return byteCount;
    }

    /**
     * Computes the number of bytes the encoded output will hold: three for each byte that must be escaped,
     * one for every other byte.
     *
     * @param bytes the bytes to encode
     * @return the encoded length
     */
    private int expectedEncodingBytes(final byte[] bytes) {
        int byteCount = 0;
        for (final byte b : bytes) {
            byteCount += canEncode(b) ? 3 : 1;
        }
        return byteCount;
    }

    /**
     * Tells whether a byte lies between the smallest and the largest always-encoded character, so that bytes
     * outside of that range can skip the bit set look-up.
     *
     * @param c the byte to test
     * @return {@code true} if the byte is within the range
     */
    private boolean inAlwaysEncodeCharsRange(final byte c) {
        return c >= alwaysEncodeCharsMin && c <= alwaysEncodeCharsMax;
    }

    /**
     * Registers a single US-ASCII character in the {@code BitSet alwaysEncodeChars}. Must only be called
     * while the instance is being constructed.
     *
     * @param b the byte to register
     * @throws IllegalArgumentException if the byte is negative
     */
    private void insertAlwaysEncodeChar(final byte b) {
        if (b < 0) {
            throw new IllegalArgumentException("byte must be >= 0");
        }
        this.alwaysEncodeChars.set(b);
    }

    /**
     * Inserts the byte array into a BitSet for faster lookup, and always adds the escape character '%'.
     *
     * @param alwaysEncodeCharsArray the characters that should always be encoded, may be {@code null}
     * @throws IllegalArgumentException if the array contains a negative byte
     */
    private void insertAlwaysEncodeChars(final byte[] alwaysEncodeCharsArray) {
        if (alwaysEncodeCharsArray != null) {
            for (final byte b : alwaysEncodeCharsArray) {
                insertAlwaysEncodeChar(b);
            }
        }
        insertAlwaysEncodeChar(ESCAPE_CHAR);
    }

    /**
     * Tells whether a byte is a US-ASCII character, i.e. is not negative.
     *
     * @param c the byte to test
     * @return {@code true} if the byte is in the US-ASCII range
     */
    private boolean isAsciiChar(final byte c) {
        return c >= 0;
    }
}