1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.vfs2.util;
18
19 import java.io.ByteArrayOutputStream;
20 import java.io.UnsupportedEncodingException;
21 import java.nio.charset.Charset;
22 import java.nio.charset.StandardCharsets;
23 import java.util.BitSet;
24
25 import org.apache.commons.lang3.StringUtils;
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.commons.vfs2.provider.GenericURLFileName;
29
30
31
32
33
34
35
36
37
38 public class URIUtils {
39
40
41
42
43
44
45
46
47
48
49 private static class EncodingUtils {
50
51
52
53
54
55
56
57
58
59
60
61 static String getAsciiString(final byte[] data, final int offset, final int length) {
62 return new String(data, offset, length, StandardCharsets.US_ASCII);
63 }
64
65
66
67
68
69
70
71
72
73 static byte[] getBytes(final String data, final String charsetName) {
74 if (data == null) {
75 throw new IllegalArgumentException("data may not be null");
76 }
77
78 if (StringUtils.isEmpty(charsetName)) {
79 throw new IllegalArgumentException("charset may not be null or empty");
80 }
81
82 try {
83 return data.getBytes(charsetName);
84 } catch (final UnsupportedEncodingException e) {
85
86 if (LOG.isWarnEnabled()) {
87 LOG.warn("Unsupported encoding: " + charsetName + ". System encoding used.");
88 }
89
90 return data.getBytes(Charset.defaultCharset());
91 }
92 }
93
94 private EncodingUtils() {
95 }
96 }
97
98
99
100
101
102
103
104
105
106 private static class URLCodecUtils {
107
108 private static final byte ESCAPE_CHAR = '%';
109
110 private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256);
111
112
113 static {
114
115 for (int i = 'a'; i <= 'z'; i++) {
116 WWW_FORM_URL_SAFE.set(i);
117 }
118 for (int i = 'A'; i <= 'Z'; i++) {
119 WWW_FORM_URL_SAFE.set(i);
120 }
121
122 for (int i = '0'; i <= '9'; i++) {
123 WWW_FORM_URL_SAFE.set(i);
124 }
125
126 WWW_FORM_URL_SAFE.set('-');
127 WWW_FORM_URL_SAFE.set('_');
128 WWW_FORM_URL_SAFE.set('.');
129 WWW_FORM_URL_SAFE.set('*');
130
131 WWW_FORM_URL_SAFE.set(' ');
132 }
133
134
135
136
137 private static final int RADIX = 16;
138
139 static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) {
140 if (bytes == null) {
141 return null;
142 }
143 if (urlsafe == null) {
144 urlsafe = WWW_FORM_URL_SAFE;
145 }
146
147 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
148 for (final byte c : bytes) {
149 int b = c;
150 if (b < 0) {
151 b = 256 + b;
152 }
153 if (urlsafe.get(b)) {
154 if (b == ' ') {
155 b = '+';
156 }
157 buffer.write(b);
158 } else {
159 buffer.write(ESCAPE_CHAR);
160 final char hex1 = hexDigit(b >> 4);
161 final char hex2 = hexDigit(b);
162 buffer.write(hex1);
163 buffer.write(hex2);
164 }
165 }
166 return buffer.toByteArray();
167 }
168
169 private static char hexDigit(final int b) {
170 return Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
171 }
172
173 private URLCodecUtils() {
174 }
175 }
176
/** Logger for this class; the nested EncodingUtils helper also writes its warnings here. */
private static final Log LOG = LogFactory.getLog(URIUtils.class);

/** Charset name used by {@link #encodePath(String)} when the caller does not specify one. */
private static final String DEFAULT_PROTOCOL_CHARSET = "UTF-8";
183
184 private static String encode(final String unescaped, final BitSet allowed, final String charset) {
185 final byte[] rawdata = URLCodecUtils.encodeUrl(allowed, EncodingUtils.getBytes(unescaped, charset));
186 return EncodingUtils.getAsciiString(rawdata, 0, rawdata.length);
187 }
188
189
190
191
192
193
194
195
196 public static String encodePath(final String unescaped) {
197 return encodePath(unescaped, DEFAULT_PROTOCOL_CHARSET);
198 }
199
200
201
202
203
204
205
206
207
208 public static String encodePath(final String unescaped, final String charset) {
209 if (unescaped == null) {
210 throw new IllegalArgumentException("The string to encode may not be null.");
211 }
212
213 return encode(unescaped, URIBitSets.allowed_abs_path, charset);
214 }
215
/**
 * Private so that this class cannot be instantiated from outside.
 */
private URIUtils() {
}
218
219 }