1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.binary;
19
20 import java.io.UnsupportedEncodingException;
21 import java.nio.ByteBuffer;
22 import java.nio.charset.Charset;
23 import java.nio.charset.StandardCharsets;
24
25 import org.apache.commons.codec.CharEncoding;
26
/**
 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
 * specified in standard {@link Charset}.
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @see CharEncoding
 * @see Charset
 * @see StandardCharsets
 * @since 1.4
 */
public class StringUtils {

    /**
     * <p>
     * Compares two CharSequences, returning {@code true} if they represent equal sequences of characters.
     * </p>
     *
     * <p>
     * {@code null}s are handled without exceptions. Two {@code null} references are considered to be equal.
     * The comparison is case sensitive.
     * </p>
     *
     * <pre>
     * StringUtils.equals(null, null)   = true
     * StringUtils.equals(null, "abc")  = false
     * StringUtils.equals("abc", null)  = false
     * StringUtils.equals("abc", "abc") = true
     * StringUtils.equals("abc", "ABC") = false
     * </pre>
     *
     * <p>
     * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
     * </p>
     *
     * @see Object#equals(Object)
     * @param cs1
     *            the first CharSequence, may be {@code null}
     * @param cs2
     *            the second CharSequence, may be {@code null}
     * @return {@code true} if the CharSequences are equal (case-sensitive), or both {@code null}
     * @since 1.10
     */
    public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
        // Same reference (this also covers the case where both are null).
        if (cs1 == cs2) {
            return true;
        }
        // Exactly one of them is null at this point.
        if (cs1 == null || cs2 == null) {
            return false;
        }
        // Two Strings: let String.equals do the work.
        if (cs1 instanceof String && cs2 instanceof String) {
            return cs1.equals(cs2);
        }
        // General case: same length and the same char at every index.
        final int length = cs1.length();
        if (length != cs2.length()) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            if (cs1.charAt(i) != cs2.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes the string with {@link String#getBytes(Charset)} and wraps the resulting array with
     * {@link ByteBuffer#wrap(byte[])}.
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the {@code String}
     * @return a byte buffer wrapping the encoded bytes, or {@code null} if the input string was {@code null}
     */
    private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
        if (string == null) {
            return null;
        }
        return ByteBuffer.wrap(string.getBytes(charset));
    }

    /**
     * Encodes the given string into a byte buffer using the UTF-8 charset. The returned buffer wraps a newly
     * created byte array holding the encoded bytes.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return a byte buffer wrapping the encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     * @since 1.11
     */
    public static ByteBuffer getByteBufferUtf8(final String string) {
        return getByteBuffer(string, StandardCharsets.UTF_8);
    }

    /**
     * Calls {@link String#getBytes(Charset)}
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the {@code String}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    private static byte[] getBytes(final String string, final Charset charset) {
        return string == null ? null : string.getBytes(charset);
    }

    /**
     * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
     *             since it is required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesIso8859_1(final String string) {
        return getBytes(string, StandardCharsets.ISO_8859_1);
    }

    /**
     * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
     * array.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * </p>
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name. The caught exception is available as the cause.
     * @see CharEncoding
     * @see String#getBytes(String)
     */
    public static byte[] getBytesUnchecked(final String string, final String charsetName) {
        if (string == null) {
            return null;
        }
        try {
            return string.getBytes(charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUsAscii(final String string) {
        return getBytes(string, StandardCharsets.US_ASCII);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16(final String string) {
        return getBytes(string, StandardCharsets.UTF_16);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Be(final String string) {
        return getBytes(string, StandardCharsets.UTF_16BE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Le(final String string) {
        return getBytes(string, StandardCharsets.UTF_16LE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf8(final String string) {
        return getBytes(string, StandardCharsets.UTF_8);
    }

    /**
     * Builds the {@link IllegalStateException} thrown when a named charset turns out to be unsupported.
     *
     * @param charsetName
     *            the charset name that was requested
     * @param e
     *            the exception raised for that name; kept as the cause of the returned exception
     * @return a new exception whose message is the charset name followed by {@code ": "} and the string form of
     *         {@code e}
     */
    private static IllegalStateException newIllegalStateException(final String charsetName,
            final UnsupportedEncodingException e) {
        // Chain the original exception so its stack trace is not lost.
        return new IllegalStateException(charsetName + ": " + e, e);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @param charset
     *            The {@link Charset} used to decode the bytes; not {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the given charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if charset is {@code null}
     */
    private static String newString(final byte[] bytes, final Charset charset) {
        return bytes == null ? null : new String(bytes, charset);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * </p>
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return A new {@code String} decoded from the specified array of bytes using the given charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name. The caught exception is available as the cause.
     * @see CharEncoding
     * @see String#String(byte[], String)
     */
    public static String newString(final byte[] bytes, final String charsetName) {
        if (bytes == null) {
            return null;
        }
        try {
            return new String(bytes, charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or
     *         {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
     *             since it is required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringIso8859_1(final byte[] bytes) {
        return newString(bytes, StandardCharsets.ISO_8859_1);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the US-ASCII charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUsAscii(final byte[] bytes) {
        return newString(bytes, StandardCharsets.US_ASCII);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16 charset
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16BE charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Be(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16BE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16LE charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Le(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16LE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-8 charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf8(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_8);
    }

    /**
     * Constructs a new instance. Every method of this class is static, so an instance is never needed.
     *
     * @deprecated TODO Make private in 2.0.
     */
    @Deprecated
    public StringUtils() {
        // empty
    }
}