View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.ByteBuffer;
22  import java.nio.charset.Charset;
23  import java.nio.charset.StandardCharsets;
24  
25  import org.apache.commons.codec.CharEncoding;
26  
/**
 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
 * specified in standard {@link Charset}.
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @see CharEncoding
 * @see Charset
 * @see StandardCharsets
 * @since 1.4
 */
public class StringUtils {

    /**
     * <p>
     * Compares two CharSequences, returning {@code true} if they represent equal sequences of characters.
     * </p>
     *
     * <p>
     * {@code null}s are handled without exceptions. Two {@code null} references are considered to be equal.
     * The comparison is case sensitive.
     * </p>
     *
     * <pre>
     * StringUtils.equals(null, null)   = true
     * StringUtils.equals(null, "abc")  = false
     * StringUtils.equals("abc", null)  = false
     * StringUtils.equals("abc", "abc") = true
     * StringUtils.equals("abc", "ABC") = false
     * </pre>
     *
     * <p>
     * Originally copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release); the final
     * character-by-character comparison is performed inline.
     * </p>
     *
     * @see Object#equals(Object)
     * @param cs1
     *            the first CharSequence, may be {@code null}
     * @param cs2
     *            the second CharSequence, may be {@code null}
     * @return {@code true} if the CharSequences are equal (case-sensitive), or both {@code null}
     * @since 1.10
     */
    public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
        // Same reference (this also covers the case where both are null).
        if (cs1 == cs2) {
            return true;
        }
        if (cs1 == null || cs2 == null) {
            return false;
        }
        // Fast path: let String do the comparison when both sides are Strings.
        if (cs1 instanceof String && cs2 instanceof String) {
            return cs1.equals(cs2);
        }
        // General path: same length and the same char at every index (case-sensitive).
        final int length = cs1.length();
        if (length != cs2.length()) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            if (cs1.charAt(i) != cs2.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes the string with {@link String#getBytes(Charset)} and wraps the resulting array in a {@link ByteBuffer}.
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the {@code String}
     * @return a buffer wrapping the encoded bytes, or {@code null} if the input string was {@code null}
     */
    private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
        if (string == null) {
            return null;
        }
        return ByteBuffer.wrap(string.getBytes(charset));
    }

    /**
     * Encodes the given string using the UTF-8 charset and returns the result as a byte buffer wrapping a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return a buffer wrapping the encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     * @since 1.11
     */
    public static ByteBuffer getByteBufferUtf8(final String string) {
        return getByteBuffer(string, StandardCharsets.UTF_8);
    }

    /**
     * Calls {@link String#getBytes(Charset)}
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the {@code String}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    private static byte[] getBytes(final String string, final Charset charset) {
        return string == null ? null : string.getBytes(charset);
    }

    /**
     * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
     *             since it is required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesIso8859_1(final String string) {
        return getBytes(string, StandardCharsets.ISO_8859_1);
    }

    /**
     * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
     * array.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * </p>
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name. The caught exception is available as the cause.
     * @see CharEncoding
     * @see String#getBytes(String)
     */
    public static byte[] getBytesUnchecked(final String string, final String charsetName) {
        if (string == null) {
            return null;
        }
        try {
            return string.getBytes(charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUsAscii(final String string) {
        return getBytes(string, StandardCharsets.US_ASCII);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16(final String string) {
        return getBytes(string, StandardCharsets.UTF_16);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Be(final String string) {
        return getBytes(string, StandardCharsets.UTF_16BE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Le(final String string) {
        return getBytes(string, StandardCharsets.UTF_16LE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf8(final String string) {
        return getBytes(string, StandardCharsets.UTF_8);
    }

    /**
     * Creates the {@link IllegalStateException} thrown when a charset name that was expected to be supported is
     * rejected with an {@link UnsupportedEncodingException}.
     * <p>
     * The message has the form {@code charsetName + ": " + e}. The caught exception is also attached as the cause so
     * that its stack trace is not lost.
     * </p>
     *
     * @param charsetName
     *            the charset name that was rejected
     * @param e
     *            the exception that was caught
     * @return a new, not yet thrown, {@link IllegalStateException} whose cause is {@code e}
     */
    private static IllegalStateException newIllegalStateException(final String charsetName,
                                                                  final UnsupportedEncodingException e) {
        return new IllegalStateException(charsetName + ": " + e, e);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @param charset
     *            The {@link Charset} used to decode the bytes; not {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the given charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if charset is {@code null}
     */
    private static String newString(final byte[] bytes, final Charset charset) {
        return bytes == null ? null : new String(bytes, charset);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * </p>
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return A new {@code String} decoded from the specified array of bytes using the given charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name. The caught exception is available as the cause.
     * @see CharEncoding
     * @see String#String(byte[], String)
     */
    public static String newString(final byte[] bytes, final String charsetName) {
        if (bytes == null) {
            return null;
        }
        try {
            return new String(bytes, charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or
     *         {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
     *             since it is required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringIso8859_1(final byte[] bytes) {
        return newString(bytes, StandardCharsets.ISO_8859_1);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the US-ASCII charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUsAscii(final byte[] bytes) {
        return newString(bytes, StandardCharsets.US_ASCII);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16 charset
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16BE charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Be(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16BE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16LE charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Le(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16LE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-8 charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf8(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_8);
    }

    /**
     * Constructs a new instance. Every method of this class is static, so an instance is never needed.
     * <p>
     * TODO Make private in 2.0.
     * </p>
     *
     * @deprecated TODO Make private in 2.0.
     */
    @Deprecated
    public StringUtils() {
        // empty
    }
}