View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
19  import java.io.IOException;
20  import java.io.Writer;
21  
22  import org.apache.commons.lang3.text.translate.AggregateTranslator;
23  import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
24  import org.apache.commons.lang3.text.translate.EntityArrays;
25  import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
26  import org.apache.commons.lang3.text.translate.LookupTranslator;
27  import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
28  import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
29  import org.apache.commons.lang3.text.translate.OctalUnescaper;
30  import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
31  import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
32  
33  /**
 * Escapes and unescapes {@link String}s for
 * Java, JavaScript (EcmaScript), JSON, HTML, XML and CSV.
36   *
37   * <p>#ThreadSafe#</p>
38   * @since 2.0
39   * @deprecated As of 3.6, use Apache Commons Text
40   * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
41   * StringEscapeUtils</a> instead
42   */
43  @Deprecated
44  public class StringEscapeUtils {
45  
46      /* ESCAPE TRANSLATORS */
47  
48      static class CsvEscaper extends CharSequenceTranslator {
49  
50          private static final char CSV_DELIMITER = ',';
51          private static final char CSV_QUOTE = '"';
52          private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
53          private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };
54  
55          @Override
56          public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
57  
58              if (index != 0) {
59                  throw new IllegalStateException("CsvEscaper should never reach the [1] index");
60              }
61  
62              if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
63                  out.write(input.toString());
64              } else {
65                  out.write(CSV_QUOTE);
66                  out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
67                  out.write(CSV_QUOTE);
68              }
69              return Character.codePointCount(input, 0, input.length());
70          }
71      }
72  
73      static class CsvUnescaper extends CharSequenceTranslator {
74  
75          private static final char CSV_DELIMITER = ',';
76          private static final char CSV_QUOTE = '"';
77          private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
78          private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
79  
80          @Override
81          public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
82  
83              if (index != 0) {
84                  throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
85              }
86  
87              if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
88                  out.write(input.toString());
89                  return Character.codePointCount(input, 0, input.length());
90              }
91  
92              // strip quotes
93              final String quoteless = input.subSequence(1, input.length() - 1).toString();
94  
95              if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
96                  // deal with escaped quotes; ie) ""
97                  out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
98              } else {
99                  out.write(input.toString());
100             }
101             return Character.codePointCount(input, 0, input.length());
102         }
103     }
104 
105     /**
106      * Translator object for escaping Java.
107      *
108      * While {@link #escapeJava(String)} is the expected method of use, this
109      * object allows the Java escaping functionality to be used
110      * as the foundation for a custom translator.
111      *
112      * @since 3.0
113      */
114     public static final CharSequenceTranslator ESCAPE_JAVA =
115           new LookupTranslator(
116             new String[][] {
117               {"\"", "\\\""},
118               {"\\", "\\\\"},
119           }).with(
120             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
121           ).with(
122             JavaUnicodeEscaper.outsideOf(32, 0x7f)
123         );
124 
125     /**
126      * Translator object for escaping EcmaScript/JavaScript.
127      *
128      * While {@link #escapeEcmaScript(String)} is the expected method of use, this
129      * object allows the EcmaScript escaping functionality to be used
130      * as the foundation for a custom translator.
131      *
132      * @since 3.0
133      */
134     public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
135         new AggregateTranslator(
136             new LookupTranslator(
137                       new String[][] {
138                             {"'", "\\'"},
139                             {"\"", "\\\""},
140                             {"\\", "\\\\"},
141                             {"/", "\\/"}
142                       }),
143             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
144             JavaUnicodeEscaper.outsideOf(32, 0x7f)
145         );
146 
147     /**
148      * Translator object for escaping Json.
149      *
150      * While {@link #escapeJson(String)} is the expected method of use, this
151      * object allows the Json escaping functionality to be used
152      * as the foundation for a custom translator.
153      *
154      * @since 3.2
155      */
156     public static final CharSequenceTranslator ESCAPE_JSON =
157         new AggregateTranslator(
158             new LookupTranslator(
159                       new String[][] {
160                             {"\"", "\\\""},
161                             {"\\", "\\\\"},
162                             {"/", "\\/"}
163                       }),
164             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
165             JavaUnicodeEscaper.outsideOf(32, 0x7f)
166         );
167 
168     /**
169      * Translator object for escaping XML.
170      *
171      * While {@link #escapeXml(String)} is the expected method of use, this
172      * object allows the XML escaping functionality to be used
173      * as the foundation for a custom translator.
174      *
175      * @since 3.0
176      * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
177      */
178     @Deprecated
179     public static final CharSequenceTranslator ESCAPE_XML =
180         new AggregateTranslator(
181             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
182             new LookupTranslator(EntityArrays.APOS_ESCAPE())
183         );
184 
185     /**
186      * Translator object for escaping XML 1.0.
187      *
188      * While {@link #escapeXml10(String)} is the expected method of use, this
189      * object allows the XML escaping functionality to be used
190      * as the foundation for a custom translator.
191      *
192      * @since 3.3
193      */
194     public static final CharSequenceTranslator ESCAPE_XML10 =
195         new AggregateTranslator(
196             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
197             new LookupTranslator(EntityArrays.APOS_ESCAPE()),
198             new LookupTranslator(
199                     new String[][] {
200                             { "\u0000", StringUtils.EMPTY },
201                             { "\u0001", StringUtils.EMPTY },
202                             { "\u0002", StringUtils.EMPTY },
203                             { "\u0003", StringUtils.EMPTY },
204                             { "\u0004", StringUtils.EMPTY },
205                             { "\u0005", StringUtils.EMPTY },
206                             { "\u0006", StringUtils.EMPTY },
207                             { "\u0007", StringUtils.EMPTY },
208                             { "\u0008", StringUtils.EMPTY },
209                             { "\u000b", StringUtils.EMPTY },
210                             { "\u000c", StringUtils.EMPTY },
211                             { "\u000e", StringUtils.EMPTY },
212                             { "\u000f", StringUtils.EMPTY },
213                             { "\u0010", StringUtils.EMPTY },
214                             { "\u0011", StringUtils.EMPTY },
215                             { "\u0012", StringUtils.EMPTY },
216                             { "\u0013", StringUtils.EMPTY },
217                             { "\u0014", StringUtils.EMPTY },
218                             { "\u0015", StringUtils.EMPTY },
219                             { "\u0016", StringUtils.EMPTY },
220                             { "\u0017", StringUtils.EMPTY },
221                             { "\u0018", StringUtils.EMPTY },
222                             { "\u0019", StringUtils.EMPTY },
223                             { "\u001a", StringUtils.EMPTY },
224                             { "\u001b", StringUtils.EMPTY },
225                             { "\u001c", StringUtils.EMPTY },
226                             { "\u001d", StringUtils.EMPTY },
227                             { "\u001e", StringUtils.EMPTY },
228                             { "\u001f", StringUtils.EMPTY },
229                             { "\ufffe", StringUtils.EMPTY },
230                             { "\uffff", StringUtils.EMPTY }
231                     }),
232             NumericEntityEscaper.between(0x7f, 0x84),
233             NumericEntityEscaper.between(0x86, 0x9f),
234             new UnicodeUnpairedSurrogateRemover()
235         );
236 
237     /**
238      * Translator object for escaping XML 1.1.
239      *
240      * While {@link #escapeXml11(String)} is the expected method of use, this
241      * object allows the XML escaping functionality to be used
242      * as the foundation for a custom translator.
243      *
244      * @since 3.3
245      */
246     public static final CharSequenceTranslator ESCAPE_XML11 =
247         new AggregateTranslator(
248             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
249             new LookupTranslator(EntityArrays.APOS_ESCAPE()),
250             new LookupTranslator(
251                     new String[][] {
252                             { "\u0000", StringUtils.EMPTY },
253                             { "\u000b", "&#11;" },
254                             { "\u000c", "&#12;" },
255                             { "\ufffe", StringUtils.EMPTY },
256                             { "\uffff", StringUtils.EMPTY }
257                     }),
258             NumericEntityEscaper.between(0x1, 0x8),
259             NumericEntityEscaper.between(0xe, 0x1f),
260             NumericEntityEscaper.between(0x7f, 0x84),
261             NumericEntityEscaper.between(0x86, 0x9f),
262             new UnicodeUnpairedSurrogateRemover()
263         );
264 
265     /**
266      * Translator object for escaping HTML version 3.0.
267      *
268      * While {@link #escapeHtml3(String)} is the expected method of use, this
269      * object allows the HTML escaping functionality to be used
270      * as the foundation for a custom translator.
271      *
272      * @since 3.0
273      */
274     public static final CharSequenceTranslator ESCAPE_HTML3 =
275         new AggregateTranslator(
276             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
277             new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
278         );
279 
280     /**
281      * Translator object for escaping HTML version 4.0.
282      *
283      * While {@link #escapeHtml4(String)} is the expected method of use, this
284      * object allows the HTML escaping functionality to be used
285      * as the foundation for a custom translator.
286      *
287      * @since 3.0
288      */
289     public static final CharSequenceTranslator ESCAPE_HTML4 =
290         new AggregateTranslator(
291             new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
292             new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
293             new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
294         );
295 
296     /* UNESCAPE TRANSLATORS */
297 
298     /**
299      * Translator object for escaping individual Comma Separated Values.
300      *
301      * While {@link #escapeCsv(String)} is the expected method of use, this
302      * object allows the CSV escaping functionality to be used
303      * as the foundation for a custom translator.
304      *
305      * @since 3.0
306      */
307     public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
308 
309     /**
310      * Translator object for unescaping escaped Java.
311      *
312      * While {@link #unescapeJava(String)} is the expected method of use, this
313      * object allows the Java unescaping functionality to be used
314      * as the foundation for a custom translator.
315      *
316      * @since 3.0
317      */
318     // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
319     public static final CharSequenceTranslator UNESCAPE_JAVA =
320         new AggregateTranslator(
321             new OctalUnescaper(),     // .between('\1', '\377'),
322             new UnicodeUnescaper(),
323             new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
324             new LookupTranslator(
325                       new String[][] {
326                             {"\\\\", "\\"},
327                             {"\\\"", "\""},
328                             {"\\'", "'"},
329                             {"\\", ""}
330                       })
331         );
332 
333     /**
334      * Translator object for unescaping escaped EcmaScript.
335      *
336      * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
337      * object allows the EcmaScript unescaping functionality to be used
338      * as the foundation for a custom translator.
339      *
340      * @since 3.0
341      */
342     public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
343 
344     /**
345      * Translator object for unescaping escaped Json.
346      *
347      * While {@link #unescapeJson(String)} is the expected method of use, this
348      * object allows the Json unescaping functionality to be used
349      * as the foundation for a custom translator.
350      *
351      * @since 3.2
352      */
353     public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
354 
355     /**
356      * Translator object for unescaping escaped HTML 3.0.
357      *
358      * While {@link #unescapeHtml3(String)} is the expected method of use, this
359      * object allows the HTML unescaping functionality to be used
360      * as the foundation for a custom translator.
361      *
362      * @since 3.0
363      */
364     public static final CharSequenceTranslator UNESCAPE_HTML3 =
365         new AggregateTranslator(
366             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
367             new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
368             new NumericEntityUnescaper()
369         );
370 
371     /**
372      * Translator object for unescaping escaped HTML 4.0.
373      *
374      * While {@link #unescapeHtml4(String)} is the expected method of use, this
375      * object allows the HTML unescaping functionality to be used
376      * as the foundation for a custom translator.
377      *
378      * @since 3.0
379      */
380     public static final CharSequenceTranslator UNESCAPE_HTML4 =
381         new AggregateTranslator(
382             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
383             new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
384             new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
385             new NumericEntityUnescaper()
386         );
387 
388     /**
389      * Translator object for unescaping escaped XML.
390      *
391      * While {@link #unescapeXml(String)} is the expected method of use, this
392      * object allows the XML unescaping functionality to be used
393      * as the foundation for a custom translator.
394      *
395      * @since 3.0
396      */
397     public static final CharSequenceTranslator UNESCAPE_XML =
398         new AggregateTranslator(
399             new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
400             new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
401             new NumericEntityUnescaper()
402         );
403 
404     /**
405      * Translator object for unescaping escaped Comma Separated Value entries.
406      *
407      * While {@link #unescapeCsv(String)} is the expected method of use, this
408      * object allows the CSV unescaping functionality to be used
409      * as the foundation for a custom translator.
410      *
411      * @since 3.0
412      */
413     public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
414 
415     /* Helper functions */
416 
417     /**
418      * Returns a {@link String} value for a CSV column enclosed in double quotes,
419      * if required.
420      *
421      * <p>If the value contains a comma, newline or double quote, then the
422      *    String value is returned enclosed in double quotes.</p>
423      *
424      * <p>Any double quote characters in the value are escaped with another double quote.</p>
425      *
426      * <p>If the value does not contain a comma, newline or double quote, then the
427      *    String value is returned unchanged.</p>
428      *
429      * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
430      * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
431      *
432      * @param input the input CSV column String, may be null
433      * @return the input String, enclosed in double quotes if the value contains a comma,
434      * newline or double quote, {@code null} if null string input
435      * @since 2.4
436      */
437     public static final String escapeCsv(final String input) {
438         return ESCAPE_CSV.translate(input);
439     }
440 
441     /**
442      * Escapes the characters in a {@link String} using EcmaScript String rules.
443      * <p>Escapes any values it finds into their EcmaScript String form.
444      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
445      *
446      * <p>So a tab becomes the characters {@code '\\'} and
447      * {@code 't'}.</p>
448      *
449      * <p>The only difference between Java strings and EcmaScript strings
450      * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
451      *
452      * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
453      *
454      * <p>Example:</p>
455      * <pre>
456      * input string: He didn't say, "Stop!"
457      * output string: He didn\'t say, \"Stop!\"
458      * </pre>
459      *
460      * @param input  String to escape values in, may be null
461      * @return String with escaped values, {@code null} if null string input
462      *
463      * @since 3.0
464      */
465     public static final String escapeEcmaScript(final String input) {
466         return ESCAPE_ECMASCRIPT.translate(input);
467     }
468 
469     /**
470      * Escapes the characters in a {@link String} using HTML entities.
471      * <p>Supports only the HTML 3.0 entities.</p>
472      *
473      * @param input  the {@link String} to escape, may be null
474      * @return a new escaped {@link String}, {@code null} if null string input
475      *
476      * @since 3.0
477      */
478     public static final String escapeHtml3(final String input) {
479         return ESCAPE_HTML3.translate(input);
480     }
481 
482     /**
483      * Escapes the characters in a {@link String} using HTML entities.
484      *
485      * <p>
486      * For example:
487      * </p>
     * <p>{@code "bread" & "butter"}</p>
     * becomes:
     * <p>
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character (&amp;apos;)
     * is not a legal entity and so is not supported.</p>
497      *
498      * @param input  the {@link String} to escape, may be null
499      * @return a new escaped {@link String}, {@code null} if null string input
500      *
501      * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
502      * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
503      * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
504      * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
505      * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
506      *
507      * @since 3.0
508      */
509     public static final String escapeHtml4(final String input) {
510         return ESCAPE_HTML4.translate(input);
511     }
512 
513     /**
514      * Escapes the characters in a {@link String} using Java String rules.
515      *
516      * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
517      *
518      * <p>So a tab becomes the characters {@code '\\'} and
519      * {@code 't'}.</p>
520      *
521      * <p>The only difference between Java strings and JavaScript strings
522      * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
523      *
524      * <p>Example:</p>
525      * <pre>
526      * input string: He didn't say, "Stop!"
527      * output string: He didn't say, \"Stop!\"
528      * </pre>
529      *
530      * @param input  String to escape values in, may be null
531      * @return String with escaped values, {@code null} if null string input
532      */
533     public static final String escapeJava(final String input) {
534         return ESCAPE_JAVA.translate(input);
535     }
536 
537     /**
538      * Escapes the characters in a {@link String} using Json String rules.
539      * <p>Escapes any values it finds into their Json String form.
540      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
541      *
542      * <p>So a tab becomes the characters {@code '\\'} and
543      * {@code 't'}.</p>
544      *
545      * <p>The only difference between Java strings and Json strings
546      * is that in Json, forward-slash (/) is escaped.</p>
547      *
548      * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p>
549      *
550      * <p>Example:</p>
551      * <pre>
552      * input string: He didn't say, "Stop!"
553      * output string: He didn't say, \"Stop!\"
554      * </pre>
555      *
556      * @param input  String to escape values in, may be null
557      * @return String with escaped values, {@code null} if null string input
558      *
559      * @since 3.2
560      */
561     public static final String escapeJson(final String input) {
562         return ESCAPE_JSON.translate(input);
563     }
564 
565     /**
566      * Escapes the characters in a {@link String} using XML entities.
567      *
568      * <p>For example: {@code "bread" & "butter"} =&gt;
569      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
570      * </p>
571      *
572      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
573      * Does not support DTDs or external entities.</p>
574      *
575      * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
576      *    escaped. If you still wish this functionality, you can achieve it
577      *    via the following:
578      * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p>
579      *
580      * @param input  the {@link String} to escape, may be null
581      * @return a new escaped {@link String}, {@code null} if null string input
582      * @see #unescapeXml(String)
583      * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
584      */
585     @Deprecated
586     public static final String escapeXml(final String input) {
587         return ESCAPE_XML.translate(input);
588     }
589 
590     /**
591      * Escapes the characters in a {@link String} using XML entities.
592      *
593      * <p>For example: {@code "bread" & "butter"} =&gt;
594      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
595      * </p>
596      *
597      * <p>Note that XML 1.0 is a text-only format: it cannot represent control
598      * characters or unpaired Unicode surrogate code points, even after escaping.
599      * {@code escapeXml10} will remove characters that do not fit in the
600      * following ranges:</p>
601      *
602      * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
603      *
604      * <p>Though not strictly necessary, {@code escapeXml10} will escape
605      * characters in the following ranges:</p>
606      *
607      * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
608      *
609      * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
610      * document. If you want to allow more non-text characters in an XML 1.1
611      * document, use {@link #escapeXml11(String)}.</p>
612      *
613      * @param input  the {@link String} to escape, may be null
614      * @return a new escaped {@link String}, {@code null} if null string input
615      * @see #unescapeXml(String)
616      * @since 3.3
617      */
618     public static String escapeXml10(final String input) {
619         return ESCAPE_XML10.translate(input);
620     }
621 
622     /**
623      * Escapes the characters in a {@link String} using XML entities.
624      *
625      * <p>For example: {@code "bread" & "butter"} =&gt;
626      * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
627      * </p>
628      *
629      * <p>XML 1.1 can represent certain control characters, but it cannot represent
630      * the null byte or unpaired Unicode surrogate code points, even after escaping.
631      * {@code escapeXml11} will remove characters that do not fit in the following
632      * ranges:</p>
633      *
634      * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
635      *
636      * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
637      *
638      * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
639      *
640      * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
641      * use it for XML 1.0 documents.</p>
642      *
643      * @param input  the {@link String} to escape, may be null
644      * @return a new escaped {@link String}, {@code null} if null string input
645      * @see #unescapeXml(String)
646      * @since 3.3
647      */
648     public static String escapeXml11(final String input) {
649         return ESCAPE_XML11.translate(input);
650     }
651 
652     /**
653      * Returns a {@link String} value for an unescaped CSV column.
654      *
655      * <p>If the value is enclosed in double quotes, and contains a comma, newline
656      *    or double quote, then quotes are removed.
657      * </p>
658      *
659      * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
660      *    to just one double quote.</p>
661      *
662      * <p>If the value is not enclosed in double quotes, or is and does not contain a
663      *    comma, newline or double quote, then the String value is returned unchanged.</p>
664      *
665      * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
666      * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
667      *
668      * @param input the input CSV column String, may be null
669      * @return the input String, with enclosing double quotes removed and embedded double
670      * quotes unescaped, {@code null} if null string input
671      * @since 2.4
672      */
673     public static final String unescapeCsv(final String input) {
674         return UNESCAPE_CSV.translate(input);
675     }
676 
677     /**
678      * Unescapes any EcmaScript literals found in the {@link String}.
679      *
680      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
681      * into a newline character, unless the {@code '\'} is preceded by another
682      * {@code '\'}.</p>
683      *
684      * @see #unescapeJava(String)
685      * @param input  the {@link String} to unescape, may be null
686      * @return A new unescaped {@link String}, {@code null} if null string input
687      *
688      * @since 3.0
689      */
690     public static final String unescapeEcmaScript(final String input) {
691         return UNESCAPE_ECMASCRIPT.translate(input);
692     }
693 
694     /**
695      * Unescapes a string containing entity escapes to a string
696      * containing the actual Unicode characters corresponding to the
697      * escapes. Supports only HTML 3.0 entities.
698      *
699      * @param input  the {@link String} to unescape, may be null
700      * @return a new unescaped {@link String}, {@code null} if null string input
701      *
702      * @since 3.0
703      */
704     public static final String unescapeHtml3(final String input) {
705         return UNESCAPE_HTML3.translate(input);
706     }
707 
708     /**
709      * Unescapes a string containing entity escapes to a string
710      * containing the actual Unicode characters corresponding to the
711      * escapes. Supports HTML 4.0 entities.
712      *
713      * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
714      * will become {@code "<Français>"}</p>
715      *
716      * <p>If an entity is unrecognized, it is left alone, and inserted
717      * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
718      * become {@code ">&zzzz;x"}.</p>
719      *
720      * @param input  the {@link String} to unescape, may be null
721      * @return a new unescaped {@link String}, {@code null} if null string input
722      *
723      * @since 3.0
724      */
725     public static final String unescapeHtml4(final String input) {
726         return UNESCAPE_HTML4.translate(input);
727     }
728 
729     /**
730      * Unescapes any Java literals found in the {@link String}.
731      * For example, it will turn a sequence of {@code '\'} and
732      * {@code 'n'} into a newline character, unless the {@code '\'}
733      * is preceded by another {@code '\'}.
734      *
735      * @param input  the {@link String} to unescape, may be null
736      * @return a new unescaped {@link String}, {@code null} if null string input
737      */
738     public static final String unescapeJava(final String input) {
739         return UNESCAPE_JAVA.translate(input);
740     }
741 
742     /**
743      * Unescapes any Json literals found in the {@link String}.
744      *
745      * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
746      * into a newline character, unless the {@code '\'} is preceded by another
747      * {@code '\'}.</p>
748      *
749      * @see #unescapeJava(String)
750      * @param input  the {@link String} to unescape, may be null
751      * @return A new unescaped {@link String}, {@code null} if null string input
752      *
753      * @since 3.2
754      */
755     public static final String unescapeJson(final String input) {
756         return UNESCAPE_JSON.translate(input);
757     }
758 
759     /**
760      * Unescapes a string containing XML entity escapes to a string
761      * containing the actual Unicode characters corresponding to the
762      * escapes.
763      *
764      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
765      * Does not support DTDs or external entities.</p>
766      *
767      * <p>Note that numerical \\u Unicode codes are unescaped to their respective
768      *    Unicode characters. This may change in future releases.</p>
769      *
770      * @param input  the {@link String} to unescape, may be null
771      * @return a new unescaped {@link String}, {@code null} if null string input
772      * @see #escapeXml(String)
773      * @see #escapeXml10(String)
774      * @see #escapeXml11(String)
775      */
776     public static final String unescapeXml(final String input) {
777         return UNESCAPE_XML.translate(input);
778     }
779 
780     /**
781      * {@link StringEscapeUtils} instances should NOT be constructed in
782      * standard programming.
783      *
784      * <p>Instead, the class should be used as:</p>
785      * <pre>StringEscapeUtils.escapeJava("foo");</pre>
786      *
787      * <p>This constructor is public to permit tools that require a JavaBean
788      * instance to operate.</p>
789      *
790      * @deprecated TODO Make private in 4.0.
791      */
792     @Deprecated
793     public StringEscapeUtils() {
794         // empty
795     }
796 
797 }