001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019import java.io.IOException;
020import java.io.Writer;
021
022import org.apache.commons.lang3.text.translate.AggregateTranslator;
023import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
024import org.apache.commons.lang3.text.translate.EntityArrays;
025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
026import org.apache.commons.lang3.text.translate.LookupTranslator;
027import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
029import org.apache.commons.lang3.text.translate.OctalUnescaper;
030import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
032
033/**
034 * Escapes and unescapes {@link String}s for
035 * Java, Java Script, HTML and XML.
036 *
037 * <p>#ThreadSafe#</p>
038 * @since 2.0
039 * @deprecated As of 3.6, use Apache Commons Text
040 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
041 * StringEscapeUtils</a> instead
042 */
043@Deprecated
044public class StringEscapeUtils {
045
046    /* ESCAPE TRANSLATORS */
047
048    static class CsvEscaper extends CharSequenceTranslator {
049
050        private static final char CSV_DELIMITER = ',';
051        private static final char CSV_QUOTE = '"';
052        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
053        private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };
054
055        @Override
056        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
057
058            if (index != 0) {
059                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
060            }
061
062            if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
063                out.write(input.toString());
064            } else {
065                out.write(CSV_QUOTE);
066                out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
067                out.write(CSV_QUOTE);
068            }
069            return Character.codePointCount(input, 0, input.length());
070        }
071    }
072
073    static class CsvUnescaper extends CharSequenceTranslator {
074
075        private static final char CSV_DELIMITER = ',';
076        private static final char CSV_QUOTE = '"';
077        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
078        private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
079
080        @Override
081        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
082
083            if (index != 0) {
084                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
085            }
086
087            if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
088                out.write(input.toString());
089                return Character.codePointCount(input, 0, input.length());
090            }
091
092            // strip quotes
093            final String quoteless = input.subSequence(1, input.length() - 1).toString();
094
095            if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
096                // deal with escaped quotes; ie) ""
097                out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
098            } else {
099                out.write(input.toString());
100            }
101            return Character.codePointCount(input, 0, input.length());
102        }
103    }
104
105    /**
106     * Translator object for escaping Java.
107     *
108     * While {@link #escapeJava(String)} is the expected method of use, this
109     * object allows the Java escaping functionality to be used
110     * as the foundation for a custom translator.
111     *
112     * @since 3.0
113     */
114    public static final CharSequenceTranslator ESCAPE_JAVA =
115          new LookupTranslator(
116            new String[][] {
117              {"\"", "\\\""},
118              {"\\", "\\\\"},
119          }).with(
120            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
121          ).with(
122            JavaUnicodeEscaper.outsideOf(32, 0x7f)
123        );
124
125    /**
126     * Translator object for escaping EcmaScript/JavaScript.
127     *
128     * While {@link #escapeEcmaScript(String)} is the expected method of use, this
129     * object allows the EcmaScript escaping functionality to be used
130     * as the foundation for a custom translator.
131     *
132     * @since 3.0
133     */
134    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
135        new AggregateTranslator(
136            new LookupTranslator(
137                      new String[][] {
138                            {"'", "\\'"},
139                            {"\"", "\\\""},
140                            {"\\", "\\\\"},
141                            {"/", "\\/"}
142                      }),
143            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
144            JavaUnicodeEscaper.outsideOf(32, 0x7f)
145        );
146
147    /**
148     * Translator object for escaping Json.
149     *
150     * While {@link #escapeJson(String)} is the expected method of use, this
151     * object allows the Json escaping functionality to be used
152     * as the foundation for a custom translator.
153     *
154     * @since 3.2
155     */
156    public static final CharSequenceTranslator ESCAPE_JSON =
157        new AggregateTranslator(
158            new LookupTranslator(
159                      new String[][] {
160                            {"\"", "\\\""},
161                            {"\\", "\\\\"},
162                            {"/", "\\/"}
163                      }),
164            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
165            JavaUnicodeEscaper.outsideOf(32, 0x7f)
166        );
167
168    /**
169     * Translator object for escaping XML.
170     *
171     * While {@link #escapeXml(String)} is the expected method of use, this
172     * object allows the XML escaping functionality to be used
173     * as the foundation for a custom translator.
174     *
175     * @since 3.0
176     * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
177     */
178    @Deprecated
179    public static final CharSequenceTranslator ESCAPE_XML =
180        new AggregateTranslator(
181            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
182            new LookupTranslator(EntityArrays.APOS_ESCAPE())
183        );
184
185    /**
186     * Translator object for escaping XML 1.0.
187     *
188     * While {@link #escapeXml10(String)} is the expected method of use, this
189     * object allows the XML escaping functionality to be used
190     * as the foundation for a custom translator.
191     *
192     * @since 3.3
193     */
194    public static final CharSequenceTranslator ESCAPE_XML10 =
195        new AggregateTranslator(
196            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
197            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
198            new LookupTranslator(
199                    new String[][] {
200                            { "\u0000", StringUtils.EMPTY },
201                            { "\u0001", StringUtils.EMPTY },
202                            { "\u0002", StringUtils.EMPTY },
203                            { "\u0003", StringUtils.EMPTY },
204                            { "\u0004", StringUtils.EMPTY },
205                            { "\u0005", StringUtils.EMPTY },
206                            { "\u0006", StringUtils.EMPTY },
207                            { "\u0007", StringUtils.EMPTY },
208                            { "\u0008", StringUtils.EMPTY },
209                            { "\u000b", StringUtils.EMPTY },
210                            { "\u000c", StringUtils.EMPTY },
211                            { "\u000e", StringUtils.EMPTY },
212                            { "\u000f", StringUtils.EMPTY },
213                            { "\u0010", StringUtils.EMPTY },
214                            { "\u0011", StringUtils.EMPTY },
215                            { "\u0012", StringUtils.EMPTY },
216                            { "\u0013", StringUtils.EMPTY },
217                            { "\u0014", StringUtils.EMPTY },
218                            { "\u0015", StringUtils.EMPTY },
219                            { "\u0016", StringUtils.EMPTY },
220                            { "\u0017", StringUtils.EMPTY },
221                            { "\u0018", StringUtils.EMPTY },
222                            { "\u0019", StringUtils.EMPTY },
223                            { "\u001a", StringUtils.EMPTY },
224                            { "\u001b", StringUtils.EMPTY },
225                            { "\u001c", StringUtils.EMPTY },
226                            { "\u001d", StringUtils.EMPTY },
227                            { "\u001e", StringUtils.EMPTY },
228                            { "\u001f", StringUtils.EMPTY },
229                            { "\ufffe", StringUtils.EMPTY },
230                            { "\uffff", StringUtils.EMPTY }
231                    }),
232            NumericEntityEscaper.between(0x7f, 0x84),
233            NumericEntityEscaper.between(0x86, 0x9f),
234            new UnicodeUnpairedSurrogateRemover()
235        );
236
237    /**
238     * Translator object for escaping XML 1.1.
239     *
240     * While {@link #escapeXml11(String)} is the expected method of use, this
241     * object allows the XML escaping functionality to be used
242     * as the foundation for a custom translator.
243     *
244     * @since 3.3
245     */
246    public static final CharSequenceTranslator ESCAPE_XML11 =
247        new AggregateTranslator(
248            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
249            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
250            new LookupTranslator(
251                    new String[][] {
252                            { "\u0000", StringUtils.EMPTY },
253                            { "\u000b", "&#11;" },
254                            { "\u000c", "&#12;" },
255                            { "\ufffe", StringUtils.EMPTY },
256                            { "\uffff", StringUtils.EMPTY }
257                    }),
258            NumericEntityEscaper.between(0x1, 0x8),
259            NumericEntityEscaper.between(0xe, 0x1f),
260            NumericEntityEscaper.between(0x7f, 0x84),
261            NumericEntityEscaper.between(0x86, 0x9f),
262            new UnicodeUnpairedSurrogateRemover()
263        );
264
265    /**
266     * Translator object for escaping HTML version 3.0.
267     *
268     * While {@link #escapeHtml3(String)} is the expected method of use, this
269     * object allows the HTML escaping functionality to be used
270     * as the foundation for a custom translator.
271     *
272     * @since 3.0
273     */
274    public static final CharSequenceTranslator ESCAPE_HTML3 =
275        new AggregateTranslator(
276            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
277            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
278        );
279
280    /**
281     * Translator object for escaping HTML version 4.0.
282     *
283     * While {@link #escapeHtml4(String)} is the expected method of use, this
284     * object allows the HTML escaping functionality to be used
285     * as the foundation for a custom translator.
286     *
287     * @since 3.0
288     */
289    public static final CharSequenceTranslator ESCAPE_HTML4 =
290        new AggregateTranslator(
291            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
292            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
293            new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
294        );
295
296    /* UNESCAPE TRANSLATORS */
297
298    /**
299     * Translator object for escaping individual Comma Separated Values.
300     *
301     * While {@link #escapeCsv(String)} is the expected method of use, this
302     * object allows the CSV escaping functionality to be used
303     * as the foundation for a custom translator.
304     *
305     * @since 3.0
306     */
307    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
308
309    /**
310     * Translator object for unescaping escaped Java.
311     *
312     * While {@link #unescapeJava(String)} is the expected method of use, this
313     * object allows the Java unescaping functionality to be used
314     * as the foundation for a custom translator.
315     *
316     * @since 3.0
317     */
318    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
319    public static final CharSequenceTranslator UNESCAPE_JAVA =
320        new AggregateTranslator(
321            new OctalUnescaper(),     // .between('\1', '\377'),
322            new UnicodeUnescaper(),
323            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
324            new LookupTranslator(
325                      new String[][] {
326                            {"\\\\", "\\"},
327                            {"\\\"", "\""},
328                            {"\\'", "'"},
329                            {"\\", ""}
330                      })
331        );
332
333    /**
334     * Translator object for unescaping escaped EcmaScript.
335     *
336     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
337     * object allows the EcmaScript unescaping functionality to be used
338     * as the foundation for a custom translator.
339     *
340     * @since 3.0
341     */
342    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
343
344    /**
345     * Translator object for unescaping escaped Json.
346     *
347     * While {@link #unescapeJson(String)} is the expected method of use, this
348     * object allows the Json unescaping functionality to be used
349     * as the foundation for a custom translator.
350     *
351     * @since 3.2
352     */
353    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
354
355    /**
356     * Translator object for unescaping escaped HTML 3.0.
357     *
358     * While {@link #unescapeHtml3(String)} is the expected method of use, this
359     * object allows the HTML unescaping functionality to be used
360     * as the foundation for a custom translator.
361     *
362     * @since 3.0
363     */
364    public static final CharSequenceTranslator UNESCAPE_HTML3 =
365        new AggregateTranslator(
366            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
367            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
368            new NumericEntityUnescaper()
369        );
370
371    /**
372     * Translator object for unescaping escaped HTML 4.0.
373     *
374     * While {@link #unescapeHtml4(String)} is the expected method of use, this
375     * object allows the HTML unescaping functionality to be used
376     * as the foundation for a custom translator.
377     *
378     * @since 3.0
379     */
380    public static final CharSequenceTranslator UNESCAPE_HTML4 =
381        new AggregateTranslator(
382            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
383            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
384            new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
385            new NumericEntityUnescaper()
386        );
387
388    /**
389     * Translator object for unescaping escaped XML.
390     *
391     * While {@link #unescapeXml(String)} is the expected method of use, this
392     * object allows the XML unescaping functionality to be used
393     * as the foundation for a custom translator.
394     *
395     * @since 3.0
396     */
397    public static final CharSequenceTranslator UNESCAPE_XML =
398        new AggregateTranslator(
399            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
400            new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
401            new NumericEntityUnescaper()
402        );
403
404    /**
405     * Translator object for unescaping escaped Comma Separated Value entries.
406     *
407     * While {@link #unescapeCsv(String)} is the expected method of use, this
408     * object allows the CSV unescaping functionality to be used
409     * as the foundation for a custom translator.
410     *
411     * @since 3.0
412     */
413    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
414
415    /* Helper functions */
416
417    /**
418     * Returns a {@link String} value for a CSV column enclosed in double quotes,
419     * if required.
420     *
421     * <p>If the value contains a comma, newline or double quote, then the
422     *    String value is returned enclosed in double quotes.</p>
423     *
424     * <p>Any double quote characters in the value are escaped with another double quote.</p>
425     *
426     * <p>If the value does not contain a comma, newline or double quote, then the
427     *    String value is returned unchanged.</p>
428     *
429     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
430     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
431     *
432     * @param input the input CSV column String, may be null
433     * @return the input String, enclosed in double quotes if the value contains a comma,
434     * newline or double quote, {@code null} if null string input
435     * @since 2.4
436     */
437    public static final String escapeCsv(final String input) {
438        return ESCAPE_CSV.translate(input);
439    }
440
441    /**
442     * Escapes the characters in a {@link String} using EcmaScript String rules.
443     * <p>Escapes any values it finds into their EcmaScript String form.
444     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
445     *
446     * <p>So a tab becomes the characters {@code '\\'} and
447     * {@code 't'}.</p>
448     *
449     * <p>The only difference between Java strings and EcmaScript strings
450     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
451     *
452     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
453     *
454     * <p>Example:</p>
455     * <pre>
456     * input string: He didn't say, "Stop!"
457     * output string: He didn\'t say, \"Stop!\"
458     * </pre>
459     *
460     * @param input  String to escape values in, may be null
461     * @return String with escaped values, {@code null} if null string input
462     *
463     * @since 3.0
464     */
465    public static final String escapeEcmaScript(final String input) {
466        return ESCAPE_ECMASCRIPT.translate(input);
467    }
468
469    /**
470     * Escapes the characters in a {@link String} using HTML entities.
471     * <p>Supports only the HTML 3.0 entities.</p>
472     *
473     * @param input  the {@link String} to escape, may be null
474     * @return a new escaped {@link String}, {@code null} if null string input
475     *
476     * @since 3.0
477     */
478    public static final String escapeHtml3(final String input) {
479        return ESCAPE_HTML3.translate(input);
480    }
481
482    /**
483     * Escapes the characters in a {@link String} using HTML entities.
484     *
485     * <p>
486     * For example:
487     * </p>
488     * <p>{@code "bread" &amp; "butter"}</p>
489     * becomes:
490     * <p>
491     * {@code &amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;}.
492     * </p>
493     *
494     * <p>Supports all known HTML 4.0 entities, including funky accents.
495     * Note that the commonly used apostrophe escape character (&amp;apos;)
496     * is not a legal entity and so is not supported).</p>
497     *
498     * @param input  the {@link String} to escape, may be null
499     * @return a new escaped {@link String}, {@code null} if null string input
500     *
501     * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
502     * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
503     * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
504     * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
505     * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
506     *
507     * @since 3.0
508     */
509    public static final String escapeHtml4(final String input) {
510        return ESCAPE_HTML4.translate(input);
511    }
512
513    /**
514     * Escapes the characters in a {@link String} using Java String rules.
515     *
516     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
517     *
518     * <p>So a tab becomes the characters {@code '\\'} and
519     * {@code 't'}.</p>
520     *
521     * <p>The only difference between Java strings and JavaScript strings
522     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
523     *
524     * <p>Example:</p>
525     * <pre>
526     * input string: He didn't say, "Stop!"
527     * output string: He didn't say, \"Stop!\"
528     * </pre>
529     *
530     * @param input  String to escape values in, may be null
531     * @return String with escaped values, {@code null} if null string input
532     */
533    public static final String escapeJava(final String input) {
534        return ESCAPE_JAVA.translate(input);
535    }
536
537    /**
538     * Escapes the characters in a {@link String} using Json String rules.
539     * <p>Escapes any values it finds into their Json String form.
540     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
541     *
542     * <p>So a tab becomes the characters {@code '\\'} and
543     * {@code 't'}.</p>
544     *
545     * <p>The only difference between Java strings and Json strings
546     * is that in Json, forward-slash (/) is escaped.</p>
547     *
548     * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p>
549     *
550     * <p>Example:</p>
551     * <pre>
552     * input string: He didn't say, "Stop!"
553     * output string: He didn't say, \"Stop!\"
554     * </pre>
555     *
556     * @param input  String to escape values in, may be null
557     * @return String with escaped values, {@code null} if null string input
558     *
559     * @since 3.2
560     */
561    public static final String escapeJson(final String input) {
562        return ESCAPE_JSON.translate(input);
563    }
564
565    /**
566     * Escapes the characters in a {@link String} using XML entities.
567     *
568     * <p>For example: {@code "bread" & "butter"} =&gt;
569     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
570     * </p>
571     *
572     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
573     * Does not support DTDs or external entities.</p>
574     *
575     * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
576     *    escaped. If you still wish this functionality, you can achieve it
577     *    via the following:
578     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p>
579     *
580     * @param input  the {@link String} to escape, may be null
581     * @return a new escaped {@link String}, {@code null} if null string input
582     * @see #unescapeXml(String)
583     * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
584     */
585    @Deprecated
586    public static final String escapeXml(final String input) {
587        return ESCAPE_XML.translate(input);
588    }
589
590    /**
591     * Escapes the characters in a {@link String} using XML entities.
592     *
593     * <p>For example: {@code "bread" & "butter"} =&gt;
594     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
595     * </p>
596     *
597     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
598     * characters or unpaired Unicode surrogate code points, even after escaping.
599     * {@code escapeXml10} will remove characters that do not fit in the
600     * following ranges:</p>
601     *
602     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
603     *
604     * <p>Though not strictly necessary, {@code escapeXml10} will escape
605     * characters in the following ranges:</p>
606     *
607     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
608     *
609     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
610     * document. If you want to allow more non-text characters in an XML 1.1
611     * document, use {@link #escapeXml11(String)}.</p>
612     *
613     * @param input  the {@link String} to escape, may be null
614     * @return a new escaped {@link String}, {@code null} if null string input
615     * @see #unescapeXml(String)
616     * @since 3.3
617     */
618    public static String escapeXml10(final String input) {
619        return ESCAPE_XML10.translate(input);
620    }
621
622    /**
623     * Escapes the characters in a {@link String} using XML entities.
624     *
625     * <p>For example: {@code "bread" & "butter"} =&gt;
626     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
627     * </p>
628     *
629     * <p>XML 1.1 can represent certain control characters, but it cannot represent
630     * the null byte or unpaired Unicode surrogate code points, even after escaping.
631     * {@code escapeXml11} will remove characters that do not fit in the following
632     * ranges:</p>
633     *
634     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
635     *
636     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
637     *
638     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
639     *
640     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
641     * use it for XML 1.0 documents.</p>
642     *
643     * @param input  the {@link String} to escape, may be null
644     * @return a new escaped {@link String}, {@code null} if null string input
645     * @see #unescapeXml(String)
646     * @since 3.3
647     */
648    public static String escapeXml11(final String input) {
649        return ESCAPE_XML11.translate(input);
650    }
651
652    /**
653     * Returns a {@link String} value for an unescaped CSV column.
654     *
655     * <p>If the value is enclosed in double quotes, and contains a comma, newline
656     *    or double quote, then quotes are removed.
657     * </p>
658     *
659     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
660     *    to just one double quote.</p>
661     *
662     * <p>If the value is not enclosed in double quotes, or is and does not contain a
663     *    comma, newline or double quote, then the String value is returned unchanged.</p>
664     *
665     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
666     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
667     *
668     * @param input the input CSV column String, may be null
669     * @return the input String, with enclosing double quotes removed and embedded double
670     * quotes unescaped, {@code null} if null string input
671     * @since 2.4
672     */
673    public static final String unescapeCsv(final String input) {
674        return UNESCAPE_CSV.translate(input);
675    }
676
677    /**
678     * Unescapes any EcmaScript literals found in the {@link String}.
679     *
680     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
681     * into a newline character, unless the {@code '\'} is preceded by another
682     * {@code '\'}.</p>
683     *
684     * @see #unescapeJava(String)
685     * @param input  the {@link String} to unescape, may be null
686     * @return A new unescaped {@link String}, {@code null} if null string input
687     *
688     * @since 3.0
689     */
690    public static final String unescapeEcmaScript(final String input) {
691        return UNESCAPE_ECMASCRIPT.translate(input);
692    }
693
694    /**
695     * Unescapes a string containing entity escapes to a string
696     * containing the actual Unicode characters corresponding to the
697     * escapes. Supports only HTML 3.0 entities.
698     *
699     * @param input  the {@link String} to unescape, may be null
700     * @return a new unescaped {@link String}, {@code null} if null string input
701     *
702     * @since 3.0
703     */
704    public static final String unescapeHtml3(final String input) {
705        return UNESCAPE_HTML3.translate(input);
706    }
707
708    /**
709     * Unescapes a string containing entity escapes to a string
710     * containing the actual Unicode characters corresponding to the
711     * escapes. Supports HTML 4.0 entities.
712     *
713     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
714     * will become {@code "<Français>"}</p>
715     *
716     * <p>If an entity is unrecognized, it is left alone, and inserted
717     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
718     * become {@code ">&zzzz;x"}.</p>
719     *
720     * @param input  the {@link String} to unescape, may be null
721     * @return a new unescaped {@link String}, {@code null} if null string input
722     *
723     * @since 3.0
724     */
725    public static final String unescapeHtml4(final String input) {
726        return UNESCAPE_HTML4.translate(input);
727    }
728
729    /**
730     * Unescapes any Java literals found in the {@link String}.
731     * For example, it will turn a sequence of {@code '\'} and
732     * {@code 'n'} into a newline character, unless the {@code '\'}
733     * is preceded by another {@code '\'}.
734     *
735     * @param input  the {@link String} to unescape, may be null
736     * @return a new unescaped {@link String}, {@code null} if null string input
737     */
738    public static final String unescapeJava(final String input) {
739        return UNESCAPE_JAVA.translate(input);
740    }
741
742    /**
743     * Unescapes any Json literals found in the {@link String}.
744     *
745     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
746     * into a newline character, unless the {@code '\'} is preceded by another
747     * {@code '\'}.</p>
748     *
749     * @see #unescapeJava(String)
750     * @param input  the {@link String} to unescape, may be null
751     * @return A new unescaped {@link String}, {@code null} if null string input
752     *
753     * @since 3.2
754     */
755    public static final String unescapeJson(final String input) {
756        return UNESCAPE_JSON.translate(input);
757    }
758
759    /**
760     * Unescapes a string containing XML entity escapes to a string
761     * containing the actual Unicode characters corresponding to the
762     * escapes.
763     *
764     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
765     * Does not support DTDs or external entities.</p>
766     *
767     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
768     *    Unicode characters. This may change in future releases.</p>
769     *
770     * @param input  the {@link String} to unescape, may be null
771     * @return a new unescaped {@link String}, {@code null} if null string input
772     * @see #escapeXml(String)
773     * @see #escapeXml10(String)
774     * @see #escapeXml11(String)
775     */
776    public static final String unescapeXml(final String input) {
777        return UNESCAPE_XML.translate(input);
778    }
779
780    /**
781     * {@link StringEscapeUtils} instances should NOT be constructed in
782     * standard programming.
783     *
784     * <p>Instead, the class should be used as:</p>
785     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
786     *
787     * <p>This constructor is public to permit tools that require a JavaBean
788     * instance to operate.</p>
789     *
790     * @deprecated TODO Make private in 4.0.
791     */
792    @Deprecated
793    public StringEscapeUtils() {
794        // empty
795    }
796
797}