/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.text;
18  
19  import static org.apache.commons.text.StringEscapeUtils.escapeXSI;
20  import static org.apache.commons.text.StringEscapeUtils.unescapeXSI;
21  import static org.junit.jupiter.api.Assertions.assertEquals;
22  import static org.junit.jupiter.api.Assertions.assertFalse;
23  import static org.junit.jupiter.api.Assertions.assertNotNull;
24  import static org.junit.jupiter.api.Assertions.assertNull;
25  import static org.junit.jupiter.api.Assertions.assertThrows;
26  import static org.junit.jupiter.api.Assertions.assertTrue;
27  import static org.junit.jupiter.api.Assertions.fail;
28  
29  import java.io.IOException;
30  import java.io.StringWriter;
31  import java.lang.reflect.Constructor;
32  import java.lang.reflect.Modifier;
33  import java.nio.charset.StandardCharsets;
34  import java.nio.file.Files;
35  import java.nio.file.Paths;
36  
37  import org.junit.jupiter.api.Test;
38  
39  /**
40   * Tests {@link StringEscapeUtils}.
41   *
42   * <p>
43   * This code has been adapted from Apache Commons Lang 3.5.
44   * </p>
45   */
46  public class StringEscapeUtilsTest {
47      private static final String FOO = "foo";
48  
49      private static final String[][] HTML_ESCAPES = {
50              {"no escaping", "plain text", "plain text"},
51              {"no escaping", "plain text", "plain text"},
52              {"empty string", "", ""},
53              {"null", null, null},
54              {"ampersand", "bread &amp; butter", "bread & butter"},
55              {"quotes", "&quot;bread&quot; &amp; butter", "\"bread\" & butter"},
56              {"final character only", "greater than &gt;", "greater than >"},
57              {"first character only", "&lt; less than", "< less than"},
58              {"apostrophe", "Huntington's chorea", "Huntington's chorea"},
59              {"languages", "English,Fran&ccedil;ais,\u65E5\u672C\u8A9E (nihongo)",
60                  "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
61              {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"},
62      };
63  
64      private void assertEscapeJava(final String escaped, final String original) throws IOException {
65          assertEscapeJava(escaped, original, null);
66      }
67  
68      private void assertEscapeJava(final String expected, final String original, String message) throws IOException {
69          final String converted = StringEscapeUtils.escapeJava(original);
70          message = "escapeJava(String) failed" + (message == null ? "" : ": " + message);
71          assertEquals(expected, converted, message);
72  
73          final StringWriter writer = new StringWriter();
74          StringEscapeUtils.ESCAPE_JAVA.translate(original, writer);
75          assertEquals(expected, writer.toString());
76      }
77  
78      private void assertUnescapeJava(final String unescaped, final String original) throws IOException {
79          assertUnescapeJava(unescaped, original, null);
80      }
81  
82      private void assertUnescapeJava(final String unescaped, final String original, final String message) throws IOException {
83          final String actual = StringEscapeUtils.unescapeJava(original);
84  
85          assertEquals(unescaped, actual,
86                  "unescape(String) failed" + (message == null ? "" : ": " + message) + ": expected '" + StringEscapeUtils.escapeJava(unescaped)
87                  // we escape this so we can see it in the error message
88                          + "' actual '" + StringEscapeUtils.escapeJava(actual) + "'");
89  
90          final StringWriter writer = new StringWriter();
91          StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer);
92          assertEquals(unescaped, writer.toString());
93      }
94  
95      private void checkCsvEscapeWriter(final String expected, final String value) throws IOException {
96          final StringWriter writer = new StringWriter();
97          StringEscapeUtils.ESCAPE_CSV.translate(value, writer);
98          assertEquals(expected, writer.toString());
99      }
100 
101     private void checkCsvUnescapeWriter(final String expected, final String value) throws IOException {
102         final StringWriter writer = new StringWriter();
103         StringEscapeUtils.UNESCAPE_CSV.translate(value, writer);
104         assertEquals(expected, writer.toString());
105     }
106 
107     @Test
108     public void testBuilder() {
109         final String result = StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_XML10).escape("<").append(">").toString();
110         assertEquals("&lt;>", result);
111     }
112 
113     @Test
114     public void testConstructor() {
115         assertNotNull(new StringEscapeUtils());
116         final Constructor<?>[] cons = StringEscapeUtils.class.getDeclaredConstructors();
117         assertEquals(1, cons.length);
118         assertTrue(Modifier.isPublic(cons[0].getModifiers()));
119         assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers()));
120         assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers()));
121     }
122 
123     // HTML and XML
124     @Test
125     public void testDeleteCharacter() {
126         final String deleteString = "Delete: \u007F";
127         assertEquals("Delete: \\u007F", StringEscapeUtils.escapeJson(deleteString));
128     }
129 
130     @Test
131     public void testEscapeCsvString() {
132         assertEquals("foo.bar", StringEscapeUtils.escapeCsv("foo.bar"));
133         assertEquals("\"foo,bar\"", StringEscapeUtils.escapeCsv("foo,bar"));
134         assertEquals("\"foo\nbar\"", StringEscapeUtils.escapeCsv("foo\nbar"));
135         assertEquals("\"foo\rbar\"", StringEscapeUtils.escapeCsv("foo\rbar"));
136         assertEquals("\"foo\"\"bar\"", StringEscapeUtils.escapeCsv("foo\"bar"));
137         assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
138         assertEquals("", StringEscapeUtils.escapeCsv(""));
139         assertNull(StringEscapeUtils.escapeCsv(null));
140     }
141 
142     @Test
143     public void testEscapeCsvWriter() throws IOException {
144         checkCsvEscapeWriter("foo.bar", "foo.bar");
145         checkCsvEscapeWriter("\"foo,bar\"", "foo,bar");
146         checkCsvEscapeWriter("\"foo\nbar\"", "foo\nbar");
147         checkCsvEscapeWriter("\"foo\rbar\"", "foo\rbar");
148         checkCsvEscapeWriter("\"foo\"\"bar\"", "foo\"bar");
149         checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
150         checkCsvEscapeWriter("", null);
151         checkCsvEscapeWriter("", "");
152     }
153 
154     @Test
155     public void testEscapeEcmaScript() {
156         assertNull(StringEscapeUtils.escapeEcmaScript(null));
157         try {
158             StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null);
159             fail("Exception expected!");
160         } catch (final IOException ex) {
161             fail("Exception expected!");
162         } catch (final IllegalArgumentException ex) {
163             // expected
164         }
165         try {
166             StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null);
167             fail("Exception expected!");
168         } catch (final IOException ex) {
169             fail("Exception expected!");
170         } catch (final IllegalArgumentException ex) {
171             // expected
172         }
173 
174         assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\""));
175         assertEquals("document.getElementById(\\\"test\\\").value = \\'<script>alert(\\'aaa\\');<\\/script>\\';",
176                 StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '<script>alert('aaa');</script>';"));
177     }
178 
179     /**
180      * Tests https://issues.apache.org/jira/browse/LANG-339
181      */
182     @Test
183     public void testEscapeHiragana() {
184         // Some random Japanese Unicode characters
185         final String original = "\u304B\u304C\u3068";
186         final String escaped = StringEscapeUtils.escapeHtml4(original);
187         assertEquals(original, escaped, "Hiragana character Unicode behavior should not be being escaped by escapeHtml4");
188 
189         final String unescaped = StringEscapeUtils.unescapeHtml4(escaped);
190 
191         assertEquals(escaped, unescaped, "Hiragana character Unicode behavior has changed - expected no unescaping");
192     }
193 
194     @Test
195     public void testEscapeHtml3() {
196         for (final String[] element : HTML_ESCAPES) {
197             final String message = element[0];
198             final String expected = element[1];
199             final String original = element[2];
200             assertEquals(expected, StringEscapeUtils.escapeHtml4(original), message);
201             final StringWriter sw = new StringWriter();
202             try {
203                 StringEscapeUtils.ESCAPE_HTML3.translate(original, sw);
204             } catch (final IOException e) {
205                 // expected
206             }
207             final String actual = original == null ? null : sw.toString();
208             assertEquals(expected, actual, message);
209         }
210     }
211 
212     @Test
213     public void testEscapeHtml4() {
214         for (final String[] element : HTML_ESCAPES) {
215             final String message = element[0];
216             final String expected = element[1];
217             final String original = element[2];
218             assertEquals(expected, StringEscapeUtils.escapeHtml4(original), message);
219             final StringWriter sw = new StringWriter();
220             try {
221                 StringEscapeUtils.ESCAPE_HTML4.translate(original, sw);
222             } catch (final IOException e) {
223                 // expected
224             }
225             final String actual = original == null ? null : sw.toString();
226             assertEquals(expected, actual, message);
227         }
228     }
229 
230     /**
231      * Tests // https://issues.apache.org/jira/browse/LANG-480
232      */
233     @Test
234     public void testEscapeHtmlHighUnicode() {
235         // this is the utf8 representation of the character:
236         // COUNTING ROD UNIT DIGIT THREE
237         // in Unicode
238         // code point: U+1D362
239         final byte[] data = { (byte) 0xF0, (byte) 0x9D, (byte) 0x8D, (byte) 0xA2 };
240 
241         final String original = new String(data, StandardCharsets.UTF_8);
242 
243         final String escaped = StringEscapeUtils.escapeHtml4(original);
244         assertEquals(original, escaped, "High Unicode should not have been escaped");
245 
246         final String unescaped = StringEscapeUtils.unescapeHtml4(escaped);
247         assertEquals(original, unescaped, "High Unicode should have been unchanged");
248 
249         // TODO: I think this should hold, needs further investigation
250         // String unescapedFromEntity = StringEscapeUtils.unescapeHtml4("&#119650;");
251         // assertEquals("High Unicode should have been unescaped", original, unescapedFromEntity);
252     }
253 
254     @Test
255     public void testEscapeHtmlThree() {
256         assertNull(StringEscapeUtils.escapeHtml3(null));
257         assertEquals("a", StringEscapeUtils.escapeHtml3("a"));
258         assertEquals("&lt;b&gt;a", StringEscapeUtils.escapeHtml3("<b>a"));
259     }
260 
261     @Test
262     public void testEscapeHtmlVersions() {
263         assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392"));
264         assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;"));
265 
266         // TODO: refine API for escaping/unescaping specific HTML versions
267     }
268 
269     @Test
270     public void testEscapeJava() throws IOException {
271         assertNull(StringEscapeUtils.escapeJava(null));
272         try {
273             StringEscapeUtils.ESCAPE_JAVA.translate(null, null);
274             fail("Exception expected!");
275         } catch (final IOException ex) {
276             fail("Exception expected!");
277         } catch (final IllegalArgumentException ex) {
278             // expected
279         }
280         try {
281             StringEscapeUtils.ESCAPE_JAVA.translate("", null);
282             fail("Exception expected!");
283         } catch (final IOException ex) {
284             fail("Exception expected!");
285         } catch (final IllegalArgumentException ex) {
286             // expected
287         }
288 
289         assertEscapeJava("", "", "empty string");
290         assertEscapeJava(FOO, FOO);
291         assertEscapeJava("\\t", "\t", "tab");
292         assertEscapeJava("\\\\", "\\", "backslash");
293         assertEscapeJava("'", "'", "single quote should not be escaped");
294         assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r");
295         assertEscapeJava("\\u1234", "\u1234");
296         assertEscapeJava("\\u0234", "\u0234");
297         assertEscapeJava("\\u00EF", "\u00ef");
298         assertEscapeJava("\\u0001", "\u0001");
299         assertEscapeJava("\\uABCD", "\uabcd", "Should use capitalized Unicode hex");
300 
301         assertEscapeJava("He didn't say, \\\"stop!\\\"", "He didn't say, \"stop!\"");
302         assertEscapeJava("This space is non-breaking:" + "\\u00A0", "This space is non-breaking:\u00a0", "non-breaking space");
303         assertEscapeJava("\\uABCD\\u1234\\u012C", "\uABCD\u1234\u012C");
304     }
305 
306     /**
307      * Tests https://issues.apache.org/jira/browse/LANG-421
308      */
309     @Test
310     public void testEscapeJavaWithSlash() {
311         final String input = "String with a slash (/) in it";
312 
313         final String actual = StringEscapeUtils.escapeJava(input);
314 
315         /*
316          * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape in a Java string.
317          */
318         assertEquals(input, actual);
319     }
320 
321     @Test
322     public void testEscapeJson() {
323         assertNull(StringEscapeUtils.escapeJson(null));
324         try {
325             StringEscapeUtils.ESCAPE_JSON.translate(null, null);
326             fail("Exception expected!");
327         } catch (final IOException ex) {
328             fail("Exception expected!");
329         } catch (final IllegalArgumentException ex) {
330             // expected
331         }
332         try {
333             StringEscapeUtils.ESCAPE_JSON.translate("", null);
334             fail("Exception expected!");
335         } catch (final IOException ex) {
336             fail("Exception expected!");
337         } catch (final IllegalArgumentException ex) {
338             // expected
339         }
340 
341         assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\""));
342 
343         final String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
344         final String input = "\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
345 
346         assertEquals(expected, StringEscapeUtils.escapeJson(input));
347     }
348 
349     @Test
350     public void testEscapeXml10() {
351         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml10("a<b>c\"d'e&f"));
352         assertEquals("a\tb\rc\nd", StringEscapeUtils.escapeXml10("a\tb\rc\nd"), "XML 1.0 should not escape \t \n \r");
353         assertEquals("ab", StringEscapeUtils.escapeXml10("a\u0000\u0001\u0008\u000b\u000c\u000e\u001fb"),
354                 "XML 1.0 should omit most #x0-x8 | #xb | #xc | #xe-#x19");
355         assertEquals("a\ud7ff  \ue000b", StringEscapeUtils.escapeXml10("a\ud7ff\ud800 \udfff \ue000b"), "XML 1.0 should omit #xd800-#xdfff");
356         assertEquals("a\ufffdb", StringEscapeUtils.escapeXml10("a\ufffd\ufffe\uffffb"), "XML 1.0 should omit #xfffe | #xffff");
357         assertEquals("a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", StringEscapeUtils.escapeXml10("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"),
358                 "XML 1.0 should escape #x7f-#x84 | #x86 - #x9f, for XML 1.1 compatibility");
359     }
360 
361     @Test
362     public void testEscapeXml11() {
363         assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", StringEscapeUtils.escapeXml11("a<b>c\"d'e&f"));
364         assertEquals("a\tb\rc\nd", StringEscapeUtils.escapeXml11("a\tb\rc\nd"), "XML 1.1 should not escape \t \n \r");
365         assertEquals("ab", StringEscapeUtils.escapeXml11("a\u0000b"), "XML 1.1 should omit #x0");
366         assertEquals("a&#1;&#8;&#11;&#12;&#14;&#31;b", StringEscapeUtils.escapeXml11("a\u0001\u0008\u000b\u000c\u000e\u001fb"),
367                 "XML 1.1 should escape #x1-x8 | #xb | #xc | #xe-#x19");
368         assertEquals("a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", StringEscapeUtils.escapeXml11("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"),
369                 "XML 1.1 should escape #x7F-#x84 | #x86-#x9F");
370         assertEquals("a\ud7ff  \ue000b", StringEscapeUtils.escapeXml11("a\ud7ff\ud800 \udfff \ue000b"), "XML 1.1 should omit #xd800-#xdfff");
371         assertEquals("a\ufffdb", StringEscapeUtils.escapeXml11("a\ufffd\ufffe\uffffb"), "XML 1.1 should omit #xfffe | #xffff");
372     }
373 
374     @Test
375     public void testEscapeXSI() {
376         assertNull(null, escapeXSI(null));
377         assertEquals("He\\ didn\\'t\\ say,\\ \\\"Stop!\\\"", escapeXSI("He didn't say, \"Stop!\""));
378         assertEquals("\\\\", escapeXSI("\\"));
379         assertEquals("", escapeXSI("\n"));
380     }
381 
382     @Test
383     public void testLang313() {
384         assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &amp;"));
385     }
386 
387     /**
388      * Tests https://issues.apache.org/jira/browse/LANG-708
389      *
390      * @throws IOException if an I/O error occurs
391      */
392     @Test
393     public void testLang708() throws IOException {
394         final byte[] inputBytes = Files.readAllBytes(Paths.get("src/test/resources/org/apache/commons/text/stringEscapeUtilsTestData.txt"));
395         final String input = new String(inputBytes, StandardCharsets.UTF_8);
396         final String escaped = StringEscapeUtils.escapeEcmaScript(input);
397         // just the end:
398         assertTrue(escaped.endsWith("}]"), escaped);
399         // a little more:
400         assertTrue(escaped.endsWith("\"valueCode\\\":\\\"\\\"}]"), escaped);
401     }
402 
403     /**
404      * Tests https://issues.apache.org/jira/browse/LANG-911
405      */
406     @Test
407     public void testLang911() {
408         final String bellsTest = "\ud83d\udc80\ud83d\udd14";
409         final String value = StringEscapeUtils.escapeJava(bellsTest);
410         final String valueTest = StringEscapeUtils.unescapeJava(value);
411         assertEquals(bellsTest, valueTest);
412     }
413 
414     // Tests issue #38569
415     // https://issues.apache.org/bugzilla/show_bug.cgi?id=38569
416     @Test
417     public void testStandaloneAmphersand() {
418         assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("&lt;P&O&gt;"));
419         assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & &lt;"));
420         assertEquals("<P&O>", StringEscapeUtils.unescapeXml("&lt;P&O&gt;"));
421         assertEquals("test & <", StringEscapeUtils.unescapeXml("test & &lt;"));
422     }
423 
424     @Test
425     public void testUnescapeCsvString() {
426         assertEquals("foo.bar", StringEscapeUtils.unescapeCsv("foo.bar"));
427         assertEquals("foo,bar", StringEscapeUtils.unescapeCsv("\"foo,bar\""));
428         assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
429         assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
430         assertEquals("foo\"bar", StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
431         assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
432         assertEquals("", StringEscapeUtils.unescapeCsv(""));
433         assertNull(StringEscapeUtils.unescapeCsv(null));
434 
435         assertEquals("foo.bar", StringEscapeUtils.unescapeCsv("\"foo.bar\""));
436     }
437 
438     @Test
439     public void testUnescapeCsvWriter() throws IOException {
440         checkCsvUnescapeWriter("foo.bar", "foo.bar");
441         checkCsvUnescapeWriter("foo,bar", "\"foo,bar\"");
442         checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\"");
443         checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\"");
444         checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\"");
445         checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
446         checkCsvUnescapeWriter("", null);
447         checkCsvUnescapeWriter("", "");
448 
449         checkCsvUnescapeWriter("foo.bar", "\"foo.bar\"");
450     }
451 
452     @Test
453     public void testUnescapeEcmaScript() {
454         assertNull(StringEscapeUtils.unescapeEcmaScript(null));
455         assertEquals("8lvc1u+6B#-I", StringEscapeUtils.unescapeEcmaScript("8lvc1u+6B#-I"));
456         assertEquals("<script src=\"build/main.bundle.js\"></script>", StringEscapeUtils.unescapeEcmaScript("<script src=\"build/main.bundle.js\"></script>"));
457         assertEquals("<script src=\"build/main.bundle.js\"></script>>",
458                 StringEscapeUtils.unescapeEcmaScript("<script src=\"build/main.bundle.js\"></script>>"));
459     }
460 
461     @Test
462     public void testUnescapeHexCharsHtml() {
463         // Simple easy to grok test
464         assertEquals("\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;"), "hex number unescape");
465         assertEquals("\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;"), "hex number unescape");
466         // Test all Character values:
467         for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) {
468             final char c2 = (char) (i + 1);
469             final String expected = Character.toString(i) + Character.toString(c2);
470             final String escapedC1 = "&#x" + Integer.toHexString(i) + ";";
471             final String escapedC2 = "&#x" + Integer.toHexString(c2) + ";";
472             assertEquals(expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2), "hex number unescape index " + i);
473         }
474     }
475 
476     @Test
477     public void testUnescapeHtml3() {
478         for (final String[] element : HTML_ESCAPES) {
479             final String message = element[0];
480             final String expected = element[2];
481             final String original = element[1];
482             assertEquals(expected, StringEscapeUtils.unescapeHtml3(original), message);
483 
484             final StringWriter sw = new StringWriter();
485             try {
486                 StringEscapeUtils.UNESCAPE_HTML3.translate(original, sw);
487             } catch (final IOException e) {
488                 // expected
489             }
490             final String actual = original == null ? null : sw.toString();
491             assertEquals(expected, actual, message);
492         }
493         // \u00E7 is a cedilla (c with wiggle under)
494         // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly
495         // on some locales
496         assertEquals("Fran\u00E7ais", StringEscapeUtils.unescapeHtml3("Fran\u00E7ais"), "funny chars pass through OK");
497 
498         assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml3("Hello&;World"));
499         assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml3("Hello&#;World"));
500         assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml3("Hello&# ;World"));
501         assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml3("Hello&##;World"));
502     }
503 
504     @Test
505     public void testUnescapeHtml4() {
506         for (final String[] element : HTML_ESCAPES) {
507             final String message = element[0];
508             final String expected = element[2];
509             final String original = element[1];
510             assertEquals(expected, StringEscapeUtils.unescapeHtml4(original), message);
511 
512             final StringWriter sw = new StringWriter();
513             try {
514                 StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw);
515             } catch (final IOException e) {
516                 // expected
517             }
518             final String actual = original == null ? null : sw.toString();
519             assertEquals(expected, actual, message);
520         }
521         // \u00E7 is a cedilla (c with wiggle under)
522         // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly
523         // on some locales
524         assertEquals("Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"), "funny chars pass through OK");
525 
526         assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World"));
527         assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World"));
528         assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World"));
529         assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World"));
530     }
531 
532     @Test
533     public void testUnescapeJava() throws IOException {
534         assertNull(StringEscapeUtils.unescapeJava(null));
535         try {
536             StringEscapeUtils.UNESCAPE_JAVA.translate(null, null);
537             fail("Exception expected!");
538         } catch (final IOException ex) {
539             fail("Exception expected!");
540         } catch (final IllegalArgumentException ex) {
541             // expected
542         }
543         try {
544             StringEscapeUtils.UNESCAPE_JAVA.translate("", null);
545             fail("Exception expected!");
546         } catch (final IOException ex) {
547             fail("Exception expected!");
548         } catch (final IllegalArgumentException ex) {
549             // expected
550         }
551         assertThrows(RuntimeException.class, () -> StringEscapeUtils.unescapeJava("\\u02-3"));
552 
553         assertUnescapeJava("", "");
554         assertUnescapeJava("test", "test");
555         assertUnescapeJava("\ntest\b", "\\ntest\\b");
556         assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b");
557         assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r");
558         assertUnescapeJava("", "\\");
559         // foo
560         assertUnescapeJava("\uABCDx", "\\uabcdx", "lowercase Unicode");
561         assertUnescapeJava("\uABCDx", "\\uABCDx", "uppercase Unicode");
562         assertUnescapeJava("\uABCD", "\\uabcd", "Unicode as final character");
563     }
564 
565     @Test
566     public void testUnescapeJson() {
567         final String jsonString = "{\"age\":100,\"name\":\"kyong.com\n\",\"messages\":[\"msg 1\",\"msg 2\",\"msg 3\"]}";
568 
569         assertEquals("", StringEscapeUtils.unescapeJson(""));
570         assertEquals(" ", StringEscapeUtils.unescapeJson(" "));
571         assertEquals("a:b", StringEscapeUtils.unescapeJson("a:b"));
572         assertEquals(jsonString, StringEscapeUtils.unescapeJson(jsonString));
573     }
574 
575     @Test // TEXT-120
576     public void testUnescapeJsonDoubleQuoteAndForwardSlash() {
577         final String escapedJsonString = "double quote: \\\" and a forward slash: \\/";
578         final String jsonString = "double quote: \" and a forward slash: /";
579 
580         assertEquals(jsonString, StringEscapeUtils.unescapeJson(escapedJsonString));
581     }
582 
583     @Test
584     public void testUnescapeUnknownEntity() {
585         assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;"));
586     }
587 
588     /**
589      * Reverse of the above.
590      *
591      * @see <a href="https://issues.apache.org/jira/browse/LANG-729">LANG-729</a>
592      */
593     @Test
594     public void testUnescapeXmlSupplementaryCharacters() {
595         assertEquals("\uD84C\uDFB4", StringEscapeUtils.unescapeXml("&#144308;"), "Supplementary character must be represented using a single escape");
596 
597         assertEquals("a b c \uD84C\uDFB4", StringEscapeUtils.unescapeXml("a b c &#144308;"),
598                 "Supplementary characters mixed with basic characters should be decoded correctly");
599     }
600 
601     @Test
602     public void testUnscapeXSI() {
603         assertNull(null, unescapeXSI(null));
604         assertEquals("\"", unescapeXSI("\\\""));
605         assertEquals("He didn't say, \"Stop!\"", unescapeXSI("He\\ didn\\'t\\ say,\\ \\\"Stop!\\\""));
606         assertEquals("\\", unescapeXSI("\\\\"));
607         assertEquals("", unescapeXSI("\\"));
608     }
609 }