/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3;

import java.io.IOException;
import java.io.Writer;

import org.apache.commons.lang3.text.translate.AggregateTranslator;
import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import org.apache.commons.lang3.text.translate.EntityArrays;
import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
import org.apache.commons.lang3.text.translate.LookupTranslator;
import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
import org.apache.commons.lang3.text.translate.OctalUnescaper;
import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;

/**
 * Escapes and unescapes {@link String}s for
 * Java, Java Script, HTML and XML.
 *
 * <p>Every public method is a thin wrapper that delegates to one of the
 * {@link CharSequenceTranslator} constants declared in this class; the
 * constants are exposed so they can be combined into custom translators.</p>
 *
 * <p>#ThreadSafe#</p>
 * @since 2.0
 * @deprecated As of 3.6, use Apache Commons Text
 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
 * StringEscapeUtils</a> instead
 */
@Deprecated
public class StringEscapeUtils {

    /* ESCAPE TRANSLATORS */

    /**
     * Translator that escapes a single CSV column value.
     *
     * <p>The whole input is handled in one call made at index 0: it is wrapped in
     * double quotes (with embedded double quotes doubled) when it contains a comma,
     * a double quote, CR or LF, and written unchanged otherwise.</p>
     */
    static class CsvEscaper extends CharSequenceTranslator {

        private static final char CSV_DELIMITER = ',';
        private static final char CSV_QUOTE = '"';
        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
        private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };

        /**
         * Writes the escaped form of the complete {@code input} to {@code out}.
         *
         * @param input the CSV column value to escape
         * @param index the position being translated; must be {@code 0}
         * @param out the writer receiving the escaped value
         * @return the number of code points in {@code input}
         * @throws IOException if writing to {@code out} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {

            if (index != 0) {
                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
            }

            final String value = input.toString();
            if (StringUtils.containsNone(value, CSV_SEARCH_CHARS)) {
                // Nothing that requires quoting: emit the value as-is.
                out.write(value);
            } else {
                out.write(CSV_QUOTE);
                // An embedded quote is escaped by doubling it.
                out.write(StringUtils.replace(value, CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
                out.write(CSV_QUOTE);
            }
            return Character.codePointCount(input, 0, input.length());
        }
    }

    /**
     * Translator that unescapes a single CSV column value.
     *
     * <p>The whole input is handled in one call made at index 0. The enclosing
     * double quotes are removed (and doubled quotes collapsed) only when the value
     * is enclosed in double quotes and the enclosed text contains a comma, a double
     * quote, CR or LF; in every other case the input is written unchanged.</p>
     */
    static class CsvUnescaper extends CharSequenceTranslator {

        private static final char CSV_DELIMITER = ',';
        private static final char CSV_QUOTE = '"';
        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
        private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};

        /**
         * Writes the unescaped form of the complete {@code input} to {@code out}.
         *
         * @param input the CSV column value to unescape
         * @param index the position being translated; must be {@code 0}
         * @param out the writer receiving the unescaped value
         * @return the number of code points in {@code input}
         * @throws IOException if writing to {@code out} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {

            if (index != 0) {
                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
            }

            final int length = input.length();
            final String value = input.toString();

            // A quoted value needs a distinct opening and closing quote, i.e. at least
            // two characters. Without the length check a lone '"' would count as both
            // and the subSequence(1, 0) call below would throw.
            if (length < 2 || input.charAt(0) != CSV_QUOTE || input.charAt(length - 1) != CSV_QUOTE) {
                out.write(value);
                return Character.codePointCount(input, 0, length);
            }

            // strip quotes
            final String quoteless = input.subSequence(1, length - 1).toString();

            if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
                // deal with escaped quotes; ie) ""
                out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
            } else {
                // The quotes were not required, so the value is left untouched.
                out.write(value);
            }
            return Character.codePointCount(input, 0, length);
        }
    }

    /**
     * Translator object for escaping Java.
     *
     * While {@link #escapeJava(String)} is the expected method of use, this
     * object allows the Java escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_JAVA =
          new LookupTranslator(
            new String[][] {
              {"\"", "\\\""},
              {"\\", "\\\\"},
          }).with(
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
          ).with(
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping EcmaScript/JavaScript.
     *
     * While {@link #escapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
        new AggregateTranslator(
            new LookupTranslator(
                      new String[][] {
                            {"'", "\\'"},
                            {"\"", "\\\""},
                            {"\\", "\\\\"},
                            {"/", "\\/"}
                      }),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping Json.
     *
     * While {@link #escapeJson(String)} is the expected method of use, this
     * object allows the Json escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.2
     */
    public static final CharSequenceTranslator ESCAPE_JSON =
        new AggregateTranslator(
            new LookupTranslator(
                      new String[][] {
                            {"\"", "\\\""},
                            {"\\", "\\\\"},
                            {"/", "\\/"}
                      }),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping XML.
     *
     * While {@link #escapeXml(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
     */
    @Deprecated
    public static final CharSequenceTranslator ESCAPE_XML =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE())
        );

    /**
     * Translator object for escaping XML 1.0.
     *
     * While {@link #escapeXml10(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.3
     */
    public static final CharSequenceTranslator ESCAPE_XML10 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
            // Control characters other than tab (#x9), LF (#xA) and CR (#xD), plus the
            // non-characters #xFFFE and #xFFFF, are removed from the output.
            new LookupTranslator(
                    new String[][] {
                            { "\u0000", StringUtils.EMPTY },
                            { "\u0001", StringUtils.EMPTY },
                            { "\u0002", StringUtils.EMPTY },
                            { "\u0003", StringUtils.EMPTY },
                            { "\u0004", StringUtils.EMPTY },
                            { "\u0005", StringUtils.EMPTY },
                            { "\u0006", StringUtils.EMPTY },
                            { "\u0007", StringUtils.EMPTY },
                            { "\u0008", StringUtils.EMPTY },
                            { "\u000b", StringUtils.EMPTY },
                            { "\u000c", StringUtils.EMPTY },
                            { "\u000e", StringUtils.EMPTY },
                            { "\u000f", StringUtils.EMPTY },
                            { "\u0010", StringUtils.EMPTY },
                            { "\u0011", StringUtils.EMPTY },
                            { "\u0012", StringUtils.EMPTY },
                            { "\u0013", StringUtils.EMPTY },
                            { "\u0014", StringUtils.EMPTY },
                            { "\u0015", StringUtils.EMPTY },
                            { "\u0016", StringUtils.EMPTY },
                            { "\u0017", StringUtils.EMPTY },
                            { "\u0018", StringUtils.EMPTY },
                            { "\u0019", StringUtils.EMPTY },
                            { "\u001a", StringUtils.EMPTY },
                            { "\u001b", StringUtils.EMPTY },
                            { "\u001c", StringUtils.EMPTY },
                            { "\u001d", StringUtils.EMPTY },
                            { "\u001e", StringUtils.EMPTY },
                            { "\u001f", StringUtils.EMPTY },
                            { "\ufffe", StringUtils.EMPTY },
                            { "\uffff", StringUtils.EMPTY }
                    }),
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
        );

    /**
     * Translator object for escaping XML 1.1.
     *
     * While {@link #escapeXml11(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.3
     */
    public static final CharSequenceTranslator ESCAPE_XML11 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
            new LookupTranslator(
                    new String[][] {
                            // #x0, #xFFFE and #xFFFF are removed.
                            { "\u0000", StringUtils.EMPTY },
                            // #xB and #xC are written as numeric character references
                            // rather than removed, matching the ranges documented on
                            // escapeXml11(String).
                            { "\u000b", "&#11;" },
                            { "\u000c", "&#12;" },
                            { "\ufffe", StringUtils.EMPTY },
                            { "\uffff", StringUtils.EMPTY }
                    }),
            NumericEntityEscaper.between(0x1, 0x8),
            NumericEntityEscaper.between(0xe, 0x1f),
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
        );

    /**
     * Translator object for escaping HTML version 3.0.
     *
     * While {@link #escapeHtml3(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_HTML3 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
        );

    /**
     * Translator object for escaping HTML version 4.0.
     *
     * While {@link #escapeHtml4(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_HTML4 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
            new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
        );

    /**
     * Translator object for escaping individual Comma Separated Values.
     *
     * While {@link #escapeCsv(String)} is the expected method of use, this
     * object allows the CSV escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();

    /* UNESCAPE TRANSLATORS */

    /**
     * Translator object for unescaping escaped Java.
     *
     * While {@link #unescapeJava(String)} is the expected method of use, this
     * object allows the Java unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
    public static final CharSequenceTranslator UNESCAPE_JAVA =
        new AggregateTranslator(
            new OctalUnescaper(),     // .between('\1', '\377'),
            new UnicodeUnescaper(),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
            new LookupTranslator(
                      new String[][] {
                            {"\\\\", "\\"},
                            {"\\\"", "\""},
                            {"\\'", "'"},
                            {"\\", ""}
                      })
        );

    /**
     * Translator object for unescaping escaped EcmaScript.
     *
     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped Json.
     *
     * While {@link #unescapeJson(String)} is the expected method of use, this
     * object allows the Json unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.2
     */
    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped HTML 3.0.
     *
     * While {@link #unescapeHtml3(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_HTML3 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped HTML 4.0.
     *
     * While {@link #unescapeHtml4(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_HTML4 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
            new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped XML.
     *
     * While {@link #unescapeXml(String)} is the expected method of use, this
     * object allows the XML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_XML =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped Comma Separated Value entries.
     *
     * While {@link #unescapeCsv(String)} is the expected method of use, this
     * object allows the CSV unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();

    /* Helper functions */

    /**
     * Returns a {@link String} value for a CSV column enclosed in double quotes,
     * if required.
     *
     * <p>If the value contains a comma, newline or double quote, then the
     *    String value is returned enclosed in double quotes.</p>
     *
     * <p>Any double quote characters in the value are escaped with another double quote.</p>
     *
     * <p>If the value does not contain a comma, newline or double quote, then the
     *    String value is returned unchanged.</p>
     *
     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return the input String, enclosed in double quotes if the value contains a comma,
     * newline or double quote, {@code null} if null string input
     * @since 2.4
     */
    public static final String escapeCsv(final String input) {
        return ESCAPE_CSV.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using EcmaScript String rules.
     * <p>Escapes any values it finds into their EcmaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and EcmaScript strings
     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String escapeEcmaScript(final String input) {
        return ESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using HTML entities.
     * <p>Supports only the HTML 3.0 entities.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String escapeHtml3(final String input) {
        return ESCAPE_HTML3.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using HTML entities.
     *
     * <p>
     * For example:
     * </p>
     * <p>{@code "bread" & "butter"}</p>
     * becomes:
     * <p>
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character ({@code &apos;})
     * is not a legal entity and so is not supported.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     *
     * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     *
     * @since 3.0
     */
    public static final String escapeHtml4(final String input) {
        return ESCAPE_HTML4.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using Java String rules.
     *
     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJava(final String input) {
        return ESCAPE_JAVA.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using Json String rules.
     * <p>Escapes any values it finds into their Json String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and Json strings
     * is that in Json, forward-slash (/) is escaped.</p>
     *
     * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     *
     * @since 3.2
     */
    public static final String escapeJson(final String input) {
        return ESCAPE_JSON.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} becomes
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
     *    escaped. If you still wish this functionality, you can achieve it
     *    via the following:
     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
     */
    @Deprecated
    public static final String escapeXml(final String input) {
        return ESCAPE_XML.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} becomes
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
     * characters or unpaired Unicode surrogate code points, even after escaping.
     * {@code escapeXml10} will remove characters that do not fit in the
     * following ranges:</p>
     *
     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
     *
     * <p>Though not strictly necessary, {@code escapeXml10} will escape
     * characters in the following ranges:</p>
     *
     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
     *
     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
     * document. If you want to allow more non-text characters in an XML 1.1
     * document, use {@link #escapeXml11(String)}.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @since 3.3
     */
    public static String escapeXml10(final String input) {
        return ESCAPE_XML10.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} becomes
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>XML 1.1 can represent certain control characters, but it cannot represent
     * the null byte or unpaired Unicode surrogate code points, even after escaping.
     * {@code escapeXml11} will remove characters that do not fit in the following
     * ranges:</p>
     *
     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
     *
     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
     *
     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
     *
     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
     * use it for XML 1.0 documents.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @since 3.3
     */
    public static String escapeXml11(final String input) {
        return ESCAPE_XML11.translate(input);
    }

    /**
     * Returns a {@link String} value for an unescaped CSV column.
     *
     * <p>If the value is enclosed in double quotes, and contains a comma, newline
     *    or double quote, then quotes are removed.
     * </p>
     *
     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
     *    to just one double quote.</p>
     *
     * <p>If the value is not enclosed in double quotes, or is and does not contain a
     *    comma, newline or double quote, then the String value is returned unchanged.
     *    A value too short to be enclosed in double quotes, such as a single double
     *    quote character, is also returned unchanged.</p>
     *
     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return the input String, with enclosing double quotes removed and embedded double
     * quotes unescaped, {@code null} if null string input
     * @since 2.4
     */
    public static final String unescapeCsv(final String input) {
        return UNESCAPE_CSV.translate(input);
    }

    /**
     * Unescapes any EcmaScript literals found in the {@link String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@link String} to unescape, may be null
     * @return A new unescaped {@link String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String unescapeEcmaScript(final String input) {
        return UNESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports only HTML 3.0 entities.
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String unescapeHtml3(final String input) {
        return UNESCAPE_HTML3.translate(input);
    }

    /**
     * Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports HTML 4.0 entities.
     *
     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
     * will become {@code "<Français>"}</p>
     *
     * <p>If an entity is unrecognized, it is left alone, and inserted
     * verbatim into the result string. e.g. {@code ">&zzzz;x"} will
     * become {@code ">&zzzz;x"}.</p>
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String unescapeHtml4(final String input) {
        return UNESCAPE_HTML4.translate(input);
    }

    /**
     * Unescapes any Java literals found in the {@link String}.
     * For example, it will turn a sequence of {@code '\'} and
     * {@code 'n'} into a newline character, unless the {@code '\'}
     * is preceded by another {@code '\'}.
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     */
    public static final String unescapeJava(final String input) {
        return UNESCAPE_JAVA.translate(input);
    }

    /**
     * Unescapes any Json literals found in the {@link String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@link String} to unescape, may be null
     * @return A new unescaped {@link String}, {@code null} if null string input
     *
     * @since 3.2
     */
    public static final String unescapeJson(final String input) {
        return UNESCAPE_JSON.translate(input);
    }

    /**
     * Unescapes a string containing XML entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes.
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
     *    Unicode characters. This may change in future releases.</p>
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     * @see #escapeXml(String)
     * @see #escapeXml10(String)
     * @see #escapeXml11(String)
     */
    public static final String unescapeXml(final String input) {
        return UNESCAPE_XML.translate(input);
    }

    /**
     * {@link StringEscapeUtils} instances should NOT be constructed in
     * standard programming.
     *
     * <p>Instead, the class should be used as:</p>
     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     *
     * @deprecated TODO Make private in 4.0.
     */
    @Deprecated
    public StringEscapeUtils() {
        // empty
    }

}