001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.text; 018 019import java.util.HashSet; 020import java.util.Set; 021import java.util.function.Predicate; 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024 025import org.apache.commons.lang3.ArrayUtils; 026import org.apache.commons.lang3.StringUtils; 027import org.apache.commons.lang3.Validate; 028 029/** 030 * Operations on Strings that contain words. 031 * 032 * <p> 033 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a 034 * {@code null} input. Each method documents its behavior in more detail. 035 * </p> 036 * 037 * @since 1.1 038 */ 039public class WordUtils { 040 041 /** 042 * Abbreviates the words nicely. 043 * 044 * <p> 045 * This method searches for the first space after the lower limit and abbreviates 046 * the String there. It will also append any String passed as a parameter 047 * to the end of the String. The upper limit can be specified to forcibly 048 * abbreviate a String. 049 * </p> 050 * 051 * @param str the string to be abbreviated. If null is passed, null is returned. 052 * If the empty String is passed, the empty string is returned. 053 * @param lower the lower limit; negative value is treated as zero. 054 * @param upper the upper limit; specify -1 if no limit is desired. 055 * The upper limit cannot be lower than the lower limit. 056 * @param appendToEnd String to be appended to the end of the abbreviated string. 057 * This is appended ONLY if the string was indeed abbreviated. 058 * The append does not count towards the lower or upper limits. 059 * @return The abbreviated String. 060 * 061 * <pre> 062 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null)); = "Now" 063 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null)); = "Now is the" 064 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null)); = "Now is the time for all" 065 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, "")); = "Now" 066 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, "")); = "Now is the" 067 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, "")); = "Now is the time for all" 068 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ...")); = "Now ..." 069 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ...")); = "Now is the ..." 070 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ...")); = "Now is the time for all ..." 071 * WordUtils.abbreviate("Now is the time for all good men", 0, -1, "")); = "Now" 072 * WordUtils.abbreviate("Now is the time for all good men", 10, -1, "")); = "Now is the" 073 * WordUtils.abbreviate("Now is the time for all good men", 20, -1, "")); = "Now is the time for all" 074 * WordUtils.abbreviate("Now is the time for all good men", 50, -1, "")); = "Now is the time for all good men" 075 * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, "")); = "Now is the time for all good men" 076 * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null)); = IllegalArgumentException 077 * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null)); = IllegalArgumentException 078 * </pre> 079 */ 080 public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) { 081 Validate.isTrue(upper >= -1, "upper value cannot be less than -1"); 082 Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value"); 083 if (StringUtils.isEmpty(str)) { 084 return str; 085 } 086 087 // if the lower value is greater than the length of the string, 088 // set to the length of the string 089 if (lower > str.length()) { 090 lower = str.length(); 091 } 092 093 // if the upper value is -1 (i.e. no limit) or is greater 094 // than the length of the string, set to the length of the string 095 if (upper == -1 || upper > str.length()) { 096 upper = str.length(); 097 } 098 099 final StringBuilder result = new StringBuilder(); 100 final int index = StringUtils.indexOf(str, " ", lower); 101 if (index == -1) { 102 result.append(str, 0, upper); 103 // only if abbreviation has occurred do we append the appendToEnd value 104 if (upper != str.length()) { 105 result.append(StringUtils.defaultString(appendToEnd)); 106 } 107 } else { 108 result.append(str, 0, Math.min(index, upper)); 109 result.append(StringUtils.defaultString(appendToEnd)); 110 } 111 112 return result.toString(); 113 } 114 115 /** 116 * Capitalizes all the whitespace separated words in a String. 117 * Only the first character of each word is changed. To convert the 118 * rest of each word to lowercase at the same time, 119 * use {@link #capitalizeFully(String)}. 120 * 121 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 122 * A {@code null} input String returns {@code null}. 123 * Capitalization uses the Unicode title case, normally equivalent to 124 * upper case.</p> 125 * 126 * <pre> 127 * WordUtils.capitalize(null) = null 128 * WordUtils.capitalize("") = "" 129 * WordUtils.capitalize("i am FINE") = "I Am FINE" 130 * </pre> 131 * 132 * @param str the String to capitalize, may be null 133 * @return capitalized String, {@code null} if null String input 134 * @see #uncapitalize(String) 135 * @see #capitalizeFully(String) 136 */ 137 public static String capitalize(final String str) { 138 return capitalize(str, null); 139 } 140 141 /** 142 * Capitalizes all the delimiter separated words in a String. 143 * Only the first character of each word is changed. To convert the 144 * rest of each word to lowercase at the same time, 145 * use {@link #capitalizeFully(String, char[])}. 146 * 147 * <p>The delimiters represent a set of characters understood to separate words. 148 * The first string character and the first non-delimiter character after a 149 * delimiter will be capitalized.</p> 150 * 151 * <p>A {@code null} input String returns {@code null}. 152 * Capitalization uses the Unicode title case, normally equivalent to 153 * upper case.</p> 154 * 155 * <pre> 156 * WordUtils.capitalize(null, *) = null 157 * WordUtils.capitalize("", *) = "" 158 * WordUtils.capitalize(*, new char[0]) = * 159 * WordUtils.capitalize("i am fine", null) = "I Am Fine" 160 * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine" 161 * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine" 162 * </pre> 163 * 164 * @param str the String to capitalize, may be null 165 * @param delimiters set of characters to determine capitalization, null means whitespace 166 * @return capitalized String, {@code null} if null String input 167 * @see #uncapitalize(String) 168 * @see #capitalizeFully(String) 169 */ 170 public static String capitalize(final String str, final char... delimiters) { 171 if (StringUtils.isEmpty(str)) { 172 return str; 173 } 174 final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters); 175 final int strLen = str.length(); 176 final int[] newCodePoints = new int[strLen]; 177 int outOffset = 0; 178 179 boolean capitalizeNext = true; 180 for (int index = 0; index < strLen;) { 181 final int codePoint = str.codePointAt(index); 182 183 if (isDelimiter.test(codePoint)) { 184 capitalizeNext = true; 185 newCodePoints[outOffset++] = codePoint; 186 index += Character.charCount(codePoint); 187 } else if (capitalizeNext) { 188 final int titleCaseCodePoint = Character.toTitleCase(codePoint); 189 newCodePoints[outOffset++] = titleCaseCodePoint; 190 index += Character.charCount(titleCaseCodePoint); 191 capitalizeNext = false; 192 } else { 193 newCodePoints[outOffset++] = codePoint; 194 index += Character.charCount(codePoint); 195 } 196 } 197 return new String(newCodePoints, 0, outOffset); 198 } 199 200 /** 201 * Converts all the whitespace separated words in a String into capitalized words, 202 * that is each word is made up of a titlecase character and then a series of 203 * lowercase characters. 204 * 205 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 206 * A {@code null} input String returns {@code null}. 207 * Capitalization uses the Unicode title case, normally equivalent to 208 * upper case.</p> 209 * 210 * <pre> 211 * WordUtils.capitalizeFully(null) = null 212 * WordUtils.capitalizeFully("") = "" 213 * WordUtils.capitalizeFully("i am FINE") = "I Am Fine" 214 * </pre> 215 * 216 * @param str the String to capitalize, may be null 217 * @return capitalized String, {@code null} if null String input 218 */ 219 public static String capitalizeFully(final String str) { 220 return capitalizeFully(str, null); 221 } 222 223 /** 224 * Converts all the delimiter separated words in a String into capitalized words, 225 * that is each word is made up of a titlecase character and then a series of 226 * lowercase characters. 227 * 228 * <p>The delimiters represent a set of characters understood to separate words. 229 * The first string character and the first non-delimiter character after a 230 * delimiter will be capitalized.</p> 231 * 232 * <p>A {@code null} input String returns {@code null}. 233 * Capitalization uses the Unicode title case, normally equivalent to 234 * upper case.</p> 235 * 236 * <pre> 237 * WordUtils.capitalizeFully(null, *) = null 238 * WordUtils.capitalizeFully("", *) = "" 239 * WordUtils.capitalizeFully(*, null) = * 240 * WordUtils.capitalizeFully(*, new char[0]) = * 241 * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine" 242 * </pre> 243 * 244 * @param str the String to capitalize, may be null 245 * @param delimiters set of characters to determine capitalization, null means whitespace 246 * @return capitalized String, {@code null} if null String input 247 */ 248 public static String capitalizeFully(String str, final char... delimiters) { 249 if (StringUtils.isEmpty(str)) { 250 return str; 251 } 252 str = str.toLowerCase(); 253 return capitalize(str, delimiters); 254 } 255 256 /** 257 * Checks if the String contains all words in the given array. 258 * 259 * <p> 260 * A {@code null} String will return {@code false}. A {@code null}, zero 261 * length search array or if one element of array is null will return {@code false}. 262 * </p> 263 * 264 * <pre> 265 * WordUtils.containsAllWords(null, *) = false 266 * WordUtils.containsAllWords("", *) = false 267 * WordUtils.containsAllWords(*, null) = false 268 * WordUtils.containsAllWords(*, []) = false 269 * WordUtils.containsAllWords("abcd", "ab", "cd") = false 270 * WordUtils.containsAllWords("abc def", "def", "abc") = true 271 * </pre> 272 * 273 * @param word The CharSequence to check, may be null 274 * @param words The array of String words to search for, may be null 275 * @return {@code true} if all search words are found, {@code false} otherwise 276 */ 277 public static boolean containsAllWords(final CharSequence word, final CharSequence... words) { 278 if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) { 279 return false; 280 } 281 for (final CharSequence w : words) { 282 if (StringUtils.isBlank(w)) { 283 return false; 284 } 285 final Pattern p = Pattern.compile(".*\\b" + Pattern.quote(w.toString()) + "\\b.*"); 286 if (!p.matcher(word).matches()) { 287 return false; 288 } 289 } 290 return true; 291 } 292 293 /** 294 * Given the array of delimiters supplied; returns a function determining whether a character code point is a delimiter. 295 * The function provides O(1) lookup time. 296 * Whitespace is defined by {@link Character#isWhitespace(char)} and is used as the defaultvalue if delimiters is null. 297 * 298 * @param delimiters set of characters to determine delimiters, null means whitespace 299 * @return Predicate<Integer> taking a code point value as an argument and returning true if a delimiter. 300 */ 301 private static Predicate<Integer> generateIsDelimiterFunction(final char[] delimiters) { 302 final Predicate<Integer> isDelimiter; 303 if (delimiters == null || delimiters.length == 0) { 304 isDelimiter = delimiters == null ? Character::isWhitespace : c -> false; 305 } else { 306 final Set<Integer> delimiterSet = new HashSet<>(); 307 for (int index = 0; index < delimiters.length; index++) { 308 delimiterSet.add(Character.codePointAt(delimiters, index)); 309 } 310 isDelimiter = delimiterSet::contains; 311 } 312 313 return isDelimiter; 314 } 315 316 /** 317 * Extracts the initial characters from each word in the String. 318 * 319 * <p>All first characters after whitespace are returned as a new string. 320 * Their case is not changed.</p> 321 * 322 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 323 * A {@code null} input String returns {@code null}.</p> 324 * 325 * <pre> 326 * WordUtils.initials(null) = null 327 * WordUtils.initials("") = "" 328 * WordUtils.initials("Ben John Lee") = "BJL" 329 * WordUtils.initials("Ben J.Lee") = "BJ" 330 * </pre> 331 * 332 * @param str the String to get initials from, may be null 333 * @return String of initial letters, {@code null} if null String input 334 * @see #initials(String,char[]) 335 */ 336 public static String initials(final String str) { 337 return initials(str, null); 338 } 339 340 /** 341 * Extracts the initial characters from each word in the String. 342 * 343 * <p>All first characters after the defined delimiters are returned as a new string. 344 * Their case is not changed.</p> 345 * 346 * <p>If the delimiters array is null, then Whitespace is used. 347 * Whitespace is defined by {@link Character#isWhitespace(char)}. 348 * A {@code null} input String returns {@code null}. 349 * An empty delimiter array returns an empty String.</p> 350 * 351 * <pre> 352 * WordUtils.initials(null, *) = null 353 * WordUtils.initials("", *) = "" 354 * WordUtils.initials("Ben John Lee", null) = "BJL" 355 * WordUtils.initials("Ben J.Lee", null) = "BJ" 356 * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL" 357 * WordUtils.initials(*, new char[0]) = "" 358 * </pre> 359 * 360 * @param str the String to get initials from, may be null 361 * @param delimiters set of characters to determine words, null means whitespace 362 * @return String of initial characters, {@code null} if null String input 363 * @see #initials(String) 364 */ 365 public static String initials(final String str, final char... delimiters) { 366 if (StringUtils.isEmpty(str)) { 367 return str; 368 } 369 if (delimiters != null && delimiters.length == 0) { 370 return StringUtils.EMPTY; 371 } 372 final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters); 373 final int strLen = str.length(); 374 final int[] newCodePoints = new int[strLen / 2 + 1]; 375 int count = 0; 376 boolean lastWasGap = true; 377 for (int i = 0; i < strLen;) { 378 final int codePoint = str.codePointAt(i); 379 380 if (isDelimiter.test(codePoint)) { 381 lastWasGap = true; 382 } else if (lastWasGap) { 383 newCodePoints[count++] = codePoint; 384 lastWasGap = false; 385 } 386 387 i += Character.charCount(codePoint); 388 } 389 return new String(newCodePoints, 0, count); 390 } 391 392 /** 393 * Is the character a delimiter. 394 * 395 * @param ch the character to check 396 * @param delimiters the delimiters 397 * @return true if it is a delimiter 398 * @deprecated as of 1.2 and will be removed in 2.0 399 */ 400 @Deprecated 401 public static boolean isDelimiter(final char ch, final char[] delimiters) { 402 if (delimiters == null) { 403 return Character.isWhitespace(ch); 404 } 405 for (final char delimiter : delimiters) { 406 if (ch == delimiter) { 407 return true; 408 } 409 } 410 return false; 411 } 412 413 /** 414 * Is the codePoint a delimiter. 415 * 416 * @param codePoint the codePint to check 417 * @param delimiters the delimiters 418 * @return true if it is a delimiter 419 * @deprecated as of 1.2 and will be removed in 2.0 420 */ 421 @Deprecated 422 public static boolean isDelimiter(final int codePoint, final char[] delimiters) { 423 if (delimiters == null) { 424 return Character.isWhitespace(codePoint); 425 } 426 for (int index = 0; index < delimiters.length; index++) { 427 final int delimiterCodePoint = Character.codePointAt(delimiters, index); 428 if (delimiterCodePoint == codePoint) { 429 return true; 430 } 431 } 432 return false; 433 } 434 435 /** 436 * Swaps the case of a String using a word based algorithm. 437 * 438 * <ul> 439 * <li>Upper case character converts to Lower case</li> 440 * <li>Title case character converts to Lower case</li> 441 * <li>Lower case character after Whitespace or at start converts to Title case</li> 442 * <li>Other Lower case character converts to Upper case</li> 443 * </ul> 444 * 445 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 446 * A {@code null} input String returns {@code null}.</p> 447 * 448 * <pre> 449 * StringUtils.swapCase(null) = null 450 * StringUtils.swapCase("") = "" 451 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone" 452 * </pre> 453 * 454 * @param str the String to swap case, may be null 455 * @return The changed String, {@code null} if null String input 456 */ 457 public static String swapCase(final String str) { 458 if (StringUtils.isEmpty(str)) { 459 return str; 460 } 461 final int strLen = str.length(); 462 final int[] newCodePoints = new int[strLen]; 463 int outOffset = 0; 464 boolean whitespace = true; 465 for (int index = 0; index < strLen;) { 466 final int oldCodepoint = str.codePointAt(index); 467 final int newCodePoint; 468 if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) { 469 newCodePoint = Character.toLowerCase(oldCodepoint); 470 whitespace = false; 471 } else if (Character.isLowerCase(oldCodepoint)) { 472 if (whitespace) { 473 newCodePoint = Character.toTitleCase(oldCodepoint); 474 whitespace = false; 475 } else { 476 newCodePoint = Character.toUpperCase(oldCodepoint); 477 } 478 } else { 479 whitespace = Character.isWhitespace(oldCodepoint); 480 newCodePoint = oldCodepoint; 481 } 482 newCodePoints[outOffset++] = newCodePoint; 483 index += Character.charCount(newCodePoint); 484 } 485 return new String(newCodePoints, 0, outOffset); 486 } 487 488 /** 489 * Uncapitalizes all the whitespace separated words in a String. 490 * Only the first character of each word is changed. 491 * 492 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 493 * A {@code null} input String returns {@code null}.</p> 494 * 495 * <pre> 496 * WordUtils.uncapitalize(null) = null 497 * WordUtils.uncapitalize("") = "" 498 * WordUtils.uncapitalize("I Am FINE") = "i am fINE" 499 * </pre> 500 * 501 * @param str the String to uncapitalize, may be null 502 * @return uncapitalized String, {@code null} if null String input 503 * @see #capitalize(String) 504 */ 505 public static String uncapitalize(final String str) { 506 return uncapitalize(str, null); 507 } 508 509 /** 510 * Uncapitalizes all the whitespace separated words in a String. 511 * Only the first character of each word is changed. 512 * 513 * <p>The delimiters represent a set of characters understood to separate words. 514 * The first string character and the first non-delimiter character after a 515 * delimiter will be uncapitalized.</p> 516 * 517 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 518 * A {@code null} input String returns {@code null}.</p> 519 * 520 * <pre> 521 * WordUtils.uncapitalize(null, *) = null 522 * WordUtils.uncapitalize("", *) = "" 523 * WordUtils.uncapitalize(*, null) = * 524 * WordUtils.uncapitalize(*, new char[0]) = * 525 * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE" 526 * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine" 527 * </pre> 528 * 529 * @param str the String to uncapitalize, may be null 530 * @param delimiters set of characters to determine uncapitalization, null means whitespace 531 * @return uncapitalized String, {@code null} if null String input 532 * @see #capitalize(String) 533 */ 534 public static String uncapitalize(final String str, final char... delimiters) { 535 if (StringUtils.isEmpty(str)) { 536 return str; 537 } 538 final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters); 539 final int strLen = str.length(); 540 final int[] newCodePoints = new int[strLen]; 541 int outOffset = 0; 542 543 boolean uncapitalizeNext = true; 544 for (int index = 0; index < strLen;) { 545 final int codePoint = str.codePointAt(index); 546 547 if (isDelimiter.test(codePoint)) { 548 uncapitalizeNext = true; 549 newCodePoints[outOffset++] = codePoint; 550 index += Character.charCount(codePoint); 551 } else if (uncapitalizeNext) { 552 final int titleCaseCodePoint = Character.toLowerCase(codePoint); 553 newCodePoints[outOffset++] = titleCaseCodePoint; 554 index += Character.charCount(titleCaseCodePoint); 555 uncapitalizeNext = false; 556 } else { 557 newCodePoints[outOffset++] = codePoint; 558 index += Character.charCount(codePoint); 559 } 560 } 561 return new String(newCodePoints, 0, outOffset); 562 } 563 564 /** 565 * Wraps a single line of text, identifying words by {@code ' '}. 566 * 567 * <p>New lines will be separated by the system property line separator. 568 * Very long words, such as URLs will <em>not</em> be wrapped.</p> 569 * 570 * <p>Leading spaces on a new line are stripped. 571 * Trailing spaces are not stripped.</p> 572 * 573 * <table border="1"> 574 * <caption>Examples</caption> 575 * <tr> 576 * <th>input</th> 577 * <th>wrapLength</th> 578 * <th>result</th> 579 * </tr> 580 * <tr> 581 * <td>null</td> 582 * <td>*</td> 583 * <td>null</td> 584 * </tr> 585 * <tr> 586 * <td>""</td> 587 * <td>*</td> 588 * <td>""</td> 589 * </tr> 590 * <tr> 591 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 592 * <td>20</td> 593 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 594 * </tr> 595 * <tr> 596 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 597 * <td>20</td> 598 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td> 599 * </tr> 600 * <tr> 601 * <td>"Click here, https://commons.apache.org, to jump to the commons website"</td> 602 * <td>20</td> 603 * <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td> 604 * </tr> 605 * </table> 606 * 607 * (assuming that '\n' is the systems line separator) 608 * 609 * @param str the String to be word wrapped, may be null 610 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 611 * @return a line with newlines inserted, {@code null} if null input 612 */ 613 public static String wrap(final String str, final int wrapLength) { 614 return wrap(str, wrapLength, null, false); 615 } 616 617 /** 618 * Wraps a single line of text, identifying words by {@code ' '}. 619 * 620 * <p>Leading spaces on a new line are stripped. 621 * Trailing spaces are not stripped.</p> 622 * 623 * <table border="1"> 624 * <caption>Examples</caption> 625 * <tr> 626 * <th>input</th> 627 * <th>wrapLength</th> 628 * <th>newLineString</th> 629 * <th>wrapLongWords</th> 630 * <th>result</th> 631 * </tr> 632 * <tr> 633 * <td>null</td> 634 * <td>*</td> 635 * <td>*</td> 636 * <td>true/false</td> 637 * <td>null</td> 638 * </tr> 639 * <tr> 640 * <td>""</td> 641 * <td>*</td> 642 * <td>*</td> 643 * <td>true/false</td> 644 * <td>""</td> 645 * </tr> 646 * <tr> 647 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 648 * <td>20</td> 649 * <td>"\n"</td> 650 * <td>true/false</td> 651 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 652 * </tr> 653 * <tr> 654 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 655 * <td>20</td> 656 * <td>"<br />"</td> 657 * <td>true/false</td> 658 * <td>"Here is one line of<br />text that is going< 659 * br />to be wrapped after<br />20 columns."</td> 660 * </tr> 661 * <tr> 662 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 663 * <td>20</td> 664 * <td>null</td> 665 * <td>true/false</td> 666 * <td>"Here is one line of" + systemNewLine + "text that is going" 667 * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td> 668 * </tr> 669 * <tr> 670 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 671 * <td>20</td> 672 * <td>"\n"</td> 673 * <td>false</td> 674 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td> 675 * </tr> 676 * <tr> 677 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 678 * <td>20</td> 679 * <td>"\n"</td> 680 * <td>true</td> 681 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td> 682 * </tr> 683 * </table> 684 * 685 * @param str the String to be word wrapped, may be null 686 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 687 * @param newLineStr the string to insert for a new line, 688 * {@code null} uses the system property line separator 689 * @param wrapLongWords true if long words (such as URLs) should be wrapped 690 * @return a line with newlines inserted, {@code null} if null input 691 */ 692 public static String wrap(final String str, 693 final int wrapLength, 694 final String newLineStr, 695 final boolean wrapLongWords) { 696 return wrap(str, wrapLength, newLineStr, wrapLongWords, " "); 697 } 698 699 /** 700 * Wraps a single line of text, identifying words by {@code wrapOn}. 701 * 702 * <p>Leading spaces on a new line are stripped. 703 * Trailing spaces are not stripped.</p> 704 * 705 * <table border="1"> 706 * <caption>Examples</caption> 707 * <tr> 708 * <th>input</th> 709 * <th>wrapLength</th> 710 * <th>newLineString</th> 711 * <th>wrapLongWords</th> 712 * <th>wrapOn</th> 713 * <th>result</th> 714 * </tr> 715 * <tr> 716 * <td>null</td> 717 * <td>*</td> 718 * <td>*</td> 719 * <td>true/false</td> 720 * <td>*</td> 721 * <td>null</td> 722 * </tr> 723 * <tr> 724 * <td>""</td> 725 * <td>*</td> 726 * <td>*</td> 727 * <td>true/false</td> 728 * <td>*</td> 729 * <td>""</td> 730 * </tr> 731 * <tr> 732 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 733 * <td>20</td> 734 * <td>"\n"</td> 735 * <td>true/false</td> 736 * <td>" "</td> 737 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 738 * </tr> 739 * <tr> 740 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 741 * <td>20</td> 742 * <td>"<br />"</td> 743 * <td>true/false</td> 744 * <td>" "</td> 745 * <td>"Here is one line of<br />text that is going<br /> 746 * to be wrapped after<br />20 columns."</td> 747 * </tr> 748 * <tr> 749 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 750 * <td>20</td> 751 * <td>null</td> 752 * <td>true/false</td> 753 * <td>" "</td> 754 * <td>"Here is one line of" + systemNewLine + "text that is going" 755 * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td> 756 * </tr> 757 * <tr> 758 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 759 * <td>20</td> 760 * <td>"\n"</td> 761 * <td>false</td> 762 * <td>" "</td> 763 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td> 764 * </tr> 765 * <tr> 766 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 767 * <td>20</td> 768 * <td>"\n"</td> 769 * <td>true</td> 770 * <td>" "</td> 771 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td> 772 * </tr> 773 * <tr> 774 * <td>"flammable/inflammable"</td> 775 * <td>20</td> 776 * <td>"\n"</td> 777 * <td>true</td> 778 * <td>"/"</td> 779 * <td>"flammable\ninflammable"</td> 780 * </tr> 781 * </table> 782 * @param str the String to be word wrapped, may be null 783 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 784 * @param newLineStr the string to insert for a new line, 785 * {@code null} uses the system property line separator 786 * @param wrapLongWords true if long words (such as URLs) should be wrapped 787 * @param wrapOn regex expression to be used as a breakable characters, 788 * if blank string is provided a space character will be used 789 * @return a line with newlines inserted, {@code null} if null input 790 */ 791 public static String wrap(final String str, 792 int wrapLength, 793 String newLineStr, 794 final boolean wrapLongWords, 795 String wrapOn) { 796 if (str == null) { 797 return null; 798 } 799 if (newLineStr == null) { 800 newLineStr = System.lineSeparator(); 801 } 802 if (wrapLength < 1) { 803 wrapLength = 1; 804 } 805 if (StringUtils.isBlank(wrapOn)) { 806 wrapOn = " "; 807 } 808 final Pattern patternToWrapOn = Pattern.compile(wrapOn); 809 final int inputLineLength = str.length(); 810 int offset = 0; 811 final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32); 812 int matcherSize = -1; 813 814 while (offset < inputLineLength) { 815 int spaceToWrapAt = -1; 816 Matcher matcher = patternToWrapOn.matcher(str.substring(offset, 817 Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength))); 818 if (matcher.find()) { 819 if (matcher.start() == 0) { 820 matcherSize = matcher.end(); 821 if (matcherSize != 0) { 822 offset += matcher.end(); 823 continue; 824 } 825 offset += 1; 826 } 827 spaceToWrapAt = matcher.start() + offset; 828 } 829 830 // only last line without leading spaces is left 831 if (inputLineLength - offset <= wrapLength) { 832 break; 833 } 834 835 while (matcher.find()) { 836 spaceToWrapAt = matcher.start() + offset; 837 } 838 839 if (spaceToWrapAt >= offset) { 840 // normal case 841 wrappedLine.append(str, offset, spaceToWrapAt); 842 wrappedLine.append(newLineStr); 843 offset = spaceToWrapAt + 1; 844 845 } else // really long word or URL 846 if (wrapLongWords) { 847 if (matcherSize == 0) { 848 offset--; 849 } 850 // wrap really long word one line at a time 851 wrappedLine.append(str, offset, wrapLength + offset); 852 wrappedLine.append(newLineStr); 853 offset += wrapLength; 854 matcherSize = -1; 855 } else { 856 // do not wrap really long word, just extend beyond limit 857 matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength)); 858 if (matcher.find()) { 859 matcherSize = matcher.end() - matcher.start(); 860 spaceToWrapAt = matcher.start() + offset + wrapLength; 861 } 862 863 if (spaceToWrapAt >= 0) { 864 if (matcherSize == 0 && offset != 0) { 865 offset--; 866 } 867 wrappedLine.append(str, offset, spaceToWrapAt); 868 wrappedLine.append(newLineStr); 869 offset = spaceToWrapAt + 1; 870 } else { 871 if (matcherSize == 0 && offset != 0) { 872 offset--; 873 } 874 wrappedLine.append(str, offset, str.length()); 875 offset = inputLineLength; 876 matcherSize = -1; 877 } 878 } 879 } 880 881 if (matcherSize == 0 && offset < inputLineLength) { 882 offset--; 883 } 884 885 // Whatever is left in line is short enough to just pass through 886 wrappedLine.append(str, offset, str.length()); 887 888 return wrappedLine.toString(); 889 } 890 891 /** 892 * {@code WordUtils} instances should NOT be constructed in 893 * standard programming. Instead, the class should be used as 894 * {@code WordUtils.wrap("foo bar", 20);}. 895 * 896 * <p>This constructor is public to permit tools that require a JavaBean 897 * instance to operate.</p> 898 */ 899 public WordUtils() { 900 } 901 }