001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
028
029/**
030 * Operations on Strings that contain words.
031 *
032 * <p>
033 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
034 * {@code null} input. Each method documents its behavior in more detail.
035 * </p>
036 *
037 * @since 1.1
038 */
039public class WordUtils {
040
041    /**
042     * Abbreviates the words nicely.
043     *
044     * <p>
045     * This method searches for the first space after the lower limit and abbreviates
046     * the String there. It will also append any String passed as a parameter
047     * to the end of the String. The upper limit can be specified to forcibly
048     * abbreviate a String.
049     * </p>
050     *
051     * @param str         the string to be abbreviated. If null is passed, null is returned.
052     *                    If the empty String is passed, the empty string is returned.
053     * @param lower       the lower limit; negative value is treated as zero.
054     * @param upper       the upper limit; specify -1 if no limit is desired.
055     *                    The upper limit cannot be lower than the lower limit.
056     * @param appendToEnd String to be appended to the end of the abbreviated string.
057     *                    This is appended ONLY if the string was indeed abbreviated.
058     *                    The append does not count towards the lower or upper limits.
059     * @return The abbreviated String.
060     *
061     * <pre>
062     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null));     = "Now"
063     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null));    = "Now is the"
064     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null));    = "Now is the time for all"
065     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, ""));       = "Now"
066     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, ""));      = "Now is the"
067     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, ""));      = "Now is the time for all"
068     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ..."));   = "Now ..."
069     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ..."));  = "Now is the ..."
070     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ..."));  = "Now is the time for all ..."
071     * WordUtils.abbreviate("Now is the time for all good men", 0, -1, ""));       = "Now"
072     * WordUtils.abbreviate("Now is the time for all good men", 10, -1, ""));      = "Now is the"
073     * WordUtils.abbreviate("Now is the time for all good men", 20, -1, ""));      = "Now is the time for all"
074     * WordUtils.abbreviate("Now is the time for all good men", 50, -1, ""));      = "Now is the time for all good men"
075     * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, ""));    = "Now is the time for all good men"
076     * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null));    = IllegalArgumentException
077     * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null));     = IllegalArgumentException
078     * </pre>
079     */
080    public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
081        Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
082        Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
083        if (StringUtils.isEmpty(str)) {
084            return str;
085        }
086
087        // if the lower value is greater than the length of the string,
088        // set to the length of the string
089        if (lower > str.length()) {
090            lower = str.length();
091        }
092
093        // if the upper value is -1 (i.e. no limit) or is greater
094        // than the length of the string, set to the length of the string
095        if (upper == -1 || upper > str.length()) {
096            upper = str.length();
097        }
098
099        final StringBuilder result = new StringBuilder();
100        final int index = StringUtils.indexOf(str, " ", lower);
101        if (index == -1) {
102            result.append(str, 0, upper);
103            // only if abbreviation has occurred do we append the appendToEnd value
104            if (upper != str.length()) {
105                result.append(StringUtils.defaultString(appendToEnd));
106            }
107        } else {
108            result.append(str, 0, Math.min(index, upper));
109            result.append(StringUtils.defaultString(appendToEnd));
110        }
111
112        return result.toString();
113    }
114
115    /**
116     * Capitalizes all the whitespace separated words in a String.
117     * Only the first character of each word is changed. To convert the
118     * rest of each word to lowercase at the same time,
119     * use {@link #capitalizeFully(String)}.
120     *
121     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
122     * A {@code null} input String returns {@code null}.
123     * Capitalization uses the Unicode title case, normally equivalent to
124     * upper case.</p>
125     *
126     * <pre>
127     * WordUtils.capitalize(null)        = null
128     * WordUtils.capitalize("")          = ""
129     * WordUtils.capitalize("i am FINE") = "I Am FINE"
130     * </pre>
131     *
132     * @param str  the String to capitalize, may be null
133     * @return capitalized String, {@code null} if null String input
134     * @see #uncapitalize(String)
135     * @see #capitalizeFully(String)
136     */
137    public static String capitalize(final String str) {
138        return capitalize(str, null);
139    }
140
141    /**
142     * Capitalizes all the delimiter separated words in a String.
143     * Only the first character of each word is changed. To convert the
144     * rest of each word to lowercase at the same time,
145     * use {@link #capitalizeFully(String, char[])}.
146     *
147     * <p>The delimiters represent a set of characters understood to separate words.
148     * The first string character and the first non-delimiter character after a
149     * delimiter will be capitalized.</p>
150     *
151     * <p>A {@code null} input String returns {@code null}.
152     * Capitalization uses the Unicode title case, normally equivalent to
153     * upper case.</p>
154     *
155     * <pre>
156     * WordUtils.capitalize(null, *)            = null
157     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = * with only its first character capitalized
159     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
160     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
161     * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
162     * </pre>
163     *
164     * @param str  the String to capitalize, may be null
165     * @param delimiters  set of characters to determine capitalization, null means whitespace
166     * @return capitalized String, {@code null} if null String input
167     * @see #uncapitalize(String)
168     * @see #capitalizeFully(String)
169     */
170    public static String capitalize(final String str, final char... delimiters) {
171        if (StringUtils.isEmpty(str)) {
172            return str;
173        }
174        final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
175        final int strLen = str.length();
176        final int[] newCodePoints = new int[strLen];
177        int outOffset = 0;
178
179        boolean capitalizeNext = true;
180        for (int index = 0; index < strLen;) {
181            final int codePoint = str.codePointAt(index);
182
183            if (isDelimiter.test(codePoint)) {
184                capitalizeNext = true;
185                newCodePoints[outOffset++] = codePoint;
186                index += Character.charCount(codePoint);
187            } else if (capitalizeNext) {
188                final int titleCaseCodePoint = Character.toTitleCase(codePoint);
189                newCodePoints[outOffset++] = titleCaseCodePoint;
190                index += Character.charCount(titleCaseCodePoint);
191                capitalizeNext = false;
192            } else {
193                newCodePoints[outOffset++] = codePoint;
194                index += Character.charCount(codePoint);
195            }
196        }
197        return new String(newCodePoints, 0, outOffset);
198    }
199
200    /**
201     * Converts all the whitespace separated words in a String into capitalized words,
202     * that is each word is made up of a titlecase character and then a series of
203     * lowercase characters.
204     *
205     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
206     * A {@code null} input String returns {@code null}.
207     * Capitalization uses the Unicode title case, normally equivalent to
208     * upper case.</p>
209     *
210     * <pre>
211     * WordUtils.capitalizeFully(null)        = null
212     * WordUtils.capitalizeFully("")          = ""
213     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
214     * </pre>
215     *
216     * @param str  the String to capitalize, may be null
217     * @return capitalized String, {@code null} if null String input
218     */
219    public static String capitalizeFully(final String str) {
220        return capitalizeFully(str, null);
221    }
222
223    /**
224     * Converts all the delimiter separated words in a String into capitalized words,
225     * that is each word is made up of a titlecase character and then a series of
226     * lowercase characters.
227     *
228     * <p>The delimiters represent a set of characters understood to separate words.
229     * The first string character and the first non-delimiter character after a
230     * delimiter will be capitalized.</p>
231     *
232     * <p>A {@code null} input String returns {@code null}.
233     * Capitalization uses the Unicode title case, normally equivalent to
234     * upper case.</p>
235     *
236     * <pre>
237     * WordUtils.capitalizeFully(null, *)            = null
238     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully(*, null)            = same as capitalizeFully(*)
     * WordUtils.capitalizeFully(*, new char[0])     = * lower-cased, with only its first character capitalized
241     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
242     * </pre>
243     *
244     * @param str  the String to capitalize, may be null
245     * @param delimiters  set of characters to determine capitalization, null means whitespace
246     * @return capitalized String, {@code null} if null String input
247     */
248    public static String capitalizeFully(String str, final char... delimiters) {
249        if (StringUtils.isEmpty(str)) {
250            return str;
251        }
252        str = str.toLowerCase();
253        return capitalize(str, delimiters);
254    }
255
256    /**
257     * Checks if the String contains all words in the given array.
258     *
259     * <p>
260     * A {@code null} String will return {@code false}. A {@code null}, zero
261     * length search array or if one element of array is null will return {@code false}.
262     * </p>
263     *
264     * <pre>
265     * WordUtils.containsAllWords(null, *)            = false
266     * WordUtils.containsAllWords("", *)              = false
267     * WordUtils.containsAllWords(*, null)            = false
268     * WordUtils.containsAllWords(*, [])              = false
269     * WordUtils.containsAllWords("abcd", "ab", "cd") = false
270     * WordUtils.containsAllWords("abc def", "def", "abc") = true
271     * </pre>
272     *
273     * @param word The CharSequence to check, may be null
274     * @param words The array of String words to search for, may be null
275     * @return {@code true} if all search words are found, {@code false} otherwise
276     */
277    public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
278        if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
279            return false;
280        }
281        for (final CharSequence w : words) {
282            if (StringUtils.isBlank(w)) {
283                return false;
284            }
285            final Pattern p = Pattern.compile(".*\\b" + Pattern.quote(w.toString()) + "\\b.*");
286            if (!p.matcher(word).matches()) {
287                return false;
288            }
289        }
290        return true;
291    }
292
293    /**
294     * Given the array of delimiters supplied; returns a function determining whether a character code point is a delimiter.
295     * The function provides O(1) lookup time.
     * Whitespace is defined by {@link Character#isWhitespace(char)} and is used as the default value if delimiters is null.
297     *
298     * @param delimiters set of characters to determine delimiters, null means whitespace
     * @return a {@code Predicate<Integer>} taking a code point value as an argument and returning true if it is a delimiter.
300     */
301    private static Predicate<Integer> generateIsDelimiterFunction(final char[] delimiters) {
302        final Predicate<Integer> isDelimiter;
303        if (delimiters == null || delimiters.length == 0) {
304            isDelimiter = delimiters == null ? Character::isWhitespace : c -> false;
305        } else {
306            final Set<Integer> delimiterSet = new HashSet<>();
307            for (int index = 0; index < delimiters.length; index++) {
308                delimiterSet.add(Character.codePointAt(delimiters, index));
309            }
310            isDelimiter = delimiterSet::contains;
311        }
312
313        return isDelimiter;
314    }
315
316    /**
317     * Extracts the initial characters from each word in the String.
318     *
319     * <p>All first characters after whitespace are returned as a new string.
320     * Their case is not changed.</p>
321     *
322     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
323     * A {@code null} input String returns {@code null}.</p>
324     *
325     * <pre>
326     * WordUtils.initials(null)             = null
327     * WordUtils.initials("")               = ""
328     * WordUtils.initials("Ben John Lee")   = "BJL"
329     * WordUtils.initials("Ben J.Lee")      = "BJ"
330     * </pre>
331     *
332     * @param str  the String to get initials from, may be null
333     * @return String of initial letters, {@code null} if null String input
334     * @see #initials(String,char[])
335     */
336    public static String initials(final String str) {
337        return initials(str, null);
338    }
339
340    /**
341     * Extracts the initial characters from each word in the String.
342     *
343     * <p>All first characters after the defined delimiters are returned as a new string.
344     * Their case is not changed.</p>
345     *
346     * <p>If the delimiters array is null, then Whitespace is used.
347     * Whitespace is defined by {@link Character#isWhitespace(char)}.
348     * A {@code null} input String returns {@code null}.
349     * An empty delimiter array returns an empty String.</p>
350     *
351     * <pre>
352     * WordUtils.initials(null, *)                = null
353     * WordUtils.initials("", *)                  = ""
354     * WordUtils.initials("Ben John Lee", null)   = "BJL"
355     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
356     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
357     * WordUtils.initials(*, new char[0])         = ""
358     * </pre>
359     *
360     * @param str  the String to get initials from, may be null
361     * @param delimiters  set of characters to determine words, null means whitespace
362     * @return String of initial characters, {@code null} if null String input
363     * @see #initials(String)
364     */
365    public static String initials(final String str, final char... delimiters) {
366        if (StringUtils.isEmpty(str)) {
367            return str;
368        }
369        if (delimiters != null && delimiters.length == 0) {
370            return StringUtils.EMPTY;
371        }
372        final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
373        final int strLen = str.length();
374        final int[] newCodePoints = new int[strLen / 2 + 1];
375        int count = 0;
376        boolean lastWasGap = true;
377        for (int i = 0; i < strLen;) {
378            final int codePoint = str.codePointAt(i);
379
380            if (isDelimiter.test(codePoint)) {
381                lastWasGap = true;
382            } else if (lastWasGap) {
383                newCodePoints[count++] = codePoint;
384                lastWasGap = false;
385            }
386
387            i += Character.charCount(codePoint);
388        }
389        return new String(newCodePoints, 0, count);
390    }
391
392    /**
393     * Is the character a delimiter.
394     *
395     * @param ch the character to check
396     * @param delimiters the delimiters
397     * @return true if it is a delimiter
398     * @deprecated as of 1.2 and will be removed in 2.0
399     */
400    @Deprecated
401    public static boolean isDelimiter(final char ch, final char[] delimiters) {
402        if (delimiters == null) {
403            return Character.isWhitespace(ch);
404        }
405        for (final char delimiter : delimiters) {
406            if (ch == delimiter) {
407                return true;
408            }
409        }
410        return false;
411    }
412
413    /**
414     * Is the codePoint a delimiter.
415     *
     * @param codePoint the code point to check
417     * @param delimiters the delimiters
418     * @return true if it is a delimiter
419     * @deprecated as of 1.2 and will be removed in 2.0
420     */
421    @Deprecated
422    public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
423        if (delimiters == null) {
424            return Character.isWhitespace(codePoint);
425        }
426        for (int index = 0; index < delimiters.length; index++) {
427            final int delimiterCodePoint = Character.codePointAt(delimiters, index);
428            if (delimiterCodePoint == codePoint) {
429                return true;
430            }
431        }
432        return false;
433    }
434
435    /**
436     * Swaps the case of a String using a word based algorithm.
437     *
438     * <ul>
439     *  <li>Upper case character converts to Lower case</li>
440     *  <li>Title case character converts to Lower case</li>
441     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
442     *  <li>Other Lower case character converts to Upper case</li>
443     * </ul>
444     *
445     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
446     * A {@code null} input String returns {@code null}.</p>
447     *
448     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
452     * </pre>
453     *
454     * @param str  the String to swap case, may be null
455     * @return The changed String, {@code null} if null String input
456     */
457    public static String swapCase(final String str) {
458        if (StringUtils.isEmpty(str)) {
459            return str;
460        }
461        final int strLen = str.length();
462        final int[] newCodePoints = new int[strLen];
463        int outOffset = 0;
464        boolean whitespace = true;
465        for (int index = 0; index < strLen;) {
466            final int oldCodepoint = str.codePointAt(index);
467            final int newCodePoint;
468            if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
469                newCodePoint = Character.toLowerCase(oldCodepoint);
470                whitespace = false;
471            } else if (Character.isLowerCase(oldCodepoint)) {
472                if (whitespace) {
473                    newCodePoint = Character.toTitleCase(oldCodepoint);
474                    whitespace = false;
475                } else {
476                    newCodePoint = Character.toUpperCase(oldCodepoint);
477                }
478            } else {
479                whitespace = Character.isWhitespace(oldCodepoint);
480                newCodePoint = oldCodepoint;
481            }
482            newCodePoints[outOffset++] = newCodePoint;
483            index += Character.charCount(newCodePoint);
484        }
485        return new String(newCodePoints, 0, outOffset);
486    }
487
488    /**
489     * Uncapitalizes all the whitespace separated words in a String.
490     * Only the first character of each word is changed.
491     *
492     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
493     * A {@code null} input String returns {@code null}.</p>
494     *
495     * <pre>
496     * WordUtils.uncapitalize(null)        = null
497     * WordUtils.uncapitalize("")          = ""
498     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
499     * </pre>
500     *
501     * @param str  the String to uncapitalize, may be null
502     * @return uncapitalized String, {@code null} if null String input
503     * @see #capitalize(String)
504     */
505    public static String uncapitalize(final String str) {
506        return uncapitalize(str, null);
507    }
508
509    /**
     * Uncapitalizes all the delimiter separated words in a String.
511     * Only the first character of each word is changed.
512     *
513     * <p>The delimiters represent a set of characters understood to separate words.
514     * The first string character and the first non-delimiter character after a
515     * delimiter will be uncapitalized.</p>
516     *
517     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
518     * A {@code null} input String returns {@code null}.</p>
519     *
520     * <pre>
521     * WordUtils.uncapitalize(null, *)            = null
522     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize(*, null)            = same as uncapitalize(*)
     * WordUtils.uncapitalize(*, new char[0])     = * with only its first character uncapitalized
525     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
526     * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
527     * </pre>
528     *
529     * @param str  the String to uncapitalize, may be null
530     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
531     * @return uncapitalized String, {@code null} if null String input
532     * @see #capitalize(String)
533     */
534    public static String uncapitalize(final String str, final char... delimiters) {
535        if (StringUtils.isEmpty(str)) {
536            return str;
537        }
538        final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
539        final int strLen = str.length();
540        final int[] newCodePoints = new int[strLen];
541        int outOffset = 0;
542
543        boolean uncapitalizeNext = true;
544        for (int index = 0; index < strLen;) {
545            final int codePoint = str.codePointAt(index);
546
547            if (isDelimiter.test(codePoint)) {
548                uncapitalizeNext = true;
549                newCodePoints[outOffset++] = codePoint;
550                index += Character.charCount(codePoint);
551            } else if (uncapitalizeNext) {
552                final int titleCaseCodePoint = Character.toLowerCase(codePoint);
553                newCodePoints[outOffset++] = titleCaseCodePoint;
554                index += Character.charCount(titleCaseCodePoint);
555                uncapitalizeNext = false;
556            } else {
557                newCodePoints[outOffset++] = codePoint;
558                index += Character.charCount(codePoint);
559            }
560        }
561        return new String(newCodePoints, 0, outOffset);
562    }
563
564    /**
565     * Wraps a single line of text, identifying words by {@code ' '}.
566     *
567     * <p>New lines will be separated by the system property line separator.
568     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
569     *
570     * <p>Leading spaces on a new line are stripped.
571     * Trailing spaces are not stripped.</p>
572     *
573     * <table border="1">
574     *  <caption>Examples</caption>
575     *  <tr>
576     *   <th>input</th>
577     *   <th>wrapLength</th>
578     *   <th>result</th>
579     *  </tr>
580     *  <tr>
581     *   <td>null</td>
582     *   <td>*</td>
583     *   <td>null</td>
584     *  </tr>
585     *  <tr>
586     *   <td>""</td>
587     *   <td>*</td>
588     *   <td>""</td>
589     *  </tr>
590     *  <tr>
591     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
592     *   <td>20</td>
593     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
594     *  </tr>
595     *  <tr>
596     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
597     *   <td>20</td>
598     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
599     *  </tr>
600     *  <tr>
601     *   <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
602     *   <td>20</td>
603     *   <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
604     *  </tr>
605     * </table>
606     *
607     * (assuming that '\n' is the systems line separator)
608     *
609     * @param str  the String to be word wrapped, may be null
610     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
611     * @return a line with newlines inserted, {@code null} if null input
612     */
613    public static String wrap(final String str, final int wrapLength) {
614        return wrap(str, wrapLength, null, false);
615    }
616
617    /**
618     * Wraps a single line of text, identifying words by {@code ' '}.
619     *
620     * <p>Leading spaces on a new line are stripped.
621     * Trailing spaces are not stripped.</p>
622     *
623     * <table border="1">
624     *  <caption>Examples</caption>
625     *  <tr>
626     *   <th>input</th>
627     *   <th>wrapLength</th>
628     *   <th>newLineString</th>
629     *   <th>wrapLongWords</th>
630     *   <th>result</th>
631     *  </tr>
632     *  <tr>
633     *   <td>null</td>
634     *   <td>*</td>
635     *   <td>*</td>
636     *   <td>true/false</td>
637     *   <td>null</td>
638     *  </tr>
639     *  <tr>
640     *   <td>""</td>
641     *   <td>*</td>
642     *   <td>*</td>
643     *   <td>true/false</td>
644     *   <td>""</td>
645     *  </tr>
646     *  <tr>
647     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
648     *   <td>20</td>
649     *   <td>"\n"</td>
650     *   <td>true/false</td>
651     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
652     *  </tr>
653     *  <tr>
654     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
655     *   <td>20</td>
656     *   <td>"&lt;br /&gt;"</td>
657     *   <td>true/false</td>
658     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;
659     *   br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
660     *  </tr>
661     *  <tr>
662     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
663     *   <td>20</td>
664     *   <td>null</td>
665     *   <td>true/false</td>
666     *   <td>"Here is one line of" + systemNewLine + "text that is going"
667     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
668     *  </tr>
669     *  <tr>
670     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
671     *   <td>20</td>
672     *   <td>"\n"</td>
673     *   <td>false</td>
674     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
675     *  </tr>
676     *  <tr>
677     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
678     *   <td>20</td>
679     *   <td>"\n"</td>
680     *   <td>true</td>
681     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
682     *  </tr>
683     * </table>
684     *
685     * @param str  the String to be word wrapped, may be null
686     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
687     * @param newLineStr  the string to insert for a new line,
688     *  {@code null} uses the system property line separator
689     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
690     * @return a line with newlines inserted, {@code null} if null input
691     */
692    public static String wrap(final String str,
693                              final int wrapLength,
694                              final String newLineStr,
695                              final boolean wrapLongWords) {
696        return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
697    }
698
699    /**
700     * Wraps a single line of text, identifying words by {@code wrapOn}.
701     *
702     * <p>Leading spaces on a new line are stripped.
703     * Trailing spaces are not stripped.</p>
704     *
705     * <table border="1">
706     *  <caption>Examples</caption>
707     *  <tr>
708     *   <th>input</th>
709     *   <th>wrapLength</th>
710     *   <th>newLineString</th>
711     *   <th>wrapLongWords</th>
712     *   <th>wrapOn</th>
713     *   <th>result</th>
714     *  </tr>
715     *  <tr>
716     *   <td>null</td>
717     *   <td>*</td>
718     *   <td>*</td>
719     *   <td>true/false</td>
720     *   <td>*</td>
721     *   <td>null</td>
722     *  </tr>
723     *  <tr>
724     *   <td>""</td>
725     *   <td>*</td>
726     *   <td>*</td>
727     *   <td>true/false</td>
728     *   <td>*</td>
729     *   <td>""</td>
730     *  </tr>
731     *  <tr>
732     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
733     *   <td>20</td>
734     *   <td>"\n"</td>
735     *   <td>true/false</td>
736     *   <td>" "</td>
737     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
738     *  </tr>
739     *  <tr>
740     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
741     *   <td>20</td>
742     *   <td>"&lt;br /&gt;"</td>
743     *   <td>true/false</td>
744     *   <td>" "</td>
745     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;
746     *   to be wrapped after&lt;br /&gt;20 columns."</td>
747     *  </tr>
748     *  <tr>
749     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
750     *   <td>20</td>
751     *   <td>null</td>
752     *   <td>true/false</td>
753     *   <td>" "</td>
754     *   <td>"Here is one line of" + systemNewLine + "text that is going"
755     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
756     *  </tr>
757     *  <tr>
758     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
759     *   <td>20</td>
760     *   <td>"\n"</td>
761     *   <td>false</td>
762     *   <td>" "</td>
763     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
764     *  </tr>
765     *  <tr>
766     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
767     *   <td>20</td>
768     *   <td>"\n"</td>
769     *   <td>true</td>
770     *   <td>" "</td>
771     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
772     *  </tr>
773     *  <tr>
774     *   <td>"flammable/inflammable"</td>
775     *   <td>20</td>
776     *   <td>"\n"</td>
777     *   <td>true</td>
778     *   <td>"/"</td>
779     *   <td>"flammable\ninflammable"</td>
780     *  </tr>
781     * </table>
782     * @param str  the String to be word wrapped, may be null
783     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
784     * @param newLineStr  the string to insert for a new line,
785     *  {@code null} uses the system property line separator
786     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
787     * @param wrapOn regex expression to be used as a breakable characters,
788     *               if blank string is provided a space character will be used
789     * @return a line with newlines inserted, {@code null} if null input
790     */
791    public static String wrap(final String str,
792                              int wrapLength,
793                              String newLineStr,
794                              final boolean wrapLongWords,
795                              String wrapOn) {
796        if (str == null) {
797            return null;
798        }
799        if (newLineStr == null) {
800            newLineStr = System.lineSeparator();
801        }
802        if (wrapLength < 1) {
803            wrapLength = 1;
804        }
805        if (StringUtils.isBlank(wrapOn)) {
806            wrapOn = " ";
807        }
808        final Pattern patternToWrapOn = Pattern.compile(wrapOn);
809        final int inputLineLength = str.length();
810        int offset = 0;
811        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
812        int matcherSize = -1;
813
814        while (offset < inputLineLength) {
815            int spaceToWrapAt = -1;
816            Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
817                    Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
818            if (matcher.find()) {
819                if (matcher.start() == 0) {
820                    matcherSize = matcher.end();
821                    if (matcherSize != 0) {
822                        offset += matcher.end();
823                        continue;
824                    }
825                    offset += 1;
826                }
827                spaceToWrapAt = matcher.start() + offset;
828            }
829
830            // only last line without leading spaces is left
831            if (inputLineLength - offset <= wrapLength) {
832                break;
833            }
834
835            while (matcher.find()) {
836                spaceToWrapAt = matcher.start() + offset;
837            }
838
839            if (spaceToWrapAt >= offset) {
840                // normal case
841                wrappedLine.append(str, offset, spaceToWrapAt);
842                wrappedLine.append(newLineStr);
843                offset = spaceToWrapAt + 1;
844
845            } else // really long word or URL
846            if (wrapLongWords) {
847                if (matcherSize == 0) {
848                    offset--;
849                }
850                // wrap really long word one line at a time
851                wrappedLine.append(str, offset, wrapLength + offset);
852                wrappedLine.append(newLineStr);
853                offset += wrapLength;
854                matcherSize = -1;
855            } else {
856                // do not wrap really long word, just extend beyond limit
857                matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
858                if (matcher.find()) {
859                    matcherSize = matcher.end() - matcher.start();
860                    spaceToWrapAt = matcher.start() + offset + wrapLength;
861                }
862
863                if (spaceToWrapAt >= 0) {
864                    if (matcherSize == 0 && offset != 0) {
865                        offset--;
866                    }
867                    wrappedLine.append(str, offset, spaceToWrapAt);
868                    wrappedLine.append(newLineStr);
869                    offset = spaceToWrapAt + 1;
870                } else {
871                    if (matcherSize == 0 && offset != 0) {
872                        offset--;
873                    }
874                    wrappedLine.append(str, offset, str.length());
875                    offset = inputLineLength;
876                    matcherSize = -1;
877                }
878            }
879        }
880
881        if (matcherSize == 0 && offset < inputLineLength) {
882            offset--;
883        }
884
885        // Whatever is left in line is short enough to just pass through
886        wrappedLine.append(str, offset, str.length());
887
888        return wrappedLine.toString();
889    }
890
891    /**
892     * {@code WordUtils} instances should NOT be constructed in
893     * standard programming. Instead, the class should be used as
894     * {@code WordUtils.wrap("foo bar", 20);}.
895     *
896     * <p>This constructor is public to permit tools that require a JavaBean
897     * instance to operate.</p>
898     */
899    public WordUtils() {
900    }
901 }