001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019import java.util.regex.Matcher;
020import java.util.regex.Pattern;
021
022/**
023 * Helpers to process Strings using regular expressions.
024 * @see java.util.regex.Pattern
025 * @since 3.8
026 */
027public class RegExUtils {
028
029    /**
030     * Compiles the given regular expression into a pattern with the {@link Pattern#DOTALL} flag.
031     *
032     * @param regex The expression to be compiled
033     * @return the given regular expression compiled into a pattern with the {@link Pattern#DOTALL} flag.
034     * @since 3.13.0
035     */
036    public static Pattern dotAll(final String regex) {
037        return Pattern.compile(regex, Pattern.DOTALL);
038    }
039
040    /**
041     * Compiles the given regular expression into a pattern with the {@link Pattern#DOTALL} flag, then creates a matcher that will match the given text against
042     * this pattern.
043     *
044     * @param regex The expression to be compiled.
045     * @param text  The character sequence to be matched.
046     * @return A new matcher for this pattern.
047     * @since 3.13.0
048     */
049    public static Matcher dotAllMatcher(final String regex, final String text) {
050        return dotAll(regex).matcher(text);
051    }
052
053    /**
054     * Removes each substring of the text String that matches the given regular expression pattern.
055     *
056     * This method is a {@code null} safe equivalent to:
057     * <ul>
058     *  <li>{@code pattern.matcher(text).replaceAll(StringUtils.EMPTY)}</li>
059     * </ul>
060     *
061     * <p>A {@code null} reference passed to this method is a no-op.</p>
062     *
063     * <pre>{@code
064     * StringUtils.removeAll(null, *)      = null
065     * StringUtils.removeAll("any", (Pattern) null)  = "any"
066     * StringUtils.removeAll("any", Pattern.compile(""))    = "any"
067     * StringUtils.removeAll("any", Pattern.compile(".*"))  = ""
068     * StringUtils.removeAll("any", Pattern.compile(".+"))  = ""
069     * StringUtils.removeAll("abc", Pattern.compile(".?"))  = ""
070     * StringUtils.removeAll("A<__>\n<__>B", Pattern.compile("<.*>"))      = "A\nB"
071     * StringUtils.removeAll("A<__>\n<__>B", Pattern.compile("(?s)<.*>"))  = "AB"
072     * StringUtils.removeAll("A<__>\n<__>B", Pattern.compile("<.*>", Pattern.DOTALL))  = "AB"
073     * StringUtils.removeAll("ABCabc123abc", Pattern.compile("[a-z]"))     = "ABC123"
074     * }</pre>
075     *
076     * @param text  text to remove from, may be null
077     * @param regex  the regular expression to which this string is to be matched
078     * @return  the text with any removes processed,
079     *              {@code null} if null String input
080     *
081     * @see #replaceAll(String, Pattern, String)
082     * @see java.util.regex.Matcher#replaceAll(String)
083     * @see java.util.regex.Pattern
084     */
085    public static String removeAll(final String text, final Pattern regex) {
086        return replaceAll(text, regex, StringUtils.EMPTY);
087    }
088
089    /**
090     * Removes each substring of the text String that matches the given regular expression.
091     *
092     * This method is a {@code null} safe equivalent to:
093     * <ul>
094     *  <li>{@code text.replaceAll(regex, StringUtils.EMPTY)}</li>
095     *  <li>{@code Pattern.compile(regex).matcher(text).replaceAll(StringUtils.EMPTY)}</li>
096     * </ul>
097     *
098     * <p>A {@code null} reference passed to this method is a no-op.</p>
099     *
100     * <p>Unlike in the {@link #removePattern(String, String)} method, the {@link Pattern#DOTALL} option
101     * is NOT automatically added.
102     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
103     * DOTALL is also known as single-line mode in Perl.</p>
104     *
105     * <pre>{@code
106     * StringUtils.removeAll(null, *)      = null
107     * StringUtils.removeAll("any", (String) null)  = "any"
108     * StringUtils.removeAll("any", "")    = "any"
109     * StringUtils.removeAll("any", ".*")  = ""
110     * StringUtils.removeAll("any", ".+")  = ""
111     * StringUtils.removeAll("abc", ".?")  = ""
112     * StringUtils.removeAll("A<__>\n<__>B", "<.*>")      = "A\nB"
113     * StringUtils.removeAll("A<__>\n<__>B", "(?s)<.*>")  = "AB"
114     * StringUtils.removeAll("ABCabc123abc", "[a-z]")     = "ABC123"
115     * }</pre>
116     *
117     * @param text  text to remove from, may be null
118     * @param regex  the regular expression to which this string is to be matched
119     * @return  the text with any removes processed,
120     *              {@code null} if null String input
121     *
122     * @throws  java.util.regex.PatternSyntaxException
123     *              if the regular expression's syntax is invalid
124     *
125     * @see #replaceAll(String, String, String)
126     * @see #removePattern(String, String)
127     * @see String#replaceAll(String, String)
128     * @see java.util.regex.Pattern
129     * @see java.util.regex.Pattern#DOTALL
130     */
131    public static String removeAll(final String text, final String regex) {
132        return replaceAll(text, regex, StringUtils.EMPTY);
133    }
134
135    /**
136     * Removes the first substring of the text string that matches the given regular expression pattern.
137     *
138     * This method is a {@code null} safe equivalent to:
139     * <ul>
140     *  <li>{@code pattern.matcher(text).replaceFirst(StringUtils.EMPTY)}</li>
141     * </ul>
142     *
143     * <p>A {@code null} reference passed to this method is a no-op.</p>
144     *
145     * <pre>{@code
146     * StringUtils.removeFirst(null, *)      = null
147     * StringUtils.removeFirst("any", (Pattern) null)  = "any"
148     * StringUtils.removeFirst("any", Pattern.compile(""))    = "any"
149     * StringUtils.removeFirst("any", Pattern.compile(".*"))  = ""
150     * StringUtils.removeFirst("any", Pattern.compile(".+"))  = ""
151     * StringUtils.removeFirst("abc", Pattern.compile(".?"))  = "bc"
152     * StringUtils.removeFirst("A<__>\n<__>B", Pattern.compile("<.*>"))      = "A\n<__>B"
153     * StringUtils.removeFirst("A<__>\n<__>B", Pattern.compile("(?s)<.*>"))  = "AB"
154     * StringUtils.removeFirst("ABCabc123", Pattern.compile("[a-z]"))          = "ABCbc123"
155     * StringUtils.removeFirst("ABCabc123abc", Pattern.compile("[a-z]+"))      = "ABC123abc"
156     * }</pre>
157     *
158     * @param text  text to remove from, may be null
159     * @param regex  the regular expression pattern to which this string is to be matched
160     * @return  the text with the first replacement processed,
161     *              {@code null} if null String input
162     *
163     * @see #replaceFirst(String, Pattern, String)
164     * @see java.util.regex.Matcher#replaceFirst(String)
165     * @see java.util.regex.Pattern
166     */
167    public static String removeFirst(final String text, final Pattern regex) {
168        return replaceFirst(text, regex, StringUtils.EMPTY);
169    }
170
171    /**
172     * Removes the first substring of the text string that matches the given regular expression.
173     *
174     * This method is a {@code null} safe equivalent to:
175     * <ul>
176     *  <li>{@code text.replaceFirst(regex, StringUtils.EMPTY)}</li>
177     *  <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(StringUtils.EMPTY)}</li>
178     * </ul>
179     *
180     * <p>A {@code null} reference passed to this method is a no-op.</p>
181     *
182     * <p>The {@link Pattern#DOTALL} option is NOT automatically added.
183     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
184     * DOTALL is also known as single-line mode in Perl.</p>
185     *
186     * <pre>{@code
187     * StringUtils.removeFirst(null, *)      = null
188     * StringUtils.removeFirst("any", (String) null)  = "any"
189     * StringUtils.removeFirst("any", "")    = "any"
190     * StringUtils.removeFirst("any", ".*")  = ""
191     * StringUtils.removeFirst("any", ".+")  = ""
192     * StringUtils.removeFirst("abc", ".?")  = "bc"
193     * StringUtils.removeFirst("A<__>\n<__>B", "<.*>")      = "A\n<__>B"
194     * StringUtils.removeFirst("A<__>\n<__>B", "(?s)<.*>")  = "AB"
195     * StringUtils.removeFirst("ABCabc123", "[a-z]")          = "ABCbc123"
196     * StringUtils.removeFirst("ABCabc123abc", "[a-z]+")      = "ABC123abc"
197     * }</pre>
198     *
199     * @param text  text to remove from, may be null
200     * @param regex  the regular expression to which this string is to be matched
201     * @return  the text with the first replacement processed,
202     *              {@code null} if null String input
203     *
204     * @throws  java.util.regex.PatternSyntaxException
205     *              if the regular expression's syntax is invalid
206     *
207     * @see #replaceFirst(String, String, String)
208     * @see String#replaceFirst(String, String)
209     * @see java.util.regex.Pattern
210     * @see java.util.regex.Pattern#DOTALL
211     */
212    public static String removeFirst(final String text, final String regex) {
213        return replaceFirst(text, regex, StringUtils.EMPTY);
214    }
215
216    /**
217     * Removes each substring of the source String that matches the given regular expression using the DOTALL option.
218     *
219     * This call is a {@code null} safe equivalent to:
220     * <ul>
221     * <li>{@code text.replaceAll(&quot;(?s)&quot; + regex, StringUtils.EMPTY)}</li>
222     * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(StringUtils.EMPTY)}</li>
223     * </ul>
224     *
225     * <p>A {@code null} reference passed to this method is a no-op.</p>
226     *
227     * <pre>{@code
228     * StringUtils.removePattern(null, *)       = null
229     * StringUtils.removePattern("any", (String) null)   = "any"
230     * StringUtils.removePattern("A<__>\n<__>B", "<.*>")  = "AB"
231     * StringUtils.removePattern("ABCabc123", "[a-z]")    = "ABC123"
232     * }</pre>
233     *
234     * @param text
235     *            the source string
236     * @param regex
237     *            the regular expression to which this string is to be matched
238     * @return The resulting {@link String}
239     * @see #replacePattern(String, String, String)
240     * @see String#replaceAll(String, String)
241     * @see Pattern#DOTALL
242     */
243    public static String removePattern(final String text, final String regex) {
244        return replacePattern(text, regex, StringUtils.EMPTY);
245    }
246
247    /**
248     * Replaces each substring of the text String that matches the given regular expression pattern with the given replacement.
249     *
250     * This method is a {@code null} safe equivalent to:
251     * <ul>
252     *  <li>{@code pattern.matcher(text).replaceAll(replacement)}</li>
253     * </ul>
254     *
255     * <p>A {@code null} reference passed to this method is a no-op.</p>
256     *
257     * <pre>{@code
258     * StringUtils.replaceAll(null, *, *)       = null
259     * StringUtils.replaceAll("any", (Pattern) null, *)   = "any"
260     * StringUtils.replaceAll("any", *, null)   = "any"
261     * StringUtils.replaceAll("", Pattern.compile(""), "zzz")    = "zzz"
262     * StringUtils.replaceAll("", Pattern.compile(".*"), "zzz")  = "zzz"
263     * StringUtils.replaceAll("", Pattern.compile(".+"), "zzz")  = ""
264     * StringUtils.replaceAll("abc", Pattern.compile(""), "ZZ")  = "ZZaZZbZZcZZ"
265     * StringUtils.replaceAll("<__>\n<__>", Pattern.compile("<.*>"), "z")                 = "z\nz"
266     * StringUtils.replaceAll("<__>\n<__>", Pattern.compile("<.*>", Pattern.DOTALL), "z") = "z"
267     * StringUtils.replaceAll("<__>\n<__>", Pattern.compile("(?s)<.*>"), "z")             = "z"
268     * StringUtils.replaceAll("ABCabc123", Pattern.compile("[a-z]"), "_")       = "ABC___123"
269     * StringUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123"
270     * StringUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123"
271     * StringUtils.replaceAll("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum_dolor_sit"
272     * }</pre>
273     *
274     * @param text  text to search and replace in, may be null
275     * @param regex  the regular expression pattern to which this string is to be matched
276     * @param replacement  the string to be substituted for each match
277     * @return  the text with any replacements processed,
278     *              {@code null} if null String input
279     *
280     * @see java.util.regex.Matcher#replaceAll(String)
281     * @see java.util.regex.Pattern
282     */
283    public static String replaceAll(final String text, final Pattern regex, final String replacement) {
284        if (ObjectUtils.anyNull(text, regex, replacement)) {
285            return text;
286        }
287        return regex.matcher(text).replaceAll(replacement);
288    }
289
290    /**
291     * Replaces each substring of the text String that matches the given regular expression
292     * with the given replacement.
293     *
294     * This method is a {@code null} safe equivalent to:
295     * <ul>
296     *  <li>{@code text.replaceAll(regex, replacement)}</li>
297     *  <li>{@code Pattern.compile(regex).matcher(text).replaceAll(replacement)}</li>
298     * </ul>
299     *
300     * <p>A {@code null} reference passed to this method is a no-op.</p>
301     *
302     * <p>Unlike in the {@link #replacePattern(String, String, String)} method, the {@link Pattern#DOTALL} option
303     * is NOT automatically added.
304     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
305     * DOTALL is also known as single-line mode in Perl.</p>
306     *
307     * <pre>{@code
308     * StringUtils.replaceAll(null, *, *)       = null
309     * StringUtils.replaceAll("any", (String) null, *)   = "any"
310     * StringUtils.replaceAll("any", *, null)   = "any"
311     * StringUtils.replaceAll("", "", "zzz")    = "zzz"
312     * StringUtils.replaceAll("", ".*", "zzz")  = "zzz"
313     * StringUtils.replaceAll("", ".+", "zzz")  = ""
314     * StringUtils.replaceAll("abc", "", "ZZ")  = "ZZaZZbZZcZZ"
315     * StringUtils.replaceAll("<__>\n<__>", "<.*>", "z")      = "z\nz"
316     * StringUtils.replaceAll("<__>\n<__>", "(?s)<.*>", "z")  = "z"
317     * StringUtils.replaceAll("ABCabc123", "[a-z]", "_")       = "ABC___123"
318     * StringUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
319     * StringUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
320     * StringUtils.replaceAll("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
321     * }</pre>
322     *
323     * @param text  text to search and replace in, may be null
324     * @param regex  the regular expression to which this string is to be matched
325     * @param replacement  the string to be substituted for each match
326     * @return  the text with any replacements processed,
327     *              {@code null} if null String input
328     *
329     * @throws  java.util.regex.PatternSyntaxException
330     *              if the regular expression's syntax is invalid
331     *
332     * @see #replacePattern(String, String, String)
333     * @see String#replaceAll(String, String)
334     * @see java.util.regex.Pattern
335     * @see java.util.regex.Pattern#DOTALL
336     */
337    public static String replaceAll(final String text, final String regex, final String replacement) {
338        if (ObjectUtils.anyNull(text, regex, replacement)) {
339            return text;
340        }
341        return text.replaceAll(regex, replacement);
342    }
343
344    /**
345     * Replaces the first substring of the text string that matches the given regular expression pattern
346     * with the given replacement.
347     *
348     * This method is a {@code null} safe equivalent to:
349     * <ul>
350     *  <li>{@code pattern.matcher(text).replaceFirst(replacement)}</li>
351     * </ul>
352     *
353     * <p>A {@code null} reference passed to this method is a no-op.</p>
354     *
355     * <pre>{@code
356     * StringUtils.replaceFirst(null, *, *)       = null
357     * StringUtils.replaceFirst("any", (Pattern) null, *)   = "any"
358     * StringUtils.replaceFirst("any", *, null)   = "any"
359     * StringUtils.replaceFirst("", Pattern.compile(""), "zzz")    = "zzz"
360     * StringUtils.replaceFirst("", Pattern.compile(".*"), "zzz")  = "zzz"
361     * StringUtils.replaceFirst("", Pattern.compile(".+"), "zzz")  = ""
362     * StringUtils.replaceFirst("abc", Pattern.compile(""), "ZZ")  = "ZZabc"
363     * StringUtils.replaceFirst("<__>\n<__>", Pattern.compile("<.*>"), "z")      = "z\n<__>"
364     * StringUtils.replaceFirst("<__>\n<__>", Pattern.compile("(?s)<.*>"), "z")  = "z"
365     * StringUtils.replaceFirst("ABCabc123", Pattern.compile("[a-z]"), "_")          = "ABC_bc123"
366     * StringUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123abc"
367     * StringUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123abc"
368     * StringUtils.replaceFirst("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum  dolor   sit"
369     * }</pre>
370     *
371     * @param text  text to search and replace in, may be null
372     * @param regex  the regular expression pattern to which this string is to be matched
373     * @param replacement  the string to be substituted for the first match
374     * @return  the text with the first replacement processed,
375     *              {@code null} if null String input
376     *
377     * @see java.util.regex.Matcher#replaceFirst(String)
378     * @see java.util.regex.Pattern
379     */
380    public static String replaceFirst(final String text, final Pattern regex, final String replacement) {
381        if (text == null || regex == null || replacement == null) {
382            return text;
383        }
384        return regex.matcher(text).replaceFirst(replacement);
385    }
386
387    /**
388     * Replaces the first substring of the text string that matches the given regular expression
389     * with the given replacement.
390     *
391     * This method is a {@code null} safe equivalent to:
392     * <ul>
393     *  <li>{@code text.replaceFirst(regex, replacement)}</li>
394     *  <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(replacement)}</li>
395     * </ul>
396     *
397     * <p>A {@code null} reference passed to this method is a no-op.</p>
398     *
399     * <p>The {@link Pattern#DOTALL} option is NOT automatically added.
400     * To use the DOTALL option prepend {@code "(?s)"} to the regex.
401     * DOTALL is also known as single-line mode in Perl.</p>
402     *
403     * <pre>{@code
404     * StringUtils.replaceFirst(null, *, *)       = null
405     * StringUtils.replaceFirst("any", (String) null, *)   = "any"
406     * StringUtils.replaceFirst("any", *, null)   = "any"
407     * StringUtils.replaceFirst("", "", "zzz")    = "zzz"
408     * StringUtils.replaceFirst("", ".*", "zzz")  = "zzz"
409     * StringUtils.replaceFirst("", ".+", "zzz")  = ""
410     * StringUtils.replaceFirst("abc", "", "ZZ")  = "ZZabc"
411     * StringUtils.replaceFirst("<__>\n<__>", "<.*>", "z")      = "z\n<__>"
412     * StringUtils.replaceFirst("<__>\n<__>", "(?s)<.*>", "z")  = "z"
413     * StringUtils.replaceFirst("ABCabc123", "[a-z]", "_")          = "ABC_bc123"
414     * StringUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "_")  = "ABC_123abc"
415     * StringUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "")   = "ABC123abc"
416     * StringUtils.replaceFirst("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum  dolor   sit"
417     * }</pre>
418     *
419     * @param text  text to search and replace in, may be null
420     * @param regex  the regular expression to which this string is to be matched
421     * @param replacement  the string to be substituted for the first match
422     * @return  the text with the first replacement processed,
423     *              {@code null} if null String input
424     *
425     * @throws  java.util.regex.PatternSyntaxException
426     *              if the regular expression's syntax is invalid
427     *
428     * @see String#replaceFirst(String, String)
429     * @see java.util.regex.Pattern
430     * @see java.util.regex.Pattern#DOTALL
431     */
432    public static String replaceFirst(final String text, final String regex, final String replacement) {
433        if (text == null || regex == null || replacement == null) {
434            return text;
435        }
436        return text.replaceFirst(regex, replacement);
437    }
438
439    /**
440     * Replaces each substring of the source String that matches the given regular expression with the given
441     * replacement using the {@link Pattern#DOTALL} option. DOTALL is also known as single-line mode in Perl.
442     *
443     * This call is a {@code null} safe equivalent to:
444     * <ul>
445     * <li>{@code text.replaceAll(&quot;(?s)&quot; + regex, replacement)}</li>
446     * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement)}</li>
447     * </ul>
448     *
449     * <p>A {@code null} reference passed to this method is a no-op.</p>
450     *
451     * <pre>{@code
452     * StringUtils.replacePattern(null, *, *)       = null
453     * StringUtils.replacePattern("any", (String) null, *)   = "any"
454     * StringUtils.replacePattern("any", *, null)   = "any"
455     * StringUtils.replacePattern("", "", "zzz")    = "zzz"
456     * StringUtils.replacePattern("", ".*", "zzz")  = "zzz"
457     * StringUtils.replacePattern("", ".+", "zzz")  = ""
458     * StringUtils.replacePattern("<__>\n<__>", "<.*>", "z")       = "z"
459     * StringUtils.replacePattern("ABCabc123", "[a-z]", "_")       = "ABC___123"
460     * StringUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
461     * StringUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
462     * StringUtils.replacePattern("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
463     * }</pre>
464     *
465     * @param text
466     *            the source string
467     * @param regex
468     *            the regular expression to which this string is to be matched
469     * @param replacement
470     *            the string to be substituted for each match
471     * @return The resulting {@link String}
472     * @see #replaceAll(String, String, String)
473     * @see String#replaceAll(String, String)
474     * @see Pattern#DOTALL
475     */
476    public static String replacePattern(final String text, final String regex, final String replacement) {
477        if (ObjectUtils.anyNull(text, regex, replacement)) {
478            return text;
479        }
480        return dotAllMatcher(regex, text).replaceAll(replacement);
481    }
482
483    /**
484     * Make private in 4.0.
485     *
486     * @deprecated TODO Make private in 4.0.
487     */
488    @Deprecated
489    public RegExUtils() {
490        // empty
491    }
492}