001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3; 018 019import java.util.regex.Matcher; 020import java.util.regex.Pattern; 021 022/** 023 * Helpers to process Strings using regular expressions. 024 * @see java.util.regex.Pattern 025 * @since 3.8 026 */ 027public class RegExUtils { 028 029 /** 030 * Compiles the given regular expression into a pattern with the {@link Pattern#DOTALL} flag. 031 * 032 * @param regex The expression to be compiled 033 * @return the given regular expression compiled into a pattern with the {@link Pattern#DOTALL} flag. 034 * @since 3.13.0 035 */ 036 public static Pattern dotAll(final String regex) { 037 return Pattern.compile(regex, Pattern.DOTALL); 038 } 039 040 /** 041 * Compiles the given regular expression into a pattern with the {@link Pattern#DOTALL} flag, then creates a matcher that will match the given text against 042 * this pattern. 043 * 044 * @param regex The expression to be compiled. 045 * @param text The character sequence to be matched. 046 * @return A new matcher for this pattern. 047 * @since 3.13.0 048 */ 049 public static Matcher dotAllMatcher(final String regex, final String text) { 050 return dotAll(regex).matcher(text); 051 } 052 053 /** 054 * Removes each substring of the text String that matches the given regular expression pattern. 055 * 056 * This method is a {@code null} safe equivalent to: 057 * <ul> 058 * <li>{@code pattern.matcher(text).replaceAll(StringUtils.EMPTY)}</li> 059 * </ul> 060 * 061 * <p>A {@code null} reference passed to this method is a no-op.</p> 062 * 063 * <pre>{@code 064 * StringUtils.removeAll(null, *) = null 065 * StringUtils.removeAll("any", (Pattern) null) = "any" 066 * StringUtils.removeAll("any", Pattern.compile("")) = "any" 067 * StringUtils.removeAll("any", Pattern.compile(".*")) = "" 068 * StringUtils.removeAll("any", Pattern.compile(".+")) = "" 069 * StringUtils.removeAll("abc", Pattern.compile(".?")) = "" 070 * StringUtils.removeAll("A<__>\n<__>B", Pattern.compile("<.*>")) = "A\nB" 071 * StringUtils.removeAll("A<__>\n<__>B", Pattern.compile("(?s)<.*>")) = "AB" 072 * StringUtils.removeAll("A<__>\n<__>B", Pattern.compile("<.*>", Pattern.DOTALL)) = "AB" 073 * StringUtils.removeAll("ABCabc123abc", Pattern.compile("[a-z]")) = "ABC123" 074 * }</pre> 075 * 076 * @param text text to remove from, may be null 077 * @param regex the regular expression to which this string is to be matched 078 * @return the text with any removes processed, 079 * {@code null} if null String input 080 * 081 * @see #replaceAll(String, Pattern, String) 082 * @see java.util.regex.Matcher#replaceAll(String) 083 * @see java.util.regex.Pattern 084 */ 085 public static String removeAll(final String text, final Pattern regex) { 086 return replaceAll(text, regex, StringUtils.EMPTY); 087 } 088 089 /** 090 * Removes each substring of the text String that matches the given regular expression. 091 * 092 * This method is a {@code null} safe equivalent to: 093 * <ul> 094 * <li>{@code text.replaceAll(regex, StringUtils.EMPTY)}</li> 095 * <li>{@code Pattern.compile(regex).matcher(text).replaceAll(StringUtils.EMPTY)}</li> 096 * </ul> 097 * 098 * <p>A {@code null} reference passed to this method is a no-op.</p> 099 * 100 * <p>Unlike in the {@link #removePattern(String, String)} method, the {@link Pattern#DOTALL} option 101 * is NOT automatically added. 102 * To use the DOTALL option prepend {@code "(?s)"} to the regex. 103 * DOTALL is also known as single-line mode in Perl.</p> 104 * 105 * <pre>{@code 106 * StringUtils.removeAll(null, *) = null 107 * StringUtils.removeAll("any", (String) null) = "any" 108 * StringUtils.removeAll("any", "") = "any" 109 * StringUtils.removeAll("any", ".*") = "" 110 * StringUtils.removeAll("any", ".+") = "" 111 * StringUtils.removeAll("abc", ".?") = "" 112 * StringUtils.removeAll("A<__>\n<__>B", "<.*>") = "A\nB" 113 * StringUtils.removeAll("A<__>\n<__>B", "(?s)<.*>") = "AB" 114 * StringUtils.removeAll("ABCabc123abc", "[a-z]") = "ABC123" 115 * }</pre> 116 * 117 * @param text text to remove from, may be null 118 * @param regex the regular expression to which this string is to be matched 119 * @return the text with any removes processed, 120 * {@code null} if null String input 121 * 122 * @throws java.util.regex.PatternSyntaxException 123 * if the regular expression's syntax is invalid 124 * 125 * @see #replaceAll(String, String, String) 126 * @see #removePattern(String, String) 127 * @see String#replaceAll(String, String) 128 * @see java.util.regex.Pattern 129 * @see java.util.regex.Pattern#DOTALL 130 */ 131 public static String removeAll(final String text, final String regex) { 132 return replaceAll(text, regex, StringUtils.EMPTY); 133 } 134 135 /** 136 * Removes the first substring of the text string that matches the given regular expression pattern. 137 * 138 * This method is a {@code null} safe equivalent to: 139 * <ul> 140 * <li>{@code pattern.matcher(text).replaceFirst(StringUtils.EMPTY)}</li> 141 * </ul> 142 * 143 * <p>A {@code null} reference passed to this method is a no-op.</p> 144 * 145 * <pre>{@code 146 * StringUtils.removeFirst(null, *) = null 147 * StringUtils.removeFirst("any", (Pattern) null) = "any" 148 * StringUtils.removeFirst("any", Pattern.compile("")) = "any" 149 * StringUtils.removeFirst("any", Pattern.compile(".*")) = "" 150 * StringUtils.removeFirst("any", Pattern.compile(".+")) = "" 151 * StringUtils.removeFirst("abc", Pattern.compile(".?")) = "bc" 152 * StringUtils.removeFirst("A<__>\n<__>B", Pattern.compile("<.*>")) = "A\n<__>B" 153 * StringUtils.removeFirst("A<__>\n<__>B", Pattern.compile("(?s)<.*>")) = "AB" 154 * StringUtils.removeFirst("ABCabc123", Pattern.compile("[a-z]")) = "ABCbc123" 155 * StringUtils.removeFirst("ABCabc123abc", Pattern.compile("[a-z]+")) = "ABC123abc" 156 * }</pre> 157 * 158 * @param text text to remove from, may be null 159 * @param regex the regular expression pattern to which this string is to be matched 160 * @return the text with the first replacement processed, 161 * {@code null} if null String input 162 * 163 * @see #replaceFirst(String, Pattern, String) 164 * @see java.util.regex.Matcher#replaceFirst(String) 165 * @see java.util.regex.Pattern 166 */ 167 public static String removeFirst(final String text, final Pattern regex) { 168 return replaceFirst(text, regex, StringUtils.EMPTY); 169 } 170 171 /** 172 * Removes the first substring of the text string that matches the given regular expression. 173 * 174 * This method is a {@code null} safe equivalent to: 175 * <ul> 176 * <li>{@code text.replaceFirst(regex, StringUtils.EMPTY)}</li> 177 * <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(StringUtils.EMPTY)}</li> 178 * </ul> 179 * 180 * <p>A {@code null} reference passed to this method is a no-op.</p> 181 * 182 * <p>The {@link Pattern#DOTALL} option is NOT automatically added. 183 * To use the DOTALL option prepend {@code "(?s)"} to the regex. 184 * DOTALL is also known as single-line mode in Perl.</p> 185 * 186 * <pre>{@code 187 * StringUtils.removeFirst(null, *) = null 188 * StringUtils.removeFirst("any", (String) null) = "any" 189 * StringUtils.removeFirst("any", "") = "any" 190 * StringUtils.removeFirst("any", ".*") = "" 191 * StringUtils.removeFirst("any", ".+") = "" 192 * StringUtils.removeFirst("abc", ".?") = "bc" 193 * StringUtils.removeFirst("A<__>\n<__>B", "<.*>") = "A\n<__>B" 194 * StringUtils.removeFirst("A<__>\n<__>B", "(?s)<.*>") = "AB" 195 * StringUtils.removeFirst("ABCabc123", "[a-z]") = "ABCbc123" 196 * StringUtils.removeFirst("ABCabc123abc", "[a-z]+") = "ABC123abc" 197 * }</pre> 198 * 199 * @param text text to remove from, may be null 200 * @param regex the regular expression to which this string is to be matched 201 * @return the text with the first replacement processed, 202 * {@code null} if null String input 203 * 204 * @throws java.util.regex.PatternSyntaxException 205 * if the regular expression's syntax is invalid 206 * 207 * @see #replaceFirst(String, String, String) 208 * @see String#replaceFirst(String, String) 209 * @see java.util.regex.Pattern 210 * @see java.util.regex.Pattern#DOTALL 211 */ 212 public static String removeFirst(final String text, final String regex) { 213 return replaceFirst(text, regex, StringUtils.EMPTY); 214 } 215 216 /** 217 * Removes each substring of the source String that matches the given regular expression using the DOTALL option. 218 * 219 * This call is a {@code null} safe equivalent to: 220 * <ul> 221 * <li>{@code text.replaceAll("(?s)" + regex, StringUtils.EMPTY)}</li> 222 * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(StringUtils.EMPTY)}</li> 223 * </ul> 224 * 225 * <p>A {@code null} reference passed to this method is a no-op.</p> 226 * 227 * <pre>{@code 228 * StringUtils.removePattern(null, *) = null 229 * StringUtils.removePattern("any", (String) null) = "any" 230 * StringUtils.removePattern("A<__>\n<__>B", "<.*>") = "AB" 231 * StringUtils.removePattern("ABCabc123", "[a-z]") = "ABC123" 232 * }</pre> 233 * 234 * @param text 235 * the source string 236 * @param regex 237 * the regular expression to which this string is to be matched 238 * @return The resulting {@link String} 239 * @see #replacePattern(String, String, String) 240 * @see String#replaceAll(String, String) 241 * @see Pattern#DOTALL 242 */ 243 public static String removePattern(final String text, final String regex) { 244 return replacePattern(text, regex, StringUtils.EMPTY); 245 } 246 247 /** 248 * Replaces each substring of the text String that matches the given regular expression pattern with the given replacement. 249 * 250 * This method is a {@code null} safe equivalent to: 251 * <ul> 252 * <li>{@code pattern.matcher(text).replaceAll(replacement)}</li> 253 * </ul> 254 * 255 * <p>A {@code null} reference passed to this method is a no-op.</p> 256 * 257 * <pre>{@code 258 * StringUtils.replaceAll(null, *, *) = null 259 * StringUtils.replaceAll("any", (Pattern) null, *) = "any" 260 * StringUtils.replaceAll("any", *, null) = "any" 261 * StringUtils.replaceAll("", Pattern.compile(""), "zzz") = "zzz" 262 * StringUtils.replaceAll("", Pattern.compile(".*"), "zzz") = "zzz" 263 * StringUtils.replaceAll("", Pattern.compile(".+"), "zzz") = "" 264 * StringUtils.replaceAll("abc", Pattern.compile(""), "ZZ") = "ZZaZZbZZcZZ" 265 * StringUtils.replaceAll("<__>\n<__>", Pattern.compile("<.*>"), "z") = "z\nz" 266 * StringUtils.replaceAll("<__>\n<__>", Pattern.compile("<.*>", Pattern.DOTALL), "z") = "z" 267 * StringUtils.replaceAll("<__>\n<__>", Pattern.compile("(?s)<.*>"), "z") = "z" 268 * StringUtils.replaceAll("ABCabc123", Pattern.compile("[a-z]"), "_") = "ABC___123" 269 * StringUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "_") = "ABC_123" 270 * StringUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "") = "ABC123" 271 * StringUtils.replaceAll("Lorem ipsum dolor sit", Pattern.compile("( +)([a-z]+)"), "_$2") = "Lorem_ipsum_dolor_sit" 272 * }</pre> 273 * 274 * @param text text to search and replace in, may be null 275 * @param regex the regular expression pattern to which this string is to be matched 276 * @param replacement the string to be substituted for each match 277 * @return the text with any replacements processed, 278 * {@code null} if null String input 279 * 280 * @see java.util.regex.Matcher#replaceAll(String) 281 * @see java.util.regex.Pattern 282 */ 283 public static String replaceAll(final String text, final Pattern regex, final String replacement) { 284 if (ObjectUtils.anyNull(text, regex, replacement)) { 285 return text; 286 } 287 return regex.matcher(text).replaceAll(replacement); 288 } 289 290 /** 291 * Replaces each substring of the text String that matches the given regular expression 292 * with the given replacement. 293 * 294 * This method is a {@code null} safe equivalent to: 295 * <ul> 296 * <li>{@code text.replaceAll(regex, replacement)}</li> 297 * <li>{@code Pattern.compile(regex).matcher(text).replaceAll(replacement)}</li> 298 * </ul> 299 * 300 * <p>A {@code null} reference passed to this method is a no-op.</p> 301 * 302 * <p>Unlike in the {@link #replacePattern(String, String, String)} method, the {@link Pattern#DOTALL} option 303 * is NOT automatically added. 304 * To use the DOTALL option prepend {@code "(?s)"} to the regex. 305 * DOTALL is also known as single-line mode in Perl.</p> 306 * 307 * <pre>{@code 308 * StringUtils.replaceAll(null, *, *) = null 309 * StringUtils.replaceAll("any", (String) null, *) = "any" 310 * StringUtils.replaceAll("any", *, null) = "any" 311 * StringUtils.replaceAll("", "", "zzz") = "zzz" 312 * StringUtils.replaceAll("", ".*", "zzz") = "zzz" 313 * StringUtils.replaceAll("", ".+", "zzz") = "" 314 * StringUtils.replaceAll("abc", "", "ZZ") = "ZZaZZbZZcZZ" 315 * StringUtils.replaceAll("<__>\n<__>", "<.*>", "z") = "z\nz" 316 * StringUtils.replaceAll("<__>\n<__>", "(?s)<.*>", "z") = "z" 317 * StringUtils.replaceAll("ABCabc123", "[a-z]", "_") = "ABC___123" 318 * StringUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "_") = "ABC_123" 319 * StringUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "") = "ABC123" 320 * StringUtils.replaceAll("Lorem ipsum dolor sit", "( +)([a-z]+)", "_$2") = "Lorem_ipsum_dolor_sit" 321 * }</pre> 322 * 323 * @param text text to search and replace in, may be null 324 * @param regex the regular expression to which this string is to be matched 325 * @param replacement the string to be substituted for each match 326 * @return the text with any replacements processed, 327 * {@code null} if null String input 328 * 329 * @throws java.util.regex.PatternSyntaxException 330 * if the regular expression's syntax is invalid 331 * 332 * @see #replacePattern(String, String, String) 333 * @see String#replaceAll(String, String) 334 * @see java.util.regex.Pattern 335 * @see java.util.regex.Pattern#DOTALL 336 */ 337 public static String replaceAll(final String text, final String regex, final String replacement) { 338 if (ObjectUtils.anyNull(text, regex, replacement)) { 339 return text; 340 } 341 return text.replaceAll(regex, replacement); 342 } 343 344 /** 345 * Replaces the first substring of the text string that matches the given regular expression pattern 346 * with the given replacement. 347 * 348 * This method is a {@code null} safe equivalent to: 349 * <ul> 350 * <li>{@code pattern.matcher(text).replaceFirst(replacement)}</li> 351 * </ul> 352 * 353 * <p>A {@code null} reference passed to this method is a no-op.</p> 354 * 355 * <pre>{@code 356 * StringUtils.replaceFirst(null, *, *) = null 357 * StringUtils.replaceFirst("any", (Pattern) null, *) = "any" 358 * StringUtils.replaceFirst("any", *, null) = "any" 359 * StringUtils.replaceFirst("", Pattern.compile(""), "zzz") = "zzz" 360 * StringUtils.replaceFirst("", Pattern.compile(".*"), "zzz") = "zzz" 361 * StringUtils.replaceFirst("", Pattern.compile(".+"), "zzz") = "" 362 * StringUtils.replaceFirst("abc", Pattern.compile(""), "ZZ") = "ZZabc" 363 * StringUtils.replaceFirst("<__>\n<__>", Pattern.compile("<.*>"), "z") = "z\n<__>" 364 * StringUtils.replaceFirst("<__>\n<__>", Pattern.compile("(?s)<.*>"), "z") = "z" 365 * StringUtils.replaceFirst("ABCabc123", Pattern.compile("[a-z]"), "_") = "ABC_bc123" 366 * StringUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "_") = "ABC_123abc" 367 * StringUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "") = "ABC123abc" 368 * StringUtils.replaceFirst("Lorem ipsum dolor sit", Pattern.compile("( +)([a-z]+)"), "_$2") = "Lorem_ipsum dolor sit" 369 * }</pre> 370 * 371 * @param text text to search and replace in, may be null 372 * @param regex the regular expression pattern to which this string is to be matched 373 * @param replacement the string to be substituted for the first match 374 * @return the text with the first replacement processed, 375 * {@code null} if null String input 376 * 377 * @see java.util.regex.Matcher#replaceFirst(String) 378 * @see java.util.regex.Pattern 379 */ 380 public static String replaceFirst(final String text, final Pattern regex, final String replacement) { 381 if (text == null || regex == null || replacement == null) { 382 return text; 383 } 384 return regex.matcher(text).replaceFirst(replacement); 385 } 386 387 /** 388 * Replaces the first substring of the text string that matches the given regular expression 389 * with the given replacement. 390 * 391 * This method is a {@code null} safe equivalent to: 392 * <ul> 393 * <li>{@code text.replaceFirst(regex, replacement)}</li> 394 * <li>{@code Pattern.compile(regex).matcher(text).replaceFirst(replacement)}</li> 395 * </ul> 396 * 397 * <p>A {@code null} reference passed to this method is a no-op.</p> 398 * 399 * <p>The {@link Pattern#DOTALL} option is NOT automatically added. 400 * To use the DOTALL option prepend {@code "(?s)"} to the regex. 401 * DOTALL is also known as single-line mode in Perl.</p> 402 * 403 * <pre>{@code 404 * StringUtils.replaceFirst(null, *, *) = null 405 * StringUtils.replaceFirst("any", (String) null, *) = "any" 406 * StringUtils.replaceFirst("any", *, null) = "any" 407 * StringUtils.replaceFirst("", "", "zzz") = "zzz" 408 * StringUtils.replaceFirst("", ".*", "zzz") = "zzz" 409 * StringUtils.replaceFirst("", ".+", "zzz") = "" 410 * StringUtils.replaceFirst("abc", "", "ZZ") = "ZZabc" 411 * StringUtils.replaceFirst("<__>\n<__>", "<.*>", "z") = "z\n<__>" 412 * StringUtils.replaceFirst("<__>\n<__>", "(?s)<.*>", "z") = "z" 413 * StringUtils.replaceFirst("ABCabc123", "[a-z]", "_") = "ABC_bc123" 414 * StringUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "_") = "ABC_123abc" 415 * StringUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "") = "ABC123abc" 416 * StringUtils.replaceFirst("Lorem ipsum dolor sit", "( +)([a-z]+)", "_$2") = "Lorem_ipsum dolor sit" 417 * }</pre> 418 * 419 * @param text text to search and replace in, may be null 420 * @param regex the regular expression to which this string is to be matched 421 * @param replacement the string to be substituted for the first match 422 * @return the text with the first replacement processed, 423 * {@code null} if null String input 424 * 425 * @throws java.util.regex.PatternSyntaxException 426 * if the regular expression's syntax is invalid 427 * 428 * @see String#replaceFirst(String, String) 429 * @see java.util.regex.Pattern 430 * @see java.util.regex.Pattern#DOTALL 431 */ 432 public static String replaceFirst(final String text, final String regex, final String replacement) { 433 if (text == null || regex == null || replacement == null) { 434 return text; 435 } 436 return text.replaceFirst(regex, replacement); 437 } 438 439 /** 440 * Replaces each substring of the source String that matches the given regular expression with the given 441 * replacement using the {@link Pattern#DOTALL} option. DOTALL is also known as single-line mode in Perl. 442 * 443 * This call is a {@code null} safe equivalent to: 444 * <ul> 445 * <li>{@code text.replaceAll("(?s)" + regex, replacement)}</li> 446 * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement)}</li> 447 * </ul> 448 * 449 * <p>A {@code null} reference passed to this method is a no-op.</p> 450 * 451 * <pre>{@code 452 * StringUtils.replacePattern(null, *, *) = null 453 * StringUtils.replacePattern("any", (String) null, *) = "any" 454 * StringUtils.replacePattern("any", *, null) = "any" 455 * StringUtils.replacePattern("", "", "zzz") = "zzz" 456 * StringUtils.replacePattern("", ".*", "zzz") = "zzz" 457 * StringUtils.replacePattern("", ".+", "zzz") = "" 458 * StringUtils.replacePattern("<__>\n<__>", "<.*>", "z") = "z" 459 * StringUtils.replacePattern("ABCabc123", "[a-z]", "_") = "ABC___123" 460 * StringUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "_") = "ABC_123" 461 * StringUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "") = "ABC123" 462 * StringUtils.replacePattern("Lorem ipsum dolor sit", "( +)([a-z]+)", "_$2") = "Lorem_ipsum_dolor_sit" 463 * }</pre> 464 * 465 * @param text 466 * the source string 467 * @param regex 468 * the regular expression to which this string is to be matched 469 * @param replacement 470 * the string to be substituted for each match 471 * @return The resulting {@link String} 472 * @see #replaceAll(String, String, String) 473 * @see String#replaceAll(String, String) 474 * @see Pattern#DOTALL 475 */ 476 public static String replacePattern(final String text, final String regex, final String replacement) { 477 if (ObjectUtils.anyNull(text, regex, replacement)) { 478 return text; 479 } 480 return dotAllMatcher(regex, text).replaceAll(replacement); 481 } 482 483 /** 484 * Make private in 4.0. 485 * 486 * @deprecated TODO Make private in 4.0. 487 */ 488 @Deprecated 489 public RegExUtils() { 490 // empty 491 } 492}