/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3.text;

import java.util.Arrays;

import org.apache.commons.lang3.ArraySorter;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;

/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * </p>
 *
 * @since 2.2
 * @deprecated As of 3.6, use Apache Commons Text
 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
 * StringMatcherFactory</a> instead
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns whether or not the character at {@code pos} is the stored character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted so that it can be binary searched. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is cloned before sorting, so the caller's array is left untouched.
         * </p>
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = ArraySorter.sort(chars.clone());
        }

        /**
         * Returns whether or not the character at {@code pos} is one of the stored characters.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character is in the set, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@link NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, that is, never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return always zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    /**
     * Class used to match a fixed sequence of characters (a string).
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the text starting at {@code pos} matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string if it matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare by subtraction: "pos + len" could overflow int for very large
            // arrays, which would skip this guard and run off the end of the buffer.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default string representation followed by the characters being matched.
         *
         * @return a string describing this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@link TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns whether or not the character at {@code pos} has a value of 32 (space) or less.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    /**
     * Constructor that creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Constructor that creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (ArrayUtils.isEmpty(chars)) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Constructor that creates a matcher from a string representing a set of characters.
     * <p>
     * A one-character string yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (StringUtils.isEmpty(chars)) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Constructor that creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (StringUtils.isEmpty(str)) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Constructs a new instance.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     * <p>
     * This implementation delegates to {@link #isMatch(char[], int, int, int)}
     * using the whole of {@code buffer} as the active area.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

}