/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.Arrays;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.text.matcher.StringMatcherFactory;

/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * </p>
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {

        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
         * <p>
         * Only the character at {@code pos} is examined; {@code bufferStart} and
         * {@code bufferEnd} are not consulted.
         * </p>
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {

        /** The set of characters to match, kept sorted so it can be binary-searched. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied and the copy is sorted; the caller's array is left untouched.
         * </p>
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            // Sorting is required by Arrays.binarySearch in isMatch.
            Arrays.sort(this.chars);
        }

        /**
         * Returns {@code 1} if there is a match, or {@code 0} if there is no match.
         * <p>
         * Only the character at {@code pos} is examined; {@code bufferStart} and
         * {@code bufferEnd} are not consulted.
         * </p>
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns {@code 0}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, which is always zero for this matcher
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    /**
     * Class used to define a string (a contiguous sequence of characters) for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {

        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns the number of matching characters, or zero if there is no match.
         * <p>
         * The whole string must be present in the buffer starting at {@code pos} and
         * must end before {@code bufferEnd}; a partial match counts as no match.
         * </p>
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Not enough active characters left to hold the whole string.
            if (pos + len > bufferEnd) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by a space and the characters
         * this matcher looks for.
         *
         * @return a string representation of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns {@code 1} if the character at {@code pos} has a code of {@code 32}
         * (space) or less, or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]; the shared no-match instance for null or empty input
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (ArrayUtils.isEmpty(chars)) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A single-character string yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters; the shared no-match instance for null or empty input
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return The split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String; the shared no-match instance for null or empty input
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return The trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Constructs a new instance.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     * <p>
     * This implementation treats the whole array as the active area: it delegates to
     * {@link #isMatch(char[], int, int, int)} with {@code 0} and {@code buffer.length}.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

}