/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.text;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.lang3.ArrayUtils;
22  import org.apache.commons.text.matcher.StringMatcherFactory;
23  
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * </p>
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Matcher that accepts exactly one specific character.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Creates a matcher for a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns {@code 1} if the character at {@code pos} equals the configured character,
         * or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] == ch ? 1 : 0;
        }

        /**
         * Returns the default object description followed by the quoted character,
         * which makes matcher instances distinguishable while debugging.
         *
         * @return a debugging description of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + " '" + ch + '\'';
        }
    }

    /**
     * Matcher that accepts any single character out of a fixed set.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The accepted characters, kept sorted so they can be binary-searched. */
        private final char[] chars;

        /**
         * Creates a matcher from a character array.
         * <p>
         * The array is copied, so later changes to the argument do not affect this matcher.
         * </p>
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            final char[] sorted = chars.clone();
            // isMatch relies on Arrays.binarySearch, which requires sorted input.
            Arrays.sort(sorted);
            this.chars = sorted;
        }

        /**
         * Returns {@code 1} if the character at {@code pos} is one of the configured characters,
         * or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }

        /**
         * Returns the default object description followed by the (sorted) character set.
         *
         * @return a debugging description of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }
    }

    /**
     * Matcher that never matches anything.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns {@code 0}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return always zero, because this matcher matches nothing
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    /**
     * Matcher that accepts a fixed sequence of characters (a string).
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns the length of the configured string if the buffer contains it starting at
         * {@code pos} and ending before {@code bufferEnd}, or zero if there is no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Written as a subtraction so that a large pos cannot overflow int
            // and slip past the bounds check.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters of the string.
         *
         * @return a debugging description of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    /**
     * Matcher that accepts whitespace as defined by {@link String#trim()},
     * that is, any character whose code is 32 (space) or lower.
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns {@code 1} if the character at {@code pos} has a code of 32 or lower,
         * or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A null or empty array yields the shared "match nothing" matcher, and a single
     * character yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A null or empty string yields the shared "match nothing" matcher, and a single
     * character yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Returns a matcher which matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return The split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the String trim() whitespace characters.
     *
     * @return The trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Constructs a new instance.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     * <p>
     * This overload treats the whole array as the active area: it delegates to
     * {@link #isMatch(char[], int, int, int)} with the bounds {@code 0} and {@code buffer.length}.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

}