View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3.text;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.lang3.ArraySorter;
22  import org.apache.commons.lang3.ArrayUtils;
23  import org.apache.commons.lang3.StringUtils;
24  
25  /**
26   * A matcher class that can be queried to determine if a character array
27   * portion matches.
28   * <p>
29   * This class comes complete with various factory methods.
30   * If these do not suffice, you can subclass and implement your own matcher.
31   * </p>
32   *
33   * @since 2.2
34   * @deprecated As of 3.6, use Apache Commons Text
35   * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
36   * StringMatcherFactory</a> instead
37   */
38  @Deprecated
39  public abstract class StrMatcher {
40  
41      /**
42       * Class used to define a character for matching purposes.
43       */
44      static final class CharMatcher extends StrMatcher {
45          /** The character to match. */
46          private final char ch;
47  
48          /**
49           * Constructor that creates a matcher that matches a single character.
50           *
51           * @param ch  the character to match
52           */
53          CharMatcher(final char ch) {
54              this.ch = ch;
55          }
56  
57          /**
58           * Returns whether or not the given character matches.
59           *
60           * @param buffer  the text content to match against, do not change
61           * @param pos  the starting position for the match, valid for buffer
62           * @param bufferStart  the first active index in the buffer, valid for buffer
63           * @param bufferEnd  the end index of the active buffer, valid for buffer
64           * @return the number of matching characters, zero for no match
65           */
66          @Override
67          public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
68              return ch == buffer[pos] ? 1 : 0;
69          }
70      }
71      /**
72       * Class used to define a set of characters for matching purposes.
73       */
74      static final class CharSetMatcher extends StrMatcher {
75          /** The set of characters to match. */
76          private final char[] chars;
77  
78          /**
79           * Constructor that creates a matcher from a character array.
80           *
81           * @param chars  the characters to match, must not be null
82           */
83          CharSetMatcher(final char[] chars) {
84              this.chars = ArraySorter.sort(chars.clone());
85          }
86  
87          /**
88           * Returns whether or not the given character matches.
89           *
90           * @param buffer  the text content to match against, do not change
91           * @param pos  the starting position for the match, valid for buffer
92           * @param bufferStart  the first active index in the buffer, valid for buffer
93           * @param bufferEnd  the end index of the active buffer, valid for buffer
94           * @return the number of matching characters, zero for no match
95           */
96          @Override
97          public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
98              return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
99          }
100     }
101     /**
102      * Class used to match no characters.
103      */
104     static final class NoMatcher extends StrMatcher {
105 
106         /**
107          * Constructs a new instance of {@link NoMatcher}.
108          */
109         NoMatcher() {
110         }
111 
112         /**
113          * Always returns {@code false}.
114          *
115          * @param buffer  the text content to match against, do not change
116          * @param pos  the starting position for the match, valid for buffer
117          * @param bufferStart  the first active index in the buffer, valid for buffer
118          * @param bufferEnd  the end index of the active buffer, valid for buffer
119          * @return the number of matching characters, zero for no match
120          */
121         @Override
122         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
123             return 0;
124         }
125     }
126     /**
127      * Class used to define a set of characters for matching purposes.
128      */
129     static final class StringMatcher extends StrMatcher {
130         /** The string to match, as a character array. */
131         private final char[] chars;
132 
133         /**
134          * Constructor that creates a matcher from a String.
135          *
136          * @param str  the string to match, must not be null
137          */
138         StringMatcher(final String str) {
139             chars = str.toCharArray();
140         }
141 
142         /**
143          * Returns whether or not the given text matches the stored string.
144          *
145          * @param buffer  the text content to match against, do not change
146          * @param pos  the starting position for the match, valid for buffer
147          * @param bufferStart  the first active index in the buffer, valid for buffer
148          * @param bufferEnd  the end index of the active buffer, valid for buffer
149          * @return the number of matching characters, zero for no match
150          */
151         @Override
152         public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
153             final int len = chars.length;
154             if (pos + len > bufferEnd) {
155                 return 0;
156             }
157             for (int i = 0; i < chars.length; i++, pos++) {
158                 if (chars[i] != buffer[pos]) {
159                     return 0;
160                 }
161             }
162             return len;
163         }
164 
165         @Override
166         public String toString() {
167             return super.toString() + ' ' + Arrays.toString(chars);
168         }
169 
170     }
171     /**
172      * Class used to match whitespace as per trim().
173      */
174     static final class TrimMatcher extends StrMatcher {
175 
176         /**
177          * Constructs a new instance of {@link TrimMatcher}.
178          */
179         TrimMatcher() {
180         }
181 
182         /**
183          * Returns whether or not the given character matches.
184          *
185          * @param buffer  the text content to match against, do not change
186          * @param pos  the starting position for the match, valid for buffer
187          * @param bufferStart  the first active index in the buffer, valid for buffer
188          * @param bufferEnd  the end index of the active buffer, valid for buffer
189          * @return the number of matching characters, zero for no match
190          */
191         @Override
192         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
193             return buffer[pos] <= 32 ? 1 : 0;
194         }
195     }
196     /**
197      * Matches the comma character.
198      */
199     private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
200     /**
201      * Matches the tab character.
202      */
203     private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
204     /**
205      * Matches the space character.
206      */
207     private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
208     /**
209      * Matches the same characters as StringTokenizer,
210      * namely space, tab, newline, formfeed.
211      */
212     private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
213 
214     /**
215      * Matches the String trim() whitespace characters.
216      */
217     private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
218 
219     /**
220      * Matches the double quote character.
221      */
222     private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
223 
224     /**
225      * Matches the double quote character.
226      */
227     private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
228 
229     /**
230      * Matches the single or double quote character.
231      */
232     private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
233 
234     /**
235      * Matches no characters.
236      */
237     private static final StrMatcher NONE_MATCHER = new NoMatcher();
238 
239     /**
240      * Constructor that creates a matcher from a character.
241      *
242      * @param ch  the character to match, must not be null
243      * @return a new Matcher for the given char
244      */
245     public static StrMatcher charMatcher(final char ch) {
246         return new CharMatcher(ch);
247     }
248 
249     /**
250      * Constructor that creates a matcher from a set of characters.
251      *
252      * @param chars  the characters to match, null or empty matches nothing
253      * @return a new matcher for the given char[]
254      */
255     public static StrMatcher charSetMatcher(final char... chars) {
256         if (ArrayUtils.isEmpty(chars)) {
257             return NONE_MATCHER;
258         }
259         if (chars.length == 1) {
260             return new CharMatcher(chars[0]);
261         }
262         return new CharSetMatcher(chars);
263     }
264 
265     /**
266      * Constructor that creates a matcher from a string representing a set of characters.
267      *
268      * @param chars  the characters to match, null or empty matches nothing
269      * @return a new Matcher for the given characters
270      */
271     public static StrMatcher charSetMatcher(final String chars) {
272         if (StringUtils.isEmpty(chars)) {
273             return NONE_MATCHER;
274         }
275         if (chars.length() == 1) {
276             return new CharMatcher(chars.charAt(0));
277         }
278         return new CharSetMatcher(chars.toCharArray());
279     }
280 
281     /**
282      * Returns a matcher which matches the comma character.
283      *
284      * @return a matcher for a comma
285      */
286     public static StrMatcher commaMatcher() {
287         return COMMA_MATCHER;
288     }
289 
290     /**
291      * Returns a matcher which matches the double quote character.
292      *
293      * @return a matcher for a double quote
294      */
295     public static StrMatcher doubleQuoteMatcher() {
296         return DOUBLE_QUOTE_MATCHER;
297     }
298 
299     /**
300      * Matches no characters.
301      *
302      * @return a matcher that matches nothing
303      */
304     public static StrMatcher noneMatcher() {
305         return NONE_MATCHER;
306     }
307 
308     /**
309      * Returns a matcher which matches the single or double quote character.
310      *
311      * @return a matcher for a single or double quote
312      */
313     public static StrMatcher quoteMatcher() {
314         return QUOTE_MATCHER;
315     }
316 
317     /**
318      * Returns a matcher which matches the single quote character.
319      *
320      * @return a matcher for a single quote
321      */
322     public static StrMatcher singleQuoteMatcher() {
323         return SINGLE_QUOTE_MATCHER;
324     }
325 
326     /**
327      * Returns a matcher which matches the space character.
328      *
329      * @return a matcher for a space
330      */
331     public static StrMatcher spaceMatcher() {
332         return SPACE_MATCHER;
333     }
334 
335     /**
336      * Matches the same characters as StringTokenizer,
337      * namely space, tab, newline and formfeed.
338      *
339      * @return the split matcher
340      */
341     public static StrMatcher splitMatcher() {
342         return SPLIT_MATCHER;
343     }
344 
345     /**
346      * Constructor that creates a matcher from a string.
347      *
348      * @param str  the string to match, null or empty matches nothing
349      * @return a new Matcher for the given String
350      */
351     public static StrMatcher stringMatcher(final String str) {
352         if (StringUtils.isEmpty(str)) {
353             return NONE_MATCHER;
354         }
355         return new StringMatcher(str);
356     }
357 
358     /**
359      * Returns a matcher which matches the tab character.
360      *
361      * @return a matcher for a tab
362      */
363     public static StrMatcher tabMatcher() {
364         return TAB_MATCHER;
365     }
366 
367     /**
368      * Matches the String trim() whitespace characters.
369      *
370      * @return the trim matcher
371      */
372     public static StrMatcher trimMatcher() {
373         return TRIM_MATCHER;
374     }
375 
376     /**
377      * Constructs a new instance.
378      */
379     protected StrMatcher() {
380     }
381 
382     /**
383      * Returns the number of matching characters, zero for no match.
384      * <p>
385      * This method is called to check for a match.
386      * The parameter {@code pos} represents the current position to be
387      * checked in the string {@code buffer} (a character array which must
388      * not be changed).
389      * The API guarantees that {@code pos} is a valid index for {@code buffer}.
390      * </p>
391      * <p>
392      * The matching code may check one character or many.
393      * It may check characters preceding {@code pos} as well as those after.
394      * </p>
395      * <p>
396      * It must return zero for no match, or a positive number if a match was found.
397      * The number indicates the number of characters that matched.
398      * </p>
399      *
400      * @param buffer  the text content to match against, do not change
401      * @param pos  the starting position for the match, valid for buffer
402      * @return the number of matching characters, zero for no match
403      * @since 2.4
404      */
405     public int isMatch(final char[] buffer, final int pos) {
406         return isMatch(buffer, pos, 0, buffer.length);
407     }
408 
409     /**
410      * Returns the number of matching characters, zero for no match.
411      * <p>
412      * This method is called to check for a match.
413      * The parameter {@code pos} represents the current position to be
414      * checked in the string {@code buffer} (a character array which must
415      * not be changed).
416      * The API guarantees that {@code pos} is a valid index for {@code buffer}.
417      * </p>
418      * <p>
419      * The character array may be larger than the active area to be matched.
420      * Only values in the buffer between the specified indices may be accessed.
421      * </p>
422      * <p>
423      * The matching code may check one character or many.
424      * It may check characters preceding {@code pos} as well as those
425      * after, so long as no checks exceed the bounds specified.
426      * </p>
427      * <p>
428      * It must return zero for no match, or a positive number if a match was found.
429      * The number indicates the number of characters that matched.
430      * </p>
431      *
432      * @param buffer  the text content to match against, do not change
433      * @param pos  the starting position for the match, valid for buffer
434      * @param bufferStart  the first active index in the buffer, valid for buffer
435      * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
436      * @return the number of matching characters, zero for no match
437      */
438     public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
439 
440 }