001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3; 018 019/** 020 * Operations on {@link CharSequence} that are 021 * {@code null} safe. 022 * 023 * @see CharSequence 024 * @since 3.0 025 */ 026public class CharSequenceUtils { 027 028 private static final int NOT_FOUND = -1; 029 030 static final int TO_STRING_LIMIT = 16; 031 032 private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) { 033 for (int i = 1, j = len2 - 1; i <= j; i++, j--) { 034 if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) { 035 return false; 036 } 037 } 038 return true; 039 } 040 041 /** 042 * Used by the indexOf(CharSequence methods) as a green implementation of indexOf. 043 * 044 * @param cs the {@link CharSequence} to be processed 045 * @param searchChar the {@link CharSequence} to be searched for 046 * @param start the start index 047 * @return the index where the search sequence was found 048 */ 049 static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) { 050 if (cs instanceof String) { 051 return ((String) cs).indexOf(searchChar.toString(), start); 052 } 053 if (cs instanceof StringBuilder) { 054 return ((StringBuilder) cs).indexOf(searchChar.toString(), start); 055 } 056 if (cs instanceof StringBuffer) { 057 return ((StringBuffer) cs).indexOf(searchChar.toString(), start); 058 } 059 return cs.toString().indexOf(searchChar.toString(), start); 060// if (cs instanceof String && searchChar instanceof String) { 061// // TODO: Do we assume searchChar is usually relatively small; 062// // If so then calling toString() on it is better than reverting to 063// // the green implementation in the else block 064// return ((String) cs).indexOf((String) searchChar, start); 065// } else { 066// // TODO: Implement rather than convert to String 067// return cs.toString().indexOf(searchChar.toString(), start); 068// } 069 } 070 071 /** 072 * Returns the index within {@code cs} of the first occurrence of the 073 * specified character, starting the search at the specified index. 074 * <p> 075 * If a character with value {@code searchChar} occurs in the 076 * character sequence represented by the {@code cs} 077 * object at an index no smaller than {@code start}, then 078 * the index of the first such occurrence is returned. For values 079 * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive), 080 * this is the smallest value <em>k</em> such that: 081 * </p> 082 * <blockquote><pre> 083 * (this.charAt(<em>k</em>) == searchChar) && (<em>k</em> >= start) 084 * </pre></blockquote> 085 * is true. For other values of {@code searchChar}, it is the 086 * smallest value <em>k</em> such that: 087 * <blockquote><pre> 088 * (this.codePointAt(<em>k</em>) == searchChar) && (<em>k</em> >= start) 089 * </pre></blockquote> 090 * <p> 091 * is true. In either case, if no such character occurs inm {@code cs} 092 * at or after position {@code start}, then 093 * {@code -1} is returned. 094 * </p> 095 * <p> 096 * There is no restriction on the value of {@code start}. If it 097 * is negative, it has the same effect as if it were zero: the entire 098 * {@link CharSequence} may be searched. If it is greater than 099 * the length of {@code cs}, it has the same effect as if it were 100 * equal to the length of {@code cs}: {@code -1} is returned. 101 * </p> 102 * <p>All indices are specified in {@code char} values 103 * (Unicode code units). 104 * </p> 105 * 106 * @param cs the {@link CharSequence} to be processed, not null 107 * @param searchChar the char to be searched for 108 * @param start the start index, negative starts at the string start 109 * @return the index where the search char was found, -1 if not found 110 * @since 3.6 updated to behave more like {@link String} 111 */ 112 static int indexOf(final CharSequence cs, final int searchChar, int start) { 113 if (cs instanceof String) { 114 return ((String) cs).indexOf(searchChar, start); 115 } 116 final int sz = cs.length(); 117 if (start < 0) { 118 start = 0; 119 } 120 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 121 for (int i = start; i < sz; i++) { 122 if (cs.charAt(i) == searchChar) { 123 return i; 124 } 125 } 126 return NOT_FOUND; 127 } 128 //supplementary characters (LANG1300) 129 if (searchChar <= Character.MAX_CODE_POINT) { 130 final char[] chars = Character.toChars(searchChar); 131 for (int i = start; i < sz - 1; i++) { 132 final char high = cs.charAt(i); 133 final char low = cs.charAt(i + 1); 134 if (high == chars[0] && low == chars[1]) { 135 return i; 136 } 137 } 138 } 139 return NOT_FOUND; 140 } 141 142 /** 143 * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf 144 * 145 * @param cs the {@link CharSequence} to be processed 146 * @param searchChar the {@link CharSequence} to find 147 * @param start the start index 148 * @return the index where the search sequence was found 149 */ 150 static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) { 151 if (searchChar == null || cs == null) { 152 return NOT_FOUND; 153 } 154 if (searchChar instanceof String) { 155 if (cs instanceof String) { 156 return ((String) cs).lastIndexOf((String) searchChar, start); 157 } 158 if (cs instanceof StringBuilder) { 159 return ((StringBuilder) cs).lastIndexOf((String) searchChar, start); 160 } 161 if (cs instanceof StringBuffer) { 162 return ((StringBuffer) cs).lastIndexOf((String) searchChar, start); 163 } 164 } 165 166 final int len1 = cs.length(); 167 final int len2 = searchChar.length(); 168 169 if (start > len1) { 170 start = len1; 171 } 172 173 if (start < 0 || len2 > len1) { 174 return NOT_FOUND; 175 } 176 177 if (len2 == 0) { 178 return start; 179 } 180 181 if (len2 <= TO_STRING_LIMIT) { 182 if (cs instanceof String) { 183 return ((String) cs).lastIndexOf(searchChar.toString(), start); 184 } 185 if (cs instanceof StringBuilder) { 186 return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start); 187 } 188 if (cs instanceof StringBuffer) { 189 return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start); 190 } 191 } 192 193 if (start + len2 > len1) { 194 start = len1 - len2; 195 } 196 197 final char char0 = searchChar.charAt(0); 198 199 int i = start; 200 while (true) { 201 while (cs.charAt(i) != char0) { 202 i--; 203 if (i < 0) { 204 return NOT_FOUND; 205 } 206 } 207 if (checkLaterThan1(cs, searchChar, len2, i)) { 208 return i; 209 } 210 i--; 211 if (i < 0) { 212 return NOT_FOUND; 213 } 214 } 215 } 216 217 /** 218 * Returns the index within {@code cs} of the last occurrence of 219 * the specified character, searching backward starting at the 220 * specified index. For values of {@code searchChar} in the range 221 * from 0 to 0xFFFF (inclusive), the index returned is the largest 222 * value <em>k</em> such that: 223 * <blockquote><pre> 224 * (this.charAt(<em>k</em>) == searchChar) && (<em>k</em> <= start) 225 * </pre></blockquote> 226 * is true. For other values of {@code searchChar}, it is the 227 * largest value <em>k</em> such that: 228 * <blockquote><pre> 229 * (this.codePointAt(<em>k</em>) == searchChar) && (<em>k</em> <= start) 230 * </pre></blockquote> 231 * is true. In either case, if no such character occurs in {@code cs} 232 * at or before position {@code start}, then {@code -1} is returned. 233 * 234 * <p> 235 * All indices are specified in {@code char} values 236 * (Unicode code units). 237 * </p> 238 * 239 * @param cs the {@link CharSequence} to be processed 240 * @param searchChar the char to be searched for 241 * @param start the start index, negative returns -1, beyond length starts at end 242 * @return the index where the search char was found, -1 if not found 243 * @since 3.6 updated to behave more like {@link String} 244 */ 245 static int lastIndexOf(final CharSequence cs, final int searchChar, int start) { 246 if (cs instanceof String) { 247 return ((String) cs).lastIndexOf(searchChar, start); 248 } 249 final int sz = cs.length(); 250 if (start < 0) { 251 return NOT_FOUND; 252 } 253 if (start >= sz) { 254 start = sz - 1; 255 } 256 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 257 for (int i = start; i >= 0; --i) { 258 if (cs.charAt(i) == searchChar) { 259 return i; 260 } 261 } 262 return NOT_FOUND; 263 } 264 //supplementary characters (LANG1300) 265 //NOTE - we must do a forward traversal for this to avoid duplicating code points 266 if (searchChar <= Character.MAX_CODE_POINT) { 267 final char[] chars = Character.toChars(searchChar); 268 //make sure it's not the last index 269 if (start == sz - 1) { 270 return NOT_FOUND; 271 } 272 for (int i = start; i >= 0; i--) { 273 final char high = cs.charAt(i); 274 final char low = cs.charAt(i + 1); 275 if (chars[0] == high && chars[1] == low) { 276 return i; 277 } 278 } 279 } 280 return NOT_FOUND; 281 } 282 283 /** 284 * Green implementation of regionMatches. 285 * 286 * @param cs the {@link CharSequence} to be processed 287 * @param ignoreCase whether or not to be case-insensitive 288 * @param thisStart the index to start on the {@code cs} CharSequence 289 * @param substring the {@link CharSequence} to be looked for 290 * @param start the index to start on the {@code substring} CharSequence 291 * @param length character length of the region 292 * @return whether the region matched 293 */ 294 static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, 295 final CharSequence substring, final int start, final int length) { 296 if (cs instanceof String && substring instanceof String) { 297 return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length); 298 } 299 int index1 = thisStart; 300 int index2 = start; 301 int tmpLen = length; 302 303 // Extract these first so we detect NPEs the same as the java.lang.String version 304 final int srcLen = cs.length() - thisStart; 305 final int otherLen = substring.length() - start; 306 307 // Check for invalid parameters 308 if (thisStart < 0 || start < 0 || length < 0) { 309 return false; 310 } 311 312 // Check that the regions are long enough 313 if (srcLen < length || otherLen < length) { 314 return false; 315 } 316 317 while (tmpLen-- > 0) { 318 final char c1 = cs.charAt(index1++); 319 final char c2 = substring.charAt(index2++); 320 321 if (c1 == c2) { 322 continue; 323 } 324 325 if (!ignoreCase) { 326 return false; 327 } 328 329 // The real same check as in String.regionMatches(): 330 final char u1 = Character.toUpperCase(c1); 331 final char u2 = Character.toUpperCase(c2); 332 if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) { 333 return false; 334 } 335 } 336 337 return true; 338 } 339 340 /** 341 * Returns a new {@link CharSequence} that is a subsequence of this 342 * sequence starting with the {@code char} value at the specified index. 343 * 344 * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}. 345 * The length (in {@code char}) of the returned sequence is {@code length() - start}, 346 * so if {@code start == end} then an empty sequence is returned.</p> 347 * 348 * @param cs the specified subsequence, null returns null 349 * @param start the start index, inclusive, valid 350 * @return a new subsequence, may be null 351 * @throws IndexOutOfBoundsException if {@code start} is negative or if 352 * {@code start} is greater than {@code length()} 353 */ 354 public static CharSequence subSequence(final CharSequence cs, final int start) { 355 return cs == null ? null : cs.subSequence(start, cs.length()); 356 } 357 358 /** 359 * Converts the given CharSequence to a char[]. 360 * 361 * @param source the {@link CharSequence} to be processed. 362 * @return the resulting char array, never null. 363 * @since 3.11 364 */ 365 public static char[] toCharArray(final CharSequence source) { 366 final int len = StringUtils.length(source); 367 if (len == 0) { 368 return ArrayUtils.EMPTY_CHAR_ARRAY; 369 } 370 if (source instanceof String) { 371 return ((String) source).toCharArray(); 372 } 373 final char[] array = new char[len]; 374 for (int i = 0; i < len; i++) { 375 array[i] = source.charAt(i); 376 } 377 return array; 378 } 379 380 /** 381 * {@link CharSequenceUtils} instances should NOT be constructed in 382 * standard programming. 383 * 384 * <p>This constructor is public to permit tools that require a JavaBean 385 * instance to operate.</p> 386 * 387 * @deprecated TODO Make private in 4.0. 388 */ 389 @Deprecated 390 public CharSequenceUtils() { 391 // empty 392 } 393}