001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.vfs2.provider; 018 019import java.util.Arrays; 020 021import org.apache.commons.lang3.SystemUtils; 022import org.apache.commons.vfs2.FileName; 023import org.apache.commons.vfs2.FileSystemException; 024import org.apache.commons.vfs2.FileType; 025import org.apache.commons.vfs2.VFS; 026 027/** 028 * Utilities for dealing with URIs. See RFC 2396 for details. 029 */ 030public final class UriParser { 031 032 /** 033 * The set of valid separators. These are all converted to the normalized one. Does <em>not</em> contain the 034 * normalized separator 035 */ 036 // public static final char[] separators = {'\\'}; 037 public static final char TRANS_SEPARATOR = '\\'; 038 039 /** 040 * The normalized separator to use. 041 */ 042 private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR; 043 044 private static final int HEX_BASE = 16; 045 046 private static final int BITS_IN_HALF_BYTE = 4; 047 048 private static final char LOW_MASK = 0x0F; 049 050 /** 051 * Encodes and appends a string to a StringBuilder. 052 * 053 * @param buffer The StringBuilder to append to. 054 * @param unencodedValue The String to encode and append. 055 * @param reserved characters to encode. 056 */ 057 public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) { 058 final int offset = buffer.length(); 059 buffer.append(unencodedValue); 060 encode(buffer, offset, unencodedValue.length(), reserved); 061 } 062 063 static void appendEncodedRfc2396(final StringBuilder buffer, final String unencodedValue, final char[] allowed) { 064 final int offset = buffer.length(); 065 buffer.append(unencodedValue); 066 encodeRfc2396(buffer, offset, unencodedValue.length(), allowed); 067 } 068 069 /** 070 * Canonicalizes a path. 071 * 072 * @param buffer Source data. 073 * @param offset Where to start reading. 074 * @param length How much to read. 075 * @param fileNameParser Now to encode and decode. 076 * @throws FileSystemException If an I/O error occurs. 077 */ 078 public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length, 079 final FileNameParser fileNameParser) throws FileSystemException { 080 int index = offset; 081 int count = length; 082 for (; count > 0; count--, index++) { 083 final char ch = buffer.charAt(index); 084 if (ch == '%') { 085 if (count < 3) { 086 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", 087 buffer.substring(index, index + count)); 088 } 089 090 // Decode 091 final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); 092 final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); 093 if (dig1 == -1 || dig2 == -1) { 094 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", 095 buffer.substring(index, index + 3)); 096 } 097 final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); 098 099 final boolean match = value == '%' || fileNameParser.encodeCharacter(value); 100 101 if (match) { 102 // this is a reserved character, not allowed to decode 103 index += 2; 104 count -= 2; 105 continue; 106 } 107 108 // Replace 109 buffer.setCharAt(index, value); 110 buffer.delete(index + 1, index + 3); 111 count -= 2; 112 } else if (fileNameParser.encodeCharacter(ch)) { 113 // Encode 114 final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; 115 buffer.setCharAt(index, '%'); 116 buffer.insert(index + 1, digits); 117 index += 2; 118 } 119 } 120 } 121 122 /** 123 * Decodes the String. 124 * 125 * @param uri The String to decode. 126 * @throws FileSystemException if an error occurs. 127 */ 128 public static void checkUriEncoding(final String uri) throws FileSystemException { 129 decode(uri); 130 } 131 132 /** 133 * Removes %nn encodings from a string. 134 * 135 * @param encodedStr The encoded String. 136 * @return The decoded String. 137 * @throws FileSystemException if an error occurs. 138 */ 139 public static String decode(final String encodedStr) throws FileSystemException { 140 if (encodedStr == null) { 141 return null; 142 } 143 if (encodedStr.indexOf('%') < 0) { 144 return encodedStr; 145 } 146 final StringBuilder buffer = new StringBuilder(encodedStr); 147 decode(buffer, 0, buffer.length()); 148 return buffer.toString(); 149 } 150 151 /** 152 * Removes %nn encodings from a string. 153 * 154 * @param buffer StringBuilder containing the string to decode. 155 * @param offset The position in the string to start decoding. 156 * @param length The number of characters to decode. 157 * @throws FileSystemException if an error occurs. 158 */ 159 public static void decode(final StringBuilder buffer, final int offset, final int length) 160 throws FileSystemException { 161 int index = offset; 162 int count = length; 163 boolean ipv6Host = false; 164 for (; count > 0; count--, index++) { 165 final char ch = buffer.charAt(index); 166 if (ch == '[') { 167 ipv6Host = true; 168 } 169 if (ch == ']') { 170 ipv6Host = false; 171 } 172 if (ch != '%' || ipv6Host) { 173 continue; 174 } 175 176 if (count < 3) { 177 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", 178 buffer.substring(index, index + count)); 179 } 180 181 // Decode 182 final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE); 183 final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE); 184 if (dig1 == -1 || dig2 == -1) { 185 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error", 186 buffer.substring(index, index + 3)); 187 } 188 final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2); 189 190 // Replace 191 buffer.setCharAt(index, value); 192 buffer.delete(index + 1, index + 3); 193 count -= 2; 194 } 195 } 196 197 /** 198 * Converts "special" characters to their %nn value. 199 * 200 * @param decodedStr The decoded String. 201 * @return The encoded String. 202 */ 203 public static String encode(final String decodedStr) { 204 return encode(decodedStr, null); 205 } 206 207 /** 208 * Converts "special" characters to their %nn value. 209 * 210 * @param decodedStr The decoded String. 211 * @param reserved Characters to encode. 212 * @return The encoded String 213 */ 214 public static String encode(final String decodedStr, final char[] reserved) { 215 if (decodedStr == null) { 216 return null; 217 } 218 final StringBuilder buffer = new StringBuilder(decodedStr); 219 encode(buffer, 0, buffer.length(), reserved); 220 return buffer.toString(); 221 } 222 223 /** 224 * Encode an array of Strings. 225 * 226 * @param strings The array of Strings to encode. 227 * @return An array of encoded Strings. 228 */ 229 public static String[] encode(final String[] strings) { 230 if (strings == null) { 231 return null; 232 } 233 Arrays.setAll(strings, i -> encode(strings[i])); 234 return strings; 235 } 236 237 /** 238 * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters. 239 * 240 * @param buffer The StringBuilder to append to. 241 * @param offset The position in the buffer to start encoding at. 242 * @param length The number of characters to encode. 243 * @param reserved characters to encode. 244 */ 245 public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) { 246 int index = offset; 247 int count = length; 248 for (; count > 0; index++, count--) { 249 final char ch = buffer.charAt(index); 250 boolean match = ch == '%'; 251 if (reserved != null) { 252 for (int i = 0; !match && i < reserved.length; i++) { 253 if (ch == reserved[i]) { 254 match = true; 255 break; 256 } 257 } 258 } 259 if (match) { 260 // Encode 261 final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; 262 buffer.setCharAt(index, '%'); 263 buffer.insert(index + 1, digits); 264 index += 2; 265 } 266 } 267 } 268 269 static void encodeRfc2396(final StringBuilder buffer, final int offset, final int length, final char[] allowed) { 270 int index = offset; 271 int count = length; 272 for (; count > 0; index++, count--) { 273 final char ch = buffer.charAt(index); 274 if (Arrays.binarySearch(allowed, ch) < 0) { 275 // Encode 276 final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)}; 277 buffer.setCharAt(index, '%'); 278 buffer.insert(index + 1, digits); 279 index += 2; 280 } 281 } 282 } 283 284 /** 285 * Extracts the first element of a path. 286 * 287 * @param name StringBuilder containing the path. 288 * @return The first element of the path. 289 */ 290 public static String extractFirstElement(final StringBuilder name) { 291 final int len = name.length(); 292 if (len < 1) { 293 return null; 294 } 295 int startPos = 0; 296 if (name.charAt(0) == SEPARATOR_CHAR) { 297 startPos = 1; 298 } 299 for (int pos = startPos; pos < len; pos++) { 300 if (name.charAt(pos) == SEPARATOR_CHAR) { 301 // Found a separator 302 final String elem = name.substring(startPos, pos); 303 name.delete(startPos, pos + 1); 304 return elem; 305 } 306 } 307 308 // No separator 309 final String elem = name.substring(startPos); 310 name.setLength(0); 311 return elem; 312 } 313 314 /** 315 * Extract the query String from the URI. 316 * 317 * @param name StringBuilder containing the URI. 318 * @return The query string, if any. null otherwise. 319 */ 320 public static String extractQueryString(final StringBuilder name) { 321 for (int pos = 0; pos < name.length(); pos++) { 322 if (name.charAt(pos) == '?') { 323 final String queryString = name.substring(pos + 1); 324 name.delete(pos, name.length()); 325 return queryString; 326 } 327 } 328 329 return null; 330 } 331 332 /** 333 * Extracts the scheme from a URI. 334 * 335 * @param uri The URI. 336 * @return The scheme name. Returns null if there is no scheme. 337 * @deprecated Use instead {@link #extractScheme}. Will be removed in 3.0. 338 */ 339 @Deprecated 340 public static String extractScheme(final String uri) { 341 return extractScheme(uri, null); 342 } 343 344 /** 345 * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. 346 * 347 * @param uri The URI. 348 * @param buffer Returns the remainder of the URI. 349 * @return The scheme name. Returns null if there is no scheme. 350 * @deprecated Use instead {@link #extractScheme}. Will be removed in 3.0. 351 */ 352 @Deprecated 353 public static String extractScheme(final String uri, final StringBuilder buffer) { 354 if (buffer != null) { 355 buffer.setLength(0); 356 buffer.append(uri); 357 } 358 359 final int maxPos = uri.length(); 360 for (int pos = 0; pos < maxPos; pos++) { 361 final char ch = uri.charAt(pos); 362 363 if (ch == ':') { 364 // Found the end of the scheme 365 final String scheme = uri.substring(0, pos); 366 if (scheme.length() <= 1 && SystemUtils.IS_OS_WINDOWS) { 367 // This is not a scheme, but a Windows drive letter 368 return null; 369 } 370 if (buffer != null) { 371 buffer.delete(0, pos + 1); 372 } 373 return scheme.intern(); 374 } 375 376 if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') { 377 // A scheme character 378 continue; 379 } 380 if (!(pos > 0 && (ch >= '0' && ch <= '9' || ch == '+' || ch == '-' || ch == '.'))) { 381 // Not a scheme character 382 break; 383 } 384 // A scheme character (these are not allowed as the first 385 // character of the scheme), but can be used as subsequent 386 // characters. 387 } 388 389 // No scheme in URI 390 return null; 391 } 392 393 /** 394 * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. 395 * <p> 396 * The scheme is extracted based on the currently supported schemes in the system. That is to say the schemes 397 * supported by the registered providers. 398 * </p> 399 * <p> 400 * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically 401 * handle scheme extraction calls for URI parameters that are not actually uri's, but may be names with ':' in them. 402 * </p> 403 * @param schemes The schemes to check. 404 * @param uri The potential URI. May also be a name. 405 * @return The scheme name. Returns null if there is no scheme. 406 * @since 2.3 407 */ 408 public static String extractScheme(final String[] schemes, final String uri) { 409 return extractScheme(schemes, uri, null); 410 } 411 412 /** 413 * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI. 414 * <p> 415 * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes 416 * supported by the registered providers. 417 * </p> 418 * <p> 419 * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically 420 * handle scheme extraction calls for URI parameters that are not actually URI's, but may be names with ':' in them. 421 * </p> 422 * @param schemes The schemes to check. 423 * @param uri The potential URI. May also just be a name. 424 * @param buffer Returns the remainder of the URI. 425 * @return The scheme name. Returns null if there is no scheme. 426 * @since 2.3 427 */ 428 public static String extractScheme(final String[] schemes, final String uri, final StringBuilder buffer) { 429 if (buffer != null) { 430 buffer.setLength(0); 431 buffer.append(uri); 432 } 433 for (final String scheme : schemes) { 434 if (uri.startsWith(scheme + ":")) { 435 if (buffer != null) { 436 buffer.delete(0, uri.indexOf(':') + 1); 437 } 438 return scheme; 439 } 440 } 441 return null; 442 } 443 444 /** 445 * Normalises the separators in a name. 446 * 447 * @param name The StringBuilder containing the name 448 * @return true if the StringBuilder was modified. 449 */ 450 public static boolean fixSeparators(final StringBuilder name) { 451 boolean changed = false; 452 int maxlen = name.length(); 453 for (int i = 0; i < maxlen; i++) { 454 final char ch = name.charAt(i); 455 if (ch == TRANS_SEPARATOR) { 456 name.setCharAt(i, SEPARATOR_CHAR); 457 changed = true; 458 } 459 if (i < maxlen - 2 && name.charAt(i) == '%' && name.charAt(i + 1) == '2') { 460 if (name.charAt(i + 2) == 'f' || name.charAt(i + 2) == 'F') { 461 name.setCharAt(i, SEPARATOR_CHAR); 462 name.delete(i + 1, i + 3); 463 maxlen -= 2; 464 changed = true; 465 } else if (name.charAt(i + 2) == 'e' || name.charAt(i + 2) == 'E') { 466 name.setCharAt(i, '.'); 467 name.delete(i + 1, i + 3); 468 maxlen -= 2; 469 changed = true; 470 } 471 } 472 } 473 return changed; 474 } 475 476 /** 477 * Normalises a path. Does the following: 478 * <ul> 479 * <li>Removes empty path elements. 480 * <li>Handles '.' and '..' elements. 481 * <li>Removes trailing separator. 482 * </ul> 483 * 484 * Its assumed that the separators are already fixed. 485 * 486 * @param path The path to normalize. 487 * @return The FileType. 488 * @throws FileSystemException if an error occurs. 489 * @see #fixSeparators 490 */ 491 public static FileType normalisePath(final StringBuilder path) throws FileSystemException { 492 FileType fileType = FileType.FOLDER; 493 if (path.length() == 0) { 494 return fileType; 495 } 496 497 // '/' or '.' or '..' or anyPath/..' or 'anyPath/.' should always be a path 498 if (path.charAt(path.length() - 1) != '/' 499 && path.lastIndexOf("/..") != path.length() - 3 500 && path.lastIndexOf("/.") != path.length() - 2 501 && path.lastIndexOf("..") != 0 502 && path.lastIndexOf(".") != 0 503 ) { 504 fileType = FileType.FILE; 505 } 506 507 // Adjust separators 508 // fixSeparators(path); 509 510 // Determine the start of the first element 511 int startFirstElem = 0; 512 if (path.charAt(0) == SEPARATOR_CHAR) { 513 if (path.length() == 1) { 514 return fileType; 515 } 516 startFirstElem = 1; 517 } 518 519 // Iterate over each element 520 int startElem = startFirstElem; 521 int maxlen = path.length(); 522 while (startElem < maxlen) { 523 // Find the end of the element 524 int endElem = startElem; 525 while (endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR) { 526 endElem++; 527 } 528 529 final int elemLen = endElem - startElem; 530 if (elemLen == 0) { 531 // An empty element - axe it 532 path.deleteCharAt(endElem); 533 maxlen = path.length(); 534 continue; 535 } 536 if (elemLen == 1 && path.charAt(startElem) == '.') { 537 // A '.' element - axe it 538 path.deleteCharAt(startElem); 539 maxlen = path.length(); 540 continue; 541 } 542 if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') { 543 // A '..' element - remove the previous element 544 if (startElem == startFirstElem) { 545 // Previous element is missing 546 throw new FileSystemException("vfs.provider/invalid-relative-path.error"); 547 } 548 549 // Find start of previous element 550 int pos = startElem - 2; 551 while (pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR) { 552 pos--; 553 } 554 startElem = pos + 1; 555 556 path.delete(startElem, endElem + 1); 557 maxlen = path.length(); 558 continue; 559 } 560 561 // A regular element 562 startElem = endElem + 1; 563 } 564 565 // Remove trailing separator 566 if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) { 567 path.deleteCharAt(maxlen - 1); 568 } 569 570 return fileType; 571 } 572 573 private UriParser() { 574 } 575}