001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io; 018 019import java.io.File; 020import java.util.ArrayDeque; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.Deque; 025import java.util.List; 026import java.util.regex.Matcher; 027import java.util.regex.Pattern; 028import java.util.stream.Stream; 029 030/** 031 * General file name and file path manipulation utilities. The methods in this class 032 * operate on strings that represent relative or absolute paths. Nothing in this class 033 * ever accesses the file system, or depends on whether a path points to a file that exists. 034 * <p> 035 * When dealing with file names, you can hit problems when moving from a Windows 036 * based development machine to a UNIX based production machine. 037 * This class aims to help avoid those problems. 038 * </p> 039 * <p> 040 * <strong>NOTE</strong>: You may be able to avoid using this class entirely simply by 041 * using JDK {@link File File} objects and the two argument constructor 042 * {@link File#File(java.io.File, String) File(File,String)}. 
* </p>
 * <p>
 * Most methods in this class are designed to work the same on both UNIX and Windows.
 * Those that do not are identified by 'System', 'Unix', or 'Windows' in their name.
 * </p>
 * <p>
 * Most methods recognize both separators (forward and backslashes), and both
 * sets of prefixes. See the Javadoc of each method for details.
 * </p>
 * <p>
 * This class defines six components within a path (sometimes called a file name or a full file name).
 * Given an absolute Windows path such as C:\dev\project\file.txt they are:
 * </p>
 * <ul>
 * <li>the full file name, or just file name - C:\dev\project\file.txt</li>
 * <li>the prefix - C:\</li>
 * <li>the path - dev\project\</li>
 * <li>the full path - C:\dev\project\</li>
 * <li>the name - file.txt</li>
 * <li>the base name - file</li>
 * <li>the extension - txt</li>
 * </ul>
 * <p>
 * Given an absolute UNIX path such as /dev/project/file.txt they are:
 * </p>
 * <ul>
 * <li>the full file name, or just file name - /dev/project/file.txt</li>
 * <li>the prefix - /</li>
 * <li>the path - dev/project/</li>
 * <li>the full path - /dev/project/</li>
 * <li>the name - file.txt</li>
 * <li>the base name - file</li>
 * <li>the extension - txt</li>
 * </ul>
 * <p>
 * Given a relative Windows path such as dev\project\file.txt they are:
 * </p>
 * <ul>
 * <li>the full file name, or just file name - dev\project\file.txt</li>
 * <li>the prefix - "" (the empty string)</li>
 * <li>the path - dev\project\</li>
 * <li>the full path - dev\project\</li>
 * <li>the name - file.txt</li>
 * <li>the base name - file</li>
 * <li>the extension - txt</li>
 * </ul>
 * <p>
 * Given an absolute UNIX path such as /dev/project/file.txt they are:
 * </p>
 * <ul>
 * <li>the full path, full file name, or just file name - /dev/project/file.txt</li>
 * <li>the prefix - /</li>
 * <li>the path - dev/project</li>
 *
<li>the full path - /dev/project</li> 097 * <li>the name - file.txt</li> 098 * <li>the base name - file</li> 099 * <li>the extension - txt</li> 100 * </ul> 101 * 102 * 103 * <p> 104 * This class works best if directory names end with a separator. 105 * If you omit the last separator, it is impossible to determine if the last component 106 * corresponds to a file or a directory. This class treats final components 107 * that do not end with a separator as files, not directories. 108 * </p> 109 * <p> 110 * This class only supports UNIX and Windows style names. 111 * Prefixes are matched as follows: 112 * </p> 113 * <pre> 114 * Windows: 115 * a\b\c.txt --> "" --> relative 116 * \a\b\c.txt --> "\" --> current drive absolute 117 * C:a\b\c.txt --> "C:" --> drive relative 118 * C:\a\b\c.txt --> "C:\" --> absolute 119 * \\server\a\b\c.txt --> "\\server\" --> UNC 120 * 121 * Unix: 122 * a/b/c.txt --> "" --> relative 123 * /a/b/c.txt --> "/" --> absolute 124 * ~/a/b/c.txt --> "~/" --> current user 125 * ~ --> "~/" --> current user (slash added) 126 * ~user/a/b/c.txt --> "~user/" --> named user 127 * ~user --> "~user/" --> named user (slash added) 128 * </pre> 129 * <p> 130 * Both prefix styles are matched, irrespective of the machine that you are 131 * currently running on. 132 * </p> 133 * 134 * @since 1.1 135 */ 136public class FilenameUtils { 137 138 private static final String[] EMPTY_STRING_ARRAY = {}; 139 140 private static final String EMPTY_STRING = ""; 141 142 private static final int NOT_FOUND = -1; 143 144 /** 145 * The extension separator character. 146 * @since 1.4 147 */ 148 public static final char EXTENSION_SEPARATOR = '.'; 149 150 /** 151 * The extension separator String. 152 * @since 1.4 153 */ 154 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); 155 156 /** 157 * The UNIX separator character. 158 */ 159 private static final char UNIX_NAME_SEPARATOR = '/'; 160 161 /** 162 * The Windows separator character. 
163 */ 164 private static final char WINDOWS_NAME_SEPARATOR = '\\'; 165 166 /** 167 * The system separator character. 168 */ 169 private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar; 170 171 /** 172 * The separator character that is the opposite of the system separator. 173 */ 174 private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR); 175 176 private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$"); 177 178 private static final int IPV4_MAX_OCTET_VALUE = 255; 179 180 private static final int IPV6_MAX_HEX_GROUPS = 8; 181 182 private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4; 183 184 private static final int MAX_UNSIGNED_SHORT = 0xffff; 185 186 private static final int BASE_16 = 16; 187 188 private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$"); 189 190 /** 191 * Concatenates a fileName to a base path using normal command line style rules. 192 * <p> 193 * The effect is equivalent to resultant directory after changing 194 * directory to the first argument, followed by changing directory to 195 * the second argument. 196 * </p> 197 * <p> 198 * The first argument is the base path, the second is the path to concatenate. 199 * The returned path is always normalized via {@link #normalize(String)}, 200 * thus {@code ..} is handled. 201 * </p> 202 * <p> 203 * If {@code pathToAdd} is absolute (has an absolute prefix), then 204 * it will be normalized and returned. 205 * Otherwise, the paths will be joined, normalized and returned. 206 * </p> 207 * <p> 208 * The output will be the same on both UNIX and Windows except 209 * for the separator character. 
210 * </p> 211 * <pre> 212 * /foo/ + bar --> /foo/bar 213 * /foo + bar --> /foo/bar 214 * /foo + /bar --> /bar 215 * /foo + C:/bar --> C:/bar 216 * /foo + C:bar --> C:bar [1] 217 * /foo/a/ + ../bar --> /foo/bar 218 * /foo/ + ../../bar --> null 219 * /foo/ + /bar --> /bar 220 * /foo/.. + /bar --> /bar 221 * /foo + bar/c.txt --> /foo/bar/c.txt 222 * /foo/c.txt + bar --> /foo/c.txt/bar [2] 223 * </pre> 224 * <p> 225 * [1] Note that the Windows relative drive prefix is unreliable when 226 * used with this method. 227 * </p> 228 * <p> 229 * [2] Note that the first parameter must be a path. If it ends with a name, then 230 * the name will be built into the concatenated path. If this might be a problem, 231 * use {@link #getFullPath(String)} on the base path argument. 232 * </p> 233 * 234 * @param basePath the base path to attach to, always treated as a path 235 * @param fullFileNameToAdd the file name (or path) to attach to the base 236 * @return the concatenated path, or null if invalid 237 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 238 */ 239 public static String concat(final String basePath, final String fullFileNameToAdd) { 240 final int prefix = getPrefixLength(fullFileNameToAdd); 241 if (prefix < 0) { 242 return null; 243 } 244 if (prefix > 0) { 245 return normalize(fullFileNameToAdd); 246 } 247 if (basePath == null) { 248 return null; 249 } 250 final int len = basePath.length(); 251 if (len == 0) { 252 return normalize(fullFileNameToAdd); 253 } 254 final char ch = basePath.charAt(len - 1); 255 if (isSeparator(ch)) { 256 return normalize(basePath + fullFileNameToAdd); 257 } 258 return normalize(basePath + '/' + fullFileNameToAdd); 259 } 260 261 /** 262 * Determines whether the {@code parent} directory contains the {@code child} (a file or directory). 263 * This does not read from the file system, and there is no guarantee or expectation that 264 * these paths actually exist. 
265 * <p> 266 * The files names are expected to be normalized. 267 * </p> 268 * 269 * Edge cases: 270 * <ul> 271 * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> 272 * <li>A directory does not contain itself: return false</li> 273 * <li>A null child file is not contained in any parent: return false</li> 274 * </ul> 275 * 276 * @param canonicalParent the path string to consider as the parent. 277 * @param canonicalChild the path string to consider as the child. 278 * @return true if the candidate leaf is under the specified composite. False otherwise. 279 * @since 2.2 280 * @see FileUtils#directoryContains(File, File) 281 */ 282 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) { 283 if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) { 284 return false; 285 } 286 287 if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { 288 return false; 289 } 290 291 final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR); 292 final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator; 293 294 return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator); 295 } 296 297 /** 298 * Does the work of getting the path. 
299 * 300 * @param fileName the file name 301 * @param includeSeparator true to include the end separator 302 * @return the path 303 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 304 */ 305 private static String doGetFullPath(final String fileName, final boolean includeSeparator) { 306 if (fileName == null) { 307 return null; 308 } 309 final int prefix = getPrefixLength(fileName); 310 if (prefix < 0) { 311 return null; 312 } 313 if (prefix >= fileName.length()) { 314 if (includeSeparator) { 315 return getPrefix(fileName); // add end slash if necessary 316 } 317 return fileName; 318 } 319 final int index = indexOfLastSeparator(fileName); 320 if (index < 0) { 321 return fileName.substring(0, prefix); 322 } 323 int end = index + (includeSeparator ? 1 : 0); 324 if (end == 0) { 325 end++; 326 } 327 return fileName.substring(0, end); 328 } 329 330 /** 331 * Does the work of getting the path. 332 * 333 * @param fileName the file name 334 * @param separatorAdd 0 to omit the end separator, 1 to return it 335 * @return the path 336 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 337 */ 338 private static String doGetPath(final String fileName, final int separatorAdd) { 339 if (fileName == null) { 340 return null; 341 } 342 final int prefix = getPrefixLength(fileName); 343 if (prefix < 0) { 344 return null; 345 } 346 final int index = indexOfLastSeparator(fileName); 347 final int endIndex = index + separatorAdd; 348 if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) { 349 return EMPTY_STRING; 350 } 351 return requireNonNullChars(fileName.substring(prefix, endIndex)); 352 } 353 354 /** 355 * Internal method to perform the normalization. 
356 * 357 * @param fileName the file name 358 * @param separator The separator character to use 359 * @param keepSeparator true to keep the final separator 360 * @return the normalized fileName 361 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 362 */ 363 private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) { 364 if (fileName == null) { 365 return null; 366 } 367 368 requireNonNullChars(fileName); 369 370 int size = fileName.length(); 371 if (size == 0) { 372 return fileName; 373 } 374 final int prefix = getPrefixLength(fileName); 375 if (prefix < 0) { 376 return null; 377 } 378 379 final char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy 380 fileName.getChars(0, fileName.length(), array, 0); 381 382 // fix separators throughout 383 final char otherSeparator = flipSeparator(separator); 384 for (int i = 0; i < array.length; i++) { 385 if (array[i] == otherSeparator) { 386 array[i] = separator; 387 } 388 } 389 390 // add extra separator on the end to simplify code below 391 boolean lastIsDirectory = true; 392 if (array[size - 1] != separator) { 393 array[size++] = separator; 394 lastIsDirectory = false; 395 } 396 397 // adjoining slashes 398 // If we get here, prefix can only be 0 or greater, size 1 or greater 399 // If prefix is 0, set loop start to 1 to prevent index errors 400 for (int i = prefix != 0 ? prefix : 1; i < size; i++) { 401 if (array[i] == separator && array[i - 1] == separator) { 402 System.arraycopy(array, i, array, i - 1, size - i); 403 size--; 404 i--; 405 } 406 } 407 408 // dot slash 409 for (int i = prefix + 1; i < size; i++) { 410 if (array[i] == separator && array[i - 1] == '.' 
&& 411 (i == prefix + 1 || array[i - 2] == separator)) { 412 if (i == size - 1) { 413 lastIsDirectory = true; 414 } 415 System.arraycopy(array, i + 1, array, i - 1, size - i); 416 size -=2; 417 i--; 418 } 419 } 420 421 // double dot slash 422 outer: 423 for (int i = prefix + 2; i < size; i++) { 424 if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' && 425 (i == prefix + 2 || array[i - 3] == separator)) { 426 if (i == prefix + 2) { 427 return null; 428 } 429 if (i == size - 1) { 430 lastIsDirectory = true; 431 } 432 int j; 433 for (j = i - 4 ; j >= prefix; j--) { 434 if (array[j] == separator) { 435 // remove b/../ from a/b/../c 436 System.arraycopy(array, i + 1, array, j + 1, size - i); 437 size -= i - j; 438 i = j + 1; 439 continue outer; 440 } 441 } 442 // remove a/../ from a/../c 443 System.arraycopy(array, i + 1, array, prefix, size - i); 444 size -= i + 1 - prefix; 445 i = prefix + 1; 446 } 447 } 448 449 if (size <= 0) { // should never be less than 0 450 return EMPTY_STRING; 451 } 452 if (size <= prefix) { // should never be less than prefix 453 return new String(array, 0, size); 454 } 455 if (lastIsDirectory && keepSeparator) { 456 return new String(array, 0, size); // keep trailing separator 457 } 458 return new String(array, 0, size - 1); // lose trailing separator 459 } 460 461 /** 462 * Checks whether two file names are exactly equal. 463 * <p> 464 * No processing is performed on the file names other than comparison. 465 * This is merely a null-safe case-sensitive string equality. 
466 * </p> 467 * 468 * @param fileName1 the first file name, may be null 469 * @param fileName2 the second file name, may be null 470 * @return true if the file names are equal, null equals null 471 * @see IOCase#SENSITIVE 472 */ 473 public static boolean equals(final String fileName1, final String fileName2) { 474 return equals(fileName1, fileName2, false, IOCase.SENSITIVE); 475 } 476 477 /** 478 * Checks whether two file names are equal, optionally normalizing and providing 479 * control over the case-sensitivity. 480 * 481 * @param fileName1 the first file name, may be null 482 * @param fileName2 the second file name, may be null 483 * @param normalize whether to normalize the file names 484 * @param ioCase what case sensitivity rule to use, null means case-sensitive 485 * @return true if the file names are equal, null equals null 486 * @since 1.3 487 */ 488 public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) { 489 490 if (fileName1 == null || fileName2 == null) { 491 return fileName1 == null && fileName2 == null; 492 } 493 if (normalize) { 494 fileName1 = normalize(fileName1); 495 if (fileName1 == null) { 496 return false; 497 } 498 fileName2 = normalize(fileName2); 499 if (fileName2 == null) { 500 return false; 501 } 502 } 503 return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2); 504 } 505 506 /** 507 * Checks whether two file names are equal after both have been normalized. 508 * <p> 509 * Both file names are first passed to {@link #normalize(String)}. 510 * The check is then performed in a case-sensitive manner. 
511 * </p> 512 * 513 * @param fileName1 the first file name, may be null 514 * @param fileName2 the second file name, may be null 515 * @return true if the file names are equal, null equals null 516 * @see IOCase#SENSITIVE 517 */ 518 public static boolean equalsNormalized(final String fileName1, final String fileName2) { 519 return equals(fileName1, fileName2, true, IOCase.SENSITIVE); 520 } 521 522 /** 523 * Checks whether two file names are equal using the case rules of the system 524 * after both have been normalized. 525 * <p> 526 * Both file names are first passed to {@link #normalize(String)}. 527 * The check is then performed case-sensitively on UNIX and 528 * case-insensitively on Windows. 529 * </p> 530 * 531 * @param fileName1 the first file name, may be null 532 * @param fileName2 the second file name, may be null 533 * @return true if the file names are equal, null equals null 534 * @see IOCase#SYSTEM 535 */ 536 public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) { 537 return equals(fileName1, fileName2, true, IOCase.SYSTEM); 538 } 539 540 /** 541 * Checks whether two file names are equal using the case rules of the system. 542 * <p> 543 * No processing is performed on the file names other than comparison. 544 * The check is case-sensitive on UNIX and case-insensitive on Windows. 545 * </p> 546 * 547 * @param fileName1 the first file name, may be null 548 * @param fileName2 the second file name, may be null 549 * @return true if the file names are equal, null equals null 550 * @see IOCase#SYSTEM 551 */ 552 public static boolean equalsOnSystem(final String fileName1, final String fileName2) { 553 return equals(fileName1, fileName2, false, IOCase.SYSTEM); 554 } 555 556 /** 557 * Flips the Windows name separator to Linux and vice-versa. 558 * 559 * @param ch The Windows or Linux name separator. 560 * @return The Windows or Linux name separator. 
561 */ 562 static char flipSeparator(final char ch) { 563 if (ch == UNIX_NAME_SEPARATOR) { 564 return WINDOWS_NAME_SEPARATOR; 565 } 566 if (ch == WINDOWS_NAME_SEPARATOR) { 567 return UNIX_NAME_SEPARATOR; 568 } 569 throw new IllegalArgumentException(String.valueOf(ch)); 570 } 571 572 /** 573 * Special handling for NTFS ADS: Don't accept colon in the file name. 574 * 575 * @param fileName a file name 576 * @return ADS offsets. 577 */ 578 private static int getAdsCriticalOffset(final String fileName) { 579 // Step 1: Remove leading path segments. 580 final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR); 581 final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR); 582 if (offset1 == -1) { 583 if (offset2 == -1) { 584 return 0; 585 } 586 return offset2 + 1; 587 } 588 if (offset2 == -1) { 589 return offset1 + 1; 590 } 591 return Math.max(offset1, offset2) + 1; 592 } 593 594 /** 595 * Gets the base name, minus the full path and extension, from a full file name. 596 * <p> 597 * This method will handle a path in either UNIX or Windows format. 598 * The text after the last forward or backslash and before the last dot is returned. 599 * </p> 600 * <pre> 601 * a/b/c.txt --> c 602 * a\b\c.txt --> c 603 * a/b/c.foo.txt --> c.foo 604 * a.txt --> a 605 * a/b/c --> c 606 * a/b/c/ --> "" 607 * </pre> 608 * <p> 609 * The output will be the same irrespective of the machine that the code is running on. 610 * </p> 611 * 612 * @param fileName the file name, null returns null 613 * @return the name of the file without the path, or an empty string if none exists 614 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 615 */ 616 public static String getBaseName(final String fileName) { 617 return removeExtension(getName(fileName)); 618 } 619 620 /** 621 * Gets the extension of a fileName. 622 * <p> 623 * This method returns the textual part of the file name after the last dot. 624 * There must be no directory separator after the dot. 
625 * </p> 626 * <pre> 627 * foo.txt --> "txt" 628 * a/b/c.jpg --> "jpg" 629 * a/b.txt/c --> "" 630 * a/b/c --> "" 631 * </pre> 632 * <p> 633 * The output will be the same irrespective of the machine that the code is running on, with the 634 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 635 * </p> 636 * <p> 637 * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt". 638 * In this case, the name wouldn't be the name of a file, but the identifier of an 639 * alternate data stream (bar.txt) on the file foo.exe. The method used to return 640 * ".txt" here, which would be misleading. Commons IO 2.7 and later throw 641 * an {@link IllegalArgumentException} for names like this. 642 * </p> 643 * 644 * @param fileName the file name to retrieve the extension of. 645 * @return the extension of the file or an empty string if none exists or {@code null} 646 * if the file name is {@code null}. 647 * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact, 648 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 649 */ 650 public static String getExtension(final String fileName) throws IllegalArgumentException { 651 if (fileName == null) { 652 return null; 653 } 654 final int index = indexOfExtension(fileName); 655 if (index == NOT_FOUND) { 656 return EMPTY_STRING; 657 } 658 return fileName.substring(index + 1); 659 } 660 661 /** 662 * Gets the full path (prefix + path) from a full file name. 663 * <p> 664 * This method will handle a file in either UNIX or Windows format. 665 * The method is entirely text based, and returns the text before and 666 * including the last forward or backslash. 
667 * </p> 668 * <pre> 669 * C:\a\b\c.txt --> C:\a\b\ 670 * ~/a/b/c.txt --> ~/a/b/ 671 * a.txt --> "" 672 * a/b/c --> a/b/ 673 * a/b/c/ --> a/b/c/ 674 * C: --> C: 675 * C:\ --> C:\ 676 * ~ --> ~/ 677 * ~/ --> ~/ 678 * ~user --> ~user/ 679 * ~user/ --> ~user/ 680 * </pre> 681 * <p> 682 * The output will be the same irrespective of the machine that the code is running on. 683 * </p> 684 * 685 * @param fileName the file name, null returns null 686 * @return the path of the file, an empty string if none exists, null if invalid 687 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 688 */ 689 public static String getFullPath(final String fileName) { 690 return doGetFullPath(fileName, true); 691 } 692 693 /** 694 * Gets the full path (prefix + path) from a full file name, 695 * excluding the final directory separator. 696 * <p> 697 * This method will handle a file in either UNIX or Windows format. 698 * The method is entirely text based, and returns the text before the 699 * last forward or backslash. 700 * </p> 701 * <pre> 702 * C:\a\b\c.txt --> C:\a\b 703 * ~/a/b/c.txt --> ~/a/b 704 * a.txt --> "" 705 * a/b/c --> a/b 706 * a/b/c/ --> a/b/c 707 * C: --> C: 708 * C:\ --> C:\ 709 * ~ --> ~ 710 * ~/ --> ~ 711 * ~user --> ~user 712 * ~user/ --> ~user 713 * </pre> 714 * <p> 715 * The output will be the same irrespective of the machine that the code is running on. 716 * </p> 717 * 718 * @param fileName the file name, null returns null 719 * @return the path of the file, an empty string if none exists, null if invalid 720 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 721 */ 722 public static String getFullPathNoEndSeparator(final String fileName) { 723 return doGetFullPath(fileName, false); 724 } 725 726 /** 727 * Gets the name minus the path from a full file name. 728 * <p> 729 * This method will handle a file in either UNIX or Windows format. 
730 * The text after the last forward or backslash is returned. 731 * </p> 732 * <pre> 733 * a/b/c.txt --> c.txt 734 * a\b\c.txt --> c.txt 735 * a.txt --> a.txt 736 * a/b/c --> c 737 * a/b/c/ --> "" 738 * </pre> 739 * <p> 740 * The output will be the same irrespective of the machine that the code is running on. 741 * </p> 742 * 743 * @param fileName the file name, null returns null 744 * @return the name of the file without the path, or an empty string if none exists 745 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 746 */ 747 public static String getName(final String fileName) { 748 if (fileName == null) { 749 return null; 750 } 751 return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1); 752 } 753 754 /** 755 * Gets the path from a full file name, which excludes the prefix and the name. 756 * <p> 757 * This method will handle a file in either UNIX or Windows format. 758 * The method is entirely text based, and returns the text before and 759 * including the last forward or backslash. 760 * </p> 761 * <pre> 762 * C:\a\b\c.txt --> a\b\ 763 * ~/a/b/c.txt --> a/b/ 764 * a.txt --> "" 765 * a/b/c --> a/b/ 766 * a/b/c/ --> a/b/c/ 767 * </pre> 768 * <p> 769 * The output will be the same irrespective of the machine that the code is running on. 770 * </p> 771 * <p> 772 * This method drops the prefix from the result. 773 * See {@link #getFullPath(String)} for the method that retains the prefix. 774 * </p> 775 * 776 * @param fileName the file name, null returns null 777 * @return the path of the file, an empty string if none exists, null if invalid 778 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 779 */ 780 public static String getPath(final String fileName) { 781 return doGetPath(fileName, 1); 782 } 783 784 /** 785 * Gets the path (which excludes the prefix) from a full file name, and 786 * also excluding the final directory separator. 
787 * <p> 788 * This method will handle a file in either UNIX or Windows format. 789 * The method is entirely text based, and returns the text before the 790 * last forward or backslash. 791 * </p> 792 * <pre> 793 * C:\a\b\c.txt --> a\b 794 * ~/a/b/c.txt --> a/b 795 * a.txt --> "" 796 * a/b/c --> a/b 797 * a/b/c/ --> a/b/c 798 * </pre> 799 * <p> 800 * The output will be the same irrespective of the machine that the code is running on. 801 * </p> 802 * <p> 803 * This method drops the prefix from the result. 804 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 805 * </p> 806 * 807 * @param fileName the file name, null returns null 808 * @return the path of the file, an empty string if none exists, null if invalid 809 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}) 810 */ 811 public static String getPathNoEndSeparator(final String fileName) { 812 return doGetPath(fileName, 0); 813 } 814 815 /** 816 * Gets the prefix such as {@code C:/} or {@code ~/} from a full file name, 817 * <p> 818 * This method will handle a file in either UNIX or Windows format. 819 * The prefix includes the first slash in the full file name where applicable. 820 * </p> 821 * <pre> 822 * Windows: 823 * a\b\c.txt --> "" --> relative 824 * \a\b\c.txt --> "\" --> current drive absolute 825 * C:a\b\c.txt --> "C:" --> drive relative 826 * C:\a\b\c.txt --> "C:\" --> absolute 827 * \\server\a\b\c.txt --> "\\server\" --> UNC 828 * 829 * Unix: 830 * a/b/c.txt --> "" --> relative 831 * /a/b/c.txt --> "/" --> absolute 832 * ~/a/b/c.txt --> "~/" --> current user 833 * ~ --> "~/" --> current user (slash added) 834 * ~user/a/b/c.txt --> "~user/" --> named user 835 * ~user --> "~user/" --> named user (slash added) 836 * </pre> 837 * <p> 838 * The output will be the same irrespective of the machine that the code is running on. 839 * ie. both UNIX and Windows prefixes are matched regardless. 
840 * </p> 841 * 842 * @param fileName the file name, null returns null 843 * @return the prefix of the file, null if invalid 844 * @throws IllegalArgumentException if the result contains the null character ({@code U+0000}) 845 */ 846 public static String getPrefix(final String fileName) { 847 if (fileName == null) { 848 return null; 849 } 850 final int len = getPrefixLength(fileName); 851 if (len < 0) { 852 return null; 853 } 854 if (len > fileName.length()) { 855 requireNonNullChars(fileName); 856 return fileName + UNIX_NAME_SEPARATOR; 857 } 858 return requireNonNullChars(fileName.substring(0, len)); 859 } 860 861 /** 862 * Returns the length of the file name prefix, such as {@code C:/} or {@code ~/}. 863 * <p> 864 * This method will handle a file in either UNIX or Windows format. 865 * </p> 866 * <p> 867 * The prefix length includes the first slash in the full file name 868 * if applicable. Thus, it is possible that the length returned is greater 869 * than the length of the input string. 870 * </p> 871 * <pre> 872 * Windows: 873 * a\b\c.txt --> 0 --> relative 874 * \a\b\c.txt --> 1 --> current drive absolute 875 * C:a\b\c.txt --> 2 --> drive relative 876 * C:\a\b\c.txt --> 3 --> absolute 877 * \\server\a\b\c.txt --> 9 --> UNC 878 * \\\a\b\c.txt --> -1 --> error 879 * 880 * Unix: 881 * a/b/c.txt --> 0 --> relative 882 * /a/b/c.txt --> 1 --> absolute 883 * ~/a/b/c.txt --> 2 --> current user 884 * ~ --> 2 --> current user (slash added) 885 * ~user/a/b/c.txt --> 6 --> named user 886 * ~user --> 6 --> named user (slash added) 887 * //server/a/b/c.txt --> 9 888 * ///a/b/c.txt --> -1 --> error 889 * C: --> 0 --> valid file name as only null character and / are reserved characters 890 * </pre> 891 * <p> 892 * The output will be the same irrespective of the machine that the code is running on. 893 * ie. both UNIX and Windows prefixes are matched regardless. 894 * </p> 895 * <p> 896 * Note that a leading // (or \\) is used to indicate a UNC name on Windows. 
897 * These must be followed by a server name, so double-slashes are not collapsed 898 * to a single slash at the start of the file name. 899 * </p> 900 * 901 * @param fileName the file name to find the prefix in, null returns -1 902 * @return the length of the prefix, -1 if invalid or null 903 */ 904 public static int getPrefixLength(final String fileName) { 905 if (fileName == null) { 906 return NOT_FOUND; 907 } 908 final int len = fileName.length(); 909 if (len == 0) { 910 return 0; 911 } 912 char ch0 = fileName.charAt(0); 913 if (ch0 == ':') { 914 return NOT_FOUND; 915 } 916 if (len == 1) { 917 if (ch0 == '~') { 918 return 2; // return a length greater than the input 919 } 920 return isSeparator(ch0) ? 1 : 0; 921 } 922 if (ch0 == '~') { 923 int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1); 924 int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1); 925 if (posUnix == NOT_FOUND && posWin == NOT_FOUND) { 926 return len + 1; // return a length greater than the input 927 } 928 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 929 posWin = posWin == NOT_FOUND ? posUnix : posWin; 930 return Math.min(posUnix, posWin) + 1; 931 } 932 final char ch1 = fileName.charAt(1); 933 if (ch1 == ':') { 934 ch0 = Character.toUpperCase(ch0); 935 if (ch0 >= 'A' && ch0 <= 'Z') { 936 if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) { 937 return 0; 938 } 939 if (len == 2 || !isSeparator(fileName.charAt(2))) { 940 return 2; 941 } 942 return 3; 943 } 944 if (ch0 == UNIX_NAME_SEPARATOR) { 945 return 1; 946 } 947 return NOT_FOUND; 948 949 } 950 if (!isSeparator(ch0) || !isSeparator(ch1)) { 951 return isSeparator(ch0) ? 1 : 0; 952 } 953 int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2); 954 int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2); 955 if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) { 956 return NOT_FOUND; 957 } 958 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 959 posWin = posWin == NOT_FOUND ? 
posUnix : posWin; 960 final int pos = Math.min(posUnix, posWin) + 1; 961 final String hostnamePart = fileName.substring(2, pos - 1); 962 return isValidHostName(hostnamePart) ? pos : NOT_FOUND; 963 } 964 965 /** 966 * Returns the index of the last extension separator character, which is a dot. 967 * <p> 968 * This method also checks that there is no directory separator after the last dot. To do this it uses 969 * {@link #indexOfLastSeparator(String)} which will handle a file in either UNIX or Windows format. 970 * </p> 971 * <p> 972 * The output will be the same irrespective of the machine that the code is running on, with the 973 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 974 * </p> 975 * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt". 976 * In this case, the name wouldn't be the name of a file, but the identifier of an 977 * alternate data stream (bar.txt) on the file foo.exe. The method used to return 978 * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing 979 * an {@link IllegalArgumentException} for names like this. 980 * 981 * @param fileName 982 * the file name to find the last extension separator in, null returns -1 983 * @return the index of the last extension separator character, or -1 if there is no such character 984 * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact, 985 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 986 */ 987 public static int indexOfExtension(final String fileName) throws IllegalArgumentException { 988 if (fileName == null) { 989 return NOT_FOUND; 990 } 991 if (isSystemWindows()) { 992 // Special handling for NTFS ADS: Don't accept colon in the file name. 
993 final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName)); 994 if (offset != -1) { 995 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden."); 996 } 997 } 998 final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR); 999 final int lastSeparator = indexOfLastSeparator(fileName); 1000 return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; 1001 } 1002 1003 /** 1004 * Returns the index of the last directory separator character. 1005 * <p> 1006 * This method will handle a file in either UNIX or Windows format. 1007 * The position of the last forward or backslash is returned. 1008 * <p> 1009 * The output will be the same irrespective of the machine that the code is running on. 1010 * 1011 * @param fileName the file name to find the last path separator in, null returns -1 1012 * @return the index of the last separator character, or -1 if there 1013 * is no such character 1014 */ 1015 public static int indexOfLastSeparator(final String fileName) { 1016 if (fileName == null) { 1017 return NOT_FOUND; 1018 } 1019 final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR); 1020 final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR); 1021 return Math.max(lastUnixPos, lastWindowsPos); 1022 } 1023 1024 private static boolean isEmpty(final String string) { 1025 return string == null || string.isEmpty(); 1026 } 1027 1028 /** 1029 * Checks whether the extension of the file name is one of those specified. 1030 * <p> 1031 * This method obtains the extension as the textual part of the file name 1032 * after the last dot. There must be no directory separator after the dot. 1033 * The extension check is case-sensitive on all platforms. 
1034 * 1035 * @param fileName the file name, null returns false 1036 * @param extensions the extensions to check for, null checks for no extension 1037 * @return true if the file name is one of the extensions 1038 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 1039 */ 1040 public static boolean isExtension(final String fileName, final Collection<String> extensions) { 1041 if (fileName == null) { 1042 return false; 1043 } 1044 requireNonNullChars(fileName); 1045 1046 if (extensions == null || extensions.isEmpty()) { 1047 return indexOfExtension(fileName) == NOT_FOUND; 1048 } 1049 return extensions.contains(getExtension(fileName)); 1050 } 1051 1052 /** 1053 * Checks whether the extension of the file name is that specified. 1054 * <p> 1055 * This method obtains the extension as the textual part of the file name 1056 * after the last dot. There must be no directory separator after the dot. 1057 * The extension check is case-sensitive on all platforms. 1058 * 1059 * @param fileName the file name, null returns false 1060 * @param extension the extension to check for, null or empty checks for no extension 1061 * @return true if the file name has the specified extension 1062 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 1063 */ 1064 public static boolean isExtension(final String fileName, final String extension) { 1065 if (fileName == null) { 1066 return false; 1067 } 1068 requireNonNullChars(fileName); 1069 1070 if (isEmpty(extension)) { 1071 return indexOfExtension(fileName) == NOT_FOUND; 1072 } 1073 return getExtension(fileName).equals(extension); 1074 } 1075 1076 /** 1077 * Checks whether the extension of the file name is one of those specified. 1078 * <p> 1079 * This method obtains the extension as the textual part of the file name 1080 * after the last dot. There must be no directory separator after the dot. 
1081 * The extension check is case-sensitive on all platforms. 1082 * 1083 * @param fileName the file name, null returns false 1084 * @param extensions the extensions to check for, null checks for no extension 1085 * @return true if the file name is one of the extensions 1086 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 1087 */ 1088 public static boolean isExtension(final String fileName, final String... extensions) { 1089 if (fileName == null) { 1090 return false; 1091 } 1092 requireNonNullChars(fileName); 1093 1094 if (extensions == null || extensions.length == 0) { 1095 return indexOfExtension(fileName) == NOT_FOUND; 1096 } 1097 final String fileExt = getExtension(fileName); 1098 return Stream.of(extensions).anyMatch(fileExt::equals); 1099 } 1100 1101 /** 1102 * Checks whether a given string represents a valid IPv4 address. 1103 * 1104 * @param name the name to validate 1105 * @return true if the given name is a valid IPv4 address 1106 */ 1107 // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address 1108 private static boolean isIPv4Address(final String name) { 1109 final Matcher m = IPV4_PATTERN.matcher(name); 1110 if (!m.matches() || m.groupCount() != 4) { 1111 return false; 1112 } 1113 1114 // verify that address subgroups are legal 1115 for (int i = 1; i <= 4; i++) { 1116 final String ipSegment = m.group(i); 1117 final int iIpSegment = Integer.parseInt(ipSegment); 1118 if (iIpSegment > IPV4_MAX_OCTET_VALUE) { 1119 return false; 1120 } 1121 1122 if (ipSegment.length() > 1 && ipSegment.startsWith("0")) { 1123 return false; 1124 } 1125 1126 } 1127 1128 return true; 1129 } 1130 1131 // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address 1132 /** 1133 * Checks whether a given string represents a valid IPv6 address. 
1134 * 1135 * @param inet6Address the name to validate 1136 * @return true if the given name is a valid IPv6 address 1137 */ 1138 private static boolean isIPv6Address(final String inet6Address) { 1139 final boolean containsCompressedZeroes = inet6Address.contains("::"); 1140 if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) { 1141 return false; 1142 } 1143 if (inet6Address.startsWith(":") && !inet6Address.startsWith("::") 1144 || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) { 1145 return false; 1146 } 1147 String[] octets = inet6Address.split(":"); 1148 if (containsCompressedZeroes) { 1149 final List<String> octetList = new ArrayList<>(Arrays.asList(octets)); 1150 if (inet6Address.endsWith("::")) { 1151 // String.split() drops ending empty segments 1152 octetList.add(""); 1153 } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) { 1154 octetList.remove(0); 1155 } 1156 octets = octetList.toArray(EMPTY_STRING_ARRAY); 1157 } 1158 if (octets.length > IPV6_MAX_HEX_GROUPS) { 1159 return false; 1160 } 1161 int validOctets = 0; 1162 int emptyOctets = 0; // consecutive empty chunks 1163 for (int index = 0; index < octets.length; index++) { 1164 final String octet = octets[index]; 1165 if (octet.isEmpty()) { 1166 emptyOctets++; 1167 if (emptyOctets > 1) { 1168 return false; 1169 } 1170 } else { 1171 emptyOctets = 0; 1172 // Is last chunk an IPv4 address? 
1173 if (index == octets.length - 1 && octet.contains(".")) { 1174 if (!isIPv4Address(octet)) { 1175 return false; 1176 } 1177 validOctets += 2; 1178 continue; 1179 } 1180 if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) { 1181 return false; 1182 } 1183 final int octetInt; 1184 try { 1185 octetInt = Integer.parseInt(octet, BASE_16); 1186 } catch (final NumberFormatException e) { 1187 return false; 1188 } 1189 if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) { 1190 return false; 1191 } 1192 } 1193 validOctets++; 1194 } 1195 return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes); 1196 } 1197 1198 /** 1199 * Checks whether a given string is a valid host name according to 1200 * RFC 3986 - not accepting IP addresses. 1201 * 1202 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" 1203 * @param name the hostname to validate 1204 * @return true if the given name is a valid host name 1205 */ 1206 private static boolean isRFC3986HostName(final String name) { 1207 final String[] parts = name.split("\\.", -1); 1208 for (int i = 0; i < parts.length; i++) { 1209 if (parts[i].isEmpty()) { 1210 // trailing dot is legal, otherwise we've hit a .. sequence 1211 return i == parts.length - 1; 1212 } 1213 if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) { 1214 return false; 1215 } 1216 } 1217 return true; 1218 } 1219 1220 /** 1221 * Checks if the character is a separator. 1222 * 1223 * @param ch the character to check 1224 * @return true if it is a separator character 1225 */ 1226 private static boolean isSeparator(final char ch) { 1227 return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR; 1228 } 1229 1230 /** 1231 * Determines if Windows file system is in use. 
1232 * 1233 * @return true if the system is Windows 1234 */ 1235 static boolean isSystemWindows() { 1236 return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR; 1237 } 1238 1239 /** 1240 * Checks whether a given string is a valid host name according to 1241 * RFC 3986. 1242 * 1243 * <p>Accepted are IP addresses (v4 and v6) as well as what the 1244 * RFC calls a "reg-name". Percent encoded names don't seem to be 1245 * valid names in UNC paths.</p> 1246 * 1247 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" 1248 * @param name the hostname to validate 1249 * @return true if the given name is a valid host name 1250 */ 1251 private static boolean isValidHostName(final String name) { 1252 return isIPv6Address(name) || isRFC3986HostName(name); 1253 } 1254 1255 /** 1256 * Normalizes a path, removing double and single dot path steps. 1257 * <p> 1258 * This method normalizes a path to a standard format. 1259 * The input may contain separators in either UNIX or Windows format. 1260 * The output will contain separators in the format of the system. 1261 * <p> 1262 * A trailing slash will be retained. 1263 * A double slash will be merged to a single slash (but UNC names are handled). 1264 * A single dot path segment will be removed. 1265 * A double dot will cause that path segment and the one before to be removed. 1266 * If the double dot has no parent path segment, {@code null} is returned. 1267 * <p> 1268 * The output will be the same on both UNIX and Windows except 1269 * for the separator character. 1270 * <pre> 1271 * /foo// --> /foo/ 1272 * /foo/./ --> /foo/ 1273 * /foo/../bar --> /bar 1274 * /foo/../bar/ --> /bar/ 1275 * /foo/../bar/../baz --> /baz 1276 * //foo//./bar --> //foo/bar 1277 * /../ --> null 1278 * ../foo --> null 1279 * foo/bar/.. 
--> foo/ 1280 * foo/../../bar --> null 1281 * foo/../bar --> bar 1282 * //server/foo/../bar --> //server/bar 1283 * //server/../bar --> null 1284 * C:\foo\..\bar --> C:\bar 1285 * C:\..\bar --> null 1286 * ~/foo/../bar/ --> ~/bar/ 1287 * ~/../bar --> null 1288 * </pre> 1289 * (Note the file separator will be correct for Windows/Unix.) 1290 * 1291 * @param fileName the file name to normalize, null returns null 1292 * @return the normalized fileName, or null if invalid 1293 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 1294 */ 1295 public static String normalize(final String fileName) { 1296 return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true); 1297 } 1298 1299 /** 1300 * Normalizes a path, removing double and single dot path steps. 1301 * <p> 1302 * This method normalizes a path to a standard format. 1303 * The input may contain separators in either UNIX or Windows format. 1304 * The output will contain separators in the format specified. 1305 * <p> 1306 * A trailing slash will be retained. 1307 * A double slash will be merged to a single slash (but UNC names are handled). 1308 * A single dot path segment will be removed. 1309 * A double dot will cause that path segment and the one before to be removed. 1310 * If the double dot has no parent path segment to work with, {@code null} 1311 * is returned. 1312 * <p> 1313 * The output will be the same on both UNIX and Windows except 1314 * for the separator character. 1315 * <pre> 1316 * /foo// --> /foo/ 1317 * /foo/./ --> /foo/ 1318 * /foo/../bar --> /bar 1319 * /foo/../bar/ --> /bar/ 1320 * /foo/../bar/../baz --> /baz 1321 * //foo//./bar --> /foo/bar 1322 * /../ --> null 1323 * ../foo --> null 1324 * foo/bar/.. 
--> foo/ 1325 * foo/../../bar --> null 1326 * foo/../bar --> bar 1327 * //server/foo/../bar --> //server/bar 1328 * //server/../bar --> null 1329 * C:\foo\..\bar --> C:\bar 1330 * C:\..\bar --> null 1331 * ~/foo/../bar/ --> ~/bar/ 1332 * ~/../bar --> null 1333 * </pre> 1334 * The output will be the same on both UNIX and Windows including 1335 * the separator character. 1336 * 1337 * @param fileName the file name to normalize, null returns null 1338 * @param unixSeparator {@code true} if a UNIX separator should 1339 * be used or {@code false} if a Windows separator should be used. 1340 * @return the normalized fileName, or null if invalid 1341 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 1342 * @since 2.0 1343 */ 1344 public static String normalize(final String fileName, final boolean unixSeparator) { 1345 return doNormalize(fileName, toSeparator(unixSeparator), true); 1346 } 1347 1348 /** 1349 * Normalizes a path, removing double and single dot path steps, 1350 * and removing any final directory separator. 1351 * <p> 1352 * This method normalizes a path to a standard format. 1353 * The input may contain separators in either UNIX or Windows format. 1354 * The output will contain separators in the format of the system. 1355 * <p> 1356 * A trailing slash will be removed. 1357 * A double slash will be merged to a single slash (but UNC names are handled). 1358 * A single dot path segment will be removed. 1359 * A double dot will cause that path segment and the one before to be removed. 1360 * If the double dot has no parent path segment to work with, {@code null} 1361 * is returned. 1362 * <p> 1363 * The output will be the same on both UNIX and Windows except 1364 * for the separator character. 
1365 * <pre> 1366 * /foo// --> /foo 1367 * /foo/./ --> /foo 1368 * /foo/../bar --> /bar 1369 * /foo/../bar/ --> /bar 1370 * /foo/../bar/../baz --> /baz 1371 * //foo//./bar --> /foo/bar 1372 * /../ --> null 1373 * ../foo --> null 1374 * foo/bar/.. --> foo 1375 * foo/../../bar --> null 1376 * foo/../bar --> bar 1377 * //server/foo/../bar --> //server/bar 1378 * //server/../bar --> null 1379 * C:\foo\..\bar --> C:\bar 1380 * C:\..\bar --> null 1381 * ~/foo/../bar/ --> ~/bar 1382 * ~/../bar --> null 1383 * </pre> 1384 * (Note the file separator returned will be correct for Windows/Unix) 1385 * 1386 * @param fileName the file name to normalize, null returns null 1387 * @return the normalized fileName, or null if invalid 1388 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 1389 */ 1390 public static String normalizeNoEndSeparator(final String fileName) { 1391 return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false); 1392 } 1393 1394 /** 1395 * Normalizes a path, removing double and single dot path steps, 1396 * and removing any final directory separator. 1397 * <p> 1398 * This method normalizes a path to a standard format. 1399 * The input may contain separators in either UNIX or Windows format. 1400 * The output will contain separators in the format specified. 1401 * <p> 1402 * A trailing slash will be removed. 1403 * A double slash will be merged to a single slash (but UNC names are handled). 1404 * A single dot path segment will be removed. 1405 * A double dot will cause that path segment and the one before to be removed. 1406 * If the double dot has no parent path segment to work with, {@code null} 1407 * is returned. 1408 * <p> 1409 * The output will be the same on both UNIX and Windows including 1410 * the separator character. 
1411 * <pre> 1412 * /foo// --> /foo 1413 * /foo/./ --> /foo 1414 * /foo/../bar --> /bar 1415 * /foo/../bar/ --> /bar 1416 * /foo/../bar/../baz --> /baz 1417 * //foo//./bar --> /foo/bar 1418 * /../ --> null 1419 * ../foo --> null 1420 * foo/bar/.. --> foo 1421 * foo/../../bar --> null 1422 * foo/../bar --> bar 1423 * //server/foo/../bar --> //server/bar 1424 * //server/../bar --> null 1425 * C:\foo\..\bar --> C:\bar 1426 * C:\..\bar --> null 1427 * ~/foo/../bar/ --> ~/bar 1428 * ~/../bar --> null 1429 * </pre> 1430 * 1431 * @param fileName the file name to normalize, null returns null 1432 * @param unixSeparator {@code true} if a UNIX separator should 1433 * be used or {@code false} if a Windows separator should be used. 1434 * @return the normalized fileName, or null if invalid 1435 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 1436 * @since 2.0 1437 */ 1438 public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) { 1439 return doNormalize(fileName, toSeparator(unixSeparator), false); 1440 } 1441 1442 /** 1443 * Removes the extension from a fileName. 1444 * <p> 1445 * This method returns the textual part of the file name before the last dot. 1446 * There must be no directory separator after the dot. 1447 * <pre> 1448 * foo.txt --> foo 1449 * .txt --> "" (empty string) 1450 * a\b\c.jpg --> a\b\c 1451 * /a/b/c.jpg --> /a/b/c 1452 * a\b\c --> a\b\c 1453 * a.b\c --> a.b\c 1454 * </pre> 1455 * <p> 1456 * The output will be the same irrespective of the machine that the code is running on. 
1457 * 1458 * @param fileName the file name, null returns null 1459 * @return the file name minus the extension 1460 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}) 1461 */ 1462 public static String removeExtension(final String fileName) { 1463 if (fileName == null) { 1464 return null; 1465 } 1466 requireNonNullChars(fileName); 1467 1468 final int index = indexOfExtension(fileName); 1469 if (index == NOT_FOUND) { 1470 return fileName; 1471 } 1472 return fileName.substring(0, index); 1473 } 1474 1475 /** 1476 * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions. 1477 * 1478 * This may be used to defend against poison byte attacks. 1479 * 1480 * @param path the path to check 1481 * @return The input 1482 * @throws IllegalArgumentException if path contains the null character ({@code U+0000}) 1483 */ 1484 private static String requireNonNullChars(final String path) { 1485 if (path.indexOf(0) >= 0) { 1486 throw new IllegalArgumentException( 1487 "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it"); 1488 } 1489 return path; 1490 } 1491 1492 /** 1493 * Converts all separators to the system separator. 1494 * 1495 * @param path the path to be changed, null ignored. 1496 * @return the updated path. 1497 */ 1498 public static String separatorsToSystem(final String path) { 1499 return FileSystem.getCurrent().normalizeSeparators(path); 1500 } 1501 1502 /** 1503 * Converts all separators to the UNIX separator of forward slash. 1504 * 1505 * @param path the path to be changed, null ignored. 1506 * @return the new path. 1507 */ 1508 public static String separatorsToUnix(final String path) { 1509 return FileSystem.LINUX.normalizeSeparators(path); 1510 } 1511 1512 /** 1513 * Converts all separators to the Windows separator of backslash. 
1514 * 1515 * @param path the path to be changed, null ignored. 1516 * @return the updated path. 1517 */ 1518 public static String separatorsToWindows(final String path) { 1519 return FileSystem.WINDOWS.normalizeSeparators(path); 1520 } 1521 1522 /** 1523 * Splits a string into a number of tokens. 1524 * The text is split by '?' and '*'. 1525 * Where multiple '*' occur consecutively they are collapsed into a single '*'. 1526 * 1527 * @param text the text to split 1528 * @return the array of tokens, never null 1529 */ 1530 static String[] splitOnTokens(final String text) { 1531 // used by wildcardMatch 1532 // package level so a unit test may run on this 1533 1534 if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { 1535 return new String[] { text }; 1536 } 1537 1538 final char[] array = text.toCharArray(); 1539 final ArrayList<String> list = new ArrayList<>(); 1540 final StringBuilder buffer = new StringBuilder(); 1541 char prevChar = 0; 1542 for (final char ch : array) { 1543 if (ch == '?' || ch == '*') { 1544 if (buffer.length() != 0) { 1545 list.add(buffer.toString()); 1546 buffer.setLength(0); 1547 } 1548 if (ch == '?') { 1549 list.add("?"); 1550 } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*' 1551 list.add("*"); 1552 } 1553 } else { 1554 buffer.append(ch); 1555 } 1556 prevChar = ch; 1557 } 1558 if (buffer.length() != 0) { 1559 list.add(buffer.toString()); 1560 } 1561 1562 return list.toArray(EMPTY_STRING_ARRAY); 1563 } 1564 1565 /** 1566 * Returns '/' if given true, '\\' otherwise. 1567 * 1568 * @param unixSeparator which separator to return. 1569 * @return '/' if given true, '\\' otherwise. 1570 */ 1571 private static char toSeparator(final boolean unixSeparator) { 1572 return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR; 1573 } 1574 1575 /** 1576 * Checks a fileName to see if it matches the specified wildcard matcher, 1577 * always testing case-sensitive. 
1578 * <p> 1579 * The wildcard matcher uses the characters '?' and '*' to represent a 1580 * single or multiple (zero or more) wildcard characters. 1581 * This is the same as often found on DOS/Unix command lines. 1582 * The check is case-sensitive always. 1583 * <pre> 1584 * wildcardMatch("c.txt", "*.txt") --> true 1585 * wildcardMatch("c.txt", "*.jpg") --> false 1586 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1587 * wildcardMatch("c.txt", "*.???") --> true 1588 * wildcardMatch("c.txt", "*.????") --> false 1589 * </pre> 1590 * N.B. the sequence "*?" does not work properly at present in match strings. 1591 * 1592 * @param fileName the file name to match on 1593 * @param wildcardMatcher the wildcard string to match against 1594 * @return true if the file name matches the wildcard string 1595 * @see IOCase#SENSITIVE 1596 */ 1597 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) { 1598 return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE); 1599 } 1600 1601 /** 1602 * Checks a fileName to see if it matches the specified wildcard matcher 1603 * allowing control over case-sensitivity. 1604 * <p> 1605 * The wildcard matcher uses the characters '?' and '*' to represent a 1606 * single or multiple (zero or more) wildcard characters. 1607 * N.B. the sequence "*?" does not work properly at present in match strings. 
1608 * 1609 * @param fileName the file name to match on 1610 * @param wildcardMatcher the wildcard string to match against 1611 * @param ioCase what case sensitivity rule to use, null means case-sensitive 1612 * @return true if the file name matches the wildcard string 1613 * @since 1.3 1614 */ 1615 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) { 1616 if (fileName == null && wildcardMatcher == null) { 1617 return true; 1618 } 1619 if (fileName == null || wildcardMatcher == null) { 1620 return false; 1621 } 1622 ioCase = IOCase.value(ioCase, IOCase.SENSITIVE); 1623 final String[] wcs = splitOnTokens(wildcardMatcher); 1624 boolean anyChars = false; 1625 int textIdx = 0; 1626 int wcsIdx = 0; 1627 final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length); 1628 1629 // loop around a backtrack stack, to handle complex * matching 1630 do { 1631 if (!backtrack.isEmpty()) { 1632 final int[] array = backtrack.pop(); 1633 wcsIdx = array[0]; 1634 textIdx = array[1]; 1635 anyChars = true; 1636 } 1637 1638 // loop whilst tokens and text left to process 1639 while (wcsIdx < wcs.length) { 1640 1641 if (wcs[wcsIdx].equals("?")) { 1642 // ? 
so move to next text char 1643 textIdx++; 1644 if (textIdx > fileName.length()) { 1645 break; 1646 } 1647 anyChars = false; 1648 1649 } else if (wcs[wcsIdx].equals("*")) { 1650 // set any chars status 1651 anyChars = true; 1652 if (wcsIdx == wcs.length - 1) { 1653 textIdx = fileName.length(); 1654 } 1655 1656 } else { 1657 // matching text token 1658 if (anyChars) { 1659 // any chars then try to locate text token 1660 textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]); 1661 if (textIdx == NOT_FOUND) { 1662 // token not found 1663 break; 1664 } 1665 final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]); 1666 if (repeat >= 0) { 1667 backtrack.push(new int[] {wcsIdx, repeat}); 1668 } 1669 } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) { 1670 // matching from current position 1671 // couldn't match token 1672 break; 1673 } 1674 1675 // matched text token, move text index to end of matched token 1676 textIdx += wcs[wcsIdx].length(); 1677 anyChars = false; 1678 } 1679 1680 wcsIdx++; 1681 } 1682 1683 // full match 1684 if (wcsIdx == wcs.length && textIdx == fileName.length()) { 1685 return true; 1686 } 1687 1688 } while (!backtrack.isEmpty()); 1689 1690 return false; 1691 } 1692 1693 /** 1694 * Checks a fileName to see if it matches the specified wildcard matcher 1695 * using the case rules of the system. 1696 * <p> 1697 * The wildcard matcher uses the characters '?' and '*' to represent a 1698 * single or multiple (zero or more) wildcard characters. 1699 * This is the same as often found on DOS/Unix command lines. 1700 * The check is case-sensitive on UNIX and case-insensitive on Windows. 1701 * <pre> 1702 * wildcardMatch("c.txt", "*.txt") --> true 1703 * wildcardMatch("c.txt", "*.jpg") --> false 1704 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1705 * wildcardMatch("c.txt", "*.???") --> true 1706 * wildcardMatch("c.txt", "*.????") --> false 1707 * </pre> 1708 * N.B. the sequence "*?" 
does not work properly at present in match strings. 1709 * 1710 * @param fileName the file name to match on 1711 * @param wildcardMatcher the wildcard string to match against 1712 * @return true if the file name matches the wildcard string 1713 * @see IOCase#SYSTEM 1714 */ 1715 public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) { 1716 return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM); 1717 } 1718 1719 /** 1720 * Instances should NOT be constructed in standard programming. 1721 * 1722 * @deprecated TODO Make private in 3.0. 1723 */ 1724 @Deprecated 1725 public FilenameUtils() { 1726 // empty 1727 } 1728}