001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.File;
020import java.util.ArrayDeque;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.Deque;
025import java.util.List;
026import java.util.regex.Matcher;
027import java.util.regex.Pattern;
028import java.util.stream.Stream;
029
030/**
031 * General file name and file path manipulation utilities. The methods in this class
032 * operate on strings that represent relative or absolute paths. Nothing in this class
033 * ever accesses the file system, or depends on whether a path points to a file that exists.
034 * <p>
035 * When dealing with file names, you can hit problems when moving from a Windows
036 * based development machine to a UNIX based production machine.
037 * This class aims to help avoid those problems.
038 * </p>
039 * <p>
040 * <strong>NOTE</strong>: You may be able to avoid using this class entirely simply by
041 * using JDK {@link File File} objects and the two argument constructor
042 * {@link File#File(java.io.File, String) File(File,String)}.
043 * </p>
044 * <p>
045 * Most methods in this class are designed to work the same on both UNIX and Windows.
046 * Those that don't include 'System', 'Unix', or 'Windows' in their name.
047 * </p>
048 * <p>
049 * Most methods recognize both separators (forward and backslashes), and both
050 * sets of prefixes. See the Javadoc of each method for details.
051 * </p>
052 * <p>
053 * This class defines six components within a path (sometimes called a file name or a full file name).
054 * Given an absolute Windows path such as C:\dev\project\file.txt they are:
055 * </p>
056 * <ul>
057 * <li>the full file name, or just file name - C:\dev\project\file.txt</li>
058 * <li>the prefix - C:\</li>
059 * <li>the path - dev\project\</li>
060 * <li>the full path - C:\dev\project\</li>
061 * <li>the name - file.txt</li>
062 * <li>the base name - file</li>
063 * <li>the extension - txt</li>
064 * </ul>
065 * <p>
066 * Given an absolute UNIX path such as /dev/project/file.txt they are:
067 * </p>
068 * <ul>
069 * <li>the full file name, or just file name - /dev/project/file.txt</li>
070 * <li>the prefix - /</li>
071 * <li>the path - dev/project/</li>
072 * <li>the full path - /dev/project/</li>
073 * <li>the name - file.txt</li>
074 * <li>the base name - file</li>
075 * <li>the extension - txt</li>
076 * </ul>
077 * <p>
078 * Given a relative Windows path such as dev\project\file.txt they are:
079 * </p>
080 * <ul>
081 * <li>the full file name, or just file name - dev\project\file.txt</li>
082 * <li>the prefix - "" (empty string, the path is relative)</li>
083 * <li>the path - dev\project\</li>
084 * <li>the full path - dev\project\</li>
085 * <li>the name - file.txt</li>
086 * <li>the base name - file</li>
087 * <li>the extension - txt</li>
088 * </ul>
089 * <p>
090 * Given a relative UNIX path such as dev/project/file.txt they are:
091 * </p>
092 * <ul>
093 * <li>the full file name, or just file name - dev/project/file.txt</li>
094 * <li>the prefix - "" (empty string, the path is relative)</li>
095 * <li>the path - dev/project/</li>
096 * <li>the full path - dev/project/</li>
097 * <li>the name - file.txt</li>
098 * <li>the base name - file</li>
099 * <li>the extension - txt</li>
100 * </ul>
101 *
102 *
103 * <p>
104 * This class works best if directory names end with a separator.
105 * If you omit the last separator, it is impossible to determine if the last component
106 * corresponds to a file or a directory. This class treats final components
107 * that do not end with a separator as files, not directories.
108 * </p>
109 * <p>
110 * This class only supports UNIX and Windows style names.
111 * Prefixes are matched as follows:
112 * </p>
113 * <pre>
114 * Windows:
115 * a\b\c.txt           --&gt; ""          --&gt; relative
116 * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
117 * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
118 * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
119 * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
120 *
121 * Unix:
122 * a/b/c.txt           --&gt; ""          --&gt; relative
123 * /a/b/c.txt          --&gt; "/"         --&gt; absolute
124 * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
125 * ~                   --&gt; "~/"        --&gt; current user (slash added)
126 * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
127 * ~user               --&gt; "~user/"    --&gt; named user (slash added)
128 * </pre>
129 * <p>
130 * Both prefix styles are matched, irrespective of the machine that you are
131 * currently running on.
132 * </p>
133 *
134 * @since 1.1
135 */
136public class FilenameUtils {
137
138    private static final String[] EMPTY_STRING_ARRAY = {}; // shared zero-length String array; its users lie outside this excerpt
139
    /** The empty string; returned by several methods of this class when there is nothing to return. */
140    private static final String EMPTY_STRING = "";
141
    /** Index value meaning "not found"; {@code getExtension} compares the result of {@code indexOfExtension} against it. */
142    private static final int NOT_FOUND = -1;
143
144    /**
145     * The extension separator character.
146     * @since 1.4
147     */
148    public static final char EXTENSION_SEPARATOR = '.';
149
150    /**
151     * The extension separator String.
152     * @since 1.4
153     */
154    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
155
156    /**
157     * The UNIX separator character.
158     */
159    private static final char UNIX_NAME_SEPARATOR = '/';
160
161    /**
162     * The Windows separator character.
163     */
164    private static final char WINDOWS_NAME_SEPARATOR = '\\';
165
166    /**
167     * The system separator character.
168     */
169    private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;
170
171    /**
172     * The separator character that is the opposite of the system separator.
173     */
174    private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);
175
    /**
     * Matches a complete string made of four dot-separated groups of one to three decimal digits,
     * capturing each group. The pattern itself places no bound on the numeric value of a group.
     */
176    private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");
177
    /** NOTE(review): presumably the largest value accepted for one IPv4 group; the code using it is outside this excerpt. */
178    private static final int IPV4_MAX_OCTET_VALUE = 255;
179
    /** NOTE(review): presumably the maximum number of hexadecimal groups in an IPv6 address; confirm against the validating code. */
180    private static final int IPV6_MAX_HEX_GROUPS = 8;
181
    /** NOTE(review): presumably the maximum number of hexadecimal digits in one IPv6 group; confirm against the validating code. */
182    private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;
183
    /** The largest value that fits in 16 unsigned bits ({@code 0xffff}). */
184    private static final int MAX_UNSIGNED_SHORT = 0xffff;
185
    /** The radix for hexadecimal numbers. */
186    private static final int BASE_16 = 16;
187
    /**
     * Matches a complete string that starts with an ASCII letter or digit and continues with any
     * number of ASCII letters, digits or hyphens.
     */
188    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
189
190    /**
191     * Concatenates a fileName to a base path using normal command line style rules.
192     * <p>
193     * The effect is equivalent to resultant directory after changing
194     * directory to the first argument, followed by changing directory to
195     * the second argument.
196     * </p>
197     * <p>
198     * The first argument is the base path, the second is the path to concatenate.
199     * The returned path is always normalized via {@link #normalize(String)},
200     * thus {@code ..} is handled.
201     * </p>
202     * <p>
203     * If {@code pathToAdd} is absolute (has an absolute prefix), then
204     * it will be normalized and returned.
205     * Otherwise, the paths will be joined, normalized and returned.
206     * </p>
207     * <p>
208     * The output will be the same on both UNIX and Windows except
209     * for the separator character.
210     * </p>
211     * <pre>
212     * /foo/      + bar        --&gt;  /foo/bar
213     * /foo       + bar        --&gt;  /foo/bar
214     * /foo       + /bar       --&gt;  /bar
215     * /foo       + C:/bar     --&gt;  C:/bar
216     * /foo       + C:bar      --&gt;  C:bar [1]
217     * /foo/a/    + ../bar     --&gt;  /foo/bar
218     * /foo/      + ../../bar  --&gt;  null
219     * /foo/      + /bar       --&gt;  /bar
220     * /foo/..    + /bar       --&gt;  /bar
221     * /foo       + bar/c.txt  --&gt;  /foo/bar/c.txt
222     * /foo/c.txt + bar        --&gt;  /foo/c.txt/bar [2]
223     * </pre>
224     * <p>
225     * [1] Note that the Windows relative drive prefix is unreliable when
226     * used with this method.
227     * </p>
228     * <p>
229     * [2] Note that the first parameter must be a path. If it ends with a name, then
230     * the name will be built into the concatenated path. If this might be a problem,
231     * use {@link #getFullPath(String)} on the base path argument.
232     * </p>
233     *
234     * @param basePath  the base path to attach to, always treated as a path
235     * @param fullFileNameToAdd  the file name (or path) to attach to the base
236     * @return the concatenated path, or null if invalid
237     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
238     */
239    public static String concat(final String basePath, final String fullFileNameToAdd) {
240        final int prefix = getPrefixLength(fullFileNameToAdd);
241        if (prefix < 0) {
242            return null;
243        }
244        if (prefix > 0) {
245            return normalize(fullFileNameToAdd);
246        }
247        if (basePath == null) {
248            return null;
249        }
250        final int len = basePath.length();
251        if (len == 0) {
252            return normalize(fullFileNameToAdd);
253        }
254        final char ch = basePath.charAt(len - 1);
255        if (isSeparator(ch)) {
256            return normalize(basePath + fullFileNameToAdd);
257        }
258        return normalize(basePath + '/' + fullFileNameToAdd);
259    }
260
261    /**
262     * Determines whether the {@code parent} directory contains the {@code child} (a file or directory).
263     * This does not read from the file system, and there is no guarantee or expectation that
264     * these paths actually exist.
265     * <p>
266     * The files names are expected to be normalized.
267     * </p>
268     *
269     * Edge cases:
270     * <ul>
271     * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li>
272     * <li>A directory does not contain itself: return false</li>
273     * <li>A null child file is not contained in any parent: return false</li>
274     * </ul>
275     *
276     * @param canonicalParent the path string to consider as the parent.
277     * @param canonicalChild the path string to consider as the child.
278     * @return true if the candidate leaf is under the specified composite. False otherwise.
279     * @since 2.2
280     * @see FileUtils#directoryContains(File, File)
281     */
282    public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
283        if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) {
284            return false;
285        }
286
287        if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
288            return false;
289        }
290
291        final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR);
292        final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator;
293
294        return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator);
295    }
296
297    /**
298     * Does the work of getting the path.
299     *
300     * @param fileName  the file name
301     * @param includeSeparator  true to include the end separator
302     * @return the path
303     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
304     */
305    private static String doGetFullPath(final String fileName, final boolean includeSeparator) {
306        if (fileName == null) {
307            return null;
308        }
309        final int prefix = getPrefixLength(fileName);
310        if (prefix < 0) {
311            return null;
312        }
313        if (prefix >= fileName.length()) {
314            if (includeSeparator) {
315                return getPrefix(fileName);  // add end slash if necessary
316            }
317            return fileName;
318        }
319        final int index = indexOfLastSeparator(fileName);
320        if (index < 0) {
321            return fileName.substring(0, prefix);
322        }
323        int end = index + (includeSeparator ?  1 : 0);
324        if (end == 0) {
325            end++;
326        }
327        return fileName.substring(0, end);
328    }
329
330    /**
331     * Does the work of getting the path.
332     *
333     * @param fileName  the file name
334     * @param separatorAdd  0 to omit the end separator, 1 to return it
335     * @return the path
336     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
337     */
338    private static String doGetPath(final String fileName, final int separatorAdd) {
339        if (fileName == null) {
340            return null;
341        }
342        final int prefix = getPrefixLength(fileName);
343        if (prefix < 0) {
344            return null;
345        }
346        final int index = indexOfLastSeparator(fileName);
347        final int endIndex = index + separatorAdd;
348        if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
349            return EMPTY_STRING;
350        }
351        return requireNonNullChars(fileName.substring(prefix, endIndex));
352    }
353
354    /**
355     * Internal method to perform the normalization.
356     *
357     * @param fileName  the file name
358     * @param separator The separator character to use
359     * @param keepSeparator  true to keep the final separator
360     * @return the normalized fileName
361     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
362     */
363    private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
        // Works on a char buffer copy of the name: unifies the separators, then removes doubled
        // separators, "./" segments and "../" segments in place by shifting the tail left.
364        if (fileName == null) {
365            return null;
366        }
367
        // return value is ignored here; per this method's Javadoc a name containing U+0000 is rejected
368        requireNonNullChars(fileName);
369
        // size is the logical length of the buffer content; it shrinks as segments are removed
370        int size = fileName.length();
371        if (size == 0) {
372            return fileName;
373        }
374        final int prefix = getPrefixLength(fileName);
375        if (prefix < 0) {
376            return null;
377        }
378
379        final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
380        fileName.getChars(0, fileName.length(), array, 0);
381
382        // fix separators throughout
        // (the scan also covers the two spare slots, which still hold zero chars and never match)
383        final char otherSeparator = flipSeparator(separator);
384        for (int i = 0; i < array.length; i++) {
385            if (array[i] == otherSeparator) {
386                array[i] = separator;
387            }
388        }
389
390        // add extra separator on the end to simplify code below
        // lastIsDirectory remembers whether the input itself ended with a separator
391        boolean lastIsDirectory = true;
392        if (array[size - 1] != separator) {
393            array[size++] = separator;
394            lastIsDirectory = false;
395        }
396
397        // adjoining slashes
398        // If we get here, prefix can only be 0 or greater, size 1 or greater
399        // If prefix is 0, set loop start to 1 to prevent index errors
400        for (int i = prefix != 0 ? prefix : 1; i < size; i++) {
401            if (array[i] == separator && array[i - 1] == separator) {
                // drop one of the two separators by shifting the tail left, then re-check the same index
402                System.arraycopy(array, i, array, i - 1, size - i);
403                size--;
404                i--;
405            }
406        }
407
408        // dot slash
        // a "." counts as a segment only directly after the prefix or directly after a separator
409        for (int i = prefix + 1; i < size; i++) {
410            if (array[i] == separator && array[i - 1] == '.' &&
411                    (i == prefix + 1 || array[i - 2] == separator)) {
412                if (i == size - 1) {
                    // the name ended in "." - what remains denotes a directory
413                    lastIsDirectory = true;
414                }
                // remove the two chars "." and separator
415                System.arraycopy(array, i + 1, array, i - 1, size - i);
416                size -=2;
417                i--;
418            }
419        }
420
421        // double dot slash
422        outer:
423        for (int i = prefix + 2; i < size; i++) {
424            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
425                    (i == prefix + 2 || array[i - 3] == separator)) {
426                if (i == prefix + 2) {
                    // ".." directly after the prefix has no segment to cancel: the name is invalid
427                    return null;
428                }
429                if (i == size - 1) {
                    // the name ended in ".." - what remains denotes a directory
430                    lastIsDirectory = true;
431                }
                // search backwards for the separator that precedes the segment being cancelled
432                int j;
433                for (j = i - 4 ; j >= prefix; j--) {
434                    if (array[j] == separator) {
435                        // remove b/../ from a/b/../c
436                        System.arraycopy(array, i + 1, array, j + 1, size - i);
437                        size -= i - j;
438                        i = j + 1;
439                        continue outer;
440                    }
441                }
                // no earlier separator: the cancelled segment starts right at the prefix
442                // remove a/../ from a/../c
443                System.arraycopy(array, i + 1, array, prefix, size - i);
444                size -= i + 1 - prefix;
445                i = prefix + 1;
446            }
447        }
448
449        if (size <= 0) {  // should never be less than 0
450            return EMPTY_STRING;
451        }
452        if (size <= prefix) {  // should never be less than prefix
453            return new String(array, 0, size);
454        }
455        if (lastIsDirectory && keepSeparator) {
456            return new String(array, 0, size);  // keep trailing separator
457        }
458        return new String(array, 0, size - 1);  // lose trailing separator
459    }
460
461    /**
462     * Checks whether two file names are exactly equal.
463     * <p>
464     * No processing is performed on the file names other than comparison.
465     * This is merely a null-safe case-sensitive string equality.
466     * </p>
467     *
468     * @param fileName1  the first file name, may be null
469     * @param fileName2  the second file name, may be null
470     * @return true if the file names are equal, null equals null
471     * @see IOCase#SENSITIVE
472     */
473    public static boolean equals(final String fileName1, final String fileName2) {
474        return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
475    }
476
477    /**
478     * Checks whether two file names are equal, optionally normalizing and providing
479     * control over the case-sensitivity.
480     *
481     * @param fileName1  the first file name, may be null
482     * @param fileName2  the second file name, may be null
483     * @param normalize  whether to normalize the file names
484     * @param ioCase  what case sensitivity rule to use, null means case-sensitive
485     * @return true if the file names are equal, null equals null
486     * @since 1.3
487     */
488    public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) {
489
490        if (fileName1 == null || fileName2 == null) {
491            return fileName1 == null && fileName2 == null;
492        }
493        if (normalize) {
494            fileName1 = normalize(fileName1);
495            if (fileName1 == null) {
496                return false;
497            }
498            fileName2 = normalize(fileName2);
499            if (fileName2 == null) {
500                return false;
501            }
502        }
503        return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2);
504    }
505
506    /**
507     * Checks whether two file names are equal after both have been normalized.
508     * <p>
509     * Both file names are first passed to {@link #normalize(String)}.
510     * The check is then performed in a case-sensitive manner.
511     * </p>
512     *
513     * @param fileName1  the first file name, may be null
514     * @param fileName2  the second file name, may be null
515     * @return true if the file names are equal, null equals null
516     * @see IOCase#SENSITIVE
517     */
518    public static boolean equalsNormalized(final String fileName1, final String fileName2) {
519        return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
520    }
521
522    /**
523     * Checks whether two file names are equal using the case rules of the system
524     * after both have been normalized.
525     * <p>
526     * Both file names are first passed to {@link #normalize(String)}.
527     * The check is then performed case-sensitively on UNIX and
528     * case-insensitively on Windows.
529     * </p>
530     *
531     * @param fileName1  the first file name, may be null
532     * @param fileName2  the second file name, may be null
533     * @return true if the file names are equal, null equals null
534     * @see IOCase#SYSTEM
535     */
536    public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
537        return equals(fileName1, fileName2, true, IOCase.SYSTEM);
538    }
539
540    /**
541     * Checks whether two file names are equal using the case rules of the system.
542     * <p>
543     * No processing is performed on the file names other than comparison.
544     * The check is case-sensitive on UNIX and case-insensitive on Windows.
545     * </p>
546     *
547     * @param fileName1  the first file name, may be null
548     * @param fileName2  the second file name, may be null
549     * @return true if the file names are equal, null equals null
550     * @see IOCase#SYSTEM
551     */
552    public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
553        return equals(fileName1, fileName2, false, IOCase.SYSTEM);
554    }
555
556    /**
557     * Flips the Windows name separator to Linux and vice-versa.
558     *
559     * @param ch The Windows or Linux name separator.
560     * @return The Windows or Linux name separator.
561     */
562    static char flipSeparator(final char ch) {
563        if (ch == UNIX_NAME_SEPARATOR) {
564            return WINDOWS_NAME_SEPARATOR;
565        }
566        if (ch == WINDOWS_NAME_SEPARATOR) {
567            return UNIX_NAME_SEPARATOR;
568        }
569        throw new IllegalArgumentException(String.valueOf(ch));
570    }
571
572    /**
573     * Special handling for NTFS ADS: Don't accept colon in the file name.
574     *
575     * @param fileName a file name
576     * @return ADS offsets.
577     */
578    private static int getAdsCriticalOffset(final String fileName) {
579        // Step 1: Remove leading path segments.
580        final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR);
581        final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
582        if (offset1 == -1) {
583            if (offset2 == -1) {
584                return 0;
585            }
586            return offset2 + 1;
587        }
588        if (offset2 == -1) {
589            return offset1 + 1;
590        }
591        return Math.max(offset1, offset2) + 1;
592    }
593
594    /**
595     * Gets the base name, minus the full path and extension, from a full file name.
596     * <p>
597     * This method will handle a path in either UNIX or Windows format.
598     * The text after the last forward or backslash and before the last dot is returned.
599     * </p>
600     * <pre>
601     * a/b/c.txt --&gt; c
602     * a\b\c.txt --&gt; c
603     * a/b/c.foo.txt --&gt; c.foo
604     * a.txt     --&gt; a
605     * a/b/c     --&gt; c
606     * a/b/c/    --&gt; ""
607     * </pre>
608     * <p>
609     * The output will be the same irrespective of the machine that the code is running on.
610     * </p>
611     *
612     * @param fileName  the file name, null returns null
613     * @return the name of the file without the path, or an empty string if none exists
614     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
615     */
616    public static String getBaseName(final String fileName) {
617        return removeExtension(getName(fileName));
618    }
619
620    /**
621     * Gets the extension of a fileName.
622     * <p>
623     * This method returns the textual part of the file name after the last dot.
624     * There must be no directory separator after the dot.
625     * </p>
626     * <pre>
627     * foo.txt      --&gt; "txt"
628     * a/b/c.jpg    --&gt; "jpg"
629     * a/b.txt/c    --&gt; ""
630     * a/b/c        --&gt; ""
631     * </pre>
632     * <p>
633     * The output will be the same irrespective of the machine that the code is running on, with the
634     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
635     * </p>
636     * <p>
637     * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
638     * In this case, the name wouldn't be the name of a file, but the identifier of an
639     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
640     * ".txt" here, which would be misleading. Commons IO 2.7 and later throw
641     * an {@link IllegalArgumentException} for names like this.
642     * </p>
643     *
644     * @param fileName the file name to retrieve the extension of.
645     * @return the extension of the file or an empty string if none exists or {@code null}
646     * if the file name is {@code null}.
647     * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
648     * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
649     */
650    public static String getExtension(final String fileName) throws IllegalArgumentException {
651        if (fileName == null) {
652            return null;
653        }
654        final int index = indexOfExtension(fileName);
655        if (index == NOT_FOUND) {
656            return EMPTY_STRING;
657        }
658        return fileName.substring(index + 1);
659    }
660
661    /**
662     * Gets the full path (prefix + path) from a full file name.
663     * <p>
664     * This method will handle a file in either UNIX or Windows format.
665     * The method is entirely text based, and returns the text before and
666     * including the last forward or backslash.
667     * </p>
668     * <pre>
669     * C:\a\b\c.txt --&gt; C:\a\b\
670     * ~/a/b/c.txt  --&gt; ~/a/b/
671     * a.txt        --&gt; ""
672     * a/b/c        --&gt; a/b/
673     * a/b/c/       --&gt; a/b/c/
674     * C:           --&gt; C:
675     * C:\          --&gt; C:\
676     * ~            --&gt; ~/
677     * ~/           --&gt; ~/
678     * ~user        --&gt; ~user/
679     * ~user/       --&gt; ~user/
680     * </pre>
681     * <p>
682     * The output will be the same irrespective of the machine that the code is running on.
683     * </p>
684     *
685     * @param fileName  the file name, null returns null
686     * @return the path of the file, an empty string if none exists, null if invalid
687     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
688     */
689    public static String getFullPath(final String fileName) {
690        return doGetFullPath(fileName, true);
691    }
692
693    /**
694     * Gets the full path (prefix + path) from a full file name,
695     * excluding the final directory separator.
696     * <p>
697     * This method will handle a file in either UNIX or Windows format.
698     * The method is entirely text based, and returns the text before the
699     * last forward or backslash.
700     * </p>
701     * <pre>
702     * C:\a\b\c.txt --&gt; C:\a\b
703     * ~/a/b/c.txt  --&gt; ~/a/b
704     * a.txt        --&gt; ""
705     * a/b/c        --&gt; a/b
706     * a/b/c/       --&gt; a/b/c
707     * C:           --&gt; C:
708     * C:\          --&gt; C:\
709     * ~            --&gt; ~
710     * ~/           --&gt; ~
711     * ~user        --&gt; ~user
712     * ~user/       --&gt; ~user
713     * </pre>
714     * <p>
715     * The output will be the same irrespective of the machine that the code is running on.
716     * </p>
717     *
718     * @param fileName  the file name, null returns null
719     * @return the path of the file, an empty string if none exists, null if invalid
720     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
721     */
722    public static String getFullPathNoEndSeparator(final String fileName) {
723        return doGetFullPath(fileName, false);
724    }
725
726    /**
727     * Gets the name minus the path from a full file name.
728     * <p>
729     * This method will handle a file in either UNIX or Windows format.
730     * The text after the last forward or backslash is returned.
731     * </p>
732     * <pre>
733     * a/b/c.txt --&gt; c.txt
734     * a\b\c.txt --&gt; c.txt
735     * a.txt     --&gt; a.txt
736     * a/b/c     --&gt; c
737     * a/b/c/    --&gt; ""
738     * </pre>
739     * <p>
740     * The output will be the same irrespective of the machine that the code is running on.
741     * </p>
742     *
743     * @param fileName  the file name, null returns null
744     * @return the name of the file without the path, or an empty string if none exists
745     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
746     */
747    public static String getName(final String fileName) {
748        if (fileName == null) {
749            return null;
750        }
751        return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1);
752    }
753
754    /**
755     * Gets the path from a full file name, which excludes the prefix and the name.
756     * <p>
757     * This method will handle a file in either UNIX or Windows format.
758     * The method is entirely text based, and returns the text before and
759     * including the last forward or backslash.
760     * </p>
761     * <pre>
762     * C:\a\b\c.txt --&gt; a\b\
763     * ~/a/b/c.txt  --&gt; a/b/
764     * a.txt        --&gt; ""
765     * a/b/c        --&gt; a/b/
766     * a/b/c/       --&gt; a/b/c/
767     * </pre>
768     * <p>
769     * The output will be the same irrespective of the machine that the code is running on.
770     * </p>
771     * <p>
772     * This method drops the prefix from the result.
773     * See {@link #getFullPath(String)} for the method that retains the prefix.
774     * </p>
775     *
776     * @param fileName  the file name, null returns null
777     * @return the path of the file, an empty string if none exists, null if invalid
778     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
779     */
780    public static String getPath(final String fileName) {
781        return doGetPath(fileName, 1);
782    }
783
784    /**
785     * Gets the path (which excludes the prefix) from a full file name, and
786     * also excluding the final directory separator.
787     * <p>
788     * This method will handle a file in either UNIX or Windows format.
789     * The method is entirely text based, and returns the text before the
790     * last forward or backslash.
791     * </p>
792     * <pre>
793     * C:\a\b\c.txt --&gt; a\b
794     * ~/a/b/c.txt  --&gt; a/b
795     * a.txt        --&gt; ""
796     * a/b/c        --&gt; a/b
797     * a/b/c/       --&gt; a/b/c
798     * </pre>
799     * <p>
800     * The output will be the same irrespective of the machine that the code is running on.
801     * </p>
802     * <p>
803     * This method drops the prefix from the result.
804     * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
805     * </p>
806     *
807     * @param fileName  the file name, null returns null
808     * @return the path of the file, an empty string if none exists, null if invalid
809     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000})
810     */
811    public static String getPathNoEndSeparator(final String fileName) {
812        return doGetPath(fileName, 0);
813    }
814
815    /**
816     * Gets the prefix such as {@code C:/} or {@code ~/} from a full file name,
817     * <p>
818     * This method will handle a file in either UNIX or Windows format.
819     * The prefix includes the first slash in the full file name where applicable.
820     * </p>
821     * <pre>
822     * Windows:
823     * a\b\c.txt           --&gt; ""          --&gt; relative
824     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
825     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
826     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
827     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
828     *
829     * Unix:
830     * a/b/c.txt           --&gt; ""          --&gt; relative
831     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
832     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
833     * ~                   --&gt; "~/"        --&gt; current user (slash added)
834     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
835     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
836     * </pre>
837     * <p>
838     * The output will be the same irrespective of the machine that the code is running on.
839     * ie. both UNIX and Windows prefixes are matched regardless.
840     * </p>
841     *
842     * @param fileName  the file name, null returns null
843     * @return the prefix of the file, null if invalid
844     * @throws IllegalArgumentException if the result contains the null character ({@code U+0000})
845     */
846    public static String getPrefix(final String fileName) {
847        if (fileName == null) {
848            return null;
849        }
850        final int len = getPrefixLength(fileName);
851        if (len < 0) {
852            return null;
853        }
854        if (len > fileName.length()) {
855            requireNonNullChars(fileName);
856            return fileName + UNIX_NAME_SEPARATOR;
857        }
858        return requireNonNullChars(fileName.substring(0, len));
859    }
860
861    /**
862     * Returns the length of the file name prefix, such as {@code C:/} or {@code ~/}.
863     * <p>
864     * This method will handle a file in either UNIX or Windows format.
865     * </p>
866     * <p>
867     * The prefix length includes the first slash in the full file name
868     * if applicable. Thus, it is possible that the length returned is greater
869     * than the length of the input string.
870     * </p>
871     * <pre>
872     * Windows:
873     * a\b\c.txt           --&gt; 0           --&gt; relative
874     * \a\b\c.txt          --&gt; 1           --&gt; current drive absolute
875     * C:a\b\c.txt         --&gt; 2           --&gt; drive relative
876     * C:\a\b\c.txt        --&gt; 3           --&gt; absolute
877     * \\server\a\b\c.txt  --&gt; 9           --&gt; UNC
878     * \\\a\b\c.txt        --&gt; -1          --&gt; error
879     *
880     * Unix:
881     * a/b/c.txt           --&gt; 0           --&gt; relative
882     * /a/b/c.txt          --&gt; 1           --&gt; absolute
883     * ~/a/b/c.txt         --&gt; 2           --&gt; current user
884     * ~                   --&gt; 2           --&gt; current user (slash added)
885     * ~user/a/b/c.txt     --&gt; 6           --&gt; named user
886     * ~user               --&gt; 6           --&gt; named user (slash added)
887     * //server/a/b/c.txt  --&gt; 9
888     * ///a/b/c.txt        --&gt; -1          --&gt; error
889     * C:                  --&gt; 0           --&gt; valid file name as only null character and / are reserved characters
890     * </pre>
891     * <p>
892     * The output will be the same irrespective of the machine that the code is running on.
893     * ie. both UNIX and Windows prefixes are matched regardless.
894     * </p>
895     * <p>
896     * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
897     * These must be followed by a server name, so double-slashes are not collapsed
898     * to a single slash at the start of the file name.
899     * </p>
900     *
901     * @param fileName  the file name to find the prefix in, null returns -1
902     * @return the length of the prefix, -1 if invalid or null
903     */
904    public static int getPrefixLength(final String fileName) {
905        if (fileName == null) {
906            return NOT_FOUND;
907        }
908        final int len = fileName.length();
909        if (len == 0) {
910            return 0;
911        }
912        char ch0 = fileName.charAt(0);
913        if (ch0 == ':') {
914            return NOT_FOUND;
915        }
916        if (len == 1) {
917            if (ch0 == '~') {
918                return 2;  // return a length greater than the input
919            }
920            return isSeparator(ch0) ? 1 : 0;
921        }
922        if (ch0 == '~') {
923            int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1);
924            int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1);
925            if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
926                return len + 1;  // return a length greater than the input
927            }
928            posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
929            posWin = posWin == NOT_FOUND ? posUnix : posWin;
930            return Math.min(posUnix, posWin) + 1;
931        }
932        final char ch1 = fileName.charAt(1);
933        if (ch1 == ':') {
934            ch0 = Character.toUpperCase(ch0);
935            if (ch0 >= 'A' && ch0 <= 'Z') {
936                if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
937                    return 0;
938                }
939                if (len == 2 || !isSeparator(fileName.charAt(2))) {
940                    return 2;
941                }
942                return 3;
943            }
944            if (ch0 == UNIX_NAME_SEPARATOR) {
945                return 1;
946            }
947            return NOT_FOUND;
948
949        }
950        if (!isSeparator(ch0) || !isSeparator(ch1)) {
951            return isSeparator(ch0) ? 1 : 0;
952        }
953        int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2);
954        int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2);
955        if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
956            return NOT_FOUND;
957        }
958        posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
959        posWin = posWin == NOT_FOUND ? posUnix : posWin;
960        final int pos = Math.min(posUnix, posWin) + 1;
961        final String hostnamePart = fileName.substring(2, pos - 1);
962        return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
963    }
964
965    /**
966     * Returns the index of the last extension separator character, which is a dot.
967     * <p>
968     * This method also checks that there is no directory separator after the last dot. To do this it uses
969     * {@link #indexOfLastSeparator(String)} which will handle a file in either UNIX or Windows format.
970     * </p>
971     * <p>
972     * The output will be the same irrespective of the machine that the code is running on, with the
973     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
974     * </p>
975     * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
976     * In this case, the name wouldn't be the name of a file, but the identifier of an
977     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
978     * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
979     * an {@link IllegalArgumentException} for names like this.
980     *
981     * @param fileName
982     *            the file name to find the last extension separator in, null returns -1
983     * @return the index of the last extension separator character, or -1 if there is no such character
984     * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
985     * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
986     */
987    public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
988        if (fileName == null) {
989            return NOT_FOUND;
990        }
991        if (isSystemWindows()) {
992            // Special handling for NTFS ADS: Don't accept colon in the file name.
993            final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
994            if (offset != -1) {
995                throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
996            }
997        }
998        final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
999        final int lastSeparator = indexOfLastSeparator(fileName);
1000        return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
1001    }
1002
1003    /**
1004     * Returns the index of the last directory separator character.
1005     * <p>
1006     * This method will handle a file in either UNIX or Windows format.
1007     * The position of the last forward or backslash is returned.
1008     * <p>
1009     * The output will be the same irrespective of the machine that the code is running on.
1010     *
1011     * @param fileName  the file name to find the last path separator in, null returns -1
1012     * @return the index of the last separator character, or -1 if there
1013     * is no such character
1014     */
1015    public static int indexOfLastSeparator(final String fileName) {
1016        if (fileName == null) {
1017            return NOT_FOUND;
1018        }
1019        final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR);
1020        final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR);
1021        return Math.max(lastUnixPos, lastWindowsPos);
1022    }
1023
1024    private static boolean isEmpty(final String string) {
1025        return string == null || string.isEmpty();
1026    }
1027
1028    /**
1029     * Checks whether the extension of the file name is one of those specified.
1030     * <p>
1031     * This method obtains the extension as the textual part of the file name
1032     * after the last dot. There must be no directory separator after the dot.
1033     * The extension check is case-sensitive on all platforms.
1034     *
1035     * @param fileName  the file name, null returns false
1036     * @param extensions  the extensions to check for, null checks for no extension
1037     * @return true if the file name is one of the extensions
1038     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1039     */
1040    public static boolean isExtension(final String fileName, final Collection<String> extensions) {
1041        if (fileName == null) {
1042            return false;
1043        }
1044        requireNonNullChars(fileName);
1045
1046        if (extensions == null || extensions.isEmpty()) {
1047            return indexOfExtension(fileName) == NOT_FOUND;
1048        }
1049        return extensions.contains(getExtension(fileName));
1050    }
1051
1052    /**
1053     * Checks whether the extension of the file name is that specified.
1054     * <p>
1055     * This method obtains the extension as the textual part of the file name
1056     * after the last dot. There must be no directory separator after the dot.
1057     * The extension check is case-sensitive on all platforms.
1058     *
1059     * @param fileName  the file name, null returns false
1060     * @param extension  the extension to check for, null or empty checks for no extension
1061     * @return true if the file name has the specified extension
1062     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1063     */
1064    public static boolean isExtension(final String fileName, final String extension) {
1065        if (fileName == null) {
1066            return false;
1067        }
1068        requireNonNullChars(fileName);
1069
1070        if (isEmpty(extension)) {
1071            return indexOfExtension(fileName) == NOT_FOUND;
1072        }
1073        return getExtension(fileName).equals(extension);
1074    }
1075
1076    /**
1077     * Checks whether the extension of the file name is one of those specified.
1078     * <p>
1079     * This method obtains the extension as the textual part of the file name
1080     * after the last dot. There must be no directory separator after the dot.
1081     * The extension check is case-sensitive on all platforms.
1082     *
1083     * @param fileName  the file name, null returns false
1084     * @param extensions  the extensions to check for, null checks for no extension
1085     * @return true if the file name is one of the extensions
1086     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1087     */
1088    public static boolean isExtension(final String fileName, final String... extensions) {
1089        if (fileName == null) {
1090            return false;
1091        }
1092        requireNonNullChars(fileName);
1093
1094        if (extensions == null || extensions.length == 0) {
1095            return indexOfExtension(fileName) == NOT_FOUND;
1096        }
1097        final String fileExt = getExtension(fileName);
1098        return Stream.of(extensions).anyMatch(fileExt::equals);
1099    }
1100
1101    /**
1102     * Checks whether a given string represents a valid IPv4 address.
1103     *
1104     * @param name the name to validate
1105     * @return true if the given name is a valid IPv4 address
1106     */
1107    // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
1108    private static boolean isIPv4Address(final String name) {
1109        final Matcher m = IPV4_PATTERN.matcher(name);
1110        if (!m.matches() || m.groupCount() != 4) {
1111            return false;
1112        }
1113
1114        // verify that address subgroups are legal
1115        for (int i = 1; i <= 4; i++) {
1116            final String ipSegment = m.group(i);
1117            final int iIpSegment = Integer.parseInt(ipSegment);
1118            if (iIpSegment > IPV4_MAX_OCTET_VALUE) {
1119                return false;
1120            }
1121
1122            if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1123                return false;
1124            }
1125
1126        }
1127
1128        return true;
1129    }
1130
1131    // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
1132    /**
1133     * Checks whether a given string represents a valid IPv6 address.
1134     *
1135     * @param inet6Address the name to validate
1136     * @return true if the given name is a valid IPv6 address
1137     */
1138    private static boolean isIPv6Address(final String inet6Address) {
1139        final boolean containsCompressedZeroes = inet6Address.contains("::");
1140        if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) {
1141            return false;
1142        }
1143        if (inet6Address.startsWith(":") && !inet6Address.startsWith("::")
1144                || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) {
1145            return false;
1146        }
1147        String[] octets = inet6Address.split(":");
1148        if (containsCompressedZeroes) {
1149            final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
1150            if (inet6Address.endsWith("::")) {
1151                // String.split() drops ending empty segments
1152                octetList.add("");
1153            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
1154                octetList.remove(0);
1155            }
1156            octets = octetList.toArray(EMPTY_STRING_ARRAY);
1157        }
1158        if (octets.length > IPV6_MAX_HEX_GROUPS) {
1159            return false;
1160        }
1161        int validOctets = 0;
1162        int emptyOctets = 0; // consecutive empty chunks
1163        for (int index = 0; index < octets.length; index++) {
1164            final String octet = octets[index];
1165            if (octet.isEmpty()) {
1166                emptyOctets++;
1167                if (emptyOctets > 1) {
1168                    return false;
1169                }
1170            } else {
1171                emptyOctets = 0;
1172                // Is last chunk an IPv4 address?
1173                if (index == octets.length - 1 && octet.contains(".")) {
1174                    if (!isIPv4Address(octet)) {
1175                        return false;
1176                    }
1177                    validOctets += 2;
1178                    continue;
1179                }
1180                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
1181                    return false;
1182                }
1183                final int octetInt;
1184                try {
1185                    octetInt = Integer.parseInt(octet, BASE_16);
1186                } catch (final NumberFormatException e) {
1187                    return false;
1188                }
1189                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
1190                    return false;
1191                }
1192            }
1193            validOctets++;
1194        }
1195        return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
1196    }
1197
1198    /**
1199     * Checks whether a given string is a valid host name according to
1200     * RFC 3986 - not accepting IP addresses.
1201     *
1202     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1203     * @param name the hostname to validate
1204     * @return true if the given name is a valid host name
1205     */
1206    private static boolean isRFC3986HostName(final String name) {
1207        final String[] parts = name.split("\\.", -1);
1208        for (int i = 0; i < parts.length; i++) {
1209            if (parts[i].isEmpty()) {
1210                // trailing dot is legal, otherwise we've hit a .. sequence
1211                return i == parts.length - 1;
1212            }
1213            if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1214                return false;
1215            }
1216        }
1217        return true;
1218    }
1219
1220    /**
1221     * Checks if the character is a separator.
1222     *
1223     * @param ch  the character to check
1224     * @return true if it is a separator character
1225     */
1226    private static boolean isSeparator(final char ch) {
1227        return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR;
1228    }
1229
1230    /**
1231     * Determines if Windows file system is in use.
1232     *
1233     * @return true if the system is Windows
1234     */
1235    static boolean isSystemWindows() {
1236        return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR;
1237    }
1238
1239    /**
1240     * Checks whether a given string is a valid host name according to
1241     * RFC 3986.
1242     *
1243     * <p>Accepted are IP addresses (v4 and v6) as well as what the
1244     * RFC calls a "reg-name". Percent encoded names don't seem to be
1245     * valid names in UNC paths.</p>
1246     *
1247     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1248     * @param name the hostname to validate
1249     * @return true if the given name is a valid host name
1250     */
1251    private static boolean isValidHostName(final String name) {
1252        return isIPv6Address(name) || isRFC3986HostName(name);
1253    }
1254
1255    /**
1256     * Normalizes a path, removing double and single dot path steps.
1257     * <p>
1258     * This method normalizes a path to a standard format.
1259     * The input may contain separators in either UNIX or Windows format.
1260     * The output will contain separators in the format of the system.
1261     * <p>
1262     * A trailing slash will be retained.
1263     * A double slash will be merged to a single slash (but UNC names are handled).
1264     * A single dot path segment will be removed.
1265     * A double dot will cause that path segment and the one before to be removed.
1266     * If the double dot has no parent path segment, {@code null} is returned.
1267     * <p>
1268     * The output will be the same on both UNIX and Windows except
1269     * for the separator character.
1270     * <pre>
1271     * /foo//               --&gt;   /foo/
1272     * /foo/./              --&gt;   /foo/
1273     * /foo/../bar          --&gt;   /bar
1274     * /foo/../bar/         --&gt;   /bar/
1275     * /foo/../bar/../baz   --&gt;   /baz
1276     * //foo//./bar         --&gt;   //foo/bar
1277     * /../                 --&gt;   null
1278     * ../foo               --&gt;   null
1279     * foo/bar/..           --&gt;   foo/
1280     * foo/../../bar        --&gt;   null
1281     * foo/../bar           --&gt;   bar
1282     * //server/foo/../bar  --&gt;   //server/bar
1283     * //server/../bar      --&gt;   null
1284     * C:\foo\..\bar        --&gt;   C:\bar
1285     * C:\..\bar            --&gt;   null
1286     * ~/foo/../bar/        --&gt;   ~/bar/
1287     * ~/../bar             --&gt;   null
1288     * </pre>
1289     * (Note the file separator will be correct for Windows/Unix.)
1290     *
1291     * @param fileName  the file name to normalize, null returns null
1292     * @return the normalized fileName, or null if invalid
1293     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1294     */
1295    public static String normalize(final String fileName) {
1296        return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true);
1297    }
1298
1299    /**
1300     * Normalizes a path, removing double and single dot path steps.
1301     * <p>
1302     * This method normalizes a path to a standard format.
1303     * The input may contain separators in either UNIX or Windows format.
1304     * The output will contain separators in the format specified.
1305     * <p>
1306     * A trailing slash will be retained.
1307     * A double slash will be merged to a single slash (but UNC names are handled).
1308     * A single dot path segment will be removed.
1309     * A double dot will cause that path segment and the one before to be removed.
1310     * If the double dot has no parent path segment to work with, {@code null}
1311     * is returned.
1312     * <p>
1313     * The output will be the same on both UNIX and Windows except
1314     * for the separator character.
1315     * <pre>
1316     * /foo//               --&gt;   /foo/
1317     * /foo/./              --&gt;   /foo/
1318     * /foo/../bar          --&gt;   /bar
1319     * /foo/../bar/         --&gt;   /bar/
1320     * /foo/../bar/../baz   --&gt;   /baz
1321     * //foo//./bar         --&gt;   /foo/bar
1322     * /../                 --&gt;   null
1323     * ../foo               --&gt;   null
1324     * foo/bar/..           --&gt;   foo/
1325     * foo/../../bar        --&gt;   null
1326     * foo/../bar           --&gt;   bar
1327     * //server/foo/../bar  --&gt;   //server/bar
1328     * //server/../bar      --&gt;   null
1329     * C:\foo\..\bar        --&gt;   C:\bar
1330     * C:\..\bar            --&gt;   null
1331     * ~/foo/../bar/        --&gt;   ~/bar/
1332     * ~/../bar             --&gt;   null
1333     * </pre>
1334     * The output will be the same on both UNIX and Windows including
1335     * the separator character.
1336     *
1337     * @param fileName  the file name to normalize, null returns null
1338     * @param unixSeparator {@code true} if a UNIX separator should
1339     * be used or {@code false} if a Windows separator should be used.
1340     * @return the normalized fileName, or null if invalid
1341     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1342     * @since 2.0
1343     */
1344    public static String normalize(final String fileName, final boolean unixSeparator) {
1345        return doNormalize(fileName, toSeparator(unixSeparator), true);
1346    }
1347
1348    /**
1349     * Normalizes a path, removing double and single dot path steps,
1350     * and removing any final directory separator.
1351     * <p>
1352     * This method normalizes a path to a standard format.
1353     * The input may contain separators in either UNIX or Windows format.
1354     * The output will contain separators in the format of the system.
1355     * <p>
1356     * A trailing slash will be removed.
1357     * A double slash will be merged to a single slash (but UNC names are handled).
1358     * A single dot path segment will be removed.
1359     * A double dot will cause that path segment and the one before to be removed.
1360     * If the double dot has no parent path segment to work with, {@code null}
1361     * is returned.
1362     * <p>
1363     * The output will be the same on both UNIX and Windows except
1364     * for the separator character.
1365     * <pre>
1366     * /foo//               --&gt;   /foo
1367     * /foo/./              --&gt;   /foo
1368     * /foo/../bar          --&gt;   /bar
1369     * /foo/../bar/         --&gt;   /bar
1370     * /foo/../bar/../baz   --&gt;   /baz
1371     * //foo//./bar         --&gt;   /foo/bar
1372     * /../                 --&gt;   null
1373     * ../foo               --&gt;   null
1374     * foo/bar/..           --&gt;   foo
1375     * foo/../../bar        --&gt;   null
1376     * foo/../bar           --&gt;   bar
1377     * //server/foo/../bar  --&gt;   //server/bar
1378     * //server/../bar      --&gt;   null
1379     * C:\foo\..\bar        --&gt;   C:\bar
1380     * C:\..\bar            --&gt;   null
1381     * ~/foo/../bar/        --&gt;   ~/bar
1382     * ~/../bar             --&gt;   null
1383     * </pre>
1384     * (Note the file separator returned will be correct for Windows/Unix)
1385     *
1386     * @param fileName  the file name to normalize, null returns null
1387     * @return the normalized fileName, or null if invalid
1388     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1389     */
1390    public static String normalizeNoEndSeparator(final String fileName) {
1391        return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false);
1392    }
1393
1394    /**
1395     * Normalizes a path, removing double and single dot path steps,
1396     * and removing any final directory separator.
1397     * <p>
1398     * This method normalizes a path to a standard format.
1399     * The input may contain separators in either UNIX or Windows format.
1400     * The output will contain separators in the format specified.
1401     * <p>
1402     * A trailing slash will be removed.
1403     * A double slash will be merged to a single slash (but UNC names are handled).
1404     * A single dot path segment will be removed.
1405     * A double dot will cause that path segment and the one before to be removed.
1406     * If the double dot has no parent path segment to work with, {@code null}
1407     * is returned.
1408     * <p>
1409     * The output will be the same on both UNIX and Windows including
1410     * the separator character.
1411     * <pre>
1412     * /foo//               --&gt;   /foo
1413     * /foo/./              --&gt;   /foo
1414     * /foo/../bar          --&gt;   /bar
1415     * /foo/../bar/         --&gt;   /bar
1416     * /foo/../bar/../baz   --&gt;   /baz
1417     * //foo//./bar         --&gt;   /foo/bar
1418     * /../                 --&gt;   null
1419     * ../foo               --&gt;   null
1420     * foo/bar/..           --&gt;   foo
1421     * foo/../../bar        --&gt;   null
1422     * foo/../bar           --&gt;   bar
1423     * //server/foo/../bar  --&gt;   //server/bar
1424     * //server/../bar      --&gt;   null
1425     * C:\foo\..\bar        --&gt;   C:\bar
1426     * C:\..\bar            --&gt;   null
1427     * ~/foo/../bar/        --&gt;   ~/bar
1428     * ~/../bar             --&gt;   null
1429     * </pre>
1430     *
1431     * @param fileName  the file name to normalize, null returns null
1432     * @param unixSeparator {@code true} if a UNIX separator should
1433     * be used or {@code false} if a Windows separator should be used.
1434     * @return the normalized fileName, or null if invalid
1435     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1436     * @since 2.0
1437     */
1438    public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
1439         return doNormalize(fileName, toSeparator(unixSeparator), false);
1440    }
1441
1442    /**
1443     * Removes the extension from a fileName.
1444     * <p>
1445     * This method returns the textual part of the file name before the last dot.
1446     * There must be no directory separator after the dot.
1447     * <pre>
1448     * foo.txt    --&gt; foo
1449     * .txt       --&gt; "" (empty string)
1450     * a\b\c.jpg  --&gt; a\b\c
1451     * /a/b/c.jpg --&gt; /a/b/c
1452     * a\b\c      --&gt; a\b\c
1453     * a.b\c      --&gt; a.b\c
1454     * </pre>
1455     * <p>
1456     * The output will be the same irrespective of the machine that the code is running on.
1457     *
1458     * @param fileName  the file name, null returns null
1459     * @return the file name minus the extension
1460     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000})
1461     */
1462    public static String removeExtension(final String fileName) {
1463        if (fileName == null) {
1464            return null;
1465        }
1466        requireNonNullChars(fileName);
1467
1468        final int index = indexOfExtension(fileName);
1469        if (index == NOT_FOUND) {
1470            return fileName;
1471        }
1472        return fileName.substring(0, index);
1473    }
1474
1475    /**
1476     * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions.
1477     *
1478     * This may be used to defend against poison byte attacks.
1479     *
1480     * @param path the path to check
1481     * @return The input
1482     * @throws IllegalArgumentException if path contains the null character ({@code U+0000})
1483     */
1484    private static String requireNonNullChars(final String path) {
1485        if (path.indexOf(0) >= 0) {
1486            throw new IllegalArgumentException(
1487                "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it");
1488        }
1489        return path;
1490    }
1491
1492    /**
1493     * Converts all separators to the system separator.
1494     *
1495     * @param path the path to be changed, null ignored.
1496     * @return the updated path.
1497     */
1498    public static String separatorsToSystem(final String path) {
1499        return FileSystem.getCurrent().normalizeSeparators(path);
1500    }
1501
1502    /**
1503     * Converts all separators to the UNIX separator of forward slash.
1504     *
1505     * @param path the path to be changed, null ignored.
1506     * @return the new path.
1507     */
1508    public static String separatorsToUnix(final String path) {
1509        return FileSystem.LINUX.normalizeSeparators(path);
1510    }
1511
1512    /**
1513     * Converts all separators to the Windows separator of backslash.
1514     *
1515     * @param path the path to be changed, null ignored.
1516     * @return the updated path.
1517     */
1518    public static String separatorsToWindows(final String path) {
1519        return FileSystem.WINDOWS.normalizeSeparators(path);
1520    }
1521
1522    /**
1523     * Splits a string into a number of tokens.
1524     * The text is split by '?' and '*'.
1525     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1526     *
1527     * @param text  the text to split
1528     * @return the array of tokens, never null
1529     */
1530    static String[] splitOnTokens(final String text) {
1531        // used by wildcardMatch
1532        // package level so a unit test may run on this
1533
1534        if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1535            return new String[] { text };
1536        }
1537
1538        final char[] array = text.toCharArray();
1539        final ArrayList<String> list = new ArrayList<>();
1540        final StringBuilder buffer = new StringBuilder();
1541        char prevChar = 0;
1542        for (final char ch : array) {
1543            if (ch == '?' || ch == '*') {
1544                if (buffer.length() != 0) {
1545                    list.add(buffer.toString());
1546                    buffer.setLength(0);
1547                }
1548                if (ch == '?') {
1549                    list.add("?");
1550                } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*'
1551                    list.add("*");
1552                }
1553            } else {
1554                buffer.append(ch);
1555            }
1556            prevChar = ch;
1557        }
1558        if (buffer.length() != 0) {
1559            list.add(buffer.toString());
1560        }
1561
1562        return list.toArray(EMPTY_STRING_ARRAY);
1563    }
1564
1565    /**
1566     * Returns '/' if given true, '\\' otherwise.
1567     *
1568     * @param unixSeparator which separator to return.
1569     * @return '/' if given true, '\\' otherwise.
1570     */
1571    private static char toSeparator(final boolean unixSeparator) {
1572        return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR;
1573    }
1574
1575    /**
1576     * Checks a fileName to see if it matches the specified wildcard matcher,
1577     * always testing case-sensitive.
1578     * <p>
1579     * The wildcard matcher uses the characters '?' and '*' to represent a
1580     * single or multiple (zero or more) wildcard characters.
1581     * This is the same as often found on DOS/Unix command lines.
1582     * The check is case-sensitive always.
1583     * <pre>
1584     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1585     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1586     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1587     * wildcardMatch("c.txt", "*.???")      --&gt; true
1588     * wildcardMatch("c.txt", "*.????")     --&gt; false
1589     * </pre>
1590     * N.B. the sequence "*?" does not work properly at present in match strings.
1591     *
1592     * @param fileName  the file name to match on
1593     * @param wildcardMatcher  the wildcard string to match against
1594     * @return true if the file name matches the wildcard string
1595     * @see IOCase#SENSITIVE
1596     */
1597    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
1598        return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
1599    }
1600
1601    /**
1602     * Checks a fileName to see if it matches the specified wildcard matcher
1603     * allowing control over case-sensitivity.
1604     * <p>
1605     * The wildcard matcher uses the characters '?' and '*' to represent a
1606     * single or multiple (zero or more) wildcard characters.
1607     * N.B. the sequence "*?" does not work properly at present in match strings.
1608     *
1609     * @param fileName  the file name to match on
1610     * @param wildcardMatcher  the wildcard string to match against
1611     * @param ioCase  what case sensitivity rule to use, null means case-sensitive
1612     * @return true if the file name matches the wildcard string
1613     * @since 1.3
1614     */
1615    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) {
1616        if (fileName == null && wildcardMatcher == null) {
1617            return true;
1618        }
1619        if (fileName == null || wildcardMatcher == null) {
1620            return false;
1621        }
1622        ioCase = IOCase.value(ioCase, IOCase.SENSITIVE);
1623        final String[] wcs = splitOnTokens(wildcardMatcher);
1624        boolean anyChars = false;
1625        int textIdx = 0;
1626        int wcsIdx = 0;
1627        final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);
1628
1629        // loop around a backtrack stack, to handle complex * matching
1630        do {
1631            if (!backtrack.isEmpty()) {
1632                final int[] array = backtrack.pop();
1633                wcsIdx = array[0];
1634                textIdx = array[1];
1635                anyChars = true;
1636            }
1637
1638            // loop whilst tokens and text left to process
1639            while (wcsIdx < wcs.length) {
1640
1641                if (wcs[wcsIdx].equals("?")) {
1642                    // ? so move to next text char
1643                    textIdx++;
1644                    if (textIdx > fileName.length()) {
1645                        break;
1646                    }
1647                    anyChars = false;
1648
1649                } else if (wcs[wcsIdx].equals("*")) {
1650                    // set any chars status
1651                    anyChars = true;
1652                    if (wcsIdx == wcs.length - 1) {
1653                        textIdx = fileName.length();
1654                    }
1655
1656                } else {
1657                    // matching text token
1658                    if (anyChars) {
1659                        // any chars then try to locate text token
1660                        textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
1661                        if (textIdx == NOT_FOUND) {
1662                            // token not found
1663                            break;
1664                        }
1665                        final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
1666                        if (repeat >= 0) {
1667                            backtrack.push(new int[] {wcsIdx, repeat});
1668                        }
1669                    } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
1670                        // matching from current position
1671                        // couldn't match token
1672                        break;
1673                    }
1674
1675                    // matched text token, move text index to end of matched token
1676                    textIdx += wcs[wcsIdx].length();
1677                    anyChars = false;
1678                }
1679
1680                wcsIdx++;
1681            }
1682
1683            // full match
1684            if (wcsIdx == wcs.length && textIdx == fileName.length()) {
1685                return true;
1686            }
1687
1688        } while (!backtrack.isEmpty());
1689
1690        return false;
1691    }
1692
1693    /**
1694     * Checks a fileName to see if it matches the specified wildcard matcher
1695     * using the case rules of the system.
1696     * <p>
1697     * The wildcard matcher uses the characters '?' and '*' to represent a
1698     * single or multiple (zero or more) wildcard characters.
1699     * This is the same as often found on DOS/Unix command lines.
1700     * The check is case-sensitive on UNIX and case-insensitive on Windows.
1701     * <pre>
1702     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1703     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1704     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1705     * wildcardMatch("c.txt", "*.???")      --&gt; true
1706     * wildcardMatch("c.txt", "*.????")     --&gt; false
1707     * </pre>
1708     * N.B. the sequence "*?" does not work properly at present in match strings.
1709     *
1710     * @param fileName  the file name to match on
1711     * @param wildcardMatcher  the wildcard string to match against
1712     * @return true if the file name matches the wildcard string
1713     * @see IOCase#SYSTEM
1714     */
1715    public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
1716        return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
1717    }
1718
    /**
     * Creates a new instance; the constructor performs no work.
     * <p>
     * Instances should NOT be constructed in standard programming.
     * </p>
     *
     * @deprecated TODO Make private in 3.0.
     */
    @Deprecated
    public FilenameUtils() {
        // empty
    }
1728}