001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.vfs2.provider;
018
019import java.util.Arrays;
020
021import org.apache.commons.lang3.SystemUtils;
022import org.apache.commons.vfs2.FileName;
023import org.apache.commons.vfs2.FileSystemException;
024import org.apache.commons.vfs2.FileType;
025import org.apache.commons.vfs2.VFS;
026
027/**
028 * Utilities for dealing with URIs. See RFC 2396 for details.
029 */
030public final class UriParser {
031
    /**
     * The alternative separator character (backslash). {@link #fixSeparators(StringBuilder)} replaces every
     * occurrence of it with the normalized separator. It is <em>not</em> the normalized separator itself.
     */
    public static final char TRANS_SEPARATOR = '\\';

    /**
     * The normalized separator to use.
     */
    private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR;

    /**
     * Radix used to read and write the two digits of a {@code %nn} escape sequence.
     */
    private static final int HEX_BASE = 16;

    /**
     * Shift distance that moves the high half-byte of a value into the low four bits (and back).
     */
    private static final int BITS_IN_HALF_BYTE = 4;

    /**
     * Mask that keeps only the low four bits of a value, i.e. a single hex digit.
     */
    private static final char LOW_MASK = 0x0F;
049
050    /**
051     * Encodes and appends a string to a StringBuilder.
052     *
053     * @param buffer The StringBuilder to append to.
054     * @param unencodedValue The String to encode and append.
055     * @param reserved characters to encode.
056     */
057    public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) {
058        final int offset = buffer.length();
059        buffer.append(unencodedValue);
060        encode(buffer, offset, unencodedValue.length(), reserved);
061    }
062
063    static void appendEncodedRfc2396(final StringBuilder buffer, final String unencodedValue, final char[] allowed) {
064        final int offset = buffer.length();
065        buffer.append(unencodedValue);
066        encodeRfc2396(buffer, offset, unencodedValue.length(), allowed);
067    }
068
069    /**
070     * Canonicalizes a path.
071     *
072     * @param buffer Source data.
073     * @param offset Where to start reading.
074     * @param length How much to read.
075     * @param fileNameParser Now to encode and decode.
076     * @throws FileSystemException If an I/O error occurs.
077     */
078    public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length,
079            final FileNameParser fileNameParser) throws FileSystemException {
080        int index = offset;
081        int count = length;
082        for (; count > 0; count--, index++) {
083            final char ch = buffer.charAt(index);
084            if (ch == '%') {
085                if (count < 3) {
086                    throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
087                            buffer.substring(index, index + count));
088                }
089
090                // Decode
091                final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
092                final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
093                if (dig1 == -1 || dig2 == -1) {
094                    throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
095                            buffer.substring(index, index + 3));
096                }
097                final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
098
099                final boolean match = value == '%' || fileNameParser.encodeCharacter(value);
100
101                if (match) {
102                    // this is a reserved character, not allowed to decode
103                    index += 2;
104                    count -= 2;
105                    continue;
106                }
107
108                // Replace
109                buffer.setCharAt(index, value);
110                buffer.delete(index + 1, index + 3);
111                count -= 2;
112            } else if (fileNameParser.encodeCharacter(ch)) {
113                // Encode
114                final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)};
115                buffer.setCharAt(index, '%');
116                buffer.insert(index + 1, digits);
117                index += 2;
118            }
119        }
120    }
121
122    /**
123     * Decodes the String.
124     *
125     * @param uri The String to decode.
126     * @throws FileSystemException if an error occurs.
127     */
128    public static void checkUriEncoding(final String uri) throws FileSystemException {
129        decode(uri);
130    }
131
132    /**
133     * Removes %nn encodings from a string.
134     *
135     * @param encodedStr The encoded String.
136     * @return The decoded String.
137     * @throws FileSystemException if an error occurs.
138     */
139    public static String decode(final String encodedStr) throws FileSystemException {
140        if (encodedStr == null) {
141            return null;
142        }
143        if (encodedStr.indexOf('%') < 0) {
144            return encodedStr;
145        }
146        final StringBuilder buffer = new StringBuilder(encodedStr);
147        decode(buffer, 0, buffer.length());
148        return buffer.toString();
149    }
150
151    /**
152     * Removes %nn encodings from a string.
153     *
154     * @param buffer StringBuilder containing the string to decode.
155     * @param offset The position in the string to start decoding.
156     * @param length The number of characters to decode.
157     * @throws FileSystemException if an error occurs.
158     */
159    public static void decode(final StringBuilder buffer, final int offset, final int length)
160            throws FileSystemException {
161        int index = offset;
162        int count = length;
163        boolean ipv6Host = false;
164        for (; count > 0; count--, index++) {
165            final char ch = buffer.charAt(index);
166            if (ch == '[') {
167                ipv6Host = true;
168            }
169            if (ch == ']') {
170                ipv6Host = false;
171            }
172            if (ch != '%' || ipv6Host) {
173                continue;
174            }
175
176            if (count < 3) {
177                throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
178                        buffer.substring(index, index + count));
179            }
180
181            // Decode
182            final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
183            final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
184            if (dig1 == -1 || dig2 == -1) {
185                throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
186                        buffer.substring(index, index + 3));
187            }
188            final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
189
190            // Replace
191            buffer.setCharAt(index, value);
192            buffer.delete(index + 1, index + 3);
193            count -= 2;
194        }
195    }
196
197    /**
198     * Converts "special" characters to their %nn value.
199     *
200     * @param decodedStr The decoded String.
201     * @return The encoded String.
202     */
203    public static String encode(final String decodedStr) {
204        return encode(decodedStr, null);
205    }
206
207    /**
208     * Converts "special" characters to their %nn value.
209     *
210     * @param decodedStr The decoded String.
211     * @param reserved Characters to encode.
212     * @return The encoded String
213     */
214    public static String encode(final String decodedStr, final char[] reserved) {
215        if (decodedStr == null) {
216            return null;
217        }
218        final StringBuilder buffer = new StringBuilder(decodedStr);
219        encode(buffer, 0, buffer.length(), reserved);
220        return buffer.toString();
221    }
222
223    /**
224     * Encode an array of Strings.
225     *
226     * @param strings The array of Strings to encode.
227     * @return An array of encoded Strings.
228     */
229    public static String[] encode(final String[] strings) {
230        if (strings == null) {
231            return null;
232        }
233        Arrays.setAll(strings, i -> encode(strings[i]));
234        return strings;
235    }
236
237    /**
238     * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters.
239     *
240     * @param buffer The StringBuilder to append to.
241     * @param offset The position in the buffer to start encoding at.
242     * @param length The number of characters to encode.
243     * @param reserved characters to encode.
244     */
245    public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) {
246        int index = offset;
247        int count = length;
248        for (; count > 0; index++, count--) {
249            final char ch = buffer.charAt(index);
250            boolean match = ch == '%';
251            if (reserved != null) {
252                for (int i = 0; !match && i < reserved.length; i++) {
253                    if (ch == reserved[i]) {
254                        match = true;
255                        break;
256                    }
257                }
258            }
259            if (match) {
260                // Encode
261                final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)};
262                buffer.setCharAt(index, '%');
263                buffer.insert(index + 1, digits);
264                index += 2;
265            }
266        }
267    }
268
269    static void encodeRfc2396(final StringBuilder buffer, final int offset, final int length, final char[] allowed) {
270        int index = offset;
271        int count = length;
272        for (; count > 0; index++, count--) {
273            final char ch = buffer.charAt(index);
274            if (Arrays.binarySearch(allowed, ch) < 0) {
275                // Encode
276                final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)};
277                buffer.setCharAt(index, '%');
278                buffer.insert(index + 1, digits);
279                index += 2;
280            }
281        }
282    }
283
284    /**
285     * Extracts the first element of a path.
286     *
287     * @param name StringBuilder containing the path.
288     * @return The first element of the path.
289     */
290    public static String extractFirstElement(final StringBuilder name) {
291        final int len = name.length();
292        if (len < 1) {
293            return null;
294        }
295        int startPos = 0;
296        if (name.charAt(0) == SEPARATOR_CHAR) {
297            startPos = 1;
298        }
299        for (int pos = startPos; pos < len; pos++) {
300            if (name.charAt(pos) == SEPARATOR_CHAR) {
301                // Found a separator
302                final String elem = name.substring(startPos, pos);
303                name.delete(startPos, pos + 1);
304                return elem;
305            }
306        }
307
308        // No separator
309        final String elem = name.substring(startPos);
310        name.setLength(0);
311        return elem;
312    }
313
314    /**
315     * Extract the query String from the URI.
316     *
317     * @param name StringBuilder containing the URI.
318     * @return The query string, if any. null otherwise.
319     */
320    public static String extractQueryString(final StringBuilder name) {
321        for (int pos = 0; pos < name.length(); pos++) {
322            if (name.charAt(pos) == '?') {
323                final String queryString = name.substring(pos + 1);
324                name.delete(pos, name.length());
325                return queryString;
326            }
327        }
328
329        return null;
330    }
331
332    /**
333     * Extracts the scheme from a URI.
334     *
335     * @param uri The URI.
336     * @return The scheme name. Returns null if there is no scheme.
337     * @deprecated Use instead {@link #extractScheme}.  Will be removed in 3.0.
338     */
339    @Deprecated
340    public static String extractScheme(final String uri) {
341        return extractScheme(uri, null);
342    }
343
344    /**
345     * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
346     *
347     * @param uri The URI.
348     * @param buffer Returns the remainder of the URI.
349     * @return The scheme name. Returns null if there is no scheme.
350     * @deprecated Use instead {@link #extractScheme}.  Will be removed in 3.0.
351     */
352    @Deprecated
353    public static String extractScheme(final String uri, final StringBuilder buffer) {
354        if (buffer != null) {
355            buffer.setLength(0);
356            buffer.append(uri);
357        }
358
359        final int maxPos = uri.length();
360        for (int pos = 0; pos < maxPos; pos++) {
361            final char ch = uri.charAt(pos);
362
363            if (ch == ':') {
364                // Found the end of the scheme
365                final String scheme = uri.substring(0, pos);
366                if (scheme.length() <= 1 && SystemUtils.IS_OS_WINDOWS) {
367                    // This is not a scheme, but a Windows drive letter
368                    return null;
369                }
370                if (buffer != null) {
371                    buffer.delete(0, pos + 1);
372                }
373                return scheme.intern();
374            }
375
376            if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') {
377                // A scheme character
378                continue;
379            }
380            if (!(pos > 0 && (ch >= '0' && ch <= '9' || ch == '+' || ch == '-' || ch == '.'))) {
381                // Not a scheme character
382                break;
383            }
384            // A scheme character (these are not allowed as the first
385            // character of the scheme), but can be used as subsequent
386            // characters.
387        }
388
389        // No scheme in URI
390        return null;
391    }
392
393    /**
394     * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
395     * <p>
396     * The scheme is extracted based on the currently supported schemes in the system.  That is to say the schemes
397     * supported by the registered providers.
398     * </p>
399     * <p>
400     * This allows us to handle varying scheme's without making assumptions based on the ':' character.  Specifically
401     * handle scheme extraction calls for URI parameters that are not actually uri's, but may be names with ':' in them.
402     * </p>
403     * @param schemes The schemes to check.
404     * @param uri The potential URI. May also be a name.
405     * @return The scheme name. Returns null if there is no scheme.
406     * @since 2.3
407     */
408    public static String extractScheme(final String[] schemes, final String uri) {
409        return extractScheme(schemes, uri, null);
410    }
411
412    /**
413     * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
414     * <p>
415     * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes
416     * supported by the registered providers.
417     * </p>
418     * <p>
419     * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically
420     * handle scheme extraction calls for URI parameters that are not actually URI's, but may be names with ':' in them.
421     * </p>
422     * @param schemes The schemes to check.
423     * @param uri The potential URI. May also just be a name.
424     * @param buffer Returns the remainder of the URI.
425     * @return The scheme name. Returns null if there is no scheme.
426     * @since 2.3
427     */
428    public static String extractScheme(final String[] schemes, final String uri, final StringBuilder buffer) {
429        if (buffer != null) {
430            buffer.setLength(0);
431            buffer.append(uri);
432        }
433        for (final String scheme : schemes) {
434            if (uri.startsWith(scheme + ":")) {
435                if (buffer != null) {
436                    buffer.delete(0, uri.indexOf(':') + 1);
437                }
438                return scheme;
439            }
440        }
441        return null;
442    }
443
444    /**
445     * Normalises the separators in a name.
446     *
447     * @param name The StringBuilder containing the name
448     * @return true if the StringBuilder was modified.
449     */
450    public static boolean fixSeparators(final StringBuilder name) {
451        boolean changed = false;
452        int maxlen = name.length();
453        for (int i = 0; i < maxlen; i++) {
454            final char ch = name.charAt(i);
455            if (ch == TRANS_SEPARATOR) {
456                name.setCharAt(i, SEPARATOR_CHAR);
457                changed = true;
458            }
459            if (i < maxlen - 2 && name.charAt(i) == '%' && name.charAt(i + 1) == '2') {
460                if (name.charAt(i + 2) == 'f' || name.charAt(i + 2) == 'F') {
461                    name.setCharAt(i, SEPARATOR_CHAR);
462                    name.delete(i + 1, i + 3);
463                    maxlen -= 2;
464                    changed = true;
465                } else if (name.charAt(i + 2) == 'e' || name.charAt(i + 2) == 'E') {
466                    name.setCharAt(i, '.');
467                    name.delete(i + 1, i + 3);
468                    maxlen -= 2;
469                    changed = true;
470                }
471            }
472        }
473        return changed;
474    }
475
476    /**
477     * Normalises a path. Does the following:
478     * <ul>
479     * <li>Removes empty path elements.
480     * <li>Handles '.' and '..' elements.
481     * <li>Removes trailing separator.
482     * </ul>
483     *
484     * Its assumed that the separators are already fixed.
485     *
486     * @param path The path to normalize.
487     * @return The FileType.
488     * @throws FileSystemException if an error occurs.
489     * @see #fixSeparators
490     */
491    public static FileType normalisePath(final StringBuilder path) throws FileSystemException {
492        FileType fileType = FileType.FOLDER;
493        if (path.length() == 0) {
494            return fileType;
495        }
496
497        // '/' or '.' or '..' or anyPath/..' or 'anyPath/.'  should always be a path
498        if (path.charAt(path.length() - 1) != '/'
499                && path.lastIndexOf("/..") != path.length() - 3
500                && path.lastIndexOf("/.") != path.length() - 2
501                && path.lastIndexOf("..") != 0
502                && path.lastIndexOf(".") != 0
503        ) {
504            fileType = FileType.FILE;
505        }
506
507        // Adjust separators
508        // fixSeparators(path);
509
510        // Determine the start of the first element
511        int startFirstElem = 0;
512        if (path.charAt(0) == SEPARATOR_CHAR) {
513            if (path.length() == 1) {
514                return fileType;
515            }
516            startFirstElem = 1;
517        }
518
519        // Iterate over each element
520        int startElem = startFirstElem;
521        int maxlen = path.length();
522        while (startElem < maxlen) {
523            // Find the end of the element
524            int endElem = startElem;
525            while (endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR) {
526                endElem++;
527            }
528
529            final int elemLen = endElem - startElem;
530            if (elemLen == 0) {
531                // An empty element - axe it
532                path.deleteCharAt(endElem);
533                maxlen = path.length();
534                continue;
535            }
536            if (elemLen == 1 && path.charAt(startElem) == '.') {
537                // A '.' element - axe it
538                path.deleteCharAt(startElem);
539                maxlen = path.length();
540                continue;
541            }
542            if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') {
543                // A '..' element - remove the previous element
544                if (startElem == startFirstElem) {
545                    // Previous element is missing
546                    throw new FileSystemException("vfs.provider/invalid-relative-path.error");
547                }
548
549                // Find start of previous element
550                int pos = startElem - 2;
551                while (pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR) {
552                    pos--;
553                }
554                startElem = pos + 1;
555
556                path.delete(startElem, endElem + 1);
557                maxlen = path.length();
558                continue;
559            }
560
561            // A regular element
562            startElem = endElem + 1;
563        }
564
565        // Remove trailing separator
566        if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) {
567            path.deleteCharAt(maxlen - 1);
568        }
569
570        return fileType;
571    }
572
    /**
     * Private so that this class, which only offers static methods, cannot be instantiated from outside.
     */
    private UriParser() {
    }
575}