001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.ftp.parser;
019
020import java.text.ParseException;
021import java.util.List;
022import java.util.regex.Pattern;
023
024import org.apache.commons.net.ftp.FTPClientConfig;
025import org.apache.commons.net.ftp.FTPFile;
026
027/**
028 * Implementation FTPFileEntryParser and FTPFileListParser for standard UNIX Systems.
029 *
030 * This class is based on the logic of Daniel Savarese's DefaultFTPListParser, but adapted to use regular expressions and to fit the new FTPFileEntryParser
031 * interface.
032 *
033 * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions)
034 */
035public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl {
036
037    static final String DEFAULT_DATE_FORMAT = "MMM d yyyy"; // Nov 9 2001
038
039    static final String DEFAULT_RECENT_DATE_FORMAT = "MMM d HH:mm"; // Nov 9 20:06
040
041    static final String NUMERIC_DATE_FORMAT = "yyyy-MM-dd HH:mm"; // 2001-11-09 20:06
042
043    // Suffixes used in Japanese listings after the numeric values
044    private static final String JA_MONTH = "\u6708";
045    private static final String JA_DAY = "\u65e5";
046    private static final String JA_YEAR = "\u5e74";
047
048    private static final String DEFAULT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' yyyy'" + JA_YEAR + "'"; // 6月 3日 2003年
049
050    private static final String DEFAULT_RECENT_DATE_FORMAT_JA = "M'" + JA_MONTH + "' d'" + JA_DAY + "' HH:mm"; // 8月 17日 20:10
051
052    private static final Pattern TOTAL_PATTERN = Pattern.compile("^total \\d+$");
053
054    /**
055     * Some Linux distributions are now shipping an FTP server which formats file listing dates in an all-numeric format: <code>"yyyy-MM-dd HH:mm</code>. This
056     * is a very welcome development, and hopefully it will soon become the standard. However, since it is so new, for now, and possibly forever, we merely
057     * accommodate it, but do not make it the default.
058     * <p>
059     * For now end users may specify this format only via <code>UnixFTPEntryParser(FTPClientConfig)</code>. Steve Cohen - 2005-04-17
060     */
061    public static final FTPClientConfig NUMERIC_DATE_CONFIG = new FTPClientConfig(FTPClientConfig.SYST_UNIX, NUMERIC_DATE_FORMAT, null);
062
063    /**
064     * this is the regular expression used by this parser.
065     *
066     * Permissions: r the file is readable w the file is writable x the file is executable - the indicated permission is not granted L mandatory locking occurs
067     * during access (the set-group-ID bit is on and the group execution bit is off) s the set-user-ID or set-group-ID bit is on, and the corresponding user or
068     * group execution bit is also on S undefined bit-state (the set-user-ID bit is on and the user execution bit is off) t the 1000 (octal) bit, or sticky bit,
069     * is on [see chmod(1)], and execution is on T the 1000 bit is turned on, and execution is off (undefined bit-state) e z/OS external link bit. Final letter
070     * may be appended: + file has extended security attributes (e.g. ACL) Note: local listings on MacOSX also use '@'; this is not allowed for here as does not
071     * appear to be shown by FTP servers {@code @} file has extended attributes
072     */
073    private static final String REGEX = "([bcdelfmpSs-])" // file type
074            + "(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions
075
076            + "\\s*" // separator TODO why allow it to be omitted??
077
078            + "(\\d+)" // link count
079
080            + "\\s+" // separator
081
082            + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?" // owner name (optional spaces)
083            + "(?:(\\S+(?:\\s\\S+)*)\\s+)?" // group name (optional spaces)
084            + "(\\d+(?:,\\s*\\d+)?)" // size or n,m
085
086            + "\\s+" // separator
087
088            /*
089             * numeric or standard format date: yyyy-mm-dd (expecting hh:mm to follow) MMM [d]d [d]d MMM N.B. use non-space for MMM to allow for languages such
090             * as German which use diacritics (e.g. umlaut) in some abbreviations. Japanese uses numeric day and month with suffixes to distinguish them [d]dXX
091             * [d]dZZ
092             */
093            + "(" + "(?:\\d+[-/]\\d+[-/]\\d+)" + // yyyy-mm-dd
094            "|(?:\\S{3}\\s+\\d{1,2})" + // MMM [d]d
095            "|(?:\\d{1,2}\\s+\\S{3})" + // [d]d MMM
096            "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2}" + JA_DAY + ")" + ")"
097
098            + "\\s+" // separator
099
100            /*
101             * year (for non-recent standard format) - yyyy or time (for numeric or recent standard format) [h]h:mm or Japanese year - yyyyXX
102             */
103            + "((?:\\d+(?::\\d+)?)|(?:\\d{4}" + JA_YEAR + "))" // (20)
104
105            + "\\s" // separator
106
107            + "(.*)"; // the rest (21)
108
109    // if true, leading spaces are trimmed from file names
110    // this was the case for the original implementation
111    final boolean trimLeadingSpaces; // package protected for access from test code
112
113    /**
114     * The default constructor for a UnixFTPEntryParser object.
115     *
116     * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
117     *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
118     */
119    public UnixFTPEntryParser() {
120        this(null);
121    }
122
123    /**
124     * This constructor allows the creation of a UnixFTPEntryParser object with something other than the default configuration.
125     *
126     * @param config The {@link FTPClientConfig configuration} object used to configure this parser.
127     * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
128     *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
129     * @since 1.4
130     */
131    public UnixFTPEntryParser(final FTPClientConfig config) {
132        this(config, false);
133    }
134
135    /**
136     * This constructor allows the creation of a UnixFTPEntryParser object with something other than the default configuration.
137     *
138     * @param config            The {@link FTPClientConfig configuration} object used to configure this parser.
139     * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names
140     * @throws IllegalArgumentException Thrown if the regular expression is unparseable. Should not be seen under normal conditions.
141     *                                  If this exception is seen, this is a sign that {@code REGEX} is not a valid regular expression.
142     * @since 3.4
143     */
144    public UnixFTPEntryParser(final FTPClientConfig config, final boolean trimLeadingSpaces) {
145        super(REGEX);
146        configure(config);
147        this.trimLeadingSpaces = trimLeadingSpaces;
148    }
149
150    /**
151     * Defines a default configuration to be used when this class is instantiated without a {@link FTPClientConfig FTPClientConfig} parameter being specified.
152     *
153     * @return the default configuration for this parser.
154     */
155    @Override
156    protected FTPClientConfig getDefaultConfiguration() {
157        return new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT, DEFAULT_RECENT_DATE_FORMAT);
158    }
159
160    /**
161     * Parses a line of a unix (standard) FTP server file listing and converts it into a usable format in the form of an <code>FTPFile</code> instance. If the
162     * file listing line doesn't describe a file, <code>null</code> is returned, otherwise a <code>FTPFile</code> instance representing the files in the
163     * directory is returned.
164     *
165     * @param entry A line of text from the file listing
166     * @return An FTPFile instance corresponding to the supplied entry
167     */
168    @Override
169    public FTPFile parseFTPEntry(final String entry) {
170        final FTPFile file = new FTPFile();
171        file.setRawListing(entry);
172        final int type;
173        boolean isDevice = false;
174
175        if (matches(entry)) {
176            final String typeStr = group(1);
177            final String hardLinkCount = group(15);
178            final String usr = group(16);
179            final String grp = group(17);
180            final String filesize = group(18);
181            final String datestr = group(19) + " " + group(20);
182            String name = group(21);
183            if (trimLeadingSpaces) {
184                name = name.replaceFirst("^\\s+", "");
185            }
186
187            try {
188                if (group(19).contains(JA_MONTH)) { // special processing for Japanese format
189                    final FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl();
190                    jaParser.configure(new FTPClientConfig(FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_JA, DEFAULT_RECENT_DATE_FORMAT_JA));
191                    file.setTimestamp(jaParser.parseTimestamp(datestr));
192                } else {
193                    file.setTimestamp(super.parseTimestamp(datestr));
194                }
195            } catch (final ParseException e) {
196                // intentionally do nothing
197            }
198
199            // A 'whiteout' file is an ARTIFICIAL entry in any of several types of
200            // 'translucent' filesystems, of which a 'union' filesystem is one.
201
202            // bcdelfmpSs-
203            switch (typeStr.charAt(0)) {
204            case 'd':
205                type = FTPFile.DIRECTORY_TYPE;
206                break;
207            case 'e': // NET-39 => z/OS external link
208                type = FTPFile.SYMBOLIC_LINK_TYPE;
209                break;
210            case 'l':
211                type = FTPFile.SYMBOLIC_LINK_TYPE;
212                break;
213            case 'b':
214            case 'c':
215                isDevice = true;
216                type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented
217                break;
218            case 'f':
219            case '-':
220                type = FTPFile.FILE_TYPE;
221                break;
222            default: // e.g. ? and w = whiteout
223                type = FTPFile.UNKNOWN_TYPE;
224            }
225
226            file.setType(type);
227
228            int g = 4;
229            for (int access = 0; access < 3; access++, g += 4) {
230                // Use != '-' to avoid having to check for suid and sticky bits
231                file.setPermission(access, FTPFile.READ_PERMISSION, !group(g).equals("-"));
232                file.setPermission(access, FTPFile.WRITE_PERMISSION, !group(g + 1).equals("-"));
233
234                final String execPerm = group(g + 2);
235                file.setPermission(access, FTPFile.EXECUTE_PERMISSION, !execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0)));
236            }
237
238            if (!isDevice) {
239                try {
240                    file.setHardLinkCount(Integer.parseInt(hardLinkCount));
241                } catch (final NumberFormatException e) {
242                    // intentionally do nothing
243                }
244            }
245
246            file.setUser(usr);
247            file.setGroup(grp);
248
249            try {
250                file.setSize(Long.parseLong(filesize));
251            } catch (final NumberFormatException e) {
252                // intentionally do nothing
253            }
254
255            // oddball cases like symbolic links, file names
256            // with spaces in them.
257            if (type == FTPFile.SYMBOLIC_LINK_TYPE) {
258
259                final int end = name.indexOf(" -> ");
260                // Give up if no link indicator is present
261                if (end == -1) {
262                    file.setName(name);
263                } else {
264                    file.setName(name.substring(0, end));
265                    file.setLink(name.substring(end + 4));
266                }
267
268            } else {
269                file.setName(name);
270            }
271            return file;
272        }
273        return null;
274    }
275
276    /**
277     * Preparses the list to discard "total nnn" lines.
278     */
279    @Override
280    public List<String> preParse(final List<String> original) {
281        // NET-389
282        original.removeIf(entry -> TOTAL_PATTERN.matcher(entry).matches());
283        return original;
284    }
285
286}