001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import static org.apache.commons.io.IOUtils.EOF;
020
021import java.io.BufferedInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.ByteBuffer;
025import java.nio.channels.FileChannel;
026import java.nio.channels.FileChannel.MapMode;
027import java.nio.file.Path;
028import java.nio.file.StandardOpenOption;
029
030import org.apache.commons.io.build.AbstractStreamBuilder;
031
032/**
033 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
034 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
035 * configurable.
036 * <p>
037 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
 * kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files into
039 * memory.
040 * </p>
041 * <p>
042 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
043 * use case, the use of buffering may still further improve performance. For example:
044 * </p>
045 * <p>
046 * To build an instance, use {@link Builder}.
047 * </p>
048 * <pre>{@code
049 * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
050 *   MemoryMappedFileInputStream.builder()
051 *     .setPath(path)
052 *     .setBufferSize(256 * 1024)
053 *     .get()));}
054 * </pre>
055 * <p>
056 * should outperform:
057 * </p>
058 * <pre>
059 * new GzipInputStream(new MemoryMappedFileInputStream(path))
060 * </pre>
061 * <pre>{@code
062 * GzipInputStream s = new GzipInputStream(
063 *   MemoryMappedFileInputStream.builder()
064 *     .setPath(path)
065 *     .setBufferSize(256 * 1024)
066 *     .get());}
067 * </pre>
068 *
069 * @see Builder
070 * @since 2.12.0
071 */
072public final class MemoryMappedFileInputStream extends AbstractInputStream {
073
074    // @formatter:off
075    /**
076     * Builds a new {@link MemoryMappedFileInputStream}.
077     *
078     * <p>
079     * For example:
080     * </p>
081     * <pre>{@code
082     * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
083     *   .setPath(path)
084     *   .setBufferSize(256 * 1024)
085     *   .get();}
086     * </pre>
087     *
088     * @see #get()
089     * @since 2.12.0
090     */
091    // @formatter:on
092    public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {
093
094        /**
095         * Constructs a new {@link Builder}.
096         */
097        public Builder() {
098            setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
099            setBufferSize(DEFAULT_BUFFER_SIZE);
100        }
101
102        /**
103         * Builds a new {@link MemoryMappedFileInputStream}.
104         * <p>
105         * You must set input that supports {@link #getPath()}, otherwise, this method throws an exception.
106         * </p>
107         * <p>
108         * This builder use the following aspects:
109         * </p>
110         * <ul>
111         * <li>{@link #getPath()}</li>
112         * <li>{@link #getBufferSize()}</li>
113         * </ul>
114         *
115         * @return a new instance.
116         * @throws IllegalStateException         if the {@code origin} is {@code null}.
117         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
118         * @throws IOException                   if an I/O error occurs.
119         * @see #getPath()
120         * @see #getBufferSize()
121         */
122        @Override
123        public MemoryMappedFileInputStream get() throws IOException {
124            return new MemoryMappedFileInputStream(getPath(), getBufferSize());
125        }
126    }
127
128    /**
129     * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size).
130     * Increasing the value beyond the default size will generally not provide any increase in throughput.
131     */
132    private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;
133
134    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();
135
136    /**
137     * Constructs a new {@link Builder}.
138     *
139     * @return a new {@link Builder}.
140     * @since 2.12.0
141     */
142    public static Builder builder() {
143        return new Builder();
144    }
145
146    private final int bufferSize;
147    private final FileChannel channel;
148    private ByteBuffer buffer = EMPTY_BUFFER;
149
150    /**
151     * The starting position (within the file) of the next sliding buffer.
152     */
153    private long nextBufferPosition;
154
155    /**
156     * Constructs a new instance.
157     *
158     * @param file The path of the file to open.
159     * @param bufferSize Size of the sliding buffer.
160     * @throws IOException If an I/O error occurs.
161     */
162    private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
163        this.bufferSize = bufferSize;
164        this.channel = FileChannel.open(file, StandardOpenOption.READ);
165    }
166
167    @Override
168    public int available() throws IOException {
169        //return buffer != null ? buffer.remaining(): 0;
170        return buffer.remaining();
171    }
172
173    private void cleanBuffer() {
174        if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
175            ByteBufferCleaner.clean(buffer);
176        }
177    }
178
179    @Override
180    public void close() throws IOException {
181        if (!isClosed()) {
182            cleanBuffer();
183            buffer = EMPTY_BUFFER;
184            channel.close();
185            super.close();
186        }
187    }
188
189    int getBufferSize() {
190        return bufferSize;
191    }
192
193    private void nextBuffer() throws IOException {
194        final long remainingInFile = channel.size() - nextBufferPosition;
195        if (remainingInFile > 0) {
196            final long amountToMap = Math.min(remainingInFile, bufferSize);
197            cleanBuffer();
198            buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
199            nextBufferPosition += amountToMap;
200        } else {
201            buffer = EMPTY_BUFFER;
202        }
203    }
204
205    @Override
206    public int read() throws IOException {
207        checkOpen();
208        if (!buffer.hasRemaining()) {
209            nextBuffer();
210            if (!buffer.hasRemaining()) {
211                return EOF;
212            }
213        }
214        return Short.toUnsignedInt(buffer.get());
215    }
216
217    @Override
218    public int read(final byte[] b, final int off, final int len) throws IOException {
219        checkOpen();
220        if (!buffer.hasRemaining()) {
221            nextBuffer();
222            if (!buffer.hasRemaining()) {
223                return EOF;
224            }
225        }
226        final int numBytes = Math.min(buffer.remaining(), len);
227        buffer.get(b, off, numBytes);
228        return numBytes;
229    }
230
231    @Override
232    public long skip(final long n) throws IOException {
233        checkOpen();
234        if (n <= 0) {
235            return 0;
236        }
237        if (n <= buffer.remaining()) {
238            buffer.position((int) (buffer.position() + n));
239            return n;
240        }
241        final long remainingInFile = channel.size() - nextBufferPosition;
242        final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
243        nextBufferPosition += skipped - buffer.remaining();
244        nextBuffer();
245        return skipped;
246    }
247
248}