MemoryMappedFileInputStream.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.io.input;
import static org.apache.commons.io.IOUtils.EOF;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import org.apache.commons.io.build.AbstractStreamBuilder;
/**
* An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
* mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
* configurable.
* <p>
* For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
* kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files into
* memory.
* </p>
* <p>
* To build an instance, use {@link Builder}.
* </p>
* <p>
* Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
* use case, the use of buffering may still further improve performance. For example:
* </p>
* <pre>{@code
* BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
* MemoryMappedFileInputStream.builder()
* .setPath(path)
* .setBufferSize(256 * 1024)
* .get()));}
* </pre>
* <p>
* should outperform:
* </p>
* <pre>
* new GzipInputStream(new MemoryMappedFileInputStream(path))
* </pre>
* <pre>{@code
* GzipInputStream s = new GzipInputStream(
* MemoryMappedFileInputStream.builder()
* .setPath(path)
* .setBufferSize(256 * 1024)
* .get());}
* </pre>
*
* @see Builder
* @since 2.12.0
*/
public final class MemoryMappedFileInputStream extends AbstractInputStream {

    // @formatter:off
    /**
     * Builds a new {@link MemoryMappedFileInputStream}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
     *   .setPath(path)
     *   .setBufferSize(256 * 1024)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {

        /**
         * Constructs a new {@link Builder}, registering {@link #DEFAULT_BUFFER_SIZE} as both the default and the
         * current buffer size.
         */
        public Builder() {
            setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
            setBufferSize(DEFAULT_BUFFER_SIZE);
        }

        /**
         * Builds a new {@link MemoryMappedFileInputStream}.
         * <p>
         * You must set input that supports {@link #getPath()}, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getPath()}</li>
         * <li>{@link #getBufferSize()}</li>
         * </ul>
         *
         * @return a new instance.
         * @throws IllegalStateException         if the {@code origin} is {@code null}.
         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
         * @throws IOException                   if an I/O error occurs.
         * @see #getPath()
         * @see #getBufferSize()
         */
        @Override
        public MemoryMappedFileInputStream get() throws IOException {
            return new MemoryMappedFileInputStream(getPath(), getBufferSize());
        }
    }

    /**
     * Default size of the sliding memory mapped buffer. We use 256K, equal to 64 pages (given a 4K page size).
     * Increasing the value beyond the default size will generally not provide any increase in throughput.
     */
    private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;

    /**
     * Shared read-only, zero-length buffer. It stands in for the mapped window before the first mapping, once the end
     * of the file has been reached, and after the stream has been closed, so {@link #buffer} is never {@code null}.
     */
    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Maximum number of bytes mapped into memory at one time.
     */
    private final int bufferSize;

    /**
     * Channel on the underlying file, opened for reading in the constructor and closed in {@link #close()}.
     */
    private final FileChannel channel;

    /**
     * The currently mapped window of the file, or {@link #EMPTY_BUFFER} when nothing is mapped.
     */
    private ByteBuffer buffer = EMPTY_BUFFER;

    /**
     * The starting position (within the file) of the next sliding buffer.
     */
    private long nextBufferPosition;

    /**
     * Constructs a new instance.
     *
     * @param file       The path of the file to open.
     * @param bufferSize Size of the sliding buffer.
     * @throws IOException If an I/O error occurs.
     */
    private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
        this.bufferSize = bufferSize;
        this.channel = FileChannel.open(file, StandardOpenOption.READ);
    }

    /**
     * Returns the number of bytes left in the currently mapped window. Bytes of the file that lie beyond the current
     * window are not counted.
     *
     * @return the number of bytes remaining in the current buffer.
     * @throws IOException declared by the overridden method; not thrown by this implementation.
     */
    @Override
    public int available() throws IOException {
        return buffer.remaining();
    }

    /**
     * Hands the current buffer to {@link ByteBufferCleaner} when cleaning is supported and the buffer is direct.
     * {@link #EMPTY_BUFFER} wraps a heap array and is therefore never cleaned.
     */
    private void cleanBuffer() {
        if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
            ByteBufferCleaner.clean(buffer);
        }
    }

    /**
     * Releases the mapped buffer, closes the file channel and marks this stream as closed. Calling this method on an
     * already closed stream has no effect.
     *
     * @throws IOException if closing the channel fails.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed()) {
            cleanBuffer();
            // Drop the reference to the cleaned mapping so it can no longer be touched.
            buffer = EMPTY_BUFFER;
            channel.close();
            super.close();
        }
    }

    /**
     * Gets the size of the sliding buffer given at construction time.
     *
     * @return the sliding buffer size in bytes.
     */
    int getBufferSize() {
        return bufferSize;
    }

    /**
     * Slides the window forward: maps up to {@link #bufferSize} bytes starting at {@link #nextBufferPosition}, or
     * installs {@link #EMPTY_BUFFER} when no bytes remain in the file.
     *
     * @throws IOException if the channel size cannot be read or the mapping fails.
     */
    private void nextBuffer() throws IOException {
        final long remainingInFile = channel.size() - nextBufferPosition;
        if (remainingInFile > 0) {
            final long amountToMap = Math.min(remainingInFile, bufferSize);
            // Release the previous window before mapping the next one.
            cleanBuffer();
            buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
            nextBufferPosition += amountToMap;
        } else {
            buffer = EMPTY_BUFFER;
        }
    }

    /**
     * Reads the next byte of data.
     *
     * @return the next byte as an {@code int} in the range {@code 0} to {@code 255}, or {@code -1} at the end of the
     *         file.
     * @throws IOException if this stream is closed or an I/O error occurs while mapping the next window.
     */
    @Override
    public int read() throws IOException {
        checkOpen();
        if (!buffer.hasRemaining()) {
            nextBuffer();
            if (!buffer.hasRemaining()) {
                return EOF;
            }
        }
        // Byte.toUnsignedInt maps the signed byte onto 0..255 as the InputStream contract requires; widening through
        // short would sign-extend first and yield values above 255 for bytes >= 0x80.
        return Byte.toUnsignedInt(buffer.get());
    }

    /**
     * Reads up to {@code len} bytes into {@code b}. At most the bytes remaining in the currently mapped window are
     * copied, so fewer than {@code len} bytes may be returned even when more data remains in the file.
     *
     * @param b   the destination array.
     * @param off the start offset in {@code b}.
     * @param len the maximum number of bytes to read.
     * @return the number of bytes read, {@code 0} if {@code len} is zero, or {@code -1} at the end of the file.
     * @throws IOException if this stream is closed or an I/O error occurs while mapping the next window.
     */
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        checkOpen();
        if (len == 0) {
            // InputStream contract: a zero-length request reads nothing and returns 0, even at end of file.
            return 0;
        }
        if (!buffer.hasRemaining()) {
            nextBuffer();
            if (!buffer.hasRemaining()) {
                return EOF;
            }
        }
        final int numBytes = Math.min(buffer.remaining(), len);
        buffer.get(b, off, numBytes);
        return numBytes;
    }

    /**
     * Skips over up to {@code n} bytes of data.
     *
     * @param n the number of bytes to skip.
     * @return the number of bytes actually skipped; {@code 0} when {@code n} is not positive, and less than {@code n}
     *         when the end of the file is reached first.
     * @throws IOException if this stream is closed or an I/O error occurs.
     */
    @Override
    public long skip(final long n) throws IOException {
        checkOpen();
        if (n <= 0) {
            return 0;
        }
        if (n <= buffer.remaining()) {
            // The skip ends inside the current window: just advance the buffer position.
            buffer.position((int) (buffer.position() + n));
            return n;
        }
        // The skip runs past the current window: consume what is left of it, then as much of the unmapped part of the
        // file as is available.
        final long remainingInFile = channel.size() - nextBufferPosition;
        final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
        // Only the portion beyond the current window moves the position of the next mapping.
        nextBufferPosition += skipped - buffer.remaining();
        nextBuffer();
        return skipped;
    }
}