/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.input;

import static org.apache.commons.io.IOUtils.EOF;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

import org.apache.commons.io.build.AbstractStreamBuilder;

/**
 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
 * configurable.
 * <p>
 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
 * kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files
 * into memory.
 * </p>
 * <p>
 * To build an instance, use {@link Builder}.
 * </p>
 * <p>
 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
 * use case, the use of buffering may still further improve performance. For example:
 * </p>
 * <pre>{@code
 * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
 *   MemoryMappedFileInputStream.builder()
 *     .setPath(path)
 *     .setBufferSize(256 * 1024)
 *     .get()));}
 * </pre>
 * <p>
 * should outperform:
 * </p>
 * <pre>{@code
 * GzipInputStream s = new GzipInputStream(
 *   MemoryMappedFileInputStream.builder()
 *     .setPath(path)
 *     .setBufferSize(256 * 1024)
 *     .get());}
 * </pre>
 *
 * @see Builder
 * @since 2.12.0
 */
public final class MemoryMappedFileInputStream extends AbstractInputStream {

    // @formatter:off
    /**
     * Builds a new {@link MemoryMappedFileInputStream}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
     *   .setPath(path)
     *   .setBufferSize(256 * 1024)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {

        /**
         * Constructs a new {@link Builder} whose buffer size and default buffer size are both
         * {@link MemoryMappedFileInputStream#DEFAULT_BUFFER_SIZE}.
         */
        public Builder() {
            setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
            setBufferSize(DEFAULT_BUFFER_SIZE);
        }

        /**
         * Builds a new {@link MemoryMappedFileInputStream}.
         * <p>
         * You must set input that supports {@link #getPath()}, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getPath()}</li>
         * <li>{@link #getBufferSize()}</li>
         * </ul>
         *
         * @return a new instance.
         * @throws IllegalStateException         if the {@code origin} is {@code null}.
         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
         * @throws IOException                   if an I/O error occurs.
         * @see #getPath()
         * @see #getBufferSize()
         */
        @Override
        public MemoryMappedFileInputStream get() throws IOException {
            return new MemoryMappedFileInputStream(getPath(), getBufferSize());
        }
    }

    /**
     * Default size of the sliding memory mapped buffer. We use 256K, equal to 64 pages (given a 4K page size).
     * Increasing the value beyond the default size will generally not provide any increase in throughput.
     */
    private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;

    /**
     * Shared, read-only, zero-length heap buffer used whenever no file window is currently mapped. It is not a direct
     * buffer, so {@link #cleanBuffer()} never attempts to release it.
     */
    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /** Maximum number of bytes mapped by a single sliding window. */
    private final int bufferSize;

    /** Channel on the underlying file, opened for reading. */
    private final FileChannel channel;

    /** The currently mapped window, or {@link #EMPTY_BUFFER} when nothing is mapped. */
    private ByteBuffer buffer = EMPTY_BUFFER;

    /**
     * The starting position (within the file) of the next sliding buffer.
     */
    private long nextBufferPosition;

    /**
     * Constructs a new instance.
     *
     * @param file       The path of the file to open.
     * @param bufferSize Size of the sliding buffer.
     * @throws IOException If an I/O error occurs.
     */
    private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
        this.bufferSize = bufferSize;
        this.channel = FileChannel.open(file, StandardOpenOption.READ);
    }

    /**
     * Returns the number of bytes left in the currently mapped window. Bytes of the file that have not been mapped
     * yet are not counted.
     *
     * @return the number of bytes remaining in the current window.
     * @throws IOException declared by the overridden method; not thrown by this implementation.
     */
    @Override
    public int available() throws IOException {
        return buffer.remaining();
    }

    /**
     * Releases the current window through {@link ByteBufferCleaner} when cleaning is supported and the window is a
     * direct buffer; otherwise does nothing.
     */
    private void cleanBuffer() {
        if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
            ByteBufferCleaner.clean(buffer);
        }
    }

    /**
     * Releases the current window, closes the file channel and marks this stream as closed. Calling this method on
     * an already closed stream has no effect.
     *
     * @throws IOException if closing the channel or the superclass fails.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed()) {
            cleanBuffer();
            buffer = EMPTY_BUFFER;
            channel.close();
            super.close();
        }
    }

    /**
     * Gets the configured size of the sliding buffer.
     *
     * @return the sliding buffer size in bytes.
     */
    int getBufferSize() {
        return bufferSize;
    }

    /**
     * Discards the current window and maps the next one, starting at {@link #nextBufferPosition} and spanning at most
     * {@link #bufferSize} bytes. When no bytes remain in the file the window becomes {@link #EMPTY_BUFFER}.
     *
     * @throws IOException if the channel size cannot be read or the mapping fails.
     */
    private void nextBuffer() throws IOException {
        // Release the previous window first and drop the reference right away, so that the field never points at
        // an already-cleaned direct buffer if a call below throws, and so that the final window is released as soon
        // as the end of the file is reached.
        cleanBuffer();
        buffer = EMPTY_BUFFER;
        final long remainingInFile = channel.size() - nextBufferPosition;
        if (remainingInFile > 0) {
            final long amountToMap = Math.min(remainingInFile, bufferSize);
            buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
            nextBufferPosition += amountToMap;
        }
    }

    /**
     * Reads the next byte, mapping the next window of the file when the current one is exhausted.
     *
     * @return the next byte as a value in the range {@code 0} to {@code 255}, or {@code -1} at the end of the file.
     * @throws IOException if this stream is closed or an I/O error occurs.
     */
    @Override
    public int read() throws IOException {
        checkOpen();
        if (!buffer.hasRemaining()) {
            nextBuffer();
            if (!buffer.hasRemaining()) {
                return EOF;
            }
        }
        // Byte.toUnsignedInt, not Short.toUnsignedInt: the latter sign-extends the byte to a short first and yields
        // values above 255 for any byte >= 0x80.
        return Byte.toUnsignedInt(buffer.get());
    }

    /**
     * Reads up to {@code len} bytes into {@code b}. At most the bytes remaining in the current window are returned
     * by a single call, so fewer than {@code len} bytes may be read even before the end of the file.
     *
     * @param b   the destination array.
     * @param off the offset in {@code b} at which the first byte is written.
     * @param len the maximum number of bytes to read.
     * @return the number of bytes read, {@code 0} if {@code len} is zero, or {@code -1} at the end of the file.
     * @throws IOException if this stream is closed or an I/O error occurs.
     */
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        checkOpen();
        if (len == 0) {
            // InputStream contract: a zero-length read returns 0, even at the end of the file.
            return 0;
        }
        if (!buffer.hasRemaining()) {
            nextBuffer();
            if (!buffer.hasRemaining()) {
                return EOF;
            }
        }
        final int numBytes = Math.min(buffer.remaining(), len);
        buffer.get(b, off, numBytes);
        return numBytes;
    }

    /**
     * Skips over up to {@code n} bytes. Bytes are skipped within the current window when possible; otherwise the
     * rest of the current window is dropped and the next window is mapped at the resulting file position.
     *
     * @param n the number of bytes to skip.
     * @return the number of bytes actually skipped; {@code 0} if {@code n} is not positive, and fewer than {@code n}
     *         if the end of the file is reached first.
     * @throws IOException if this stream is closed or an I/O error occurs.
     */
    @Override
    public long skip(final long n) throws IOException {
        checkOpen();
        if (n <= 0) {
            return 0;
        }
        if (n <= buffer.remaining()) {
            // n fits in the current window, so the sum is a valid int position.
            buffer.position((int) (buffer.position() + n));
            return n;
        }
        final long remainingInFile = channel.size() - nextBufferPosition;
        // Everything left in the current window, plus as much of the unmapped remainder of the file as requested.
        final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
        // Only the part beyond the current window advances the position of the next mapping.
        nextBufferPosition += skipped - buffer.remaining();
        nextBuffer();
        return skipped;
    }

}