View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  package org.apache.commons.compress.archivers.tar;
18  
19  import java.io.ByteArrayOutputStream;
20  import java.io.Closeable;
21  import java.io.File;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.ByteBuffer;
25  import java.nio.channels.SeekableByteChannel;
26  import java.nio.file.Files;
27  import java.nio.file.Path;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.HashMap;
31  import java.util.LinkedList;
32  import java.util.List;
33  import java.util.Map;
34  
35  import org.apache.commons.compress.archivers.zip.ZipEncoding;
36  import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
37  import org.apache.commons.compress.utils.ArchiveUtils;
38  import org.apache.commons.compress.utils.BoundedArchiveInputStream;
39  import org.apache.commons.compress.utils.BoundedInputStream;
40  import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
41  import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
42  
43  /**
44   * Provides random access to UNIX archives.
45   *
46   * @since 1.21
47   */
48  public class TarFile implements Closeable {
49  
50      private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {
51  
52          private final SeekableByteChannel channel;
53  
54          private final TarArchiveEntry entry;
55  
56          private long entryOffset;
57  
58          private int currentSparseInputStreamIndex;
59  
60          BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
61              super(entry.getDataOffset(), entry.getRealSize());
62              if (channel.size() - entry.getSize() < entry.getDataOffset()) {
63                  throw new IOException("entry size exceeds archive size");
64              }
65              this.entry = entry;
66              this.channel = channel;
67          }
68  
69          @Override
70          protected int read(final long pos, final ByteBuffer buf) throws IOException {
71              if (entryOffset >= entry.getRealSize()) {
72                  return -1;
73              }
74  
75              final int totalRead;
76              if (entry.isSparse()) {
77                  totalRead = readSparse(entryOffset, buf, buf.limit());
78              } else {
79                  totalRead = readArchive(pos, buf);
80              }
81  
82              if (totalRead == -1) {
83                  if (buf.array().length > 0) {
84                      throw new IOException("Truncated TAR archive");
85                  }
86                  setAtEOF(true);
87              } else {
88                  entryOffset += totalRead;
89                  buf.flip();
90              }
91              return totalRead;
92          }
93  
94          private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
95              channel.position(pos);
96              return channel.read(buf);
97          }
98  
99          private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
100             // if there are no actual input streams, just read from the original archive
101             final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
102             if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
103                 return readArchive(entry.getDataOffset() + pos, buf);
104             }
105 
106             if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
107                 return -1;
108             }
109 
110             final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
111             final byte[] bufArray = new byte[numToRead];
112             final int readLen = currentInputStream.read(bufArray);
113             if (readLen != -1) {
114                 buf.put(bufArray, 0, readLen);
115             }
116 
117             // if the current input stream is the last input stream,
118             // just return the number of bytes read from current input stream
119             if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
120                 return readLen;
121             }
122 
123             // if EOF of current input stream is meet, open a new input stream and recursively call read
124             if (readLen == -1) {
125                 currentSparseInputStreamIndex++;
126                 return readSparse(pos, buf, numToRead);
127             }
128 
129             // if the rest data of current input stream is not long enough, open a new input stream
130             // and recursively call read
131             if (readLen < numToRead) {
132                 currentSparseInputStreamIndex++;
133                 final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
134                 if (readLenOfNext == -1) {
135                     return readLen;
136                 }
137 
138                 return readLen + readLenOfNext;
139             }
140 
141             // if the rest data of current input stream is enough(which means readLen == len), just return readLen
142             return readLen;
143         }
144     }
145 
146     private static final int SMALL_BUFFER_SIZE = 256;
147 
148     private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];
149 
150     private final SeekableByteChannel archive;
151 
152     /**
153      * The encoding of the tar file
154      */
155     private final ZipEncoding zipEncoding;
156 
157     private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();
158 
159     private final int blockSize;
160 
161     private final boolean lenient;
162 
163     private final int recordSize;
164 
165     private final ByteBuffer recordBuffer;
166 
167     // the global sparse headers, this is only used in PAX Format 0.X
168     private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();
169 
170     private boolean hasHitEOF;
171 
172     /**
173      * The meta-data about the current entry
174      */
175     private TarArchiveEntry currEntry;
176 
177     // the global PAX header
178     private Map<String, String> globalPaxHeaders = new HashMap<>();
179 
180     private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();
181 
182     /**
183      * Constructor for TarFile.
184      *
185      * @param content the content to use
186      * @throws IOException when reading the tar archive fails
187      */
188     public TarFile(final byte[] content) throws IOException {
189         this(new SeekableInMemoryByteChannel(content));
190     }
191 
192     /**
193      * Constructor for TarFile.
194      *
195      * @param content the content to use
196      * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
197      *                {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
198      * @throws IOException when reading the tar archive fails
199      */
200     public TarFile(final byte[] content, final boolean lenient) throws IOException {
201         this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
202     }
203 
204     /**
205      * Constructor for TarFile.
206      *
207      * @param content  the content to use
208      * @param encoding the encoding to use
209      * @throws IOException when reading the tar archive fails
210      */
211     public TarFile(final byte[] content, final String encoding) throws IOException {
212         this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
213     }
214 
215     /**
216      * Constructor for TarFile.
217      *
218      * @param archive the file of the archive to use
219      * @throws IOException when reading the tar archive fails
220      */
221     public TarFile(final File archive) throws IOException {
222         this(archive.toPath());
223     }
224 
225     /**
226      * Constructor for TarFile.
227      *
228      * @param archive the file of the archive to use
229      * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
230      *                {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
231      * @throws IOException when reading the tar archive fails
232      */
233     public TarFile(final File archive, final boolean lenient) throws IOException {
234         this(archive.toPath(), lenient);
235     }
236 
237     /**
238      * Constructor for TarFile.
239      *
240      * @param archive  the file of the archive to use
241      * @param encoding the encoding to use
242      * @throws IOException when reading the tar archive fails
243      */
244     public TarFile(final File archive, final String encoding) throws IOException {
245         this(archive.toPath(), encoding);
246     }
247 
248     /**
249      * Constructor for TarFile.
250      *
251      * @param archivePath the path of the archive to use
252      * @throws IOException when reading the tar archive fails
253      */
254     public TarFile(final Path archivePath) throws IOException {
255         this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
256     }
257 
258     /**
259      * Constructor for TarFile.
260      *
261      * @param archivePath the path of the archive to use
262      * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
263      *                    {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
264      * @throws IOException when reading the tar archive fails
265      */
266     public TarFile(final Path archivePath, final boolean lenient) throws IOException {
267         this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
268     }
269 
270     /**
271      * Constructor for TarFile.
272      *
273      * @param archivePath the path of the archive to use
274      * @param encoding    the encoding to use
275      * @throws IOException when reading the tar archive fails
276      */
277     public TarFile(final Path archivePath, final String encoding) throws IOException {
278         this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
279     }
280 
281     /**
282      * Constructor for TarFile.
283      *
284      * @param content the content to use
285      * @throws IOException when reading the tar archive fails
286      */
287     public TarFile(final SeekableByteChannel content) throws IOException {
288         this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
289     }
290 
291     /**
292      * Constructor for TarFile.
293      *
294      * @param archive    the seekable byte channel to use
295      * @param blockSize  the blocks size to use
296      * @param recordSize the record size to use
297      * @param encoding   the encoding to use
298      * @param lenient    when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
299      *                   {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
300      * @throws IOException when reading the tar archive fails
301      */
302     public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient)
303             throws IOException {
304         this.archive = archive;
305         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
306         this.recordSize = recordSize;
307         this.recordBuffer = ByteBuffer.allocate(this.recordSize);
308         this.blockSize = blockSize;
309         this.lenient = lenient;
310 
311         TarArchiveEntry entry;
312         while ((entry = getNextTarEntry()) != null) {
313             entries.add(entry);
314         }
315     }
316 
317     /**
318      * Update the current entry with the read pax headers
319      *
320      * @param headers       Headers read from the pax header
321      * @param sparseHeaders Sparse headers read from pax header
322      */
323     private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException {
324         currEntry.updateEntryFromPaxHeaders(headers);
325         currEntry.setSparseHeaders(sparseHeaders);
326     }
327 
328     /**
329      * Build the input streams consisting of all-zero input streams and non-zero input streams. When reading from the non-zero input streams, the data is
330      * actually read from the original input stream. The size of each input stream is introduced by the sparse headers.
331      *
332      * @implNote Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the 0 size input streams because they are
333      *           meaningless.
334      */
335     private void buildSparseInputStreams() throws IOException {
336         final List<InputStream> streams = new ArrayList<>();
337 
338         final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
339 
340         // Stream doesn't need to be closed at all as it doesn't use any resources
341         final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR
342         // logical offset into the extracted entry
343         long offset = 0;
344         long numberOfZeroBytesInSparseEntry = 0;
345         for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
346             final long zeroBlockSize = sparseHeader.getOffset() - offset;
347             if (zeroBlockSize < 0) {
348                 // sparse header says to move backwards inside the extracted entry
349                 throw new IOException("Corrupted struct sparse detected");
350             }
351 
352             // only store the zero block if it is not empty
353             if (zeroBlockSize > 0) {
354                 streams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize));
355                 numberOfZeroBytesInSparseEntry += zeroBlockSize;
356             }
357 
358             // only store the input streams with non-zero size
359             if (sparseHeader.getNumbytes() > 0) {
360                 final long start = currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
361                 if (start + sparseHeader.getNumbytes() < start) {
362                     // possible integer overflow
363                     throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
364                 }
365                 streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
366             }
367 
368             offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
369         }
370 
371         sparseInputStreams.put(currEntry.getName(), streams);
372     }
373 
374     @Override
375     public void close() throws IOException {
376         archive.close();
377     }
378 
379     /**
380      * This method is invoked once the end of the archive is hit, it tries to consume the remaining bytes under the assumption that the tool creating this
381      * archive has padded the last block.
382      */
383     private void consumeRemainderOfLastBlock() throws IOException {
384         final long bytesReadOfLastBlock = archive.position() % blockSize;
385         if (bytesReadOfLastBlock > 0) {
386             repositionForwardBy(blockSize - bytesReadOfLastBlock);
387         }
388     }
389 
390     /**
391      * Gets all TAR Archive Entries from the TarFile
392      *
393      * @return All entries from the tar file
394      */
395     public List<TarArchiveEntry> getEntries() {
396         return new ArrayList<>(entries);
397     }
398 
399     /**
400      * Gets the input stream for the provided Tar Archive Entry.
401      *
402      * @param entry Entry to get the input stream from
403      * @return Input stream of the provided entry
404      * @throws IOException Corrupted TAR archive. Can't read entry.
405      */
406     public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
407         try {
408             return new BoundedTarEntryInputStream(entry, archive);
409         } catch (final RuntimeException ex) {
410             throw new IOException("Corrupted TAR archive. Can't read entry", ex);
411         }
412     }
413 
414     /**
415      * Gets the next entry in this tar archive as long name data.
416      *
417      * @return The next entry in the archive as long name data, or null.
418      * @throws IOException on error
419      */
420     private byte[] getLongNameData() throws IOException {
421         final ByteArrayOutputStream longName = new ByteArrayOutputStream();
422         int length;
423         try (InputStream in = getInputStream(currEntry)) {
424             while ((length = in.read(smallBuf)) >= 0) {
425                 longName.write(smallBuf, 0, length);
426             }
427         }
428         getNextTarEntry();
429         if (currEntry == null) {
430             // Bugzilla: 40334
431             // Malformed tar file - long entry name not followed by entry
432             return null;
433         }
434         byte[] longNameData = longName.toByteArray();
435         // remove trailing null terminator(s)
436         length = longNameData.length;
437         while (length > 0 && longNameData[length - 1] == 0) {
438             --length;
439         }
440         if (length != longNameData.length) {
441             longNameData = Arrays.copyOf(longNameData, length);
442         }
443         return longNameData;
444     }
445 
446     /**
447      * Gets the next entry in this tar archive. This will skip to the end of the current entry, if there is one, and place the position of the channel at the
448      * header of the next entry, and read the header and instantiate a new TarEntry from the header bytes and return that entry. If there are no more entries in
449      * the archive, null will be returned to indicate that the end of the archive has been reached.
450      *
451      * @return The next TarEntry in the archive, or null if there is no next entry.
452      * @throws IOException when reading the next TarEntry fails
453      */
454     private TarArchiveEntry getNextTarEntry() throws IOException {
455         if (isAtEOF()) {
456             return null;
457         }
458 
459         if (currEntry != null) {
460             // Skip to the end of the entry
461             repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
462             throwExceptionIfPositionIsNotInArchive();
463             skipRecordPadding();
464         }
465 
466         final ByteBuffer headerBuf = getRecord();
467         if (null == headerBuf) {
468             /* hit EOF */
469             currEntry = null;
470             return null;
471         }
472 
473         try {
474             final long position = archive.position();
475             currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
476         } catch (final IllegalArgumentException e) {
477             throw new IOException("Error detected parsing the header", e);
478         }
479 
480         if (currEntry.isGNULongLinkEntry()) {
481             final byte[] longLinkData = getLongNameData();
482             if (longLinkData == null) {
483                 // Bugzilla: 40334
484                 // Malformed tar file - long link entry name not followed by
485                 // entry
486                 return null;
487             }
488             currEntry.setLinkName(zipEncoding.decode(longLinkData));
489         }
490 
491         if (currEntry.isGNULongNameEntry()) {
492             final byte[] longNameData = getLongNameData();
493             if (longNameData == null) {
494                 // Bugzilla: 40334
495                 // Malformed tar file - long entry name not followed by
496                 // entry
497                 return null;
498             }
499 
500             // COMPRESS-509 : the name of directories should end with '/'
501             final String name = zipEncoding.decode(longNameData);
502             currEntry.setName(name);
503             if (currEntry.isDirectory() && !name.endsWith("/")) {
504                 currEntry.setName(name + "/");
505             }
506         }
507 
508         if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
509             readGlobalPaxHeaders();
510         }
511 
512         try {
513             if (currEntry.isPaxHeader()) { // Process Pax headers
514                 paxHeaders();
515             } else if (!globalPaxHeaders.isEmpty()) {
516                 applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
517             }
518         } catch (final NumberFormatException e) {
519             throw new IOException("Error detected parsing the pax header", e);
520         }
521 
522         if (currEntry.isOldGNUSparse()) { // Process sparse files
523             readOldGNUSparse();
524         }
525 
526         return currEntry;
527     }
528 
529     /**
530      * Gets the next record in this tar archive. This will skip over any remaining data in the current entry, if there is one, and place the input stream at the
531      * header of the next entry.
532      *
533      * <p>
534      * If there are no more entries in the archive, null will be returned to indicate that the end of the archive has been reached. At the same time the
535      * {@code hasHitEOF} marker will be set to true.
536      * </p>
537      *
538      * @return The next TarEntry in the archive, or null if there is no next entry.
539      * @throws IOException when reading the next TarEntry fails
540      */
541     private ByteBuffer getRecord() throws IOException {
542         ByteBuffer headerBuf = readRecord();
543         setAtEOF(isEOFRecord(headerBuf));
544         if (isAtEOF() && headerBuf != null) {
545             // Consume rest
546             tryToConsumeSecondEOFRecord();
547             consumeRemainderOfLastBlock();
548             headerBuf = null;
549         }
550         return headerBuf;
551     }
552 
553     protected final boolean isAtEOF() {
554         return hasHitEOF;
555     }
556 
557     private boolean isDirectory() {
558         return currEntry != null && currEntry.isDirectory();
559     }
560 
561     private boolean isEOFRecord(final ByteBuffer headerBuf) {
562         return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
563     }
564 
565     /**
566      * <p>
567      * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
568      *
569      * <pre>
570      * GNU.sparse.size=size
571      * GNU.sparse.numblocks=numblocks
572      * repeat numblocks times
573      *   GNU.sparse.offset=offset
574      *   GNU.sparse.numbytes=numbytes
575      * end repeat
576      * </pre>
577      *
578      * <p>
579      * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
580      *
581      * <pre>
582      * GNU.sparse.map
583      *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
584      * </pre>
585      *
586      * <p>
587      * For PAX Format 1.X: <br>
588      * The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers delimited by newlines.
589      * The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are map entries, each one
590      * consisting of two numbers giving the offset and size of the data block it describes.
591      *
592      * @throws IOException
593      */
594     private void paxHeaders() throws IOException {
595         List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
596         final Map<String, String> headers;
597         try (InputStream input = getInputStream(currEntry)) {
598             headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
599         }
600 
601         // for 0.1 PAX Headers
602         if (headers.containsKey(TarGnuSparseKeys.MAP)) {
603             sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
604         }
605         getNextTarEntry(); // Get the actual file entry
606         if (currEntry == null) {
607             throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
608         }
609         applyPaxHeadersToCurrentEntry(headers, sparseHeaders);
610 
611         // for 1.0 PAX Format, the sparse map is stored in the file data block
612         if (currEntry.isPaxGNU1XSparse()) {
613             try (InputStream input = getInputStream(currEntry)) {
614                 sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
615             }
616             currEntry.setSparseHeaders(sparseHeaders);
617             // data of the entry is after the pax gnu entry. So we need to update the data position once again
618             currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
619         }
620 
621         // sparse headers are all done reading, we need to build
622         // sparse input streams using these sparse headers
623         buildSparseInputStreams();
624     }
625 
626     private void readGlobalPaxHeaders() throws IOException {
627         try (InputStream input = getInputStream(currEntry)) {
628             globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders, currEntry.getSize());
629         }
630         getNextTarEntry(); // Get the actual file entry
631 
632         if (currEntry == null) {
633             throw new IOException("Error detected parsing the pax header");
634         }
635     }
636 
637     /**
638      * Adds the sparse chunks from the current entry to the sparse chunks, including any additional sparse entries following the current entry.
639      *
640      * @throws IOException when reading the sparse entry fails
641      */
642     private void readOldGNUSparse() throws IOException {
643         if (currEntry.isExtended()) {
644             TarArchiveSparseEntry entry;
645             do {
646                 final ByteBuffer headerBuf = getRecord();
647                 if (headerBuf == null) {
648                     throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
649                 }
650                 entry = new TarArchiveSparseEntry(headerBuf.array());
651                 currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
652                 currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
653             } while (entry.isExtended());
654         }
655 
656         // sparse headers are all done reading, we need to build
657         // sparse input streams using these sparse headers
658         buildSparseInputStreams();
659     }
660 
661     /**
662      * Read a record from the input stream and return the data.
663      *
664      * @return The record data or null if EOF has been hit.
665      * @throws IOException if reading from the archive fails
666      */
667     private ByteBuffer readRecord() throws IOException {
668         recordBuffer.rewind();
669         final int readNow = archive.read(recordBuffer);
670         if (readNow != recordSize) {
671             return null;
672         }
673         return recordBuffer;
674     }
675 
676     private void repositionForwardBy(final long offset) throws IOException {
677         repositionForwardTo(archive.position() + offset);
678     }
679 
680     private void repositionForwardTo(final long newPosition) throws IOException {
681         final long currPosition = archive.position();
682         if (newPosition < currPosition) {
683             throw new IOException("trying to move backwards inside of the archive");
684         }
685         archive.position(newPosition);
686     }
687 
688     protected final void setAtEOF(final boolean b) {
689         hasHitEOF = b;
690     }
691 
692     /**
693      * The last record block should be written at the full size, so skip any additional space used to fill a record after an entry
694      *
695      * @throws IOException when skipping the padding of the record fails
696      */
697     private void skipRecordPadding() throws IOException {
698         if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
699             final long numRecords = currEntry.getSize() / recordSize + 1;
700             final long padding = numRecords * recordSize - currEntry.getSize();
701             repositionForwardBy(padding);
702             throwExceptionIfPositionIsNotInArchive();
703         }
704     }
705 
706     /**
707      * Checks if the current position of the SeekableByteChannel is in the archive.
708      *
709      * @throws IOException If the position is not in the archive
710      */
711     private void throwExceptionIfPositionIsNotInArchive() throws IOException {
712         if (archive.size() < archive.position()) {
713             throw new IOException("Truncated TAR archive");
714         }
715     }
716 
717     /**
718      * Tries to read the next record resetting the position in the archive if it is not an EOF record.
719      *
720      * <p>
721      * This is meant to protect against cases where a tar implementation has written only one EOF record when two are expected. Actually this won't help since a
722      * non-conforming implementation likely won't fill full blocks consisting of - by default - ten records either so we probably have already read beyond the
723      * archive anyway.
724      * </p>
725      *
726      * @throws IOException if reading the record of resetting the position in the archive fails
727      */
728     private void tryToConsumeSecondEOFRecord() throws IOException {
729         boolean shouldReset = true;
730         try {
731             shouldReset = !isEOFRecord(readRecord());
732         } finally {
733             if (shouldReset) {
734                 archive.position(archive.position() - recordSize);
735             }
736         }
737     }
738 }