/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.IOUtils;

/**
 * The TarArchiveInputStream reads a UNIX tar archive as an InputStream. Methods are provided to position at each successive entry in the archive, and then
 * read each entry as a normal input stream using read().
 *
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream<TarArchiveEntry> {

    private static final int SMALL_BUFFER_SIZE = 256;

    /**
     * Checks if the signature matches what is expected for a tar file.
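     * <p>
     * For illustration, a caller might probe a stream before wrapping it (a hypothetical sketch; it assumes the underlying stream supports mark/reset):
     * </p>
     *
     * <pre>{@code
     * byte[] signature = new byte[512]; // one record is enough to cover the magic and version fields
     * in.mark(signature.length);
     * int read = IOUtils.readFully(in, signature);
     * in.reset();
     * boolean isTar = TarArchiveInputStream.matches(signature, read);
     * }</pre>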
     *
     * @param signature the bytes to check
     * @param length    the number of bytes to check
     * @return true, if this stream is a tar archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        final int versionOffset = TarConstants.VERSION_OFFSET;
        final int versionLen = TarConstants.VERSIONLEN;
        if (length < versionOffset + versionLen) {
            return false;
        }

        final int magicOffset = TarConstants.MAGIC_OFFSET;
        final int magicLen = TarConstants.MAGICLEN;
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, signature, magicOffset, magicLen)
                && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, signature, versionOffset, versionLen)) {
            return true;
        }
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, signature, magicOffset, magicLen)
                && (ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, signature, versionOffset, versionLen)
                        || ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, signature, versionOffset, versionLen))) {
            return true;
        }
        // COMPRESS-107 - recognize Ant tar files
        return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT, signature, magicOffset, magicLen)
                && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT, signature, versionOffset, versionLen);
    }

    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The buffer to store the TAR header. **/
    private final byte[] recordBuffer;

    /** The size of a block. */
    private final int blockSize;

    /** True if stream is at EOF. */
    private boolean atEof;

    /** Size of the current entry. */
    private long entrySize;

    /** How far into the entry the stream is at. */
    private long entryOffset;

    /** Input streams for reading sparse entries. **/
    private List<InputStream> sparseInputStreams;

    /** The index of the current input stream being read when reading sparse entries. */
    private int currentSparseInputStreamIndex;

    /** The meta-data about the current entry. */
    private TarArchiveEntry currEntry;

    /** The encoding of the file. */
    private final ZipEncoding zipEncoding;

    /** The global PAX header. */
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    /** The global sparse headers, this is only used in PAX Format 0.X. */
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    private final boolean lenient;

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     */
    public TarArchiveInputStream(final InputStream inputStream) {
        this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                    {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream inputStream, final boolean lenient) {
        this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructs a new instance.
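     * <p>
     * For example (an illustrative sketch, with {@code fileIn} standing in for any source stream), to read an archive written with a 20-record block size:
     * </p>
     *
     * <pre>{@code
     * try (TarArchiveInputStream tarIn = new TarArchiveInputStream(fileIn, 20 * 512)) {
     *     // iterate entries with getNextEntry() and read their data with read()
     * }
     * }</pre>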
     *
     * @param inputStream the input stream to use
     * @param blockSize   the block size to use
     */
    public TarArchiveInputStream(final InputStream inputStream, final int blockSize) {
        this(inputStream, blockSize, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param blockSize   the block size to use
     * @param recordSize  the record size to use
     */
    public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize) {
        this(inputStream, blockSize, recordSize, null);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param blockSize   the block size to use
     * @param recordSize  the record size to use
     * @param encoding    name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize, final String encoding) {
        this(inputStream, blockSize, recordSize, encoding, false);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param blockSize   the block size to use
     * @param recordSize  the record size to use
     * @param encoding    name of the encoding to use for file names
     * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                    {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize, final String encoding, final boolean lenient) {
        super(inputStream, encoding);
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordBuffer = new byte[recordSize];
        this.blockSize = blockSize;
        this.lenient = lenient;
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param blockSize   the block size to use
     * @param encoding    name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final String encoding) {
        this(inputStream, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param encoding    name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding);
    }

    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * Gets the available data that can be read from the current entry in the archive. This does not indicate how much data is left in the entire archive, only
     * in the current entry. This value is determined from the entry's size header field and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE bytes are left in the current entry in the archive.
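     * <p>
     * For example, if the current entry's size field reports 1024 bytes and 200 bytes of it have already been read, this method returns 824.
     * </p>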
     *
     * @return The number of available bytes for the current entry.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public int available() throws IOException {
        if (isDirectory()) {
            return 0;
        }
        final long available = currEntry.getRealSize() - entryOffset;
        if (available > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) available;
    }

    /**
     * Builds the input streams consisting of all-zero input streams and non-zero input streams. When reading from the non-zero input streams, the data is
     * actually read from the original input stream. The size of each input stream is specified by the sparse headers.
     * <p>
     * NOTE: Some all-zero input streams and non-zero input streams have a size of 0. We DO NOT store the 0-size input streams because they are meaningless.
     * </p>
     */
    private void buildSparseInputStreams() throws IOException {
        currentSparseInputStreamIndex = -1;
        sparseInputStreams = new ArrayList<>();

        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();

        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }

            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                sparseInputStreams.add(new BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset));
            }

            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                sparseInputStreams.add(new BoundedInputStream(in, sparseHeader.getNumbytes()));
            }

            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }

        if (!sparseInputStreams.isEmpty()) {
            currentSparseInputStreamIndex = 0;
        }
    }

    /**
     * Tests whether this class is able to read the given entry.
     *
     * @return The implementation will return true if the {@link ArchiveEntry} is an instance of {@link TarArchiveEntry}
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
        return archiveEntry instanceof TarArchiveEntry;
    }

    /**
     * Closes this stream. Calls the TarBuffer's close() method.
     *
     * @throws IOException on error
     */
    @Override
    public void close() throws IOException {
        // Close all the input streams in sparseInputStreams
        if (sparseInputStreams != null) {
            for (final InputStream inputStream : sparseInputStreams) {
                inputStream.close();
            }
        }
        in.close();
    }

    /**
     * This method is invoked once the end of the archive is hit; it tries to consume the remaining bytes under the assumption that the tool creating this
     * archive has padded the last block.
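     * <p>
     * For example, with the default 10240-byte block size, if 512 bytes of the final block have been read, the remaining 9728 bytes are skipped.
     * </p>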
     */
    private void consumeRemainderOfLastBlock() throws IOException {
        final long bytesReadOfLastBlock = getBytesRead() % blockSize;
        if (bytesReadOfLastBlock > 0) {
            count(IOUtils.skip(in, blockSize - bytesReadOfLastBlock));
        }
    }

    /**
     * For a FileInputStream, skip() always returns the requested number of bytes, so we need the number of available bytes to determine how many bytes were
     * actually skipped.
     *
     * @param available available bytes returned by inputStream.available()
     * @param skipped   skipped bytes returned by inputStream.skip()
     * @param expected  bytes expected to skip
     * @return number of bytes actually skipped
     * @throws IOException if a truncated tar archive is detected
     */
    private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
        long actuallySkipped = skipped;
        if (in instanceof FileInputStream) {
            actuallySkipped = Math.min(skipped, available);
        }
        if (actuallySkipped != expected) {
            throw new IOException("Truncated TAR archive");
        }
        return actuallySkipped;
    }

    /**
     * Gets the current TAR Archive Entry that this input stream is processing.
     *
     * @return The current Archive Entry
     */
    public TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    /**
     * Gets the next entry in this tar archive as long name data.
     *
     * @return The next entry in the archive as long name data, or null.
     * @throws IOException on error
     */
    protected byte[] getLongNameData() throws IOException {
        // read in the name
        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
        int length = 0;
        while ((length = read(smallBuf)) >= 0) {
            longName.write(smallBuf, 0, length);
        }
        getNextEntry();
        if (currEntry == null) {
            // Bugzilla: 40334
            // Malformed tar file - long entry name not followed by entry
            return null;
        }
        byte[] longNameData = longName.toByteArray();
        // remove trailing null terminator(s)
        length = longNameData.length;
        while (length > 0 && longNameData[length - 1] == 0) {
            --length;
        }
        if (length != longNameData.length) {
            longNameData = Arrays.copyOf(longNameData, length);
        }
        return longNameData;
    }

    /**
     * Gets the next TarArchiveEntry in this stream.
     *
     * @return the next entry, or {@code null} if there are no more entries
     * @throws IOException if the next entry could not be read
     */
    @Override
    public TarArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Gets the next entry in this tar archive. This will skip over any remaining data in the current entry, if there is one, position the input stream at the
     * header of the next entry, read the header, instantiate a new TarEntry from the header bytes, and return that entry. If there are no more entries in the
     * archive, null will be returned to indicate that the end of the archive has been reached.
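     * <p>
     * A typical iteration (an illustrative sketch, shown with the preferred {@link #getNextEntry()}) looks like:
     * </p>
     *
     * <pre>{@code
     * TarArchiveEntry entry;
     * while ((entry = tarIn.getNextEntry()) != null) {
     *     // consume the entry's data via tarIn.read(...)
     * }
     * }</pre>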
     *
     * @return The next TarEntry in the archive, or null.
     * @throws IOException on error
     * @deprecated Use {@link #getNextEntry()}.
     */
    @Deprecated
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            /* Skip will only go to the end of the current entry */
            IOUtils.skip(this, Long.MAX_VALUE);

            /* skip to the end of the last record */
            skipRecordPadding();
        }

        final byte[] headerBuf = getRecord();

        if (headerBuf == null) {
            /* hit EOF */
            currEntry = null;
            return null;
        }

        try {
            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        entryOffset = 0;
        entrySize = currEntry.getSize();

        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (final NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }

        // If the size of the next element in the archive has changed
        // due to a new size being reported in the POSIX header
        // information, we update entrySize here so that it contains
        // the correct value.
        entrySize = currEntry.getSize();

        return currEntry;
    }

    /**
     * Gets the next record in this tar archive. This will skip over any remaining data in the current entry, if there is one, and place the input stream at
     * the header of the next entry.
     * <p>
     * If there are no more entries in the archive, null will be returned to indicate that the end of the archive has been reached. At the same time the
     * {@code hasHitEOF} marker will be set to true.
     * </p>
     *
     * @return The next header in the archive, or null.
     * @throws IOException on error
     */
    private byte[] getRecord() throws IOException {
        byte[] headerBuf = readRecord();
        setAtEOF(isEOFRecord(headerBuf));
        if (isAtEOF() && headerBuf != null) {
            tryToConsumeSecondEOFRecord();
            consumeRemainderOfLastBlock();
            headerBuf = null;
        }
        return headerBuf;
    }

    /**
     * Gets the record size being used by this stream's buffer.
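     * <p>
     * Unless a different record size was passed to a constructor, this is {@link TarConstants#DEFAULT_RCDSIZE} (512 bytes).
     * </p>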
     *
     * @return The TarBuffer record size.
     */
    public int getRecordSize() {
        return recordBuffer.length;
    }

    protected final boolean isAtEOF() {
        return atEof;
    }

    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /**
     * Tests if an archive record indicates End of Archive. End of archive is indicated by a record that consists entirely of null bytes.
     *
     * @param record The record data to check.
     * @return true if the record data is an End of Archive
     */
    protected boolean isEOFRecord(final byte[] record) {
        return record == null || ArchiveUtils.isArrayZero(record, getRecordSize());
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     *
     * @param markLimit The limit to mark.
     */
    @Override
    public synchronized void mark(final int markLimit) {
    }

    /**
     * Since we do not support marking just yet, we return false.
     *
     * @return false.
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * For PAX Format 0.0, the sparse headers (GNU.sparse.offset and GNU.sparse.numbytes) may appear multiple times, and they look like:
     *
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     * <p>
     * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
     * </p>
     * <p>
     * GNU.sparse.map: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </p>
     * <p>
     * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
     * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following
     * are map entries, each one consisting of two numbers giving the offset and size of the data block it describes.
     * </p>
     *
     * @throws IOException if an I/O error occurs or the PAX headers are malformed
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize);

        // for 0.1 PAX Headers
        if (headers.containsKey(TarGnuSparseKeys.MAP)) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
        }
        getNextEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            sparseHeaders = TarUtils.parsePAX1XSparseHeaders(in, getRecordSize());
            currEntry.setSparseHeaders(sparseHeaders);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads bytes from the current tar archive entry.
     * <p>
     * This method is aware of the boundaries of the current entry in the archive and will deal with them as if they were this stream's start and EOF.
     * </p>
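     * <p>
     * For example (an illustrative sketch, with {@code tarIn} and {@code out} assumed to exist), copying the current entry's data:
     * </p>
     *
     * <pre>{@code
     * byte[] buffer = new byte[8192];
     * int n;
     * while ((n = tarIn.read(buffer, 0, buffer.length)) != -1) {
     *     out.write(buffer, 0, n);
     * }
     * }</pre>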
     *
     * @param buf       The buffer into which to place bytes read.
     * @param offset    The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
        if (numToRead == 0) {
            return 0;
        }
        int totalRead = 0;

        if (isAtEOF() || isDirectory()) {
            return -1;
        }

        if (currEntry == null) {
            throw new IllegalStateException("No current tar entry");
        }

        if (entryOffset >= currEntry.getRealSize()) {
            return -1;
        }

        numToRead = Math.min(numToRead, available());

        if (currEntry.isSparse()) {
            // for sparse entries, we need to read them in another way
            totalRead = readSparse(buf, offset, numToRead);
        } else {
            totalRead = in.read(buf, offset, numToRead);
        }

        if (totalRead == -1) {
            if (numToRead > 0) {
                throw new IOException("Truncated TAR archive");
            }
            setAtEOF(true);
        } else {
            count(totalRead);
            entryOffset += totalRead;
        }

        return totalRead;
    }

    private void readGlobalPaxHeaders() throws IOException {
        globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize);
        getNextEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * Adds the sparse chunks from the current entry to the sparse chunks, including any additional sparse entries following the current entry.
     *
     * @throws IOException on error
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                final byte[] headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf);
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads a record from the input stream and returns the data.
     *
     * @return The record data or null if EOF has been hit.
     * @throws IOException on error
     */
    protected byte[] readRecord() throws IOException {
        final int readCount = IOUtils.readFully(in, recordBuffer);
        count(readCount);
        if (readCount != getRecordSize()) {
            return null;
        }

        return recordBuffer;
    }

    /**
     * For sparse tar entries, there are many "holes" (consisting of all zeros) in the file. Only the non-zero data is stored in tar files, and it is stored
     * separately. The structure of the non-zero data is described by the sparse headers: the offset, where a block of non-zero data starts, and numbytes, the
     * length of the non-zero data block. When reading sparse entries, the actual data is read out with the "holes" and the non-zero data combined together
     * according to the sparse headers.
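     * <p>
     * For example, two sparse headers (offset=0, numbytes=512) and (offset=10240, numbytes=512) describe an entry whose bytes 512 to 10239 form a hole that
     * reads back as all zeros.
     * </p>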
     *
     * @param buf       The buffer into which to place bytes read.
     * @param offset    The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
        // if there are no actual input streams, just read from the original input stream
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return in.read(buf, offset, numToRead);
        }
        if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
            return -1;
        }
        final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
        final int readLen = currentInputStream.read(buf, offset, numToRead);
        // if the current input stream is the last input stream,
        // just return the number of bytes read from current input stream
        if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
            return readLen;
        }
        // if EOF of the current input stream is reached, open a new input stream and recursively call read
        if (readLen == -1) {
            currentSparseInputStreamIndex++;
            return readSparse(buf, offset, numToRead);
        }
        // if the remaining data of the current input stream is not long enough, open a new input stream
        // and recursively call read
        if (readLen < numToRead) {
            currentSparseInputStreamIndex++;
            final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
            if (readLenOfNext == -1) {
                return readLen;
            }
            return readLen + readLenOfNext;
        }
        // if the remaining data of the current input stream is enough (readLen == numToRead), just return readLen
        return readLen;
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    @Override
    public synchronized void reset() {
    }

    protected final void setAtEOF(final boolean atEof) {
        this.atEof = atEof;
    }

    protected final void setCurrentEntry(final TarArchiveEntry currEntry) {
        this.currEntry = currEntry;
    }

    /**
     * Skips over and discards {@code n} bytes of data from this input stream. The {@code skip} method may, for a variety of reasons, end up skipping over
     * some smaller number of bytes, possibly {@code 0}. This may result from any of a number of conditions; reaching end of file or end of entry before
     * {@code n} bytes have been skipped are only two possibilities. The actual number of bytes skipped is returned. If {@code n} is negative, no bytes are
     * skipped.
     *
     * @param n the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException if a truncated tar archive is detected or some other I/O error occurs
     */
    @Override
    public long skip(final long n) throws IOException {
        if (n <= 0 || isDirectory()) {
            return 0;
        }

        final long availableOfInputStream = in.available();
        final long available = currEntry.getRealSize() - entryOffset;
        final long numToSkip = Math.min(n, available);
        long skipped;

        if (!currEntry.isSparse()) {
            skipped = IOUtils.skip(in, numToSkip);
            // for a non-sparse entry, we should get the number of bytes actually skipped along with
            // inputStream.available() if inputStream is an instance of FileInputStream
            skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
        } else {
            skipped = skipSparse(numToSkip);
        }

        count(skipped);
        entryOffset += skipped;
        return skipped;
    }

    /**
     * The last record block should be written at the full size, so skip any additional space used to fill a record after an entry.
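     * <p>
     * For example, with 512-byte records, a 1000-byte entry occupies two records, so the 24 padding bytes (2 * 512 - 1000) are skipped here.
     * </p>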
     *
     * @throws IOException if a truncated tar archive is detected
     */
    private void skipRecordPadding() throws IOException {
        if (!isDirectory() && this.entrySize > 0 && this.entrySize % getRecordSize() != 0) {
            final long available = in.available();
            final long numRecords = this.entrySize / getRecordSize() + 1;
            final long padding = numRecords * getRecordSize() - this.entrySize;
            long skipped = IOUtils.skip(in, padding);

            skipped = getActuallySkipped(available, skipped, padding);

            count(skipped);
        }
    }

    /**
     * Skips n bytes from the current input stream. If the current input stream doesn't have enough data left, jump to the next input stream and skip the
     * remaining bytes; keep doing this until n bytes in total are skipped or all the input streams are exhausted.
     *
     * @param n bytes of data to skip
     * @return actual bytes of data skipped
     * @throws IOException if an I/O error occurs
     */
    private long skipSparse(final long n) throws IOException {
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return in.skip(n);
        }
        long bytesSkipped = 0;
        while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
            final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
            bytesSkipped += currentInputStream.skip(n - bytesSkipped);
            if (bytesSkipped < n) {
                currentSparseInputStreamIndex++;
            }
        }
        return bytesSkipped;
    }

    /**
     * Tries to read the next record, rewinding the stream if it is not an EOF record.
     * <p>
     * This is meant to protect against cases where a tar implementation has written only one EOF record when two are expected. Actually this won't help since
     * a non-conforming implementation likely won't fill full blocks consisting of - by default - twenty records either, so we probably have already read
     * beyond the archive anyway.
     * </p>
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        final boolean marked = in.markSupported();
        if (marked) {
            in.mark(getRecordSize());
        }
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            if (shouldReset && marked) {
                pushedBackBytes(getRecordSize());
                in.reset();
            }
        }
    }
}