1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.apache.commons.compress.archivers.tar;
24
25 import java.io.ByteArrayOutputStream;
26 import java.io.FileInputStream;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.HashMap;
32 import java.util.List;
33 import java.util.Map;
34
35 import org.apache.commons.compress.archivers.ArchiveEntry;
36 import org.apache.commons.compress.archivers.ArchiveInputStream;
37 import org.apache.commons.compress.archivers.zip.ZipEncoding;
38 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
39 import org.apache.commons.compress.utils.ArchiveUtils;
40 import org.apache.commons.compress.utils.BoundedInputStream;
41 import org.apache.commons.compress.utils.IOUtils;
42
43
44
45
46
47
48
49 public class TarArchiveInputStream extends ArchiveInputStream<TarArchiveEntry> {
50
51 private static final int SMALL_BUFFER_SIZE = 256;
52
53
54
55
56
57
58
59
60 public static boolean matches(final byte[] signature, final int length) {
61 final int versionOffset = TarConstants.VERSION_OFFSET;
62 final int versionLen = TarConstants.VERSIONLEN;
63 if (length < versionOffset + versionLen) {
64 return false;
65 }
66
67 final int magicOffset = TarConstants.MAGIC_OFFSET;
68 final int magicLen = TarConstants.MAGICLEN;
69 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, signature, magicOffset, magicLen)
70 && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, signature, versionOffset, versionLen)) {
71 return true;
72 }
73 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, signature, magicOffset, magicLen)
74 && (ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, signature, versionOffset, versionLen)
75 || ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, signature, versionOffset, versionLen))) {
76 return true;
77 }
78
79 return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT, signature, magicOffset, magicLen)
80 && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT, signature, versionOffset, versionLen);
81 }
82
83 private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];
84
85
86 private final byte[] recordBuffer;
87
88
89 private final int blockSize;
90
91
92 private boolean atEof;
93
94
95 private long entrySize;
96
97
98 private long entryOffset;
99
100
101 private List<InputStream> sparseInputStreams;
102
103
104 private int currentSparseInputStreamIndex;
105
106
107 private TarArchiveEntry currEntry;
108
109
110 private final ZipEncoding zipEncoding;
111
112
113 private Map<String, String> globalPaxHeaders = new HashMap<>();
114
115
116 private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();
117
118 private final boolean lenient;
119
120
121
122
123
124
/**
 * Creates a tar input stream with the default block size and record size
 * ({@link TarConstants#DEFAULT_BLKSIZE}, {@link TarConstants#DEFAULT_RCDSIZE}), a {@code null}
 * encoding and {@code lenient == false}.
 *
 * @param inputStream the stream to read the archive from
 */
public TarArchiveInputStream(final InputStream inputStream) {
    this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
}
128
129
130
131
132
133
134
135
136
/**
 * Creates a tar input stream with the default block and record sizes and a {@code null} encoding.
 *
 * @param inputStream the stream to read the archive from
 * @param lenient     leniency flag passed on to each {@link TarArchiveEntry} parsed from a header
 */
public TarArchiveInputStream(final InputStream inputStream, final boolean lenient) {
    this(
        inputStream,
        TarConstants.DEFAULT_BLKSIZE,
        TarConstants.DEFAULT_RCDSIZE,
        null,
        lenient);
}
140
141
142
143
144
145
146
/**
 * Creates a tar input stream with the given block size, the default record size, a {@code null}
 * encoding and {@code lenient == false}.
 *
 * @param inputStream the stream to read the archive from
 * @param blockSize   the block size to use
 */
public TarArchiveInputStream(final InputStream inputStream, final int blockSize) {
    this(inputStream, blockSize, TarConstants.DEFAULT_RCDSIZE, null, false);
}
150
151
152
153
154
155
156
157
/**
 * Creates a tar input stream with the given block and record sizes, a {@code null} encoding and
 * {@code lenient == false}.
 *
 * @param inputStream the stream to read the archive from
 * @param blockSize   the block size to use
 * @param recordSize  the record size to use
 */
public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize) {
    this(inputStream, blockSize, recordSize, null, false);
}
161
162
163
164
165
166
167
168
169
170
/**
 * Creates a tar input stream with the given block size, record size and encoding, and
 * {@code lenient == false}.
 *
 * @param inputStream the stream to read the archive from
 * @param blockSize   the block size to use
 * @param recordSize  the record size to use
 * @param encoding    name of the encoding used for entry names
 */
public TarArchiveInputStream(
        final InputStream inputStream,
        final int blockSize,
        final int recordSize,
        final String encoding) {
    this(inputStream, blockSize, recordSize, encoding, false);
}
174
175
176
177
178
179
180
181
182
183
184
185
/**
 * Creates a tar input stream; every other constructor ends up here.
 *
 * @param inputStream the stream to read the archive from
 * @param blockSize   the block size to use
 * @param recordSize  the record size to use; a buffer of exactly this many bytes is allocated
 * @param encoding    name of the encoding used for entry names; resolved through
 *                    {@link ZipEncodingHelper#getZipEncoding(String)}
 * @param lenient     leniency flag passed on to each {@link TarArchiveEntry} parsed from a header
 */
public TarArchiveInputStream(
        final InputStream inputStream,
        final int blockSize,
        final int recordSize,
        final String encoding,
        final boolean lenient) {
    super(inputStream, encoding);
    // Plain value copies first ...
    this.lenient = lenient;
    this.blockSize = blockSize;
    // ... then the two initializations that do real work, in their original relative order.
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.recordBuffer = new byte[recordSize];
}
193
194
195
196
197
198
199
200
201
/**
 * Creates a tar input stream with the given block size and encoding, the default record size and
 * {@code lenient == false}.
 *
 * @param inputStream the stream to read the archive from
 * @param blockSize   the block size to use
 * @param encoding    name of the encoding used for entry names
 */
public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final String encoding) {
    this(inputStream, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding, false);
}
205
206
207
208
209
210
211
212
/**
 * Creates a tar input stream with the given encoding, the default block and record sizes and
 * {@code lenient == false}.
 *
 * @param inputStream the stream to read the archive from
 * @param encoding    name of the encoding used for entry names
 */
public TarArchiveInputStream(final InputStream inputStream, final String encoding) {
    this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
}
216
217 private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException {
218 currEntry.updateEntryFromPaxHeaders(headers);
219 currEntry.setSparseHeaders(sparseHeaders);
220 }
221
222
223
224
225
226
227
228
229
230 @Override
231 public int available() throws IOException {
232 if (isDirectory()) {
233 return 0;
234 }
235 final long available = currEntry.getRealSize() - entryOffset;
236 if (available > Integer.MAX_VALUE) {
237 return Integer.MAX_VALUE;
238 }
239 return (int) available;
240 }
241
242
243
244
245
246
247
248
249 private void buildSparseInputStreams() throws IOException {
250 currentSparseInputStreamIndex = -1;
251 sparseInputStreams = new ArrayList<>();
252
253 final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
254
255
256 final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream();
257
258 long offset = 0;
259 for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
260 final long zeroBlockSize = sparseHeader.getOffset() - offset;
261 if (zeroBlockSize < 0) {
262
263 throw new IOException("Corrupted struct sparse detected");
264 }
265
266
267 if (zeroBlockSize > 0) {
268 sparseInputStreams.add(new BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset));
269 }
270
271
272 if (sparseHeader.getNumbytes() > 0) {
273 sparseInputStreams.add(new BoundedInputStream(in, sparseHeader.getNumbytes()));
274 }
275
276 offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
277 }
278
279 if (!sparseInputStreams.isEmpty()) {
280 currentSparseInputStreamIndex = 0;
281 }
282 }
283
284
285
286
287
288
289 @Override
290 public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
291 return archiveEntry instanceof TarArchiveEntry;
292 }
293
294
295
296
297
298
299 @Override
300 public void close() throws IOException {
301
302 if (sparseInputStreams != null) {
303 for (final InputStream inputStream : sparseInputStreams) {
304 inputStream.close();
305 }
306 }
307 in.close();
308 }
309
310
311
312
313
314 private void consumeRemainderOfLastBlock() throws IOException {
315 final long bytesReadOfLastBlock = getBytesRead() % blockSize;
316 if (bytesReadOfLastBlock > 0) {
317 count(IOUtils.skip(in, blockSize - bytesReadOfLastBlock));
318 }
319 }
320
321
322
323
324
325
326
327
328
329
330 private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
331 long actuallySkipped = skipped;
332 if (in instanceof FileInputStream) {
333 actuallySkipped = Math.min(skipped, available);
334 }
335 if (actuallySkipped != expected) {
336 throw new IOException("Truncated TAR archive");
337 }
338 return actuallySkipped;
339 }
340
341
342
343
344
345
346 public TarArchiveEntry getCurrentEntry() {
347 return currEntry;
348 }
349
350
351
352
353
354
355
356 protected byte[] getLongNameData() throws IOException {
357
358 final ByteArrayOutputStream longName = new ByteArrayOutputStream();
359 int length = 0;
360 while ((length = read(smallBuf)) >= 0) {
361 longName.write(smallBuf, 0, length);
362 }
363 getNextEntry();
364 if (currEntry == null) {
365
366
367 return null;
368 }
369 byte[] longNameData = longName.toByteArray();
370
371 length = longNameData.length;
372 while (length > 0 && longNameData[length - 1] == 0) {
373 --length;
374 }
375 if (length != longNameData.length) {
376 longNameData = Arrays.copyOf(longNameData, length);
377 }
378 return longNameData;
379 }
380
381
382
383
384
385
386
387 @Override
388 public TarArchiveEntry getNextEntry() throws IOException {
389 return getNextTarEntry();
390 }
391
392
393
394
395
396
397
398
399
400
401 @Deprecated
402 public TarArchiveEntry getNextTarEntry() throws IOException {
403 if (isAtEOF()) {
404 return null;
405 }
406
407 if (currEntry != null) {
408
409 IOUtils.skip(this, Long.MAX_VALUE);
410
411
412 skipRecordPadding();
413 }
414
415 final byte[] headerBuf = getRecord();
416
417 if (headerBuf == null) {
418
419 currEntry = null;
420 return null;
421 }
422
423 try {
424 currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
425 } catch (final IllegalArgumentException e) {
426 throw new IOException("Error detected parsing the header", e);
427 }
428
429 entryOffset = 0;
430 entrySize = currEntry.getSize();
431
432 if (currEntry.isGNULongLinkEntry()) {
433 final byte[] longLinkData = getLongNameData();
434 if (longLinkData == null) {
435
436
437 return null;
438 }
439 currEntry.setLinkName(zipEncoding.decode(longLinkData));
440 }
441
442 if (currEntry.isGNULongNameEntry()) {
443 final byte[] longNameData = getLongNameData();
444 if (longNameData == null) {
445
446
447 return null;
448 }
449
450
451 final String name = zipEncoding.decode(longNameData);
452 currEntry.setName(name);
453 if (currEntry.isDirectory() && !name.endsWith("/")) {
454 currEntry.setName(name + "/");
455 }
456 }
457
458 if (currEntry.isGlobalPaxHeader()) {
459 readGlobalPaxHeaders();
460 }
461
462 try {
463 if (currEntry.isPaxHeader()) {
464 paxHeaders();
465 } else if (!globalPaxHeaders.isEmpty()) {
466 applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
467 }
468 } catch (final NumberFormatException e) {
469 throw new IOException("Error detected parsing the pax header", e);
470 }
471
472 if (currEntry.isOldGNUSparse()) {
473 readOldGNUSparse();
474 }
475
476
477
478
479
480 entrySize = currEntry.getSize();
481
482 return currEntry;
483 }
484
485
486
487
488
489
490
491
492
493
494
495
496 private byte[] getRecord() throws IOException {
497 byte[] headerBuf = readRecord();
498 setAtEOF(isEOFRecord(headerBuf));
499 if (isAtEOF() && headerBuf != null) {
500 tryToConsumeSecondEOFRecord();
501 consumeRemainderOfLastBlock();
502 headerBuf = null;
503 }
504 return headerBuf;
505 }
506
507
508
509
510
511
512 public int getRecordSize() {
513 return recordBuffer.length;
514 }
515
516 protected final boolean isAtEOF() {
517 return atEof;
518 }
519
520 private boolean isDirectory() {
521 return currEntry != null && currEntry.isDirectory();
522 }
523
524
525
526
527
528
529
530 protected boolean isEOFRecord(final byte[] record) {
531 return record == null || ArchiveUtils.isArrayZero(record, getRecordSize());
532 }
533
534
535
536
537
538
539 @Override
540 public synchronized void mark(final int markLimit) {
541 }
542
543
544
545
546
547
548 @Override
549 public boolean markSupported() {
550 return false;
551 }
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572 private void paxHeaders() throws IOException {
573 List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
574 final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize);
575
576
577 if (headers.containsKey(TarGnuSparseKeys.MAP)) {
578 sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
579 }
580 getNextEntry();
581 if (currEntry == null) {
582 throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
583 }
584 applyPaxHeadersToCurrentEntry(headers, sparseHeaders);
585
586
587 if (currEntry.isPaxGNU1XSparse()) {
588 sparseHeaders = TarUtils.parsePAX1XSparseHeaders(in, getRecordSize());
589 currEntry.setSparseHeaders(sparseHeaders);
590 }
591
592
593
594 buildSparseInputStreams();
595 }
596
597
598
599
600
601
602
603
604
605
606
607
608
609 @Override
610 public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
611 if (numToRead == 0) {
612 return 0;
613 }
614 int totalRead = 0;
615
616 if (isAtEOF() || isDirectory()) {
617 return -1;
618 }
619
620 if (currEntry == null) {
621 throw new IllegalStateException("No current tar entry");
622 }
623
624 if (entryOffset >= currEntry.getRealSize()) {
625 return -1;
626 }
627
628 numToRead = Math.min(numToRead, available());
629
630 if (currEntry.isSparse()) {
631
632 totalRead = readSparse(buf, offset, numToRead);
633 } else {
634 totalRead = in.read(buf, offset, numToRead);
635 }
636
637 if (totalRead == -1) {
638 if (numToRead > 0) {
639 throw new IOException("Truncated TAR archive");
640 }
641 setAtEOF(true);
642 } else {
643 count(totalRead);
644 entryOffset += totalRead;
645 }
646
647 return totalRead;
648 }
649
650 private void readGlobalPaxHeaders() throws IOException {
651 globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize);
652 getNextEntry();
653
654 if (currEntry == null) {
655 throw new IOException("Error detected parsing the pax header");
656 }
657 }
658
659
660
661
662
663
664 private void readOldGNUSparse() throws IOException {
665 if (currEntry.isExtended()) {
666 TarArchiveSparseEntry entry;
667 do {
668 final byte[] headerBuf = getRecord();
669 if (headerBuf == null) {
670 throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
671 }
672 entry = new TarArchiveSparseEntry(headerBuf);
673 currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
674 } while (entry.isExtended());
675 }
676
677
678
679 buildSparseInputStreams();
680 }
681
682
683
684
685
686
687
688 protected byte[] readRecord() throws IOException {
689 final int readCount = IOUtils.readFully(in, recordBuffer);
690 count(readCount);
691 if (readCount != getRecordSize()) {
692 return null;
693 }
694
695 return recordBuffer;
696 }
697
698
699
700
701
702
703
704
705
706
707
708
709
710 private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
711
712 if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
713 return in.read(buf, offset, numToRead);
714 }
715 if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
716 return -1;
717 }
718 final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
719 final int readLen = currentInputStream.read(buf, offset, numToRead);
720
721
722 if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
723 return readLen;
724 }
725
726 if (readLen == -1) {
727 currentSparseInputStreamIndex++;
728 return readSparse(buf, offset, numToRead);
729 }
730
731
732 if (readLen < numToRead) {
733 currentSparseInputStreamIndex++;
734 final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
735 if (readLenOfNext == -1) {
736 return readLen;
737 }
738 return readLen + readLenOfNext;
739 }
740
741 return readLen;
742 }
743
744
745
746
747 @Override
748 public synchronized void reset() {
749 }
750
751 protected final void setAtEOF(final boolean atEof) {
752 this.atEof = atEof;
753 }
754
755 protected final void setCurrentEntry(final TarArchiveEntry currEntry) {
756 this.currEntry = currEntry;
757 }
758
759
760
761
762
763
764
765
766
767
768 @Override
769 public long skip(final long n) throws IOException {
770 if (n <= 0 || isDirectory()) {
771 return 0;
772 }
773
774 final long availableOfInputStream = in.available();
775 final long available = currEntry.getRealSize() - entryOffset;
776 final long numToSkip = Math.min(n, available);
777 long skipped;
778
779 if (!currEntry.isSparse()) {
780 skipped = IOUtils.skip(in, numToSkip);
781
782
783 skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
784 } else {
785 skipped = skipSparse(numToSkip);
786 }
787
788 count(skipped);
789 entryOffset += skipped;
790 return skipped;
791 }
792
793
794
795
796
797
798 private void skipRecordPadding() throws IOException {
799 if (!isDirectory() && this.entrySize > 0 && this.entrySize % getRecordSize() != 0) {
800 final long available = in.available();
801 final long numRecords = this.entrySize / getRecordSize() + 1;
802 final long padding = numRecords * getRecordSize() - this.entrySize;
803 long skipped = IOUtils.skip(in, padding);
804
805 skipped = getActuallySkipped(available, skipped, padding);
806
807 count(skipped);
808 }
809 }
810
811
812
813
814
815
816
817
818
819 private long skipSparse(final long n) throws IOException {
820 if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
821 return in.skip(n);
822 }
823 long bytesSkipped = 0;
824 while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
825 final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
826 bytesSkipped += currentInputStream.skip(n - bytesSkipped);
827 if (bytesSkipped < n) {
828 currentSparseInputStreamIndex++;
829 }
830 }
831 return bytesSkipped;
832 }
833
834
835
836
837
838
839
840
841
842 private void tryToConsumeSecondEOFRecord() throws IOException {
843 boolean shouldReset = true;
844 final boolean marked = in.markSupported();
845 if (marked) {
846 in.mark(getRecordSize());
847 }
848 try {
849 shouldReset = !isEOFRecord(readRecord());
850 } finally {
851 if (shouldReset && marked) {
852 pushedBackBytes(getRecordSize());
853 in.reset();
854 }
855 }
856 }
857 }