1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.compress.archivers.tar;
18
19 import java.io.ByteArrayOutputStream;
20 import java.io.Closeable;
21 import java.io.File;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.nio.ByteBuffer;
25 import java.nio.channels.SeekableByteChannel;
26 import java.nio.file.Files;
27 import java.nio.file.Path;
28 import java.util.ArrayList;
29 import java.util.Arrays;
30 import java.util.HashMap;
31 import java.util.LinkedList;
32 import java.util.List;
33 import java.util.Map;
34
35 import org.apache.commons.compress.archivers.zip.ZipEncoding;
36 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
37 import org.apache.commons.compress.utils.ArchiveUtils;
38 import org.apache.commons.compress.utils.BoundedArchiveInputStream;
39 import org.apache.commons.compress.utils.BoundedInputStream;
40 import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
41 import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
42
43
44
45
46
47
48 public class TarFile implements Closeable {
49
50 private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {
51
52 private final SeekableByteChannel channel;
53
54 private final TarArchiveEntry entry;
55
56 private long entryOffset;
57
58 private int currentSparseInputStreamIndex;
59
60 BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
61 super(entry.getDataOffset(), entry.getRealSize());
62 if (channel.size() - entry.getSize() < entry.getDataOffset()) {
63 throw new IOException("entry size exceeds archive size");
64 }
65 this.entry = entry;
66 this.channel = channel;
67 }
68
69 @Override
70 protected int read(final long pos, final ByteBuffer buf) throws IOException {
71 if (entryOffset >= entry.getRealSize()) {
72 return -1;
73 }
74
75 final int totalRead;
76 if (entry.isSparse()) {
77 totalRead = readSparse(entryOffset, buf, buf.limit());
78 } else {
79 totalRead = readArchive(pos, buf);
80 }
81
82 if (totalRead == -1) {
83 if (buf.array().length > 0) {
84 throw new IOException("Truncated TAR archive");
85 }
86 setAtEOF(true);
87 } else {
88 entryOffset += totalRead;
89 buf.flip();
90 }
91 return totalRead;
92 }
93
94 private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
95 channel.position(pos);
96 return channel.read(buf);
97 }
98
99 private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
100
101 final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
102 if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
103 return readArchive(entry.getDataOffset() + pos, buf);
104 }
105
106 if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
107 return -1;
108 }
109
110 final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
111 final byte[] bufArray = new byte[numToRead];
112 final int readLen = currentInputStream.read(bufArray);
113 if (readLen != -1) {
114 buf.put(bufArray, 0, readLen);
115 }
116
117
118
119 if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
120 return readLen;
121 }
122
123
124 if (readLen == -1) {
125 currentSparseInputStreamIndex++;
126 return readSparse(pos, buf, numToRead);
127 }
128
129
130
131 if (readLen < numToRead) {
132 currentSparseInputStreamIndex++;
133 final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
134 if (readLenOfNext == -1) {
135 return readLen;
136 }
137
138 return readLen + readLenOfNext;
139 }
140
141
142 return readLen;
143 }
144 }
145
146 private static final int SMALL_BUFFER_SIZE = 256;
147
148 private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];
149
150 private final SeekableByteChannel archive;
151
152
153
154
155 private final ZipEncoding zipEncoding;
156
157 private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();
158
159 private final int blockSize;
160
161 private final boolean lenient;
162
163 private final int recordSize;
164
165 private final ByteBuffer recordBuffer;
166
167
168 private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();
169
170 private boolean hasHitEOF;
171
172
173
174
175 private TarArchiveEntry currEntry;
176
177
178 private Map<String, String> globalPaxHeaders = new HashMap<>();
179
180 private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();
181
182
183
184
185
186
187
/**
 * Opens an archive held in memory, using the default block size, record size and encoding and
 * with the lenient flag switched off.
 *
 * @param content the bytes of the archive
 * @throws IOException if reading the archive fails
 */
public TarFile(final byte[] content) throws IOException {
    this(content, false);
}
191
192
193
194
195
196
197
198
199
/**
 * Opens an archive held in memory, using the default block size, record size and encoding.
 *
 * @param content the bytes of the archive
 * @param lenient flag forwarded to every parsed {@code TarArchiveEntry}
 * @throws IOException if reading the archive fails
 */
public TarFile(final byte[] content, final boolean lenient) throws IOException {
    this(
            new SeekableInMemoryByteChannel(content),
            TarConstants.DEFAULT_BLKSIZE,
            TarConstants.DEFAULT_RCDSIZE,
            null,
            lenient);
}
203
204
205
206
207
208
209
210
/**
 * Opens an archive held in memory, using the default block and record sizes and with the
 * lenient flag switched off.
 *
 * @param content  the bytes of the archive
 * @param encoding name of the encoding handed to {@code ZipEncodingHelper.getZipEncoding}
 * @throws IOException if reading the archive fails
 */
public TarFile(final byte[] content, final String encoding) throws IOException {
    this(
            new SeekableInMemoryByteChannel(content),
            TarConstants.DEFAULT_BLKSIZE,
            TarConstants.DEFAULT_RCDSIZE,
            encoding,
            false);
}
214
215
216
217
218
219
220
/**
 * Opens the given archive file, using the default block size, record size and encoding and with
 * the lenient flag switched off.
 *
 * @param archive the file to open
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final File archive) throws IOException {
    this(archive, false);
}
224
225
226
227
228
229
230
231
232
/**
 * Opens the given archive file, using the default block size, record size and encoding.
 *
 * @param archive the file to open
 * @param lenient flag forwarded to every parsed {@code TarArchiveEntry}
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final File archive, final boolean lenient) throws IOException {
    this(
            archive.toPath(),
            lenient);
}
236
237
238
239
240
241
242
243
/**
 * Opens the given archive file with an explicit encoding, using the default block and record
 * sizes and with the lenient flag switched off.
 *
 * @param archive  the file to open
 * @param encoding name of the encoding handed to {@code ZipEncodingHelper.getZipEncoding}
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final File archive, final String encoding) throws IOException {
    this(
            archive.toPath(),
            encoding);
}
247
248
249
250
251
252
253
/**
 * Opens the archive at the given path, using the default block size, record size and encoding
 * and with the lenient flag switched off.
 *
 * <p>NOTE(review): the channel is opened before the archive is scanned and nothing here closes
 * it if scanning throws.</p>
 *
 * @param archivePath the path of the file to open
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final Path archivePath) throws IOException {
    this(archivePath, false);
}
257
258
259
260
261
262
263
264
265
/**
 * Opens the archive at the given path, using the default block size, record size and encoding.
 *
 * <p>NOTE(review): the channel is opened before the archive is scanned and nothing here closes
 * it if scanning throws.</p>
 *
 * @param archivePath the path of the file to open
 * @param lenient     flag forwarded to every parsed {@code TarArchiveEntry}
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final Path archivePath, final boolean lenient) throws IOException {
    this(
            Files.newByteChannel(archivePath),
            TarConstants.DEFAULT_BLKSIZE,
            TarConstants.DEFAULT_RCDSIZE,
            null,
            lenient);
}
269
270
271
272
273
274
275
276
/**
 * Opens the archive at the given path with an explicit encoding, using the default block and
 * record sizes and with the lenient flag switched off.
 *
 * <p>NOTE(review): the channel is opened before the archive is scanned and nothing here closes
 * it if scanning throws.</p>
 *
 * @param archivePath the path of the file to open
 * @param encoding    name of the encoding handed to {@code ZipEncodingHelper.getZipEncoding}
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final Path archivePath, final String encoding) throws IOException {
    this(
            Files.newByteChannel(archivePath),
            TarConstants.DEFAULT_BLKSIZE,
            TarConstants.DEFAULT_RCDSIZE,
            encoding,
            false);
}
280
281
282
283
284
285
286
/**
 * Opens an archive from the given channel, using the default block size, record size and
 * encoding and with the lenient flag switched off.
 *
 * @param content the channel holding the archive
 * @throws IOException if reading the archive fails
 */
public TarFile(final SeekableByteChannel content) throws IOException {
    this(
            content,
            TarConstants.DEFAULT_BLKSIZE,
            TarConstants.DEFAULT_RCDSIZE,
            null,
            false);
}
290
291
292
293
294
295
296
297
298
299
300
301
/**
 * Opens an archive from the given channel and immediately scans it, collecting every entry.
 *
 * @param archive    the channel holding the archive
 * @param blockSize  the block size to use
 * @param recordSize the record size to use; also the size of the internal record buffer
 * @param encoding   name of the encoding handed to {@code ZipEncodingHelper.getZipEncoding}
 * @param lenient    flag forwarded to every parsed {@code TarArchiveEntry}
 * @throws IOException if reading the archive fails
 */
public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient)
        throws IOException {
    this.archive = archive;
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.recordSize = recordSize;
    this.recordBuffer = ByteBuffer.allocate(this.recordSize);
    this.blockSize = blockSize;
    this.lenient = lenient;

    // Walk the whole archive once so that all entries are known up front.
    for (TarArchiveEntry next = getNextTarEntry(); next != null; next = getNextTarEntry()) {
        entries.add(next);
    }
}
316
317
318
319
320
321
322
323 private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException {
324 currEntry.updateEntryFromPaxHeaders(headers);
325 currEntry.setSparseHeaders(sparseHeaders);
326 }
327
328
329
330
331
332
333
334
335 private void buildSparseInputStreams() throws IOException {
336 final List<InputStream> streams = new ArrayList<>();
337
338 final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
339
340
341 final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream();
342
343 long offset = 0;
344 long numberOfZeroBytesInSparseEntry = 0;
345 for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
346 final long zeroBlockSize = sparseHeader.getOffset() - offset;
347 if (zeroBlockSize < 0) {
348
349 throw new IOException("Corrupted struct sparse detected");
350 }
351
352
353 if (zeroBlockSize > 0) {
354 streams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize));
355 numberOfZeroBytesInSparseEntry += zeroBlockSize;
356 }
357
358
359 if (sparseHeader.getNumbytes() > 0) {
360 final long start = currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
361 if (start + sparseHeader.getNumbytes() < start) {
362
363 throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
364 }
365 streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
366 }
367
368 offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
369 }
370
371 sparseInputStreams.put(currEntry.getName(), streams);
372 }
373
374 @Override
375 public void close() throws IOException {
376 archive.close();
377 }
378
379
380
381
382
383 private void consumeRemainderOfLastBlock() throws IOException {
384 final long bytesReadOfLastBlock = archive.position() % blockSize;
385 if (bytesReadOfLastBlock > 0) {
386 repositionForwardBy(blockSize - bytesReadOfLastBlock);
387 }
388 }
389
390
391
392
393
394
395 public List<TarArchiveEntry> getEntries() {
396 return new ArrayList<>(entries);
397 }
398
399
400
401
402
403
404
405
406 public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
407 try {
408 return new BoundedTarEntryInputStream(entry, archive);
409 } catch (final RuntimeException ex) {
410 throw new IOException("Corrupted TAR archive. Can't read entry", ex);
411 }
412 }
413
414
415
416
417
418
419
420 private byte[] getLongNameData() throws IOException {
421 final ByteArrayOutputStream longName = new ByteArrayOutputStream();
422 int length;
423 try (InputStream in = getInputStream(currEntry)) {
424 while ((length = in.read(smallBuf)) >= 0) {
425 longName.write(smallBuf, 0, length);
426 }
427 }
428 getNextTarEntry();
429 if (currEntry == null) {
430
431
432 return null;
433 }
434 byte[] longNameData = longName.toByteArray();
435
436 length = longNameData.length;
437 while (length > 0 && longNameData[length - 1] == 0) {
438 --length;
439 }
440 if (length != longNameData.length) {
441 longNameData = Arrays.copyOf(longNameData, length);
442 }
443 return longNameData;
444 }
445
446
447
448
449
450
451
452
453
454 private TarArchiveEntry getNextTarEntry() throws IOException {
455 if (isAtEOF()) {
456 return null;
457 }
458
459 if (currEntry != null) {
460
461 repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
462 throwExceptionIfPositionIsNotInArchive();
463 skipRecordPadding();
464 }
465
466 final ByteBuffer headerBuf = getRecord();
467 if (null == headerBuf) {
468
469 currEntry = null;
470 return null;
471 }
472
473 try {
474 final long position = archive.position();
475 currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
476 } catch (final IllegalArgumentException e) {
477 throw new IOException("Error detected parsing the header", e);
478 }
479
480 if (currEntry.isGNULongLinkEntry()) {
481 final byte[] longLinkData = getLongNameData();
482 if (longLinkData == null) {
483
484
485
486 return null;
487 }
488 currEntry.setLinkName(zipEncoding.decode(longLinkData));
489 }
490
491 if (currEntry.isGNULongNameEntry()) {
492 final byte[] longNameData = getLongNameData();
493 if (longNameData == null) {
494
495
496
497 return null;
498 }
499
500
501 final String name = zipEncoding.decode(longNameData);
502 currEntry.setName(name);
503 if (currEntry.isDirectory() && !name.endsWith("/")) {
504 currEntry.setName(name + "/");
505 }
506 }
507
508 if (currEntry.isGlobalPaxHeader()) {
509 readGlobalPaxHeaders();
510 }
511
512 try {
513 if (currEntry.isPaxHeader()) {
514 paxHeaders();
515 } else if (!globalPaxHeaders.isEmpty()) {
516 applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
517 }
518 } catch (final NumberFormatException e) {
519 throw new IOException("Error detected parsing the pax header", e);
520 }
521
522 if (currEntry.isOldGNUSparse()) {
523 readOldGNUSparse();
524 }
525
526 return currEntry;
527 }
528
529
530
531
532
533
534
535
536
537
538
539
540
541 private ByteBuffer getRecord() throws IOException {
542 ByteBuffer headerBuf = readRecord();
543 setAtEOF(isEOFRecord(headerBuf));
544 if (isAtEOF() && headerBuf != null) {
545
546 tryToConsumeSecondEOFRecord();
547 consumeRemainderOfLastBlock();
548 headerBuf = null;
549 }
550 return headerBuf;
551 }
552
553 protected final boolean isAtEOF() {
554 return hasHitEOF;
555 }
556
557 private boolean isDirectory() {
558 return currEntry != null && currEntry.isDirectory();
559 }
560
561 private boolean isEOFRecord(final ByteBuffer headerBuf) {
562 return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
563 }
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594 private void paxHeaders() throws IOException {
595 List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
596 final Map<String, String> headers;
597 try (InputStream input = getInputStream(currEntry)) {
598 headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
599 }
600
601
602 if (headers.containsKey(TarGnuSparseKeys.MAP)) {
603 sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
604 }
605 getNextTarEntry();
606 if (currEntry == null) {
607 throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
608 }
609 applyPaxHeadersToCurrentEntry(headers, sparseHeaders);
610
611
612 if (currEntry.isPaxGNU1XSparse()) {
613 try (InputStream input = getInputStream(currEntry)) {
614 sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
615 }
616 currEntry.setSparseHeaders(sparseHeaders);
617
618 currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
619 }
620
621
622
623 buildSparseInputStreams();
624 }
625
626 private void readGlobalPaxHeaders() throws IOException {
627 try (InputStream input = getInputStream(currEntry)) {
628 globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders, currEntry.getSize());
629 }
630 getNextTarEntry();
631
632 if (currEntry == null) {
633 throw new IOException("Error detected parsing the pax header");
634 }
635 }
636
637
638
639
640
641
642 private void readOldGNUSparse() throws IOException {
643 if (currEntry.isExtended()) {
644 TarArchiveSparseEntry entry;
645 do {
646 final ByteBuffer headerBuf = getRecord();
647 if (headerBuf == null) {
648 throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
649 }
650 entry = new TarArchiveSparseEntry(headerBuf.array());
651 currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
652 currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
653 } while (entry.isExtended());
654 }
655
656
657
658 buildSparseInputStreams();
659 }
660
661
662
663
664
665
666
667 private ByteBuffer readRecord() throws IOException {
668 recordBuffer.rewind();
669 final int readNow = archive.read(recordBuffer);
670 if (readNow != recordSize) {
671 return null;
672 }
673 return recordBuffer;
674 }
675
676 private void repositionForwardBy(final long offset) throws IOException {
677 repositionForwardTo(archive.position() + offset);
678 }
679
680 private void repositionForwardTo(final long newPosition) throws IOException {
681 final long currPosition = archive.position();
682 if (newPosition < currPosition) {
683 throw new IOException("trying to move backwards inside of the archive");
684 }
685 archive.position(newPosition);
686 }
687
688 protected final void setAtEOF(final boolean b) {
689 hasHitEOF = b;
690 }
691
692
693
694
695
696
697 private void skipRecordPadding() throws IOException {
698 if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
699 final long numRecords = currEntry.getSize() / recordSize + 1;
700 final long padding = numRecords * recordSize - currEntry.getSize();
701 repositionForwardBy(padding);
702 throwExceptionIfPositionIsNotInArchive();
703 }
704 }
705
706
707
708
709
710
711 private void throwExceptionIfPositionIsNotInArchive() throws IOException {
712 if (archive.size() < archive.position()) {
713 throw new IOException("Truncated TAR archive");
714 }
715 }
716
717
718
719
720
721
722
723
724
725
726
727
728 private void tryToConsumeSecondEOFRecord() throws IOException {
729 boolean shouldReset = true;
730 try {
731 shouldReset = !isEOFRecord(readRecord());
732 } finally {
733 if (shouldReset) {
734 archive.position(archive.position() - recordSize);
735 }
736 }
737 }
738 }