001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.tar; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.io.UncheckedIOException; 024import java.math.BigInteger; 025import java.nio.ByteBuffer; 026import java.nio.charset.Charset; 027import java.nio.charset.StandardCharsets; 028import java.util.ArrayList; 029import java.util.Collections; 030import java.util.HashMap; 031import java.util.List; 032import java.util.Map; 033 034import org.apache.commons.compress.archivers.zip.ZipEncoding; 035import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 036import org.apache.commons.compress.utils.IOUtils; 037import org.apache.commons.compress.utils.ParsingUtils; 038import org.apache.commons.io.output.ByteArrayOutputStream; 039 040/** 041 * This class provides static utility methods to work with byte streams. 042 * 043 * @Immutable 044 */ 045// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) 046public class TarUtils { 047 048 private static final int BYTE_MASK = 255; 049 050 static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset()); 051 052 /** 053 * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding. 054 */ 055 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { 056 057 @Override 058 public boolean canEncode(final String name) { 059 return true; 060 } 061 062 @Override 063 public String decode(final byte[] buffer) { 064 final int length = buffer.length; 065 final StringBuilder result = new StringBuilder(length); 066 for (final byte b : buffer) { 067 if (b == 0) { // Trailing null 068 break; 069 } 070 result.append((char) (b & 0xFF)); // Allow for sign-extension 071 } 072 return result.toString(); 073 } 074 075 @Override 076 public ByteBuffer encode(final String name) { 077 final int length = name.length(); 078 final byte[] buf = new byte[length]; 079 // copy until end of input or output is reached. 080 for (int i = 0; i < length; ++i) { 081 buf[i] = (byte) name.charAt(i); 082 } 083 return ByteBuffer.wrap(buf); 084 } 085 }; 086 087 /** 088 * Computes the checksum of a tar entry header. 089 * 090 * @param buf The tar entry's header buffer. 091 * @return The computed checksum. 092 */ 093 public static long computeCheckSum(final byte[] buf) { 094 long sum = 0; 095 for (final byte element : buf) { 096 sum += BYTE_MASK & element; 097 } 098 return sum; 099 } 100 101 // Helper method to generate the exception message 102 private static String exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte) { 103 // default charset is good enough for an exception message, 104 // 105 // the alternative was to modify parseOctal and 106 // parseOctalOrBinary to receive the ZipEncoding of the 107 // archive (deprecating the existing public methods, of 108 // course) and dealing with the fact that ZipEncoding#decode 109 // can throw an IOException which parseOctal* doesn't declare 110 String string = new String(buffer, offset, length, Charset.defaultCharset()); 111 112 string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed 113 return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length; 114 } 115 116 private static void formatBigIntegerBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) { 117 final BigInteger val = BigInteger.valueOf(value); 118 final byte[] b = val.toByteArray(); 119 final int len = b.length; 120 if (len > length - 1) { 121 throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field."); 122 } 123 final int off = offset + length - len; 124 System.arraycopy(b, 0, buf, off, len); 125 final byte fill = (byte) (negative ? 0xff : 0); 126 for (int i = offset + 1; i < off; i++) { 127 buf[i] = fill; 128 } 129 } 130 131 /** 132 * Writes an octal value into a buffer. 133 * 134 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by NUL and then 135 * space. 136 * 137 * @param value The value to convert 138 * @param buf The destination buffer 139 * @param offset The starting offset into the buffer. 140 * @param length The size of the buffer. 141 * @return The updated value of offset, i.e. offset+length 142 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 143 */ 144 public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 145 int idx = length - 2; // for NUL and space 146 formatUnsignedOctalString(value, buf, offset, idx); 147 buf[offset + idx++] = 0; // Trailing null 148 buf[offset + idx] = (byte) ' '; // Trailing space 149 return offset + length; 150 } 151 152 private static void formatLongBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) { 153 final int bits = (length - 1) * 8; 154 final long max = 1L << bits; 155 long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE 156 if (val < 0 || val >= max) { 157 throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field."); 158 } 159 if (negative) { 160 val ^= max - 1; 161 val++; 162 val |= 0xffL << bits; 163 } 164 for (int i = offset + length - 1; i >= offset; i--) { 165 buf[i] = (byte) val; 166 val >>= 8; 167 } 168 } 169 170 /** 171 * Writes an octal long integer into a buffer. 172 * 173 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space. 174 * 175 * @param value The value to write as octal 176 * @param buf The destinationbuffer. 177 * @param offset The starting offset into the buffer. 178 * @param length The length of the buffer 179 * @return The updated offset 180 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 181 */ 182 public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 183 final int idx = length - 1; // For space 184 formatUnsignedOctalString(value, buf, offset, idx); 185 buf[offset + idx] = (byte) ' '; // Trailing space 186 return offset + length; 187 } 188 189 /** 190 * Writes a long integer into a buffer as an octal string if this will fit, or as a binary number otherwise. 191 * 192 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space. 193 * 194 * @param value The value to write into the buffer. 195 * @param buf The destination buffer. 196 * @param offset The starting offset into the buffer. 197 * @param length The length of the buffer. 198 * @return The updated offset. 199 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer. 200 * @since 1.4 201 */ 202 public static int formatLongOctalOrBinaryBytes(final long value, final byte[] buf, final int offset, final int length) { 203 // Check whether we are dealing with UID/GID or SIZE field 204 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; 205 final boolean negative = value < 0; 206 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars 207 return formatLongOctalBytes(value, buf, offset, length); 208 } 209 if (length < 9) { 210 formatLongBinary(value, buf, offset, length, negative); 211 } else { 212 formatBigIntegerBinary(value, buf, offset, length, negative); 213 } 214 buf[offset] = (byte) (negative ? 0xff : 0x80); 215 return offset + length; 216 } 217 218 /** 219 * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the 220 * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated. 221 * 222 * @param name The header name from which to copy the characters. 223 * @param buf The buffer where the name is to be stored. 224 * @param offset The starting offset into the buffer 225 * @param length The maximum number of header bytes to copy. 226 * @return The updated offset, i.e. offset + length 227 */ 228 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) { 229 try { 230 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); 231 } catch (final IOException ex) { // NOSONAR 232 try { 233 return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING); 234 } catch (final IOException ex2) { 235 // impossible 236 throw new UncheckedIOException(ex2); // NOSONAR 237 } 238 } 239 } 240 241 /** 242 * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the 243 * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated. 244 * 245 * @param name The header name from which to copy the characters. 246 * @param buf The buffer where the name is to be stored. 247 * @param offset The starting offset into the buffer 248 * @param length The maximum number of header bytes to copy. 249 * @param encoding name of the encoding to use for file names 250 * @since 1.4 251 * @return The updated offset, i.e. offset + length 252 * @throws IOException on error 253 */ 254 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException { 255 int len = name.length(); 256 ByteBuffer b = encoding.encode(name); 257 while (b.limit() > length && len > 0) { 258 b = encoding.encode(name.substring(0, --len)); 259 } 260 final int limit = b.limit() - b.position(); 261 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); 262 // Pad any remaining output bytes with NUL 263 for (int i = limit; i < length; ++i) { 264 buf[offset + i] = 0; 265 } 266 return offset + length; 267 } 268 269 /** 270 * Writes an octal integer into a buffer. 271 * 272 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by space and NUL 273 * 274 * @param value The value to write 275 * @param buf The buffer to receive the output 276 * @param offset The starting offset into the buffer 277 * @param length The size of the output buffer 278 * @return The updated offset, i.e. offset+length 279 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 280 */ 281 public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 282 int idx = length - 2; // For space and trailing null 283 formatUnsignedOctalString(value, buf, offset, idx); 284 buf[offset + idx++] = (byte) ' '; // Trailing space 285 buf[offset + idx] = 0; // Trailing null 286 return offset + length; 287 } 288 289 /** 290 * Fills a buffer with unsigned octal number, padded with leading zeroes. 291 * 292 * @param value number to convert to octal - treated as unsigned 293 * @param buffer destination buffer 294 * @param offset starting offset in buffer 295 * @param length length of buffer to fill 296 * @throws IllegalArgumentException if the value will not fit in the buffer 297 */ 298 public static void formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length) { 299 int remaining = length; 300 remaining--; 301 if (value == 0) { 302 buffer[offset + remaining--] = (byte) '0'; 303 } else { 304 long val = value; 305 for (; remaining >= 0 && val != 0; --remaining) { 306 // CheckStyle:MagicNumber OFF 307 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); 308 val = val >>> 3; 309 // CheckStyle:MagicNumber ON 310 } 311 if (val != 0) { 312 throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length); 313 } 314 } 315 316 for (; remaining >= 0; --remaining) { // leading zeros 317 buffer[offset + remaining] = (byte) '0'; 318 } 319 } 320 321 private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) { 322 final byte[] remainder = new byte[length - 1]; 323 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); 324 BigInteger val = new BigInteger(remainder); 325 if (negative) { 326 // 2's complement 327 val = val.add(BigInteger.valueOf(-1)).not(); 328 } 329 if (val.bitLength() > 63) { 330 throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value"); 331 } 332 return negative ? -val.longValue() : val.longValue(); 333 } 334 335 private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) { 336 if (length >= 9) { 337 throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value"); 338 } 339 long val = 0; 340 for (int i = 1; i < length; i++) { 341 val = (val << 8) + (buffer[offset + i] & 0xff); 342 } 343 if (negative) { 344 // 2's complement 345 val--; 346 val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1; 347 } 348 return negative ? -val : val; 349 } 350 351 /** 352 * Parses a boolean byte from a buffer. Leading spaces and NUL are ignored. The buffer may contain trailing spaces or NULs. 353 * 354 * @param buffer The buffer from which to parse. 355 * @param offset The offset into the buffer from which to parse. 356 * @return The boolean value of the bytes. 357 * @throws IllegalArgumentException if an invalid byte is detected. 358 */ 359 public static boolean parseBoolean(final byte[] buffer, final int offset) { 360 return buffer[offset] == 1; 361 } 362 363 /** 364 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map GNU.sparse.map Map of non-null data chunks. It is a string 365 * consisting of comma-separated values "offset,size[,offset-1,size-1...]" 366 * 367 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 368 * @return unmodifiable list of sparse headers parsed from sparse map 369 * @throws IOException Corrupted TAR archive. 370 * @since 1.21 371 */ 372 protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) throws IOException { 373 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 374 final String[] sparseHeaderStrings = sparseMap.split(","); 375 if (sparseHeaderStrings.length % 2 == 1) { 376 throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header"); 377 } 378 for (int i = 0; i < sparseHeaderStrings.length; i += 2) { 379 final long sparseOffset = ParsingUtils.parseLongValue(sparseHeaderStrings[i]); 380 if (sparseOffset < 0) { 381 throw new IOException("Corrupted TAR archive. Sparse struct offset contains negative value"); 382 } 383 final long sparseNumbytes = ParsingUtils.parseLongValue(sparseHeaderStrings[i + 1]); 384 if (sparseNumbytes < 0) { 385 throw new IOException("Corrupted TAR archive. Sparse struct numbytes contains negative value"); 386 } 387 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 388 } 389 return Collections.unmodifiableList(sparseHeaders); 390 } 391 392 /** 393 * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached. 394 * 395 * @param buffer The buffer from which to parse. 396 * @param offset The offset into the buffer from which to parse. 397 * @param length The maximum number of bytes to parse. 398 * @return The entry name. 399 */ 400 public static String parseName(final byte[] buffer, final int offset, final int length) { 401 try { 402 return parseName(buffer, offset, length, DEFAULT_ENCODING); 403 } catch (final IOException ex) { // NOSONAR 404 try { 405 return parseName(buffer, offset, length, FALLBACK_ENCODING); 406 } catch (final IOException ex2) { 407 // impossible 408 throw new UncheckedIOException(ex2); // NOSONAR 409 } 410 } 411 } 412 413 /** 414 * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached. 415 * 416 * @param buffer The buffer from which to parse. 417 * @param offset The offset into the buffer from which to parse. 418 * @param length The maximum number of bytes to parse. 419 * @param encoding name of the encoding to use for file names 420 * @since 1.4 421 * @return The entry name. 422 * @throws IOException on error 423 */ 424 public static String parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException { 425 int len = 0; 426 for (int i = offset; len < length && buffer[i] != 0; i++) { 427 len++; 428 } 429 if (len > 0) { 430 final byte[] b = new byte[len]; 431 System.arraycopy(buffer, offset, b, 0, len); 432 return encoding.decode(b); 433 } 434 return ""; 435 } 436 437 /** 438 * Parses an octal string from a buffer. 439 * 440 * <p> 441 * Leading spaces are ignored. The buffer must contain a trailing space or NUL, and may contain an additional trailing space or NUL. 442 * </p> 443 * 444 * <p> 445 * The input buffer is allowed to contain all NULs, in which case the method returns 0L (this allows for missing fields). 446 * </p> 447 * 448 * <p> 449 * To work-around some tar implementations that insert a leading NUL this method returns 0 if it detects a leading NUL since Commons Compress 1.4. 450 * </p> 451 * 452 * @param buffer The buffer from which to parse. 453 * @param offset The offset into the buffer from which to parse. 454 * @param length The maximum number of bytes to parse - must be at least 2 bytes. 455 * @return The long value of the octal string. 456 * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected. 457 */ 458 public static long parseOctal(final byte[] buffer, final int offset, final int length) { 459 long result = 0; 460 int end = offset + length; 461 int start = offset; 462 if (length < 2) { 463 throw new IllegalArgumentException("Length " + length + " must be at least 2"); 464 } 465 if (buffer[start] == 0) { 466 return 0L; 467 } 468 // Skip leading spaces 469 while (start < end) { 470 if (buffer[start] != ' ') { 471 break; 472 } 473 start++; 474 } 475 // Trim all trailing NULs and spaces. 476 // The ustar and POSIX tar specs require a trailing NUL or 477 // space but some implementations use the extra digit for big 478 // sizes/uids/gids ... 479 byte trailer = buffer[end - 1]; 480 while (start < end && (trailer == 0 || trailer == ' ')) { 481 end--; 482 trailer = buffer[end - 1]; 483 } 484 for (; start < end; start++) { 485 final byte currentByte = buffer[start]; 486 // CheckStyle:MagicNumber OFF 487 if (currentByte < '0' || currentByte > '7') { 488 throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte)); 489 } 490 result = (result << 3) + (currentByte - '0'); // convert from ASCII 491 // CheckStyle:MagicNumber ON 492 } 493 return result; 494 } 495 496 /** 497 * Computes the value contained in a byte buffer. If the most significant bit of the first byte in the buffer is set, this bit is ignored and the rest of 498 * the buffer is interpreted as a binary number. Otherwise, the buffer is interpreted as an octal number as per the parseOctal function above. 499 * 500 * @param buffer The buffer from which to parse. 501 * @param offset The offset into the buffer from which to parse. 502 * @param length The maximum number of bytes to parse. 503 * @return The long value of the octal or binary string. 504 * @throws IllegalArgumentException if the trailing space/NUL is missing or an invalid byte is detected in an octal number, or if a binary number would 505 * exceed the size of a signed long 64-bit integer. 506 * @since 1.4 507 */ 508 public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) { 509 if ((buffer[offset] & 0x80) == 0) { 510 return parseOctal(buffer, offset, length); 511 } 512 final boolean negative = buffer[offset] == (byte) 0xff; 513 if (length < 9) { 514 return parseBinaryLong(buffer, offset, length, negative); 515 } 516 return parseBinaryBigInteger(buffer, offset, length, negative); 517 } 518 519 /** 520 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 521 * 522 * <p> 523 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 524 * </p> 525 * <p> 526 * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You should use 527 * {@link #parseFromPAX01SparseHeaders} directly instead. 528 * </p> 529 * 530 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 531 * @return sparse headers parsed from sparse map 532 * @deprecated use #parseFromPAX01SparseHeaders instead 533 */ 534 @Deprecated 535 protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) { 536 try { 537 return parseFromPAX01SparseHeaders(sparseMap); 538 } catch (final IOException ex) { 539 throw new UncheckedIOException(ex.getMessage(), ex); 540 } 541 } 542 543 /** 544 * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers 545 * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are 546 * map entries, each one consisting of two numbers giving the offset and size of the data block it describes. 547 * 548 * @param inputStream parsing source. 549 * @param recordSize The size the TAR header 550 * @return sparse headers 551 * @throws IOException if an I/O error occurs. 552 */ 553 protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException { 554 // for 1.X PAX Headers 555 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 556 long bytesRead = 0; 557 long[] readResult = readLineOfNumberForPax1X(inputStream); 558 long sparseHeadersCount = readResult[0]; 559 if (sparseHeadersCount < 0) { 560 // overflow while reading number? 561 throw new IOException("Corrupted TAR archive. Negative value in sparse headers block"); 562 } 563 bytesRead += readResult[1]; 564 while (sparseHeadersCount-- > 0) { 565 readResult = readLineOfNumberForPax1X(inputStream); 566 final long sparseOffset = readResult[0]; 567 if (sparseOffset < 0) { 568 throw new IOException("Corrupted TAR archive. Sparse header block offset contains negative value"); 569 } 570 bytesRead += readResult[1]; 571 572 readResult = readLineOfNumberForPax1X(inputStream); 573 final long sparseNumbytes = readResult[0]; 574 if (sparseNumbytes < 0) { 575 throw new IOException("Corrupted TAR archive. Sparse header block numbytes contains negative value"); 576 } 577 bytesRead += readResult[1]; 578 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 579 } 580 // skip the rest of this record data 581 final long bytesToSkip = recordSize - bytesRead % recordSize; 582 org.apache.commons.io.IOUtils.skip(inputStream, bytesToSkip); 583 return sparseHeaders; 584 } 585 586 /** 587 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like: 588 * 589 * <pre> 590 * GNU.sparse.size=size 591 * GNU.sparse.numblocks=numblocks 592 * repeat numblocks times 593 * GNU.sparse.offset=offset 594 * GNU.sparse.numbytes=numbytes 595 * end repeat 596 * </pre> 597 * <p> 598 * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map 599 * </p> 600 * <p> 601 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 602 * </p> 603 * 604 * @param inputStream input stream to read keys and values 605 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map 606 * @param globalPaxHeaders global PAX headers of the tar archive 607 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. 608 * @throws IOException if an I/O error occurs. 609 * @deprecated use the four-arg version instead 610 */ 611 @Deprecated 612 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, 613 final Map<String, String> globalPaxHeaders) throws IOException { 614 return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1); 615 } 616 617 /** 618 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like: 619 * 620 * <pre> 621 * GNU.sparse.size=size 622 * GNU.sparse.numblocks=numblocks 623 * repeat numblocks times 624 * GNU.sparse.offset=offset 625 * GNU.sparse.numbytes=numbytes 626 * end repeat 627 * </pre> 628 * <p> 629 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 630 * </p> 631 * <p> 632 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 633 * </p> 634 * 635 * @param inputStream input stream to read keys and values 636 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map 637 * @param globalPaxHeaders global PAX headers of the tar archive 638 * @param headerSize total size of the PAX header, will be ignored if negative 639 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. 640 * @throws IOException if an I/O error occurs. 641 * @since 1.21 642 */ 643 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, 644 final Map<String, String> globalPaxHeaders, final long headerSize) throws IOException { 645 final Map<String, String> headers = new HashMap<>(globalPaxHeaders); 646 Long offset = null; 647 // Format is "length keyword=value\n"; 648 int totalRead = 0; 649 while (true) { // get length 650 int ch; 651 int len = 0; 652 int read = 0; 653 while ((ch = inputStream.read()) != -1) { 654 read++; 655 totalRead++; 656 if (ch == '\n') { // blank line in header 657 break; 658 } 659 if (ch == ' ') { // End of length string 660 // Get keyword 661 final ByteArrayOutputStream coll = new ByteArrayOutputStream(); 662 while ((ch = inputStream.read()) != -1) { 663 read++; 664 totalRead++; 665 if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) { 666 break; 667 } 668 if (ch == '=') { // end of keyword 669 final String keyword = coll.toString(StandardCharsets.UTF_8); 670 // Get rest of entry 671 final int restLen = len - read; 672 if (restLen <= 1) { // only NL 673 headers.remove(keyword); 674 } else if (headerSize >= 0 && restLen > headerSize - totalRead) { 675 throw new IOException("Paxheader value size " + restLen + " exceeds size of header record"); 676 } else { 677 final byte[] rest = IOUtils.readRange(inputStream, restLen); 678 final int got = rest.length; 679 if (got != restLen) { 680 throw new IOException("Failed to read Paxheader. Expected " + restLen + " bytes, read " + got); 681 } 682 totalRead += restLen; 683 // Drop trailing NL 684 if (rest[restLen - 1] != '\n') { 685 throw new IOException("Failed to read Paxheader." + "Value should end with a newline"); 686 } 687 final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8); 688 headers.put(keyword, value); 689 690 // for 0.0 PAX Headers 691 if (keyword.equals(TarGnuSparseKeys.OFFSET)) { 692 if (offset != null) { 693 // previous GNU.sparse.offset header but no numBytes 694 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 695 } 696 try { 697 offset = Long.valueOf(value); 698 } catch (final NumberFormatException ex) { 699 throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains a non-numeric value"); 700 } 701 if (offset < 0) { 702 throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value"); 703 } 704 } 705 706 // for 0.0 PAX Headers 707 if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) { 708 if (offset == null) { 709 throw new IOException( 710 "Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up."); 711 } 712 final long numbytes = ParsingUtils.parseLongValue(value); 713 if (numbytes < 0) { 714 throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value"); 715 } 716 sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes)); 717 offset = null; 718 } 719 } 720 break; 721 } 722 coll.write((byte) ch); 723 } 724 break; // Processed single header 725 } 726 // COMPRESS-530 : throw if we encounter a non-number while reading length 727 if (ch < '0' || ch > '9') { 728 throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length"); 729 } 730 len *= 10; 731 len += ch - '0'; 732 } 733 if (ch == -1) { // EOF 734 break; 735 } 736 } 737 if (offset != null) { 738 // offset but no numBytes 739 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 740 } 741 return headers; 742 } 743 744 /** 745 * Parses the content of a PAX 1.0 sparse block. 746 * 747 * @since 1.20 748 * @param buffer The buffer from which to parse. 749 * @param offset The offset into the buffer from which to parse. 750 * @return a parsed sparse struct 751 */ 752 public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) { 753 final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN); 754 final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN); 755 return new TarArchiveStructSparse(sparseOffset, sparseNumbytes); 756 } 757 758 /** 759 * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers 760 * delimited by newlines. 761 * 762 * @param inputStream the input stream of the tar file 763 * @return the decimal number delimited by '\n', and the bytes read from input stream 764 * @throws IOException 765 */ 766 private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException { 767 int number; 768 long result = 0; 769 long bytesRead = 0; 770 while ((number = inputStream.read()) != '\n') { 771 bytesRead += 1; 772 if (number == -1) { 773 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format"); 774 } 775 if (number < '0' || number > '9') { 776 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block"); 777 } 778 result = result * 10 + (number - '0'); 779 } 780 bytesRead += 1; 781 return new long[] { result, bytesRead }; 782 } 783 784 /** 785 * @since 1.21 786 */ 787 static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException { 788 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 789 for (int i = 0; i < entries; i++) { 790 try { 791 final TarArchiveStructSparse sparseHeader = parseSparse(buffer, 792 offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN)); 793 if (sparseHeader.getOffset() < 0) { 794 throw new IOException("Corrupted TAR archive, sparse entry with negative offset"); 795 } 796 if (sparseHeader.getNumbytes() < 0) { 797 throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes"); 798 } 799 sparseHeaders.add(sparseHeader); 800 } catch (final IllegalArgumentException ex) { 801 // thrown internally by parseOctalOrBinary 802 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex); 803 } 804 } 805 return Collections.unmodifiableList(sparseHeaders); 806 } 807 808 /** 809 * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: <blockquote> The checksum is calculated by taking the sum of the 810 * unsigned byte values of the header block with the eight checksum bytes taken to be ASCII spaces (decimal value 32). It is stored as a six digit octal 811 * number with leading zeroes followed by a NUL and then a space. Various implementations do not adhere to this format. For better compatibility, ignore 812 * leading and trailing whitespace, and get the first six digits. In addition, some historic tar implementations treated bytes as signed. Implementations 813 * typically calculate the checksum both ways, and treat it as good if either the signed or unsigned sum matches the included checksum. </blockquote> 814 * <p> 815 * The return value of this method should be treated as a best-effort heuristic rather than an absolute and final truth. The checksum verification logic may 816 * well evolve over time as more special cases are encountered. 817 * </p> 818 * 819 * @param header tar header 820 * @return whether the checksum is reasonably good 821 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> 822 * @since 1.5 823 */ 824 public static boolean verifyCheckSum(final byte[] header) { 825 final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN); 826 long unsignedSum = 0; 827 long signedSum = 0; 828 for (int i = 0; i < header.length; i++) { 829 byte b = header[i]; 830 if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) { 831 b = ' '; 832 } 833 unsignedSum += 0xff & b; 834 signedSum += b; 835 } 836 return storedSum == unsignedSum || storedSum == signedSum; 837 } 838 839 /** Prevents instantiation. */ 840 private TarUtils() { 841 } 842 843}