001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.OutputStream; 025import java.security.AccessController; 026import java.security.PrivilegedAction; 027import java.util.Collections; 028import java.util.Locale; 029import java.util.ServiceLoader; 030import java.util.Set; 031import java.util.SortedMap; 032import java.util.TreeMap; 033 034import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; 035import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; 036import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; 037import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; 038import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; 039import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; 040import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; 041import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream; 042import org.apache.commons.compress.archivers.sevenz.SevenZFile; 043import org.apache.commons.compress.archivers.tar.TarArchiveEntry; 044import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; 045import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; 046import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; 047import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; 048import org.apache.commons.compress.utils.IOUtils; 049import org.apache.commons.compress.utils.Sets; 050 051/** 052 * Factory to create Archive[In|Out]putStreams from names or the first bytes of the InputStream. In order to add other implementations, you should extend 053 * ArchiveStreamFactory and override the appropriate methods (and call their implementation from super of course). 054 * 055 * Compressing a ZIP-File: 056 * 057 * <pre> 058 * final OutputStream out = Files.newOutputStream(output.toPath()); 059 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out); 060 * 061 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); 062 * IOUtils.copy(Files.newInputStream(file1.toPath()), os); 063 * os.closeArchiveEntry(); 064 * 065 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); 066 * IOUtils.copy(Files.newInputStream(file2.toPath()), os); 067 * os.closeArchiveEntry(); 068 * os.close(); 069 * </pre> 070 * 071 * Decompressing a ZIP-File: 072 * 073 * <pre> 074 * final InputStream is = Files.newInputStream(input.toPath()); 075 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is); 076 * ZipArchiveEntry entry = (ZipArchiveEntry) in.getNextEntry(); 077 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName())); 078 * IOUtils.copy(in, out); 079 * out.close(); 080 * in.close(); 081 * </pre> 082 * 083 * @Immutable provided that the deprecated method setEntryEncoding is not used. 084 * @ThreadSafe even if the deprecated method setEntryEncoding is used 085 */ 086public class ArchiveStreamFactory implements ArchiveStreamProvider { 087 088 private static final int TAR_HEADER_SIZE = 512; 089 090 private static final int TAR_TEST_ENTRY_COUNT = 10; 091 092 private static final int DUMP_SIGNATURE_SIZE = 32; 093 094 private static final int SIGNATURE_SIZE = 12; 095 096 /** 097 * The singleton instance using the platform default encoding. 098 * 099 * @since 1.21 100 */ 101 public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory(); 102 103 /** 104 * Constant (value {@value}) used to identify the APK archive format. 105 * <p> 106 * APK file extensions are .apk, .xapk, .apks, .apkm 107 * </p> 108 * 109 * @since 1.22 110 */ 111 public static final String APK = "apk"; 112 113 /** 114 * Constant (value {@value}) used to identify the XAPK archive format. 115 * <p> 116 * APK file extensions are .apk, .xapk, .apks, .apkm 117 * </p> 118 * 119 * @since 1.22 120 */ 121 public static final String XAPK = "xapk"; 122 123 /** 124 * Constant (value {@value}) used to identify the APKS archive format. 125 * <p> 126 * APK file extensions are .apk, .xapk, .apks, .apkm 127 * </p> 128 * 129 * @since 1.22 130 */ 131 public static final String APKS = "apks"; 132 133 /** 134 * Constant (value {@value}) used to identify the APKM archive format. 135 * <p> 136 * APK file extensions are .apk, .xapk, .apks, .apkm 137 * </p> 138 * 139 * @since 1.22 140 */ 141 public static final String APKM = "apkm"; 142 143 /** 144 * Constant (value {@value}) used to identify the AR archive format. 145 * 146 * @since 1.1 147 */ 148 public static final String AR = "ar"; 149 150 /** 151 * Constant (value {@value}) used to identify the ARJ archive format. Not supported as an output stream type. 152 * 153 * @since 1.6 154 */ 155 public static final String ARJ = "arj"; 156 157 /** 158 * Constant (value {@value}) used to identify the CPIO archive format. 159 * 160 * @since 1.1 161 */ 162 public static final String CPIO = "cpio"; 163 164 /** 165 * Constant (value {@value}) used to identify the UNIX DUMP archive format. Not supported as an output stream type. 166 * 167 * @since 1.3 168 */ 169 public static final String DUMP = "dump"; 170 171 /** 172 * Constant (value {@value}) used to identify the JAR archive format. 173 * 174 * @since 1.1 175 */ 176 public static final String JAR = "jar"; 177 178 /** 179 * Constant used to identify the TAR archive format. 180 * 181 * @since 1.1 182 */ 183 public static final String TAR = "tar"; 184 185 /** 186 * Constant (value {@value}) used to identify the ZIP archive format. 187 * 188 * @since 1.1 189 */ 190 public static final String ZIP = "zip"; 191 192 /** 193 * Constant (value {@value}) used to identify the 7z archive format. 194 * 195 * @since 1.8 196 */ 197 public static final String SEVEN_Z = "7z"; 198 199 private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() { 200 return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader()); 201 } 202 203 /** 204 * Try to determine the type of Archiver 205 * 206 * @param in input stream 207 * @return type of archiver if found 208 * @throws ArchiveException if an archiver cannot be detected in the stream 209 * @since 1.14 210 */ 211 public static String detect(final InputStream in) throws ArchiveException { 212 if (in == null) { 213 throw new IllegalArgumentException("Stream must not be null."); 214 } 215 216 if (!in.markSupported()) { 217 throw new IllegalArgumentException("Mark is not supported."); 218 } 219 220 final byte[] signature = new byte[SIGNATURE_SIZE]; 221 in.mark(signature.length); 222 int signatureLength = -1; 223 try { 224 signatureLength = IOUtils.readFully(in, signature); 225 in.reset(); 226 } catch (final IOException e) { 227 throw new ArchiveException("IOException while reading signature.", e); 228 } 229 230 // For now JAR files are detected as ZIP files. 231 if (ZipArchiveInputStream.matches(signature, signatureLength)) { 232 return ZIP; 233 } 234 // For now JAR files are detected as ZIP files. 235 if (JarArchiveInputStream.matches(signature, signatureLength)) { 236 return JAR; 237 } 238 if (ArArchiveInputStream.matches(signature, signatureLength)) { 239 return AR; 240 } 241 if (CpioArchiveInputStream.matches(signature, signatureLength)) { 242 return CPIO; 243 } 244 if (ArjArchiveInputStream.matches(signature, signatureLength)) { 245 return ARJ; 246 } 247 if (SevenZFile.matches(signature, signatureLength)) { 248 return SEVEN_Z; 249 } 250 251 // Dump needs a bigger buffer to check the signature; 252 final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE]; 253 in.mark(dumpsig.length); 254 try { 255 signatureLength = IOUtils.readFully(in, dumpsig); 256 in.reset(); 257 } catch (final IOException e) { 258 throw new ArchiveException("IOException while reading dump signature", e); 259 } 260 if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) { 261 return DUMP; 262 } 263 264 // Tar needs an even bigger buffer to check the signature; read the first block 265 final byte[] tarHeader = new byte[TAR_HEADER_SIZE]; 266 in.mark(tarHeader.length); 267 try { 268 signatureLength = IOUtils.readFully(in, tarHeader); 269 in.reset(); 270 } catch (final IOException e) { 271 throw new ArchiveException("IOException while reading tar signature", e); 272 } 273 if (TarArchiveInputStream.matches(tarHeader, signatureLength)) { 274 return TAR; 275 } 276 277 // COMPRESS-117 278 if (signatureLength >= TAR_HEADER_SIZE) { 279 try (TarArchiveInputStream inputStream = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader))) { 280 // COMPRESS-191 - verify the header checksum 281 // COMPRESS-644 - do not allow zero byte file entries 282 TarArchiveEntry entry = inputStream.getNextEntry(); 283 // try to find the first non-directory entry within the first 10 entries. 284 int count = 0; 285 while (entry != null && entry.isDirectory() && entry.isCheckSumOK() && count++ < TAR_TEST_ENTRY_COUNT) { 286 entry = inputStream.getNextEntry(); 287 } 288 if (entry != null && entry.isCheckSumOK() && !entry.isDirectory() && entry.getSize() > 0 || count > 0) { 289 return TAR; 290 } 291 } catch (final Exception ignored) { 292 // can generate IllegalArgumentException as well as IOException auto-detection, simply not a TAR ignored 293 } 294 } 295 throw new ArchiveException("No Archiver found for the stream signature"); 296 } 297 298 /** 299 * Constructs a new sorted map from input stream provider names to provider objects. 300 * 301 * <p> 302 * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more 303 * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified. 304 * </p> 305 * 306 * <p> 307 * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method 308 * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection. 309 * </p> 310 * 311 * <p> 312 * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine. 313 * </p> 314 * 315 * @return An immutable, map from names to provider objects 316 * @since 1.13 317 */ 318 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() { 319 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 320 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 321 putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map); 322 archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map)); 323 return map; 324 }); 325 } 326 327 /** 328 * Constructs a new sorted map from output stream provider names to provider objects. 329 * 330 * <p> 331 * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more 332 * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified. 333 * </p> 334 * 335 * <p> 336 * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method 337 * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection. 338 * </p> 339 * 340 * <p> 341 * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine. 342 * </p> 343 * 344 * @return An immutable, map from names to provider objects 345 * @since 1.13 346 */ 347 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() { 348 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 349 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 350 putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map); 351 archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map)); 352 return map; 353 }); 354 } 355 356 static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) { 357 names.forEach(name -> map.put(toKey(name), provider)); 358 } 359 360 private static String toKey(final String name) { 361 return name.toUpperCase(Locale.ROOT); 362 } 363 364 /** 365 * Entry encoding, null for the default. 366 */ 367 private volatile String entryEncoding; 368 369 private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders; 370 371 private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders; 372 373 /** 374 * Constructs an instance using the platform default encoding. 375 */ 376 public ArchiveStreamFactory() { 377 this(null); 378 } 379 380 /** 381 * Constructs an instance using the specified encoding. 382 * 383 * @param entryEncoding the encoding to be used. 384 * 385 * @since 1.10 386 */ 387 public ArchiveStreamFactory(final String entryEncoding) { 388 this.entryEncoding = entryEncoding; 389 } 390 391 /** 392 * Creates an archive input stream from an input stream, autodetecting the archive type from the first few bytes of the stream. The InputStream must support 393 * marks, like BufferedInputStream. 394 * 395 * @param <I> The {@link ArchiveInputStream} type. 396 * @param in the input stream 397 * @return the archive input stream 398 * @throws ArchiveException if the archiver name is not known 399 * @throws StreamingNotSupportedException if the format cannot be read from a stream 400 * @throws IllegalArgumentException if the stream is null or does not support mark 401 */ 402 public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final InputStream in) throws ArchiveException { 403 return createArchiveInputStream(detect(in), in); 404 } 405 406 /** 407 * Creates an archive input stream from an archiver name and an input stream. 408 * 409 * @param <I> The {@link ArchiveInputStream} type. 410 * @param archiverName the archive name, i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or 411 * {@value #SEVEN_Z} 412 * @param in the input stream 413 * @return the archive input stream 414 * @throws ArchiveException if the archiver name is not known 415 * @throws StreamingNotSupportedException if the format cannot be read from a stream 416 * @throws IllegalArgumentException if the archiver name or stream is null 417 */ 418 public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in) 419 throws ArchiveException { 420 return createArchiveInputStream(archiverName, in, entryEncoding); 421 } 422 423 @SuppressWarnings("unchecked") 424 @Override 425 public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in, 426 final String actualEncoding) throws ArchiveException { 427 428 if (archiverName == null) { 429 throw new IllegalArgumentException("Archiver name must not be null."); 430 } 431 432 if (in == null) { 433 throw new IllegalArgumentException("InputStream must not be null."); 434 } 435 436 if (AR.equalsIgnoreCase(archiverName)) { 437 return (I) new ArArchiveInputStream(in); 438 } 439 if (ARJ.equalsIgnoreCase(archiverName)) { 440 if (actualEncoding != null) { 441 return (I) new ArjArchiveInputStream(in, actualEncoding); 442 } 443 return (I) new ArjArchiveInputStream(in); 444 } 445 if (ZIP.equalsIgnoreCase(archiverName)) { 446 if (actualEncoding != null) { 447 return (I) new ZipArchiveInputStream(in, actualEncoding); 448 } 449 return (I) new ZipArchiveInputStream(in); 450 } 451 if (TAR.equalsIgnoreCase(archiverName)) { 452 if (actualEncoding != null) { 453 return (I) new TarArchiveInputStream(in, actualEncoding); 454 } 455 return (I) new TarArchiveInputStream(in); 456 } 457 if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) { 458 if (actualEncoding != null) { 459 return (I) new JarArchiveInputStream(in, actualEncoding); 460 } 461 return (I) new JarArchiveInputStream(in); 462 } 463 if (CPIO.equalsIgnoreCase(archiverName)) { 464 if (actualEncoding != null) { 465 return (I) new CpioArchiveInputStream(in, actualEncoding); 466 } 467 return (I) new CpioArchiveInputStream(in); 468 } 469 if (DUMP.equalsIgnoreCase(archiverName)) { 470 if (actualEncoding != null) { 471 return (I) new DumpArchiveInputStream(in, actualEncoding); 472 } 473 return (I) new DumpArchiveInputStream(in); 474 } 475 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 476 throw new StreamingNotSupportedException(SEVEN_Z); 477 } 478 479 final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName)); 480 if (archiveStreamProvider != null) { 481 return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding); 482 } 483 484 throw new ArchiveException("Archiver: " + archiverName + " not found."); 485 } 486 487 /** 488 * Creates an archive output stream from an archiver name and an output stream. 489 * 490 * @param <O> The {@link ArchiveOutputStream} type. 491 * @param archiverName the archive name, i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} 492 * @param out the output stream 493 * @return the archive output stream 494 * @throws ArchiveException if the archiver name is not known 495 * @throws StreamingNotSupportedException if the format cannot be written to a stream 496 * @throws IllegalArgumentException if the archiver name or stream is null 497 */ 498 public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out) 499 throws ArchiveException { 500 return createArchiveOutputStream(archiverName, out, entryEncoding); 501 } 502 503 @SuppressWarnings("unchecked") 504 @Override 505 public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out, 506 final String actualEncoding) throws ArchiveException { 507 if (archiverName == null) { 508 throw new IllegalArgumentException("Archiver name must not be null."); 509 } 510 if (out == null) { 511 throw new IllegalArgumentException("OutputStream must not be null."); 512 } 513 514 if (AR.equalsIgnoreCase(archiverName)) { 515 return (O) new ArArchiveOutputStream(out); 516 } 517 if (ZIP.equalsIgnoreCase(archiverName)) { 518 final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out); 519 if (actualEncoding != null) { 520 zip.setEncoding(actualEncoding); 521 } 522 return (O) zip; 523 } 524 if (TAR.equalsIgnoreCase(archiverName)) { 525 if (actualEncoding != null) { 526 return (O) new TarArchiveOutputStream(out, actualEncoding); 527 } 528 return (O) new TarArchiveOutputStream(out); 529 } 530 if (JAR.equalsIgnoreCase(archiverName)) { 531 if (actualEncoding != null) { 532 return (O) new JarArchiveOutputStream(out, actualEncoding); 533 } 534 return (O) new JarArchiveOutputStream(out); 535 } 536 if (CPIO.equalsIgnoreCase(archiverName)) { 537 if (actualEncoding != null) { 538 return (O) new CpioArchiveOutputStream(out, actualEncoding); 539 } 540 return (O) new CpioArchiveOutputStream(out); 541 } 542 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 543 throw new StreamingNotSupportedException(SEVEN_Z); 544 } 545 546 final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName)); 547 if (archiveStreamProvider != null) { 548 return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding); 549 } 550 551 throw new ArchiveException("Archiver: " + archiverName + " not found."); 552 } 553 554 public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() { 555 if (archiveInputStreamProviders == null) { 556 archiveInputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveInputStreamProviders()); 557 } 558 return archiveInputStreamProviders; 559 } 560 561 public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() { 562 if (archiveOutputStreamProviders == null) { 563 archiveOutputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders()); 564 } 565 return archiveOutputStreamProviders; 566 } 567 568 /** 569 * Gets the encoding to use for arj, jar, ZIP, dump, cpio and tar files, or null for the archiver default. 570 * 571 * @return entry encoding, or null for the archiver default 572 * @since 1.5 573 */ 574 public String getEntryEncoding() { 575 return entryEncoding; 576 } 577 578 @Override 579 public Set<String> getInputStreamArchiveNames() { 580 return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z); 581 } 582 583 @Override 584 public Set<String> getOutputStreamArchiveNames() { 585 return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z); 586 } 587 588 /** 589 * Sets the encoding to use for arj, jar, ZIP, dump, cpio and tar files. Use null for the archiver default. 590 * 591 * @param entryEncoding the entry encoding, null uses the archiver default. 592 * @since 1.5 593 * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding 594 */ 595 @Deprecated 596 public void setEntryEncoding(final String entryEncoding) { 597 this.entryEncoding = entryEncoding; 598 } 599 600}