// ParallelScatterZipCreator.java

/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.commons.compress.archivers.zip;

import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.createZipArchiveEntryRequest;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Deque;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.zip.Deflater;

import org.apache.commons.compress.parallel.InputStreamSupplier;
import org.apache.commons.compress.parallel.ScatterGatherBackingStore;
import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier;

/**
 * Builds a ZIP archive in parallel. Each worker thread compresses into its own thread-local {@link ScatterZipOutputStream}; {@link #writeTo writeTo}
 * then copies the compressed entries into a single {@link ZipArchiveOutputStream}.
 * <p>
 * Note that until 1.18, this class generally made no guarantees about the order of things written to the output file. Things that needed to come in a
 * specific order (manifests, directories) had to be handled by the client of this class, usually by writing these things to the
 * {@link ZipArchiveOutputStream} <em>before</em> calling {@link #writeTo writeTo} on this class.
 * </p>
 * <p>
 * A client may supply its own {@link java.util.concurrent.ExecutorService}; for reasons of memory model consistency that executor is shut down by this
 * class prior to completion.
 * </p>
 *
 * @since 1.10
 */
public class ParallelScatterZipCreator {

    /** Executor running the compression tasks; {@link #writeTo writeTo} shuts it down. */
    private final ExecutorService executorService;

    /** Source of the backing store behind every scatter stream created by this instance. */
    private final ScatterGatherBackingStoreSupplier backingStoreSupplier;

    /** Compression level handed to every {@link StreamCompressor} created by this instance. */
    private final int compressionLevel;

    /** Every scatter stream created so far, remembered so that all of them can be closed. */
    private final Deque<ScatterZipOutputStream> streams = new ConcurrentLinkedDeque<>();

    /** One future per submitted task, in submission order. */
    private final Deque<Future<? extends ScatterZipOutputStream>> futures = new ConcurrentLinkedDeque<>();

    /** Wall-clock time (milliseconds) at which this instance was constructed. */
    private final long startedAt = System.currentTimeMillis();

    /** Wall-clock time (milliseconds) at which {@link #writeTo writeTo} saw the compression phase finish. */
    private long compressionDoneAt;

    /** Wall-clock time (milliseconds) at which {@link #writeTo writeTo} finished copying into the target stream. */
    private long scatterDoneAt;

    /**
     * Lazily creates one scatter stream per thread and registers it in {@link #streams}.
     * <p>
     * Deliberately an anonymous class rather than a lambda: the body reads the blank final field {@code backingStoreSupplier} from a field initializer.
     * </p>
     */
    private final ThreadLocal<ScatterZipOutputStream> tlScatterStreams = new ThreadLocal<ScatterZipOutputStream>() {
        @Override
        protected ScatterZipOutputStream initialValue() {
            final ScatterZipOutputStream created;
            try {
                created = createDeferred(backingStoreSupplier);
            } catch (final IOException e) {
                // initialValue() cannot declare checked exceptions
                throw new UncheckedIOException(e); // NOSONAR
            }
            streams.add(created);
            return created;
        }
    };

    /**
     * Constructs a ParallelScatterZipCreator backed by a fixed thread pool with one thread per available processor, as reported by
     * {@link Runtime#availableProcessors}.
     */
    public ParallelScatterZipCreator() {
        this(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
    }

    /**
     * Constructs a ParallelScatterZipCreator using a {@link DefaultBackingStoreSupplier} created with a {@code null} argument.
     *
     * @param executorService The executorService to use for parallel scheduling. For technical reasons, this will be shut down by this class.
     */
    public ParallelScatterZipCreator(final ExecutorService executorService) {
        this(executorService, new DefaultBackingStoreSupplier(null));
    }

    /**
     * Constructs a ParallelScatterZipCreator using {@link Deflater#DEFAULT_COMPRESSION}.
     *
     * @param executorService      The executorService to use. For technical reasons, this will be shut down by this class.
     * @param backingStoreSupplier The supplier of backing store which shall be used
     */
    public ParallelScatterZipCreator(final ExecutorService executorService, final ScatterGatherBackingStoreSupplier backingStoreSupplier) {
        this(executorService, backingStoreSupplier, Deflater.DEFAULT_COMPRESSION);
    }

    /**
     * Constructs a ParallelScatterZipCreator with an explicit compression level.
     *
     * @param executorService      The executorService to use. For technical reasons, this will be shut down by this class.
     * @param backingStoreSupplier The supplier of backing store which shall be used
     * @param compressionLevel     The compression level used in compression, this value should be -1(default level) or between 0~9.
     * @throws IllegalArgumentException if the compression level is illegal
     * @since 1.21
     */
    public ParallelScatterZipCreator(final ExecutorService executorService, final ScatterGatherBackingStoreSupplier backingStoreSupplier,
            final int compressionLevel) throws IllegalArgumentException {
        // Accept either an explicit level in [NO_COMPRESSION, BEST_COMPRESSION] or the DEFAULT_COMPRESSION marker.
        final boolean explicitLevel = compressionLevel >= Deflater.NO_COMPRESSION && compressionLevel <= Deflater.BEST_COMPRESSION;
        if (!explicitLevel && compressionLevel != Deflater.DEFAULT_COMPRESSION) {
            throw new IllegalArgumentException("Compression level is expected between -1~9");
        }

        this.executorService = executorService;
        this.backingStoreSupplier = backingStoreSupplier;
        this.compressionLevel = compressionLevel;
    }

    /**
     * Adds an archive entry to this archive by creating a compression task for it and submitting that task.
     * <p>
     * This method is expected to be called from a single client thread
     * </p>
     *
     * @param zipArchiveEntry The entry to add.
     * @param source          The source input stream supplier
     */
    public void addArchiveEntry(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) {
        final Callable<ScatterZipOutputStream> task = createCallable(zipArchiveEntry, source);
        submitStreamAwareCallable(task);
    }

    /**
     * Adds an archive entry to this archive by creating a compression task for it and submitting that task.
     * <p>
     * This method is expected to be called from a single client thread
     * </p>
     *
     * @param zipArchiveEntryRequestSupplier Should supply the entry to be added.
     * @since 1.13
     */
    public void addArchiveEntry(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
        final Callable<ScatterZipOutputStream> task = createCallable(zipArchiveEntryRequestSupplier);
        submitStreamAwareCallable(task);
    }

    /**
     * Closes every scatter stream registered so far, ignoring close failures.
     */
    private void closeAll() {
        for (final ScatterZipOutputStream stream : streams) {
            closeQuietly(stream);
        }
    }

    /**
     * Closes a single scatter stream on a best-effort basis.
     *
     * @param stream the stream to close
     */
    private static void closeQuietly(final ScatterZipOutputStream stream) {
        try {
            stream.close();
        } catch (final IOException ignored) {
            // no way to properly log this
        }
    }

    /**
     * Creates a callable that will compress the given archive entry.
     *
     * <p>
     * This method is expected to be called from a single client thread.
     * </p>
     *
     * Consider using {@link #addArchiveEntry addArchiveEntry}, which wraps this method and {@link #submitStreamAwareCallable submitStreamAwareCallable}.
     * Calling {@link #createCallable createCallable} and {@link #submitStreamAwareCallable submitStreamAwareCallable} directly is mostly useful when the
     * callable should be wrapped in something that the supplied {@link ExecutorService} can prioritize, for instance to process large or slow files
     * first. Since the creation of the {@link ExecutorService} is handled by the client, all of this is up to the client.
     *
     * @param zipArchiveEntry The entry to add.
     * @param source          The source input stream supplier
     * @return A callable that should subsequently be passed to #submitStreamAwareCallable, possibly in a wrapped/adapted from. The value of this callable
     *         is not used, but any exceptions happening inside the compression will be propagated through the callable.
     * @throws IllegalArgumentException if no compression method has been set on the entry
     */
    public final Callable<ScatterZipOutputStream> createCallable(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) {
        if (zipArchiveEntry.getMethod() == ZipMethod.UNKNOWN_CODE) {
            throw new IllegalArgumentException("Method must be set on zipArchiveEntry: " + zipArchiveEntry);
        }
        // The request is built right away, on the calling thread; only the compression itself is deferred.
        final ZipArchiveEntryRequest request = createZipArchiveEntryRequest(zipArchiveEntry, source);
        return () -> {
            final ScatterZipOutputStream localStream = tlScatterStreams.get();
            localStream.addArchiveEntry(request);
            return localStream;
        };
    }

    /**
     * Creates a callable that will compress archive entry supplied by {@link ZipArchiveEntryRequestSupplier}.
     *
     * <p>
     * This method is expected to be called from a single client thread.
     * </p>
     *
     * The same as {@link #createCallable(ZipArchiveEntry, InputStreamSupplier)}, except that the request is obtained from the given
     * {@link ZipArchiveEntryRequestSupplier} only when the callable runs.
     *
     * @see #createCallable(ZipArchiveEntry, InputStreamSupplier)
     *
     * @param zipArchiveEntryRequestSupplier Should supply the entry to be added.
     * @return A callable that should subsequently be passed to #submitStreamAwareCallable, possibly in a wrapped/adapted from. The value of this callable
     *         is not used, but any exceptions happening inside the compression will be propagated through the callable.
     * @since 1.13
     */
    public final Callable<ScatterZipOutputStream> createCallable(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
        return () -> {
            // Obtain the thread's stream first, then ask the supplier for the request.
            final ScatterZipOutputStream localStream = tlScatterStreams.get();
            localStream.addArchiveEntry(zipArchiveEntryRequestSupplier.get());
            return localStream;
        };
    }

    /**
     * Creates a scatter stream on top of a fresh backing store obtained from the given supplier, compressing at this instance's compression level.
     *
     * @param scatterGatherBackingStoreSupplier the supplier asked for the backing store
     * @return the new scatter stream
     * @throws IOException if the backing store cannot be obtained
     */
    @SuppressWarnings("resource") // Caller closes
    private ScatterZipOutputStream createDeferred(final ScatterGatherBackingStoreSupplier scatterGatherBackingStoreSupplier) throws IOException {
        final ScatterGatherBackingStore store = scatterGatherBackingStoreSupplier.get();
        // lifecycle of the compressor is bound to the ScatterZipOutputStream returned
        final StreamCompressor compressor = StreamCompressor.create(compressionLevel, store); // NOSONAR
        return new ScatterZipOutputStream(store, compressor);
    }

    /**
     * Gets the overall statistics of the compression run. The first value passed to {@link ScatterStatistics} is the time from construction of this
     * instance to the end of the compression phase; the second is the time from the end of the compression phase to the end of the copy into the
     * target stream. Both are in milliseconds, and both timestamps are only recorded by {@link #writeTo writeTo}.
     *
     * @return the statistics of this run
     */
    public ScatterStatistics getStatisticsMessage() {
        final long compressionElapsed = compressionDoneAt - startedAt;
        final long mergingElapsed = scatterDoneAt - compressionDoneAt;
        return new ScatterStatistics(compressionElapsed, mergingElapsed);
    }

    /**
     * Submits a callable for compression. The callable's own result is discarded; the submitted task reports the calling worker thread's scatter stream
     * instead.
     *
     * @see ParallelScatterZipCreator#createCallable for details of if/when to use this.
     *
     * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
     */
    public final void submit(final Callable<? extends Object> callable) {
        final Callable<ScatterZipOutputStream> streamAware = () -> {
            callable.call();
            return tlScatterStreams.get();
        };
        submitStreamAwareCallable(streamAware);
    }

    /**
     * Submits a callable for compression and remembers its future so that {@link #writeTo writeTo} can wait for it.
     *
     * @see ParallelScatterZipCreator#createCallable for details of if/when to use this.
     *
     * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
     * @since 1.19
     */
    public final void submitStreamAwareCallable(final Callable<? extends ScatterZipOutputStream> callable) {
        final Future<? extends ScatterZipOutputStream> pending = executorService.submit(callable);
        futures.add(pending);
    }

    /**
     * Writes the contents this to the target {@link ZipArchiveOutputStream}.
     * <p>
     * It may be beneficial to write things like directories and manifest files to the targetStream before calling this method.
     * </p>
     * <p>
     * Calling this method will shut down the {@link ExecutorService} used by this class. If any of the {@link Callable}s
     * {@link #submitStreamAwareCallable submit}ted to this instance throws an exception, the archive can not be created properly and this method will
     * throw an exception. All scatter streams are closed before this method returns, whether it succeeds or fails.
     * </p>
     *
     * @param targetStream The {@link ZipArchiveOutputStream} to receive the contents of the scatter streams
     * @throws IOException          If writing fails
     * @throws InterruptedException If we get interrupted
     * @throws ExecutionException   If something happens in the parallel execution
     */
    public void writeTo(final ZipArchiveOutputStream targetStream) throws IOException, InterruptedException, ExecutionException {
        try {
            awaitCompression();

            // It is important that all threads terminate before we go on, ensure happens-before relationship
            compressionDoneAt = System.currentTimeMillis();

            gatherInto(targetStream);

            scatterDoneAt = System.currentTimeMillis();
        } finally {
            closeAll();
        }
    }

    /**
     * Waits for every submitted task, shuts the executor down and waits for it to terminate.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws ExecutionException   if one of the submitted tasks failed
     */
    private void awaitCompression() throws InterruptedException, ExecutionException {
        // Make sure we catch any exceptions from parallel phase
        try {
            for (final Future<?> pending : futures) {
                pending.get();
            }
        } finally {
            // shut down even when a task failed
            executorService.shutdown();
        }

        // == Infinity. We really *must* wait for this to complete
        executorService.awaitTermination(1000 * 60L, TimeUnit.SECONDS);
    }

    /**
     * Copies one entry per completed task, in submission order, into the target stream and then closes every scatter stream.
     *
     * @param targetStream the stream receiving the entries
     * @throws IOException          if writing or closing fails
     * @throws InterruptedException if the calling thread is interrupted while reading a future
     * @throws ExecutionException   if one of the submitted tasks failed
     */
    private void gatherInto(final ZipArchiveOutputStream targetStream) throws IOException, InterruptedException, ExecutionException {
        for (final Future<? extends ScatterZipOutputStream> completed : futures) {
            final ScatterZipOutputStream producer = completed.get();
            producer.zipEntryWriter().writeNextZipEntry(targetStream);
        }

        for (final ScatterZipOutputStream stream : streams) {
            stream.close();
        }
    }
}