// WriterOutputStream.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.output;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.charset.CharsetDecoders;

/**
 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
 * correctly.
 * <p>
 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
 * </p>
 * <p>
 * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
 * </p>
 * <p>
 * To build an instance, use {@link Builder}.
 * </p>
 * <pre>
 * OutputStream out = ...
 * Charset cs = ...
 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
 * WriterOutputStream out2 = WriterOutputStream.builder()
 *   .setWriter(writer)
 *   .setCharset(cs)
 *   .get();
 * </pre>
 * <p>
 * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
 * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
 * {@link WriterOutputStream} pushes it to the underlying stream.
 * </p>
 * <p>
 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
 * known to represent character data that must be decoded for further use.
 * </p>
 * <p>
 * Instances of {@link WriterOutputStream} are not thread safe.
 * </p>
 *
 * @see Builder
 * @see org.apache.commons.io.input.ReaderInputStream
 * @since 2.0
 */
public class WriterOutputStream extends OutputStream {

    // @formatter:off
    /**
     * Builds a new {@link WriterOutputStream}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * WriterOutputStream s = WriterOutputStream.builder()
     *   .setPath(path)
     *   .setBufferSize(8192)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .setWriteImmediately(false)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {

        /** Decoder handed to the built stream; replaced whenever the charset or the decoder is set. */
        private CharsetDecoder charsetDecoder;

        /** Whether the built stream flushes its output buffer after each write operation. */
        private boolean writeImmediately;

        /**
         * Constructs a new Builder whose decoder is a new decoder for the builder's current charset.
         */
        public Builder() {
            this.charsetDecoder = getCharset().newDecoder();
        }

        /**
         * Builds a new {@link WriterOutputStream}.
         * <p>
         * You must set input that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getWriter()}</li>
         * <li>{@link #getBufferSize()}</li>
         * <li>charsetDecoder</li>
         * <li>writeImmediately</li>
         * </ul>
         *
         * @return a new instance.
         * @throws UnsupportedOperationException if the origin cannot provide a Writer.
         * @throws IOException                   if an I/O error occurs.
         * @see #getWriter()
         */
        @SuppressWarnings("resource")
        @Override
        public WriterOutputStream get() throws IOException {
            return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
        }

        /**
         * Sets the charset and replaces the current charset decoder with a new decoder for the resulting charset.
         *
         * @param charset the charset.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final Charset charset) {
            super.setCharset(charset);
            // Read the charset back from the superclass so the decoder matches whatever it resolved.
            this.charsetDecoder = getCharset().newDecoder();
            return this;
        }

        /**
         * Sets the charset by name and replaces the current charset decoder with a new decoder for the resulting charset.
         *
         * @param charset the charset name.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final String charset) {
            super.setCharset(charset);
            // Read the charset back from the superclass so the decoder matches whatever it resolved.
            this.charsetDecoder = getCharset().newDecoder();
            return this;
        }

        /**
         * Sets the charset decoder and aligns this builder's charset with the decoder's charset.
         *
         * @param charsetDecoder the charset decoder, {@code null} selects a new decoder for the default charset of this builder.
         * @return {@code this} instance.
         */
        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
            super.setCharset(this.charsetDecoder.charset());
            return this;
        }

        /**
         * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
         * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
         * {@link #close()} is called.
         *
         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
         *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
         *                         {@link #flush()} or {@link #close()} is called.
         * @return {@code this} instance.
         */
        public Builder setWriteImmediately(final boolean writeImmediately) {
            this.writeImmediately = writeImmediately;
            return this;
        }

    }

    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

    /**
     * Message used when the running JDK fails the UTF-16 decoding self-test.
     */
    private static final String BROKEN_UTF16_MESSAGE = "UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
            + "Please find a JDK that supports UTF-16 if you intend to use UTF-16 with WriterOutputStream";

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Checks if the JDK in use properly supports the given charset.
     * <p>
     * Only UTF-16 is checked: a short test string is encoded, then decoded one byte at a time, and the result is compared with the original string.
     * </p>
     *
     * @param charset the charset to check the support for
     * @throws UnsupportedOperationException if the charset is UTF-16 and the byte-by-byte decoding fails or yields a different string.
     */
    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
            return;
        }
        final String testString = "v\u00e9s";
        final byte[] bytes = testString.getBytes(charset);

        final CharsetDecoder probeDecoder = charset.newDecoder();
        final ByteBuffer probeIn = ByteBuffer.allocate(16);
        final CharBuffer probeOut = CharBuffer.allocate(testString.length());
        final int len = bytes.length;
        // Feed a single byte per decode call, which is the pattern that trips the broken implementation.
        for (int i = 0; i < len; i++) {
            probeIn.put(bytes[i]);
            probeIn.flip();
            try {
                probeDecoder.decode(probeIn, probeOut, i == len - 1);
            } catch (final IllegalArgumentException e) {
                // Keep the original failure as the cause so that it can be diagnosed.
                throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE, e);
            }
            probeIn.compact();
        }
        probeOut.rewind();
        if (!testString.equals(probeOut.toString())) {
            throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE);
        }

    }

    private final Writer writer;
    private final CharsetDecoder decoder;

    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
     */
    private final CharBuffer decoderOut;

    /**
     * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer) {
        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer  the target {@link Writer}
     * @param charset the charset encoding
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}. The decoder created for the charset replaces malformed input and unmappable characters with {@code "?"}.
     *
     * @param writer           the target {@link Writer}
     * @param charset          the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
        // @formatter:off
        this(writer,
            Charsets.toCharset(charset).newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("?"),
             bufferSize,
             writeImmediately);
        // @formatter:on
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer  the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}
     * @param decoder          the charset decoder
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws UnsupportedOperationException if the decoder's charset is UTF-16 and the running JDK fails the UTF-16 decoding self-test.
     * @throws IllegalArgumentException      if {@code bufferSize} is negative.
     * @since 2.1
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
        // Normalize the decoder once, so the self-test and the stream use the same decoder and charset.
        final CharsetDecoder effectiveDecoder = CharsetDecoders.toCharsetDecoder(decoder);
        checkIbmJdkWithBrokenUTF16(effectiveDecoder.charset());
        this.writer = writer;
        this.decoder = effectiveDecoder;
        this.writeImmediately = writeImmediately;
        this.decoderOut = CharBuffer.allocate(bufferSize);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer      the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}
     * @param charsetName      the name of the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
    }

    /**
     * Closes the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#close()} will be called, even if decoding or writing the remaining content fails.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        // try-with-resources closes the writer on every path and records a close failure as a
        // suppressed exception instead of masking the primary one.
        try (Writer closing = writer) {
            processInput(true);
            flushOutput();
        }
    }

    /**
     * Flushes the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }

    /**
     * Writes the decoded characters accumulated in the output buffer to the underlying {@link Writer} and empties the buffer. Does nothing when the buffer
     * is empty.
     *
     * @throws IOException if an I/O error occurs.
     */
    private void flushOutput() throws IOException {
        if (decoderOut.position() > 0) {
            writer.write(decoderOut.array(), 0, decoderOut.position());
            // The limit of decoderOut always equals its capacity, so clear() only resets the position.
            decoderOut.clear();
        }
    }

    /**
     * Decodes the contents of the input ByteBuffer into the output CharBuffer, writing the output buffer to the underlying {@link Writer} whenever it fills
     * up. Bytes that the decoder has not consumed yet (for example an incomplete multi-byte sequence) stay in the input buffer for the next call.
     *
     * @param endOfInput indicates end of input
     * @throws IOException if an I/O error occurs, if the decoder reports malformed or unmappable input, or if the output buffer is too small for the
     *                     decoder to make progress.
     */
    private void processInput(final boolean endOfInput) throws IOException {
        // Prepare decoderIn for reading
        decoderIn.flip();
        while (true) {
            final CoderResult coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
            if (coderResult.isUnderflow()) {
                // All currently decodable input has been consumed.
                break;
            }
            if (coderResult.isOverflow()) {
                if (decoderOut.position() == 0) {
                    // Overflow on an empty buffer: flushing cannot free any space (for example a
                    // surrogate pair does not fit), so fail instead of looping forever.
                    throw new IOException("Output buffer of " + decoderOut.capacity() + " chars is too small for the decoder to make progress");
                }
                flushOutput();
            } else {
                // Malformed or unmappable input on a decoder configured to report errors.
                // For such a result this call always throws a CharacterCodingException.
                coderResult.throwException();
            }
        }
        // Discard the bytes that have been read
        decoderIn.compact();
    }

    /**
     * Writes bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Writes bytes from the specified byte array to the stream.
     *
     * @param b   the byte array containing the bytes to write
     * @param off the start offset in the byte array
     * @param len the number of bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b, int off, int len) throws IOException {
        while (len > 0) {
            // Transfer as much as fits into the small input buffer, then decode it.
            final int c = Math.min(len, decoderIn.remaining());
            decoderIn.put(b, off, c);
            processInput(false);
            len -= c;
            off += c;
        }
        if (writeImmediately) {
            flushOutput();
        }
    }

    /**
     * Writes a single byte to the stream.
     *
     * @param b the byte to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final int b) throws IOException {
        write(new byte[] { (byte) b }, 0, 1);
    }
}