001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
019import java.io.BufferedWriter;
020import java.io.IOException;
021import java.io.InputStreamReader;
022import java.io.OutputStream;
023import java.io.OutputStreamWriter;
024import java.io.Writer;
025import java.nio.ByteBuffer;
026import java.nio.CharBuffer;
027import java.nio.charset.Charset;
028import java.nio.charset.CharsetDecoder;
029import java.nio.charset.CoderResult;
030import java.nio.charset.CodingErrorAction;
031import java.nio.charset.StandardCharsets;
032
033import org.apache.commons.io.Charsets;
034import org.apache.commons.io.IOUtils;
035import org.apache.commons.io.build.AbstractStreamBuilder;
036import org.apache.commons.io.charset.CharsetDecoders;
037
038/**
039 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
040 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
041 * correctly.
042 * <p>
043 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
044 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
045 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
046 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
047 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
048 * </p>
049 * <p>
050 * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
051 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
052 * </p>
 * <p>
 * To build an instance, use {@link Builder}, as shown in this example:
 * </p>
056 * <pre>
057 * OutputStream out = ...
058 * Charset cs = ...
059 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
060 * WriterOutputStream out2 = WriterOutputStream.builder()
061 *   .setWriter(writer)
062 *   .setCharset(cs)
063 *   .get();
064 * </pre>
065 * <p>
066 * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
067 * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
068 * {@link WriterOutputStream} pushes it to the underlying stream.
069 * </p>
070 * <p>
071 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
072 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
073 * known to represent character data that must be decoded for further use.
074 * </p>
075 * <p>
076 * Instances of {@link WriterOutputStream} are not thread safe.
077 * </p>
078 *
079 * @see Builder
080 * @see org.apache.commons.io.input.ReaderInputStream
081 * @since 2.0
082 */
083public class WriterOutputStream extends OutputStream {
084
085    // @formatter:off
086    /**
087     * Builds a new {@link WriterOutputStream}.
088     *
089     * <p>
090     * For example:
091     * </p>
092     * <pre>{@code
093     * WriterOutputStream s = WriterOutputStream.builder()
094     *   .setPath(path)
095     *   .setBufferSize(8192)
096     *   .setCharset(StandardCharsets.UTF_8)
097     *   .setWriteImmediately(false)
098     *   .get();}
099     * </pre>
100     *
101     * @see #get()
102     * @since 2.12.0
103     */
104    // @formatter:on
105    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {
106
107        private CharsetDecoder charsetDecoder;
108        private boolean writeImmediately;
109
110        /**
111         * Constructs a new builder of {@link WriterOutputStream}.
112         */
113        public Builder() {
114            this.charsetDecoder = getCharset().newDecoder();
115        }
116
117        /**
118         * Builds a new {@link WriterOutputStream}.
119         * <p>
120         * You must set an aspect that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
121         * </p>
122         * <p>
123         * This builder uses the following aspects:
124         * </p>
125         * <ul>
126         * <li>{@link #getWriter()}</li>
127         * <li>{@link #getBufferSize()}</li>
128         * <li>charsetDecoder</li>
129         * <li>writeImmediately</li>
130         * </ul>
131         *
132         * @return a new instance.
133         * @throws UnsupportedOperationException if the origin cannot provide a {@link Writer}.
134         * @throws IOException                   if an I/O error occurs converting to an {@link Writer} using {@link #getWriter()}.
135         * @see #getWriter()
136         * @see #getUnchecked()
137         */
138        @Override
139        public WriterOutputStream get() throws IOException {
140            return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
141        }
142
143        @Override
144        public Builder setCharset(final Charset charset) {
145            super.setCharset(charset);
146            this.charsetDecoder = getCharset().newDecoder();
147            return this;
148        }
149
150        @Override
151        public Builder setCharset(final String charset) {
152            super.setCharset(charset);
153            this.charsetDecoder = getCharset().newDecoder();
154            return this;
155        }
156
157        /**
158         * Sets the charset decoder.
159         *
160         * @param charsetDecoder the charset decoder.
161         * @return {@code this} instance.
162         */
163        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
164            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
165            super.setCharset(this.charsetDecoder.charset());
166            return this;
167        }
168
169        /**
170         * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
171         * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
172         * {@link #close()} is called.
173         *
174         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
175         *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
176         *                         {@link #flush()} or {@link #close()} is called.
177         * @return {@code this} instance.
178         */
179        public Builder setWriteImmediately(final boolean writeImmediately) {
180            this.writeImmediately = writeImmediately;
181            return this;
182        }
183
184    }
185
    /** Output buffer size, in characters, used by the constructors that take no {@code bufferSize} argument. */
    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
187
188    /**
189     * Constructs a new {@link Builder}.
190     *
191     * @return a new {@link Builder}.
192     * @since 2.12.0
193     */
194    public static Builder builder() {
195        return new Builder();
196    }
197
198    /**
199     * Checks if the JDK in use properly supports the given charset.
200     *
201     * @param charset the charset to check the support for
202     */
203    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
204        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
205            return;
206        }
207        final String TEST_STRING_2 = "v\u00e9s";
208        final byte[] bytes = TEST_STRING_2.getBytes(charset);
209
210        final CharsetDecoder charsetDecoder2 = charset.newDecoder();
211        final ByteBuffer bb2 = ByteBuffer.allocate(16);
212        final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
213        final int len = bytes.length;
214        for (int i = 0; i < len; i++) {
215            bb2.put(bytes[i]);
216            bb2.flip();
217            try {
218                charsetDecoder2.decode(bb2, cb2, i == len - 1);
219            } catch (final IllegalArgumentException e) {
220                throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
221                        + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
222            }
223            bb2.compact();
224        }
225        cb2.rewind();
226        if (!TEST_STRING_2.equals(cb2.toString())) {
227            throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
228                    + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
229        }
230
231    }
232
    /** The target to which decoded characters are written. */
    private final Writer writer;
    /** Decodes the bytes written to this stream into characters. */
    private final CharsetDecoder decoder;

    /** If {@code true}, {@link #write(byte[], int, int)} flushes the output buffer to the writer before returning. */
    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
     */
    private final CharBuffer decoderOut;
247
    /**
     * Constructs a new {@link WriterOutputStream} that uses the virtual machine's {@link Charset#defaultCharset() default charset} and with a default output
     * buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is
     * called.
     * <p>
     * Delegates to {@link #WriterOutputStream(Writer, Charset, int, boolean)}, so malformed input and unmappable characters are replaced with {@code "?"}.
     * </p>
     *
     * @param writer the target {@link Writer}
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer) {
        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
    }
260
    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     * <p>
     * Delegates to {@link #WriterOutputStream(Writer, Charset, int, boolean)}, so malformed input and unmappable characters are replaced with {@code "?"}.
     * </p>
     *
     * @param writer  the target {@link Writer}
     * @param charset the charset encoding
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }
273
    /**
     * Constructs a new {@link WriterOutputStream}.
     * <p>
     * The decoder is created from the charset and configured to replace malformed input and unmappable characters with {@code "?"} instead of reporting
     * them.
     * </p>
     *
     * @param writer           the target {@link Writer}
     * @param charset          the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
        // @formatter:off
        this(writer,
            Charsets.toCharset(charset).newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("?"),
             bufferSize,
             writeImmediately);
        // @formatter:on
    }
297
    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     * <p>
     * Delegates to {@link #WriterOutputStream(Writer, CharsetDecoder, int, boolean)} with {@code writeImmediately} set to {@code false}.
     * </p>
     *
     * @param writer  the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }
311
312    /**
313     * Constructs a new {@link WriterOutputStream}.
314     *
315     * @param writer           the target {@link Writer}
316     * @param decoder          the charset decoder
317     * @param bufferSize       the size of the output buffer in number of characters
318     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
319     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
320     *                         {@link #flush()} or {@link #close()} is called.
321     * @since 2.1
322     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
323     */
324    @Deprecated
325    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
326        checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
327        this.writer = writer;
328        this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
329        this.writeImmediately = writeImmediately;
330        this.decoderOut = CharBuffer.allocate(bufferSize);
331    }
332
    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     * <p>
     * Delegates to {@link #WriterOutputStream(Writer, String, int, boolean)} with {@code writeImmediately} set to {@code false}.
     * </p>
     *
     * @param writer      the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }
345
    /**
     * Constructs a new {@link WriterOutputStream}.
     * <p>
     * The charset name is resolved with {@code Charsets.toCharset(String)} and the result is handed to
     * {@link #WriterOutputStream(Writer, Charset, int, boolean)}, so malformed input and unmappable characters are replaced with {@code "?"}.
     * </p>
     *
     * @param writer           the target {@link Writer}
     * @param charsetName      the name of the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
    }
361
362    /**
363     * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
364     * {@link Writer#close()} will be called.
365     *
366     * @throws IOException if an I/O error occurs.
367     */
368    @Override
369    public void close() throws IOException {
370        processInput(true);
371        flushOutput();
372        writer.close();
373    }
374
    /**
     * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     * <p>
     * Only already decoded characters are written: bytes still waiting in the decoder input buffer are not decoded by this method.
     * </p>
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }
386
387    /**
388     * Flush the output.
389     *
390     * @throws IOException if an I/O error occurs.
391     */
392    private void flushOutput() throws IOException {
393        if (decoderOut.position() > 0) {
394            writer.write(decoderOut.array(), 0, decoderOut.position());
395            decoderOut.rewind();
396        }
397    }
398
399    /**
400     * Decode the contents of the input ByteBuffer into a CharBuffer.
401     *
402     * @param endOfInput indicates end of input
403     * @throws IOException if an I/O error occurs.
404     */
405    private void processInput(final boolean endOfInput) throws IOException {
406        // Prepare decoderIn for reading
407        decoderIn.flip();
408        CoderResult coderResult;
409        while (true) {
410            coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
411            if (coderResult.isOverflow()) {
412                flushOutput();
413            } else if (coderResult.isUnderflow()) {
414                break;
415            } else {
416                // The decoder is configured to replace malformed input and unmappable characters,
417                // so we should not get here.
418                throw new IOException("Unexpected coder result");
419            }
420        }
421        // Discard the bytes that have been read
422        decoderIn.compact();
423    }
424
425    /**
426     * Write bytes from the specified byte array to the stream.
427     *
428     * @param b the byte array containing the bytes to write
429     * @throws IOException if an I/O error occurs.
430     */
431    @Override
432    public void write(final byte[] b) throws IOException {
433        write(b, 0, b.length);
434    }
435
436    /**
437     * Write bytes from the specified byte array to the stream.
438     *
439     * @param b   the byte array containing the bytes to write
440     * @param off the start offset in the byte array
441     * @param len the number of bytes to write
442     * @throws IOException if an I/O error occurs.
443     */
444    @Override
445    public void write(final byte[] b, int off, int len) throws IOException {
446        while (len > 0) {
447            final int c = Math.min(len, decoderIn.remaining());
448            decoderIn.put(b, off, c);
449            processInput(false);
450            len -= c;
451            off += c;
452        }
453        if (writeImmediately) {
454            flushOutput();
455        }
456    }
457
458    /**
459     * Write a single byte to the stream.
460     *
461     * @param b the byte to write
462     * @throws IOException if an I/O error occurs.
463     */
464    @Override
465    public void write(final int b) throws IOException {
466        write(new byte[] { (byte) b }, 0, 1);
467    }
468}