001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
019import java.io.BufferedWriter;
020import java.io.IOException;
021import java.io.InputStreamReader;
022import java.io.OutputStream;
023import java.io.OutputStreamWriter;
024import java.io.Writer;
025import java.nio.ByteBuffer;
026import java.nio.CharBuffer;
027import java.nio.charset.Charset;
028import java.nio.charset.CharsetDecoder;
029import java.nio.charset.CoderResult;
030import java.nio.charset.CodingErrorAction;
031import java.nio.charset.StandardCharsets;
032
033import org.apache.commons.io.Charsets;
034import org.apache.commons.io.IOUtils;
035import org.apache.commons.io.build.AbstractStreamBuilder;
036import org.apache.commons.io.charset.CharsetDecoders;
037
038/**
039 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
040 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
041 * correctly.
042 * <p>
043 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
044 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
045 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
046 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
047 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
048 * </p>
049 * <p>
050 * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
051 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
052 * </p>
053 * <p>
054 * To build an instance, use {@link Builder}.
055 * </p>
056 * <pre>
057 * OutputStream out = ...
058 * Charset cs = ...
059 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
060 * WriterOutputStream out2 = WriterOutputStream.builder()
061 *   .setWriter(writer)
062 *   .setCharset(cs)
063 *   .get();
064 * </pre>
065 * <p>
066 * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
067 * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
068 * {@link WriterOutputStream} pushes it to the underlying stream.
069 * </p>
070 * <p>
071 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
072 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
073 * known to represent character data that must be decoded for further use.
074 * </p>
075 * <p>
076 * Instances of {@link WriterOutputStream} are not thread safe.
077 * </p>
078 *
079 * @see Builder
080 * @see org.apache.commons.io.input.ReaderInputStream
081 * @since 2.0
082 */
083public class WriterOutputStream extends OutputStream {
084
085    // @formatter:off
086    /**
087     * Builds a new {@link WriterOutputStream}.
088     *
089     * <p>
090     * For example:
091     * </p>
092     * <pre>{@code
093     * WriterOutputStream s = WriterOutputStream.builder()
094     *   .setPath(path)
095     *   .setBufferSize(8192)
096     *   .setCharset(StandardCharsets.UTF_8)
097     *   .setWriteImmediately(false)
098     *   .get();}
099     * </pre>
100     *
101     * @see #get()
102     * @since 2.12.0
103     */
104    // @formatter:on
105    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {
106
107        private CharsetDecoder charsetDecoder;
108        private boolean writeImmediately;
109
110        /**
111         * Constructs a new Builder.
112         */
113        public Builder() {
114            this.charsetDecoder = getCharset().newDecoder();
115        }
116
117        /**
118         * Builds a new {@link WriterOutputStream}.
119         * <p>
120         * You must set input that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
121         * </p>
122         * <p>
123         * This builder use the following aspects:
124         * </p>
125         * <ul>
126         * <li>{@link #getWriter()}</li>
127         * <li>{@link #getBufferSize()}</li>
128         * <li>charsetDecoder</li>
129         * <li>writeImmediately</li>
130         * </ul>
131         *
132         * @return a new instance.
133         * @throws UnsupportedOperationException if the origin cannot provide a Writer.
134         * @see #getWriter()
135         */
136        @SuppressWarnings("resource")
137        @Override
138        public WriterOutputStream get() throws IOException {
139            return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
140        }
141
142        @Override
143        public Builder setCharset(final Charset charset) {
144            super.setCharset(charset);
145            this.charsetDecoder = getCharset().newDecoder();
146            return this;
147        }
148
149        @Override
150        public Builder setCharset(final String charset) {
151            super.setCharset(charset);
152            this.charsetDecoder = getCharset().newDecoder();
153            return this;
154        }
155
156        /**
157         * Sets the charset decoder.
158         *
159         * @param charsetDecoder the charset decoder.
160         * @return {@code this} instance.
161         */
162        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
163            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
164            super.setCharset(this.charsetDecoder.charset());
165            return this;
166        }
167
168        /**
169         * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
170         * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
171         * {@link #close()} is called.
172         *
173         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
174         *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
175         *                         {@link #flush()} or {@link #close()} is called.
176         * @return {@code this} instance.
177         */
178        public Builder setWriteImmediately(final boolean writeImmediately) {
179            this.writeImmediately = writeImmediately;
180            return this;
181        }
182
183    }
184
    /** Output buffer size, in characters, used by the constructors that do not take an explicit {@code bufferSize}. */
    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
186
    /**
     * Constructs a new {@link Builder}.
     * <p>
     * Every call returns a newly created builder instance.
     * </p>
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }
196
197    /**
198     * Checks if the JDK in use properly supports the given charset.
199     *
200     * @param charset the charset to check the support for
201     */
202    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
203        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
204            return;
205        }
206        final String TEST_STRING_2 = "v\u00e9s";
207        final byte[] bytes = TEST_STRING_2.getBytes(charset);
208
209        final CharsetDecoder charsetDecoder2 = charset.newDecoder();
210        final ByteBuffer bb2 = ByteBuffer.allocate(16);
211        final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
212        final int len = bytes.length;
213        for (int i = 0; i < len; i++) {
214            bb2.put(bytes[i]);
215            bb2.flip();
216            try {
217                charsetDecoder2.decode(bb2, cb2, i == len - 1);
218            } catch (final IllegalArgumentException e) {
219                throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
220                        + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
221            }
222            bb2.compact();
223        }
224        cb2.rewind();
225        if (!TEST_STRING_2.equals(cb2.toString())) {
226            throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
227                    + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
228        }
229
230    }
231
    /** Target that receives the decoded characters; flushed by {@link #flush()} and closed by {@link #close()}. */
    private final Writer writer;
    /** Decoder that converts the bytes written to this stream into characters. */
    private final CharsetDecoder decoder;

    /** When {@code true}, the output buffer is written to {@link #writer} at the end of every write operation. */
    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
     */
    private final CharBuffer decoderOut;
246
    /**
     * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
     * <p>
     * The charset is {@link Charset#defaultCharset()}. This constructor delegates to
     * {@link #WriterOutputStream(Writer, Charset, int, boolean)}, so malformed input and unmappable characters are replaced with {@code "?"}.
     * </p>
     *
     * @param writer the target {@link Writer}
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer) {
        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
    }
258
    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     * <p>
     * This constructor delegates to {@link #WriterOutputStream(Writer, Charset, int, boolean)}, so malformed input and unmappable characters are replaced
     * with {@code "?"}.
     * </p>
     *
     * @param writer  the target {@link Writer}
     * @param charset the charset encoding
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }
271
272    /**
273     * Constructs a new {@link WriterOutputStream}.
274     *
275     * @param writer           the target {@link Writer}
276     * @param charset          the charset encoding
277     * @param bufferSize       the size of the output buffer in number of characters
278     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
279     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
280     *                         {@link #flush()} or {@link #close()} is called.
281     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
282     */
283    @Deprecated
284    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
285        // @formatter:off
286        this(writer,
287            Charsets.toCharset(charset).newDecoder()
288                    .onMalformedInput(CodingErrorAction.REPLACE)
289                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
290                    .replaceWith("?"),
291             bufferSize,
292             writeImmediately);
293        // @formatter:on
294    }
295
    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     * <p>
     * This constructor delegates to {@link #WriterOutputStream(Writer, CharsetDecoder, int, boolean)}; it does not change the error actions configured on the
     * given decoder.
     * </p>
     *
     * @param writer  the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }
309
310    /**
311     * Constructs a new {@link WriterOutputStream}.
312     *
313     * @param writer           the target {@link Writer}
314     * @param decoder          the charset decoder
315     * @param bufferSize       the size of the output buffer in number of characters
316     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
317     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
318     *                         {@link #flush()} or {@link #close()} is called.
319     * @since 2.1
320     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
321     */
322    @Deprecated
323    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
324        checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
325        this.writer = writer;
326        this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
327        this.writeImmediately = writeImmediately;
328        this.decoderOut = CharBuffer.allocate(bufferSize);
329    }
330
    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     * <p>
     * This constructor delegates to {@link #WriterOutputStream(Writer, String, int, boolean)}.
     * </p>
     *
     * @param writer      the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }
343
    /**
     * Constructs a new {@link WriterOutputStream}.
     * <p>
     * The charset name is resolved with {@code Charsets.toCharset(String)}, which decides how a {@code null} or unknown name is handled. The result is passed
     * to {@link #WriterOutputStream(Writer, Charset, int, boolean)}, so malformed input and unmappable characters are replaced with {@code "?"}.
     * </p>
     *
     * @param writer           the target {@link Writer}
     * @param charsetName      the name of the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
    }
359
360    /**
361     * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
362     * {@link Writer#close()} will be called.
363     *
364     * @throws IOException if an I/O error occurs.
365     */
366    @Override
367    public void close() throws IOException {
368        processInput(true);
369        flushOutput();
370        writer.close();
371    }
372
    /**
     * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     * <p>
     * Only characters that have already been decoded are flushed. Bytes still held in the decoder input buffer are not forced through the decoder here;
     * that happens on later writes or on {@link #close()}.
     * </p>
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }
384
385    /**
386     * Flush the output.
387     *
388     * @throws IOException if an I/O error occurs.
389     */
390    private void flushOutput() throws IOException {
391        if (decoderOut.position() > 0) {
392            writer.write(decoderOut.array(), 0, decoderOut.position());
393            decoderOut.rewind();
394        }
395    }
396
397    /**
398     * Decode the contents of the input ByteBuffer into a CharBuffer.
399     *
400     * @param endOfInput indicates end of input
401     * @throws IOException if an I/O error occurs.
402     */
403    private void processInput(final boolean endOfInput) throws IOException {
404        // Prepare decoderIn for reading
405        decoderIn.flip();
406        CoderResult coderResult;
407        while (true) {
408            coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
409            if (coderResult.isOverflow()) {
410                flushOutput();
411            } else if (coderResult.isUnderflow()) {
412                break;
413            } else {
414                // The decoder is configured to replace malformed input and unmappable characters,
415                // so we should not get here.
416                throw new IOException("Unexpected coder result");
417            }
418        }
419        // Discard the bytes that have been read
420        decoderIn.compact();
421    }
422
    /**
     * Write bytes from the specified byte array to the stream.
     * <p>
     * Equivalent to {@code write(b, 0, b.length)}.
     * </p>
     *
     * @param b the byte array containing the bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }
433
434    /**
435     * Write bytes from the specified byte array to the stream.
436     *
437     * @param b   the byte array containing the bytes to write
438     * @param off the start offset in the byte array
439     * @param len the number of bytes to write
440     * @throws IOException if an I/O error occurs.
441     */
442    @Override
443    public void write(final byte[] b, int off, int len) throws IOException {
444        while (len > 0) {
445            final int c = Math.min(len, decoderIn.remaining());
446            decoderIn.put(b, off, c);
447            processInput(false);
448            len -= c;
449            off += c;
450        }
451        if (writeImmediately) {
452            flushOutput();
453        }
454    }
455
456    /**
457     * Write a single byte to the stream.
458     *
459     * @param b the byte to write
460     * @throws IOException if an I/O error occurs.
461     */
462    @Override
463    public void write(final int b) throws IOException {
464        write(new byte[] { (byte) b }, 0, 1);
465    }
466}