WriterOutputStream.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.io.output;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.charset.CharsetDecoders;
/**
* {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
* a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
* correctly.
* <p>
* The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
* chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
* is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
* also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
* implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
* </p>
* <p>
* {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
* would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
* </p>
* <p>
* To build an instance, use {@link Builder}, as the example below does:
* </p>
* <pre>
* OutputStream out = ...
* Charset cs = ...
* OutputStreamWriter writer = new OutputStreamWriter(out, cs);
* WriterOutputStream out2 = WriterOutputStream.builder()
* .setWriter(writer)
* .setCharset(cs)
* .get();
* </pre>
* <p>
* {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
* transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
* {@link WriterOutputStream} pushes it to the underlying stream.
* </p>
* <p>
* Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
* the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
* known to represent character data that must be decoded for further use.
* </p>
* <p>
* Instances of {@link WriterOutputStream} are not thread safe.
* </p>
*
* @see Builder
* @see org.apache.commons.io.input.ReaderInputStream
* @since 2.0
*/
public class WriterOutputStream extends OutputStream {

    // @formatter:off
    /**
     * Builds a new {@link WriterOutputStream}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * WriterOutputStream s = WriterOutputStream.builder()
     *   .setPath(path)
     *   .setBufferSize(8192)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .setWriteImmediately(false)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {

        /** Decoder handed to the stream; kept in sync with the builder's charset by the setters below. */
        private CharsetDecoder charsetDecoder;

        /** Whether the built stream flushes its output buffer after every write call. */
        private boolean writeImmediately;

        /**
         * Constructs a new Builder whose decoder is a fresh decoder for the builder's initial charset.
         */
        public Builder() {
            this.charsetDecoder = getCharset().newDecoder();
        }

        /**
         * Builds a new {@link WriterOutputStream}.
         * <p>
         * You must set input that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getWriter()}</li>
         * <li>{@link #getBufferSize()}</li>
         * <li>charsetDecoder</li>
         * <li>writeImmediately</li>
         * </ul>
         *
         * @return a new instance.
         * @throws UnsupportedOperationException if the origin cannot provide a Writer.
         * @throws IOException                   if {@link #getWriter()} fails with an I/O error.
         * @see #getWriter()
         */
        @SuppressWarnings("resource")
        @Override
        public WriterOutputStream get() throws IOException {
            return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
        }

        /**
         * Sets the charset and replaces the current decoder with a new decoder for the resulting charset. Any decoder previously supplied through
         * {@link #setCharsetDecoder(CharsetDecoder)} is discarded.
         *
         * @param charset the charset.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final Charset charset) {
            super.setCharset(charset);
            // Read the charset back from the superclass so the decoder matches whatever it actually stored.
            this.charsetDecoder = getCharset().newDecoder();
            return this;
        }

        /**
         * Sets the charset by name and replaces the current decoder with a new decoder for the resulting charset. Any decoder previously supplied through
         * {@link #setCharsetDecoder(CharsetDecoder)} is discarded.
         *
         * @param charset the charset name.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final String charset) {
            super.setCharset(charset);
            // Read the charset back from the superclass so the decoder matches whatever it actually stored.
            this.charsetDecoder = getCharset().newDecoder();
            return this;
        }

        /**
         * Sets the charset decoder. A {@code null} argument selects a new decoder for the builder's default charset. The builder's charset is updated to the
         * charset of the decoder that ends up being used.
         *
         * @param charsetDecoder the charset decoder, may be {@code null}.
         * @return {@code this} instance.
         */
        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
            super.setCharset(this.charsetDecoder.charset());
            return this;
        }

        /**
         * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
         * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
         * {@link #close()} is called.
         *
         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
         *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
         *                         {@link #flush()} or {@link #close()} is called.
         * @return {@code this} instance.
         */
        public Builder setWriteImmediately(final boolean writeImmediately) {
            this.writeImmediately = writeImmediately;
            return this;
        }
    }

    /** Default size, in chars, of the decoder output buffer. */
    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

    /** Message used for both failure modes of the UTF-16 sanity check. */
    private static final String BROKEN_UTF16_MESSAGE = "UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
            + "Please find a JDK that supports UTF-16 if you intend to use UTF-16 with WriterOutputStream";

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Checks if the JDK in use properly supports the given charset.
     * <p>
     * Only UTF-16 is probed; any other charset returns immediately. The probe encodes a short test string, feeds the bytes to a fresh decoder one byte at a
     * time, and verifies that the decoded text equals the test string.
     * </p>
     *
     * @param charset the charset to check the support for
     * @throws UnsupportedOperationException if the charset is UTF-16 and the incremental decode either throws {@link IllegalArgumentException} or yields text
     *                                       different from the test string.
     */
    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
            return;
        }
        final String testString = "v\u00e9s";
        final byte[] bytes = testString.getBytes(charset);
        final CharsetDecoder probeDecoder = charset.newDecoder();
        final ByteBuffer in = ByteBuffer.allocate(16);
        final CharBuffer out = CharBuffer.allocate(testString.length());
        final int len = bytes.length;
        for (int i = 0; i < len; i++) {
            // Mimic single-byte writes: append one byte, switch to read mode, decode, then keep any undecoded remainder.
            in.put(bytes[i]);
            in.flip();
            try {
                probeDecoder.decode(in, out, i == len - 1);
            } catch (final IllegalArgumentException e) {
                // Keep the original failure as the cause so the broken JDK behavior can be diagnosed.
                throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE, e);
            }
            in.compact();
        }
        out.rewind();
        if (!testString.equals(out.toString())) {
            throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE);
        }
    }

    /** Target of the decoded characters. */
    private final Writer writer;

    /** Decoder that turns the written bytes into characters. */
    private final CharsetDecoder decoder;

    /** If {@code true}, the output buffer is flushed to the writer at the end of every array write. */
    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
     */
    private final CharBuffer decoderOut;

    /**
     * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer) {
        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer  the target {@link Writer}
     * @param charset the charset encoding
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}. The decoder created here replaces malformed input and unmappable characters with {@code "?"}.
     *
     * @param writer           the target {@link Writer}
     * @param charset          the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
        // @formatter:off
        this(writer,
            Charsets.toCharset(charset).newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("?"),
            bufferSize,
            writeImmediately);
        // @formatter:on
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer  the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}
     * @param decoder          the charset decoder
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws UnsupportedOperationException if the decoder's charset is UTF-16 and the running JDK fails the UTF-16 sanity check.
     * @since 2.1
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
        // Normalize once so the checked decoder and the stored decoder are the same object.
        // NOTE(review): toCharsetDecoder presumably substitutes a default decoder for null - confirm against CharsetDecoders.
        final CharsetDecoder effectiveDecoder = CharsetDecoders.toCharsetDecoder(decoder);
        checkIbmJdkWithBrokenUTF16(effectiveDecoder.charset());
        this.writer = writer;
        this.decoder = effectiveDecoder;
        this.writeImmediately = writeImmediately;
        this.decoderOut = CharBuffer.allocate(bufferSize);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer      the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}
     * @param charsetName      the name of the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
    }

    /**
     * Closes the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#close()} will be called; the writer is closed even if decoding or flushing the remaining content fails.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        // try-with-resources closes the writer on every path; if both the body and close() fail,
        // the body's exception propagates and the close() failure is attached as suppressed.
        try (Writer closing = writer) {
            processInput(true);
            flushOutput();
        }
    }

    /**
     * Flushes the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }

    /**
     * Writes the decoded characters accumulated so far to the underlying {@link Writer} and empties the output buffer. Does nothing when the buffer is empty.
     *
     * @throws IOException if an I/O error occurs.
     */
    private void flushOutput() throws IOException {
        if (decoderOut.position() > 0) {
            writer.write(decoderOut.array(), 0, decoderOut.position());
            // Reset the position to 0 so decoding starts filling the buffer from the beginning again.
            decoderOut.rewind();
        }
    }

    /**
     * Decodes the contents of the input ByteBuffer into the output CharBuffer, flushing the output buffer to the writer whenever it fills up.
     *
     * @param endOfInput indicates end of input
     * @throws IOException if an I/O error occurs, if the decoder reports an error result, or if the output buffer is too small for the decoder to emit the
     *                     next character.
     */
    private void processInput(final boolean endOfInput) throws IOException {
        // Prepare decoderIn for reading
        decoderIn.flip();
        CoderResult coderResult;
        while (true) {
            coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
            if (coderResult.isOverflow()) {
                if (decoderOut.position() == 0) {
                    // Overflow with an empty buffer means the buffer can never hold the next character
                    // (for example capacity 0, or capacity 1 and a surrogate pair); flushing would change
                    // nothing and the loop would spin forever.
                    throw new IOException("Output buffer of size " + decoderOut.capacity() + " is too small to hold the next decoded character");
                }
                flushOutput();
            } else if (coderResult.isUnderflow()) {
                break;
            } else {
                // Malformed-input or unmappable-character result. The charset-based constructors configure
                // the decoder to replace such input, so they do not get here; a decoder supplied by the
                // caller (or created without that configuration) can.
                throw new IOException("Unexpected coder result");
            }
        }
        // Discard the bytes that have been read
        decoderIn.compact();
    }

    /**
     * Writes bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Writes bytes from the specified byte array to the stream.
     *
     * @param b   the byte array containing the bytes to write
     * @param off the start offset in the byte array
     * @param len the number of bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b, int off, int len) throws IOException {
        while (len > 0) {
            // Copy at most as many bytes as the small input buffer can take, then decode them right away.
            final int c = Math.min(len, decoderIn.remaining());
            decoderIn.put(b, off, c);
            processInput(false);
            len -= c;
            off += c;
        }
        if (writeImmediately) {
            flushOutput();
        }
    }

    /**
     * Writes a single byte to the stream.
     *
     * @param b the byte to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final int b) throws IOException {
        write(new byte[] { (byte) b }, 0, 1);
    }
}