/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.output;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.charset.CharsetDecoders;

/**
 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
 * correctly.
 * <p>
 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
 * </p>
 * <p>
 * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
 * </p>
 * <p>
 * To build an instance, use {@link Builder}.
 * </p>
 * <pre>
 * OutputStream out = ...
 * Charset cs = ...
 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
 * WriterOutputStream out2 = WriterOutputStream.builder()
 *   .setWriter(writer)
 *   .setCharset(cs)
 *   .get();
 * </pre>
 * <p>
 * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
 * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
 * {@link WriterOutputStream} pushes it to the underlying stream.
 * </p>
 * <p>
 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
 * known to represent character data that must be decoded for further use.
 * </p>
 * <p>
 * Instances of {@link WriterOutputStream} are not thread safe.
 * </p>
 *
 * @see Builder
 * @see org.apache.commons.io.input.ReaderInputStream
 * @since 2.0
 */
public class WriterOutputStream extends OutputStream {

    // @formatter:off
    /**
     * Builds a new {@link WriterOutputStream}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * WriterOutputStream s = WriterOutputStream.builder()
     *   .setPath(path)
     *   .setBufferSize(8192)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .setWriteImmediately(false)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {

        /** Decoder handed to the stream; kept in sync with the builder's charset by the setters below. */
        private CharsetDecoder charsetDecoder;

        /** Whether the built stream flushes its output buffer after every write operation. */
        private boolean writeImmediately;

        /**
         * Constructs a new Builder.
         */
        public Builder() {
            this.charsetDecoder = getCharset().newDecoder();
        }

        /**
         * Builds a new {@link WriterOutputStream}.
         * <p>
         * You must set input that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getWriter()}</li>
         * <li>{@link #getBufferSize()}</li>
         * <li>charsetDecoder</li>
         * <li>writeImmediately</li>
         * </ul>
         *
         * @return a new instance.
         * @throws UnsupportedOperationException if the origin cannot provide a Writer.
         * @throws IOException                   if an I/O error occurs.
         * @see #getWriter()
         */
        @SuppressWarnings("resource")
        @Override
        public WriterOutputStream get() throws IOException {
            return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
        }

        /**
         * Sets the charset and replaces the current decoder with a new decoder obtained from the resulting charset.
         *
         * @param charset the charset.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final Charset charset) {
            super.setCharset(charset);
            // Read the charset back from the superclass rather than using the argument directly.
            this.charsetDecoder = getCharset().newDecoder();
            return this;
        }

        /**
         * Sets the charset by name and replaces the current decoder with a new decoder obtained from the resulting charset.
         *
         * @param charset the charset name.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final String charset) {
            super.setCharset(charset);
            // Read the charset back from the superclass rather than using the argument directly.
            this.charsetDecoder = getCharset().newDecoder();
            return this;
        }

        /**
         * Sets the charset decoder. A {@code null} argument selects a new decoder of the builder's default charset. The builder's charset is updated to the
         * charset of the decoder that ends up being used.
         *
         * @param charsetDecoder the charset decoder.
         * @return {@code this} instance.
         */
        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
            super.setCharset(this.charsetDecoder.charset());
            return this;
        }

        /**
         * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
         * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
         * {@link #close()} is called.
         *
         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
         *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
         *                         {@link #flush()} or {@link #close()} is called.
         * @return {@code this} instance.
         */
        public Builder setWriteImmediately(final boolean writeImmediately) {
            this.writeImmediately = writeImmediately;
            return this;
        }

    }

    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

    /** Message used when the incremental UTF-16 decoding self-test fails. */
    private static final String BROKEN_UTF16_MESSAGE = "UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
            + "Please find a JDK that supports UTF-16 if you intend to use UTF-16 with WriterOutputStream";

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Checks if the JDK in use properly supports the given charset.
     * <p>
     * Only UTF-16 is checked; any other charset returns immediately. The check encodes a short test string, feeds the bytes to a fresh decoder one byte at a
     * time (the way this stream may receive them) and verifies that the decoded text equals the test string.
     * </p>
     *
     * @param charset the charset to check the support for
     * @throws UnsupportedOperationException if the charset is UTF-16 and the decoder fails the incremental decoding self-test.
     */
    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
            return;
        }
        final String TEST_STRING_2 = "v\u00e9s";
        final byte[] bytes = TEST_STRING_2.getBytes(charset);

        final CharsetDecoder charsetDecoder2 = charset.newDecoder();
        final ByteBuffer bb2 = ByteBuffer.allocate(16);
        final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
        final int len = bytes.length;
        for (int i = 0; i < len; i++) {
            bb2.put(bytes[i]);
            bb2.flip();
            try {
                // Signal end of input only together with the last byte.
                charsetDecoder2.decode(bb2, cb2, i == len - 1);
            } catch (final IllegalArgumentException e) {
                // Keep the original failure as the cause so that it remains diagnosable.
                throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE, e);
            }
            // Keep any bytes the decoder has not consumed yet for the next round.
            bb2.compact();
        }
        cb2.rewind();
        if (!TEST_STRING_2.equals(cb2.toString())) {
            throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE);
        }

    }

    private final Writer writer;
    private final CharsetDecoder decoder;

    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
     */
    private final CharBuffer decoderOut;

    /**
     * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer) {
        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer  the target {@link Writer}
     * @param charset the charset encoding
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}. The decoder created for the charset replaces malformed input and unmappable characters with {@code "?"}.
     *
     * @param writer           the target {@link Writer}
     * @param charset          the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
        // @formatter:off
        this(writer,
            Charsets.toCharset(charset).newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("?"),
            bufferSize,
            writeImmediately);
        // @formatter:on
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer  the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}
     * @param decoder          the charset decoder
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws UnsupportedOperationException if the decoder's charset is UTF-16 and the running JDK fails the incremental decoding self-test.
     * @since 2.1
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
        // Resolve the decoder once so that the decoder that is checked is the very decoder that is used.
        final CharsetDecoder resolvedDecoder = CharsetDecoders.toCharsetDecoder(decoder);
        checkIbmJdkWithBrokenUTF16(resolvedDecoder.charset());
        this.writer = writer;
        this.decoder = resolvedDecoder;
        this.writeImmediately = writeImmediately;
        this.decoderOut = CharBuffer.allocate(bufferSize);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer      the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}
     * @param charsetName      the name of the charset encoding
     * @param bufferSize       the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
    }

    /**
     * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#close()} will be called.
     * <p>
     * The underlying {@link Writer} is closed even if decoding or writing the remaining content fails; in that case the original exception is thrown and a
     * failure to close the writer is recorded as a suppressed exception.
     * </p>
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        // try-with-resources guarantees writer.close() runs even when the final decode or flush throws.
        try (Writer closing = writer) {
            processInput(true);
            flushOutput();
        }
    }

    /**
     * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }

    /**
     * Flush the output: writes the characters accumulated in the decoder output buffer to the underlying {@link Writer} and resets the buffer position.
     *
     * @throws IOException if an I/O error occurs.
     */
    private void flushOutput() throws IOException {
        if (decoderOut.position() > 0) {
            writer.write(decoderOut.array(), 0, decoderOut.position());
            // The buffer is never flipped, so resetting the position makes the whole buffer writable again.
            decoderOut.rewind();
        }
    }

    /**
     * Decode the contents of the input ByteBuffer into a CharBuffer.
     *
     * @param endOfInput indicates end of input
     * @throws IOException if an I/O error occurs, or if the decoder reports a result other than overflow or underflow.
     */
    private void processInput(final boolean endOfInput) throws IOException {
        // Prepare decoderIn for reading
        decoderIn.flip();
        CoderResult coderResult;
        while (true) {
            coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
            if (coderResult.isOverflow()) {
                // Output buffer is full: drain it to the writer and keep decoding.
                flushOutput();
            } else if (coderResult.isUnderflow()) {
                // All decodable input has been consumed.
                break;
            } else {
                // Decoders created by the Charset-based constructors replace malformed input and unmappable
                // characters, so they do not get here. A caller-supplied decoder that reports such input does.
                throw new IOException("Unexpected coder result");
            }
        }
        // Discard the bytes that have been read
        decoderIn.compact();
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b   the byte array containing the bytes to write
     * @param off the start offset in the byte array
     * @param len the number of bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b, int off, int len) throws IOException {
        while (len > 0) {
            // Transfer at most as many bytes as currently fit into the decoder input buffer.
            final int c = Math.min(len, decoderIn.remaining());
            decoderIn.put(b, off, c);
            processInput(false);
            len -= c;
            off += c;
        }
        if (writeImmediately) {
            flushOutput();
        }
    }

    /**
     * Write a single byte to the stream.
     *
     * @param b the byte to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final int b) throws IOException {
        write(new byte[] { (byte) b }, 0, 1);
    }
}