001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.output; 018 019import java.io.File; 020import java.io.FileNotFoundException; 021import java.io.FileOutputStream; 022import java.io.IOException; 023import java.io.OutputStream; 024import java.io.OutputStreamWriter; 025import java.io.StringWriter; 026import java.io.Writer; 027import java.nio.charset.Charset; 028import java.nio.charset.StandardCharsets; 029import java.util.Locale; 030import java.util.Objects; 031import java.util.regex.Matcher; 032 033import org.apache.commons.io.Charsets; 034import org.apache.commons.io.IOUtils; 035import org.apache.commons.io.build.AbstractStreamBuilder; 036import org.apache.commons.io.input.XmlStreamReader; 037 038/** 039 * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream. 040 * <p> 041 * To build an instance, use {@link Builder}. 042 * </p> 043 * 044 * @see Builder 045 * @see XmlStreamReader 046 * @since 2.0 047 */ 048public class XmlStreamWriter extends Writer { 049 050 // @formatter:off 051 /** 052 * Builds a new {@link XmlStreamWriter}. 053 * 054 * <p> 055 * For example: 056 * </p> 057 * <pre>{@code 058 * WriterOutputStream w = WriterOutputStream.builder() 059 * .setPath(path) 060 * .setCharset(StandardCharsets.UTF_8) 061 * .get();} 062 * </pre> 063 * 064 * @see #get() 065 * @since 2.12.0 066 */ 067 // @formatter:off 068 public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> { 069 070 /** 071 * Constructs a new {@link Builder}. 072 */ 073 public Builder() { 074 setCharsetDefault(StandardCharsets.UTF_8); 075 setCharset(StandardCharsets.UTF_8); 076 } 077 078 /** 079 * Builds a new {@link XmlStreamWriter}. 080 * <p> 081 * You must set input that supports {@link #getOutputStream()} on this builder, otherwise, this method throws an exception. 082 * </p> 083 * <p> 084 * This builder use the following aspects: 085 * </p> 086 * <ul> 087 * <li>{@link #getOutputStream()}</li> 088 * <li>{@link #getCharset()}</li> 089 * </ul> 090 * 091 * @return a new instance. 092 * @throws IllegalStateException if the {@code origin} is {@code null}. 093 * @throws UnsupportedOperationException if the origin cannot be converted to an {@link OutputStream}. 094 * @throws IOException if an I/O error occurs. 095 * @see #getOutputStream() 096 */ 097 @SuppressWarnings("resource") 098 @Override 099 public XmlStreamWriter get() throws IOException { 100 return new XmlStreamWriter(getOutputStream(), getCharset()); 101 } 102 103 } 104 105 private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE; 106 107 /** 108 * Constructs a new {@link Builder}. 109 * 110 * @return a new {@link Builder}. 111 * @since 2.12.0 112 */ 113 public static Builder builder() { 114 return new Builder(); 115 } 116 117 private final OutputStream out; 118 119 private final Charset defaultCharset; 120 121 private StringWriter prologWriter = new StringWriter(BUFFER_SIZE); 122 123 private Writer writer; 124 125 private Charset charset; 126 127 /** 128 * Constructs a new XML stream writer for the specified file 129 * with a default encoding of UTF-8. 130 * 131 * @param file The file to write to 132 * @throws FileNotFoundException if there is an error creating or 133 * opening the file 134 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 135 */ 136 @Deprecated 137 public XmlStreamWriter(final File file) throws FileNotFoundException { 138 this(file, null); 139 } 140 141 /** 142 * Constructs a new XML stream writer for the specified file 143 * with the specified default encoding. 144 * 145 * @param file The file to write to 146 * @param defaultEncoding The default encoding if not encoding could be detected 147 * @throws FileNotFoundException if there is an error creating or 148 * opening the file 149 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 150 */ 151 @Deprecated 152 @SuppressWarnings("resource") 153 public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException { 154 this(new FileOutputStream(file), defaultEncoding); 155 } 156 157 /** 158 * Constructs a new XML stream writer for the specified output stream 159 * with a default encoding of UTF-8. 160 * 161 * @param out The output stream 162 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 163 */ 164 @Deprecated 165 public XmlStreamWriter(final OutputStream out) { 166 this(out, StandardCharsets.UTF_8); 167 } 168 169 /** 170 * Constructs a new XML stream writer for the specified output stream 171 * with the specified default encoding. 172 * 173 * @param out The output stream 174 * @param defaultEncoding The default encoding if not encoding could be detected 175 */ 176 private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) { 177 this.out = out; 178 this.defaultCharset = Objects.requireNonNull(defaultEncoding); 179 } 180 181 /** 182 * Constructs a new XML stream writer for the specified output stream 183 * with the specified default encoding. 184 * 185 * @param out The output stream 186 * @param defaultEncoding The default encoding if not encoding could be detected 187 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 188 */ 189 @Deprecated 190 public XmlStreamWriter(final OutputStream out, final String defaultEncoding) { 191 this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8)); 192 } 193 194 /** 195 * Closes the underlying writer. 196 * 197 * @throws IOException if an error occurs closing the underlying writer 198 */ 199 @Override 200 public void close() throws IOException { 201 if (writer == null) { 202 charset = defaultCharset; 203 writer = new OutputStreamWriter(out, charset); 204 writer.write(prologWriter.toString()); 205 } 206 writer.close(); 207 } 208 209 /** 210 * Detects the encoding. 211 * 212 * @param cbuf the buffer to write the characters from 213 * @param off The start offset 214 * @param len The number of characters to write 215 * @throws IOException if an error occurs detecting the encoding 216 */ 217 private void detectEncoding(final char[] cbuf, final int off, final int len) 218 throws IOException { 219 int size = len; 220 final StringBuffer xmlProlog = prologWriter.getBuffer(); 221 if (xmlProlog.length() + len > BUFFER_SIZE) { 222 size = BUFFER_SIZE - xmlProlog.length(); 223 } 224 prologWriter.write(cbuf, off, size); 225 226 // try to determine encoding 227 if (xmlProlog.length() >= 5) { 228 if (xmlProlog.substring(0, 5).equals("<?xml")) { 229 // try to extract encoding from XML prolog 230 final int xmlPrologEnd = xmlProlog.indexOf("?>"); 231 if (xmlPrologEnd > 0) { 232 // ok, full XML prolog written: let's extract encoding 233 final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0, 234 xmlPrologEnd)); 235 if (m.find()) { 236 final String encName = m.group(1).toUpperCase(Locale.ROOT); 237 charset = Charset.forName(encName.substring(1, encName.length() - 1)); 238 } else { 239 // no encoding found in XML prolog: using default 240 // encoding 241 charset = defaultCharset; 242 } 243 } else if (xmlProlog.length() >= BUFFER_SIZE) { 244 // no encoding found in first characters: using default 245 // encoding 246 charset = defaultCharset; 247 } 248 } else { 249 // no XML prolog: using default encoding 250 charset = defaultCharset; 251 } 252 if (charset != null) { 253 // encoding has been chosen: let's do it 254 prologWriter = null; 255 writer = new OutputStreamWriter(out, charset); 256 writer.write(xmlProlog.toString()); 257 if (len > size) { 258 writer.write(cbuf, off + size, len - size); 259 } 260 } 261 } 262 } 263 264 /** 265 * Flushes the underlying writer. 266 * 267 * @throws IOException if an error occurs flushing the underlying writer 268 */ 269 @Override 270 public void flush() throws IOException { 271 if (writer != null) { 272 writer.flush(); 273 } 274 } 275 276 /** 277 * Returns the default encoding. 278 * 279 * @return the default encoding 280 */ 281 public String getDefaultEncoding() { 282 return defaultCharset.name(); 283 } 284 285 /** 286 * Returns the detected encoding. 287 * 288 * @return the detected encoding 289 */ 290 public String getEncoding() { 291 return charset.name(); 292 } 293 294 /** 295 * Writes the characters to the underlying writer, detecting encoding. 296 * 297 * @param cbuf the buffer to write the characters from 298 * @param off The start offset 299 * @param len The number of characters to write 300 * @throws IOException if an error occurs detecting the encoding 301 */ 302 @Override 303 public void write(final char[] cbuf, final int off, final int len) throws IOException { 304 if (prologWriter != null) { 305 detectEncoding(cbuf, off, len); 306 } else { 307 writer.write(cbuf, off, len); 308 } 309 } 310}