001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.compress.archivers.zip; 019 020import static java.nio.charset.StandardCharsets.UTF_8; 021 022import java.nio.ByteBuffer; 023import java.nio.charset.Charset; 024import java.nio.charset.UnsupportedCharsetException; 025 026import org.apache.commons.io.Charsets; 027 028/** 029 * Static helper functions for robustly encoding file names in ZIP files. 030 */ 031public abstract class ZipEncodingHelper { 032 033 /** 034 * UTF-8. 035 */ 036 static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(UTF_8); 037 038 /** 039 * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO 040 * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder. 041 * <p> 042 * If the requested character set cannot be found, the platform default will be used instead. 043 * </p> 044 * 045 * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding. 046 * @return A ZIP encoding for the given encoding name. 047 * @since 1.26.0 048 */ 049 public static ZipEncoding getZipEncoding(final Charset charset) { 050 return new NioZipEncoding(Charsets.toCharset(charset), isUTF8(Charsets.toCharset(charset))); 051 } 052 053 /** 054 * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO 055 * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder. 056 * <p> 057 * If the requested character set cannot be found, the platform default will be used instead. 058 * </p> 059 * 060 * @param name The name of the ZIP encoding. Specify {@code null} for the platform's default encoding. 061 * @return A ZIP encoding for the given encoding name. 062 */ 063 public static ZipEncoding getZipEncoding(final String name) { 064 return new NioZipEncoding(toSafeCharset(name), isUTF8(toSafeCharset(name).name())); 065 } 066 067 static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) { 068 buffer.limit(buffer.position()); 069 buffer.rewind(); 070 final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment); 071 on.put(buffer); 072 return on; 073 } 074 075 /** 076 * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding. 077 * 078 * @param charset If the given charset is null, then check the platform's default encoding. 079 */ 080 static boolean isUTF8(final Charset charset) { 081 return isUTF8Alias(Charsets.toCharset(charset).name()); 082 } 083 084 /** 085 * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding. 086 * 087 * @param charsetName If the given name is null, then check the platform's default encoding. 088 */ 089 static boolean isUTF8(final String charsetName) { 090 return isUTF8Alias(charsetName != null ? charsetName : Charset.defaultCharset().name()); 091 } 092 093 private static boolean isUTF8Alias(final String actual) { 094 return UTF_8.name().equalsIgnoreCase(actual) || UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual)); 095 } 096 097 private static Charset toSafeCharset(final String name) { 098 Charset charset = Charset.defaultCharset(); 099 try { 100 charset = Charsets.toCharset(name); 101 } catch (final UnsupportedCharsetException ignored) { 102 // Use the default encoding instead. 103 } 104 return charset; 105 } 106}