/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.util.Arrays;
import java.util.Objects;

import org.apache.commons.codec.CodecPolicy;

/**
 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
 *
 * <p>
 * The class can be parameterized in the following manner with various constructors:
 * </p>
 * <ul>
 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
 * <li>Line length: Default 0 (no chunking). Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded
 * data.</li>
 * <li>Line separator: Default is CRLF ("\r\n")</li>
 * </ul>
 * <p>
 * This class operates directly on byte streams, and not character streams.
 * </p>
 * <p>
 * This class is thread-safe.
 * </p>
 * <p>
 * You can configure instances with the {@link Builder}.
 * </p>
 * <pre>
 * Base32 base32 = Base32.builder()
 *   .setDecodingPolicy(CodecPolicy.LENIENT) // default is lenient
 *   .setEncodeTable(customEncodeTable)
 *   .setLineLength(0)                          // default is none
 *   .setLineSeparator('\r', '\n')              // default is CR LF
 *   .setPadding('=')                           // default is =
 *   .get();
 * </pre>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
 * @since 1.5
 */
public class Base32 extends BaseNCodec {

    /**
     * Builds {@link Base32} instances.
     *
     * @since 1.17.0
     */
    public static class Builder extends AbstractBuilder<Base32, Builder> {

        /**
         * Constructs a new instance.
         */
        public Builder() {
            super(ENCODE_TABLE);
        }

        @Override
        public Base32 get() {
            return new Base32(getLineLength(), getLineSeparator(), getEncodeTable(), getPadding(), getDecodingPolicy());
        }

    }

    /**
     * BASE32 characters are 5 bits in length. They are formed by taking a block of five octets to form a 40-bit string, which is converted into eight BASE32
     * characters.
     */
    private static final int BITS_PER_ENCODED_BYTE = 5;

    /** Number of encoded characters produced from one full block of input (40 bits / 5 bits per character). */
    private static final int BYTES_PER_ENCODED_BLOCK = 8;

    /** Number of raw octets consumed to produce one full encoded block (40 bits / 8 bits per octet). */
    private static final int BYTES_PER_UNENCODED_BLOCK = 5;

    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit
     * positive integer equivalents. Characters that are not in the Base32 alphabet but fall within the bounds of the array are translated to -1.
     */
    // @formatter:off
    private static final byte[] DECODE_TABLE = {
        //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
            -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
                                                        -1, -1, -1, -1, -1, // 5b-5f
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" equivalents as specified in Table 3 of RFC
     * 4648.
     */
    // @formatter:off
    private static final byte[] ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            '2', '3', '4', '5', '6', '7',
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as specified in Table 4 of RFC 4648) into their
     * 5-bit positive integer equivalents. Characters that are not in the Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
     */
    // @formatter:off
    private static final byte[] HEX_DECODE_TABLE = {
        //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
            25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
                                        -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f
            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
            25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Hex Alphabet" equivalents as specified in Table 4 of
     * RFC 4648.
     */
    // @formatter:off
    private static final byte[] HEX_ENCODE_TABLE = {
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
    };
    // @formatter:on

    /** Mask used to extract 5 bits, used when encoding Base32 bytes */
    private static final int MASK_5BITS = 0x1f;

    /** Mask used to extract 4 bits, used when decoding final trailing character. */
    private static final long MASK_4BITS = 0x0fL;

    /** Mask used to extract 3 bits, used when decoding final trailing character. */
    private static final long MASK_3BITS = 0x07L;

    /** Mask used to extract 2 bits, used when decoding final trailing character. */
    private static final long MASK_2BITS = 0x03L;

    /** Mask used to extract 1 bit, used when decoding final trailing character. */
    private static final long MASK_1BITS = 0x01L;

    /**
     * Creates a new Builder.
     *
     * @return a new Builder.
     * @since 1.17.0
     */
    public static Builder builder() {
        return new Builder();
    }

    // The static final fields above are used for the original static byte[] methods on Base32.
    // The private member fields below are used with the new streaming approach, which requires
    // some state be preserved between calls of encode() and decode().

    /**
     * Decode table to use.
     */
    private final byte[] decodeTable;

    /**
     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
     * {@code encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length} when chunking, otherwise {@code BYTES_PER_ENCODED_BLOCK}.
     */
    private final int encodeSize;

    /**
     * Encode table to use.
     */
    private final byte[] encodeTable;

    /**
     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
     */
    private final byte[] lineSeparator;

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking).
     * </p>
     */
    public Base32() {
        this(false);
    }

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking).
     * </p>
     *
     * @param useHex if {@code true} then use Base32 Hex alphabet
     */
    public Base32(final boolean useHex) {
        this(0, null, useHex, PAD_DEFAULT);
    }

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking).
     * </p>
     *
     * @param useHex  if {@code true} then use Base32 Hex alphabet
     * @param padding byte used as padding byte.
     */
    public Base32(final boolean useHex, final byte padding) {
        this(0, null, useHex, padding);
    }

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking).
     * </p>
     *
     * @param pad byte used as padding byte.
     */
    public Base32(final byte pad) {
        this(false, pad);
    }

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is given in the constructor, the line separator is CRLF.
     * </p>
     *
     * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0, then
     *                   the output will not be divided into lines (chunks). Ignored when decoding.
     */
    public Base32(final int lineLength) {
        this(lineLength, CHUNK_SEPARATOR);
    }

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters.
     */
    public Base32(final int lineLength, final byte[] lineSeparator) {
        this(lineLength, lineSeparator, false, PAD_DEFAULT);
    }

    /**
     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
     * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
     */
    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
        this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
    }

    /**
     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
     * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
     * @param padding       padding byte.
     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
     */
    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding) {
        this(lineLength, lineSeparator, useHex, padding, DECODING_POLICY_DEFAULT);
    }

    /**
     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
     * @param useHex         if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
     * @param padding        padding byte.
     * @param decodingPolicy The decoding policy.
     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
     * @since 1.15
     */
    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding, final CodecPolicy decodingPolicy) {
        this(lineLength, lineSeparator, useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE, padding, decodingPolicy);
    }

    /**
     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     * <p>
     * The decode table is selected from the <em>contents</em> of {@code encodeTable}: a table equal to the Base32 Hex alphabet selects the hex decode table,
     * anything else selects the standard Base32 decode table.
     * </p>
     *
     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
     * @param encodeTable    A Base32 alphabet.
     * @param padding        padding byte.
     * @param decodingPolicy The decoding policy.
     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
     * @throws NullPointerException     Thrown when {@code encodeTable} is null.
     */
    private Base32(final int lineLength, final byte[] lineSeparator, final byte[] encodeTable, final byte padding, final CodecPolicy decodingPolicy) {
        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, toLength(lineSeparator), padding, decodingPolicy);
        Objects.requireNonNull(encodeTable, "encodeTable");
        // Compare by content, not by reference: a caller-supplied array (e.g. through the Builder) that holds the
        // hex alphabet is not the same instance as HEX_ENCODE_TABLE but must still decode with the hex table.
        if (Arrays.equals(encodeTable, HEX_ENCODE_TABLE)) {
            this.encodeTable = HEX_ENCODE_TABLE;
            this.decodeTable = HEX_DECODE_TABLE;
        } else if (Arrays.equals(encodeTable, ENCODE_TABLE)) {
            this.encodeTable = ENCODE_TABLE;
            this.decodeTable = DECODE_TABLE;
        } else {
            // Custom alphabet: keep a private copy so later changes to the caller's array cannot affect this codec.
            // Decoding keeps using the standard table, as it always has for non-hex alphabets.
            this.encodeTable = encodeTable.clone();
            this.decodeTable = DECODE_TABLE;
        }
        if (lineLength > 0) {
            if (lineSeparator == null) {
                throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null");
            }
            final byte[] lineSeparatorCopy = lineSeparator.clone();
            // Must be done after initializing the tables
            if (containsAlphabetOrPad(lineSeparatorCopy)) {
                final String sep = StringUtils.newStringUtf8(lineSeparatorCopy);
                throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]");
            }
            this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparatorCopy.length;
            this.lineSeparator = lineSeparatorCopy;
        } else {
            this.encodeSize = BYTES_PER_ENCODED_BLOCK;
            this.lineSeparator = null;
        }
        if (isInAlphabet(padding) || Character.isWhitespace(padding)) {
            throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
        }
    }

    /**
     * <p>
     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with
     * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is not necessary when decoding, but it doesn't hurt, either.
     * </p>
     * <p>
     * Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are silently ignored, but has implications
     * for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity.
     * </p>
     * <p>
     * Output is written to {@link org.apache.commons.codec.binary.BaseNCodec.Context#buffer Context#buffer} as 8-bit octets, using
     * {@link org.apache.commons.codec.binary.BaseNCodec.Context#pos Context#pos} as the buffer position
     * </p>
     *
     * @param input   byte[] array of ASCII data to Base32 decode.
     * @param inPos   Position to start reading data from.
     * @param inAvail Amount of bytes available from input for decoding.
     * @param context the context to be used
     * @throws IllegalArgumentException if strict decoding is enabled and the trailing characters are not a possible encoding.
     */
    @Override
    void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
        // package protected for access from I/O streams
        if (context.eof) {
            return;
        }
        if (inAvail < 0) {
            context.eof = true;
        }
        final int decodeSize = this.encodeSize - 1;
        for (int i = 0; i < inAvail; i++) {
            final byte b = input[inPos++];
            if (b == pad) {
                // We're done.
                context.eof = true;
                break;
            }
            final byte[] buffer = ensureBufferSize(decodeSize, context);
            if (b >= 0 && b < this.decodeTable.length) {
                final int result = this.decodeTable[b];
                if (result >= 0) {
                    context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
                    // collect decoded bytes
                    context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
                    if (context.modulus == 0) { // we can output the 5 bytes
                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 32 & MASK_8BITS);
                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
                        buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
                    }
                }
            }
        }
        // Two forms of EOF as far as Base32 decoder is concerned: actual
        // EOF (-1) and first time '=' character is encountered in stream.
        // This approach makes the '=' padding characters completely optional.
        if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
            final byte[] buffer = ensureBufferSize(decodeSize, context);
            // We ignore partial bytes, i.e. only multiples of 8 count.
            // Any combination not part of a valid encoding is either partially decoded
            // or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
            // It is not possible to encode with 1, 3, 6 trailing characters.
            // For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
            // See the encode(byte[]) method EOF section.
            switch (context.modulus) {
//              case 0 : // impossible, as excluded above
            case 1: // 5 bits - either ignore entirely, or raise an exception
                validateTrailingCharacters();
                // Lenient decoding: 5 bits cannot form a byte, so the lone character is dropped.
                // Without this break the code fell through and emitted a byte built from 3 bits.
                break;
            case 2: // 10 bits, drop 2 and output one byte
                validateCharacter(MASK_2BITS, context);
                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 2 & MASK_8BITS);
                break;
            case 3: // 15 bits, drop 7 and output 1 byte, or raise an exception
                validateTrailingCharacters();
                // Not possible from a valid encoding but decode anyway
                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 7 & MASK_8BITS);
                break;
            case 4: // 20 bits = 2*8 + 4
                validateCharacter(MASK_4BITS, context);
                context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
                break;
            case 5: // 25 bits = 3*8 + 1
                validateCharacter(MASK_1BITS, context);
                context.lbitWorkArea = context.lbitWorkArea >> 1;
                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
                break;
            case 6: // 30 bits = 3*8 + 6, or raise an exception
                validateTrailingCharacters();
                // Not possible from a valid encoding but decode anyway
                context.lbitWorkArea = context.lbitWorkArea >> 6;
                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
                break;
            case 7: // 35 bits = 4*8 +3
                validateCharacter(MASK_3BITS, context);
                context.lbitWorkArea = context.lbitWorkArea >> 3;
                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
                break;
            default:
                // modulus can be 0-7; 0 is excluded above and 1-7 are all handled
                throw new IllegalStateException("Impossible modulus " + context.modulus);
            }
        }
    }

    /**
     * <p>
     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with
     * inAvail set to "-1" to alert encoder that EOF has been reached, so flush last remaining bytes (if not multiple of 5).
     * </p>
     *
     * @param input   byte[] array of binary data to Base32 encode.
     * @param inPos   Position to start reading data from.
     * @param inAvail Amount of bytes available from input for encoding.
     * @param context the context to be used
     */
    @Override
    void encode(final byte[] input, int inPos, final int inAvail, final Context context) {
        // package protected for access from I/O streams
        if (context.eof) {
            return;
        }
        // inAvail < 0 is how we're informed of EOF in the underlying data we're
        // encoding.
        if (inAvail < 0) {
            context.eof = true;
            if (0 == context.modulus && lineLength == 0) {
                return; // no leftovers to process and not using chunking
            }
            final byte[] buffer = ensureBufferSize(encodeSize, context);
            final int savedPos = context.pos;
            switch (context.modulus) { // % 5
            case 0:
                break;
            case 1: // Only 1 octet; take top 5 bits then remainder
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2
                buffer[context.pos++] = pad;
                buffer[context.pos++] = pad;
                buffer[context.pos++] = pad;
                buffer[context.pos++] = pad;
                buffer[context.pos++] = pad;
                buffer[context.pos++] = pad;
                break;
            case 2: // 2 octets = 16 bits to use
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4
                buffer[context.pos++] = pad;
                buffer[context.pos++] = pad;
                buffer[context.pos++] = pad;
                buffer[context.pos++] = pad;
                break;
            case 3: // 3 octets = 24 bits to use
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1
                buffer[context.pos++] = pad;
                buffer[context.pos++] = pad;
                buffer[context.pos++] = pad;
                break;
            case 4: // 4 octets = 32 bits to use
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2
                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3
                buffer[context.pos++] = pad;
                break;
            default:
                throw new IllegalStateException("Impossible modulus " + context.modulus);
            }
            context.currentLinePos += context.pos - savedPos; // keep track of current line position
            // if currentPos == 0 we are at the start of a line, so don't add CRLF
            if (lineLength > 0 && context.currentLinePos > 0) { // add chunk separator if required
                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
                context.pos += lineSeparator.length;
            }
        } else {
            for (int i = 0; i < inAvail; i++) {
                final byte[] buffer = ensureBufferSize(encodeSize, context);
                context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
                int b = input[inPos++];
                if (b < 0) {
                    b += 256; // treat the byte as unsigned
                }
                context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
                if (0 == context.modulus) { // we have enough bytes to create our output
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 35) & MASK_5BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 30) & MASK_5BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 25) & MASK_5BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 20) & MASK_5BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 15) & MASK_5BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 10) & MASK_5BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 5) & MASK_5BITS];
                    buffer[context.pos++] = encodeTable[(int) context.lbitWorkArea & MASK_5BITS];
                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
                        context.pos += lineSeparator.length;
                        context.currentLinePos = 0;
                    }
                }
            }
        }
    }

    /**
     * Gets the line separator (for testing only).
     *
     * @return the line separator.
     */
    byte[] getLineSeparator() {
        return lineSeparator;
    }

    /**
     * Returns whether or not the {@code octet} is in the Base32 alphabet.
     *
     * @param octet The value to test
     * @return {@code true} if the value is defined in the Base32 alphabet {@code false} otherwise.
     */
    @Override
    public boolean isInAlphabet(final byte octet) {
        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
    }

    /**
     * Validates whether decoding the final trailing character is possible in the context of the set of possible base 32 values.
     * <p>
     * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-32 digit is zero in the bits
     * that will be discarded.
     * </p>
     *
     * @param emptyBitsMask The mask of the lower bits that should be empty
     * @param context       the context to be used
     * @throws IllegalArgumentException if the bits being checked contain any non-zero value
     */
    private void validateCharacter(final long emptyBitsMask, final Context context) {
        // Use the long bit work area
        if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
            throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
                    "base 32 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero.");
        }
    }

    /**
     * Validates whether decoding allows final trailing characters that cannot be created during encoding.
     *
     * @throws IllegalArgumentException if strict decoding is enabled
     */
    private void validateTrailingCharacters() {
        if (isStrictDecoding()) {
            throw new IllegalArgumentException("Strict decoding: Last encoded character(s) (before the paddings if any) are valid " +
                    "base 32 alphabet but not a possible encoding. Decoding requires either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
        }
    }
}