001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import java.util.Objects; 021 022import org.apache.commons.codec.CodecPolicy; 023 024/** 025 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>. 026 * 027 * <p> 028 * The class can be parameterized in the following manner with various constructors: 029 * </p> 030 * <ul> 031 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li> 032 * <li>Line length: Default 76. Line length that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 033 * <li>Line separator: Default is CRLF ("\r\n")</li> 034 * </ul> 035 * <p> 036 * This class operates directly on byte streams, and not character streams. 037 * </p> 038 * <p> 039 * This class is thread-safe. 040 * </p> 041 * <p> 042 * You can configure instances with the {@link Builder}. 043 * </p> 044 * <pre> 045 * Base32 base32 = Base32.builder() 046 * .setDecodingPolicy(DecodingPolicy.LENIENT) // default is lenient 047 * .setEncodeTable(customEncodeTable) 048 * .setLineLength(0) // default is none 049 * .setLineSeparator('\r', '\n') // default is CR LF 050 * .setPadding('=') // default is = 051 * .get() 052 * </pre> 053 * 054 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a> 055 * @since 1.5 056 */ 057public class Base32 extends BaseNCodec { 058 059 /** 060 * Builds {@link Base32} instances. 061 * 062 * @since 1.17.0 063 */ 064 public static class Builder extends AbstractBuilder<Base32, Builder> { 065 066 /** 067 * Constructs a new instance. 068 */ 069 public Builder() { 070 super(ENCODE_TABLE); 071 } 072 073 @Override 074 public Base32 get() { 075 return new Base32(getLineLength(), getLineSeparator(), getEncodeTable(), getPadding(), getDecodingPolicy()); 076 } 077 078 /** 079 * Sets the decode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet. 080 * <p> 081 * This overrides a value previously set with {@link #setEncodeTable(byte...)}. 082 * </p> 083 * 084 * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet. 085 * @return this instance. 086 * @since 1.18.0 087 */ 088 public Builder setHexDecodeTable(final boolean useHex) { 089 return setEncodeTable(decodeTable(useHex)); 090 } 091 092 /** 093 * Sets the encode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet. 094 * <p> 095 * This overrides a value previously set with {@link #setEncodeTable(byte...)}. 096 * </p> 097 * 098 * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet. 099 * @return this instance. 100 * @since 1.18.0 101 */ 102 public Builder setHexEncodeTable(final boolean useHex) { 103 return setEncodeTable(encodeTable(useHex)); 104 } 105 } 106 107 /** 108 * BASE32 characters are 5 bits in length. They are formed by taking a block of five octets to form a 40-bit string, which is converted into eight BASE32 109 * characters. 110 */ 111 private static final int BITS_PER_ENCODED_BYTE = 5; 112 113 private static final int BYTES_PER_ENCODED_BLOCK = 8; 114 private static final int BYTES_PER_UNENCODED_BLOCK = 5; 115 /** 116 * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit 117 * positive integer equivalents. Characters that are not in the Base32 alphabet but fall within the bounds of the array are translated to -1. 118 */ 119 // @formatter:off 120 private static final byte[] DECODE_TABLE = { 121 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 123 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 124 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f 125 -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 126 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O 127 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a P-Z 128 -1, -1, -1, -1, -1, // 5b-5f 129 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 60-6f a-o 130 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 70-7a p-z 131 }; 132 // @formatter:on 133 134 /** 135 * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" equivalents as specified in Table 3 of RFC 136 * 4648. 137 */ 138 // @formatter:off 139 private static final byte[] ENCODE_TABLE = { 140 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 141 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 142 '2', '3', '4', '5', '6', '7', 143 }; 144 // @formatter:on 145 146 /** 147 * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as specified in Table 4 of RFC 4648) into their 148 * 5-bit positive integer equivalents. Characters that are not in the Base32 Hex alphabet but fall within the bounds of the array are translated to -1. 149 */ 150 // @formatter:off 151 private static final byte[] HEX_DECODE_TABLE = { 152 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 153 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 154 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 155 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f 156 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 157 -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O 158 25, 26, 27, 28, 29, 30, 31, // 50-56 P-V 159 -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f 160 -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o 161 25, 26, 27, 28, 29, 30, 31 // 70-76 p-v 162 }; 163 // @formatter:on 164 165 /** 166 * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Hex Alphabet" equivalents as specified in Table 4 of 167 * RFC 4648. 168 */ 169 // @formatter:off 170 private static final byte[] HEX_ENCODE_TABLE = { 171 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 172 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 173 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 174 }; 175 // @formatter:on 176 177 /** Mask used to extract 5 bits, used when encoding Base32 bytes */ 178 private static final int MASK_5BITS = 0x1f; 179 180 /** Mask used to extract 4 bits, used when decoding final trailing character. */ 181 private static final long MASK_4BITS = 0x0fL; 182 183 /** Mask used to extract 3 bits, used when decoding final trailing character. */ 184 private static final long MASK_3BITS = 0x07L; 185 186 /** Mask used to extract 2 bits, used when decoding final trailing character. */ 187 private static final long MASK_2BITS = 0x03L; 188 189 /** Mask used to extract 1 bits, used when decoding final trailing character. */ 190 private static final long MASK_1BITS = 0x01L; 191 192 // The static final fields above are used for the original static byte[] methods on Base32. 193 // The private member fields below are used with the new streaming approach, which requires 194 // some state be preserved between calls of encode() and decode(). 195 196 /** 197 * Creates a new Builder. 198 * 199 * @return a new Builder. 200 * @since 1.17.0 201 */ 202 public static Builder builder() { 203 return new Builder(); 204 } 205 206 private static byte[] decodeTable(final boolean useHex) { 207 return useHex ? HEX_DECODE_TABLE : DECODE_TABLE; 208 } 209 210 private static byte[] encodeTable(final boolean useHex) { 211 return useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE; 212 } 213 214 /** 215 * Decode table to use. 216 */ 217 private final byte[] decodeTable; 218 219 /** 220 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. {@code encodeSize = {@link 221 * #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;} 222 */ 223 private final int encodeSize; 224 225 /** 226 * Encode table to use. 227 */ 228 private final byte[] encodeTable; 229 230 /** 231 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. 232 */ 233 private final byte[] lineSeparator; 234 235 /** 236 * Constructs a Base32 codec used for decoding and encoding. 237 * <p> 238 * When encoding the line length is 0 (no chunking). 239 * </p> 240 */ 241 public Base32() { 242 this(false); 243 } 244 245 /** 246 * Constructs a Base32 codec used for decoding and encoding. 247 * <p> 248 * When encoding the line length is 0 (no chunking). 249 * </p> 250 * 251 * @param useHex if {@code true} then use Base32 Hex alphabet 252 */ 253 public Base32(final boolean useHex) { 254 this(0, null, useHex, PAD_DEFAULT); 255 } 256 257 /** 258 * Constructs a Base32 codec used for decoding and encoding. 259 * <p> 260 * When encoding the line length is 0 (no chunking). 261 * </p> 262 * 263 * @param useHex if {@code true} then use Base32 Hex alphabet 264 * @param padding byte used as padding byte. 265 */ 266 public Base32(final boolean useHex, final byte padding) { 267 this(0, null, useHex, padding); 268 } 269 270 /** 271 * Constructs a Base32 codec used for decoding and encoding. 272 * <p> 273 * When encoding the line length is 0 (no chunking). 274 * </p> 275 * 276 * @param pad byte used as padding byte. 277 */ 278 public Base32(final byte pad) { 279 this(false, pad); 280 } 281 282 /** 283 * Constructs a Base32 codec used for decoding and encoding. 284 * <p> 285 * When encoding the line length is given in the constructor, the line separator is CRLF. 286 * </p> 287 * 288 * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength <= 0, then 289 * the output will not be divided into lines (chunks). Ignored when decoding. 290 */ 291 public Base32(final int lineLength) { 292 this(lineLength, CHUNK_SEPARATOR); 293 } 294 295 /** 296 * Constructs a Base32 codec used for decoding and encoding. 297 * <p> 298 * When encoding the line length and line separator are given in the constructor. 299 * </p> 300 * <p> 301 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 302 * </p> 303 * 304 * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength <= 0, 305 * then the output will not be divided into lines (chunks). Ignored when decoding. 306 * @param lineSeparator Each line of encoded data will end with this sequence of bytes. 307 * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. 308 */ 309 public Base32(final int lineLength, final byte[] lineSeparator) { 310 this(lineLength, lineSeparator, false, PAD_DEFAULT); 311 } 312 313 /** 314 * Constructs a Base32 / Base32 Hex codec used for decoding and encoding. 315 * <p> 316 * When encoding the line length and line separator are given in the constructor. 317 * </p> 318 * <p> 319 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 320 * </p> 321 * 322 * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength <= 0, 323 * then the output will not be divided into lines (chunks). Ignored when decoding. 324 * @param lineSeparator Each line of encoded data will end with this sequence of bytes. 325 * @param useHex if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet 326 * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength > 0 and lineSeparator is null. 327 */ 328 public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) { 329 this(lineLength, lineSeparator, useHex, PAD_DEFAULT); 330 } 331 332 /** 333 * Constructs a Base32 / Base32 Hex codec used for decoding and encoding. 334 * <p> 335 * When encoding the line length and line separator are given in the constructor. 336 * </p> 337 * <p> 338 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 339 * </p> 340 * 341 * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength <= 0, 342 * then the output will not be divided into lines (chunks). Ignored when decoding. 343 * @param lineSeparator Each line of encoded data will end with this sequence of bytes. 344 * @param useHex if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet 345 * @param padding padding byte. 346 * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength > 0 and lineSeparator is null. 347 */ 348 public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding) { 349 this(lineLength, lineSeparator, useHex, padding, DECODING_POLICY_DEFAULT); 350 } 351 352 /** 353 * Constructs a Base32 / Base32 Hex codec used for decoding and encoding. 354 * <p> 355 * When encoding the line length and line separator are given in the constructor. 356 * </p> 357 * <p> 358 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 359 * </p> 360 * 361 * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength <= 0, 362 * then the output will not be divided into lines (chunks). Ignored when decoding. 363 * @param lineSeparator Each line of encoded data will end with this sequence of bytes. 364 * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet. 365 * @param padding padding byte. 366 * @param decodingPolicy The decoding policy. 367 * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength > 0 and lineSeparator is null. 368 * @since 1.15 369 */ 370 public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding, final CodecPolicy decodingPolicy) { 371 this(lineLength, lineSeparator, encodeTable(useHex), padding, decodingPolicy); 372 } 373 374 /** 375 * Constructs a Base32 / Base32 Hex codec used for decoding and encoding. 376 * <p> 377 * When encoding the line length and line separator are given in the constructor. 378 * </p> 379 * <p> 380 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 381 * </p> 382 * 383 * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength <= 0, 384 * then the output will not be divided into lines (chunks). Ignored when decoding. 385 * @param lineSeparator Each line of encoded data will end with this sequence of bytes. 386 * @param encodeTable A Base32 alphabet. 387 * @param padding padding byte. 388 * @param decodingPolicy The decoding policy. 389 * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength > 0 and lineSeparator is null. 390 */ 391 private Base32(final int lineLength, final byte[] lineSeparator, final byte[] encodeTable, final byte padding, final CodecPolicy decodingPolicy) { 392 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, toLength(lineSeparator), padding, decodingPolicy); 393 Objects.requireNonNull(encodeTable, "encodeTable"); 394 this.encodeTable = encodeTable; 395 this.decodeTable = encodeTable == HEX_ENCODE_TABLE ? HEX_DECODE_TABLE : DECODE_TABLE; 396 if (lineLength > 0) { 397 if (lineSeparator == null) { 398 throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null"); 399 } 400 final byte[] lineSeparatorCopy = lineSeparator.clone(); 401 // Must be done after initializing the tables 402 if (containsAlphabetOrPad(lineSeparatorCopy)) { 403 final String sep = StringUtils.newStringUtf8(lineSeparatorCopy); 404 throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]"); 405 } 406 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparatorCopy.length; 407 this.lineSeparator = lineSeparatorCopy; 408 } else { 409 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 410 this.lineSeparator = null; 411 } 412 if (isInAlphabet(padding) || Character.isWhitespace(padding)) { 413 throw new IllegalArgumentException("pad must not be in alphabet or whitespace"); 414 } 415 } 416 417 /** 418 * <p> 419 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with 420 * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is not necessary when decoding, but it doesn't hurt, either. 421 * </p> 422 * <p> 423 * Ignores all non-Base32 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are silently ignored, but has implications 424 * for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity. 425 * </p> 426 * <p> 427 * Output is written to {@link org.apache.commons.codec.binary.BaseNCodec.Context#buffer Context#buffer} as 8-bit octets, using 428 * {@link org.apache.commons.codec.binary.BaseNCodec.Context#pos Context#pos} as the buffer position 429 * </p> 430 * 431 * @param input byte[] array of ASCII data to Base32 decode. 432 * @param inPos Position to start reading data from. 433 * @param inAvail Amount of bytes available from input for decoding. 434 * @param context the context to be used 435 */ 436 @Override 437 void decode(final byte[] input, int inPos, final int inAvail, final Context context) { 438 // package protected for access from I/O streams 439 if (context.eof) { 440 return; 441 } 442 if (inAvail < 0) { 443 context.eof = true; 444 } 445 final int decodeSize = this.encodeSize - 1; 446 for (int i = 0; i < inAvail; i++) { 447 final byte b = input[inPos++]; 448 if (b == pad) { 449 // We're done. 450 context.eof = true; 451 break; 452 } 453 final byte[] buffer = ensureBufferSize(decodeSize, context); 454 if (b >= 0 && b < this.decodeTable.length) { 455 final int result = this.decodeTable[b]; 456 if (result >= 0) { 457 context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK; 458 // collect decoded bytes 459 context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result; 460 if (context.modulus == 0) { // we can output the 5 bytes 461 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 32 & MASK_8BITS); 462 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS); 463 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS); 464 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS); 465 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS); 466 } 467 } 468 } 469 } 470 // Two forms of EOF as far as Base32 decoder is concerned: actual 471 // EOF (-1) and first time '=' character is encountered in stream. 472 // This approach makes the '=' padding characters completely optional. 473 if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do 474 final byte[] buffer = ensureBufferSize(decodeSize, context); 475 // We ignore partial bytes, i.e. only multiples of 8 count. 476 // Any combination not part of a valid encoding is either partially decoded 477 // or will raise an exception. Possible trailing characters are 2, 4, 5, 7. 478 // It is not possible to encode with 1, 3, 6 trailing characters. 479 // For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded. 480 // See the encode(byte[]) method EOF section. 481 switch (context.modulus) { 482// case 0 : // impossible, as excluded above 483 case 1: // 5 bits - either ignore entirely, or raise an exception 484 validateTrailingCharacters(); 485 case 2: // 10 bits, drop 2 and output one byte 486 validateCharacter(MASK_2BITS, context); 487 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 2 & MASK_8BITS); 488 break; 489 case 3: // 15 bits, drop 7 and output 1 byte, or raise an exception 490 validateTrailingCharacters(); 491 // Not possible from a valid encoding but decode anyway 492 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 7 & MASK_8BITS); 493 break; 494 case 4: // 20 bits = 2*8 + 4 495 validateCharacter(MASK_4BITS, context); 496 context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits 497 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS); 498 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS); 499 break; 500 case 5: // 25 bits = 3*8 + 1 501 validateCharacter(MASK_1BITS, context); 502 context.lbitWorkArea = context.lbitWorkArea >> 1; 503 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS); 504 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS); 505 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS); 506 break; 507 case 6: // 30 bits = 3*8 + 6, or raise an exception 508 validateTrailingCharacters(); 509 // Not possible from a valid encoding but decode anyway 510 context.lbitWorkArea = context.lbitWorkArea >> 6; 511 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS); 512 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS); 513 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS); 514 break; 515 case 7: // 35 bits = 4*8 +3 516 validateCharacter(MASK_3BITS, context); 517 context.lbitWorkArea = context.lbitWorkArea >> 3; 518 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS); 519 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS); 520 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS); 521 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS); 522 break; 523 default: 524 // modulus can be 0-7, and we excluded 0,1 already 525 throw new IllegalStateException("Impossible modulus " + context.modulus); 526 } 527 } 528 } 529 530 /** 531 * <p> 532 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with 533 * inAvail set to "-1" to alert encoder that EOF has been reached, so flush last remaining bytes (if not multiple of 5). 534 * </p> 535 * 536 * @param input byte[] array of binary data to Base32 encode. 537 * @param inPos Position to start reading data from. 538 * @param inAvail Amount of bytes available from input for encoding. 539 * @param context the context to be used 540 */ 541 @Override 542 void encode(final byte[] input, int inPos, final int inAvail, final Context context) { 543 // package protected for access from I/O streams 544 if (context.eof) { 545 return; 546 } 547 // inAvail < 0 is how we're informed of EOF in the underlying data we're 548 // encoding. 549 if (inAvail < 0) { 550 context.eof = true; 551 if (0 == context.modulus && lineLength == 0) { 552 return; // no leftovers to process and not using chunking 553 } 554 final byte[] buffer = ensureBufferSize(encodeSize, context); 555 final int savedPos = context.pos; 556 switch (context.modulus) { // % 5 557 case 0: 558 break; 559 case 1: // Only 1 octet; take top 5 bits then remainder 560 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3 561 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2 562 buffer[context.pos++] = pad; 563 buffer[context.pos++] = pad; 564 buffer[context.pos++] = pad; 565 buffer[context.pos++] = pad; 566 buffer[context.pos++] = pad; 567 buffer[context.pos++] = pad; 568 break; 569 case 2: // 2 octets = 16 bits to use 570 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11 571 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6 572 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1 573 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4 574 buffer[context.pos++] = pad; 575 buffer[context.pos++] = pad; 576 buffer[context.pos++] = pad; 577 buffer[context.pos++] = pad; 578 break; 579 case 3: // 3 octets = 24 bits to use 580 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19 581 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14 582 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9 583 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4 584 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1 585 buffer[context.pos++] = pad; 586 buffer[context.pos++] = pad; 587 buffer[context.pos++] = pad; 588 break; 589 case 4: // 4 octets = 32 bits to use 590 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27 591 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22 592 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17 593 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12 594 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7 595 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2 596 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3 597 buffer[context.pos++] = pad; 598 break; 599 default: 600 throw new IllegalStateException("Impossible modulus " + context.modulus); 601 } 602 context.currentLinePos += context.pos - savedPos; // keep track of current line position 603 // if currentPos == 0 we are at the start of a line, so don't add CRLF 604 if (lineLength > 0 && context.currentLinePos > 0) { // add chunk separator if required 605 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); 606 context.pos += lineSeparator.length; 607 } 608 } else { 609 for (int i = 0; i < inAvail; i++) { 610 final byte[] buffer = ensureBufferSize(encodeSize, context); 611 context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK; 612 int b = input[inPos++]; 613 if (b < 0) { 614 b += 256; 615 } 616 context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE 617 if (0 == context.modulus) { // we have enough bytes to create our output 618 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 35) & MASK_5BITS]; 619 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 30) & MASK_5BITS]; 620 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 25) & MASK_5BITS]; 621 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 20) & MASK_5BITS]; 622 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 15) & MASK_5BITS]; 623 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 10) & MASK_5BITS]; 624 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 5) & MASK_5BITS]; 625 buffer[context.pos++] = encodeTable[(int) context.lbitWorkArea & MASK_5BITS]; 626 context.currentLinePos += BYTES_PER_ENCODED_BLOCK; 627 if (lineLength > 0 && lineLength <= context.currentLinePos) { 628 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); 629 context.pos += lineSeparator.length; 630 context.currentLinePos = 0; 631 } 632 } 633 } 634 } 635 } 636 637 /** 638 * Gets the line separator (for testing only). 639 * 640 * @return the line separator. 641 */ 642 byte[] getLineSeparator() { 643 return lineSeparator; 644 } 645 646 /** 647 * Returns whether or not the {@code octet} is in the Base32 alphabet. 648 * 649 * @param octet The value to test 650 * @return {@code true} if the value is defined in the Base32 alphabet {@code false} otherwise. 651 */ 652 @Override 653 public boolean isInAlphabet(final byte octet) { 654 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; 655 } 656 657 /** 658 * Validates whether decoding the final trailing character is possible in the context of the set of possible base 32 values. 659 * <p> 660 * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-32 digit is zero in the bits 661 * that will be discarded. 662 * </p> 663 * 664 * @param emptyBitsMask The mask of the lower bits that should be empty 665 * @param context the context to be used 666 * @throws IllegalArgumentException if the bits being checked contain any non-zero value 667 */ 668 private void validateCharacter(final long emptyBitsMask, final Context context) { 669 // Use the long bit work area 670 if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) { 671 throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " + 672 "base 32 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero."); 673 } 674 } 675 676 /** 677 * Validates whether decoding allows final trailing characters that cannot be created during encoding. 678 * 679 * @throws IllegalArgumentException if strict decoding is enabled 680 */ 681 private void validateTrailingCharacters() { 682 if (isStrictDecoding()) { 683 throw new IllegalArgumentException("Strict decoding: Last encoded character(s) (before the paddings if any) are valid " + 684 "base 32 alphabet but not a possible encoding. Decoding requires either 2, 4, 5, or 7 trailing 5-bit characters to create bytes."); 685 } 686 } 687}