View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import org.apache.commons.codec.BinaryDecoder;
21  import org.apache.commons.codec.BinaryEncoder;
22  import org.apache.commons.codec.DecoderException;
23  import org.apache.commons.codec.EncoderException;
24  
25  /**
26   * Converts between byte arrays and strings of "0"s and "1"s.
27   *
28   * <p>This class is immutable and thread-safe.</p>
29   *
30   * TODO: may want to add more bit vector functions like and/or/xor/nand
31   * TODO: also might be good to generate boolean[] from byte[] et cetera.
32   *
33   * @since 1.3
34   */
35  public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
36      /*
37       * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
38       * it.
39       */
40      /** Empty char array. */
41      private static final char[] EMPTY_CHAR_ARRAY = {};
42  
43      /** Empty byte array. */
44      private static final byte[] EMPTY_BYTE_ARRAY = {};
45  
46      /** Mask for bit 0 of a byte. */
47      private static final int BIT_0 = 1;
48  
49      /** Mask for bit 1 of a byte. */
50      private static final int BIT_1 = 0x02;
51  
52      /** Mask for bit 2 of a byte. */
53      private static final int BIT_2 = 0x04;
54  
55      /** Mask for bit 3 of a byte. */
56      private static final int BIT_3 = 0x08;
57  
58      /** Mask for bit 4 of a byte. */
59      private static final int BIT_4 = 0x10;
60  
61      /** Mask for bit 5 of a byte. */
62      private static final int BIT_5 = 0x20;
63  
64      /** Mask for bit 6 of a byte. */
65      private static final int BIT_6 = 0x40;
66  
67      /** Mask for bit 7 of a byte. */
68      private static final int BIT_7 = 0x80;
69  
70      private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
71  
72      /**
73       * Decodes a byte array where each byte represents an ASCII '0' or '1'.
74       *
75       * @param ascii
76       *                  each byte represents an ASCII '0' or '1'
77       * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
78       */
79      public static byte[] fromAscii(final byte[] ascii) {
80          if (isEmpty(ascii)) {
81              return EMPTY_BYTE_ARRAY;
82          }
83          final int asciiLength = ascii.length;
84          // get length/8 times bytes with 3 bit shifts to the right of the length
85          final byte[] raw = new byte[asciiLength >> 3];
86          /*
87           * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
88           * loop.
89           */
90          for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
91              for (int bits = 0; bits < BITS.length; ++bits) {
92                  if (ascii[jj - bits] == '1') {
93                      raw[ii] |= BITS[bits];
94                  }
95              }
96          }
97          return raw;
98      }
99  
100     /**
101      * Decodes a char array where each char represents an ASCII '0' or '1'.
102      *
103      * @param ascii
104      *                  each char represents an ASCII '0' or '1'
105      * @return the raw encoded binary where each bit corresponds to a char in the char array argument
106      */
107     public static byte[] fromAscii(final char[] ascii) {
108         if (ascii == null || ascii.length == 0) {
109             return EMPTY_BYTE_ARRAY;
110         }
111         final int asciiLength = ascii.length;
112         // get length/8 times bytes with 3 bit shifts to the right of the length
113         final byte[] raw = new byte[asciiLength >> 3];
114         /*
115          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
116          * loop.
117          */
118         for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
119             for (int bits = 0; bits < BITS.length; ++bits) {
120                 if (ascii[jj - bits] == '1') {
121                     raw[ii] |= BITS[bits];
122                 }
123             }
124         }
125         return raw;
126     }
127 
128     /**
129      * Returns {@code true} if the given array is {@code null} or empty (size 0.)
130      *
131      * @param array
132      *            the source array
133      * @return {@code true} if the given array is {@code null} or empty (size 0.)
134      */
135     static boolean isEmpty(final byte[] array) {
136         return array == null || array.length == 0;
137     }
138 
139     /**
140      * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
141      * char.
142      *
143      * @param raw
144      *                  the raw binary data to convert
145      * @return an array of 0 and 1 character bytes for each bit of the argument
146      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
147      */
148     public static byte[] toAsciiBytes(final byte[] raw) {
149         if (isEmpty(raw)) {
150             return EMPTY_BYTE_ARRAY;
151         }
152         final int rawLength = raw.length;
153         // get 8 times the bytes with 3 bit shifts to the left of the length
154         final byte[] l_ascii = new byte[rawLength << 3];
155         /*
156          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
157          * loop.
158          */
159         for (int ii = 0, jj = l_ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
160             for (int bits = 0; bits < BITS.length; ++bits) {
161                 if ((raw[ii] & BITS[bits]) == 0) {
162                     l_ascii[jj - bits] = '0';
163                 } else {
164                     l_ascii[jj - bits] = '1';
165                 }
166             }
167         }
168         return l_ascii;
169     }
170 
171     /**
172      * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
173      *
174      * @param raw
175      *                  the raw binary data to convert
176      * @return an array of 0 and 1 characters for each bit of the argument
177      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
178      */
179     public static char[] toAsciiChars(final byte[] raw) {
180         if (isEmpty(raw)) {
181             return EMPTY_CHAR_ARRAY;
182         }
183         final int rawLength = raw.length;
184         // get 8 times the bytes with 3 bit shifts to the left of the length
185         final char[] l_ascii = new char[rawLength << 3];
186         /*
187          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
188          * loop.
189          */
190         for (int ii = 0, jj = l_ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
191             for (int bits = 0; bits < BITS.length; ++bits) {
192                 if ((raw[ii] & BITS[bits]) == 0) {
193                     l_ascii[jj - bits] = '0';
194                 } else {
195                     l_ascii[jj - bits] = '1';
196                 }
197             }
198         }
199         return l_ascii;
200     }
201 
202     /**
203      * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
204      *
205      * @param raw
206      *                  the raw binary data to convert
207      * @return a String of 0 and 1 characters representing the binary data
208      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
209      */
210     public static String toAsciiString(final byte[] raw) {
211         return new String(toAsciiChars(raw));
212     }
213 
214     /**
215      * Constructs a new instance.
216      */
217     public BinaryCodec() {
218         // empty
219     }
220 
221     /**
222      * Decodes a byte array where each byte represents an ASCII '0' or '1'.
223      *
224      * @param ascii
225      *                  each byte represents an ASCII '0' or '1'
226      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
227      * @see org.apache.commons.codec.Decoder#decode(Object)
228      */
229     @Override
230     public byte[] decode(final byte[] ascii) {
231         return fromAscii(ascii);
232     }
233 
234     /**
235      * Decodes a byte array where each byte represents an ASCII '0' or '1'.
236      *
237      * @param ascii
238      *                  each byte represents an ASCII '0' or '1'
239      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
240      * @throws DecoderException
241      *                  if argument is not a byte[], char[] or String
242      * @see org.apache.commons.codec.Decoder#decode(Object)
243      */
244     @Override
245     public Object decode(final Object ascii) throws DecoderException {
246         if (ascii == null) {
247             return EMPTY_BYTE_ARRAY;
248         }
249         if (ascii instanceof byte[]) {
250             return fromAscii((byte[]) ascii);
251         }
252         if (ascii instanceof char[]) {
253             return fromAscii((char[]) ascii);
254         }
255         if (ascii instanceof String) {
256             return fromAscii(((String) ascii).toCharArray());
257         }
258         throw new DecoderException("argument not a byte array");
259     }
260 
261     /**
262      * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
263      *
264      * @param raw
265      *                  the raw binary data to convert
266      * @return 0 and 1 ASCII character bytes one for each bit of the argument
267      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
268      */
269     @Override
270     public byte[] encode(final byte[] raw) {
271         return toAsciiBytes(raw);
272     }
273 
274     /**
275      * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
276      *
277      * @param raw
278      *                  the raw binary data to convert
279      * @return 0 and 1 ASCII character chars one for each bit of the argument
280      * @throws EncoderException
281      *                  if the argument is not a byte[]
282      * @see org.apache.commons.codec.Encoder#encode(Object)
283      */
284     @Override
285     public Object encode(final Object raw) throws EncoderException {
286         if (!(raw instanceof byte[])) {
287             throw new EncoderException("argument not a byte array");
288         }
289         return toAsciiChars((byte[]) raw);
290     }
291 
292     /**
293      * Decodes a String where each char of the String represents an ASCII '0' or '1'.
294      *
295      * @param ascii
296      *                  String of '0' and '1' characters
297      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
298      * @see org.apache.commons.codec.Decoder#decode(Object)
299      */
300     public byte[] toByteArray(final String ascii) {
301         if (ascii == null) {
302             return EMPTY_BYTE_ARRAY;
303         }
304         return fromAscii(ascii.toCharArray());
305     }
306 }