001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import org.apache.commons.codec.BinaryDecoder;
021import org.apache.commons.codec.BinaryEncoder;
022import org.apache.commons.codec.DecoderException;
023import org.apache.commons.codec.EncoderException;
024
025/**
026 * Converts between byte arrays and strings of "0"s and "1"s.
027 *
028 * <p>This class is immutable and thread-safe.</p>
029 *
030 * TODO: may want to add more bit vector functions like and/or/xor/nand
031 * TODO: also might be good to generate boolean[] from byte[] et cetera.
032 *
033 * @since 1.3
034 */
035public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
036    /*
037     * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
038     * it.
039     */
040    /** Empty char array. */
041    private static final char[] EMPTY_CHAR_ARRAY = {};
042
043    /** Empty byte array. */
044    private static final byte[] EMPTY_BYTE_ARRAY = {};
045
046    /** Mask for bit 0 of a byte. */
047    private static final int BIT_0 = 1;
048
049    /** Mask for bit 1 of a byte. */
050    private static final int BIT_1 = 0x02;
051
052    /** Mask for bit 2 of a byte. */
053    private static final int BIT_2 = 0x04;
054
055    /** Mask for bit 3 of a byte. */
056    private static final int BIT_3 = 0x08;
057
058    /** Mask for bit 4 of a byte. */
059    private static final int BIT_4 = 0x10;
060
061    /** Mask for bit 5 of a byte. */
062    private static final int BIT_5 = 0x20;
063
064    /** Mask for bit 6 of a byte. */
065    private static final int BIT_6 = 0x40;
066
067    /** Mask for bit 7 of a byte. */
068    private static final int BIT_7 = 0x80;
069
070    private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
071
072    /**
073     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
074     *
075     * @param ascii
076     *                  each byte represents an ASCII '0' or '1'
077     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
078     */
079    public static byte[] fromAscii(final byte[] ascii) {
080        if (isEmpty(ascii)) {
081            return EMPTY_BYTE_ARRAY;
082        }
083        final int asciiLength = ascii.length;
084        // get length/8 times bytes with 3 bit shifts to the right of the length
085        final byte[] raw = new byte[asciiLength >> 3];
086        /*
087         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
088         * loop.
089         */
090        for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
091            for (int bits = 0; bits < BITS.length; ++bits) {
092                if (ascii[jj - bits] == '1') {
093                    raw[ii] |= BITS[bits];
094                }
095            }
096        }
097        return raw;
098    }
099
100    /**
101     * Decodes a char array where each char represents an ASCII '0' or '1'.
102     *
103     * @param ascii
104     *                  each char represents an ASCII '0' or '1'
105     * @return the raw encoded binary where each bit corresponds to a char in the char array argument
106     */
107    public static byte[] fromAscii(final char[] ascii) {
108        if (ascii == null || ascii.length == 0) {
109            return EMPTY_BYTE_ARRAY;
110        }
111        final int asciiLength = ascii.length;
112        // get length/8 times bytes with 3 bit shifts to the right of the length
113        final byte[] raw = new byte[asciiLength >> 3];
114        /*
115         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
116         * loop.
117         */
118        for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
119            for (int bits = 0; bits < BITS.length; ++bits) {
120                if (ascii[jj - bits] == '1') {
121                    raw[ii] |= BITS[bits];
122                }
123            }
124        }
125        return raw;
126    }
127
128    /**
129     * Returns {@code true} if the given array is {@code null} or empty (size 0.)
130     *
131     * @param array
132     *            the source array
133     * @return {@code true} if the given array is {@code null} or empty (size 0.)
134     */
135    static boolean isEmpty(final byte[] array) {
136        return array == null || array.length == 0;
137    }
138
139    /**
140     * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
141     * char.
142     *
143     * @param raw
144     *                  the raw binary data to convert
145     * @return an array of 0 and 1 character bytes for each bit of the argument
146     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
147     */
148    public static byte[] toAsciiBytes(final byte[] raw) {
149        if (isEmpty(raw)) {
150            return EMPTY_BYTE_ARRAY;
151        }
152        final int rawLength = raw.length;
153        // get 8 times the bytes with 3 bit shifts to the left of the length
154        final byte[] l_ascii = new byte[rawLength << 3];
155        /*
156         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
157         * loop.
158         */
159        for (int ii = 0, jj = l_ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
160            for (int bits = 0; bits < BITS.length; ++bits) {
161                if ((raw[ii] & BITS[bits]) == 0) {
162                    l_ascii[jj - bits] = '0';
163                } else {
164                    l_ascii[jj - bits] = '1';
165                }
166            }
167        }
168        return l_ascii;
169    }
170
171    /**
172     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
173     *
174     * @param raw
175     *                  the raw binary data to convert
176     * @return an array of 0 and 1 characters for each bit of the argument
177     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
178     */
179    public static char[] toAsciiChars(final byte[] raw) {
180        if (isEmpty(raw)) {
181            return EMPTY_CHAR_ARRAY;
182        }
183        final int rawLength = raw.length;
184        // get 8 times the bytes with 3 bit shifts to the left of the length
185        final char[] l_ascii = new char[rawLength << 3];
186        /*
187         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
188         * loop.
189         */
190        for (int ii = 0, jj = l_ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
191            for (int bits = 0; bits < BITS.length; ++bits) {
192                if ((raw[ii] & BITS[bits]) == 0) {
193                    l_ascii[jj - bits] = '0';
194                } else {
195                    l_ascii[jj - bits] = '1';
196                }
197            }
198        }
199        return l_ascii;
200    }
201
202    /**
203     * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
204     *
205     * @param raw
206     *                  the raw binary data to convert
207     * @return a String of 0 and 1 characters representing the binary data
208     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
209     */
210    public static String toAsciiString(final byte[] raw) {
211        return new String(toAsciiChars(raw));
212    }
213
214    /**
215     * Constructs a new instance.
216     */
217    public BinaryCodec() {
218        // empty
219    }
220
221    /**
222     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
223     *
224     * @param ascii
225     *                  each byte represents an ASCII '0' or '1'
226     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
227     * @see org.apache.commons.codec.Decoder#decode(Object)
228     */
229    @Override
230    public byte[] decode(final byte[] ascii) {
231        return fromAscii(ascii);
232    }
233
234    /**
235     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
236     *
237     * @param ascii
238     *                  each byte represents an ASCII '0' or '1'
239     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
240     * @throws DecoderException
241     *                  if argument is not a byte[], char[] or String
242     * @see org.apache.commons.codec.Decoder#decode(Object)
243     */
244    @Override
245    public Object decode(final Object ascii) throws DecoderException {
246        if (ascii == null) {
247            return EMPTY_BYTE_ARRAY;
248        }
249        if (ascii instanceof byte[]) {
250            return fromAscii((byte[]) ascii);
251        }
252        if (ascii instanceof char[]) {
253            return fromAscii((char[]) ascii);
254        }
255        if (ascii instanceof String) {
256            return fromAscii(((String) ascii).toCharArray());
257        }
258        throw new DecoderException("argument not a byte array");
259    }
260
261    /**
262     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
263     *
264     * @param raw
265     *                  the raw binary data to convert
266     * @return 0 and 1 ASCII character bytes one for each bit of the argument
267     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
268     */
269    @Override
270    public byte[] encode(final byte[] raw) {
271        return toAsciiBytes(raw);
272    }
273
274    /**
275     * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
276     *
277     * @param raw
278     *                  the raw binary data to convert
279     * @return 0 and 1 ASCII character chars one for each bit of the argument
280     * @throws EncoderException
281     *                  if the argument is not a byte[]
282     * @see org.apache.commons.codec.Encoder#encode(Object)
283     */
284    @Override
285    public Object encode(final Object raw) throws EncoderException {
286        if (!(raw instanceof byte[])) {
287            throw new EncoderException("argument not a byte array");
288        }
289        return toAsciiChars((byte[]) raw);
290    }
291
292    /**
293     * Decodes a String where each char of the String represents an ASCII '0' or '1'.
294     *
295     * @param ascii
296     *                  String of '0' and '1' characters
297     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
298     * @see org.apache.commons.codec.Decoder#decode(Object)
299     */
300    public byte[] toByteArray(final String ascii) {
301        if (ascii == null) {
302            return EMPTY_BYTE_ARRAY;
303        }
304        return fromAscii(ascii.toCharArray());
305    }
306}