001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.digest;
019
020import org.apache.commons.codec.binary.StringUtils;
021
022/**
023 * Implements the MurmurHash3 32-bit and 128-bit hash functions.
024 *
025 * <p>
026 * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
027 * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
028 * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
029 * </p>
030 *
031 * <p>
032 * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
033 * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
034 * </p>
035 *
036 * <p>
037 * This is public domain code with no copyrights. From home page of
038 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
039 * </p>
040 *
041 * <blockquote> "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
042 * code." </blockquote>
043 *
044 * <p>
045 * Original adaption from Apache Hive. That adaption contains a {@code hash64} method that is not part of the original
046 * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
047 * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
048 * </p>
049 *
050 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
051 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 c++
052 *      code</a>
053 * @see <a href=
054 *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
055 *      Apache Hive Murmer3</a>
056 * @since 1.13
057 */
058public final class MurmurHash3 {
059
060    /**
061     * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
062     * hash computed.
063     *
064     * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
065     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
066     *
067     * <p>This implementation contains a sign-extension bug in the finalization step of
068     * any bytes left over from dividing the length by 4. This manifests if any of these
069     * bytes are negative.</p>
070     *
071     * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
072     */
073    @Deprecated
074    public static class IncrementalHash32 extends IncrementalHash32x86 {
075
076        /**
077         * {@inheritDoc}
078         *
079         * <p>This implementation contains a sign-extension bug in the finalization step of
080         * any bytes left over from dividing the length by 4. This manifests if any of these
081         * bytes are negative.<p>
082         *
083         * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
084         */
085        @Override
086        @Deprecated
087        int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
088            int result = hash;
089            // ************
090            // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
091            // ************
092            int k1 = 0;
093            switch (unprocessedLength) {
094            case 3:
095                k1 ^= unprocessed[2] << 16;
096            case 2:
097                k1 ^= unprocessed[1] << 8;
098            case 1:
099                k1 ^= unprocessed[0];
100
101                // mix functions
102                k1 *= C1_32;
103                k1 = Integer.rotateLeft(k1, R1_32);
104                k1 *= C2_32;
105                result ^= k1;
106            }
107
108            // finalization
109            result ^= totalLen;
110            return fmix32(result);
111        }
112    }
113
114    /**
115     * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
116     * hash computed.
117     *
118     * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
119     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
120     *
121     * @since 1.14
122     */
123    public static class IncrementalHash32x86 {
124
125        /** The size of byte blocks that are processed together. */
126        private static final int BLOCK_SIZE = 4;
127
128        /**
129         * Combines the bytes using an Or operation ({@code | } in a little-endian representation
130         * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
131         * significant.
132         *
133         * @param b1 The first byte
134         * @param b2 The second byte
135         * @param b3 The third byte
136         * @param b4 The fourth byte
137         * @return The 32-bit integer
138         */
139        private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
140            return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
141        }
142
143        /** Up to 3 unprocessed bytes from input data. */
144        private final byte[] unprocessed = new byte[3];
145
146        /** The number of unprocessed bytes in the tail data. */
147        private int unprocessedLength;
148
149        /** The total number of input bytes added since the start. */
150        private int totalLen;
151
152        /**
153         * The current running hash.
154         * This must be finalised to generate the 32-bit hash value.
155         */
156        private int hash;
157
158        /**
159         * Adds the byte array to the current incremental hash.
160         *
161         * @param data The input byte array
162         * @param offset The offset of data
163         * @param length The length of array
164         */
165        public final void add(final byte[] data, final int offset, final int length) {
166            if (length <= 0) {
167                // Nothing to add
168                return;
169            }
170            totalLen += length;
171
172            // Process the bytes in blocks of 4.
173            // New bytes must be added to any current unprocessed bytes,
174            // then processed in blocks of 4 and the remaining bytes saved:
175            //
176            //    |--|---------------------------|--|
177            // unprocessed
178            //                main block
179            //                                remaining
180
181            // Check if the unprocessed bytes and new bytes can fill a block of 4.
182            // Make this overflow safe in the event that length is Integer.MAX_VALUE.
183            // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
184            if (unprocessedLength + length - BLOCK_SIZE < 0) {
185                // Not enough so add to the unprocessed bytes
186                System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
187                unprocessedLength += length;
188                return;
189            }
190
191            // Combine unprocessed bytes with new bytes.
192            final int newOffset;
193            final int newLength;
194            if (unprocessedLength > 0) {
195                int k = -1;
196                switch (unprocessedLength) {
197                case 1:
198                    k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
199                    break;
200                case 2:
201                    k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
202                    break;
203                case 3:
204                    k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
205                    break;
206                default:
207                    throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
208                }
209                hash = mix32(k, hash);
210                // Update the offset and length
211                final int consumed = BLOCK_SIZE - unprocessedLength;
212                newOffset = offset + consumed;
213                newLength = length - consumed;
214            } else {
215                newOffset = offset;
216                newLength = length;
217            }
218
219            // Main processing of blocks of 4 bytes
220            final int nblocks = newLength >> 2;
221
222            for (int i = 0; i < nblocks; i++) {
223                final int index = newOffset + (i << 2);
224                final int k = getLittleEndianInt(data, index);
225                hash = mix32(k, hash);
226            }
227
228            // Save left-over unprocessed bytes
229            final int consumed = nblocks << 2;
230            unprocessedLength = newLength - consumed;
231            if (unprocessedLength != 0) {
232                System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
233            }
234        }
235
236        /**
237         * Generates the 32-bit hash value. Repeat calls to this method with no additional data
238         * will generate the same hash value.
239         *
240         * @return The 32-bit hash
241         */
242        public final int end() {
243            // Allow calling end() again after adding no data to return the same result.
244            return finalise(hash, unprocessedLength, unprocessed, totalLen);
245        }
246
247        /**
248         * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
249         * and performing final mixing.
250         *
251         * @param hash The running hash
252         * @param unprocessedLength The number of unprocessed bytes in the tail data.
253         * @param unprocessed Up to 3 unprocessed bytes from input data.
254         * @param totalLen The total number of input bytes added since the start.
255         * @return The 32-bit hash
256         */
257        int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
258            int result = hash;
259            int k1 = 0;
260            switch (unprocessedLength) {
261            case 3:
262                k1 ^= (unprocessed[2] & 0xff) << 16;
263            case 2:
264                k1 ^= (unprocessed[1] & 0xff) << 8;
265            case 1:
266                k1 ^= unprocessed[0] & 0xff;
267
268                // mix functions
269                k1 *= C1_32;
270                k1 = Integer.rotateLeft(k1, R1_32);
271                k1 *= C2_32;
272                result ^= k1;
273            }
274
275            // finalization
276            result ^= totalLen;
277            return fmix32(result);
278        }
279
280        /**
281         * Starts a new incremental hash.
282         *
283         * @param seed The initial seed value
284         */
285        public final void start(final int seed) {
286            // Reset
287            unprocessedLength = totalLen = 0;
288            this.hash = seed;
289        }
290    }
291
292    /**
293     * A random number to use for a hash code.
294     *
295     * @deprecated This is not used internally and will be removed in a future release.
296     */
297    @Deprecated
298    public static final long NULL_HASHCODE = 2862933555777941757L;
299    /**
300     * A default seed to use for the murmur hash algorithm.
301     * Has the value {@code 104729}.
302     */
303    public static final int DEFAULT_SEED = 104729;
304    // Constants for 32-bit variant
305    private static final int C1_32 = 0xcc9e2d51;
306    private static final int C2_32 = 0x1b873593;
307    private static final int R1_32 = 15;
308    private static final int R2_32 = 13;
309
310    private static final int M_32 = 5;
311    private static final int N_32 = 0xe6546b64;
312    // Constants for 128-bit variant
313    private static final long C1 = 0x87c37b91114253d5L;
314    private static final long C2 = 0x4cf5ad432745937fL;
315    private static final int R1 = 31;
316    private static final int R2 = 27;
317    private static final int R3 = 33;
318    private static final int M = 5;
319
320    private static final int N1 = 0x52dce729;
321
322    private static final int N2 = 0x38495ab5;
323
324    /**
325     * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
326     *
327     * @param hash The current hash
328     * @return The final hash
329     */
330    private static int fmix32(int hash) {
331        hash ^= hash >>> 16;
332        hash *= 0x85ebca6b;
333        hash ^= hash >>> 13;
334        hash *= 0xc2b2ae35;
335        hash ^= hash >>> 16;
336        return hash;
337    }
338
339    /**
340     * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
341     *
342     * @param hash The current hash
343     * @return The final hash
344     */
345    private static long fmix64(long hash) {
346        hash ^= hash >>> 33;
347        hash *= 0xff51afd7ed558ccdL;
348        hash ^= hash >>> 33;
349        hash *= 0xc4ceb9fe1a85ec53L;
350        hash ^= hash >>> 33;
351        return hash;
352    }
353
354    /**
355     * Gets the little-endian int from 4 bytes starting at the specified index.
356     *
357     * @param data The data
358     * @param index The index
359     * @return The little-endian int
360     */
361    private static int getLittleEndianInt(final byte[] data, final int index) {
362        return data[index    ] & 0xff |
363               (data[index + 1] & 0xff) <<  8 |
364               (data[index + 2] & 0xff) << 16 |
365               (data[index + 3] & 0xff) << 24;
366    }
367
368    /**
369     * Gets the little-endian long from 8 bytes starting at the specified index.
370     *
371     * @param data The data
372     * @param index The index
373     * @return The little-endian long
374     */
375    private static long getLittleEndianLong(final byte[] data, final int index) {
376        return (long) data[index    ] & 0xff |
377               ((long) data[index + 1] & 0xff) <<  8 |
378               ((long) data[index + 2] & 0xff) << 16 |
379               ((long) data[index + 3] & 0xff) << 24 |
380               ((long) data[index + 4] & 0xff) << 32 |
381               ((long) data[index + 5] & 0xff) << 40 |
382               ((long) data[index + 6] & 0xff) << 48 |
383               ((long) data[index + 7] & 0xff) << 56;
384    }
385
386    /**
387     * Generates 128-bit hash from the byte array with a default seed.
388     * This is a helper method that will produce the same result as:
389     *
390     * <pre>
391     * int offset = 0;
392     * int seed = 104729;
393     * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
394     * </pre>
395     *
396     * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
397     * this result as the default seed is positive.</p>
398     *
399     * @param data The input byte array
400     * @return The 128-bit hash (2 longs)
401     * @see #hash128(byte[], int, int, int)
402     */
403    public static long[] hash128(final byte[] data) {
404        return hash128(data, 0, data.length, DEFAULT_SEED);
405    }
406
407    /**
408     * Generates 128-bit hash from the byte array with the given offset, length and seed.
409     *
410     * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
411     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
412     *
413     * <p>This implementation contains a sign-extension bug in the seed initialization.
414     * This manifests if the seed is negative.</p>
415     *
416     * @param data The input byte array
417     * @param offset The first element of array
418     * @param length The length of array
419     * @param seed The initial seed value
420     * @return The 128-bit hash (2 longs)
421     * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
422     */
423    @Deprecated
424    public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
425        // ************
426        // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
427        // to maintain behavioral compatibility with the original version.
428        // The implicit conversion to a long will extend a negative sign
429        // bit through the upper 32-bits of the long seed. These should be zero.
430        // ************
431        return hash128x64Internal(data, offset, length, seed);
432    }
433
434    /**
435     * Generates 128-bit hash from a string with a default seed.
436     * <p>
437     * Before 1.14 the string was converted using default encoding.
438     * Since 1.14 the string is converted to bytes using UTF-8 encoding.
439     * </p>
440     * <p>
441     * This is a helper method that will produce the same result as:
442     * </p>
443     *
444     * <pre>
445     * int offset = 0;
446     * int seed = 104729;
447     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
448     * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
449     * </pre>
450     *
451     * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
452     * this result as the default seed is positive.</p>
453     *
454     * @param data The input String
455     * @return The 128-bit hash (2 longs)
456     * @see #hash128(byte[], int, int, int)
457     * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from
458     * {@link String#getBytes(java.nio.charset.Charset)}.
459     */
460    @Deprecated
461    public static long[] hash128(final String data) {
462        final byte[] bytes = StringUtils.getBytesUtf8(data);
463        return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
464    }
465
466    /**
467     * Generates 128-bit hash from the byte array with a seed of zero.
468     * This is a helper method that will produce the same result as:
469     *
470     * <pre>
471     * int offset = 0;
472     * int seed = 0;
473     * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
474     * </pre>
475     *
476     * @param data The input byte array
477     * @return The 128-bit hash (2 longs)
478     * @see #hash128x64(byte[], int, int, int)
479     * @since 1.14
480     */
481    public static long[] hash128x64(final byte[] data) {
482        return hash128x64(data, 0, data.length, 0);
483    }
484
485    /**
486     * Generates 128-bit hash from the byte array with the given offset, length and seed.
487     *
488     * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
489     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
490     *
491     * @param data The input byte array
492     * @param offset The first element of array
493     * @param length The length of array
494     * @param seed The initial seed value
495     * @return The 128-bit hash (2 longs)
496     * @since 1.14
497     */
498    public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
499        // Use an unsigned 32-bit integer as the seed
500        return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
501    }
502
503    /**
504     * Generates 128-bit hash from the byte array with the given offset, length and seed.
505     *
506     * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
507     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
508     *
509     * @param data The input byte array
510     * @param offset The first element of array
511     * @param length The length of array
512     * @param seed The initial seed value
513     * @return The 128-bit hash (2 longs)
514     */
515    private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
516        long h1 = seed;
517        long h2 = seed;
518        final int nblocks = length >> 4;
519
520        // body
521        for (int i = 0; i < nblocks; i++) {
522            final int index = offset + (i << 4);
523            long k1 = getLittleEndianLong(data, index);
524            long k2 = getLittleEndianLong(data, index + 8);
525
526            // mix functions for k1
527            k1 *= C1;
528            k1 = Long.rotateLeft(k1, R1);
529            k1 *= C2;
530            h1 ^= k1;
531            h1 = Long.rotateLeft(h1, R2);
532            h1 += h2;
533            h1 = h1 * M + N1;
534
535            // mix functions for k2
536            k2 *= C2;
537            k2 = Long.rotateLeft(k2, R3);
538            k2 *= C1;
539            h2 ^= k2;
540            h2 = Long.rotateLeft(h2, R1);
541            h2 += h1;
542            h2 = h2 * M + N2;
543        }
544
545        // tail
546        long k1 = 0;
547        long k2 = 0;
548        final int index = offset + (nblocks << 4);
549        switch (offset + length - index) {
550        case 15:
551            k2 ^= ((long) data[index + 14] & 0xff) << 48;
552        case 14:
553            k2 ^= ((long) data[index + 13] & 0xff) << 40;
554        case 13:
555            k2 ^= ((long) data[index + 12] & 0xff) << 32;
556        case 12:
557            k2 ^= ((long) data[index + 11] & 0xff) << 24;
558        case 11:
559            k2 ^= ((long) data[index + 10] & 0xff) << 16;
560        case 10:
561            k2 ^= ((long) data[index + 9] & 0xff) << 8;
562        case 9:
563            k2 ^= data[index + 8] & 0xff;
564            k2 *= C2;
565            k2 = Long.rotateLeft(k2, R3);
566            k2 *= C1;
567            h2 ^= k2;
568
569        case 8:
570            k1 ^= ((long) data[index + 7] & 0xff) << 56;
571        case 7:
572            k1 ^= ((long) data[index + 6] & 0xff) << 48;
573        case 6:
574            k1 ^= ((long) data[index + 5] & 0xff) << 40;
575        case 5:
576            k1 ^= ((long) data[index + 4] & 0xff) << 32;
577        case 4:
578            k1 ^= ((long) data[index + 3] & 0xff) << 24;
579        case 3:
580            k1 ^= ((long) data[index + 2] & 0xff) << 16;
581        case 2:
582            k1 ^= ((long) data[index + 1] & 0xff) << 8;
583        case 1:
584            k1 ^= data[index] & 0xff;
585            k1 *= C1;
586            k1 = Long.rotateLeft(k1, R1);
587            k1 *= C2;
588            h1 ^= k1;
589        }
590
591        // finalization
592        h1 ^= length;
593        h2 ^= length;
594
595        h1 += h2;
596        h2 += h1;
597
598        h1 = fmix64(h1);
599        h2 = fmix64(h2);
600
601        h1 += h2;
602        h2 += h1;
603
604        return new long[] { h1, h2 };
605    }
606
607    /**
608     * Generates 32-bit hash from the byte array with a default seed.
609     * This is a helper method that will produce the same result as:
610     *
611     * <pre>
612     * int offset = 0;
613     * int seed = 104729;
614     * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
615     * </pre>
616     *
617     * <p>This implementation contains a sign-extension bug in the finalization step of
618     * any bytes left over from dividing the length by 4. This manifests if any of these
619     * bytes are negative.</p>
620     *
621     * @param data The input byte array
622     * @return The 32-bit hash
623     * @see #hash32(byte[], int, int, int)
624     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
625     */
626    @Deprecated
627    public static int hash32(final byte[] data) {
628        return hash32(data, 0, data.length, DEFAULT_SEED);
629    }
630
631    /**
632     * Generates 32-bit hash from the byte array with the given length and a default seed.
633     * This is a helper method that will produce the same result as:
634     *
635     * <pre>
636     * int offset = 0;
637     * int seed = 104729;
638     * int hash = MurmurHash3.hash32(data, offset, length, seed);
639     * </pre>
640     *
641     * <p>This implementation contains a sign-extension bug in the finalization step of
642     * any bytes left over from dividing the length by 4. This manifests if any of these
643     * bytes are negative.</p>
644     *
645     * @param data The input byte array
646     * @param length The length of array
647     * @return The 32-bit hash
648     * @see #hash32(byte[], int, int, int)
649     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
650     */
651    @Deprecated
652    public static int hash32(final byte[] data, final int length) {
653        return hash32(data, length, DEFAULT_SEED);
654    }
655
656    /**
657     * Generates 32-bit hash from the byte array with the given length and seed. This is a
658     * helper method that will produce the same result as:
659     *
660     * <pre>
661     * int offset = 0;
662     * int hash = MurmurHash3.hash32(data, offset, length, seed);
663     * </pre>
664     *
665     * <p>This implementation contains a sign-extension bug in the finalization step of
666     * any bytes left over from dividing the length by 4. This manifests if any of these
667     * bytes are negative.</p>
668     *
669     * @param data The input byte array
670     * @param length The length of array
671     * @param seed The initial seed value
672     * @return The 32-bit hash
673     * @see #hash32(byte[], int, int, int)
674     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
675     */
676    @Deprecated
677    public static int hash32(final byte[] data, final int length, final int seed) {
678        return hash32(data, 0, length, seed);
679    }
680
681    /**
682     * Generates 32-bit hash from the byte array with the given offset, length and seed.
683     *
684     * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
685     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
686     *
687     * <p>This implementation contains a sign-extension bug in the finalization step of
688     * any bytes left over from dividing the length by 4. This manifests if any of these
689     * bytes are negative.</p>
690     *
691     * @param data The input byte array
692     * @param offset The offset of data
693     * @param length The length of array
694     * @param seed The initial seed value
695     * @return The 32-bit hash
696     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
697     */
698    @Deprecated
699    public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
700        int hash = seed;
701        final int nblocks = length >> 2;
702
703        // body
704        for (int i = 0; i < nblocks; i++) {
705            final int index = offset + (i << 2);
706            final int k = getLittleEndianInt(data, index);
707            hash = mix32(k, hash);
708        }
709
710        // tail
711        // ************
712        // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
713        // ************
714        final int index = offset + (nblocks << 2);
715        int k1 = 0;
716        switch (offset + length - index) {
717        case 3:
718            k1 ^= data[index + 2] << 16;
719        case 2:
720            k1 ^= data[index + 1] << 8;
721        case 1:
722            k1 ^= data[index];
723
724            // mix functions
725            k1 *= C1_32;
726            k1 = Integer.rotateLeft(k1, R1_32);
727            k1 *= C2_32;
728            hash ^= k1;
729        }
730
731        hash ^= length;
732        return fmix32(hash);
733    }
734
735    /**
736     * Generates 32-bit hash from a long with a default seed value.
737     * This is a helper method that will produce the same result as:
738     *
739     * <pre>
740     * int offset = 0;
741     * int seed = 104729;
742     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
743     *                                            .putLong(data)
744     *                                            .array(), offset, 8, seed);
745     * </pre>
746     *
747     * @param data The long to hash
748     * @return The 32-bit hash
749     * @see #hash32x86(byte[], int, int, int)
750     */
751    public static int hash32(final long data) {
752        return hash32(data, DEFAULT_SEED);
753    }
754
755    /**
756     * Generates 32-bit hash from a long with the given seed.
757     * This is a helper method that will produce the same result as:
758     *
759     * <pre>
760     * int offset = 0;
761     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
762     *                                            .putLong(data)
763     *                                            .array(), offset, 8, seed);
764     * </pre>
765     *
766     * @param data The long to hash
767     * @param seed The initial seed value
768     * @return The 32-bit hash
769     * @see #hash32x86(byte[], int, int, int)
770     */
771    public static int hash32(final long data, final int seed) {
772        int hash = seed;
773        final long r0 = Long.reverseBytes(data);
774
775        hash = mix32((int) r0, hash);
776        hash = mix32((int) (r0 >>> 32), hash);
777
778        hash ^= Long.BYTES;
779        return fmix32(hash);
780    }
781
782    /**
783     * Generates 32-bit hash from two longs with a default seed value.
784     * This is a helper method that will produce the same result as:
785     *
786     * <pre>
787     * int offset = 0;
788     * int seed = 104729;
789     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
790     *                                            .putLong(data1)
791     *                                            .putLong(data2)
792     *                                            .array(), offset, 16, seed);
793     * </pre>
794     *
795     * @param data1 The first long to hash
796     * @param data2 The second long to hash
797     * @return The 32-bit hash
798     * @see #hash32x86(byte[], int, int, int)
799     */
800    public static int hash32(final long data1, final long data2) {
801        return hash32(data1, data2, DEFAULT_SEED);
802    }
803
804    /**
805     * Generates 32-bit hash from two longs with the given seed.
806     * This is a helper method that will produce the same result as:
807     *
808     * <pre>
809     * int offset = 0;
810     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
811     *                                            .putLong(data1)
812     *                                            .putLong(data2)
813     *                                            .array(), offset, 16, seed);
814     * </pre>
815     *
816     * @param data1 The first long to hash
817     * @param data2 The second long to hash
818     * @param seed The initial seed value
819     * @return The 32-bit hash
820     * @see #hash32x86(byte[], int, int, int)
821     */
822    public static int hash32(final long data1, final long data2, final int seed) {
823        int hash = seed;
824        final long r0 = Long.reverseBytes(data1);
825        final long r1 = Long.reverseBytes(data2);
826
827        hash = mix32((int) r0, hash);
828        hash = mix32((int) (r0 >>> 32), hash);
829        hash = mix32((int) r1, hash);
830        hash = mix32((int) (r1 >>> 32), hash);
831
832        hash ^= Long.BYTES * 2;
833        return fmix32(hash);
834    }
835
836    /**
837     * Generates 32-bit hash from a string with a default seed.
838     * <p>
839     * Before 1.14 the string was converted using default encoding.
840     * Since 1.14 the string is converted to bytes using UTF-8 encoding.
841     * </p>
842     * This is a helper method that will produce the same result as:
843     *
844     * <pre>
845     * int offset = 0;
846     * int seed = 104729;
847     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
848     * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
849     * </pre>
850     *
851     * <p>This implementation contains a sign-extension bug in the finalization step of
852     * any bytes left over from dividing the length by 4. This manifests if any of these
853     * bytes are negative.</p>
854     *
855     * @param data The input string
856     * @return The 32-bit hash
857     * @see #hash32(byte[], int, int, int)
858     * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
859     * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
860     */
861    @Deprecated
862    public static int hash32(final String data) {
863        final byte[] bytes = StringUtils.getBytesUtf8(data);
864        return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
865    }
866
867    /**
868     * Generates 32-bit hash from the byte array with a seed of zero.
869     * This is a helper method that will produce the same result as:
870     *
871     * <pre>
872     * int offset = 0;
873     * int seed = 0;
874     * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
875     * </pre>
876     *
877     * @param data The input byte array
878     * @return The 32-bit hash
879     * @see #hash32x86(byte[], int, int, int)
880     * @since 1.14
881     */
882    public static int hash32x86(final byte[] data) {
883        return hash32x86(data, 0, data.length, 0);
884    }
885
886    /**
887     * Generates 32-bit hash from the byte array with the given offset, length and seed.
888     *
889     * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
890     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
891     *
892     * @param data The input byte array
893     * @param offset The offset of data
894     * @param length The length of array
895     * @param seed The initial seed value
896     * @return The 32-bit hash
897     * @since 1.14
898     */
899    public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
900        int hash = seed;
901        final int nblocks = length >> 2;
902
903        // body
904        for (int i = 0; i < nblocks; i++) {
905            final int index = offset + (i << 2);
906            final int k = getLittleEndianInt(data, index);
907            hash = mix32(k, hash);
908        }
909
910        // tail
911        final int index = offset + (nblocks << 2);
912        int k1 = 0;
913        switch (offset + length - index) {
914        case 3:
915            k1 ^= (data[index + 2] & 0xff) << 16;
916        case 2:
917            k1 ^= (data[index + 1] & 0xff) << 8;
918        case 1:
919            k1 ^= data[index] & 0xff;
920
921            // mix functions
922            k1 *= C1_32;
923            k1 = Integer.rotateLeft(k1, R1_32);
924            k1 *= C2_32;
925            hash ^= k1;
926        }
927
928        hash ^= length;
929        return fmix32(hash);
930    }
931
932    /**
933     * Generates 64-bit hash from a byte array with a default seed.
934     *
935     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
936     *
937     * <p>This is a Murmur3-like 64-bit variant.
938     * The method does not produce the same result as either half of the hash bytes from
939     * {@linkplain #hash128x64(byte[])} with the same byte data.
940     * This method will be removed in a future release.</p>
941     *
942     * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
943     * this result as the default seed is positive.</p>
944     *
945     * <p>This is a helper method that will produce the same result as:</p>
946     *
947     * <pre>
948     * int offset = 0;
949     * int seed = 104729;
950     * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
951     * </pre>
952     *
953     * @param data The input byte array
954     * @return The 64-bit hash
955     * @see #hash64(byte[], int, int, int)
956     * @deprecated Not part of the MurmurHash3 implementation.
957     * Use half of the hash bytes from {@link #hash128x64(byte[])}.
958     */
959    @Deprecated
960    public static long hash64(final byte[] data) {
961        return hash64(data, 0, data.length, DEFAULT_SEED);
962    }
963
964    /**
965     * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
966     *
967     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
968     *
969     * <p>This is a Murmur3-like 64-bit variant.
970     * The method does not produce the same result as either half of the hash bytes from
971     * {@linkplain #hash128x64(byte[])} with the same byte data.
972     * This method will be removed in a future release.</p>
973     *
974     * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
975     * this result as the default seed is positive.</p>
976     *
977     * <p>This is a helper method that will produce the same result as:</p>
978     *
979     * <pre>
980     * int seed = 104729;
981     * long hash = MurmurHash3.hash64(data, offset, length, seed);
982     * </pre>
983     *
984     * @param data The input byte array
985     * @param offset The offset of data
986     * @param length The length of array
987     * @return The 64-bit hash
988     * @see #hash64(byte[], int, int, int)
989     * @deprecated Not part of the MurmurHash3 implementation.
990     * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
991     */
992    @Deprecated
993    public static long hash64(final byte[] data, final int offset, final int length) {
994        return hash64(data, offset, length, DEFAULT_SEED);
995    }
996
997    /**
998     * Generates 64-bit hash from a byte array with the given offset, length and seed.
999     *
1000     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1001     *
1002     * <p>This is a Murmur3-like 64-bit variant.
1003     * This method will be removed in a future release.</p>
1004     *
1005     * <p>This implementation contains a sign-extension bug in the seed initialization.
1006     * This manifests if the seed is negative.</p>
1007     *
1008     * <p>This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks
1009     * of data processed in the MurmurHash3 {@code MurmurHash3_x64_128} method. However the hash
1010     * is not mixed with a hash chunk from the next 8 bytes of data. The method will not return
1011     * the same value as the first or second 64-bits of the function
1012     * {@link #hash128(byte[], int, int, int)}.</p>
1013     *
1014     * <p>Use of this method is not advised. Use the first long returned from
1015     * {@link #hash128x64(byte[], int, int, int)}.</p>
1016     *
1017     * @param data The input byte array
1018     * @param offset The offset of data
1019     * @param length The length of array
1020     * @param seed The initial seed value
1021     * @return The 64-bit hash
1022     * @deprecated Not part of the MurmurHash3 implementation.
1023     * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1024     */
1025    @Deprecated
1026    public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1027        //
1028        // Note: This fails to apply masking using 0xffffffffL to the seed.
1029        //
1030        long hash = seed;
1031        final int nblocks = length >> 3;
1032
1033        // body
1034        for (int i = 0; i < nblocks; i++) {
1035            final int index = offset + (i << 3);
1036            long k = getLittleEndianLong(data, index);
1037
1038            // mix functions
1039            k *= C1;
1040            k = Long.rotateLeft(k, R1);
1041            k *= C2;
1042            hash ^= k;
1043            hash = Long.rotateLeft(hash, R2) * M + N1;
1044        }
1045
1046        // tail
1047        long k1 = 0;
1048        final int index = offset + (nblocks << 3);
1049        switch (offset + length - index) {
1050        case 7:
1051            k1 ^= ((long) data[index + 6] & 0xff) << 48;
1052        case 6:
1053            k1 ^= ((long) data[index + 5] & 0xff) << 40;
1054        case 5:
1055            k1 ^= ((long) data[index + 4] & 0xff) << 32;
1056        case 4:
1057            k1 ^= ((long) data[index + 3] & 0xff) << 24;
1058        case 3:
1059            k1 ^= ((long) data[index + 2] & 0xff) << 16;
1060        case 2:
1061            k1 ^= ((long) data[index + 1] & 0xff) << 8;
1062        case 1:
1063            k1 ^= (long) data[index] & 0xff;
1064            k1 *= C1;
1065            k1 = Long.rotateLeft(k1, R1);
1066            k1 *= C2;
1067            hash ^= k1;
1068        }
1069
1070        // finalization
1071        hash ^= length;
1072        return fmix64(hash);
1073    }
1074
1075    /**
1076     * Generates 64-bit hash from an int with a default seed.
1077     *
1078     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1079     *
1080     * <p>This is a Murmur3-like 64-bit variant.
1081     * The method does not produce the same result as either half of the hash bytes from
1082     * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code int}.
1083     * This method will be removed in a future release.</p>
1084     *
1085     * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1086     * this result as the default seed is positive.</p>
1087     *
1088     * <p>This is a helper method that will produce the same result as:</p>
1089     *
1090     * <pre>
1091     * int offset = 0;
1092     * int seed = 104729;
1093     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4)
1094     *                                          .putInt(data)
1095     *                                          .array(), offset, 4, seed);
1096     * </pre>
1097     *
1098     * @param data The int to hash
1099     * @return The 64-bit hash
1100     * @see #hash64(byte[], int, int, int)
1101     * @deprecated Not part of the MurmurHash3 implementation.
1102     * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1103     */
1104    @Deprecated
1105    public static long hash64(final int data) {
1106        long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1107        long hash = DEFAULT_SEED;
1108        k1 *= C1;
1109        k1 = Long.rotateLeft(k1, R1);
1110        k1 *= C2;
1111        hash ^= k1;
1112        // finalization
1113        hash ^= Integer.BYTES;
1114        return fmix64(hash);
1115    }
1116
1117    /**
1118     * Generates 64-bit hash from a long with a default seed.
1119     *
1120     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1121     *
1122     * <p>This is a Murmur3-like 64-bit variant.
1123     * The method does not produce the same result as either half of the hash bytes from
1124     * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code long}.
1125     * This method will be removed in a future release.</p>
1126     *
1127     * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1128     * this result as the default seed is positive.</p>
1129     *
1130     * <p>This is a helper method that will produce the same result as:</p>
1131     *
1132     * <pre>
1133     * int offset = 0;
1134     * int seed = 104729;
1135     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8)
1136     *                                          .putLong(data)
1137     *                                          .array(), offset, 8, seed);
1138     * </pre>
1139     *
1140     * @param data The long to hash
1141     * @return The 64-bit hash
1142     * @see #hash64(byte[], int, int, int)
1143     * @deprecated Not part of the MurmurHash3 implementation.
1144     * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1145     */
1146    @Deprecated
1147    public static long hash64(final long data) {
1148        long hash = DEFAULT_SEED;
1149        long k = Long.reverseBytes(data);
1150        // mix functions
1151        k *= C1;
1152        k = Long.rotateLeft(k, R1);
1153        k *= C2;
1154        hash ^= k;
1155        hash = Long.rotateLeft(hash, R2) * M + N1;
1156        // finalization
1157        hash ^= Long.BYTES;
1158        return fmix64(hash);
1159    }
1160
1161    /**
1162     * Generates 64-bit hash from a short with a default seed.
1163     *
1164     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1165     *
1166     * <p>This is a Murmur3-like 64-bit variant.
1167     * The method does not produce the same result as either half of the hash bytes from
1168     * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code short}.
1169     * This method will be removed in a future release.</p>
1170     *
1171     * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1172     * this result as the default seed is positive.</p>
1173     *
1174     * <p>This is a helper method that will produce the same result as:</p>
1175     *
1176     * <pre>
1177     * int offset = 0;
1178     * int seed = 104729;
1179     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2)
1180     *                                          .putShort(data)
1181     *                                          .array(), offset, 2, seed);
1182     * </pre>
1183     *
1184     * @param data The short to hash
1185     * @return The 64-bit hash
1186     * @see #hash64(byte[], int, int, int)
1187     * @deprecated Not part of the MurmurHash3 implementation.
1188     * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code short}.
1189     */
1190    @Deprecated
1191    public static long hash64(final short data) {
1192        long hash = DEFAULT_SEED;
1193        long k1 = 0;
1194        k1 ^= ((long) data & 0xff) << 8;
1195        k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1196        k1 *= C1;
1197        k1 = Long.rotateLeft(k1, R1);
1198        k1 *= C2;
1199        hash ^= k1;
1200
1201        // finalization
1202        hash ^= Short.BYTES;
1203        return fmix64(hash);
1204    }
1205
1206    /**
1207     * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1208     *
1209     * @param k The data to add to the hash
1210     * @param hash The current hash
1211     * @return The new hash
1212     */
1213    private static int mix32(int k, int hash) {
1214        k *= C1_32;
1215        k = Integer.rotateLeft(k, R1_32);
1216        k *= C2_32;
1217        hash ^= k;
1218        return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1219    }
1220
1221    /** No instance methods. */
1222    private MurmurHash3() {
1223    }
1224}