001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.digest;
019
020import org.apache.commons.codec.binary.StringUtils;
021
022/**
023 * Implements the MurmurHash3 32-bit and 128-bit hash functions.
024 *
025 * <p>
026 * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
027 * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
028 * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
029 * </p>
030 *
031 * <p>
032 * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
033 * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
034 * </p>
035 *
036 * <p>
037 * This is public domain code with no copyrights. From home page of
038 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
039 * </p>
040 *
041 * <blockquote> "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
042 * code." </blockquote>
043 *
044 * <p>
045 * Original adaption from Apache Hive. That adaption contains a {@code hash64} method that is not part of the original
046 * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
047 * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
048 * </p>
049 *
050 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
051 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 c++
052 *      code</a>
053 * @see <a href=
054 *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
055 *      Apache Hive Murmer3</a>
056 * @since 1.13
057 */
058public final class MurmurHash3 {
059
060    /**
061     * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
062     * hash computed.
063     *
064     * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
065     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
066     *
067     * <p>This implementation contains a sign-extension bug in the finalization step of
068     * any bytes left over from dividing the length by 4. This manifests if any of these
069     * bytes are negative.</p>
070     *
071     * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
072     */
073    @Deprecated
074    public static class IncrementalHash32 extends IncrementalHash32x86 {
075
076        /**
077         * Constructs a new instance.
078         */
079        public IncrementalHash32() {
080            // empty
081        }
082
083        /**
084         * {@inheritDoc}
085         *
086         * <p>This implementation contains a sign-extension bug in the finalization step of
087         * any bytes left over from dividing the length by 4. This manifests if any of these
088         * bytes are negative.<p>
089         *
090         * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
091         */
092        @Override
093        @Deprecated
094        int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
095            int result = hash;
096            // ************
097            // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
098            // ************
099            int k1 = 0;
100            switch (unprocessedLength) {
101            case 3:
102                k1 ^= unprocessed[2] << 16;
103            case 2:
104                k1 ^= unprocessed[1] << 8;
105            case 1:
106                k1 ^= unprocessed[0];
107
108                // mix functions
109                k1 *= C1_32;
110                k1 = Integer.rotateLeft(k1, R1_32);
111                k1 *= C2_32;
112                result ^= k1;
113            }
114
115            // finalization
116            result ^= totalLen;
117            return fmix32(result);
118        }
119    }
120
121    /**
122     * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
123     * hash computed.
124     *
125     * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
126     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
127     *
128     * @since 1.14
129     */
130    public static class IncrementalHash32x86 {
131
132        /** The size of byte blocks that are processed together. */
133        private static final int BLOCK_SIZE = 4;
134
135        /**
136         * Combines the bytes using an Or operation ({@code | } in a little-endian representation
137         * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
138         * significant.
139         *
140         * @param b1 The first byte
141         * @param b2 The second byte
142         * @param b3 The third byte
143         * @param b4 The fourth byte
144         * @return The 32-bit integer
145         */
146        private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
147            return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
148        }
149
150        /** Up to 3 unprocessed bytes from input data. */
151        private final byte[] unprocessed = new byte[3];
152
153        /** The number of unprocessed bytes in the tail data. */
154        private int unprocessedLength;
155
156        /** The total number of input bytes added since the start. */
157        private int totalLen;
158
159        /**
160         * The current running hash.
161         * This must be finalised to generate the 32-bit hash value.
162         */
163        private int hash;
164
165        /**
166         * Constructs a new instance.
167         */
168        public IncrementalHash32x86() {
169            // empty
170        }
171
172        /**
173         * Adds the byte array to the current incremental hash.
174         *
175         * @param data The input byte array
176         * @param offset The offset of data
177         * @param length The length of array
178         */
179        public final void add(final byte[] data, final int offset, final int length) {
180            if (length <= 0) {
181                // Nothing to add
182                return;
183            }
184            totalLen += length;
185
186            // Process the bytes in blocks of 4.
187            // New bytes must be added to any current unprocessed bytes,
188            // then processed in blocks of 4 and the remaining bytes saved:
189            //
190            //    |--|---------------------------|--|
191            // unprocessed
192            //                main block
193            //                                remaining
194
195            // Check if the unprocessed bytes and new bytes can fill a block of 4.
196            // Make this overflow safe in the event that length is Integer.MAX_VALUE.
197            // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
198            if (unprocessedLength + length - BLOCK_SIZE < 0) {
199                // Not enough so add to the unprocessed bytes
200                System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
201                unprocessedLength += length;
202                return;
203            }
204
205            // Combine unprocessed bytes with new bytes.
206            final int newOffset;
207            final int newLength;
208            if (unprocessedLength > 0) {
209                int k = -1;
210                switch (unprocessedLength) {
211                case 1:
212                    k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
213                    break;
214                case 2:
215                    k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
216                    break;
217                case 3:
218                    k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
219                    break;
220                default:
221                    throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
222                }
223                hash = mix32(k, hash);
224                // Update the offset and length
225                final int consumed = BLOCK_SIZE - unprocessedLength;
226                newOffset = offset + consumed;
227                newLength = length - consumed;
228            } else {
229                newOffset = offset;
230                newLength = length;
231            }
232
233            // Main processing of blocks of 4 bytes
234            final int nblocks = newLength >> 2;
235
236            for (int i = 0; i < nblocks; i++) {
237                final int index = newOffset + (i << 2);
238                final int k = getLittleEndianInt(data, index);
239                hash = mix32(k, hash);
240            }
241
242            // Save left-over unprocessed bytes
243            final int consumed = nblocks << 2;
244            unprocessedLength = newLength - consumed;
245            if (unprocessedLength != 0) {
246                System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
247            }
248        }
249
250        /**
251         * Generates the 32-bit hash value. Repeat calls to this method with no additional data
252         * will generate the same hash value.
253         *
254         * @return The 32-bit hash
255         */
256        public final int end() {
257            // Allow calling end() again after adding no data to return the same result.
258            return finalise(hash, unprocessedLength, unprocessed, totalLen);
259        }
260
261        /**
262         * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
263         * and performing final mixing.
264         *
265         * @param hash The running hash
266         * @param unprocessedLength The number of unprocessed bytes in the tail data.
267         * @param unprocessed Up to 3 unprocessed bytes from input data.
268         * @param totalLen The total number of input bytes added since the start.
269         * @return The 32-bit hash
270         */
271        int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
272            int result = hash;
273            int k1 = 0;
274            switch (unprocessedLength) {
275            case 3:
276                k1 ^= (unprocessed[2] & 0xff) << 16;
277            case 2:
278                k1 ^= (unprocessed[1] & 0xff) << 8;
279            case 1:
280                k1 ^= unprocessed[0] & 0xff;
281
282                // mix functions
283                k1 *= C1_32;
284                k1 = Integer.rotateLeft(k1, R1_32);
285                k1 *= C2_32;
286                result ^= k1;
287            }
288
289            // finalization
290            result ^= totalLen;
291            return fmix32(result);
292        }
293
294        /**
295         * Starts a new incremental hash.
296         *
297         * @param seed The initial seed value
298         */
299        public final void start(final int seed) {
300            // Reset
301            unprocessedLength = totalLen = 0;
302            this.hash = seed;
303        }
304    }
305
306    /**
307     * A random number to use for a hash code.
308     *
309     * @deprecated This is not used internally and will be removed in a future release.
310     */
311    @Deprecated
312    public static final long NULL_HASHCODE = 2862933555777941757L;
313    /**
314     * A default seed to use for the murmur hash algorithm.
315     * Has the value {@code 104729}.
316     */
317    public static final int DEFAULT_SEED = 104729;
318    // Constants for 32-bit variant
319    private static final int C1_32 = 0xcc9e2d51;
320    private static final int C2_32 = 0x1b873593;
321    private static final int R1_32 = 15;
322    private static final int R2_32 = 13;
323
324    private static final int M_32 = 5;
325    private static final int N_32 = 0xe6546b64;
326    // Constants for 128-bit variant
327    private static final long C1 = 0x87c37b91114253d5L;
328    private static final long C2 = 0x4cf5ad432745937fL;
329    private static final int R1 = 31;
330    private static final int R2 = 27;
331    private static final int R3 = 33;
332    private static final int M = 5;
333
334    private static final int N1 = 0x52dce729;
335
336    private static final int N2 = 0x38495ab5;
337
338    /**
339     * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
340     *
341     * @param hash The current hash
342     * @return The final hash
343     */
344    private static int fmix32(int hash) {
345        hash ^= hash >>> 16;
346        hash *= 0x85ebca6b;
347        hash ^= hash >>> 13;
348        hash *= 0xc2b2ae35;
349        hash ^= hash >>> 16;
350        return hash;
351    }
352
353    /**
354     * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
355     *
356     * @param hash The current hash
357     * @return The final hash
358     */
359    private static long fmix64(long hash) {
360        hash ^= hash >>> 33;
361        hash *= 0xff51afd7ed558ccdL;
362        hash ^= hash >>> 33;
363        hash *= 0xc4ceb9fe1a85ec53L;
364        hash ^= hash >>> 33;
365        return hash;
366    }
367
368    /**
369     * Gets the little-endian int from 4 bytes starting at the specified index.
370     *
371     * @param data The data
372     * @param index The index
373     * @return The little-endian int
374     */
375    private static int getLittleEndianInt(final byte[] data, final int index) {
376        return data[index    ] & 0xff |
377               (data[index + 1] & 0xff) <<  8 |
378               (data[index + 2] & 0xff) << 16 |
379               (data[index + 3] & 0xff) << 24;
380    }
381
382    /**
383     * Gets the little-endian long from 8 bytes starting at the specified index.
384     *
385     * @param data The data
386     * @param index The index
387     * @return The little-endian long
388     */
389    private static long getLittleEndianLong(final byte[] data, final int index) {
390        return (long) data[index    ] & 0xff |
391               ((long) data[index + 1] & 0xff) <<  8 |
392               ((long) data[index + 2] & 0xff) << 16 |
393               ((long) data[index + 3] & 0xff) << 24 |
394               ((long) data[index + 4] & 0xff) << 32 |
395               ((long) data[index + 5] & 0xff) << 40 |
396               ((long) data[index + 6] & 0xff) << 48 |
397               ((long) data[index + 7] & 0xff) << 56;
398    }
399
400    /**
401     * Generates 128-bit hash from the byte array with a default seed.
402     * This is a helper method that will produce the same result as:
403     *
404     * <pre>
405     * int offset = 0;
406     * int seed = 104729;
407     * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
408     * </pre>
409     *
410     * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
411     * this result as the default seed is positive.</p>
412     *
413     * @param data The input byte array
414     * @return The 128-bit hash (2 longs)
415     * @see #hash128(byte[], int, int, int)
416     */
417    public static long[] hash128(final byte[] data) {
418        return hash128(data, 0, data.length, DEFAULT_SEED);
419    }
420
421    /**
422     * Generates 128-bit hash from the byte array with the given offset, length and seed.
423     *
424     * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
425     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
426     *
427     * <p>This implementation contains a sign-extension bug in the seed initialization.
428     * This manifests if the seed is negative.</p>
429     *
430     * @param data The input byte array
431     * @param offset The first element of array
432     * @param length The length of array
433     * @param seed The initial seed value
434     * @return The 128-bit hash (2 longs)
435     * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
436     */
437    @Deprecated
438    public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
439        // ************
440        // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
441        // to maintain behavioral compatibility with the original version.
442        // The implicit conversion to a long will extend a negative sign
443        // bit through the upper 32-bits of the long seed. These should be zero.
444        // ************
445        return hash128x64Internal(data, offset, length, seed);
446    }
447
448    /**
449     * Generates 128-bit hash from a string with a default seed.
450     * <p>
451     * Before 1.14 the string was converted using default encoding.
452     * Since 1.14 the string is converted to bytes using UTF-8 encoding.
453     * </p>
454     * <p>
455     * This is a helper method that will produce the same result as:
456     * </p>
457     *
458     * <pre>
459     * int offset = 0;
460     * int seed = 104729;
461     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
462     * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
463     * </pre>
464     *
465     * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
466     * this result as the default seed is positive.</p>
467     *
468     * @param data The input String
469     * @return The 128-bit hash (2 longs)
470     * @see #hash128(byte[], int, int, int)
471     * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from
472     * {@link String#getBytes(java.nio.charset.Charset)}.
473     */
474    @Deprecated
475    public static long[] hash128(final String data) {
476        final byte[] bytes = StringUtils.getBytesUtf8(data);
477        return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
478    }
479
480    /**
481     * Generates 128-bit hash from the byte array with a seed of zero.
482     * This is a helper method that will produce the same result as:
483     *
484     * <pre>
485     * int offset = 0;
486     * int seed = 0;
487     * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
488     * </pre>
489     *
490     * @param data The input byte array
491     * @return The 128-bit hash (2 longs)
492     * @see #hash128x64(byte[], int, int, int)
493     * @since 1.14
494     */
495    public static long[] hash128x64(final byte[] data) {
496        return hash128x64(data, 0, data.length, 0);
497    }
498
499    /**
500     * Generates 128-bit hash from the byte array with the given offset, length and seed.
501     *
502     * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
503     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
504     *
505     * @param data The input byte array
506     * @param offset The first element of array
507     * @param length The length of array
508     * @param seed The initial seed value
509     * @return The 128-bit hash (2 longs)
510     * @since 1.14
511     */
512    public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
513        // Use an unsigned 32-bit integer as the seed
514        return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
515    }
516
517    /**
518     * Generates 128-bit hash from the byte array with the given offset, length and seed.
519     *
520     * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
521     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
522     *
523     * @param data The input byte array
524     * @param offset The first element of array
525     * @param length The length of array
526     * @param seed The initial seed value
527     * @return The 128-bit hash (2 longs)
528     */
529    private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
530        long h1 = seed;
531        long h2 = seed;
532        final int nblocks = length >> 4;
533
534        // body
535        for (int i = 0; i < nblocks; i++) {
536            final int index = offset + (i << 4);
537            long k1 = getLittleEndianLong(data, index);
538            long k2 = getLittleEndianLong(data, index + 8);
539
540            // mix functions for k1
541            k1 *= C1;
542            k1 = Long.rotateLeft(k1, R1);
543            k1 *= C2;
544            h1 ^= k1;
545            h1 = Long.rotateLeft(h1, R2);
546            h1 += h2;
547            h1 = h1 * M + N1;
548
549            // mix functions for k2
550            k2 *= C2;
551            k2 = Long.rotateLeft(k2, R3);
552            k2 *= C1;
553            h2 ^= k2;
554            h2 = Long.rotateLeft(h2, R1);
555            h2 += h1;
556            h2 = h2 * M + N2;
557        }
558
559        // tail
560        long k1 = 0;
561        long k2 = 0;
562        final int index = offset + (nblocks << 4);
563        switch (offset + length - index) {
564        case 15:
565            k2 ^= ((long) data[index + 14] & 0xff) << 48;
566        case 14:
567            k2 ^= ((long) data[index + 13] & 0xff) << 40;
568        case 13:
569            k2 ^= ((long) data[index + 12] & 0xff) << 32;
570        case 12:
571            k2 ^= ((long) data[index + 11] & 0xff) << 24;
572        case 11:
573            k2 ^= ((long) data[index + 10] & 0xff) << 16;
574        case 10:
575            k2 ^= ((long) data[index + 9] & 0xff) << 8;
576        case 9:
577            k2 ^= data[index + 8] & 0xff;
578            k2 *= C2;
579            k2 = Long.rotateLeft(k2, R3);
580            k2 *= C1;
581            h2 ^= k2;
582
583        case 8:
584            k1 ^= ((long) data[index + 7] & 0xff) << 56;
585        case 7:
586            k1 ^= ((long) data[index + 6] & 0xff) << 48;
587        case 6:
588            k1 ^= ((long) data[index + 5] & 0xff) << 40;
589        case 5:
590            k1 ^= ((long) data[index + 4] & 0xff) << 32;
591        case 4:
592            k1 ^= ((long) data[index + 3] & 0xff) << 24;
593        case 3:
594            k1 ^= ((long) data[index + 2] & 0xff) << 16;
595        case 2:
596            k1 ^= ((long) data[index + 1] & 0xff) << 8;
597        case 1:
598            k1 ^= data[index] & 0xff;
599            k1 *= C1;
600            k1 = Long.rotateLeft(k1, R1);
601            k1 *= C2;
602            h1 ^= k1;
603        }
604
605        // finalization
606        h1 ^= length;
607        h2 ^= length;
608
609        h1 += h2;
610        h2 += h1;
611
612        h1 = fmix64(h1);
613        h2 = fmix64(h2);
614
615        h1 += h2;
616        h2 += h1;
617
618        return new long[] { h1, h2 };
619    }
620
621    /**
622     * Generates 32-bit hash from the byte array with a default seed.
623     * This is a helper method that will produce the same result as:
624     *
625     * <pre>
626     * int offset = 0;
627     * int seed = 104729;
628     * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
629     * </pre>
630     *
631     * <p>This implementation contains a sign-extension bug in the finalization step of
632     * any bytes left over from dividing the length by 4. This manifests if any of these
633     * bytes are negative.</p>
634     *
635     * @param data The input byte array
636     * @return The 32-bit hash
637     * @see #hash32(byte[], int, int, int)
638     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
639     */
640    @Deprecated
641    public static int hash32(final byte[] data) {
642        return hash32(data, 0, data.length, DEFAULT_SEED);
643    }
644
645    /**
646     * Generates 32-bit hash from the byte array with the given length and a default seed.
647     * This is a helper method that will produce the same result as:
648     *
649     * <pre>
650     * int offset = 0;
651     * int seed = 104729;
652     * int hash = MurmurHash3.hash32(data, offset, length, seed);
653     * </pre>
654     *
655     * <p>This implementation contains a sign-extension bug in the finalization step of
656     * any bytes left over from dividing the length by 4. This manifests if any of these
657     * bytes are negative.</p>
658     *
659     * @param data The input byte array
660     * @param length The length of array
661     * @return The 32-bit hash
662     * @see #hash32(byte[], int, int, int)
663     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
664     */
665    @Deprecated
666    public static int hash32(final byte[] data, final int length) {
667        return hash32(data, length, DEFAULT_SEED);
668    }
669
670    /**
671     * Generates 32-bit hash from the byte array with the given length and seed. This is a
672     * helper method that will produce the same result as:
673     *
674     * <pre>
675     * int offset = 0;
676     * int hash = MurmurHash3.hash32(data, offset, length, seed);
677     * </pre>
678     *
679     * <p>This implementation contains a sign-extension bug in the finalization step of
680     * any bytes left over from dividing the length by 4. This manifests if any of these
681     * bytes are negative.</p>
682     *
683     * @param data The input byte array
684     * @param length The length of array
685     * @param seed The initial seed value
686     * @return The 32-bit hash
687     * @see #hash32(byte[], int, int, int)
688     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
689     */
690    @Deprecated
691    public static int hash32(final byte[] data, final int length, final int seed) {
692        return hash32(data, 0, length, seed);
693    }
694
695    /**
696     * Generates 32-bit hash from the byte array with the given offset, length and seed.
697     *
698     * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
699     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
700     *
701     * <p>This implementation contains a sign-extension bug in the finalization step of
702     * any bytes left over from dividing the length by 4. This manifests if any of these
703     * bytes are negative.</p>
704     *
705     * @param data The input byte array
706     * @param offset The offset of data
707     * @param length The length of array
708     * @param seed The initial seed value
709     * @return The 32-bit hash
710     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
711     */
712    @Deprecated
713    public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
714        int hash = seed;
715        final int nblocks = length >> 2;
716
717        // body
718        for (int i = 0; i < nblocks; i++) {
719            final int index = offset + (i << 2);
720            final int k = getLittleEndianInt(data, index);
721            hash = mix32(k, hash);
722        }
723
724        // tail
725        // ************
726        // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
727        // ************
728        final int index = offset + (nblocks << 2);
729        int k1 = 0;
730        switch (offset + length - index) {
731        case 3:
732            k1 ^= data[index + 2] << 16;
733        case 2:
734            k1 ^= data[index + 1] << 8;
735        case 1:
736            k1 ^= data[index];
737
738            // mix functions
739            k1 *= C1_32;
740            k1 = Integer.rotateLeft(k1, R1_32);
741            k1 *= C2_32;
742            hash ^= k1;
743        }
744
745        hash ^= length;
746        return fmix32(hash);
747    }
748
749    /**
750     * Generates 32-bit hash from a long with a default seed value.
751     * This is a helper method that will produce the same result as:
752     *
753     * <pre>
754     * int offset = 0;
755     * int seed = 104729;
756     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
757     *                                            .putLong(data)
758     *                                            .array(), offset, 8, seed);
759     * </pre>
760     *
761     * @param data The long to hash
762     * @return The 32-bit hash
763     * @see #hash32x86(byte[], int, int, int)
764     */
765    public static int hash32(final long data) {
766        return hash32(data, DEFAULT_SEED);
767    }
768
769    /**
770     * Generates 32-bit hash from a long with the given seed.
771     * This is a helper method that will produce the same result as:
772     *
773     * <pre>
774     * int offset = 0;
775     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
776     *                                            .putLong(data)
777     *                                            .array(), offset, 8, seed);
778     * </pre>
779     *
780     * @param data The long to hash
781     * @param seed The initial seed value
782     * @return The 32-bit hash
783     * @see #hash32x86(byte[], int, int, int)
784     */
785    public static int hash32(final long data, final int seed) {
786        int hash = seed;
787        final long r0 = Long.reverseBytes(data);
788
789        hash = mix32((int) r0, hash);
790        hash = mix32((int) (r0 >>> 32), hash);
791
792        hash ^= Long.BYTES;
793        return fmix32(hash);
794    }
795
796    /**
797     * Generates 32-bit hash from two longs with a default seed value.
798     * This is a helper method that will produce the same result as:
799     *
800     * <pre>
801     * int offset = 0;
802     * int seed = 104729;
803     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
804     *                                            .putLong(data1)
805     *                                            .putLong(data2)
806     *                                            .array(), offset, 16, seed);
807     * </pre>
808     *
809     * @param data1 The first long to hash
810     * @param data2 The second long to hash
811     * @return The 32-bit hash
812     * @see #hash32x86(byte[], int, int, int)
813     */
814    public static int hash32(final long data1, final long data2) {
815        return hash32(data1, data2, DEFAULT_SEED);
816    }
817
818    /**
819     * Generates 32-bit hash from two longs with the given seed.
820     * This is a helper method that will produce the same result as:
821     *
822     * <pre>
823     * int offset = 0;
824     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
825     *                                            .putLong(data1)
826     *                                            .putLong(data2)
827     *                                            .array(), offset, 16, seed);
828     * </pre>
829     *
830     * @param data1 The first long to hash
831     * @param data2 The second long to hash
832     * @param seed The initial seed value
833     * @return The 32-bit hash
834     * @see #hash32x86(byte[], int, int, int)
835     */
836    public static int hash32(final long data1, final long data2, final int seed) {
837        int hash = seed;
838        final long r0 = Long.reverseBytes(data1);
839        final long r1 = Long.reverseBytes(data2);
840
841        hash = mix32((int) r0, hash);
842        hash = mix32((int) (r0 >>> 32), hash);
843        hash = mix32((int) r1, hash);
844        hash = mix32((int) (r1 >>> 32), hash);
845
846        hash ^= Long.BYTES * 2;
847        return fmix32(hash);
848    }
849
850    /**
851     * Generates 32-bit hash from a string with a default seed.
852     * <p>
853     * Before 1.14 the string was converted using default encoding.
854     * Since 1.14 the string is converted to bytes using UTF-8 encoding.
855     * </p>
856     * This is a helper method that will produce the same result as:
857     *
858     * <pre>
859     * int offset = 0;
860     * int seed = 104729;
861     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
862     * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
863     * </pre>
864     *
865     * <p>This implementation contains a sign-extension bug in the finalization step of
866     * any bytes left over from dividing the length by 4. This manifests if any of these
867     * bytes are negative.</p>
868     *
869     * @param data The input string
870     * @return The 32-bit hash
871     * @see #hash32(byte[], int, int, int)
872     * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
873     * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
874     */
875    @Deprecated
876    public static int hash32(final String data) {
877        final byte[] bytes = StringUtils.getBytesUtf8(data);
878        return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
879    }
880
881    /**
882     * Generates 32-bit hash from the byte array with a seed of zero.
883     * This is a helper method that will produce the same result as:
884     *
885     * <pre>
886     * int offset = 0;
887     * int seed = 0;
888     * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
889     * </pre>
890     *
891     * @param data The input byte array
892     * @return The 32-bit hash
893     * @see #hash32x86(byte[], int, int, int)
894     * @since 1.14
895     */
896    public static int hash32x86(final byte[] data) {
897        return hash32x86(data, 0, data.length, 0);
898    }
899
900    /**
901     * Generates 32-bit hash from the byte array with the given offset, length and seed.
902     *
903     * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
904     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
905     *
906     * @param data The input byte array
907     * @param offset The offset of data
908     * @param length The length of array
909     * @param seed The initial seed value
910     * @return The 32-bit hash
911     * @since 1.14
912     */
913    public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
914        int hash = seed;
915        final int nblocks = length >> 2;
916
917        // body
918        for (int i = 0; i < nblocks; i++) {
919            final int index = offset + (i << 2);
920            final int k = getLittleEndianInt(data, index);
921            hash = mix32(k, hash);
922        }
923
924        // tail
925        final int index = offset + (nblocks << 2);
926        int k1 = 0;
927        switch (offset + length - index) {
928        case 3:
929            k1 ^= (data[index + 2] & 0xff) << 16;
930        case 2:
931            k1 ^= (data[index + 1] & 0xff) << 8;
932        case 1:
933            k1 ^= data[index] & 0xff;
934
935            // mix functions
936            k1 *= C1_32;
937            k1 = Integer.rotateLeft(k1, R1_32);
938            k1 *= C2_32;
939            hash ^= k1;
940        }
941
942        hash ^= length;
943        return fmix32(hash);
944    }
945
946    /**
947     * Generates 64-bit hash from a byte array with a default seed.
948     *
949     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
950     *
951     * <p>This is a Murmur3-like 64-bit variant.
952     * The method does not produce the same result as either half of the hash bytes from
953     * {@linkplain #hash128x64(byte[])} with the same byte data.
954     * This method will be removed in a future release.</p>
955     *
956     * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
957     * this result as the default seed is positive.</p>
958     *
959     * <p>This is a helper method that will produce the same result as:</p>
960     *
961     * <pre>
962     * int offset = 0;
963     * int seed = 104729;
964     * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
965     * </pre>
966     *
967     * @param data The input byte array
968     * @return The 64-bit hash
969     * @see #hash64(byte[], int, int, int)
970     * @deprecated Not part of the MurmurHash3 implementation.
971     * Use half of the hash bytes from {@link #hash128x64(byte[])}.
972     */
973    @Deprecated
974    public static long hash64(final byte[] data) {
975        return hash64(data, 0, data.length, DEFAULT_SEED);
976    }
977
978    /**
979     * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
980     *
981     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
982     *
983     * <p>This is a Murmur3-like 64-bit variant.
984     * The method does not produce the same result as either half of the hash bytes from
985     * {@linkplain #hash128x64(byte[])} with the same byte data.
986     * This method will be removed in a future release.</p>
987     *
988     * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
989     * this result as the default seed is positive.</p>
990     *
991     * <p>This is a helper method that will produce the same result as:</p>
992     *
993     * <pre>
994     * int seed = 104729;
995     * long hash = MurmurHash3.hash64(data, offset, length, seed);
996     * </pre>
997     *
998     * @param data The input byte array
999     * @param offset The offset of data
1000     * @param length The length of array
1001     * @return The 64-bit hash
1002     * @see #hash64(byte[], int, int, int)
1003     * @deprecated Not part of the MurmurHash3 implementation.
1004     * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1005     */
1006    @Deprecated
1007    public static long hash64(final byte[] data, final int offset, final int length) {
1008        return hash64(data, offset, length, DEFAULT_SEED);
1009    }
1010
1011    /**
1012     * Generates 64-bit hash from a byte array with the given offset, length and seed.
1013     *
1014     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1015     *
1016     * <p>This is a Murmur3-like 64-bit variant.
1017     * This method will be removed in a future release.</p>
1018     *
1019     * <p>This implementation contains a sign-extension bug in the seed initialization.
1020     * This manifests if the seed is negative.</p>
1021     *
1022     * <p>This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks
1023     * of data processed in the MurmurHash3 {@code MurmurHash3_x64_128} method. However the hash
1024     * is not mixed with a hash chunk from the next 8 bytes of data. The method will not return
1025     * the same value as the first or second 64-bits of the function
1026     * {@link #hash128(byte[], int, int, int)}.</p>
1027     *
1028     * <p>Use of this method is not advised. Use the first long returned from
1029     * {@link #hash128x64(byte[], int, int, int)}.</p>
1030     *
1031     * @param data The input byte array
1032     * @param offset The offset of data
1033     * @param length The length of array
1034     * @param seed The initial seed value
1035     * @return The 64-bit hash
1036     * @deprecated Not part of the MurmurHash3 implementation.
1037     * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1038     */
1039    @Deprecated
1040    public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1041        //
1042        // Note: This fails to apply masking using 0xffffffffL to the seed.
1043        //
1044        long hash = seed;
1045        final int nblocks = length >> 3;
1046
1047        // body
1048        for (int i = 0; i < nblocks; i++) {
1049            final int index = offset + (i << 3);
1050            long k = getLittleEndianLong(data, index);
1051
1052            // mix functions
1053            k *= C1;
1054            k = Long.rotateLeft(k, R1);
1055            k *= C2;
1056            hash ^= k;
1057            hash = Long.rotateLeft(hash, R2) * M + N1;
1058        }
1059
1060        // tail
1061        long k1 = 0;
1062        final int index = offset + (nblocks << 3);
1063        switch (offset + length - index) {
1064        case 7:
1065            k1 ^= ((long) data[index + 6] & 0xff) << 48;
1066        case 6:
1067            k1 ^= ((long) data[index + 5] & 0xff) << 40;
1068        case 5:
1069            k1 ^= ((long) data[index + 4] & 0xff) << 32;
1070        case 4:
1071            k1 ^= ((long) data[index + 3] & 0xff) << 24;
1072        case 3:
1073            k1 ^= ((long) data[index + 2] & 0xff) << 16;
1074        case 2:
1075            k1 ^= ((long) data[index + 1] & 0xff) << 8;
1076        case 1:
1077            k1 ^= (long) data[index] & 0xff;
1078            k1 *= C1;
1079            k1 = Long.rotateLeft(k1, R1);
1080            k1 *= C2;
1081            hash ^= k1;
1082        }
1083
1084        // finalization
1085        hash ^= length;
1086        return fmix64(hash);
1087    }
1088
1089    /**
1090     * Generates 64-bit hash from an int with a default seed.
1091     *
1092     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1093     *
1094     * <p>This is a Murmur3-like 64-bit variant.
1095     * The method does not produce the same result as either half of the hash bytes from
1096     * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code int}.
1097     * This method will be removed in a future release.</p>
1098     *
1099     * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1100     * this result as the default seed is positive.</p>
1101     *
1102     * <p>This is a helper method that will produce the same result as:</p>
1103     *
1104     * <pre>
1105     * int offset = 0;
1106     * int seed = 104729;
1107     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4)
1108     *                                          .putInt(data)
1109     *                                          .array(), offset, 4, seed);
1110     * </pre>
1111     *
1112     * @param data The int to hash
1113     * @return The 64-bit hash
1114     * @see #hash64(byte[], int, int, int)
1115     * @deprecated Not part of the MurmurHash3 implementation.
1116     * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1117     */
1118    @Deprecated
1119    public static long hash64(final int data) {
1120        long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1121        long hash = DEFAULT_SEED;
1122        k1 *= C1;
1123        k1 = Long.rotateLeft(k1, R1);
1124        k1 *= C2;
1125        hash ^= k1;
1126        // finalization
1127        hash ^= Integer.BYTES;
1128        return fmix64(hash);
1129    }
1130
1131    /**
1132     * Generates 64-bit hash from a long with a default seed.
1133     *
1134     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1135     *
1136     * <p>This is a Murmur3-like 64-bit variant.
1137     * The method does not produce the same result as either half of the hash bytes from
1138     * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code long}.
1139     * This method will be removed in a future release.</p>
1140     *
1141     * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1142     * this result as the default seed is positive.</p>
1143     *
1144     * <p>This is a helper method that will produce the same result as:</p>
1145     *
1146     * <pre>
1147     * int offset = 0;
1148     * int seed = 104729;
1149     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8)
1150     *                                          .putLong(data)
1151     *                                          .array(), offset, 8, seed);
1152     * </pre>
1153     *
1154     * @param data The long to hash
1155     * @return The 64-bit hash
1156     * @see #hash64(byte[], int, int, int)
1157     * @deprecated Not part of the MurmurHash3 implementation.
1158     * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1159     */
1160    @Deprecated
1161    public static long hash64(final long data) {
1162        long hash = DEFAULT_SEED;
1163        long k = Long.reverseBytes(data);
1164        // mix functions
1165        k *= C1;
1166        k = Long.rotateLeft(k, R1);
1167        k *= C2;
1168        hash ^= k;
1169        hash = Long.rotateLeft(hash, R2) * M + N1;
1170        // finalization
1171        hash ^= Long.BYTES;
1172        return fmix64(hash);
1173    }
1174
1175    /**
1176     * Generates 64-bit hash from a short with a default seed.
1177     *
1178     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1179     *
1180     * <p>This is a Murmur3-like 64-bit variant.
1181     * The method does not produce the same result as either half of the hash bytes from
1182     * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code short}.
1183     * This method will be removed in a future release.</p>
1184     *
1185     * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1186     * this result as the default seed is positive.</p>
1187     *
1188     * <p>This is a helper method that will produce the same result as:</p>
1189     *
1190     * <pre>
1191     * int offset = 0;
1192     * int seed = 104729;
1193     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2)
1194     *                                          .putShort(data)
1195     *                                          .array(), offset, 2, seed);
1196     * </pre>
1197     *
1198     * @param data The short to hash
1199     * @return The 64-bit hash
1200     * @see #hash64(byte[], int, int, int)
1201     * @deprecated Not part of the MurmurHash3 implementation.
1202     * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code short}.
1203     */
1204    @Deprecated
1205    public static long hash64(final short data) {
1206        long hash = DEFAULT_SEED;
1207        long k1 = 0;
1208        k1 ^= ((long) data & 0xff) << 8;
1209        k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1210        k1 *= C1;
1211        k1 = Long.rotateLeft(k1, R1);
1212        k1 *= C2;
1213        hash ^= k1;
1214
1215        // finalization
1216        hash ^= Short.BYTES;
1217        return fmix64(hash);
1218    }
1219
1220    /**
1221     * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1222     *
1223     * @param k The data to add to the hash
1224     * @param hash The current hash
1225     * @return The new hash
1226     */
1227    private static int mix32(int k, int hash) {
1228        k *= C1_32;
1229        k = Integer.rotateLeft(k, R1_32);
1230        k *= C2_32;
1231        hash ^= k;
1232        return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1233    }
1234
1235    /** No instance methods. */
1236    private MurmurHash3() {
1237    }
1238}