View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.digest;
19  
20  import org.apache.commons.codec.binary.StringUtils;
21  
22  /**
23   * Implements the MurmurHash3 32-bit and 128-bit hash functions.
24   *
25   * <p>
26   * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
27   * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
28   * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
29   * </p>
30   *
31   * <p>
32   * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
33   * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
34   * </p>
35   *
36   * <p>
37   * This is public domain code with no copyrights. From home page of
38   * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
39   * </p>
40   *
41   * <blockquote> "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
42   * code." </blockquote>
43   *
44   * <p>
45   * Original adaption from Apache Hive. That adaption contains a {@code hash64} method that is not part of the original
46   * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
47   * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
48   * </p>
49   *
50   * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
51   * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 c++
52   *      code</a>
53   * @see <a href=
54   *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
55   *      Apache Hive Murmer3</a>
56   * @since 1.13
57   */
58  public final class MurmurHash3 {
59  
60      /**
61       * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
62       * hash computed.
63       *
64       * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
65       * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
66       *
67       * <p>This implementation contains a sign-extension bug in the finalization step of
68       * any bytes left over from dividing the length by 4. This manifests if any of these
69       * bytes are negative.</p>
70       *
71       * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
72       */
73      @Deprecated
74      public static class IncrementalHash32 extends IncrementalHash32x86 {
75  
76          /**
77           * Constructs a new instance.
78           */
79          public IncrementalHash32() {
80              // empty
81          }
82  
83          /**
84           * {@inheritDoc}
85           *
86           * <p>This implementation contains a sign-extension bug in the finalization step of
87           * any bytes left over from dividing the length by 4. This manifests if any of these
88           * bytes are negative.<p>
89           *
90           * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
91           */
92          @Override
93          @Deprecated
94          int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
95              int result = hash;
96              // ************
97              // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
98              // ************
99              int k1 = 0;
100             switch (unprocessedLength) {
101             case 3:
102                 k1 ^= unprocessed[2] << 16;
103             case 2:
104                 k1 ^= unprocessed[1] << 8;
105             case 1:
106                 k1 ^= unprocessed[0];
107 
108                 // mix functions
109                 k1 *= C1_32;
110                 k1 = Integer.rotateLeft(k1, R1_32);
111                 k1 *= C2_32;
112                 result ^= k1;
113             }
114 
115             // finalization
116             result ^= totalLen;
117             return fmix32(result);
118         }
119     }
120 
121     /**
122      * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
123      * hash computed.
124      *
125      * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
126      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
127      *
128      * @since 1.14
129      */
130     public static class IncrementalHash32x86 {
131 
132         /** The size of byte blocks that are processed together. */
133         private static final int BLOCK_SIZE = 4;
134 
135         /**
136          * Combines the bytes using an Or operation ({@code | } in a little-endian representation
137          * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
138          * significant.
139          *
140          * @param b1 The first byte
141          * @param b2 The second byte
142          * @param b3 The third byte
143          * @param b4 The fourth byte
144          * @return The 32-bit integer
145          */
146         private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
147             return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
148         }
149 
150         /** Up to 3 unprocessed bytes from input data. */
151         private final byte[] unprocessed = new byte[3];
152 
153         /** The number of unprocessed bytes in the tail data. */
154         private int unprocessedLength;
155 
156         /** The total number of input bytes added since the start. */
157         private int totalLen;
158 
159         /**
160          * The current running hash.
161          * This must be finalised to generate the 32-bit hash value.
162          */
163         private int hash;
164 
165         /**
166          * Constructs a new instance.
167          */
168         public IncrementalHash32x86() {
169             // empty
170         }
171 
172         /**
173          * Adds the byte array to the current incremental hash.
174          *
175          * @param data The input byte array
176          * @param offset The offset of data
177          * @param length The length of array
178          */
179         public final void add(final byte[] data, final int offset, final int length) {
180             if (length <= 0) {
181                 // Nothing to add
182                 return;
183             }
184             totalLen += length;
185 
186             // Process the bytes in blocks of 4.
187             // New bytes must be added to any current unprocessed bytes,
188             // then processed in blocks of 4 and the remaining bytes saved:
189             //
190             //    |--|---------------------------|--|
191             // unprocessed
192             //                main block
193             //                                remaining
194 
195             // Check if the unprocessed bytes and new bytes can fill a block of 4.
196             // Make this overflow safe in the event that length is Integer.MAX_VALUE.
197             // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
198             if (unprocessedLength + length - BLOCK_SIZE < 0) {
199                 // Not enough so add to the unprocessed bytes
200                 System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
201                 unprocessedLength += length;
202                 return;
203             }
204 
205             // Combine unprocessed bytes with new bytes.
206             final int newOffset;
207             final int newLength;
208             if (unprocessedLength > 0) {
209                 int k = -1;
210                 switch (unprocessedLength) {
211                 case 1:
212                     k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
213                     break;
214                 case 2:
215                     k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
216                     break;
217                 case 3:
218                     k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
219                     break;
220                 default:
221                     throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
222                 }
223                 hash = mix32(k, hash);
224                 // Update the offset and length
225                 final int consumed = BLOCK_SIZE - unprocessedLength;
226                 newOffset = offset + consumed;
227                 newLength = length - consumed;
228             } else {
229                 newOffset = offset;
230                 newLength = length;
231             }
232 
233             // Main processing of blocks of 4 bytes
234             final int nblocks = newLength >> 2;
235 
236             for (int i = 0; i < nblocks; i++) {
237                 final int index = newOffset + (i << 2);
238                 final int k = getLittleEndianInt(data, index);
239                 hash = mix32(k, hash);
240             }
241 
242             // Save left-over unprocessed bytes
243             final int consumed = nblocks << 2;
244             unprocessedLength = newLength - consumed;
245             if (unprocessedLength != 0) {
246                 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
247             }
248         }
249 
250         /**
251          * Generates the 32-bit hash value. Repeat calls to this method with no additional data
252          * will generate the same hash value.
253          *
254          * @return The 32-bit hash
255          */
256         public final int end() {
257             // Allow calling end() again after adding no data to return the same result.
258             return finalise(hash, unprocessedLength, unprocessed, totalLen);
259         }
260 
261         /**
262          * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
263          * and performing final mixing.
264          *
265          * @param hash The running hash
266          * @param unprocessedLength The number of unprocessed bytes in the tail data.
267          * @param unprocessed Up to 3 unprocessed bytes from input data.
268          * @param totalLen The total number of input bytes added since the start.
269          * @return The 32-bit hash
270          */
271         int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
272             int result = hash;
273             int k1 = 0;
274             switch (unprocessedLength) {
275             case 3:
276                 k1 ^= (unprocessed[2] & 0xff) << 16;
277             case 2:
278                 k1 ^= (unprocessed[1] & 0xff) << 8;
279             case 1:
280                 k1 ^= unprocessed[0] & 0xff;
281 
282                 // mix functions
283                 k1 *= C1_32;
284                 k1 = Integer.rotateLeft(k1, R1_32);
285                 k1 *= C2_32;
286                 result ^= k1;
287             }
288 
289             // finalization
290             result ^= totalLen;
291             return fmix32(result);
292         }
293 
294         /**
295          * Starts a new incremental hash.
296          *
297          * @param seed The initial seed value
298          */
299         public final void start(final int seed) {
300             // Reset
301             unprocessedLength = totalLen = 0;
302             this.hash = seed;
303         }
304     }
305 
306     /**
307      * A random number to use for a hash code.
308      *
309      * @deprecated This is not used internally and will be removed in a future release.
310      */
311     @Deprecated
312     public static final long NULL_HASHCODE = 2862933555777941757L;
313     /**
314      * A default seed to use for the murmur hash algorithm.
315      * Has the value {@code 104729}.
316      */
317     public static final int DEFAULT_SEED = 104729;
318     // Constants for 32-bit variant
319     private static final int C1_32 = 0xcc9e2d51;
320     private static final int C2_32 = 0x1b873593;
321     private static final int R1_32 = 15;
322     private static final int R2_32 = 13;
323 
324     private static final int M_32 = 5;
325     private static final int N_32 = 0xe6546b64;
326     // Constants for 128-bit variant
327     private static final long C1 = 0x87c37b91114253d5L;
328     private static final long C2 = 0x4cf5ad432745937fL;
329     private static final int R1 = 31;
330     private static final int R2 = 27;
331     private static final int R3 = 33;
332     private static final int M = 5;
333 
334     private static final int N1 = 0x52dce729;
335 
336     private static final int N2 = 0x38495ab5;
337 
338     /**
339      * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
340      *
341      * @param hash The current hash
342      * @return The final hash
343      */
344     private static int fmix32(int hash) {
345         hash ^= hash >>> 16;
346         hash *= 0x85ebca6b;
347         hash ^= hash >>> 13;
348         hash *= 0xc2b2ae35;
349         hash ^= hash >>> 16;
350         return hash;
351     }
352 
353     /**
354      * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
355      *
356      * @param hash The current hash
357      * @return The final hash
358      */
359     private static long fmix64(long hash) {
360         hash ^= hash >>> 33;
361         hash *= 0xff51afd7ed558ccdL;
362         hash ^= hash >>> 33;
363         hash *= 0xc4ceb9fe1a85ec53L;
364         hash ^= hash >>> 33;
365         return hash;
366     }
367 
368     /**
369      * Gets the little-endian int from 4 bytes starting at the specified index.
370      *
371      * @param data The data
372      * @param index The index
373      * @return The little-endian int
374      */
375     private static int getLittleEndianInt(final byte[] data, final int index) {
376         return data[index    ] & 0xff |
377                (data[index + 1] & 0xff) <<  8 |
378                (data[index + 2] & 0xff) << 16 |
379                (data[index + 3] & 0xff) << 24;
380     }
381 
382     /**
383      * Gets the little-endian long from 8 bytes starting at the specified index.
384      *
385      * @param data The data
386      * @param index The index
387      * @return The little-endian long
388      */
389     private static long getLittleEndianLong(final byte[] data, final int index) {
390         return (long) data[index    ] & 0xff |
391                ((long) data[index + 1] & 0xff) <<  8 |
392                ((long) data[index + 2] & 0xff) << 16 |
393                ((long) data[index + 3] & 0xff) << 24 |
394                ((long) data[index + 4] & 0xff) << 32 |
395                ((long) data[index + 5] & 0xff) << 40 |
396                ((long) data[index + 6] & 0xff) << 48 |
397                ((long) data[index + 7] & 0xff) << 56;
398     }
399 
400     /**
401      * Generates 128-bit hash from the byte array with a default seed.
402      * This is a helper method that will produce the same result as:
403      *
404      * <pre>
405      * int offset = 0;
406      * int seed = 104729;
407      * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
408      * </pre>
409      *
410      * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
411      * this result as the default seed is positive.</p>
412      *
413      * @param data The input byte array
414      * @return The 128-bit hash (2 longs)
415      * @see #hash128(byte[], int, int, int)
416      */
417     public static long[] hash128(final byte[] data) {
418         return hash128(data, 0, data.length, DEFAULT_SEED);
419     }
420 
421     /**
422      * Generates 128-bit hash from the byte array with the given offset, length and seed.
423      *
424      * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
425      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
426      *
427      * <p>This implementation contains a sign-extension bug in the seed initialization.
428      * This manifests if the seed is negative.</p>
429      *
430      * @param data The input byte array
431      * @param offset The first element of array
432      * @param length The length of array
433      * @param seed The initial seed value
434      * @return The 128-bit hash (2 longs)
435      * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
436      */
437     @Deprecated
438     public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
439         // ************
440         // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
441         // to maintain behavioral compatibility with the original version.
442         // The implicit conversion to a long will extend a negative sign
443         // bit through the upper 32-bits of the long seed. These should be zero.
444         // ************
445         return hash128x64Internal(data, offset, length, seed);
446     }
447 
448     /**
449      * Generates 128-bit hash from a string with a default seed.
450      * <p>
451      * Before 1.14 the string was converted using default encoding.
452      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
453      * </p>
454      * <p>
455      * This is a helper method that will produce the same result as:
456      * </p>
457      *
458      * <pre>
459      * int offset = 0;
460      * int seed = 104729;
461      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
462      * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
463      * </pre>
464      *
465      * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
466      * this result as the default seed is positive.</p>
467      *
468      * @param data The input String
469      * @return The 128-bit hash (2 longs)
470      * @see #hash128(byte[], int, int, int)
471      * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from
472      * {@link String#getBytes(java.nio.charset.Charset)}.
473      */
474     @Deprecated
475     public static long[] hash128(final String data) {
476         final byte[] bytes = StringUtils.getBytesUtf8(data);
477         return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
478     }
479 
480     /**
481      * Generates 128-bit hash from the byte array with a seed of zero.
482      * This is a helper method that will produce the same result as:
483      *
484      * <pre>
485      * int offset = 0;
486      * int seed = 0;
487      * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
488      * </pre>
489      *
490      * @param data The input byte array
491      * @return The 128-bit hash (2 longs)
492      * @see #hash128x64(byte[], int, int, int)
493      * @since 1.14
494      */
495     public static long[] hash128x64(final byte[] data) {
496         return hash128x64(data, 0, data.length, 0);
497     }
498 
499     /**
500      * Generates 128-bit hash from the byte array with the given offset, length and seed.
501      *
502      * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
503      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
504      *
505      * @param data The input byte array
506      * @param offset The first element of array
507      * @param length The length of array
508      * @param seed The initial seed value
509      * @return The 128-bit hash (2 longs)
510      * @since 1.14
511      */
512     public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
513         // Use an unsigned 32-bit integer as the seed
514         return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
515     }
516 
517     /**
518      * Generates 128-bit hash from the byte array with the given offset, length and seed.
519      *
520      * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
521      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
522      *
523      * @param data The input byte array
524      * @param offset The first element of array
525      * @param length The length of array
526      * @param seed The initial seed value
527      * @return The 128-bit hash (2 longs)
528      */
529     private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
530         long h1 = seed;
531         long h2 = seed;
532         final int nblocks = length >> 4;
533 
534         // body
535         for (int i = 0; i < nblocks; i++) {
536             final int index = offset + (i << 4);
537             long k1 = getLittleEndianLong(data, index);
538             long k2 = getLittleEndianLong(data, index + 8);
539 
540             // mix functions for k1
541             k1 *= C1;
542             k1 = Long.rotateLeft(k1, R1);
543             k1 *= C2;
544             h1 ^= k1;
545             h1 = Long.rotateLeft(h1, R2);
546             h1 += h2;
547             h1 = h1 * M + N1;
548 
549             // mix functions for k2
550             k2 *= C2;
551             k2 = Long.rotateLeft(k2, R3);
552             k2 *= C1;
553             h2 ^= k2;
554             h2 = Long.rotateLeft(h2, R1);
555             h2 += h1;
556             h2 = h2 * M + N2;
557         }
558 
559         // tail
560         long k1 = 0;
561         long k2 = 0;
562         final int index = offset + (nblocks << 4);
563         switch (offset + length - index) {
564         case 15:
565             k2 ^= ((long) data[index + 14] & 0xff) << 48;
566         case 14:
567             k2 ^= ((long) data[index + 13] & 0xff) << 40;
568         case 13:
569             k2 ^= ((long) data[index + 12] & 0xff) << 32;
570         case 12:
571             k2 ^= ((long) data[index + 11] & 0xff) << 24;
572         case 11:
573             k2 ^= ((long) data[index + 10] & 0xff) << 16;
574         case 10:
575             k2 ^= ((long) data[index + 9] & 0xff) << 8;
576         case 9:
577             k2 ^= data[index + 8] & 0xff;
578             k2 *= C2;
579             k2 = Long.rotateLeft(k2, R3);
580             k2 *= C1;
581             h2 ^= k2;
582 
583         case 8:
584             k1 ^= ((long) data[index + 7] & 0xff) << 56;
585         case 7:
586             k1 ^= ((long) data[index + 6] & 0xff) << 48;
587         case 6:
588             k1 ^= ((long) data[index + 5] & 0xff) << 40;
589         case 5:
590             k1 ^= ((long) data[index + 4] & 0xff) << 32;
591         case 4:
592             k1 ^= ((long) data[index + 3] & 0xff) << 24;
593         case 3:
594             k1 ^= ((long) data[index + 2] & 0xff) << 16;
595         case 2:
596             k1 ^= ((long) data[index + 1] & 0xff) << 8;
597         case 1:
598             k1 ^= data[index] & 0xff;
599             k1 *= C1;
600             k1 = Long.rotateLeft(k1, R1);
601             k1 *= C2;
602             h1 ^= k1;
603         }
604 
605         // finalization
606         h1 ^= length;
607         h2 ^= length;
608 
609         h1 += h2;
610         h2 += h1;
611 
612         h1 = fmix64(h1);
613         h2 = fmix64(h2);
614 
615         h1 += h2;
616         h2 += h1;
617 
618         return new long[] { h1, h2 };
619     }
620 
621     /**
622      * Generates 32-bit hash from the byte array with a default seed.
623      * This is a helper method that will produce the same result as:
624      *
625      * <pre>
626      * int offset = 0;
627      * int seed = 104729;
628      * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
629      * </pre>
630      *
631      * <p>This implementation contains a sign-extension bug in the finalization step of
632      * any bytes left over from dividing the length by 4. This manifests if any of these
633      * bytes are negative.</p>
634      *
635      * @param data The input byte array
636      * @return The 32-bit hash
637      * @see #hash32(byte[], int, int, int)
638      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
639      */
640     @Deprecated
641     public static int hash32(final byte[] data) {
642         return hash32(data, 0, data.length, DEFAULT_SEED);
643     }
644 
645     /**
646      * Generates 32-bit hash from the byte array with the given length and a default seed.
647      * This is a helper method that will produce the same result as:
648      *
649      * <pre>
650      * int offset = 0;
651      * int seed = 104729;
652      * int hash = MurmurHash3.hash32(data, offset, length, seed);
653      * </pre>
654      *
655      * <p>This implementation contains a sign-extension bug in the finalization step of
656      * any bytes left over from dividing the length by 4. This manifests if any of these
657      * bytes are negative.</p>
658      *
659      * @param data The input byte array
660      * @param length The length of array
661      * @return The 32-bit hash
662      * @see #hash32(byte[], int, int, int)
663      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
664      */
665     @Deprecated
666     public static int hash32(final byte[] data, final int length) {
667         return hash32(data, length, DEFAULT_SEED);
668     }
669 
670     /**
671      * Generates 32-bit hash from the byte array with the given length and seed. This is a
672      * helper method that will produce the same result as:
673      *
674      * <pre>
675      * int offset = 0;
676      * int hash = MurmurHash3.hash32(data, offset, length, seed);
677      * </pre>
678      *
679      * <p>This implementation contains a sign-extension bug in the finalization step of
680      * any bytes left over from dividing the length by 4. This manifests if any of these
681      * bytes are negative.</p>
682      *
683      * @param data The input byte array
684      * @param length The length of array
685      * @param seed The initial seed value
686      * @return The 32-bit hash
687      * @see #hash32(byte[], int, int, int)
688      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
689      */
690     @Deprecated
691     public static int hash32(final byte[] data, final int length, final int seed) {
692         return hash32(data, 0, length, seed);
693     }
694 
695     /**
696      * Generates 32-bit hash from the byte array with the given offset, length and seed.
697      *
698      * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
699      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
700      *
701      * <p>This implementation contains a sign-extension bug in the finalization step of
702      * any bytes left over from dividing the length by 4. This manifests if any of these
703      * bytes are negative.</p>
704      *
705      * @param data The input byte array
706      * @param offset The offset of data
707      * @param length The length of array
708      * @param seed The initial seed value
709      * @return The 32-bit hash
710      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
711      */
712     @Deprecated
713     public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
714         int hash = seed;
715         final int nblocks = length >> 2;
716 
717         // body
718         for (int i = 0; i < nblocks; i++) {
719             final int index = offset + (i << 2);
720             final int k = getLittleEndianInt(data, index);
721             hash = mix32(k, hash);
722         }
723 
724         // tail
725         // ************
726         // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
727         // ************
728         final int index = offset + (nblocks << 2);
729         int k1 = 0;
730         switch (offset + length - index) {
731         case 3:
732             k1 ^= data[index + 2] << 16;
733         case 2:
734             k1 ^= data[index + 1] << 8;
735         case 1:
736             k1 ^= data[index];
737 
738             // mix functions
739             k1 *= C1_32;
740             k1 = Integer.rotateLeft(k1, R1_32);
741             k1 *= C2_32;
742             hash ^= k1;
743         }
744 
745         hash ^= length;
746         return fmix32(hash);
747     }
748 
749     /**
750      * Generates 32-bit hash from a long with a default seed value.
751      * This is a helper method that will produce the same result as:
752      *
753      * <pre>
754      * int offset = 0;
755      * int seed = 104729;
756      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
757      *                                            .putLong(data)
758      *                                            .array(), offset, 8, seed);
759      * </pre>
760      *
761      * @param data The long to hash
762      * @return The 32-bit hash
763      * @see #hash32x86(byte[], int, int, int)
764      */
765     public static int hash32(final long data) {
766         return hash32(data, DEFAULT_SEED);
767     }
768 
769     /**
770      * Generates 32-bit hash from a long with the given seed.
771      * This is a helper method that will produce the same result as:
772      *
773      * <pre>
774      * int offset = 0;
775      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
776      *                                            .putLong(data)
777      *                                            .array(), offset, 8, seed);
778      * </pre>
779      *
780      * @param data The long to hash
781      * @param seed The initial seed value
782      * @return The 32-bit hash
783      * @see #hash32x86(byte[], int, int, int)
784      */
785     public static int hash32(final long data, final int seed) {
786         int hash = seed;
787         final long r0 = Long.reverseBytes(data);
788 
789         hash = mix32((int) r0, hash);
790         hash = mix32((int) (r0 >>> 32), hash);
791 
792         hash ^= Long.BYTES;
793         return fmix32(hash);
794     }
795 
796     /**
797      * Generates 32-bit hash from two longs with a default seed value.
798      * This is a helper method that will produce the same result as:
799      *
800      * <pre>
801      * int offset = 0;
802      * int seed = 104729;
803      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
804      *                                            .putLong(data1)
805      *                                            .putLong(data2)
806      *                                            .array(), offset, 16, seed);
807      * </pre>
808      *
809      * @param data1 The first long to hash
810      * @param data2 The second long to hash
811      * @return The 32-bit hash
812      * @see #hash32x86(byte[], int, int, int)
813      */
814     public static int hash32(final long data1, final long data2) {
815         return hash32(data1, data2, DEFAULT_SEED);
816     }
817 
818     /**
819      * Generates 32-bit hash from two longs with the given seed.
820      * This is a helper method that will produce the same result as:
821      *
822      * <pre>
823      * int offset = 0;
824      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
825      *                                            .putLong(data1)
826      *                                            .putLong(data2)
827      *                                            .array(), offset, 16, seed);
828      * </pre>
829      *
830      * @param data1 The first long to hash
831      * @param data2 The second long to hash
832      * @param seed The initial seed value
833      * @return The 32-bit hash
834      * @see #hash32x86(byte[], int, int, int)
835      */
836     public static int hash32(final long data1, final long data2, final int seed) {
837         int hash = seed;
838         final long r0 = Long.reverseBytes(data1);
839         final long r1 = Long.reverseBytes(data2);
840 
841         hash = mix32((int) r0, hash);
842         hash = mix32((int) (r0 >>> 32), hash);
843         hash = mix32((int) r1, hash);
844         hash = mix32((int) (r1 >>> 32), hash);
845 
846         hash ^= Long.BYTES * 2;
847         return fmix32(hash);
848     }
849 
850     /**
851      * Generates 32-bit hash from a string with a default seed.
852      * <p>
853      * Before 1.14 the string was converted using default encoding.
854      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
855      * </p>
856      * This is a helper method that will produce the same result as:
857      *
858      * <pre>
859      * int offset = 0;
860      * int seed = 104729;
861      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
862      * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
863      * </pre>
864      *
865      * <p>This implementation contains a sign-extension bug in the finalization step of
866      * any bytes left over from dividing the length by 4. This manifests if any of these
867      * bytes are negative.</p>
868      *
869      * @param data The input string
870      * @return The 32-bit hash
871      * @see #hash32(byte[], int, int, int)
872      * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
873      * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
874      */
875     @Deprecated
876     public static int hash32(final String data) {
877         final byte[] bytes = StringUtils.getBytesUtf8(data);
878         return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
879     }
880 
881     /**
882      * Generates 32-bit hash from the byte array with a seed of zero.
883      * This is a helper method that will produce the same result as:
884      *
885      * <pre>
886      * int offset = 0;
887      * int seed = 0;
888      * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
889      * </pre>
890      *
891      * @param data The input byte array
892      * @return The 32-bit hash
893      * @see #hash32x86(byte[], int, int, int)
894      * @since 1.14
895      */
896     public static int hash32x86(final byte[] data) {
897         return hash32x86(data, 0, data.length, 0);
898     }
899 
900     /**
901      * Generates 32-bit hash from the byte array with the given offset, length and seed.
902      *
903      * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
904      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
905      *
906      * @param data The input byte array
907      * @param offset The offset of data
908      * @param length The length of array
909      * @param seed The initial seed value
910      * @return The 32-bit hash
911      * @since 1.14
912      */
913     public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
914         int hash = seed;
915         final int nblocks = length >> 2;
916 
917         // body
918         for (int i = 0; i < nblocks; i++) {
919             final int index = offset + (i << 2);
920             final int k = getLittleEndianInt(data, index);
921             hash = mix32(k, hash);
922         }
923 
924         // tail
925         final int index = offset + (nblocks << 2);
926         int k1 = 0;
927         switch (offset + length - index) {
928         case 3:
929             k1 ^= (data[index + 2] & 0xff) << 16;
930         case 2:
931             k1 ^= (data[index + 1] & 0xff) << 8;
932         case 1:
933             k1 ^= data[index] & 0xff;
934 
935             // mix functions
936             k1 *= C1_32;
937             k1 = Integer.rotateLeft(k1, R1_32);
938             k1 *= C2_32;
939             hash ^= k1;
940         }
941 
942         hash ^= length;
943         return fmix32(hash);
944     }
945 
946     /**
947      * Generates 64-bit hash from a byte array with a default seed.
948      *
949      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
950      *
951      * <p>This is a Murmur3-like 64-bit variant.
952      * The method does not produce the same result as either half of the hash bytes from
953      * {@linkplain #hash128x64(byte[])} with the same byte data.
954      * This method will be removed in a future release.</p>
955      *
956      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
957      * this result as the default seed is positive.</p>
958      *
959      * <p>This is a helper method that will produce the same result as:</p>
960      *
961      * <pre>
962      * int offset = 0;
963      * int seed = 104729;
964      * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
965      * </pre>
966      *
967      * @param data The input byte array
968      * @return The 64-bit hash
969      * @see #hash64(byte[], int, int, int)
970      * @deprecated Not part of the MurmurHash3 implementation.
971      * Use half of the hash bytes from {@link #hash128x64(byte[])}.
972      */
973     @Deprecated
974     public static long hash64(final byte[] data) {
975         return hash64(data, 0, data.length, DEFAULT_SEED);
976     }
977 
978     /**
979      * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
980      *
981      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
982      *
983      * <p>This is a Murmur3-like 64-bit variant.
984      * The method does not produce the same result as either half of the hash bytes from
985      * {@linkplain #hash128x64(byte[])} with the same byte data.
986      * This method will be removed in a future release.</p>
987      *
988      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
989      * this result as the default seed is positive.</p>
990      *
991      * <p>This is a helper method that will produce the same result as:</p>
992      *
993      * <pre>
994      * int seed = 104729;
995      * long hash = MurmurHash3.hash64(data, offset, length, seed);
996      * </pre>
997      *
998      * @param data The input byte array
999      * @param offset The offset of data
1000      * @param length The length of array
1001      * @return The 64-bit hash
1002      * @see #hash64(byte[], int, int, int)
1003      * @deprecated Not part of the MurmurHash3 implementation.
1004      * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1005      */
1006     @Deprecated
1007     public static long hash64(final byte[] data, final int offset, final int length) {
1008         return hash64(data, offset, length, DEFAULT_SEED);
1009     }
1010 
1011     /**
1012      * Generates 64-bit hash from a byte array with the given offset, length and seed.
1013      *
1014      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1015      *
1016      * <p>This is a Murmur3-like 64-bit variant.
1017      * This method will be removed in a future release.</p>
1018      *
1019      * <p>This implementation contains a sign-extension bug in the seed initialization.
1020      * This manifests if the seed is negative.</p>
1021      *
1022      * <p>This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks
1023      * of data processed in the MurmurHash3 {@code MurmurHash3_x64_128} method. However the hash
1024      * is not mixed with a hash chunk from the next 8 bytes of data. The method will not return
1025      * the same value as the first or second 64-bits of the function
1026      * {@link #hash128(byte[], int, int, int)}.</p>
1027      *
1028      * <p>Use of this method is not advised. Use the first long returned from
1029      * {@link #hash128x64(byte[], int, int, int)}.</p>
1030      *
1031      * @param data The input byte array
1032      * @param offset The offset of data
1033      * @param length The length of array
1034      * @param seed The initial seed value
1035      * @return The 64-bit hash
1036      * @deprecated Not part of the MurmurHash3 implementation.
1037      * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1038      */
1039     @Deprecated
1040     public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1041         //
1042         // Note: This fails to apply masking using 0xffffffffL to the seed.
1043         //
1044         long hash = seed;
1045         final int nblocks = length >> 3;
1046 
1047         // body
1048         for (int i = 0; i < nblocks; i++) {
1049             final int index = offset + (i << 3);
1050             long k = getLittleEndianLong(data, index);
1051 
1052             // mix functions
1053             k *= C1;
1054             k = Long.rotateLeft(k, R1);
1055             k *= C2;
1056             hash ^= k;
1057             hash = Long.rotateLeft(hash, R2) * M + N1;
1058         }
1059 
1060         // tail
1061         long k1 = 0;
1062         final int index = offset + (nblocks << 3);
1063         switch (offset + length - index) {
1064         case 7:
1065             k1 ^= ((long) data[index + 6] & 0xff) << 48;
1066         case 6:
1067             k1 ^= ((long) data[index + 5] & 0xff) << 40;
1068         case 5:
1069             k1 ^= ((long) data[index + 4] & 0xff) << 32;
1070         case 4:
1071             k1 ^= ((long) data[index + 3] & 0xff) << 24;
1072         case 3:
1073             k1 ^= ((long) data[index + 2] & 0xff) << 16;
1074         case 2:
1075             k1 ^= ((long) data[index + 1] & 0xff) << 8;
1076         case 1:
1077             k1 ^= (long) data[index] & 0xff;
1078             k1 *= C1;
1079             k1 = Long.rotateLeft(k1, R1);
1080             k1 *= C2;
1081             hash ^= k1;
1082         }
1083 
1084         // finalization
1085         hash ^= length;
1086         return fmix64(hash);
1087     }
1088 
1089     /**
1090      * Generates 64-bit hash from an int with a default seed.
1091      *
1092      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1093      *
1094      * <p>This is a Murmur3-like 64-bit variant.
1095      * The method does not produce the same result as either half of the hash bytes from
1096      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code int}.
1097      * This method will be removed in a future release.</p>
1098      *
1099      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1100      * this result as the default seed is positive.</p>
1101      *
1102      * <p>This is a helper method that will produce the same result as:</p>
1103      *
1104      * <pre>
1105      * int offset = 0;
1106      * int seed = 104729;
1107      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4)
1108      *                                          .putInt(data)
1109      *                                          .array(), offset, 4, seed);
1110      * </pre>
1111      *
1112      * @param data The int to hash
1113      * @return The 64-bit hash
1114      * @see #hash64(byte[], int, int, int)
1115      * @deprecated Not part of the MurmurHash3 implementation.
1116      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1117      */
1118     @Deprecated
1119     public static long hash64(final int data) {
1120         long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1121         long hash = DEFAULT_SEED;
1122         k1 *= C1;
1123         k1 = Long.rotateLeft(k1, R1);
1124         k1 *= C2;
1125         hash ^= k1;
1126         // finalization
1127         hash ^= Integer.BYTES;
1128         return fmix64(hash);
1129     }
1130 
1131     /**
1132      * Generates 64-bit hash from a long with a default seed.
1133      *
1134      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1135      *
1136      * <p>This is a Murmur3-like 64-bit variant.
1137      * The method does not produce the same result as either half of the hash bytes from
1138      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code long}.
1139      * This method will be removed in a future release.</p>
1140      *
1141      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1142      * this result as the default seed is positive.</p>
1143      *
1144      * <p>This is a helper method that will produce the same result as:</p>
1145      *
1146      * <pre>
1147      * int offset = 0;
1148      * int seed = 104729;
1149      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8)
1150      *                                          .putLong(data)
1151      *                                          .array(), offset, 8, seed);
1152      * </pre>
1153      *
1154      * @param data The long to hash
1155      * @return The 64-bit hash
1156      * @see #hash64(byte[], int, int, int)
1157      * @deprecated Not part of the MurmurHash3 implementation.
1158      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1159      */
1160     @Deprecated
1161     public static long hash64(final long data) {
1162         long hash = DEFAULT_SEED;
1163         long k = Long.reverseBytes(data);
1164         // mix functions
1165         k *= C1;
1166         k = Long.rotateLeft(k, R1);
1167         k *= C2;
1168         hash ^= k;
1169         hash = Long.rotateLeft(hash, R2) * M + N1;
1170         // finalization
1171         hash ^= Long.BYTES;
1172         return fmix64(hash);
1173     }
1174 
1175     /**
1176      * Generates 64-bit hash from a short with a default seed.
1177      *
1178      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1179      *
1180      * <p>This is a Murmur3-like 64-bit variant.
1181      * The method does not produce the same result as either half of the hash bytes from
1182      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code short}.
1183      * This method will be removed in a future release.</p>
1184      *
1185      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1186      * this result as the default seed is positive.</p>
1187      *
1188      * <p>This is a helper method that will produce the same result as:</p>
1189      *
1190      * <pre>
1191      * int offset = 0;
1192      * int seed = 104729;
1193      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2)
1194      *                                          .putShort(data)
1195      *                                          .array(), offset, 2, seed);
1196      * </pre>
1197      *
1198      * @param data The short to hash
1199      * @return The 64-bit hash
1200      * @see #hash64(byte[], int, int, int)
1201      * @deprecated Not part of the MurmurHash3 implementation.
1202      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code short}.
1203      */
1204     @Deprecated
1205     public static long hash64(final short data) {
1206         long hash = DEFAULT_SEED;
1207         long k1 = 0;
1208         k1 ^= ((long) data & 0xff) << 8;
1209         k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1210         k1 *= C1;
1211         k1 = Long.rotateLeft(k1, R1);
1212         k1 *= C2;
1213         hash ^= k1;
1214 
1215         // finalization
1216         hash ^= Short.BYTES;
1217         return fmix64(hash);
1218     }
1219 
1220     /**
1221      * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1222      *
1223      * @param k The data to add to the hash
1224      * @param hash The current hash
1225      * @return The new hash
1226      */
1227     private static int mix32(int k, int hash) {
1228         k *= C1_32;
1229         k = Integer.rotateLeft(k, R1_32);
1230         k *= C2_32;
1231         hash ^= k;
1232         return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1233     }
1234 
1235     /** No instance methods. */
1236     private MurmurHash3() {
1237     }
1238 }