View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.digest;
19  
20  import org.apache.commons.codec.binary.StringUtils;
21  
22  /**
23   * Implements the MurmurHash3 32-bit and 128-bit hash functions.
24   *
25   * <p>
26   * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
27   * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
28   * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
29   * </p>
30   *
31   * <p>
32   * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
33   * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
34   * </p>
35   *
36   * <p>
37   * This is public domain code with no copyrights. From home page of
38   * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
39   * </p>
40   *
41   * <blockquote> "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
42   * code." </blockquote>
43   *
44   * <p>
45   * Original adaption from Apache Hive. That adaption contains a {@code hash64} method that is not part of the original
46   * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
47   * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
48   * </p>
49   *
50   * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
51   * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 c++
52   *      code</a>
53   * @see <a href=
54   *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
55   *      Apache Hive Murmer3</a>
56   * @since 1.13
57   */
58  public final class MurmurHash3 {
59  
60      /**
61       * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
62       * hash computed.
63       *
64       * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
65       * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
66       *
67       * <p>This implementation contains a sign-extension bug in the finalization step of
68       * any bytes left over from dividing the length by 4. This manifests if any of these
69       * bytes are negative.</p>
70       *
71       * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
72       */
73      @Deprecated
74      public static class IncrementalHash32 extends IncrementalHash32x86 {
75  
76          /**
77           * {@inheritDoc}
78           *
79           * <p>This implementation contains a sign-extension bug in the finalization step of
80           * any bytes left over from dividing the length by 4. This manifests if any of these
81           * bytes are negative.<p>
82           *
83           * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
84           */
85          @Override
86          @Deprecated
87          int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
88              int result = hash;
89              // ************
90              // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
91              // ************
92              int k1 = 0;
93              switch (unprocessedLength) {
94              case 3:
95                  k1 ^= unprocessed[2] << 16;
96              case 2:
97                  k1 ^= unprocessed[1] << 8;
98              case 1:
99                  k1 ^= unprocessed[0];
100 
101                 // mix functions
102                 k1 *= C1_32;
103                 k1 = Integer.rotateLeft(k1, R1_32);
104                 k1 *= C2_32;
105                 result ^= k1;
106             }
107 
108             // finalization
109             result ^= totalLen;
110             return fmix32(result);
111         }
112     }
113 
114     /**
115      * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
116      * hash computed.
117      *
118      * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
119      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
120      *
121      * @since 1.14
122      */
123     public static class IncrementalHash32x86 {
124 
125         /** The size of byte blocks that are processed together. */
126         private static final int BLOCK_SIZE = 4;
127 
128         /**
129          * Combines the bytes using an Or operation ({@code | } in a little-endian representation
130          * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
131          * significant.
132          *
133          * @param b1 The first byte
134          * @param b2 The second byte
135          * @param b3 The third byte
136          * @param b4 The fourth byte
137          * @return The 32-bit integer
138          */
139         private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
140             return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
141         }
142 
143         /** Up to 3 unprocessed bytes from input data. */
144         private final byte[] unprocessed = new byte[3];
145 
146         /** The number of unprocessed bytes in the tail data. */
147         private int unprocessedLength;
148 
149         /** The total number of input bytes added since the start. */
150         private int totalLen;
151 
152         /**
153          * The current running hash.
154          * This must be finalised to generate the 32-bit hash value.
155          */
156         private int hash;
157 
158         /**
159          * Adds the byte array to the current incremental hash.
160          *
161          * @param data The input byte array
162          * @param offset The offset of data
163          * @param length The length of array
164          */
165         public final void add(final byte[] data, final int offset, final int length) {
166             if (length <= 0) {
167                 // Nothing to add
168                 return;
169             }
170             totalLen += length;
171 
172             // Process the bytes in blocks of 4.
173             // New bytes must be added to any current unprocessed bytes,
174             // then processed in blocks of 4 and the remaining bytes saved:
175             //
176             //    |--|---------------------------|--|
177             // unprocessed
178             //                main block
179             //                                remaining
180 
181             // Check if the unprocessed bytes and new bytes can fill a block of 4.
182             // Make this overflow safe in the event that length is Integer.MAX_VALUE.
183             // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
184             if (unprocessedLength + length - BLOCK_SIZE < 0) {
185                 // Not enough so add to the unprocessed bytes
186                 System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
187                 unprocessedLength += length;
188                 return;
189             }
190 
191             // Combine unprocessed bytes with new bytes.
192             final int newOffset;
193             final int newLength;
194             if (unprocessedLength > 0) {
195                 int k = -1;
196                 switch (unprocessedLength) {
197                 case 1:
198                     k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
199                     break;
200                 case 2:
201                     k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
202                     break;
203                 case 3:
204                     k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
205                     break;
206                 default:
207                     throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
208                 }
209                 hash = mix32(k, hash);
210                 // Update the offset and length
211                 final int consumed = BLOCK_SIZE - unprocessedLength;
212                 newOffset = offset + consumed;
213                 newLength = length - consumed;
214             } else {
215                 newOffset = offset;
216                 newLength = length;
217             }
218 
219             // Main processing of blocks of 4 bytes
220             final int nblocks = newLength >> 2;
221 
222             for (int i = 0; i < nblocks; i++) {
223                 final int index = newOffset + (i << 2);
224                 final int k = getLittleEndianInt(data, index);
225                 hash = mix32(k, hash);
226             }
227 
228             // Save left-over unprocessed bytes
229             final int consumed = nblocks << 2;
230             unprocessedLength = newLength - consumed;
231             if (unprocessedLength != 0) {
232                 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
233             }
234         }
235 
236         /**
237          * Generates the 32-bit hash value. Repeat calls to this method with no additional data
238          * will generate the same hash value.
239          *
240          * @return The 32-bit hash
241          */
242         public final int end() {
243             // Allow calling end() again after adding no data to return the same result.
244             return finalise(hash, unprocessedLength, unprocessed, totalLen);
245         }
246 
247         /**
248          * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
249          * and performing final mixing.
250          *
251          * @param hash The running hash
252          * @param unprocessedLength The number of unprocessed bytes in the tail data.
253          * @param unprocessed Up to 3 unprocessed bytes from input data.
254          * @param totalLen The total number of input bytes added since the start.
255          * @return The 32-bit hash
256          */
257         int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
258             int result = hash;
259             int k1 = 0;
260             switch (unprocessedLength) {
261             case 3:
262                 k1 ^= (unprocessed[2] & 0xff) << 16;
263             case 2:
264                 k1 ^= (unprocessed[1] & 0xff) << 8;
265             case 1:
266                 k1 ^= unprocessed[0] & 0xff;
267 
268                 // mix functions
269                 k1 *= C1_32;
270                 k1 = Integer.rotateLeft(k1, R1_32);
271                 k1 *= C2_32;
272                 result ^= k1;
273             }
274 
275             // finalization
276             result ^= totalLen;
277             return fmix32(result);
278         }
279 
280         /**
281          * Starts a new incremental hash.
282          *
283          * @param seed The initial seed value
284          */
285         public final void start(final int seed) {
286             // Reset
287             unprocessedLength = totalLen = 0;
288             this.hash = seed;
289         }
290     }
291 
292     /**
293      * A random number to use for a hash code.
294      *
295      * @deprecated This is not used internally and will be removed in a future release.
296      */
297     @Deprecated
298     public static final long NULL_HASHCODE = 2862933555777941757L;
299     /**
300      * A default seed to use for the murmur hash algorithm.
301      * Has the value {@code 104729}.
302      */
303     public static final int DEFAULT_SEED = 104729;
304     // Constants for 32-bit variant
305     private static final int C1_32 = 0xcc9e2d51;
306     private static final int C2_32 = 0x1b873593;
307     private static final int R1_32 = 15;
308     private static final int R2_32 = 13;
309 
310     private static final int M_32 = 5;
311     private static final int N_32 = 0xe6546b64;
312     // Constants for 128-bit variant
313     private static final long C1 = 0x87c37b91114253d5L;
314     private static final long C2 = 0x4cf5ad432745937fL;
315     private static final int R1 = 31;
316     private static final int R2 = 27;
317     private static final int R3 = 33;
318     private static final int M = 5;
319 
320     private static final int N1 = 0x52dce729;
321 
322     private static final int N2 = 0x38495ab5;
323 
324     /**
325      * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
326      *
327      * @param hash The current hash
328      * @return The final hash
329      */
330     private static int fmix32(int hash) {
331         hash ^= hash >>> 16;
332         hash *= 0x85ebca6b;
333         hash ^= hash >>> 13;
334         hash *= 0xc2b2ae35;
335         hash ^= hash >>> 16;
336         return hash;
337     }
338 
339     /**
340      * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
341      *
342      * @param hash The current hash
343      * @return The final hash
344      */
345     private static long fmix64(long hash) {
346         hash ^= hash >>> 33;
347         hash *= 0xff51afd7ed558ccdL;
348         hash ^= hash >>> 33;
349         hash *= 0xc4ceb9fe1a85ec53L;
350         hash ^= hash >>> 33;
351         return hash;
352     }
353 
354     /**
355      * Gets the little-endian int from 4 bytes starting at the specified index.
356      *
357      * @param data The data
358      * @param index The index
359      * @return The little-endian int
360      */
361     private static int getLittleEndianInt(final byte[] data, final int index) {
362         return data[index    ] & 0xff |
363                (data[index + 1] & 0xff) <<  8 |
364                (data[index + 2] & 0xff) << 16 |
365                (data[index + 3] & 0xff) << 24;
366     }
367 
368     /**
369      * Gets the little-endian long from 8 bytes starting at the specified index.
370      *
371      * @param data The data
372      * @param index The index
373      * @return The little-endian long
374      */
375     private static long getLittleEndianLong(final byte[] data, final int index) {
376         return (long) data[index    ] & 0xff |
377                ((long) data[index + 1] & 0xff) <<  8 |
378                ((long) data[index + 2] & 0xff) << 16 |
379                ((long) data[index + 3] & 0xff) << 24 |
380                ((long) data[index + 4] & 0xff) << 32 |
381                ((long) data[index + 5] & 0xff) << 40 |
382                ((long) data[index + 6] & 0xff) << 48 |
383                ((long) data[index + 7] & 0xff) << 56;
384     }
385 
386     /**
387      * Generates 128-bit hash from the byte array with a default seed.
388      * This is a helper method that will produce the same result as:
389      *
390      * <pre>
391      * int offset = 0;
392      * int seed = 104729;
393      * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
394      * </pre>
395      *
396      * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
397      * this result as the default seed is positive.</p>
398      *
399      * @param data The input byte array
400      * @return The 128-bit hash (2 longs)
401      * @see #hash128(byte[], int, int, int)
402      */
403     public static long[] hash128(final byte[] data) {
404         return hash128(data, 0, data.length, DEFAULT_SEED);
405     }
406 
407     /**
408      * Generates 128-bit hash from the byte array with the given offset, length and seed.
409      *
410      * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
411      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
412      *
413      * <p>This implementation contains a sign-extension bug in the seed initialization.
414      * This manifests if the seed is negative.</p>
415      *
416      * @param data The input byte array
417      * @param offset The first element of array
418      * @param length The length of array
419      * @param seed The initial seed value
420      * @return The 128-bit hash (2 longs)
421      * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
422      */
423     @Deprecated
424     public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
425         // ************
426         // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
427         // to maintain behavioral compatibility with the original version.
428         // The implicit conversion to a long will extend a negative sign
429         // bit through the upper 32-bits of the long seed. These should be zero.
430         // ************
431         return hash128x64Internal(data, offset, length, seed);
432     }
433 
434     /**
435      * Generates 128-bit hash from a string with a default seed.
436      * <p>
437      * Before 1.14 the string was converted using default encoding.
438      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
439      * </p>
440      * <p>
441      * This is a helper method that will produce the same result as:
442      * </p>
443      *
444      * <pre>
445      * int offset = 0;
446      * int seed = 104729;
447      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
448      * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
449      * </pre>
450      *
451      * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
452      * this result as the default seed is positive.</p>
453      *
454      * @param data The input String
455      * @return The 128-bit hash (2 longs)
456      * @see #hash128(byte[], int, int, int)
457      * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from
458      * {@link String#getBytes(java.nio.charset.Charset)}.
459      */
460     @Deprecated
461     public static long[] hash128(final String data) {
462         final byte[] bytes = StringUtils.getBytesUtf8(data);
463         return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
464     }
465 
466     /**
467      * Generates 128-bit hash from the byte array with a seed of zero.
468      * This is a helper method that will produce the same result as:
469      *
470      * <pre>
471      * int offset = 0;
472      * int seed = 0;
473      * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
474      * </pre>
475      *
476      * @param data The input byte array
477      * @return The 128-bit hash (2 longs)
478      * @see #hash128x64(byte[], int, int, int)
479      * @since 1.14
480      */
481     public static long[] hash128x64(final byte[] data) {
482         return hash128x64(data, 0, data.length, 0);
483     }
484 
485     /**
486      * Generates 128-bit hash from the byte array with the given offset, length and seed.
487      *
488      * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
489      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
490      *
491      * @param data The input byte array
492      * @param offset The first element of array
493      * @param length The length of array
494      * @param seed The initial seed value
495      * @return The 128-bit hash (2 longs)
496      * @since 1.14
497      */
498     public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
499         // Use an unsigned 32-bit integer as the seed
500         return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
501     }
502 
503     /**
504      * Generates 128-bit hash from the byte array with the given offset, length and seed.
505      *
506      * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
507      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
508      *
509      * @param data The input byte array
510      * @param offset The first element of array
511      * @param length The length of array
512      * @param seed The initial seed value
513      * @return The 128-bit hash (2 longs)
514      */
515     private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
516         long h1 = seed;
517         long h2 = seed;
518         final int nblocks = length >> 4;
519 
520         // body
521         for (int i = 0; i < nblocks; i++) {
522             final int index = offset + (i << 4);
523             long k1 = getLittleEndianLong(data, index);
524             long k2 = getLittleEndianLong(data, index + 8);
525 
526             // mix functions for k1
527             k1 *= C1;
528             k1 = Long.rotateLeft(k1, R1);
529             k1 *= C2;
530             h1 ^= k1;
531             h1 = Long.rotateLeft(h1, R2);
532             h1 += h2;
533             h1 = h1 * M + N1;
534 
535             // mix functions for k2
536             k2 *= C2;
537             k2 = Long.rotateLeft(k2, R3);
538             k2 *= C1;
539             h2 ^= k2;
540             h2 = Long.rotateLeft(h2, R1);
541             h2 += h1;
542             h2 = h2 * M + N2;
543         }
544 
545         // tail
546         long k1 = 0;
547         long k2 = 0;
548         final int index = offset + (nblocks << 4);
549         switch (offset + length - index) {
550         case 15:
551             k2 ^= ((long) data[index + 14] & 0xff) << 48;
552         case 14:
553             k2 ^= ((long) data[index + 13] & 0xff) << 40;
554         case 13:
555             k2 ^= ((long) data[index + 12] & 0xff) << 32;
556         case 12:
557             k2 ^= ((long) data[index + 11] & 0xff) << 24;
558         case 11:
559             k2 ^= ((long) data[index + 10] & 0xff) << 16;
560         case 10:
561             k2 ^= ((long) data[index + 9] & 0xff) << 8;
562         case 9:
563             k2 ^= data[index + 8] & 0xff;
564             k2 *= C2;
565             k2 = Long.rotateLeft(k2, R3);
566             k2 *= C1;
567             h2 ^= k2;
568 
569         case 8:
570             k1 ^= ((long) data[index + 7] & 0xff) << 56;
571         case 7:
572             k1 ^= ((long) data[index + 6] & 0xff) << 48;
573         case 6:
574             k1 ^= ((long) data[index + 5] & 0xff) << 40;
575         case 5:
576             k1 ^= ((long) data[index + 4] & 0xff) << 32;
577         case 4:
578             k1 ^= ((long) data[index + 3] & 0xff) << 24;
579         case 3:
580             k1 ^= ((long) data[index + 2] & 0xff) << 16;
581         case 2:
582             k1 ^= ((long) data[index + 1] & 0xff) << 8;
583         case 1:
584             k1 ^= data[index] & 0xff;
585             k1 *= C1;
586             k1 = Long.rotateLeft(k1, R1);
587             k1 *= C2;
588             h1 ^= k1;
589         }
590 
591         // finalization
592         h1 ^= length;
593         h2 ^= length;
594 
595         h1 += h2;
596         h2 += h1;
597 
598         h1 = fmix64(h1);
599         h2 = fmix64(h2);
600 
601         h1 += h2;
602         h2 += h1;
603 
604         return new long[] { h1, h2 };
605     }
606 
607     /**
608      * Generates 32-bit hash from the byte array with a default seed.
609      * This is a helper method that will produce the same result as:
610      *
611      * <pre>
612      * int offset = 0;
613      * int seed = 104729;
614      * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
615      * </pre>
616      *
617      * <p>This implementation contains a sign-extension bug in the finalization step of
618      * any bytes left over from dividing the length by 4. This manifests if any of these
619      * bytes are negative.</p>
620      *
621      * @param data The input byte array
622      * @return The 32-bit hash
623      * @see #hash32(byte[], int, int, int)
624      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
625      */
626     @Deprecated
627     public static int hash32(final byte[] data) {
628         return hash32(data, 0, data.length, DEFAULT_SEED);
629     }
630 
631     /**
632      * Generates 32-bit hash from the byte array with the given length and a default seed.
633      * This is a helper method that will produce the same result as:
634      *
635      * <pre>
636      * int offset = 0;
637      * int seed = 104729;
638      * int hash = MurmurHash3.hash32(data, offset, length, seed);
639      * </pre>
640      *
641      * <p>This implementation contains a sign-extension bug in the finalization step of
642      * any bytes left over from dividing the length by 4. This manifests if any of these
643      * bytes are negative.</p>
644      *
645      * @param data The input byte array
646      * @param length The length of array
647      * @return The 32-bit hash
648      * @see #hash32(byte[], int, int, int)
649      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
650      */
651     @Deprecated
652     public static int hash32(final byte[] data, final int length) {
653         return hash32(data, length, DEFAULT_SEED);
654     }
655 
656     /**
657      * Generates 32-bit hash from the byte array with the given length and seed. This is a
658      * helper method that will produce the same result as:
659      *
660      * <pre>
661      * int offset = 0;
662      * int hash = MurmurHash3.hash32(data, offset, length, seed);
663      * </pre>
664      *
665      * <p>This implementation contains a sign-extension bug in the finalization step of
666      * any bytes left over from dividing the length by 4. This manifests if any of these
667      * bytes are negative.</p>
668      *
669      * @param data The input byte array
670      * @param length The length of array
671      * @param seed The initial seed value
672      * @return The 32-bit hash
673      * @see #hash32(byte[], int, int, int)
674      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
675      */
676     @Deprecated
677     public static int hash32(final byte[] data, final int length, final int seed) {
678         return hash32(data, 0, length, seed);
679     }
680 
681     /**
682      * Generates 32-bit hash from the byte array with the given offset, length and seed.
683      *
684      * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
685      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
686      *
687      * <p>This implementation contains a sign-extension bug in the finalization step of
688      * any bytes left over from dividing the length by 4. This manifests if any of these
689      * bytes are negative.</p>
690      *
691      * @param data The input byte array
692      * @param offset The offset of data
693      * @param length The length of array
694      * @param seed The initial seed value
695      * @return The 32-bit hash
696      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
697      */
698     @Deprecated
699     public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
700         int hash = seed;
701         final int nblocks = length >> 2;
702 
703         // body
704         for (int i = 0; i < nblocks; i++) {
705             final int index = offset + (i << 2);
706             final int k = getLittleEndianInt(data, index);
707             hash = mix32(k, hash);
708         }
709 
710         // tail
711         // ************
712         // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
713         // ************
714         final int index = offset + (nblocks << 2);
715         int k1 = 0;
716         switch (offset + length - index) {
717         case 3:
718             k1 ^= data[index + 2] << 16;
719         case 2:
720             k1 ^= data[index + 1] << 8;
721         case 1:
722             k1 ^= data[index];
723 
724             // mix functions
725             k1 *= C1_32;
726             k1 = Integer.rotateLeft(k1, R1_32);
727             k1 *= C2_32;
728             hash ^= k1;
729         }
730 
731         hash ^= length;
732         return fmix32(hash);
733     }
734 
735     /**
736      * Generates 32-bit hash from a long with a default seed value.
737      * This is a helper method that will produce the same result as:
738      *
739      * <pre>
740      * int offset = 0;
741      * int seed = 104729;
742      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
743      *                                            .putLong(data)
744      *                                            .array(), offset, 8, seed);
745      * </pre>
746      *
747      * @param data The long to hash
748      * @return The 32-bit hash
749      * @see #hash32x86(byte[], int, int, int)
750      */
751     public static int hash32(final long data) {
752         return hash32(data, DEFAULT_SEED);
753     }
754 
755     /**
756      * Generates 32-bit hash from a long with the given seed.
757      * This is a helper method that will produce the same result as:
758      *
759      * <pre>
760      * int offset = 0;
761      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
762      *                                            .putLong(data)
763      *                                            .array(), offset, 8, seed);
764      * </pre>
765      *
766      * @param data The long to hash
767      * @param seed The initial seed value
768      * @return The 32-bit hash
769      * @see #hash32x86(byte[], int, int, int)
770      */
771     public static int hash32(final long data, final int seed) {
772         int hash = seed;
773         final long r0 = Long.reverseBytes(data);
774 
775         hash = mix32((int) r0, hash);
776         hash = mix32((int) (r0 >>> 32), hash);
777 
778         hash ^= Long.BYTES;
779         return fmix32(hash);
780     }
781 
782     /**
783      * Generates 32-bit hash from two longs with a default seed value.
784      * This is a helper method that will produce the same result as:
785      *
786      * <pre>
787      * int offset = 0;
788      * int seed = 104729;
789      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
790      *                                            .putLong(data1)
791      *                                            .putLong(data2)
792      *                                            .array(), offset, 16, seed);
793      * </pre>
794      *
795      * @param data1 The first long to hash
796      * @param data2 The second long to hash
797      * @return The 32-bit hash
798      * @see #hash32x86(byte[], int, int, int)
799      */
800     public static int hash32(final long data1, final long data2) {
801         return hash32(data1, data2, DEFAULT_SEED);
802     }
803 
804     /**
805      * Generates 32-bit hash from two longs with the given seed.
806      * This is a helper method that will produce the same result as:
807      *
808      * <pre>
809      * int offset = 0;
810      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
811      *                                            .putLong(data1)
812      *                                            .putLong(data2)
813      *                                            .array(), offset, 16, seed);
814      * </pre>
815      *
816      * @param data1 The first long to hash
817      * @param data2 The second long to hash
818      * @param seed The initial seed value
819      * @return The 32-bit hash
820      * @see #hash32x86(byte[], int, int, int)
821      */
822     public static int hash32(final long data1, final long data2, final int seed) {
823         int hash = seed;
824         final long r0 = Long.reverseBytes(data1);
825         final long r1 = Long.reverseBytes(data2);
826 
827         hash = mix32((int) r0, hash);
828         hash = mix32((int) (r0 >>> 32), hash);
829         hash = mix32((int) r1, hash);
830         hash = mix32((int) (r1 >>> 32), hash);
831 
832         hash ^= Long.BYTES * 2;
833         return fmix32(hash);
834     }
835 
836     /**
837      * Generates 32-bit hash from a string with a default seed.
838      * <p>
839      * Before 1.14 the string was converted using default encoding.
840      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
841      * </p>
842      * This is a helper method that will produce the same result as:
843      *
844      * <pre>
845      * int offset = 0;
846      * int seed = 104729;
847      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
848      * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
849      * </pre>
850      *
851      * <p>This implementation contains a sign-extension bug in the finalization step of
852      * any bytes left over from dividing the length by 4. This manifests if any of these
853      * bytes are negative.</p>
854      *
855      * @param data The input string
856      * @return The 32-bit hash
857      * @see #hash32(byte[], int, int, int)
858      * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
859      * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
860      */
861     @Deprecated
862     public static int hash32(final String data) {
863         final byte[] bytes = StringUtils.getBytesUtf8(data);
864         return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
865     }
866 
867     /**
868      * Generates 32-bit hash from the byte array with a seed of zero.
869      * This is a helper method that will produce the same result as:
870      *
871      * <pre>
872      * int offset = 0;
873      * int seed = 0;
874      * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
875      * </pre>
876      *
877      * @param data The input byte array
878      * @return The 32-bit hash
879      * @see #hash32x86(byte[], int, int, int)
880      * @since 1.14
881      */
882     public static int hash32x86(final byte[] data) {
883         return hash32x86(data, 0, data.length, 0);
884     }
885 
886     /**
887      * Generates 32-bit hash from the byte array with the given offset, length and seed.
888      *
889      * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
890      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p>
891      *
892      * @param data The input byte array
893      * @param offset The offset of data
894      * @param length The length of array
895      * @param seed The initial seed value
896      * @return The 32-bit hash
897      * @since 1.14
898      */
899     public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
900         int hash = seed;
901         final int nblocks = length >> 2;
902 
903         // body
904         for (int i = 0; i < nblocks; i++) {
905             final int index = offset + (i << 2);
906             final int k = getLittleEndianInt(data, index);
907             hash = mix32(k, hash);
908         }
909 
910         // tail
911         final int index = offset + (nblocks << 2);
912         int k1 = 0;
913         switch (offset + length - index) {
914         case 3:
915             k1 ^= (data[index + 2] & 0xff) << 16;
916         case 2:
917             k1 ^= (data[index + 1] & 0xff) << 8;
918         case 1:
919             k1 ^= data[index] & 0xff;
920 
921             // mix functions
922             k1 *= C1_32;
923             k1 = Integer.rotateLeft(k1, R1_32);
924             k1 *= C2_32;
925             hash ^= k1;
926         }
927 
928         hash ^= length;
929         return fmix32(hash);
930     }
931 
932     /**
933      * Generates 64-bit hash from a byte array with a default seed.
934      *
935      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
936      *
937      * <p>This is a Murmur3-like 64-bit variant.
938      * The method does not produce the same result as either half of the hash bytes from
939      * {@linkplain #hash128x64(byte[])} with the same byte data.
940      * This method will be removed in a future release.</p>
941      *
942      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
943      * this result as the default seed is positive.</p>
944      *
945      * <p>This is a helper method that will produce the same result as:</p>
946      *
947      * <pre>
948      * int offset = 0;
949      * int seed = 104729;
950      * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
951      * </pre>
952      *
953      * @param data The input byte array
954      * @return The 64-bit hash
955      * @see #hash64(byte[], int, int, int)
956      * @deprecated Not part of the MurmurHash3 implementation.
957      * Use half of the hash bytes from {@link #hash128x64(byte[])}.
958      */
959     @Deprecated
960     public static long hash64(final byte[] data) {
961         return hash64(data, 0, data.length, DEFAULT_SEED);
962     }
963 
964     /**
965      * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
966      *
967      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
968      *
969      * <p>This is a Murmur3-like 64-bit variant.
970      * The method does not produce the same result as either half of the hash bytes from
971      * {@linkplain #hash128x64(byte[])} with the same byte data.
972      * This method will be removed in a future release.</p>
973      *
974      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
975      * this result as the default seed is positive.</p>
976      *
977      * <p>This is a helper method that will produce the same result as:</p>
978      *
979      * <pre>
980      * int seed = 104729;
981      * long hash = MurmurHash3.hash64(data, offset, length, seed);
982      * </pre>
983      *
984      * @param data The input byte array
985      * @param offset The offset of data
986      * @param length The length of array
987      * @return The 64-bit hash
988      * @see #hash64(byte[], int, int, int)
989      * @deprecated Not part of the MurmurHash3 implementation.
990      * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
991      */
992     @Deprecated
993     public static long hash64(final byte[] data, final int offset, final int length) {
994         return hash64(data, offset, length, DEFAULT_SEED);
995     }
996 
997     /**
998      * Generates 64-bit hash from a byte array with the given offset, length and seed.
999      *
1000      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1001      *
1002      * <p>This is a Murmur3-like 64-bit variant.
1003      * This method will be removed in a future release.</p>
1004      *
1005      * <p>This implementation contains a sign-extension bug in the seed initialization.
1006      * This manifests if the seed is negative.</p>
1007      *
1008      * <p>This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks
1009      * of data processed in the MurmurHash3 {@code MurmurHash3_x64_128} method. However the hash
1010      * is not mixed with a hash chunk from the next 8 bytes of data. The method will not return
1011      * the same value as the first or second 64-bits of the function
1012      * {@link #hash128(byte[], int, int, int)}.</p>
1013      *
1014      * <p>Use of this method is not advised. Use the first long returned from
1015      * {@link #hash128x64(byte[], int, int, int)}.</p>
1016      *
1017      * @param data The input byte array
1018      * @param offset The offset of data
1019      * @param length The length of array
1020      * @param seed The initial seed value
1021      * @return The 64-bit hash
1022      * @deprecated Not part of the MurmurHash3 implementation.
1023      * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1024      */
1025     @Deprecated
1026     public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1027         //
1028         // Note: This fails to apply masking using 0xffffffffL to the seed.
1029         //
1030         long hash = seed;
1031         final int nblocks = length >> 3;
1032 
1033         // body
1034         for (int i = 0; i < nblocks; i++) {
1035             final int index = offset + (i << 3);
1036             long k = getLittleEndianLong(data, index);
1037 
1038             // mix functions
1039             k *= C1;
1040             k = Long.rotateLeft(k, R1);
1041             k *= C2;
1042             hash ^= k;
1043             hash = Long.rotateLeft(hash, R2) * M + N1;
1044         }
1045 
1046         // tail
1047         long k1 = 0;
1048         final int index = offset + (nblocks << 3);
1049         switch (offset + length - index) {
1050         case 7:
1051             k1 ^= ((long) data[index + 6] & 0xff) << 48;
1052         case 6:
1053             k1 ^= ((long) data[index + 5] & 0xff) << 40;
1054         case 5:
1055             k1 ^= ((long) data[index + 4] & 0xff) << 32;
1056         case 4:
1057             k1 ^= ((long) data[index + 3] & 0xff) << 24;
1058         case 3:
1059             k1 ^= ((long) data[index + 2] & 0xff) << 16;
1060         case 2:
1061             k1 ^= ((long) data[index + 1] & 0xff) << 8;
1062         case 1:
1063             k1 ^= (long) data[index] & 0xff;
1064             k1 *= C1;
1065             k1 = Long.rotateLeft(k1, R1);
1066             k1 *= C2;
1067             hash ^= k1;
1068         }
1069 
1070         // finalization
1071         hash ^= length;
1072         return fmix64(hash);
1073     }
1074 
1075     /**
1076      * Generates 64-bit hash from an int with a default seed.
1077      *
1078      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1079      *
1080      * <p>This is a Murmur3-like 64-bit variant.
1081      * The method does not produce the same result as either half of the hash bytes from
1082      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code int}.
1083      * This method will be removed in a future release.</p>
1084      *
1085      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1086      * this result as the default seed is positive.</p>
1087      *
1088      * <p>This is a helper method that will produce the same result as:</p>
1089      *
1090      * <pre>
1091      * int offset = 0;
1092      * int seed = 104729;
1093      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4)
1094      *                                          .putInt(data)
1095      *                                          .array(), offset, 4, seed);
1096      * </pre>
1097      *
1098      * @param data The int to hash
1099      * @return The 64-bit hash
1100      * @see #hash64(byte[], int, int, int)
1101      * @deprecated Not part of the MurmurHash3 implementation.
1102      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1103      */
1104     @Deprecated
1105     public static long hash64(final int data) {
1106         long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1107         long hash = DEFAULT_SEED;
1108         k1 *= C1;
1109         k1 = Long.rotateLeft(k1, R1);
1110         k1 *= C2;
1111         hash ^= k1;
1112         // finalization
1113         hash ^= Integer.BYTES;
1114         return fmix64(hash);
1115     }
1116 
1117     /**
1118      * Generates 64-bit hash from a long with a default seed.
1119      *
1120      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1121      *
1122      * <p>This is a Murmur3-like 64-bit variant.
1123      * The method does not produce the same result as either half of the hash bytes from
1124      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code long}.
1125      * This method will be removed in a future release.</p>
1126      *
1127      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1128      * this result as the default seed is positive.</p>
1129      *
1130      * <p>This is a helper method that will produce the same result as:</p>
1131      *
1132      * <pre>
1133      * int offset = 0;
1134      * int seed = 104729;
1135      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8)
1136      *                                          .putLong(data)
1137      *                                          .array(), offset, 8, seed);
1138      * </pre>
1139      *
1140      * @param data The long to hash
1141      * @return The 64-bit hash
1142      * @see #hash64(byte[], int, int, int)
1143      * @deprecated Not part of the MurmurHash3 implementation.
1144      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1145      */
1146     @Deprecated
1147     public static long hash64(final long data) {
1148         long hash = DEFAULT_SEED;
1149         long k = Long.reverseBytes(data);
1150         // mix functions
1151         k *= C1;
1152         k = Long.rotateLeft(k, R1);
1153         k *= C2;
1154         hash ^= k;
1155         hash = Long.rotateLeft(hash, R2) * M + N1;
1156         // finalization
1157         hash ^= Long.BYTES;
1158         return fmix64(hash);
1159     }
1160 
1161     /**
1162      * Generates 64-bit hash from a short with a default seed.
1163      *
1164      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1165      *
1166      * <p>This is a Murmur3-like 64-bit variant.
1167      * The method does not produce the same result as either half of the hash bytes from
1168      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code short}.
1169      * This method will be removed in a future release.</p>
1170      *
1171      * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1172      * this result as the default seed is positive.</p>
1173      *
1174      * <p>This is a helper method that will produce the same result as:</p>
1175      *
1176      * <pre>
1177      * int offset = 0;
1178      * int seed = 104729;
1179      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2)
1180      *                                          .putShort(data)
1181      *                                          .array(), offset, 2, seed);
1182      * </pre>
1183      *
1184      * @param data The short to hash
1185      * @return The 64-bit hash
1186      * @see #hash64(byte[], int, int, int)
1187      * @deprecated Not part of the MurmurHash3 implementation.
1188      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code short}.
1189      */
1190     @Deprecated
1191     public static long hash64(final short data) {
1192         long hash = DEFAULT_SEED;
1193         long k1 = 0;
1194         k1 ^= ((long) data & 0xff) << 8;
1195         k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1196         k1 *= C1;
1197         k1 = Long.rotateLeft(k1, R1);
1198         k1 *= C2;
1199         hash ^= k1;
1200 
1201         // finalization
1202         hash ^= Short.BYTES;
1203         return fmix64(hash);
1204     }
1205 
1206     /**
1207      * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1208      *
1209      * @param k The data to add to the hash
1210      * @param hash The current hash
1211      * @return The new hash
1212      */
1213     private static int mix32(int k, int hash) {
1214         k *= C1_32;
1215         k = Integer.rotateLeft(k, R1_32);
1216         k *= C2_32;
1217         hash ^= k;
1218         return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1219     }
1220 
1221     /** No instance methods. */
1222     private MurmurHash3() {
1223     }
1224 }