View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.digest;
19  
20  import static java.lang.Integer.rotateLeft;
21  
22  import java.util.zip.Checksum;
23  
24  /**
25   * Implements the xxHash32 hash algorithm.
26   *
27   * <p>
28   * Copied from Commons Compress 1.14 <a href=
29   * "https://gitbox.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD">https://gitbox.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD</a>
30   * </p>
31   * <p>
32   * NotThreadSafe
33   * </p>
34   *
35   * @see <a href="https://cyan4973.github.io/xxHash/">xxHash</a>
36   * @since 1.11
37   */
38  public class XXHash32 implements Checksum {
39  
40      private static final int BUF_SIZE = 16;
41      private static final int ROTATE_BITS = 13;
42  
43      private static final int PRIME1 = (int) 2654435761L;
44      private static final int PRIME2 = (int) 2246822519L;
45      private static final int PRIME3 = (int) 3266489917L;
46      private static final int PRIME4 =  668265263;
47      private static final int PRIME5 =  374761393;
48  
49      /**
50       * Gets the little-endian int from 4 bytes starting at the specified index.
51       *
52       * @param buffer The data
53       * @param idx The index
54       * @return The little-endian int
55       */
56      private static int getInt(final byte[] buffer, final int idx) {
57          return buffer[idx    ] & 0xff |
58                 (buffer[idx + 1] & 0xff) <<  8 |
59                 (buffer[idx + 2] & 0xff) << 16 |
60                 (buffer[idx + 3] & 0xff) << 24;
61      }
62      private final byte[] oneByte = new byte[1];
63      private final int[] state = new int[4];
64      // Note: The code used to use ByteBuffer but the manual method is 50% faster
65      // See: https://gitbox.apache.org/repos/asf/commons-compress/diff/2f56fb5c
66      private final byte[] buffer = new byte[BUF_SIZE];
67  
68      private final int seed;
69      private int totalLen;
70  
71      private int pos;
72  
73      /** Sets to true when the state array has been updated since the last reset. */
74      private boolean stateUpdated;
75  
76      /**
77       * Creates an XXHash32 instance with a seed of 0.
78       */
79      public XXHash32() {
80          this(0);
81      }
82  
83      /**
84       * Creates an XXHash32 instance.
85       * @param seed the seed to use
86       */
87      public XXHash32(final int seed) {
88          this.seed = seed;
89          initializeState();
90      }
91  
92      @Override
93      public long getValue() {
94          int hash;
95          if (stateUpdated) {
96              // Hash with the state
97              hash =
98                  rotateLeft(state[0],  1) +
99                  rotateLeft(state[1],  7) +
100                 rotateLeft(state[2], 12) +
101                 rotateLeft(state[3], 18);
102         } else {
103             // Hash using the original seed from position 2
104             hash = state[2] + PRIME5;
105         }
106         hash += totalLen;
107 
108         int idx = 0;
109         final int limit = pos - 4;
110         for (; idx <= limit; idx += 4) {
111             hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4;
112         }
113         while (idx < pos) {
114             hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1;
115         }
116 
117         hash ^= hash >>> 15;
118         hash *= PRIME2;
119         hash ^= hash >>> 13;
120         hash *= PRIME3;
121         hash ^= hash >>> 16;
122         return hash & 0xffffffffL;
123     }
124 
125     private void initializeState() {
126         state[0] = seed + PRIME1 + PRIME2;
127         state[1] = seed + PRIME2;
128         state[2] = seed;
129         state[3] = seed - PRIME1;
130     }
131 
132     private void process(final byte[] b, final int offset) {
133         // local shadows for performance
134         int s0 = state[0];
135         int s1 = state[1];
136         int s2 = state[2];
137         int s3 = state[3];
138 
139         s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1;
140         s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1;
141         s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1;
142         s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1;
143 
144         state[0] = s0;
145         state[1] = s1;
146         state[2] = s2;
147         state[3] = s3;
148 
149         stateUpdated = true;
150     }
151 
152     @Override
153     public void reset() {
154         initializeState();
155         totalLen = 0;
156         pos = 0;
157         stateUpdated = false;
158     }
159 
160     @Override
161     public void update(final byte[] b, int off, final int len) {
162         if (len <= 0) {
163             return;
164         }
165         totalLen += len;
166 
167         final int end = off + len;
168 
169         // Check if the unprocessed bytes and new bytes can fill a block of 16.
170         // Make this overflow safe in the event that len is Integer.MAX_VALUE.
171         // Equivalent to: (pos + len < BUF_SIZE)
172         if (pos + len - BUF_SIZE < 0) {
173             System.arraycopy(b, off, buffer, pos, len);
174             pos += len;
175             return;
176         }
177 
178         // Process left-over bytes with new bytes
179         if (pos > 0) {
180             final int size = BUF_SIZE - pos;
181             System.arraycopy(b, off, buffer, pos, size);
182             process(buffer, 0);
183             off += size;
184         }
185 
186         final int limit = end - BUF_SIZE;
187         while (off <= limit) {
188             process(b, off);
189             off += BUF_SIZE;
190         }
191 
192         // Handle left-over bytes
193         if (off < end) {
194             pos = end - off;
195             System.arraycopy(b, off, buffer, 0, pos);
196         } else {
197             pos = 0;
198         }
199     }
200 
201     @Override
202     public void update(final int b) {
203         oneByte[0] = (byte) (b & 0xff);
204         update(oneByte, 0, 1);
205     }
206 }