View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.numbers.arrays;
19  
20  /**
21   * An index set backed by a open-addressed hash table using linear hashing. Table size is a power
22   * of 2 and has a maximum capacity of 2^29 with a fixed load factor of 0.5. If the functional
23   * capacity is exceeded then the set raises an {@link IllegalStateException}.
24   *
25   * <p>Values are stored using bit inversion. Any positive index will have a negative
26   * representation when stored. An empty slot is indicated by a zero.
27   *
28   * <p>This class has a minimal API. It can be used to ensure a collection of indices of
29   * a known size are unique:
30   *
31   * <pre>{@code
32   * int[] keys = ...
33   * HashIndexSet set = new HashIndexSet(keys.length);
34   * for (int k : keys) {
35   *   if (set.add(k)) {
36   *     // first occurrence of k in keys
37   *   }
38   * }
39   * }</pre>
40   *
41   * @see <a href="https://en.wikipedia.org/wiki/Open_addressing">Open addressing (Wikipedia)</a>
42   * @since 1.2
43   */
44  final class HashIndexSet {
45      /** Message for an invalid index. */
46      private static final String INVALID_INDEX = "Invalid index: ";
47      /** The maximum capacity of the set. */
48      private static final int MAX_CAPACITY = 1 << 29;
49      /** The minimum size of the backing array. */
50      private static final int MIN_SIZE = 16;
51      /**
52       * Unsigned 32-bit integer numerator of the golden ratio (0.618) with an assumed
53       * denominator of 2^32.
54       *
55       * <pre>
56       * 2654435769 = round(2^32 * (sqrt(5) - 1) / 2)
57       * Long.toHexString((long)(0x1p32 * (Math.sqrt(5.0) - 1) / 2))
58       * </pre>
59       */
60      private static final int PHI = 0x9e3779b9;
61  
62      /** The set. */
63      private final int[] set;
64      /** The size. */
65      private int size;
66  
67      /**
68       * Create an instance with size to store up to the specified {@code capacity}.
69       *
70       * <p>The functional capacity (number of indices that can be stored) is the next power
71       * of 2 above {@code capacity}; or a minimum size if the requested {@code capacity} is
72       * small.
73       *
74       * @param capacity Capacity.
75       */
76      private HashIndexSet(int capacity) {
77          // This will generate a load factor at capacity in the range (0.25, 0.5]
78          // The use of Math.max will ignore zero/negative capacity requests.
79          set = new int[nextPow2(Math.max(MIN_SIZE, capacity * 2))];
80      }
81  
82      /**
83       * Create an instance with size to store up to the specified {@code capacity}.
84       * The maximum supported {@code capacity} is 2<sup>29</sup>.
85       *
86       * @param capacity Capacity.
87       * @return the hash index set
88       * @throws IllegalArgumentException if the {@code capacity} is too large.
89       */
90      static HashIndexSet create(int capacity) {
91          if (capacity > MAX_CAPACITY) {
92              throw new IllegalArgumentException("Unsupported capacity: " + capacity);
93          }
94          return new HashIndexSet(capacity);
95      }
96  
97      /**
98       * Returns the closest power-of-two number greater than or equal to {@code value}.
99       *
100      * <p>Warning: This will return {@link Integer#MIN_VALUE} for any {@code value} above
101      * {@code 1 << 30}. This is the next power of 2 as an unsigned integer.
102      *
103      * <p>See <a
104      * href="https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2">Bit
105      * Hacks: Rounding up to a power of 2</a>
106      *
107      * @param value Value.
108      * @return the closest power-of-two number greater than or equal to value
109      */
110     private static int nextPow2(int value) {
111         int result = value - 1;
112         result |= result >>> 1;
113         result |= result >>> 2;
114         result |= result >>> 4;
115         result |= result >>> 8;
116         return (result | (result >>> 16)) + 1;
117     }
118 
119     /**
120      * Adds the {@code index} to the set.
121      *
122      * @param index Index.
123      * @return true if the set was modified by the operation
124      * @throws IndexOutOfBoundsException if the index is negative
125      */
126     boolean add(int index) {
127         if (index < 0) {
128             throw new IndexOutOfBoundsException(INVALID_INDEX + index);
129         }
130         final int[] keys = set;
131         final int key = ~index;
132         final int mask = keys.length - 1;
133         int pos = mix(index) & mask;
134         int curr = keys[pos];
135         if (curr < 0) {
136             if (curr == key) {
137                 // Already present
138                 return false;
139             }
140             // Probe
141             while ((curr = keys[pos = (pos + 1) & mask]) < 0) {
142                 if (curr == key) {
143                     // Already present
144                     return false;
145                 }
146             }
147         }
148         // Insert
149         keys[pos] = key;
150         // Here the load factor is 0.5: Test if size > keys.length * 0.5
151         if (++size > (mask + 1) >>> 1) {
152             // This is where we should grow the size of the set and re-insert
153             // all current keys into the new key storage. Here we are using a
154             // fixed capacity so raise an exception.
155             throw new IllegalStateException("Functional capacity exceeded: " + (keys.length >>> 1));
156         }
157         return true;
158     }
159 
160     /**
161      * Mix the bits of an integer.
162      *
163      * <p>This is the fast hash function used in the linear hash implementation in the <a
164      * href="https://github.com/leventov/Koloboke">Koloboke Collections</a>.
165      *
166      * @param x Bits.
167      * @return the mixed bits
168      */
169     private static int mix(int x) {
170         final int h = x * PHI;
171         return h ^ (h >>> 16);
172     }
173 }