2014-12-28 17:51:16 +01:00
// This is free and unencumbered software released into the public domain.
2015-07-02 11:14:30 +02:00
//
2014-12-28 17:51:16 +01:00
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
// -------------------------------------------------------
2021-06-08 00:39:36 +02:00
// Written by Claire Xenia Wolf <claire@yosyshq.com> in 2014
2014-12-28 17:51:16 +01:00
// -------------------------------------------------------
# ifndef HASHLIB_H
2016-05-14 11:43:20 +02:00
# define HASHLIB_H
2014-12-26 19:28:52 +01:00
Improve commutative hashing.
The simple XOR `commutative_eat()` implementation produces a lot of collisions.
https://www.preprints.org/manuscript/201710.0192/v1/download is a useful reference on this topic.
Running the included `hashTest.cc` without the hashlib changes, I get 49,580,349 collisions.
The 49,995,000 (i,j) pairs (0 <= i < 10000, i < j < 10000) hash into only 414,651 unique hash values.
We get simple collisions like (0,1) colliding with (2,3).
With the hashlib changes, we get only 707,099 collisions and 49,287,901 unique hash values.
Much better! The `commutative_hash` implementation corresponds to `Sum(4)` in the paper
mentioned above.
2025-08-19 05:21:54 +02:00
#include <algorithm>
#include <array>
#include <cstring>
#include <optional>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <variant>
#include <vector>
#include <stdint.h>
2024-11-19 20:01:41 +01:00
# define YS_HASHING_VERSION 1
2014-12-28 17:51:16 +01:00
namespace hashlib {
2024-10-01 15:12:03 +02:00
/**
 * HASHING
 *
 * Also refer to docs/source/yosys_internals/hashing.rst
 *
 * The Hasher knows how to hash 32 and 64-bit integers. That's it.
 * In the future, it could be expanded to do vectors with SIMD.
 *
 * The Hasher doesn't know how to hash common standard containers
 * and compositions. However, hashlib provides centralized wrappers.
 *
 * Hashlib doesn't know how to hash silly Yosys-specific types.
 * Hashlib doesn't depend on Yosys and can be used standalone.
 * Please don't use hashlib standalone for new projects.
 * Never directly include kernel/hashlib.h in Yosys code.
 * Instead include kernel/yosys_common.h
 *
 * The hash_ops type is now always left to its default value, derived
 * from templated functions through SFINAE. Providing custom ops is
 * still supported.
 *
 * HASH TABLES
 *
 * We implement associative data structures with separate chaining.
 * Linked lists use integers into the indirection hashtable array
 * instead of pointers.
 */
2014-12-30 13:22:33 +01:00
const int hashtable_size_trigger = 2 ;
const int hashtable_size_factor = 3 ;
2014-12-27 03:04:50 +01:00
2024-11-04 12:41:00 +01:00
namespace legacy {
    // Additive DJB2 step: returns a * 33 + b, wrapping modulo 2^32.
    inline uint32_t djb2_add(uint32_t a, uint32_t b) {
        const uint32_t scaled = a * 33u; // same value as (a << 5) + a
        return scaled + b;
    }
}
2014-12-26 19:28:52 +01:00
2024-10-01 15:12:03 +02:00
template < typename T >
struct hash_ops ;
2014-12-27 12:02:57 +01:00
2014-12-29 00:12:36 +01:00
// One xorshift scrambling round over `a`.
// The shift triple depends on the width of unsigned int: 13/17/5 for 32 bit
// and 13/7/17 for 64 bit. Any other width throws std::runtime_error.
inline unsigned int mkhash_xorshift(unsigned int a) {
    switch (sizeof(a)) {
    case 4:
        a ^= a << 13;
        a ^= a >> 17;
        a ^= a << 5;
        return a;
    case 8:
        a ^= a << 13;
        a ^= a >> 7;
        a ^= a << 17;
        return a;
    default:
        throw std::runtime_error(" mkhash_xorshift() only implemented for 32 bit and 64 bit ints ");
    }
}
2024-11-06 12:58:04 +01:00
class HasherDJB32 {
public :
2024-10-01 15:12:03 +02:00
using hash_t = uint32_t ;
2014-12-26 19:28:52 +01:00
2024-11-06 12:58:04 +01:00
HasherDJB32 ( ) {
2024-10-01 15:12:03 +02:00
// traditionally 5381 is used as starting value for the djb2 hash
state = 5381 ;
2014-12-26 19:28:52 +01:00
}
2024-10-18 12:34:25 +02:00
static void set_fudge ( hash_t f ) {
2024-10-01 16:02:41 +02:00
fudge = f ;
}
2015-08-24 22:49:23 +02:00
2024-11-06 12:58:04 +01:00
private :
2024-10-01 15:12:03 +02:00
uint32_t state ;
2024-10-01 16:02:41 +02:00
static uint32_t fudge ;
2024-10-01 15:12:03 +02:00
// The XOR version of DJB2
[ [ nodiscard ] ]
2024-11-11 13:27:04 +01:00
static uint32_t djb2_xor ( uint32_t a , uint32_t b ) {
2024-10-01 16:02:41 +02:00
uint32_t hash = ( ( a < < 5 ) + a ) ^ b ;
return hash ;
2021-05-24 21:27:29 +02:00
}
Improve commutative hashing.
The simple XOR `commutative_eat()` implementation produces a lot of collisions.
https://www.preprints.org/manuscript/201710.0192/v1/download is a useful reference on this topic.
Running the included `hashTest.cc` without the hashlib changes, I get 49,580,349 collisions.
The 49,995,000 (i,j) pairs (0 <= i < 10000, i < j < 10000) hash into only 414,651 unique hash values.
We get simple collisions like (0,1) colliding with (2,3).
With the hashlib changes, we get only 707,099 collisions and 49,287,901 unique hash values.
Much better! The `commutative_hash` implementation corresponds to `Sum(4)` in the paper
mentioned above.
2025-08-19 05:21:54 +02:00
public :
2024-10-01 15:12:03 +02:00
void hash32 ( uint32_t i ) {
2024-11-11 13:27:04 +01:00
state = djb2_xor ( i , state ) ;
2024-10-30 10:48:09 +01:00
state = mkhash_xorshift ( fudge ^ state ) ;
2024-10-01 15:12:03 +02:00
return ;
2014-12-26 19:28:52 +01:00
}
2024-10-01 15:12:03 +02:00
void hash64 ( uint64_t i ) {
2024-11-20 12:11:37 +01:00
state = djb2_xor ( ( uint32_t ) ( i & 0xFFFFFFFFULL ) , state ) ;
2024-11-11 13:27:04 +01:00
state = djb2_xor ( ( uint32_t ) ( i > > 32ULL ) , state ) ;
2024-10-30 10:48:09 +01:00
state = mkhash_xorshift ( fudge ^ state ) ;
2024-10-01 15:12:03 +02:00
return ;
2015-08-12 13:37:09 +02:00
}
2024-10-18 12:34:25 +02:00
[ [ nodiscard ] ]
2025-09-16 06:16:11 +02:00
hash_t yield ( ) const {
2024-10-01 15:12:03 +02:00
return ( hash_t ) state ;
2021-11-25 20:43:58 +01:00
}
2024-10-01 15:12:03 +02:00
template < typename T >
2024-11-11 15:45:11 +01:00
void eat ( T & & t ) {
2024-11-19 20:04:19 +01:00
* this = hash_ops < std : : remove_cv_t < std : : remove_reference_t < T > > > : : hash_into ( std : : forward < T > ( t ) , * this ) ;
2024-10-09 15:00:31 +02:00
}
template < typename T >
2024-11-11 15:45:11 +01:00
void eat ( const T & t ) {
2024-11-19 20:04:19 +01:00
* this = hash_ops < T > : : hash_into ( t , * this ) ;
2024-10-01 15:12:03 +02:00
}
Improve commutative hashing.
The simple XOR `commutative_eat()` implementation produces a lot of collisions.
https://www.preprints.org/manuscript/201710.0192/v1/download is a useful reference on this topic.
Running the included `hashTest.cc` without the hashlib changes, I get 49,580,349 collisions.
The 49,995,000 (i,j) pairs (0 <= i < 10000, i < j < 10000) hash into only 414,651 unique hash values.
We get simple collisions like (0,1) colliding with (2,3).
With the hashlib changes, we get only 707,099 collisions and 49,287,901 unique hash values.
Much better! The `commutative_hash` implementation corresponds to `Sum(4)` in the paper
mentioned above.
2025-08-19 05:21:54 +02:00
[ [ deprecated ] ]
2024-11-11 15:45:11 +01:00
void commutative_eat ( hash_t t ) {
2024-10-01 15:12:03 +02:00
state ^ = t ;
2023-10-03 23:25:59 +02:00
}
2024-10-01 15:12:03 +02:00
2024-10-30 10:49:17 +01:00
void force ( hash_t new_state ) {
state = new_state ;
}
2023-10-03 23:25:59 +02:00
} ;
2014-12-26 19:28:52 +01:00
2024-11-06 12:58:04 +01:00
using Hasher = HasherDJB32 ;
2025-01-13 20:21:05 +01:00
// Boilerplate compressor for trivially implementing a top-level hash method
// on top of hash_into.
//
// Usage inside a hash_ops-like struct:
//     HASH_TOP_LOOP_FST (const Key &a) HASH_TOP_LOOP_SND
// The parameter must be named `a`: the generated body feeds it, together with
// a freshly constructed Hasher, to the struct's own hash_into().
#define HASH_TOP_LOOP_FST [[nodiscard]] static inline Hasher hash
#define HASH_TOP_LOOP_SND { \
    Hasher h; \
    h = hash_into(a, h); \
    return h; \
}
2024-11-04 12:41:00 +01:00
2024-10-01 15:12:03 +02:00
template < typename T >
struct hash_ops {
static inline bool cmp ( const T & a , const T & b ) {
2014-12-26 19:28:52 +01:00
return a = = b ;
}
2025-01-14 12:39:15 +01:00
[ [ nodiscard ] ] static inline Hasher hash_into ( const T & a , Hasher h ) {
2024-11-20 12:11:37 +01:00
if constexpr ( std : : is_integral_v < T > ) {
2024-10-01 15:12:03 +02:00
static_assert ( sizeof ( T ) < = sizeof ( uint64_t ) ) ;
if ( sizeof ( T ) = = sizeof ( uint64_t ) )
h . hash64 ( a ) ;
else
h . hash32 ( a ) ;
return h ;
} else if constexpr ( std : : is_enum_v < T > ) {
using u_type = std : : underlying_type_t < T > ;
2024-11-19 20:04:19 +01:00
return hash_ops < u_type > : : hash_into ( ( u_type ) a , h ) ;
2024-10-01 15:12:03 +02:00
} else if constexpr ( std : : is_pointer_v < T > ) {
2024-11-19 20:04:19 +01:00
return hash_ops < uintptr_t > : : hash_into ( ( uintptr_t ) a , h ) ;
2024-10-01 15:12:03 +02:00
} else if constexpr ( std : : is_same_v < T , std : : string > ) {
2025-08-29 06:13:23 +02:00
int size = a . size ( ) ;
int i = 0 ;
while ( i + 8 < size ) {
uint64_t v ;
memcpy ( & v , a . data ( ) + i , 8 ) ;
h . hash64 ( v ) ;
i + = 8 ;
}
uint64_t v = 0 ;
memcpy ( & v , a . data ( ) + i , size - i ) ;
h . hash64 ( v ) ;
2024-10-01 15:12:03 +02:00
return h ;
} else {
2024-11-19 20:04:19 +01:00
return a . hash_into ( h ) ;
2024-10-01 15:12:03 +02:00
}
2014-12-26 19:28:52 +01:00
}
2025-01-13 20:21:05 +01:00
HASH_TOP_LOOP_FST ( const T & a ) HASH_TOP_LOOP_SND
2014-12-26 19:28:52 +01:00
} ;
2014-12-29 02:01:42 +01:00
template < typename P , typename Q > struct hash_ops < std : : pair < P , Q > > {
2025-08-25 05:09:04 +02:00
static inline bool cmp ( const std : : pair < P , Q > & a , const std : : pair < P , Q > & b ) {
2014-12-29 02:01:42 +01:00
return a = = b ;
}
2025-08-25 05:09:04 +02:00
[ [ nodiscard ] ] static inline Hasher hash_into ( const std : : pair < P , Q > & a , Hasher h ) {
2024-11-19 20:04:19 +01:00
h = hash_ops < P > : : hash_into ( a . first , h ) ;
h = hash_ops < Q > : : hash_into ( a . second , h ) ;
2024-10-01 15:12:03 +02:00
return h ;
2014-12-29 02:01:42 +01:00
}
2025-08-25 05:09:04 +02:00
HASH_TOP_LOOP_FST ( const std : : pair < P , Q > & a ) HASH_TOP_LOOP_SND
2025-09-01 05:36:03 +02:00
[ [ nodiscard ] ] static inline Hasher hash ( const P & p , const Q & q ) {
Hasher h ;
h = hash_ops < P > : : hash_into ( p , h ) ;
h = hash_ops < Q > : : hash_into ( q , h ) ;
return h ;
}
2014-12-29 02:01:42 +01:00
} ;
2015-04-07 17:23:30 +02:00
template < typename . . . T > struct hash_ops < std : : tuple < T . . . > > {
2025-08-25 05:09:04 +02:00
static inline bool cmp ( const std : : tuple < T . . . > & a , const std : : tuple < T . . . > & b ) {
2015-04-07 17:23:30 +02:00
return a = = b ;
}
template < size_t I = 0 >
2025-08-25 05:09:04 +02:00
static inline typename std : : enable_if < I = = sizeof . . . ( T ) , Hasher > : : type hash_into ( const std : : tuple < T . . . > & , Hasher h ) {
2024-10-01 15:12:03 +02:00
return h ;
2015-04-07 17:23:30 +02:00
}
template < size_t I = 0 >
2025-08-25 05:09:04 +02:00
static inline typename std : : enable_if < I ! = sizeof . . . ( T ) , Hasher > : : type hash_into ( const std : : tuple < T . . . > & a , Hasher h ) {
2016-02-14 09:35:25 +01:00
typedef hash_ops < typename std : : tuple_element < I , std : : tuple < T . . . > > : : type > element_ops_t ;
2024-11-19 20:04:19 +01:00
h = hash_into < I + 1 > ( a , h ) ;
h = element_ops_t : : hash_into ( std : : get < I > ( a ) , h ) ;
2024-10-01 15:12:03 +02:00
return h ;
2015-04-07 17:23:30 +02:00
}
2025-08-25 05:09:04 +02:00
HASH_TOP_LOOP_FST ( const std : : tuple < T . . . > & a ) HASH_TOP_LOOP_SND
2015-04-07 17:23:30 +02:00
} ;
2014-12-30 23:45:43 +01:00
template < typename T > struct hash_ops < std : : vector < T > > {
2025-08-25 05:09:04 +02:00
static inline bool cmp ( const std : : vector < T > & a , const std : : vector < T > & b ) {
2014-12-30 23:45:43 +01:00
return a = = b ;
}
2025-08-25 05:09:04 +02:00
[ [ nodiscard ] ] static inline Hasher hash_into ( const std : : vector < T > & a , Hasher h ) {
2024-11-20 12:11:37 +01:00
h . eat ( ( uint32_t ) a . size ( ) ) ;
2014-12-30 23:45:43 +01:00
for ( auto k : a )
2024-11-11 15:45:11 +01:00
h . eat ( k ) ;
2014-12-30 23:45:43 +01:00
return h ;
}
2025-08-25 05:09:04 +02:00
HASH_TOP_LOOP_FST ( const std : : vector < T > & a ) HASH_TOP_LOOP_SND
2014-12-30 23:45:43 +01:00
} ;
2024-10-18 16:18:19 +02:00
template < typename T , size_t N > struct hash_ops < std : : array < T , N > > {
2025-08-25 05:09:04 +02:00
static inline bool cmp ( const std : : array < T , N > & a , const std : : array < T , N > & b ) {
2024-10-18 16:18:19 +02:00
return a = = b ;
}
2025-08-25 05:09:04 +02:00
[ [ nodiscard ] ] static inline Hasher hash_into ( const std : : array < T , N > & a , Hasher h ) {
2024-10-18 16:18:19 +02:00
for ( const auto & k : a )
2024-11-19 20:04:19 +01:00
h = hash_ops < T > : : hash_into ( k , h ) ;
2024-10-18 16:18:19 +02:00
return h ;
}
2025-08-25 05:09:04 +02:00
HASH_TOP_LOOP_FST ( const std : : array < T , N > & a ) HASH_TOP_LOOP_SND
2024-10-18 16:18:19 +02:00
} ;
2014-12-26 21:59:41 +01:00
struct hash_cstr_ops {
2014-12-31 13:05:33 +01:00
static inline bool cmp ( const char * a , const char * b ) {
2024-11-20 12:11:37 +01:00
return strcmp ( a , b ) = = 0 ;
2014-12-26 21:59:41 +01:00
}
2025-01-14 12:39:15 +01:00
[ [ nodiscard ] ] static inline Hasher hash_into ( const char * a , Hasher h ) {
2014-12-26 21:59:41 +01:00
while ( * a )
2024-10-01 15:12:03 +02:00
h . hash32 ( * ( a + + ) ) ;
return h ;
2014-12-26 21:59:41 +01:00
}
2025-01-20 16:15:48 +01:00
HASH_TOP_LOOP_FST ( const char * a ) HASH_TOP_LOOP_SND
2014-12-26 21:59:41 +01:00
} ;
2024-10-01 15:12:03 +02:00
// Keys of type `char*` use the C-string operations above (content-based
// comparison and hashing) instead of the generic pointer handling.
template < > struct hash_ops < char * > : hash_cstr_ops { } ;
2014-12-26 21:35:22 +01:00
struct hash_ptr_ops {
2014-12-31 13:05:33 +01:00
static inline bool cmp ( const void * a , const void * b ) {
2014-12-26 21:35:22 +01:00
return a = = b ;
}
2025-01-14 12:39:15 +01:00
[ [ nodiscard ] ] static inline Hasher hash_into ( const void * a , Hasher h ) {
2024-11-19 20:04:19 +01:00
return hash_ops < uintptr_t > : : hash_into ( ( uintptr_t ) a , h ) ;
2014-12-26 21:35:22 +01:00
}
2025-01-20 16:15:48 +01:00
HASH_TOP_LOOP_FST ( const void * a ) HASH_TOP_LOOP_SND
2014-12-26 21:35:22 +01:00
} ;
2014-12-27 03:04:50 +01:00
struct hash_obj_ops {
2014-12-31 13:05:33 +01:00
static inline bool cmp ( const void * a , const void * b ) {
2014-12-27 03:04:50 +01:00
return a = = b ;
}
template < typename T >
2025-01-14 12:39:15 +01:00
[ [ nodiscard ] ] static inline Hasher hash_into ( const T * a , Hasher h ) {
2024-11-20 17:06:49 +01:00
if ( a )
2025-01-14 12:36:24 +01:00
h = a - > hash_into ( h ) ;
2024-11-20 17:06:49 +01:00
else
h . eat ( 0 ) ;
return h ;
2014-12-27 03:04:50 +01:00
}
2025-01-13 20:21:05 +01:00
template < typename T >
2025-01-20 16:15:48 +01:00
HASH_TOP_LOOP_FST ( const T * a ) HASH_TOP_LOOP_SND
2014-12-27 03:04:50 +01:00
} ;
2024-10-01 15:12:03 +02:00
/**
* If you find yourself using this function , think hard
* about if it ' s the right thing to do . Mixing finalized
* hashes together with XORs or worse can destroy
* desirable qualities of the hash function
*/
2015-10-25 19:31:29 +01:00
template < typename T >
2024-10-18 12:34:25 +02:00
[ [ nodiscard ] ]
2024-10-01 15:12:03 +02:00
Hasher : : hash_t run_hash ( const T & obj ) {
2025-01-13 20:21:05 +01:00
return hash_ops < T > : : hash ( obj ) . yield ( ) ;
2015-10-25 19:31:29 +01:00
}
2024-11-11 13:27:04 +01:00
/** Refer to docs/source/yosys_internals/hashing.rst */
template < typename T >
[ [ nodiscard ] ]
[ [ deprecated ] ]
inline unsigned int mkhash ( const T & v ) {
return ( unsigned int ) run_hash < T > ( v ) ;
}
2024-06-20 17:25:21 +02:00
template < > struct hash_ops < std : : monostate > {
static inline bool cmp ( std : : monostate a , std : : monostate b ) {
return a = = b ;
}
2025-01-14 12:39:15 +01:00
[ [ nodiscard ] ] static inline Hasher hash_into ( std : : monostate , Hasher h ) {
2024-10-01 15:12:03 +02:00
return h ;
2024-06-20 17:25:21 +02:00
}
} ;
template < typename . . . T > struct hash_ops < std : : variant < T . . . > > {
2025-08-25 05:09:04 +02:00
static inline bool cmp ( const std : : variant < T . . . > & a , const std : : variant < T . . . > & b ) {
2024-06-20 17:25:21 +02:00
return a = = b ;
}
2025-08-25 05:09:04 +02:00
[ [ nodiscard ] ] static inline Hasher hash_into ( const std : : variant < T . . . > & a , Hasher h ) {
2024-11-11 15:45:11 +01:00
std : : visit ( [ & h ] ( const auto & v ) { h . eat ( v ) ; } , a ) ;
h . eat ( a . index ( ) ) ;
2024-10-01 15:12:03 +02:00
return h ;
2024-06-20 17:25:21 +02:00
}
} ;
2024-07-25 13:25:19 +02:00
template < typename T > struct hash_ops < std : : optional < T > > {
2025-08-25 05:09:04 +02:00
static inline bool cmp ( const std : : optional < T > & a , const std : : optional < T > & b ) {
2024-07-25 13:25:19 +02:00
return a = = b ;
}
2025-08-25 05:09:04 +02:00
[ [ nodiscard ] ] static inline Hasher hash_into ( const std : : optional < T > & a , Hasher h ) {
2024-07-25 13:25:19 +02:00
if ( a . has_value ( ) )
2024-11-11 15:45:11 +01:00
h . eat ( * a ) ;
2024-07-25 13:25:19 +02:00
else
2024-11-11 15:45:11 +01:00
h . eat ( 0 ) ;
2024-10-01 15:12:03 +02:00
return h ;
2024-07-25 13:25:19 +02:00
}
} ;
2025-01-02 17:59:11 +01:00
// Returns the bucket count to use for a table that needs at least `min_size`
// buckets: the smallest entry of the table below that is >= min_size (so a
// request for 0 yields 0).
//
// Throws std::length_error when `min_size` exceeds the largest table entry and
// unsigned int is 32 bit wide. For wider unsigned int the table is retried
// scaled by 100129 before giving up.
inline unsigned int hashtable_size(unsigned int min_size)
{
    // Primes as generated by https://oeis.org/A175953
    // Kept as a constexpr array: sorted ascending, no heap allocation and no
    // mutable shared state.
    static constexpr unsigned int zero_and_some_primes[] = {
        0, 23, 29, 37, 47, 59, 79, 101, 127, 163, 211, 269, 337, 431, 541, 677,
        853, 1069, 1361, 1709, 2137, 2677, 3347, 4201, 5261, 6577, 8231, 10289,
        12889, 16127, 20161, 25219, 31531, 39419, 49277, 61603, 77017, 96281,
        120371, 150473, 188107, 235159, 293957, 367453, 459317, 574157, 717697,
        897133, 1121423, 1401791, 1752239, 2190299, 2737937, 3422429, 4278037,
        5347553, 6684443, 8355563, 10444457, 13055587, 16319519, 20399411,
        25499291, 31874149, 39842687, 49803361, 62254207, 77817767, 97272239,
        121590311, 151987889, 189984863, 237481091, 296851369, 371064217,
        463830313, 579787991, 724735009, 905918777, 1132398479, 1415498113,
        1769372713, 2211715897u, 2764644887u, 3455806139u
    };

    // The table is sorted, so the first entry >= min_size is a lower bound.
    const unsigned int *const table_end = std::end(zero_and_some_primes);
    const unsigned int *const hit =
        std::lower_bound(std::begin(zero_and_some_primes), table_end, min_size);
    if (hit != table_end)
        return *hit;

    if (sizeof(unsigned int) == 4)
        throw std::length_error(" hash table exceeded maximum size. \n Design is likely too large for yosys to handle, if possible try not to flatten the design. ");

    for (unsigned int p : zero_and_some_primes)
        if (100129 * p > min_size) return 100129 * p;
    throw std::length_error(" hash table exceeded maximum size. ");
}
2025-01-13 20:21:05 +01:00
// Forward declarations of the hashlib container templates. In every one of
// them the OPS parameter (hash/compare operations for key type K) defaults to
// hash_ops<K>; idict additionally takes an integer `offset` defaulting to 0.
template < typename K , typename T , typename OPS = hash_ops < K > > class dict ;
template < typename K , int offset = 0 , typename OPS = hash_ops < K > > class idict ;
template < typename K , typename OPS = hash_ops < K > > class pool ;
template < typename K , typename OPS = hash_ops < K > > class mfp ;
2020-04-23 00:04:22 +02:00
Improve commutative hashing.
The simple XOR `commutative_eat()` implementation produces a lot of collisions.
https://www.preprints.org/manuscript/201710.0192/v1/download is a useful reference on this topic.
Running the included `hashTest.cc` without the hashlib changes, I get 49,580,349 collisions.
The 49,995,000 (i,j) pairs (0 <= i < 10000, i < j < 10000) hash into only 414,651 unique hash values.
We get simple collisions like (0,1) colliding with (2,3).
With the hashlib changes, we get only 707,099 collisions and 49,287,901 unique hash values.
Much better! The `commutative_hash` implementation corresponds to `Sum(4)` in the paper
mentioned above.
2025-08-19 05:21:54 +02:00
// Computes the hash value of an unordered set of elements.
// See https://www.preprints.org/manuscript/201710.0192/v1/download.
// This is the Sum(4) algorithm from that paper, which has good collision resistance,
// much better than Sum(1) or Xor(1) (and somewhat better than Xor(4)).
class commutative_hash {
public :
commutative_hash ( ) {
buckets . fill ( 0 ) ;
}
2025-09-16 06:16:11 +02:00
template < typename T >
void eat ( const T & obj ) {
eat ( hash_ops < T > : : hash ( obj ) ) ;
}
void eat ( const Hasher & h ) {
Improve commutative hashing.
The simple XOR `commutative_eat()` implementation produces a lot of collisions.
https://www.preprints.org/manuscript/201710.0192/v1/download is a useful reference on this topic.
Running the included `hashTest.cc` without the hashlib changes, I get 49,580,349 collisions.
The 49,995,000 (i,j) pairs (0 <= i < 10000, i < j < 10000) hash into only 414,651 unique hash values.
We get simple collisions like (0,1) colliding with (2,3).
With the hashlib changes, we get only 707,099 collisions and 49,287,901 unique hash values.
Much better! The `commutative_hash` implementation corresponds to `Sum(4)` in the paper
mentioned above.
2025-08-19 05:21:54 +02:00
Hasher : : hash_t v = h . yield ( ) ;
size_t index = v & ( buckets . size ( ) - 1 ) ;
buckets [ index ] + = v ;
}
[ [ nodiscard ] ] Hasher hash_into ( Hasher h ) const {
for ( auto b : buckets )
h . eat ( b ) ;
return h ;
}
private :
std : : array < Hasher : : hash_t , 4 > buckets ;
} ;
2024-11-04 12:41:00 +01:00
template < typename K , typename T , typename OPS >
2024-10-01 15:12:03 +02:00
class dict {
2014-12-26 19:28:52 +01:00
struct entry_t
{
std : : pair < K , T > udata ;
2014-12-31 03:58:29 +01:00
int next ;
2014-12-26 21:35:22 +01:00
2014-12-31 03:58:29 +01:00
entry_t ( ) { }
entry_t ( const std : : pair < K , T > & udata , int next ) : udata ( udata ) , next ( next ) { }
2015-02-09 20:11:51 +01:00
entry_t ( std : : pair < K , T > & & udata , int next ) : udata ( std : : move ( udata ) ) , next ( next ) { }
2020-04-24 10:37:16 +02:00
bool operator < ( const entry_t & other ) const { return udata . first < other . udata . first ; }
2014-12-26 19:28:52 +01:00
} ;
std : : vector < int > hashtable ;
std : : vector < entry_t > entries ;
2024-11-04 12:41:00 +01:00
OPS ops ;
2014-12-26 19:28:52 +01:00
2015-02-09 20:11:51 +01:00
// Internal consistency check: throws std::runtime_error when `cond` is false.
// Compiles to a no-op when NDEBUG is defined.
#ifdef NDEBUG
static inline void do_assert(bool) { }
#else
static inline void do_assert(bool cond) {
    if (cond)
        return;
    throw std::runtime_error(" dict<> assert failed. ");
}
#endif
2014-12-26 19:28:52 +01:00
2024-10-18 12:34:25 +02:00
// Maps `key` to its bucket index in `hashtable`; returns 0 while the table is
// still empty.
Hasher::hash_t do_hash(const K &key) const
{
    if (hashtable.empty())
        return 0;
    const unsigned int bucket_count = (unsigned int)(hashtable.size());
    return ops.hash(key).yield() % bucket_count;
}
2014-12-31 03:58:29 +01:00
void do_rehash ( )
2014-12-29 20:24:28 +01:00
{
2014-12-31 03:58:29 +01:00
hashtable . clear ( ) ;
2016-01-31 22:50:34 +01:00
hashtable . resize ( hashtable_size ( entries . capacity ( ) * hashtable_size_factor ) , - 1 ) ;
2014-12-29 20:24:28 +01:00
2014-12-31 03:58:29 +01:00
for ( int i = 0 ; i < int ( entries . size ( ) ) ; i + + ) {
do_assert ( - 1 < = entries [ i ] . next & & entries [ i ] . next < int ( entries . size ( ) ) ) ;
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( entries [ i ] . udata . first ) ;
2014-12-31 03:58:29 +01:00
entries [ i ] . next = hashtable [ hash ] ;
hashtable [ hash ] = i ;
}
2014-12-29 20:24:28 +01:00
}
2024-10-18 12:34:25 +02:00
int do_erase ( int index , Hasher : : hash_t hash )
2014-12-26 19:28:52 +01:00
{
2014-12-31 03:58:29 +01:00
do_assert ( index < int ( entries . size ( ) ) ) ;
if ( hashtable . empty ( ) | | index < 0 )
return 0 ;
int k = hashtable [ hash ] ;
2015-02-09 20:11:51 +01:00
do_assert ( 0 < = k & & k < int ( entries . size ( ) ) ) ;
2014-12-31 03:58:29 +01:00
if ( k = = index ) {
hashtable [ hash ] = entries [ index ] . next ;
} else {
while ( entries [ k ] . next ! = index ) {
k = entries [ k ] . next ;
do_assert ( 0 < = k & & k < int ( entries . size ( ) ) ) ;
}
entries [ k ] . next = entries [ index ] . next ;
}
2014-12-30 13:22:33 +01:00
2014-12-31 03:58:29 +01:00
int back_idx = entries . size ( ) - 1 ;
2014-12-26 19:28:52 +01:00
2014-12-31 03:58:29 +01:00
if ( index ! = back_idx )
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t back_hash = do_hash ( entries [ back_idx ] . udata . first ) ;
2014-12-26 19:28:52 +01:00
2014-12-31 03:58:29 +01:00
k = hashtable [ back_hash ] ;
2015-02-09 20:11:51 +01:00
do_assert ( 0 < = k & & k < int ( entries . size ( ) ) ) ;
2014-12-31 03:58:29 +01:00
if ( k = = back_idx ) {
hashtable [ back_hash ] = index ;
2014-12-26 19:28:52 +01:00
} else {
2014-12-31 03:58:29 +01:00
while ( entries [ k ] . next ! = back_idx ) {
k = entries [ k ] . next ;
do_assert ( 0 < = k & & k < int ( entries . size ( ) ) ) ;
}
entries [ k ] . next = index ;
2014-12-26 19:28:52 +01:00
}
2014-12-29 20:24:28 +01:00
2014-12-31 03:58:29 +01:00
entries [ index ] = std : : move ( entries [ back_idx ] ) ;
}
entries . pop_back ( ) ;
if ( entries . empty ( ) )
hashtable . clear ( ) ;
2014-12-29 20:24:28 +01:00
2014-12-31 03:58:29 +01:00
return 1 ;
2014-12-26 19:28:52 +01:00
}
2025-07-15 04:55:45 +02:00
int do_lookup ( const K & key , Hasher : : hash_t & hash )
2014-12-26 19:28:52 +01:00
{
2014-12-31 03:58:29 +01:00
if ( hashtable . empty ( ) )
return - 1 ;
if ( entries . size ( ) * hashtable_size_trigger > hashtable . size ( ) ) {
2025-07-15 04:55:45 +02:00
do_rehash ( ) ;
2014-12-31 03:58:29 +01:00
hash = do_hash ( key ) ;
2014-12-26 19:28:52 +01:00
}
2025-07-15 04:55:45 +02:00
return do_lookup_internal ( key , hash ) ;
}
int do_lookup_internal ( const K & key , Hasher : : hash_t hash ) const
{
2014-12-31 03:58:29 +01:00
int index = hashtable [ hash ] ;
while ( index > = 0 & & ! ops . cmp ( entries [ index ] . udata . first , key ) ) {
index = entries [ index ] . next ;
do_assert ( - 1 < = index & & index < int ( entries . size ( ) ) ) ;
2014-12-26 19:28:52 +01:00
}
2014-12-31 03:58:29 +01:00
return index ;
2014-12-26 19:28:52 +01:00
}
2025-07-15 04:55:45 +02:00
int do_lookup_no_rehash ( const K & key , Hasher : : hash_t hash ) const
{
if ( hashtable . empty ( ) )
return - 1 ;
return do_lookup_internal ( key , hash ) ;
}
2025-09-04 14:47:45 +02:00
int do_insert ( const K & key , const Hasher : : hash_t & hash )
2015-02-09 20:11:51 +01:00
{
if ( hashtable . empty ( ) ) {
2020-04-13 21:59:29 +02:00
entries . emplace_back ( std : : pair < K , T > ( key , T ( ) ) , - 1 ) ;
2015-02-09 20:11:51 +01:00
do_rehash ( ) ;
} else {
2020-04-13 21:59:29 +02:00
entries . emplace_back ( std : : pair < K , T > ( key , T ( ) ) , hashtable [ hash ] ) ;
2015-02-09 20:11:51 +01:00
hashtable [ hash ] = entries . size ( ) - 1 ;
}
return entries . size ( ) - 1 ;
}
2025-09-04 14:47:45 +02:00
int do_insert ( const std : : pair < K , T > & value , const Hasher : : hash_t & hash )
2014-12-26 19:28:52 +01:00
{
2014-12-31 03:58:29 +01:00
if ( hashtable . empty ( ) ) {
2020-04-13 21:59:29 +02:00
entries . emplace_back ( value , - 1 ) ;
2014-12-31 03:58:29 +01:00
do_rehash ( ) ;
} else {
2020-04-13 21:59:29 +02:00
entries . emplace_back ( value , hashtable [ hash ] ) ;
hashtable [ hash ] = entries . size ( ) - 1 ;
}
return entries . size ( ) - 1 ;
}
2025-09-04 14:47:45 +02:00
int do_insert ( std : : pair < K , T > & & rvalue , const Hasher : : hash_t & hash )
2020-04-13 21:59:29 +02:00
{
if ( hashtable . empty ( ) ) {
entries . emplace_back ( std : : forward < std : : pair < K , T > > ( rvalue ) , - 1 ) ;
do_rehash ( ) ;
} else {
entries . emplace_back ( std : : forward < std : : pair < K , T > > ( rvalue ) , hashtable [ hash ] ) ;
2014-12-31 03:58:29 +01:00
hashtable [ hash ] = entries . size ( ) - 1 ;
2014-12-26 19:28:52 +01:00
}
2014-12-31 03:58:29 +01:00
return entries . size ( ) - 1 ;
2014-12-26 19:28:52 +01:00
}
public :
2023-12-09 18:43:38 +01:00
class const_iterator
2014-12-26 19:28:52 +01:00
{
2014-12-31 04:24:04 +01:00
friend class dict ;
2014-12-31 03:58:29 +01:00
protected :
2014-12-31 14:52:46 +01:00
const dict * ptr ;
2014-12-26 19:28:52 +01:00
int index ;
2014-12-31 14:52:46 +01:00
const_iterator ( const dict * ptr , int index ) : ptr ( ptr ) , index ( index ) { }
2014-12-26 19:28:52 +01:00
public :
2025-09-02 18:21:30 +02:00
typedef std : : bidirectional_iterator_tag iterator_category ;
2023-12-09 18:43:38 +01:00
typedef std : : pair < K , T > value_type ;
typedef ptrdiff_t difference_type ;
2025-09-02 18:21:30 +02:00
typedef const std : : pair < K , T > * pointer ;
typedef const std : : pair < K , T > & reference ;
2014-12-31 14:52:46 +01:00
const_iterator ( ) { }
const_iterator operator + + ( ) { index - - ; return * this ; }
2025-09-02 18:21:30 +02:00
const_iterator operator + + ( int ) { const_iterator tmp = * this ; index - - ; return tmp ; }
const_iterator operator - - ( ) { index + + ; return * this ; }
const_iterator operator - - ( int ) { const_iterator tmp = * this ; index + + ; return tmp ; }
2020-06-19 22:57:27 +02:00
const_iterator operator + = ( int amt ) { index - = amt ; return * this ; }
2014-12-31 14:52:46 +01:00
bool operator < ( const const_iterator & other ) const { return index > other . index ; }
bool operator = = ( const const_iterator & other ) const { return index = = other . index ; }
bool operator ! = ( const const_iterator & other ) const { return index ! = other . index ; }
2014-12-26 21:35:22 +01:00
const std : : pair < K , T > & operator * ( ) const { return ptr - > entries [ index ] . udata ; }
const std : : pair < K , T > * operator - > ( ) const { return & ptr - > entries [ index ] . udata ; }
2014-12-26 19:28:52 +01:00
} ;
2023-12-09 18:43:38 +01:00
class iterator
2014-12-26 21:35:22 +01:00
{
2014-12-31 04:24:04 +01:00
friend class dict ;
2014-12-31 03:58:29 +01:00
protected :
2014-12-31 14:52:46 +01:00
dict * ptr ;
2014-12-26 21:35:22 +01:00
int index ;
2014-12-31 14:52:46 +01:00
iterator ( dict * ptr , int index ) : ptr ( ptr ) , index ( index ) { }
2014-12-26 21:35:22 +01:00
public :
2023-12-09 18:43:38 +01:00
typedef std : : forward_iterator_tag iterator_category ;
typedef std : : pair < K , T > value_type ;
typedef ptrdiff_t difference_type ;
typedef std : : pair < K , T > * pointer ;
typedef std : : pair < K , T > & reference ;
2014-12-31 14:52:46 +01:00
iterator ( ) { }
iterator operator + + ( ) { index - - ; return * this ; }
2020-06-19 22:57:27 +02:00
iterator operator + = ( int amt ) { index - = amt ; return * this ; }
2014-12-31 14:52:46 +01:00
bool operator < ( const iterator & other ) const { return index > other . index ; }
bool operator = = ( const iterator & other ) const { return index = = other . index ; }
bool operator ! = ( const iterator & other ) const { return index ! = other . index ; }
std : : pair < K , T > & operator * ( ) { return ptr - > entries [ index ] . udata ; }
std : : pair < K , T > * operator - > ( ) { return & ptr - > entries [ index ] . udata ; }
2014-12-26 21:35:22 +01:00
const std : : pair < K , T > & operator * ( ) const { return ptr - > entries [ index ] . udata ; }
const std : : pair < K , T > * operator - > ( ) const { return & ptr - > entries [ index ] . udata ; }
2014-12-31 14:52:46 +01:00
operator const_iterator ( ) const { return const_iterator ( ptr , index ) ; }
2014-12-26 21:35:22 +01:00
} ;
2025-09-02 18:21:30 +02:00
using reverse_iterator = std : : reverse_iterator < const_iterator > ;
reverse_iterator rbegin ( ) const {
return std : : make_reverse_iterator ( end ( ) ) ;
}
reverse_iterator rend ( ) const {
return std : : make_reverse_iterator ( begin ( ) ) ;
}
2014-12-26 21:35:22 +01:00
2023-12-29 19:20:44 +01:00
constexpr dict ( )
2014-12-26 19:28:52 +01:00
{
}
2014-12-31 04:19:04 +01:00
dict ( const dict & other )
2014-12-26 21:35:22 +01:00
{
2014-12-31 03:58:29 +01:00
entries = other . entries ;
do_rehash ( ) ;
2014-12-26 21:35:22 +01:00
}
2014-12-31 04:19:04 +01:00
// Move constructor: starts out empty and exchanges contents with `other`
// through swap().
dict(dict &&other)
{
	this->swap(other);
}
2014-12-31 04:19:04 +01:00
// Copy assignment: takes a copy of the entries of `other` and rebuilds the
// hash table with do_rehash(). Returns *this.
dict &operator=(const dict &other)
{
	entries = other.entries;
	do_rehash();
	return *this;
}
2014-12-31 04:19:04 +01:00
// Move assignment: empties this dict first, then exchanges contents with
// `other` via swap(). Returns *this.
dict &operator=(dict &&other)
{
	clear();
	this->swap(other);
	return *this;
}
dict ( const std : : initializer_list < std : : pair < K , T > > & list )
{
for ( auto & it : list )
insert ( it ) ;
}
2014-12-26 19:28:52 +01:00
template < class InputIterator >
2014-12-26 21:35:22 +01:00
dict ( InputIterator first , InputIterator last )
2014-12-26 19:28:52 +01:00
{
insert ( first , last ) ;
}
// Inserts every element of the iterator range [first, last), in order, using
// the single-element insert() overloads.
template<class InputIterator>
void insert(InputIterator first, InputIterator last)
{
	while (first != last) {
		insert(*first);
		++first;
	}
}
2015-02-09 20:11:51 +01:00
std : : pair < iterator , bool > insert ( const K & key )
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2015-02-09 20:11:51 +01:00
int i = do_lookup ( key , hash ) ;
if ( i > = 0 )
return std : : pair < iterator , bool > ( iterator ( this , i ) , false ) ;
i = do_insert ( key , hash ) ;
return std : : pair < iterator , bool > ( iterator ( this , i ) , true ) ;
}
2014-12-28 17:51:16 +01:00
std : : pair < iterator , bool > insert ( const std : : pair < K , T > & value )
2014-12-26 19:28:52 +01:00
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( value . first ) ;
2014-12-31 03:58:29 +01:00
int i = do_lookup ( value . first , hash ) ;
2014-12-26 19:28:52 +01:00
if ( i > = 0 )
2014-12-28 17:51:16 +01:00
return std : : pair < iterator , bool > ( iterator ( this , i ) , false ) ;
2014-12-31 03:58:29 +01:00
i = do_insert ( value , hash ) ;
2014-12-28 17:51:16 +01:00
return std : : pair < iterator , bool > ( iterator ( this , i ) , true ) ;
2014-12-26 19:28:52 +01:00
}
2020-04-13 21:59:29 +02:00
std : : pair < iterator , bool > insert ( std : : pair < K , T > & & rvalue )
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( rvalue . first ) ;
2020-04-13 21:59:29 +02:00
int i = do_lookup ( rvalue . first , hash ) ;
if ( i > = 0 )
return std : : pair < iterator , bool > ( iterator ( this , i ) , false ) ;
i = do_insert ( std : : forward < std : : pair < K , T > > ( rvalue ) , hash ) ;
return std : : pair < iterator , bool > ( iterator ( this , i ) , true ) ;
}
2020-04-15 18:22:22 +02:00
std : : pair < iterator , bool > emplace ( K const & key , T const & value )
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2020-04-15 18:22:22 +02:00
int i = do_lookup ( key , hash ) ;
if ( i > = 0 )
return std : : pair < iterator , bool > ( iterator ( this , i ) , false ) ;
i = do_insert ( std : : make_pair ( key , value ) , hash ) ;
return std : : pair < iterator , bool > ( iterator ( this , i ) , true ) ;
}
std : : pair < iterator , bool > emplace ( K const & key , T & & rvalue )
2020-04-13 21:59:29 +02:00
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2020-04-13 21:59:29 +02:00
int i = do_lookup ( key , hash ) ;
if ( i > = 0 )
return std : : pair < iterator , bool > ( iterator ( this , i ) , false ) ;
i = do_insert ( std : : make_pair ( key , std : : forward < T > ( rvalue ) ) , hash ) ;
return std : : pair < iterator , bool > ( iterator ( this , i ) , true ) ;
}
2020-04-15 18:22:22 +02:00
std : : pair < iterator , bool > emplace ( K & & rkey , T const & value )
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( rkey ) ;
2020-04-15 18:22:22 +02:00
int i = do_lookup ( rkey , hash ) ;
if ( i > = 0 )
return std : : pair < iterator , bool > ( iterator ( this , i ) , false ) ;
i = do_insert ( std : : make_pair ( std : : forward < K > ( rkey ) , value ) , hash ) ;
return std : : pair < iterator , bool > ( iterator ( this , i ) , true ) ;
}
std : : pair < iterator , bool > emplace ( K & & rkey , T & & rvalue )
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( rkey ) ;
2020-04-15 18:22:22 +02:00
int i = do_lookup ( rkey , hash ) ;
if ( i > = 0 )
return std : : pair < iterator , bool > ( iterator ( this , i ) , false ) ;
i = do_insert ( std : : make_pair ( std : : forward < K > ( rkey ) , std : : forward < T > ( rvalue ) ) , hash ) ;
return std : : pair < iterator , bool > ( iterator ( this , i ) , true ) ;
}
2014-12-28 22:26:09 +01:00
int erase ( const K & key )
2014-12-26 19:28:52 +01:00
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2014-12-31 03:58:29 +01:00
int index = do_lookup ( key , hash ) ;
return do_erase ( index , hash ) ;
2014-12-26 19:28:52 +01:00
}
2014-12-28 22:26:09 +01:00
// Removes the entry `it` refers to and returns the iterator advanced by one
// step (i.e. pointing at the next lower index).
iterator erase(iterator it)
{
	Hasher::hash_t bucket = do_hash(it->first);
	do_erase(it.index, bucket);
	++it;
	return it;
}
int count ( const K & key ) const
2014-12-26 19:28:52 +01:00
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2025-07-15 04:55:45 +02:00
int i = do_lookup_no_rehash ( key , hash ) ;
2014-12-26 19:28:52 +01:00
return i < 0 ? 0 : 1 ;
}
2014-12-31 14:52:46 +01:00
int count ( const K & key , const_iterator it ) const
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2025-07-15 04:55:45 +02:00
int i = do_lookup_no_rehash ( key , hash ) ;
2014-12-31 14:52:46 +01:00
return i < 0 | | i > it . index ? 0 : 1 ;
}
2014-12-26 21:35:22 +01:00
// Looks up `key` and returns an iterator to its entry, or end() when the key
// is not present.
iterator find(const K &key)
{
	Hasher::hash_t bucket = do_hash(key);
	const int slot = do_lookup(key, bucket);
	return slot < 0 ? end() : iterator(this, slot);
}
// Read-only lookup: returns a const_iterator to the entry for `key`, or
// end() when the key is not present. Uses the non-rehashing lookup.
const_iterator find(const K &key) const
{
	Hasher::hash_t bucket = do_hash(key);
	const int slot = do_lookup_no_rehash(key, bucket);
	return slot < 0 ? end() : const_iterator(this, slot);
}
// Returns a reference to the value stored under `key`.
// Throws std::out_of_range when the key is not present.
T &at(const K &key)
{
	Hasher::hash_t bucket = do_hash(key);
	const int slot = do_lookup(key, bucket);
	if (slot >= 0)
		return entries[slot].udata.second;
	throw std::out_of_range("dict::at()");
}
// Returns a const reference to the value stored under `key`, using the
// non-rehashing lookup.
// Throws std::out_of_range when the key is not present.
const T &at(const K &key) const
{
	Hasher::hash_t bucket = do_hash(key);
	const int slot = do_lookup_no_rehash(key, bucket);
	if (slot >= 0)
		return entries[slot].udata.second;
	throw std::out_of_range("dict::at()");
}
2020-04-16 21:48:03 +02:00
// Returns a const reference to the value stored under `key`, or to `defval`
// itself when the key is not present. Note that in the fallback case the
// returned reference is the caller's `defval` argument.
const T &at(const K &key, const T &defval) const
{
	Hasher::hash_t bucket = do_hash(key);
	const int slot = do_lookup_no_rehash(key, bucket);
	return slot < 0 ? defval : entries[slot].udata.second;
}
2014-12-26 19:28:52 +01:00
// Returns a reference to the value stored under `key`; when the key is
// missing, an entry holding a value-initialized T is inserted first.
T &operator[](const K &key)
{
	Hasher::hash_t bucket = do_hash(key);
	int slot = do_lookup(key, bucket);
	if (slot < 0) {
		std::pair<K, T> fresh(key, T());
		slot = do_insert(std::move(fresh), bucket);
	}
	return entries[slot].udata.second;
}
2015-01-24 00:13:27 +01:00
template < typename Compare = std : : less < K > >
void sort ( Compare comp = Compare ( ) )
{
std : : sort ( entries . begin ( ) , entries . end ( ) , [ comp ] ( const entry_t & a , const entry_t & b ) { return comp ( b . udata . first , a . udata . first ) ; } ) ;
do_rehash ( ) ;
}
2014-12-31 04:19:04 +01:00
// Exchanges the entries and the hash table with `other`.
void swap(dict &other)
{
	entries.swap(other.entries);
	hashtable.swap(other.hashtable);
}
2014-12-31 04:19:04 +01:00
bool operator = = ( const dict & other ) const {
2014-12-31 03:58:29 +01:00
if ( size ( ) ! = other . size ( ) )
2014-12-26 21:35:22 +01:00
return false ;
2014-12-31 03:58:29 +01:00
for ( auto & it : entries ) {
auto oit = other . find ( it . udata . first ) ;
2015-02-09 20:11:51 +01:00
if ( oit = = other . end ( ) | | ! ( oit - > second = = it . udata . second ) )
2014-12-31 03:58:29 +01:00
return false ;
}
2014-12-26 21:35:22 +01:00
return true ;
}
2014-12-31 04:19:04 +01:00
bool operator ! = ( const dict & other ) const {
2015-02-09 20:11:51 +01:00
return ! operator = = ( other ) ;
2014-12-26 21:35:22 +01:00
}
2025-01-14 12:39:15 +01:00
// Folds the whole dict into the hasher `h` and returns the result.
//
// Each entry is hashed on its own (key first, then value) with a fresh
// Hasher, and the per-entry hashes are combined through commutative_hash,
// presumably so that the outcome does not depend on the order of the
// entries. According to the change note attached to this code,
// commutative_hash replaced a plain XOR combiner that collided heavily and
// corresponds to `Sum(4)` in
// https://www.preprints.org/manuscript/201710.0192/v1/download
[[nodiscard]] Hasher hash_into(Hasher h) const
{
	commutative_hash combined;
	for (const auto &entry : entries) {
		Hasher per_entry;
		per_entry.eat(entry.udata.first);
		per_entry.eat(entry.udata.second);
		combined.eat(per_entry);
	}
	return combined.hash_into(h);
}
2016-01-31 22:50:34 +01:00
void reserve ( size_t n ) { entries . reserve ( n ) ; }
2014-12-31 03:58:29 +01:00
size_t size ( ) const { return entries . size ( ) ; }
bool empty ( ) const { return entries . empty ( ) ; }
void clear ( ) { hashtable . clear ( ) ; entries . clear ( ) ; }
2014-12-26 21:35:22 +01:00
2014-12-31 03:58:29 +01:00
// Iterator to the first element in iteration order, which is the *last*
// entry of the entry vector (index -1, i.e. end(), for an empty dict).
iterator begin() { return iterator(this, int(entries.size()) - 1); }

// Iterator to the n-th element in iteration order (n = 0 is begin()).
iterator element(int n) { return iterator(this, int(entries.size()) - 1 - n); }

// Past-the-end iterator (index -1). It keeps a pointer to this dict, like
// the const overload of end() does, so that an end iterator converted to a
// const_iterator still refers to a valid container.
iterator end() { return iterator(this, -1); }
2014-12-26 21:35:22 +01:00
2014-12-31 03:58:29 +01:00
// Read-only iterator to the first element in iteration order, i.e. the last
// entry of the entry vector (index -1 for an empty dict).
const_iterator begin() const
{
	return const_iterator(this, static_cast<int>(entries.size()) - 1);
}

// Read-only iterator to the n-th element in iteration order.
const_iterator element(int n) const
{
	return const_iterator(this, static_cast<int>(entries.size()) - 1 - n);
}

// Read-only past-the-end iterator (index -1) bound to this dict.
const_iterator end() const
{
	return const_iterator(this, -1);
}
2014-12-26 19:28:52 +01:00
} ;
2024-11-04 12:41:00 +01:00
template < typename K , typename OPS >
2014-12-26 21:59:41 +01:00
class pool
{
2024-11-04 12:41:00 +01:00
template < typename , int , typename > friend class idict ;
2015-01-18 12:12:33 +01:00
protected :
2014-12-26 21:59:41 +01:00
struct entry_t
{
2014-12-31 04:19:04 +01:00
K udata ;
int next ;
2014-12-26 21:59:41 +01:00
2014-12-31 04:19:04 +01:00
entry_t ( ) { }
entry_t ( const K & udata , int next ) : udata ( udata ) , next ( next ) { }
2020-04-20 07:37:10 +02:00
entry_t ( K & & udata , int next ) : udata ( std : : move ( udata ) ) , next ( next ) { }
2014-12-26 21:59:41 +01:00
} ;
std : : vector < int > hashtable ;
std : : vector < entry_t > entries ;
2024-11-04 12:41:00 +01:00
OPS ops ;
2014-12-26 21:59:41 +01:00
2015-02-09 20:11:51 +01:00
# ifdef NDEBUG
static inline void do_assert ( bool ) { }
# else
2014-12-31 04:19:04 +01:00
static inline void do_assert ( bool cond ) {
if ( ! cond ) throw std : : runtime_error ( " pool<> assert failed. " ) ;
2014-12-26 21:59:41 +01:00
}
2014-12-31 04:19:04 +01:00
# endif
2014-12-26 21:59:41 +01:00
2024-10-18 12:34:25 +02:00
Hasher : : hash_t do_hash ( const K & key ) const
2014-12-26 21:59:41 +01:00
{
2024-10-01 15:12:03 +02:00
Hasher : : hash_t hash = 0 ;
2014-12-26 21:59:41 +01:00
if ( ! hashtable . empty ( ) )
2024-11-04 12:41:00 +01:00
hash = ops . hash ( key ) . yield ( ) % ( unsigned int ) ( hashtable . size ( ) ) ;
2014-12-26 21:59:41 +01:00
return hash ;
}
2014-12-31 04:19:04 +01:00
void do_rehash ( )
2014-12-29 20:24:28 +01:00
{
2014-12-31 04:19:04 +01:00
hashtable . clear ( ) ;
2016-01-31 22:50:34 +01:00
hashtable . resize ( hashtable_size ( entries . capacity ( ) * hashtable_size_factor ) , - 1 ) ;
2014-12-31 04:19:04 +01:00
for ( int i = 0 ; i < int ( entries . size ( ) ) ; i + + ) {
do_assert ( - 1 < = entries [ i ] . next & & entries [ i ] . next < int ( entries . size ( ) ) ) ;
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( entries [ i ] . udata ) ;
2014-12-31 04:19:04 +01:00
entries [ i ] . next = hashtable [ hash ] ;
hashtable [ hash ] = i ;
2014-12-29 20:24:28 +01:00
}
}
2024-10-18 12:34:25 +02:00
int do_erase ( int index , Hasher : : hash_t hash )
2014-12-29 20:24:28 +01:00
{
2014-12-31 04:19:04 +01:00
do_assert ( index < int ( entries . size ( ) ) ) ;
if ( hashtable . empty ( ) | | index < 0 )
return 0 ;
2014-12-29 20:24:28 +01:00
2014-12-31 04:19:04 +01:00
int k = hashtable [ hash ] ;
if ( k = = index ) {
hashtable [ hash ] = entries [ index ] . next ;
} else {
while ( entries [ k ] . next ! = index ) {
k = entries [ k ] . next ;
do_assert ( 0 < = k & & k < int ( entries . size ( ) ) ) ;
}
entries [ k ] . next = entries [ index ] . next ;
}
2014-12-30 13:22:33 +01:00
2014-12-31 04:19:04 +01:00
int back_idx = entries . size ( ) - 1 ;
2014-12-26 21:59:41 +01:00
2014-12-31 04:19:04 +01:00
if ( index ! = back_idx )
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t back_hash = do_hash ( entries [ back_idx ] . udata ) ;
2014-12-26 21:59:41 +01:00
2014-12-31 04:19:04 +01:00
k = hashtable [ back_hash ] ;
if ( k = = back_idx ) {
hashtable [ back_hash ] = index ;
2014-12-26 21:59:41 +01:00
} else {
2014-12-31 04:19:04 +01:00
while ( entries [ k ] . next ! = back_idx ) {
k = entries [ k ] . next ;
do_assert ( 0 < = k & & k < int ( entries . size ( ) ) ) ;
}
entries [ k ] . next = index ;
2014-12-26 21:59:41 +01:00
}
2014-12-29 20:24:28 +01:00
2014-12-31 04:19:04 +01:00
entries [ index ] = std : : move ( entries [ back_idx ] ) ;
2014-12-26 21:59:41 +01:00
}
2014-12-31 04:19:04 +01:00
entries . pop_back ( ) ;
if ( entries . empty ( ) )
hashtable . clear ( ) ;
return 1 ;
2014-12-26 21:59:41 +01:00
}
2025-07-15 05:02:35 +02:00
int do_lookup ( const K & key , Hasher : : hash_t & hash )
2014-12-26 21:59:41 +01:00
{
2014-12-31 04:19:04 +01:00
if ( hashtable . empty ( ) )
return - 1 ;
if ( entries . size ( ) * hashtable_size_trigger > hashtable . size ( ) ) {
2025-07-15 05:02:35 +02:00
do_rehash ( ) ;
2014-12-31 04:19:04 +01:00
hash = do_hash ( key ) ;
2014-12-26 21:59:41 +01:00
}
2014-12-31 04:19:04 +01:00
2025-07-15 05:02:35 +02:00
return do_lookup_internal ( key , hash ) ;
}
int do_lookup_internal ( const K & key , Hasher : : hash_t hash ) const
{
2014-12-31 04:19:04 +01:00
int index = hashtable [ hash ] ;
while ( index > = 0 & & ! ops . cmp ( entries [ index ] . udata , key ) ) {
index = entries [ index ] . next ;
do_assert ( - 1 < = index & & index < int ( entries . size ( ) ) ) ;
}
return index ;
2014-12-26 21:59:41 +01:00
}
2025-07-15 05:02:35 +02:00
int do_lookup_no_rehash ( const K & key , Hasher : : hash_t hash ) const
{
if ( hashtable . empty ( ) )
return - 1 ;
return do_lookup_internal ( key , hash ) ;
}
2024-10-18 12:34:25 +02:00
int do_insert ( const K & value , Hasher : : hash_t & hash )
2014-12-26 21:59:41 +01:00
{
2014-12-31 04:19:04 +01:00
if ( hashtable . empty ( ) ) {
2020-04-20 04:16:55 +02:00
entries . emplace_back ( value , - 1 ) ;
2014-12-31 04:19:04 +01:00
do_rehash ( ) ;
hash = do_hash ( value ) ;
} else {
2020-04-20 04:16:55 +02:00
entries . emplace_back ( value , hashtable [ hash ] ) ;
hashtable [ hash ] = entries . size ( ) - 1 ;
}
return entries . size ( ) - 1 ;
}
2024-10-18 12:34:25 +02:00
int do_insert ( K & & rvalue , Hasher : : hash_t & hash )
2020-04-20 04:16:55 +02:00
{
if ( hashtable . empty ( ) ) {
2020-04-21 19:17:47 +02:00
entries . emplace_back ( std : : forward < K > ( rvalue ) , - 1 ) ;
2014-12-31 04:19:04 +01:00
do_rehash ( ) ;
2020-04-21 19:17:47 +02:00
hash = do_hash ( rvalue ) ;
2014-12-31 04:19:04 +01:00
} else {
2020-04-21 19:17:47 +02:00
entries . emplace_back ( std : : forward < K > ( rvalue ) , hashtable [ hash ] ) ;
2014-12-31 04:19:04 +01:00
hashtable [ hash ] = entries . size ( ) - 1 ;
2014-12-26 21:59:41 +01:00
}
2014-12-31 04:19:04 +01:00
return entries . size ( ) - 1 ;
2014-12-26 21:59:41 +01:00
}
public :
2023-12-09 18:43:38 +01:00
class const_iterator
2014-12-26 21:59:41 +01:00
{
2014-12-31 04:24:04 +01:00
friend class pool ;
2014-12-31 04:19:04 +01:00
protected :
2014-12-31 14:52:46 +01:00
const pool * ptr ;
2014-12-26 21:59:41 +01:00
int index ;
2014-12-31 14:52:46 +01:00
const_iterator ( const pool * ptr , int index ) : ptr ( ptr ) , index ( index ) { }
2014-12-26 21:59:41 +01:00
public :
2023-12-09 18:43:38 +01:00
typedef std : : forward_iterator_tag iterator_category ;
typedef K value_type ;
typedef ptrdiff_t difference_type ;
typedef K * pointer ;
typedef K & reference ;
2014-12-31 14:52:46 +01:00
const_iterator ( ) { }
const_iterator operator + + ( ) { index - - ; return * this ; }
bool operator = = ( const const_iterator & other ) const { return index = = other . index ; }
bool operator ! = ( const const_iterator & other ) const { return index ! = other . index ; }
2014-12-31 04:19:04 +01:00
const K & operator * ( ) const { return ptr - > entries [ index ] . udata ; }
const K * operator - > ( ) const { return & ptr - > entries [ index ] . udata ; }
2014-12-26 21:59:41 +01:00
} ;
2023-12-09 18:43:38 +01:00
class iterator
2014-12-26 21:59:41 +01:00
{
2014-12-31 04:24:04 +01:00
friend class pool ;
2014-12-31 04:19:04 +01:00
protected :
2014-12-31 14:52:46 +01:00
pool * ptr ;
2014-12-26 21:59:41 +01:00
int index ;
2014-12-31 14:52:46 +01:00
iterator ( pool * ptr , int index ) : ptr ( ptr ) , index ( index ) { }
2014-12-26 21:59:41 +01:00
public :
2023-12-09 18:43:38 +01:00
typedef std : : forward_iterator_tag iterator_category ;
typedef K value_type ;
typedef ptrdiff_t difference_type ;
typedef K * pointer ;
typedef K & reference ;
2014-12-31 14:52:46 +01:00
iterator ( ) { }
iterator operator + + ( ) { index - - ; return * this ; }
bool operator = = ( const iterator & other ) const { return index = = other . index ; }
bool operator ! = ( const iterator & other ) const { return index ! = other . index ; }
K & operator * ( ) { return ptr - > entries [ index ] . udata ; }
K * operator - > ( ) { return & ptr - > entries [ index ] . udata ; }
2014-12-31 04:19:04 +01:00
const K & operator * ( ) const { return ptr - > entries [ index ] . udata ; }
const K * operator - > ( ) const { return & ptr - > entries [ index ] . udata ; }
2014-12-31 14:52:46 +01:00
operator const_iterator ( ) const { return const_iterator ( ptr , index ) ; }
2014-12-26 21:59:41 +01:00
} ;
2023-12-29 19:20:44 +01:00
constexpr pool ( )
2014-12-26 21:59:41 +01:00
{
}
2014-12-31 04:19:04 +01:00
pool ( const pool & other )
2014-12-26 21:59:41 +01:00
{
2014-12-31 04:19:04 +01:00
entries = other . entries ;
do_rehash ( ) ;
2014-12-26 21:59:41 +01:00
}
2014-12-31 04:19:04 +01:00
pool ( pool & & other )
2014-12-26 21:59:41 +01:00
{
swap ( other ) ;
}
2014-12-31 04:19:04 +01:00
pool & operator = ( const pool & other ) {
entries = other . entries ;
do_rehash ( ) ;
2014-12-26 21:59:41 +01:00
return * this ;
}
2014-12-31 04:19:04 +01:00
pool & operator = ( pool & & other ) {
2014-12-26 21:59:41 +01:00
clear ( ) ;
swap ( other ) ;
return * this ;
}
pool ( const std : : initializer_list < K > & list )
{
for ( auto & it : list )
insert ( it ) ;
}
template < class InputIterator >
pool ( InputIterator first , InputIterator last )
{
insert ( first , last ) ;
}
template < class InputIterator >
void insert ( InputIterator first , InputIterator last )
{
for ( ; first ! = last ; + + first )
insert ( * first ) ;
}
2014-12-31 04:19:04 +01:00
std : : pair < iterator , bool > insert ( const K & value )
2014-12-26 21:59:41 +01:00
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( value ) ;
2014-12-31 04:19:04 +01:00
int i = do_lookup ( value , hash ) ;
2014-12-26 21:59:41 +01:00
if ( i > = 0 )
2014-12-28 17:51:16 +01:00
return std : : pair < iterator , bool > ( iterator ( this , i ) , false ) ;
2014-12-31 04:19:04 +01:00
i = do_insert ( value , hash ) ;
2014-12-28 17:51:16 +01:00
return std : : pair < iterator , bool > ( iterator ( this , i ) , true ) ;
2014-12-26 21:59:41 +01:00
}
2020-04-21 19:17:47 +02:00
std : : pair < iterator , bool > insert ( K & & rvalue )
2020-04-20 04:16:55 +02:00
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( rvalue ) ;
2020-04-21 19:17:47 +02:00
int i = do_lookup ( rvalue , hash ) ;
2020-04-20 04:16:55 +02:00
if ( i > = 0 )
return std : : pair < iterator , bool > ( iterator ( this , i ) , false ) ;
2020-04-21 19:17:47 +02:00
i = do_insert ( std : : forward < K > ( rvalue ) , hash ) ;
2020-04-20 04:16:55 +02:00
return std : : pair < iterator , bool > ( iterator ( this , i ) , true ) ;
}
2020-04-22 17:14:07 +02:00
template < typename . . . Args >
std : : pair < iterator , bool > emplace ( Args & & . . . args )
{
return insert ( K ( std : : forward < Args > ( args ) . . . ) ) ;
}
2014-12-28 22:26:09 +01:00
int erase ( const K & key )
2014-12-26 21:59:41 +01:00
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2014-12-31 04:19:04 +01:00
int index = do_lookup ( key , hash ) ;
return do_erase ( index , hash ) ;
2014-12-26 21:59:41 +01:00
}
2014-12-28 22:26:09 +01:00
iterator erase ( iterator it )
2014-12-26 21:59:41 +01:00
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( * it ) ;
2014-12-31 04:19:04 +01:00
do_erase ( it . index , hash ) ;
2014-12-28 22:26:09 +01:00
return + + it ;
2014-12-26 21:59:41 +01:00
}
int count ( const K & key ) const
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2025-07-15 05:02:35 +02:00
int i = do_lookup_no_rehash ( key , hash ) ;
2014-12-26 21:59:41 +01:00
return i < 0 ? 0 : 1 ;
}
2014-12-31 14:52:46 +01:00
int count ( const K & key , const_iterator it ) const
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2025-07-15 05:02:35 +02:00
int i = do_lookup_no_rehash ( key , hash ) ;
2014-12-31 14:52:46 +01:00
return i < 0 | | i > it . index ? 0 : 1 ;
}
2014-12-26 21:59:41 +01:00
iterator find ( const K & key )
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2014-12-31 04:19:04 +01:00
int i = do_lookup ( key , hash ) ;
2014-12-26 21:59:41 +01:00
if ( i < 0 )
return end ( ) ;
return iterator ( this , i ) ;
}
const_iterator find ( const K & key ) const
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2025-07-15 05:02:35 +02:00
int i = do_lookup_no_rehash ( key , hash ) ;
2014-12-26 21:59:41 +01:00
if ( i < 0 )
return end ( ) ;
return const_iterator ( this , i ) ;
}
2014-12-31 04:19:04 +01:00
bool operator [ ] ( const K & key )
2014-12-26 21:59:41 +01:00
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = do_hash ( key ) ;
2014-12-31 04:19:04 +01:00
int i = do_lookup ( key , hash ) ;
2014-12-26 21:59:41 +01:00
return i > = 0 ;
}
2015-01-24 00:13:27 +01:00
template < typename Compare = std : : less < K > >
void sort ( Compare comp = Compare ( ) )
{
std : : sort ( entries . begin ( ) , entries . end ( ) , [ comp ] ( const entry_t & a , const entry_t & b ) { return comp ( b . udata , a . udata ) ; } ) ;
do_rehash ( ) ;
}
2015-04-07 15:07:01 +02:00
K pop ( )
{
iterator it = begin ( ) ;
K ret = * it ;
erase ( it ) ;
return ret ;
}
2014-12-31 04:19:04 +01:00
void swap ( pool & other )
2014-12-26 21:59:41 +01:00
{
hashtable . swap ( other . hashtable ) ;
entries . swap ( other . entries ) ;
}
2014-12-31 04:19:04 +01:00
bool operator = = ( const pool & other ) const {
if ( size ( ) ! = other . size ( ) )
2014-12-26 21:59:41 +01:00
return false ;
2014-12-31 04:19:04 +01:00
for ( auto & it : entries )
if ( ! other . count ( it . udata ) )
return false ;
2014-12-26 21:59:41 +01:00
return true ;
}
2014-12-31 04:19:04 +01:00
bool operator ! = ( const pool & other ) const {
2015-02-09 20:11:51 +01:00
return ! operator = = ( other ) ;
2014-12-26 21:59:41 +01:00
}
2025-01-14 12:39:15 +01:00
[ [ nodiscard ] ] Hasher hash_into ( Hasher h ) const {
Improve commutative hashing.
The simple XOR `commutative_eat()` implementation produces a lot of collisions.
https://www.preprints.org/manuscript/201710.0192/v1/download is a useful reference on this topic.
Running the included `hashTest.cc` without the hashlib changes, I get 49,580,349 collisions.
The 49,995,000 (i,j) pairs (0 <= i < 10000, i < j < 10000) hash into only 414,651 unique hash values.
We get simple collisions like (0,1) colliding with (2,3).
With the hashlib changes, we get only 707,099 collisions and 49,287,901 unique hash values.
Much better! The `commutative_hash` implementation corresponds to `Sum(4)` in the paper
mentioned above.
2025-08-19 05:21:54 +02:00
commutative_hash comm ;
2024-10-01 15:12:03 +02:00
for ( auto & it : entries ) {
Improve commutative hashing.
The simple XOR `commutative_eat()` implementation produces a lot of collisions.
https://www.preprints.org/manuscript/201710.0192/v1/download is a useful reference on this topic.
Running the included `hashTest.cc` without the hashlib changes, I get 49,580,349 collisions.
The 49,995,000 (i,j) pairs (0 <= i < 10000, i < j < 10000) hash into only 414,651 unique hash values.
We get simple collisions like (0,1) colliding with (2,3).
With the hashlib changes, we get only 707,099 collisions and 49,287,901 unique hash values.
Much better! The `commutative_hash` implementation corresponds to `Sum(4)` in the paper
mentioned above.
2025-08-19 05:21:54 +02:00
comm . eat ( ops . hash ( it . udata ) ) ;
2024-10-01 15:12:03 +02:00
}
Improve commutative hashing.
The simple XOR `commutative_eat()` implementation produces a lot of collisions.
https://www.preprints.org/manuscript/201710.0192/v1/download is a useful reference on this topic.
Running the included `hashTest.cc` without the hashlib changes, I get 49,580,349 collisions.
The 49,995,000 (i,j) pairs (0 <= i < 10000, i < j < 10000) hash into only 414,651 unique hash values.
We get simple collisions like (0,1) colliding with (2,3).
With the hashlib changes, we get only 707,099 collisions and 49,287,901 unique hash values.
Much better! The `commutative_hash` implementation corresponds to `Sum(4)` in the paper
mentioned above.
2025-08-19 05:21:54 +02:00
return comm . hash_into ( h ) ;
2017-08-22 13:04:33 +02:00
}
2016-01-31 22:50:34 +01:00
void reserve ( size_t n ) { entries . reserve ( n ) ; }
2014-12-31 04:19:04 +01:00
size_t size ( ) const { return entries . size ( ) ; }
bool empty ( ) const { return entries . empty ( ) ; }
void clear ( ) { hashtable . clear ( ) ; entries . clear ( ) ; }
2014-12-26 21:59:41 +01:00
2014-12-31 04:19:04 +01:00
iterator begin ( ) { return iterator ( this , int ( entries . size ( ) ) - 1 ) ; }
2019-03-13 17:36:06 +01:00
iterator element ( int n ) { return iterator ( this , int ( entries . size ( ) ) - 1 - n ) ; }
2014-12-30 13:30:22 +01:00
iterator end ( ) { return iterator ( nullptr , - 1 ) ; }
2014-12-26 21:59:41 +01:00
2014-12-31 04:19:04 +01:00
const_iterator begin ( ) const { return const_iterator ( this , int ( entries . size ( ) ) - 1 ) ; }
2019-03-13 17:36:06 +01:00
const_iterator element ( int n ) const { return const_iterator ( this , int ( entries . size ( ) ) - 1 - n ) ; }
2014-12-30 13:30:22 +01:00
const_iterator end ( ) const { return const_iterator ( nullptr , - 1 ) ; }
2014-12-26 21:59:41 +01:00
} ;
2024-11-04 12:41:00 +01:00
template < typename K , int offset , typename OPS >
2015-01-18 12:12:33 +01:00
class idict
{
2024-11-04 12:41:00 +01:00
pool < K , OPS > database ;
2015-01-18 12:12:33 +01:00
public :
2023-12-09 18:43:38 +01:00
class const_iterator
2020-04-16 21:48:03 +02:00
{
friend class idict ;
protected :
const idict & container ;
int index ;
const_iterator ( const idict & container , int index ) : container ( container ) , index ( index ) { }
public :
2023-12-09 18:43:38 +01:00
typedef std : : forward_iterator_tag iterator_category ;
typedef K value_type ;
typedef ptrdiff_t difference_type ;
typedef K * pointer ;
typedef K & reference ;
2020-04-16 21:48:03 +02:00
const_iterator ( ) { }
const_iterator operator + + ( ) { index + + ; return * this ; }
bool operator = = ( const const_iterator & other ) const { return index = = other . index ; }
bool operator ! = ( const const_iterator & other ) const { return index ! = other . index ; }
const K & operator * ( ) const { return container [ index ] ; }
const K * operator - > ( ) const { return & container [ index ] ; }
} ;
2015-01-18 12:12:33 +01:00
2023-12-29 19:20:44 +01:00
constexpr idict ( )
{
}
2015-01-18 12:12:33 +01:00
int operator ( ) ( const K & key )
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = database . do_hash ( key ) ;
2015-01-18 12:12:33 +01:00
int i = database . do_lookup ( key , hash ) ;
if ( i < 0 )
i = database . do_insert ( key , hash ) ;
return i + offset ;
}
int at ( const K & key ) const
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = database . do_hash ( key ) ;
2025-07-15 05:02:35 +02:00
int i = database . do_lookup_no_rehash ( key , hash ) ;
2015-01-18 12:12:33 +01:00
if ( i < 0 )
throw std : : out_of_range ( " idict::at() " ) ;
return i + offset ;
}
2015-12-02 20:41:57 +01:00
int at ( const K & key , int defval ) const
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = database . do_hash ( key ) ;
2025-07-15 05:02:35 +02:00
int i = database . do_lookup_no_rehash ( key , hash ) ;
2015-12-02 20:41:57 +01:00
if ( i < 0 )
return defval ;
return i + offset ;
}
2015-01-18 12:12:33 +01:00
int count ( const K & key ) const
{
2024-10-18 12:34:25 +02:00
Hasher : : hash_t hash = database . do_hash ( key ) ;
2025-07-15 05:02:35 +02:00
int i = database . do_lookup_no_rehash ( key , hash ) ;
2015-01-18 12:12:33 +01:00
return i < 0 ? 0 : 1 ;
}
// Checks that key maps to index i. The index is obtained through
// operator(), so an unknown key is inserted as a side effect.
// Throws std::out_of_range when the obtained index differs from i.
void expect(const K &key, int i)
{
	const int actual = (*this)(key);
	if (actual == i)
		return;
	throw std::out_of_range(" idict::expect() ");
}
// Returns the key stored under index. The offset is subtracted before the
// entry is fetched with entries.at().
const K &operator[](int index) const
{
	const auto &entry = database.entries.at(index - offset);
	return entry.udata;
}
2015-10-27 15:04:47 +01:00
// Exchanges the contents of this idict with other by swapping the
// underlying databases.
void swap(idict &other)
{
	auto &theirs = other.database;
	database.swap(theirs);
}
2016-01-31 22:50:34 +01:00
// Forwards the capacity request for n entries to the underlying database.
void reserve(size_t n)
{
	database.reserve(n);
}
2015-10-27 15:04:47 +01:00
// Number of entries, as reported by the underlying database.
size_t size() const
{
	return database.size();
}
// True when the underlying database reports itself empty.
bool empty() const
{
	return database.empty();
}
// Clears the underlying database.
void clear()
{
	database.clear();
}
2020-04-16 21:48:03 +02:00
// Iterator positioned at the first index, which is offset.
const_iterator begin() const
{
	return const_iterator(*this, offset);
}
// Iterator positioned at index n. n is passed through unchanged, with no
// range check here.
const_iterator element(int n) const
{
	return const_iterator(*this, n);
}
// Iterator positioned one past the last index, i.e. offset + size().
const_iterator end() const
{
	return const_iterator(*this, offset + size());
}
2015-01-18 12:12:33 +01:00
} ;
2024-07-18 16:02:11 +02:00
/**
 * Union-find data structure with a promotion method
 * mfp stands for "merge, find, promote"
 * i-prefixed methods operate on indices in parents
 */
2024-11-04 12:41:00 +01:00
template < typename K , typename OPS >
2015-10-27 15:04:47 +01:00
class mfp
{
2024-11-04 12:41:00 +01:00
mutable idict < K , 0 , OPS > database ;
2015-10-27 15:04:47 +01:00
mutable std : : vector < int > parents ;
public :
2024-10-01 15:12:03 +02:00
typedef typename idict < K , 0 > : : const_iterator const_iterator ;
2015-11-30 19:43:52 +01:00
2023-12-29 19:20:44 +01:00
constexpr mfp ( )
{
}
2024-07-18 16:02:11 +02:00
// Finds a given element's index. If it isn't in the data structure,
// it is added as its own set
2015-10-27 15:04:47 +01:00
int operator ( ) ( const K & key ) const
{
int i = database ( key ) ;
2024-07-18 16:02:11 +02:00
// If the lookup caused the database to grow,
// also add a corresponding entry in parents initialized to -1 (no parent)
2015-10-27 15:04:47 +01:00
parents . resize ( database . size ( ) , - 1 ) ;
return i ;
}
2024-07-18 16:02:11 +02:00
// Finds an element at given index
2015-10-27 15:04:47 +01:00
const K & operator [ ] ( int index ) const
{
return database [ index ] ;
}
int ifind ( int i ) const
{
int p = i , k = i ;
while ( parents [ p ] ! = - 1 )
p = parents [ p ] ;
2024-07-18 16:02:11 +02:00
// p is now the representative of i
// Now we traverse from i up to the representative again
// and make p the parent of all the nodes along the way.
// This is a side effect and doesn't affect the return value.
// It speeds up future find operations
2015-10-27 15:04:47 +01:00
while ( k ! = p ) {
int next_k = parents [ k ] ;
parents [ k ] = p ;
k = next_k ;
}
return p ;
}
2025-07-28 01:44:41 +02:00
// Merge sets if the given indices belong to different sets.
// Makes ifind(j) the root of the merged set.
2015-10-27 15:04:47 +01:00
void imerge ( int i , int j )
{
i = ifind ( i ) ;
j = ifind ( j ) ;
if ( i ! = j )
parents [ i ] = j ;
}
void ipromote ( int i )
{
int k = i ;
while ( k ! = - 1 ) {
int next_k = parents [ k ] ;
parents [ k ] = i ;
k = next_k ;
}
parents [ i ] = - 1 ;
}
2015-10-28 11:21:55 +01:00
int lookup ( const K & a ) const
{
return ifind ( ( * this ) ( a ) ) ;
}
2015-10-27 15:04:47 +01:00
const K & find ( const K & a ) const
{
2016-02-01 10:03:03 +01:00
int i = database . at ( a , - 1 ) ;
if ( i < 0 )
return a ;
return ( * this ) [ ifind ( i ) ] ;
2015-10-27 15:04:47 +01:00
}
void merge ( const K & a , const K & b )
{
imerge ( ( * this ) ( a ) , ( * this ) ( b ) ) ;
}
void promote ( const K & a )
{
2016-02-01 10:03:03 +01:00
int i = database . at ( a , - 1 ) ;
if ( i > = 0 )
ipromote ( i ) ;
2015-10-27 15:04:47 +01:00
}
void swap ( mfp & other )
{
database . swap ( other . database ) ;
parents . swap ( other . parents ) ;
}
2016-01-31 22:50:34 +01:00
void reserve ( size_t n ) { database . reserve ( n ) ; }
2015-10-27 15:04:47 +01:00
size_t size ( ) const { return database . size ( ) ; }
bool empty ( ) const { return database . empty ( ) ; }
void clear ( ) { database . clear ( ) ; parents . clear ( ) ; }
2015-11-30 19:43:52 +01:00
const_iterator begin ( ) const { return database . begin ( ) ; }
2019-03-13 17:36:06 +01:00
const_iterator element ( int n ) const { return database . element ( n ) ; }
2015-11-30 19:43:52 +01:00
const_iterator end ( ) const { return database . end ( ) ; }
2015-10-27 15:04:47 +01:00
} ;
2014-12-28 17:51:16 +01:00
} /* namespace hashlib */
2014-12-26 19:28:52 +01:00
# endif