Ruby 2.7.6p219 (2022-04-12 revision c9c2245c0a25176072e02db9254f0e0c84c805cd)
st.c
Go to the documentation of this file.
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
   bins:
    -------
   |       |                  entries array:
   |-------|            --------------------------------
   | index |           |      | entry:  |        |      |
   |-------|           |      |         |        |      |
   | ...   |           | ...  | hash    |  ...   | ...  |
   |-------|           |      | key     |        |      |
   | empty |           |      | record  |        |      |
   |-------|            --------------------------------
   | ...   |                   ^                  ^
   |-------|                   |_ entries start   |_ entries bound
   |deleted|
    -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
   o The length of the bin array is at least two times more than the
     entry array length.  This keeps the table load factor healthy.
     The trigger of rebuilding the table is always a case when we can
     not insert an entry anymore at the entries bound.  We could
     change the entries bound too in case of deletion, but then we
     would need special code to count bins with corresponding deleted
     entries and to reset the bin values when there are too many bins
     corresponding to deleted entries.
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
   This implementation speeds up the Ruby hash table benchmarks by
   more than 40% on average on an Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#else
107#include "internal.h"
108#endif
109
110#include <stdio.h>
111#ifdef HAVE_STDLIB_H
112#include <stdlib.h>
113#endif
114#include <string.h>
115#include <assert.h>
116
117#ifdef __GNUC__
118#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
119#define EXPECT(expr, val) __builtin_expect(expr, val)
120#define ATTRIBUTE_UNUSED __attribute__((unused))
121#else
122#define PREFETCH(addr, write_p)
123#define EXPECT(expr, val) (expr)
124#define ATTRIBUTE_UNUSED
125#endif
126
127#ifdef ST_DEBUG
128#define st_assert assert
129#else
130#define st_assert(cond) ((void)(0 && (cond)))
131#endif
132
133/* The type of hashes. */
135
140};
141
142#define type_numhash st_hashtype_num
143static const struct st_hash_type st_hashtype_num = {
144 st_numcmp,
146};
147
148static int st_strcmp(st_data_t, st_data_t);
149static st_index_t strhash(st_data_t);
150static const struct st_hash_type type_strhash = {
151 st_strcmp,
152 strhash,
153};
154
155static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
156static st_index_t strcasehash(st_data_t);
157static const struct st_hash_type type_strcasehash = {
158 st_locale_insensitive_strcasecmp_i,
159 strcasehash,
160};
161
162/* Value used to catch uninitialized entries/bins during debugging.
163 There is a possibility for a false alarm, but its probability is
164 extremely small. */
165#define ST_INIT_VAL 0xafafafafafafafaf
166#define ST_INIT_VAL_BYTE 0xafa
167
168#ifdef RUBY
169#undef malloc
170#undef realloc
171#undef calloc
172#undef free
173#define malloc ruby_xmalloc
174#define calloc ruby_xcalloc
175#define realloc ruby_xrealloc
176#define free ruby_xfree
177#endif
178
179#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
180#define PTR_EQUAL(tab, ptr, hash_val, key_) \
181 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
182
/* Like PTR_EQUAL, but the comparison result is stored in RES.
   REBUILT_P is set to TRUE if the table was rebuilt during the
   comparison.  */
185#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
186 do { \
187 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
188 res = PTR_EQUAL(tab, ptr, hash_val, key); \
189 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
190 } while (FALSE)
191
192/* Features of a table. */
194 /* Power of 2 used for number of allocated entries. */
195 unsigned char entry_power;
196 /* Power of 2 used for number of allocated bins. Depending on the
197 table size, the number of bins is 2-4 times more than the
198 number of entries. */
199 unsigned char bin_power;
200 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
201 unsigned char size_ind;
202 /* Bins are packed in words of type st_index_t. The following is
203 a size of bins counted by words. */
205};
206
207/* Features of all possible size tables. */
208#if SIZEOF_ST_INDEX_T == 8
209#define MAX_POWER2 62
210static const struct st_features features[] = {
211 {0, 1, 0, 0x0},
212 {1, 2, 0, 0x1},
213 {2, 3, 0, 0x1},
214 {3, 4, 0, 0x2},
215 {4, 5, 0, 0x4},
216 {5, 6, 0, 0x8},
217 {6, 7, 0, 0x10},
218 {7, 8, 0, 0x20},
219 {8, 9, 1, 0x80},
220 {9, 10, 1, 0x100},
221 {10, 11, 1, 0x200},
222 {11, 12, 1, 0x400},
223 {12, 13, 1, 0x800},
224 {13, 14, 1, 0x1000},
225 {14, 15, 1, 0x2000},
226 {15, 16, 1, 0x4000},
227 {16, 17, 2, 0x10000},
228 {17, 18, 2, 0x20000},
229 {18, 19, 2, 0x40000},
230 {19, 20, 2, 0x80000},
231 {20, 21, 2, 0x100000},
232 {21, 22, 2, 0x200000},
233 {22, 23, 2, 0x400000},
234 {23, 24, 2, 0x800000},
235 {24, 25, 2, 0x1000000},
236 {25, 26, 2, 0x2000000},
237 {26, 27, 2, 0x4000000},
238 {27, 28, 2, 0x8000000},
239 {28, 29, 2, 0x10000000},
240 {29, 30, 2, 0x20000000},
241 {30, 31, 2, 0x40000000},
242 {31, 32, 2, 0x80000000},
243 {32, 33, 3, 0x200000000},
244 {33, 34, 3, 0x400000000},
245 {34, 35, 3, 0x800000000},
246 {35, 36, 3, 0x1000000000},
247 {36, 37, 3, 0x2000000000},
248 {37, 38, 3, 0x4000000000},
249 {38, 39, 3, 0x8000000000},
250 {39, 40, 3, 0x10000000000},
251 {40, 41, 3, 0x20000000000},
252 {41, 42, 3, 0x40000000000},
253 {42, 43, 3, 0x80000000000},
254 {43, 44, 3, 0x100000000000},
255 {44, 45, 3, 0x200000000000},
256 {45, 46, 3, 0x400000000000},
257 {46, 47, 3, 0x800000000000},
258 {47, 48, 3, 0x1000000000000},
259 {48, 49, 3, 0x2000000000000},
260 {49, 50, 3, 0x4000000000000},
261 {50, 51, 3, 0x8000000000000},
262 {51, 52, 3, 0x10000000000000},
263 {52, 53, 3, 0x20000000000000},
264 {53, 54, 3, 0x40000000000000},
265 {54, 55, 3, 0x80000000000000},
266 {55, 56, 3, 0x100000000000000},
267 {56, 57, 3, 0x200000000000000},
268 {57, 58, 3, 0x400000000000000},
269 {58, 59, 3, 0x800000000000000},
270 {59, 60, 3, 0x1000000000000000},
271 {60, 61, 3, 0x2000000000000000},
272 {61, 62, 3, 0x4000000000000000},
273 {62, 63, 3, 0x8000000000000000},
274};
275
276#else
277#define MAX_POWER2 30
278
279static const struct st_features features[] = {
280 {0, 1, 0, 0x1},
281 {1, 2, 0, 0x1},
282 {2, 3, 0, 0x2},
283 {3, 4, 0, 0x4},
284 {4, 5, 0, 0x8},
285 {5, 6, 0, 0x10},
286 {6, 7, 0, 0x20},
287 {7, 8, 0, 0x40},
288 {8, 9, 1, 0x100},
289 {9, 10, 1, 0x200},
290 {10, 11, 1, 0x400},
291 {11, 12, 1, 0x800},
292 {12, 13, 1, 0x1000},
293 {13, 14, 1, 0x2000},
294 {14, 15, 1, 0x4000},
295 {15, 16, 1, 0x8000},
296 {16, 17, 2, 0x20000},
297 {17, 18, 2, 0x40000},
298 {18, 19, 2, 0x80000},
299 {19, 20, 2, 0x100000},
300 {20, 21, 2, 0x200000},
301 {21, 22, 2, 0x400000},
302 {22, 23, 2, 0x800000},
303 {23, 24, 2, 0x1000000},
304 {24, 25, 2, 0x2000000},
305 {25, 26, 2, 0x4000000},
306 {26, 27, 2, 0x8000000},
307 {27, 28, 2, 0x10000000},
308 {28, 29, 2, 0x20000000},
309 {29, 30, 2, 0x40000000},
310 {30, 31, 2, 0x80000000},
311};
312
313#endif
314
315/* The reserved hash value and its substitution. */
316#define RESERVED_HASH_VAL (~(st_hash_t) 0)
317#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
318
321
322/* Return hash value of KEY for table TAB. */
323static inline st_hash_t
324do_hash(st_data_t key, st_table *tab)
325{
326 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
327
328 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
329 another value. Such mapping should be extremely rare. */
331}
332
333/* Power of 2 defining the minimal number of allocated entries. */
334#define MINIMAL_POWER2 2
335
336#if MINIMAL_POWER2 < 2
337#error "MINIMAL_POWER2 should be >= 2"
338#endif
339
340/* If the power2 of the allocated `entries` is less than the following
341 value, don't allocate bins and use a linear search. */
342#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
343
344/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
345static int
346get_power2(st_index_t size)
347{
348 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
349 if (n <= MAX_POWER2)
350 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
351#ifndef NOT_RUBY
352 /* Ran out of the table entries */
353 rb_raise(rb_eRuntimeError, "st_table too big");
354#endif
355 /* should raise exception */
356 return -1;
357}
358
359/* Return value of N-th bin in array BINS of table with bins size
360 index S. */
361static inline st_index_t
362get_bin(st_index_t *bins, int s, st_index_t n)
363{
364 return (s == 0 ? ((unsigned char *) bins)[n]
365 : s == 1 ? ((unsigned short *) bins)[n]
366 : s == 2 ? ((unsigned int *) bins)[n]
367 : ((st_index_t *) bins)[n]);
368}
369
370/* Set up N-th bin in array BINS of table with bins size index S to
371 value V. */
372static inline void
373set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
374{
375 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
376 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
377 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
378 else ((st_index_t *) bins)[n] = v;
379}
380
381/* These macros define reserved values for empty table bin and table
382 bin which contains a deleted entry. We will never use such values
383 for an entry index in bins. */
384#define EMPTY_BIN 0
385#define DELETED_BIN 1
386/* Base of a real entry index in the bins. */
387#define ENTRY_BASE 2
388
389/* Mark I-th bin of table TAB as empty, in other words not
390 corresponding to any entry. */
391#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
392
393/* Values used for not found entry and bin with given
394 characteristics. */
395#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
396#define UNDEFINED_BIN_IND (~(st_index_t) 0)
397
398/* Entry and bin values returned when we found a table rebuild during
399 the search. */
400#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
401#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
402
/* Mark I-th bin of table TAB as corresponding to a deleted table
   entry.  Only the bin value is changed here; the macro itself does
   not update any counters of the table.  */
406#define MARK_BIN_DELETED(tab, i) \
407 do { \
408 st_assert(i != UNDEFINED_BIN_IND); \
409 st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i)); \
410 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
411 } while (0)
412
/* Macros to check whether bin value B denotes an empty bin or a bin
   corresponding to a deleted entry.  */
415#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
416#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
417#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
418
419/* Macros to check empty bins and bins corresponding to deleted
420 entries. Bins are given by their index I in table TAB. */
421#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
422#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
423#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
424
425/* Macros for marking and checking deleted entries given by their
426 pointer E_PTR. */
427#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
428#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
429
430/* Return bin size index of table TAB. */
431static inline unsigned int
432get_size_ind(const st_table *tab)
433{
434 return tab->size_ind;
435}
436
437/* Return the number of allocated bins of table TAB. */
438static inline st_index_t
439get_bins_num(const st_table *tab)
440{
441 return ((st_index_t) 1)<<tab->bin_power;
442}
443
444/* Return mask for a bin index in table TAB. */
445static inline st_index_t
446bins_mask(const st_table *tab)
447{
448 return get_bins_num(tab) - 1;
449}
450
451/* Return the index of table TAB bin corresponding to
452 HASH_VALUE. */
453static inline st_index_t
454hash_bin(st_hash_t hash_value, st_table *tab)
455{
456 return hash_value & bins_mask(tab);
457}
458
459/* Return the number of allocated entries of table TAB. */
460static inline st_index_t
461get_allocated_entries(const st_table *tab)
462{
463 return ((st_index_t) 1)<<tab->entry_power;
464}
465
466/* Return size of the allocated bins of table TAB. */
467static inline st_index_t
468bins_size(const st_table *tab)
469{
470 return features[tab->entry_power].bins_words * sizeof (st_index_t);
471}
472
473/* Mark all bins of table TAB as empty. */
474static void
475initialize_bins(st_table *tab)
476{
477 memset(tab->bins, 0, bins_size(tab));
478}
479
480/* Make table TAB empty. */
481static void
482make_tab_empty(st_table *tab)
483{
484 tab->num_entries = 0;
485 tab->entries_start = tab->entries_bound = 0;
486 if (tab->bins != NULL)
487 initialize_bins(tab);
488}
489
490#ifdef ST_DEBUG
491#define st_assert_notinitial(ent) \
492 do { \
493 st_assert(ent.hash != (st_hash_t) ST_INIT_VAL); \
494 st_assert(ent.key != ST_INIT_VAL); \
495 st_assert(ent.record != ST_INIT_VAL); \
496 } while (0)
497/* Check the table T consistency. It can be extremely slow. So use
498 it only for debugging. */
499static void
500st_check(st_table *tab)
501{
502 st_index_t d, e, i, n, p;
503
504 for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1)
505 ;
506 p = i;
508 st_assert(tab->entries_bound <= get_allocated_entries(tab));
510 n = 0;
511 return;
512 if (tab->entries_bound != 0)
513 for (i = tab->entries_start; i < tab->entries_bound; i++) {
514 st_assert_notinitial(tab->entries[i]);
515 if (! DELETED_ENTRY_P(&tab->entries[i]))
516 n++;
517 }
518 st_assert(n == tab->num_entries);
519 if (tab->bins == NULL)
521 else {
523 for (n = d = i = 0; i < get_bins_num(tab); i++) {
524 st_assert(get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL);
525 if (IND_DELETED_BIN_P(tab, i)) {
526 d++;
527 continue;
528 }
529 else if (IND_EMPTY_BIN_P(tab, i))
530 continue;
531 n++;
532 e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE;
533 st_assert(tab->entries_start <= e && e < tab->entries_bound);
534 st_assert(! DELETED_ENTRY_P(&tab->entries[e]));
535 st_assert_notinitial(tab->entries[e]);
536 }
537 st_assert(n == tab->num_entries);
538 st_assert(n + d < get_bins_num(tab));
539 }
540}
541#endif
542
543#ifdef HASH_LOG
544#ifdef HAVE_UNISTD_H
545#include <unistd.h>
546#endif
547static struct {
548 int all, total, num, str, strcase;
549} collision;
550
551/* Flag switching off output of package statistics at the end of
552 program. */
553static int init_st = 0;
554
555/* Output overall number of table searches and collisions into a
556 temporary file. */
557static void
558stat_col(void)
559{
560 char fname[10+sizeof(long)*3];
561 FILE *f;
562 if (!collision.total) return;
563 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
564 if (f == NULL)
565 return;
566 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
567 ((double)collision.all / (collision.total)) * 100);
568 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
569 fclose(f);
570}
571#endif
572
573/* Create and return table with TYPE which can hold at least SIZE
574 entries. The real number of entries which the table can hold is
575 the nearest power of two for SIZE. */
576st_table *
578{
579 st_table *tab;
580 int n;
581
582#ifdef HASH_LOG
583#if HASH_LOG+0 < 0
584 {
585 const char *e = getenv("ST_HASH_LOG");
586 if (!e || !*e) init_st = 1;
587 }
588#endif
589 if (init_st == 0) {
590 init_st = 1;
591 atexit(stat_col);
592 }
593#endif
594
595 n = get_power2(size);
596#ifndef RUBY
597 if (n < 0)
598 return NULL;
599#endif
600 tab = (st_table *) malloc(sizeof (st_table));
601#ifndef RUBY
602 if (tab == NULL)
603 return NULL;
604#endif
605 tab->type = type;
606 tab->entry_power = n;
607 tab->bin_power = features[n].bin_power;
608 tab->size_ind = features[n].size_ind;
610 tab->bins = NULL;
611 else {
612 tab->bins = (st_index_t *) malloc(bins_size(tab));
613#ifndef RUBY
614 if (tab->bins == NULL) {
615 free(tab);
616 return NULL;
617 }
618#endif
619 }
620 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
621 * sizeof(st_table_entry));
622#ifndef RUBY
623 if (tab->entries == NULL) {
624 st_free_table(tab);
625 return NULL;
626 }
627#endif
628#ifdef ST_DEBUG
630 get_allocated_entries(tab) * sizeof(st_table_entry));
631 if (tab->bins != NULL)
632 memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab));
633#endif
634 make_tab_empty(tab);
635 tab->rebuilds_num = 0;
636#ifdef ST_DEBUG
637 st_check(tab);
638#endif
639 return tab;
640}
641
642/* Create and return table with TYPE which can hold a minimal number
643 of entries (see comments for get_power2). */
644st_table *
646{
647 return st_init_table_with_size(type, 0);
648}
649
650/* Create and return table which can hold a minimal number of
651 numbers. */
652st_table *
654{
656}
657
658/* Create and return table which can hold SIZE numbers. */
659st_table *
661{
663}
664
665/* Create and return table which can hold a minimal number of
666 strings. */
667st_table *
669{
670 return st_init_table(&type_strhash);
671}
672
673/* Create and return table which can hold SIZE strings. */
674st_table *
676{
677 return st_init_table_with_size(&type_strhash, size);
678}
679
680/* Create and return table which can hold a minimal number of strings
681 whose character case is ignored. */
682st_table *
684{
685 return st_init_table(&type_strcasehash);
686}
687
688/* Create and return table which can hold SIZE strings whose character
689 case is ignored. */
690st_table *
692{
693 return st_init_table_with_size(&type_strcasehash, size);
694}
695
696/* Make table TAB empty. */
697void
699{
700 make_tab_empty(tab);
701 tab->rebuilds_num++;
702#ifdef ST_DEBUG
703 st_check(tab);
704#endif
705}
706
707/* Free table TAB space. */
708void
710{
711 if (tab->bins != NULL)
712 free(tab->bins);
713 free(tab->entries);
714 free(tab);
715}
716
717/* Return byte size of memory allocated for table TAB. */
718size_t
720{
721 return(sizeof(st_table)
722 + (tab->bins == NULL ? 0 : bins_size(tab))
723 + get_allocated_entries(tab) * sizeof(st_table_entry));
724}
725
726static st_index_t
727find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
728
729static st_index_t
730find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
731
732static st_index_t
733find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
734
735static st_index_t
736find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
737 st_data_t key, st_index_t *bin_ind);
738
739#ifdef HASH_LOG
740static void
741count_collision(const struct st_hash_type *type)
742{
743 collision.all++;
744 if (type == &type_numhash) {
745 collision.num++;
746 }
747 else if (type == &type_strhash) {
748 collision.strcase++;
749 }
750 else if (type == &type_strcasehash) {
751 collision.str++;
752 }
753}
754
755#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
756#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
757#define collision_check 0
758#else
759#define COLLISION
760#define FOUND_BIN
761#endif
762
763/* If the number of entries in the table is at least REBUILD_THRESHOLD
764 times less than the entry array length, decrease the table
765 size. */
766#define REBUILD_THRESHOLD 4
767
768#if REBUILD_THRESHOLD < 2
769#error "REBUILD_THRESHOLD should be >= 2"
770#endif
771
772/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
773 and can change size of the table entries and bins arrays.
774 Rebuilding is implemented by creation of a new table or by
775 compaction of the existing one. */
776static void
777rebuild_table(st_table *tab)
778{
779 st_index_t i, ni, bound;
780 unsigned int size_ind;
781 st_table *new_tab;
782 st_table_entry *entries, *new_entries;
783 st_table_entry *curr_entry_ptr;
784 st_index_t *bins;
785 st_index_t bin_ind;
786
787 st_assert(tab != NULL);
788 bound = tab->entries_bound;
789 entries = tab->entries;
790 if ((2 * tab->num_entries <= get_allocated_entries(tab)
791 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
792 || tab->num_entries < (1 << MINIMAL_POWER2)) {
793 /* Compaction: */
794 tab->num_entries = 0;
795 if (tab->bins != NULL)
796 initialize_bins(tab);
797 new_tab = tab;
798 new_entries = entries;
799 }
800 else {
801 new_tab = st_init_table_with_size(tab->type,
802 2 * tab->num_entries - 1);
803 new_entries = new_tab->entries;
804 }
805 ni = 0;
806 bins = new_tab->bins;
807 size_ind = get_size_ind(new_tab);
808 for (i = tab->entries_start; i < bound; i++) {
809 curr_entry_ptr = &entries[i];
810 PREFETCH(entries + i + 1, 0);
811 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
812 continue;
813 if (&new_entries[ni] != curr_entry_ptr)
814 new_entries[ni] = *curr_entry_ptr;
815 if (EXPECT(bins != NULL, 1)) {
816 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
817 curr_entry_ptr->key);
818 st_assert(bin_ind != UNDEFINED_BIN_IND);
819 st_assert(tab == new_tab || new_tab->rebuilds_num == 0);
820 st_assert(IND_EMPTY_BIN_P(new_tab, bin_ind));
821 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
822 }
823 new_tab->num_entries++;
824 ni++;
825 }
826 if (new_tab != tab) {
827 tab->entry_power = new_tab->entry_power;
828 tab->bin_power = new_tab->bin_power;
829 tab->size_ind = new_tab->size_ind;
830 st_assert(tab->num_entries == ni);
831 st_assert(new_tab->num_entries == ni);
832 if (tab->bins != NULL)
833 free(tab->bins);
834 tab->bins = new_tab->bins;
835 free(tab->entries);
836 tab->entries = new_tab->entries;
837 free(new_tab);
838 }
839 tab->entries_start = 0;
840 tab->entries_bound = tab->num_entries;
841 tab->rebuilds_num++;
842#ifdef ST_DEBUG
843 st_check(tab);
844#endif
845}
846
847/* Return the next secondary hash index for table TAB using previous
848 index IND and PERTERB. Finally modulo of the function becomes a
849 full *cycle linear congruential generator*, in other words it
850 guarantees traversing all table bins in extreme case.
851
852 According the Hull-Dobell theorem a generator
853 "Xnext = (a*Xprev + c) mod m" is a full cycle generator iff
854 o m and c are relatively prime
855 o a-1 is divisible by all prime factors of m
856 o a-1 is divisible by 4 if m is divisible by 4.
857
858 For our case a is 5, c is 1, and m is a power of two. */
859static inline st_index_t
860secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
861{
862 *perterb >>= 11;
863 ind = (ind << 2) + ind + *perterb + 1;
864 return hash_bin(ind, tab);
865}
866
867/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
868 search. Return the index of the found entry in array `entries`.
869 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
870 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
871static inline st_index_t
872find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
873{
874 int eq_p, rebuilt_p;
875 st_index_t i, bound;
877
878 bound = tab->entries_bound;
879 entries = tab->entries;
880 for (i = tab->entries_start; i < bound; i++) {
881 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
882 if (EXPECT(rebuilt_p, 0))
884 if (eq_p)
885 return i;
886 }
887 return UNDEFINED_ENTRY_IND;
888}
889
890/* Use the quadratic probing. The method has a better data locality
891 but more collisions than the current approach. In average it
892 results in a bit slower search. */
893/*#define QUADRATIC_PROBE*/
894
895/* Return index of entry with HASH_VALUE and KEY in table TAB. If
896 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
897 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
898static st_index_t
899find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
900{
901 int eq_p, rebuilt_p;
902 st_index_t ind;
903#ifdef QUADRATIC_PROBE
904 st_index_t d;
905#else
906 st_index_t peterb;
907#endif
910
911 st_assert(tab != NULL);
912 st_assert(tab->bins != NULL);
913 ind = hash_bin(hash_value, tab);
914#ifdef QUADRATIC_PROBE
915 d = 1;
916#else
917 peterb = hash_value;
918#endif
919 FOUND_BIN;
920 for (;;) {
921 bin = get_bin(tab->bins, get_size_ind(tab), ind);
923 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
924 if (EXPECT(rebuilt_p, 0))
926 if (eq_p)
927 break;
928 } else if (EMPTY_BIN_P(bin))
929 return UNDEFINED_ENTRY_IND;
930#ifdef QUADRATIC_PROBE
931 ind = hash_bin(ind + d, tab);
932 d++;
933#else
934 ind = secondary_hash(ind, tab, &peterb);
935#endif
936 COLLISION;
937 }
938 return bin;
939}
940
941/* Find and return index of table TAB bin corresponding to an entry
942 with HASH_VALUE and KEY. If there is no such bin, return
943 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
944 return REBUILT_TABLE_BIN_IND. */
945static st_index_t
946find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
947{
948 int eq_p, rebuilt_p;
949 st_index_t ind;
950#ifdef QUADRATIC_PROBE
951 st_index_t d;
952#else
953 st_index_t peterb;
954#endif
957
958 st_assert(tab != NULL);
959 st_assert(tab->bins != NULL);
960 ind = hash_bin(hash_value, tab);
961#ifdef QUADRATIC_PROBE
962 d = 1;
963#else
964 peterb = hash_value;
965#endif
966 FOUND_BIN;
967 for (;;) {
968 bin = get_bin(tab->bins, get_size_ind(tab), ind);
970 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
971 if (EXPECT(rebuilt_p, 0))
973 if (eq_p)
974 break;
975 } else if (EMPTY_BIN_P(bin))
976 return UNDEFINED_BIN_IND;
977#ifdef QUADRATIC_PROBE
978 ind = hash_bin(ind + d, tab);
979 d++;
980#else
981 ind = secondary_hash(ind, tab, &peterb);
982#endif
983 COLLISION;
984 }
985 return ind;
986}
987
988/* Find and return index of table TAB bin corresponding to an entry
989 with HASH_VALUE and KEY. The entry should be in the table
990 already. */
991static st_index_t
992find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
993{
994 st_index_t ind;
995#ifdef QUADRATIC_PROBE
996 st_index_t d;
997#else
998 st_index_t peterb;
999#endif
1002
1003 st_assert(tab != NULL);
1004 st_assert(tab->bins != NULL);
1005 ind = hash_bin(hash_value, tab);
1006#ifdef QUADRATIC_PROBE
1007 d = 1;
1008#else
1009 peterb = hash_value;
1010#endif
1011 FOUND_BIN;
1012 for (;;) {
1013 bin = get_bin(tab->bins, get_size_ind(tab), ind);
1015 return ind;
1016 st_assert (entries[bin - ENTRY_BASE].hash != hash_value);
1017#ifdef QUADRATIC_PROBE
1018 ind = hash_bin(ind + d, tab);
1019 d++;
1020#else
1021 ind = secondary_hash(ind, tab, &peterb);
1022#endif
1023 COLLISION;
1024 }
1025}
1026
/* Return index of table TAB bin for HASH_VALUE and KEY through
   BIN_IND and the value of that bin as the function result.  Reserve
   the bin for inclusion of the corresponding entry into the table if
   it is not there yet.  We always find such a bin as the bins array
   is longer than the entries array.  Although we can reuse a deleted
   bin, the resulting bin value is always empty if the table has no
   entry with KEY.  Return the entries array index of the found entry
   or UNDEFINED_ENTRY_IND if it is not found.  If the table was
   rebuilt during the search, return REBUILT_TABLE_ENTRY_IND.  */
/* NOTE(review): the listing's embedded numbering skips source lines
   1050, 1055 and 1080, so this text is incomplete: `entries` is used
   below without a visible declaration, and the statement controlled by
   the rebuilt_p test on line 1079 is not shown (the header comment says
   REBUILT_TABLE_ENTRY_IND is returned in that case -- confirm against
   the pristine file).  */
1036static st_index_t
1037find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1038 st_data_t key, st_index_t *bin_ind)
1039{
1040 int eq_p, rebuilt_p;
1041 st_index_t ind;
1042 st_hash_t curr_hash_value = *hash_value;
1043#ifdef QUADRATIC_PROBE
1044 st_index_t d;
1045#else
1046 st_index_t peterb;
1047#endif
1048 st_index_t entry_index;
1049 st_index_t first_deleted_bin_ind;
1051
1052 st_assert(tab != NULL);
1053 st_assert(tab->bins != NULL);
1054 st_assert(tab->entries_bound <= get_allocated_entries(tab));
/* Probing starts at the bin selected by the hash value. */
1056 ind = hash_bin(curr_hash_value, tab);
1057#ifdef QUADRATIC_PROBE
1058 d = 1;
1059#else
1060 peterb = curr_hash_value;
1061#endif
1062 FOUND_BIN;
/* Remembers the first deleted bin met while probing so it can be reused. */
1063 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1064 entries = tab->entries;
1065 for (;;) {
1066 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1067 if (EMPTY_BIN_P(entry_index)) {
/* An empty bin ends the probe: KEY is absent.  The entry count is
   bumped here, i.e. the caller is expected to add the entry.  */
1068 tab->num_entries++;
1069 entry_index = UNDEFINED_ENTRY_IND;
1070 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1071 /* We can reuse bin of a deleted entry. */
1072 ind = first_deleted_bin_ind;
1073 MARK_BIN_EMPTY(tab, ind);
1074 }
1075 break;
1076 }
1077 else if (! DELETED_BIN_P(entry_index)) {
/* Live bin: compare the stored entry with (hash, KEY). */
1078 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1079 if (EXPECT(rebuilt_p, 0))
/* NOTE(review): source line 1080 (the body of the test above) is
   missing from the listing.  */
1081 if (eq_p)
1082 break;
1083 }
1084 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1085 first_deleted_bin_ind = ind;
/* Advance to the next bin of the probe sequence. */
1086#ifdef QUADRATIC_PROBE
1087 ind = hash_bin(ind + d, tab);
1088 d++;
1089#else
1090 ind = secondary_hash(ind, tab, &peterb);
1091#endif
1092 COLLISION;
1093 }
/* Report the bin to use (found, empty, or reused deleted one). */
1094 *bin_ind = ind;
1095 return entry_index;
1096}
1097
1098/* Find an entry with KEY in table TAB. Return non-zero if we found
1099 it. Set up *RECORD to the found entry record. */
/* NOTE(review): source lines 1101 (the declarator), 1103, 1109 and 1116
   are missing from this listing.  `bin` has no visible declaration and
   each `goto retry` below has lost the condition that guarded it
   (presumably a REBUILT_TABLE_ENTRY_IND check, as in the sibling
   functions) -- confirm against the pristine file.  */
1100int
1102{
1104 st_hash_t hash = do_hash(key, tab);
1105
1106 retry:
1107 if (tab->bins == NULL) {
/* No bins array: the entry is searched with find_entry. */
1108 bin = find_entry(tab, hash, key);
1110 goto retry;
1111 if (bin == UNDEFINED_ENTRY_IND)
1112 return 0;
1113 }
1114 else {
1115 bin = find_table_entry_ind(tab, hash, key);
1117 goto retry;
1118 if (bin == UNDEFINED_ENTRY_IND)
1119 return 0;
/* Indices coming from the bins are biased by ENTRY_BASE. */
1120 bin -= ENTRY_BASE;
1121 }
/* VALUE may be null when the caller only wants the found/not-found flag. */
1122 if (value != 0)
1123 *value = tab->entries[bin].record;
1124 return 1;
1125}
1126
1127/* Find an entry with KEY in table TAB. Return non-zero if we found
1128 it. Set up *RESULT to the found table entry key. */
/* NOTE(review): source lines 1130 (the declarator), 1132, 1138 and 1145
   are missing from this listing.  `bin` has no visible declaration and
   each `goto retry` below has lost its guarding condition -- confirm
   against the pristine file.  */
1129int
1131{
1133 st_hash_t hash = do_hash(key, tab);
1134
1135 retry:
1136 if (tab->bins == NULL) {
/* No bins array: the entry is searched with find_entry. */
1137 bin = find_entry(tab, hash, key);
1139 goto retry;
1140 if (bin == UNDEFINED_ENTRY_IND)
1141 return 0;
1142 }
1143 else {
1144 bin = find_table_entry_ind(tab, hash, key);
1146 goto retry;
1147 if (bin == UNDEFINED_ENTRY_IND)
1148 return 0;
/* Indices coming from the bins are biased by ENTRY_BASE. */
1149 bin -= ENTRY_BASE;
1150 }
/* RESULT may be null; it receives the key stored in the table, which
   need not be the same object as the KEY passed in.  */
1151 if (result != 0)
1152 *result = tab->entries[bin].key;
1153 return 1;
1154}
1155
1156/* Check the table and rebuild it if it is necessary. */
1157static inline void
1158rebuild_table_if_necessary (st_table *tab)
1159{
1160 st_index_t bound = tab->entries_bound;
1161
1162 if (bound == get_allocated_entries(tab))
1163 rebuild_table(tab);
1164 st_assert(tab->entries_bound < get_allocated_entries(tab));
1165}
1166
1167/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1168 already entry with KEY in the table, return nonzero and update
1169 the value of the found entry. */
/* NOTE(review): source lines 1171 (the declarator), 1174, 1185 and 1195
   are missing from this listing.  `bin` has no visible declaration and
   each `goto retry` below has lost its guarding condition -- confirm
   against the pristine file.  */
1170int
1172{
1173 st_table_entry *entry;
1175 st_index_t ind;
1176 st_hash_t hash_value;
1177 st_index_t bin_ind;
1178 int new_p;
1179
1180 hash_value = do_hash(key, tab);
1181 retry:
/* Ensure there is room for one more entry before searching. */
1182 rebuild_table_if_necessary(tab);
1183 if (tab->bins == NULL) {
1184 bin = find_entry(tab, hash_value, key);
1186 goto retry;
1187 new_p = bin == UNDEFINED_ENTRY_IND;
/* Without bins the entry count is bumped here; with bins it is bumped
   inside find_table_bin_ptr_and_reserve.  */
1188 if (new_p)
1189 tab->num_entries++;
1190 bin_ind = UNDEFINED_BIN_IND;
1191 }
1192 else {
1193 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1194 key, &bin_ind);
1196 goto retry;
1197 new_p = bin == UNDEFINED_ENTRY_IND;
1198 bin -= ENTRY_BASE;
1199 }
1200 if (new_p) {
/* New key: append the entry at entries_bound and, when bins exist,
   record its index in the reserved bin.  */
1201 st_assert(tab->entries_bound < get_allocated_entries(tab));
1202 ind = tab->entries_bound++;
1203 entry = &tab->entries[ind];
1204 entry->hash = hash_value;
1205 entry->key = key;
1206 entry->record = value;
1207 if (bin_ind != UNDEFINED_BIN_IND)
1208 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1209#ifdef ST_DEBUG
1210 st_check(tab);
1211#endif
1212 return 0;
1213 }
/* Existing key: only the record is overwritten; the stored key stays. */
1214 tab->entries[bin].record = value;
1215#ifdef ST_DEBUG
1216 st_check(tab);
1217#endif
1218 return 1;
1219}
1220
1221/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1222 entry with KEY before the insertion. */
1223static inline void
1224st_add_direct_with_hash(st_table *tab,
1225 st_data_t key, st_data_t value, st_hash_t hash)
1226{
1227 st_table_entry *entry;
1228 st_index_t ind;
1229 st_index_t bin_ind;
1230
1231 rebuild_table_if_necessary(tab);
1232 ind = tab->entries_bound++;
1233 entry = &tab->entries[ind];
1234 entry->hash = hash;
1235 entry->key = key;
1236 entry->record = value;
1237 tab->num_entries++;
1238 if (tab->bins != NULL) {
1239 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1240 st_assert (bin_ind != UNDEFINED_BIN_IND);
1241 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1242 }
1243#ifdef ST_DEBUG
1244 st_check(tab);
1245#endif
1246}
1247
1248/* Insert (KEY, VALUE) into table TAB. The table should not have
1249 entry with KEY before the insertion. */
/* NOTE(review): source line 1251 (the declarator) is missing from this
   listing; the body uses `tab`, `key` and `value`.  */
1250void
1252{
1253 st_hash_t hash_value;
1254
/* Hash the key once, then delegate to the hash-taking variant. */
1255 hash_value = do_hash(key, tab);
1256 st_add_direct_with_hash(tab, key, value, hash_value);
1257}
1258
1259/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1260 there is already entry with KEY in the table, return nonzero and
1261 update the value of the found entry. */
/* NOTE(review): source lines 1263 (start of the declarator), 1267, 1278
   and 1288 are missing from this listing.  `bin` has no visible
   declaration and each `goto retry` below has lost its guarding
   condition -- confirm against the pristine file.  */
1262int
1264 st_data_t (*func)(st_data_t))
1265{
1266 st_table_entry *entry;
1268 st_index_t ind, check;
1269 st_hash_t hash_value;
1270 st_index_t bin_ind;
1271 int new_p;
1272
1273 hash_value = do_hash(key, tab);
1274 retry:
1275 rebuild_table_if_necessary (tab);
1276 if (tab->bins == NULL) {
1277 bin = find_entry(tab, hash_value, key);
1279 goto retry;
1280 new_p = bin == UNDEFINED_ENTRY_IND;
1281 if (new_p)
1282 tab->num_entries++;
1283 bin_ind = UNDEFINED_BIN_IND;
1284 }
1285 else {
1286 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1287 key, &bin_ind);
1289 goto retry;
1290 new_p = bin == UNDEFINED_ENTRY_IND;
1291 bin -= ENTRY_BASE;
1292 }
1293 if (new_p) {
1294 st_assert(tab->entries_bound < get_allocated_entries(tab));
/* FUNC transforms the key before it is stored.  The asserts require
   that the call did not rebuild the table and (line 1305) that the
   transformed key hashes to the same value as the original.  */
1295 check = tab->rebuilds_num;
1296 key = (*func)(key);
1297 st_assert(check == tab->rebuilds_num);
1298 ind = tab->entries_bound++;
1299 entry = &tab->entries[ind];
1300 entry->hash = hash_value;
1301 entry->key = key;
1302 entry->record = value;
1303 if (bin_ind != UNDEFINED_BIN_IND)
1304 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1305 st_assert(do_hash(key, tab) == hash_value);
1306#ifdef ST_DEBUG
1307 st_check(tab);
1308#endif
1309 return 0;
1310 }
/* Existing key: FUNC is not called, only the record is overwritten. */
1311 tab->entries[bin].record = value;
1312#ifdef ST_DEBUG
1313 st_check(tab);
1314#endif
1315 return 1;
1316}
1317
1318/* Create and return a copy of table OLD_TAB. */
/* NOTE(review): source line 1320 (the declarator) is missing from this
   listing; the body uses a parameter named `old_tab`.  Allocation
   failures are only checked when RUBY is not defined; in that case NULL
   is returned and what was allocated so far is released.  */
1319st_table *
1321{
1322 st_table *new_tab;
1323
1324 new_tab = (st_table *) malloc(sizeof(st_table));
1325#ifndef RUBY
1326 if (new_tab == NULL)
1327 return NULL;
1328#endif
/* Start from a field-by-field copy; the array pointers are replaced below. */
1329 *new_tab = *old_tab;
1330 if (old_tab->bins == NULL)
1331 new_tab->bins = NULL;
1332 else {
1333 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1334#ifndef RUBY
1335 if (new_tab->bins == NULL) {
1336 free(new_tab);
1337 return NULL;
1338 }
1339#endif
1340 }
1341 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1342 * sizeof(st_table_entry));
1343#ifndef RUBY
1344 if (new_tab->entries == NULL) {
1345 st_free_table(new_tab);
1346 return NULL;
1347 }
1348#endif
/* The whole allocated entries array is copied, not just the used range. */
1349 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1350 get_allocated_entries(old_tab));
1351 if (old_tab->bins != NULL)
1352 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1353#ifdef ST_DEBUG
1354 st_check(new_tab);
1355#endif
1356 return new_tab;
1357}
1358
1359/* Update the entries start of table TAB after removing an entry
1360 with index N in the array entries. */
1361static inline void
1362update_range_for_deleted(st_table *tab, st_index_t n)
1363{
1364 /* Do not update entries_bound here. Otherwise, we can fill all
1365 bins by deleted entry value before rebuilding the table. */
1366 if (tab->entries_start == n)
1367 tab->entries_start = n + 1;
1368}
1369
1370/* Delete entry with KEY from table TAB, set up *VALUE (unless
1371 VALUE is zero) from deleted table entry, and return non-zero. If
1372 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1373 is zero), and return zero. */
/* NOTE(review): source lines 1378 and 1387 are missing from this
   listing.  `bin` has no visible declaration and the first `goto retry`
   has lost its guarding condition -- confirm against the pristine file.  */
1374static int
1375st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1376{
1377 st_table_entry *entry;
1379 st_index_t bin_ind;
1380 st_hash_t hash;
1381
1382 st_assert(tab != NULL);
1383 hash = do_hash(*key, tab);
1384 retry:
1385 if (tab->bins == NULL) {
1386 bin = find_entry(tab, hash, *key);
1388 goto retry;
1389 if (bin == UNDEFINED_ENTRY_IND) {
1390 if (value != 0) *value = 0;
1391 return 0;
1392 }
1393 }
1394 else {
1395 bin_ind = find_table_bin_ind(tab, hash, *key);
1396 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1397 goto retry;
1398 if (bin_ind == UNDEFINED_BIN_IND) {
1399 if (value != 0) *value = 0;
1400 return 0;
1401 }
/* Translate the bin content into an entries index, then mark the bin
   as deleted.  */
1402 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1403 MARK_BIN_DELETED(tab, bin_ind);
1404 }
1405 entry = &tab->entries[bin];
/* Hand the stored key (and record, if requested) back to the caller
   before the entry is marked deleted.  */
1406 *key = entry->key;
1407 if (value != 0) *value = entry->record;
1408 MARK_ENTRY_DELETED(entry);
1409 tab->num_entries--;
1410 update_range_for_deleted(tab, bin);
1411#ifdef ST_DEBUG
1412 st_check(tab);
1413#endif
1414 return 1;
1415}
1416
/* Thin wrapper that forwards to st_general_delete.
   NOTE(review): source line 1418 (the declarator) is missing from this
   listing.  */
1417int
1419{
1420 return st_general_delete(tab, key, value);
1421}
1422
1423/* The function and other functions with suffix '_safe' or '_check'
1424 are originated from the previous implementation of the hash tables.
1425 It was necessary for correct deleting entries during traversing
1426 tables. The current implementation permits deletion during
1427 traversing without a specific way to do this. */
/* Forwards to st_general_delete exactly like the wrapper above.
   NOTE(review): source lines 1429-1430 (the declarator) are missing
   from this listing.  */
1428int
1431{
1432 return st_general_delete(tab, key, value);
1433}
1434
1435/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1436 return zero. Otherwise, remove the first entry in the table.
1437 Return its key through KEY and its record through VALUE (unless
1438 VALUE is zero). */
/* Remove the first live entry of TAB, returning its key through KEY
   and, unless VALUE is null, its record through VALUE.  Returns 1 on
   success; returns 0 and clears *VALUE (unless null) when no live entry
   exists.
   NOTE(review): source lines 1440 (the declarator), 1443 and 1464 are
   missing from this listing; `bin` has no visible declaration.  The
   text is otherwise kept verbatim, with one fix on line 1478.  */
1439int
1441{
1442 st_index_t i, bound;
1444 st_table_entry *entries, *curr_entry_ptr;
1445 st_index_t bin_ind;
1446
1447 entries = tab->entries;
1448 bound = tab->entries_bound;
/* Scan from entries_start for the first entry not marked deleted. */
1449 for (i = tab->entries_start; i < bound; i++) {
1450 curr_entry_ptr = &entries[i];
1451 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1452 st_hash_t entry_hash = curr_entry_ptr->hash;
1453 st_data_t entry_key = curr_entry_ptr->key;
1454
1455 if (value != 0) *value = curr_entry_ptr->record;
1456 *key = entry_key;
/* Look the entry up again by (hash, key) to find its bin.  If the
   lookup reports a rebuild, `entries` is reloaded before retrying.  */
1457 retry:
1458 if (tab->bins == NULL) {
1459 bin = find_entry(tab, entry_hash, entry_key);
1460 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1461 entries = tab->entries;
1462 goto retry;
1463 }
1465 curr_entry_ptr = &entries[bin];
1466 }
1467 else {
1468 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1469 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1470 entries = tab->entries;
1471 goto retry;
1472 }
1473 st_assert(bin_ind != UNDEFINED_BIN_IND);
1474 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1475 - ENTRY_BASE];
1476 MARK_BIN_DELETED(tab, bin_ind);
1477 }
/* The entry found by the lookup must carry the same hash and key that
   were read from the scanned entry.  (The hash comparison was inverted
   to `!=`, which made this assertion fail on every successful shift.)  */
1478 st_assert(entry_hash == curr_entry_ptr->hash && entry_key == curr_entry_ptr->key);
1479 MARK_ENTRY_DELETED(curr_entry_ptr);
1480 tab->num_entries--;
1481 update_range_for_deleted(tab, i);
1482#ifdef ST_DEBUG
1483 st_check(tab);
1484#endif
1485 return 1;
1486 }
1487 }
/* No live entry in [entries_start, entries_bound): the table is empty. */
1488 st_assert(tab->num_entries == 0);
1489 if (value != 0) *value = 0;
1490 return 0;
1491}
1492
1493/* See comments for function st_delete_safe. */
/* Intentionally empty body: this function does nothing.
   NOTE(review): source lines 1495-1496 (the declarator) are missing
   from this listing.  */
1494void
1497{
1498}
1499
1500/* Find entry with KEY in table TAB, call FUNC with the key and the
1501 value of the found entry, and non-zero as the 3rd argument. If the
1502 entry is not found, call FUNC with KEY, and 2 zero arguments. If
1503 the call returns ST_CONTINUE, the table will have an entry with key
1504 and value returned by FUNC through the 1st and 2nd parameters. If
1505 the call of FUNC returns ST_DELETE, the table will not have entry
1506 with KEY. The function returns flag of that the entry with KEY was
1507 in the table before the call. */
/* NOTE(review): source lines 1509-1510 (the declarator), 1514 and 1525
   are missing from this listing.  `entries` has no visible declaration
   and the first `goto retry` has lost its guarding condition -- confirm
   against the pristine file.  */
1508int
1511{
1512 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1513 st_index_t bin = 0; /* Ditto */
1515 st_index_t bin_ind;
1516 st_data_t value = 0, old_key;
1517 st_index_t check;
1518 int retval, existing;
1519 st_hash_t hash = do_hash(key, tab);
1520
1521 retry:
1522 entries = tab->entries;
1523 if (tab->bins == NULL) {
1524 bin = find_entry(tab, hash, key);
1526 goto retry;
1527 existing = bin != UNDEFINED_ENTRY_IND;
1528 entry = &entries[bin];
1529 bin_ind = UNDEFINED_BIN_IND;
1530 }
1531 else {
1532 bin_ind = find_table_bin_ind(tab, hash, key);
1533 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1534 goto retry;
1535 existing = bin_ind != UNDEFINED_BIN_IND;
1536 if (existing) {
1537 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1538 entry = &entries[bin];
1539 }
1540 }
/* For an existing entry FUNC sees the stored key and record; otherwise
   it sees the caller's KEY and a zero value.  */
1541 if (existing) {
1542 key = entry->key;
1543 value = entry->record;
1544 }
1545 old_key = key;
/* The callback must not cause a table rebuild (asserted below). */
1546 check = tab->rebuilds_num;
1547 retval = (*func)(&key, &value, arg, existing);
1548 st_assert(check == tab->rebuilds_num);
1549 switch (retval) {
1550 case ST_CONTINUE:
1551 if (! existing) {
/* New key: add it with the hash computed before the callback ran. */
1552 st_add_direct_with_hash(tab, key, value, hash);
1553 break;
1554 }
1555 if (old_key != key) {
1556 entry->key = key;
1557 }
1558 entry->record = value;
1559 break;
1560 case ST_DELETE:
1561 if (existing) {
1562 if (bin_ind != UNDEFINED_BIN_IND)
1563 MARK_BIN_DELETED(tab, bin_ind);
1564 MARK_ENTRY_DELETED(entry);
1565 tab->num_entries--;
1566 update_range_for_deleted(tab, bin);
1567#ifdef ST_DEBUG
1568 st_check(tab);
1569#endif
1570 }
1571 break;
1572 }
/* Any other return value of FUNC leaves the table unchanged. */
1573#ifdef ST_DEBUG
1574 st_check(tab);
1575#endif
1576 return existing;
1577}
1578
1579/* Traverse all entries in table TAB calling FUNC with current entry
1580 key and value and zero. If the call returns ST_STOP, stop
1581 traversing. If the call returns ST_DELETE, delete the current
1582 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1583 traversing. The function returns zero unless an error is found.
1584 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1585 different for ST_CHECK and when the current element is removed
1586 during traversing. */
/* NOTE(review): source lines 1591, 1600 and 1667 are missing from this
   listing.  `bin` has no visible declaration and the `goto again` after
   line 1666 has lost its guarding condition -- confirm against the
   pristine file.  */
1587static inline int
1588st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1589 int check_p)
1590{
1592 st_index_t bin_ind;
1593 st_table_entry *entries, *curr_entry_ptr;
1594 enum st_retval retval;
1595 st_index_t i, rebuilds_num;
1596 st_hash_t hash;
1597 st_data_t key;
1598 int error_p, packed_p = tab->bins == NULL;
1599
1601 entries = tab->entries;
1602 /* The bound can change inside the loop even without rebuilding
1603 the table, e.g. by an entry insertion. */
1604 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1605 curr_entry_ptr = &entries[i];
1606 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1607 continue;
/* Snapshot key, hash and the rebuild counter before calling out, so a
   rebuild caused by the callback can be detected and the entry found
   again.  */
1608 key = curr_entry_ptr->key;
1609 rebuilds_num = tab->rebuilds_num;
1610 hash = curr_entry_ptr->hash;
1611 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1612
1613 if (retval == ST_REPLACE && replace) {
/* Let REPLACE rewrite key and record; its result becomes the return
   value acted on below.  */
1614 st_data_t value;
1615 value = curr_entry_ptr->record;
1616 retval = (*replace)(&key, &value, arg, TRUE);
1617 curr_entry_ptr->key = key;
1618 curr_entry_ptr->record = value;
1619 }
1620
1621 if (rebuilds_num != tab->rebuilds_num) {
/* The table was rebuilt during the callback: reload the cached state
   and find the current entry again by (hash, key).  */
1622 retry:
1623 entries = tab->entries;
1624 packed_p = tab->bins == NULL;
1625 if (packed_p) {
1626 i = find_entry(tab, hash, key);
1627 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1628 goto retry;
1629 error_p = i == UNDEFINED_ENTRY_IND;
1630 }
1631 else {
1632 i = find_table_entry_ind(tab, hash, key);
1633 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1634 goto retry;
1635 error_p = i == UNDEFINED_ENTRY_IND;
1636 i -= ENTRY_BASE;
1637 }
1638 if (error_p && check_p) {
1639 /* call func with error notice */
1640 retval = (*func)(0, 0, arg, 1);
1641#ifdef ST_DEBUG
1642 st_check(tab);
1643#endif
1644 return 1;
1645 }
1646 curr_entry_ptr = &entries[i];
1647 }
1648 switch (retval) {
1649 case ST_REPLACE:
1650 break;
1651 case ST_CONTINUE:
1652 break;
1653 case ST_CHECK:
1654 if (check_p)
1655 break;
/* Without check_p, ST_CHECK falls through and stops like ST_STOP. */
1656 case ST_STOP:
1657#ifdef ST_DEBUG
1658 st_check(tab);
1659#endif
1660 return 0;
1661 case ST_DELETE: {
/* This `key` shadows the outer variable; it is re-read from the entry. */
1662 st_data_t key = curr_entry_ptr->key;
1663
1664 again:
1665 if (packed_p) {
1666 bin = find_entry(tab, hash, key);
1668 goto again;
1669 if (bin == UNDEFINED_ENTRY_IND)
1670 break;
1671 }
1672 else {
1673 bin_ind = find_table_bin_ind(tab, hash, key);
1674 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1675 goto again;
1676 if (bin_ind == UNDEFINED_BIN_IND)
1677 break;
1678 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1679 MARK_BIN_DELETED(tab, bin_ind);
1680 }
1681 curr_entry_ptr = &entries[bin];
1682 MARK_ENTRY_DELETED(curr_entry_ptr);
1683 tab->num_entries--;
1684 update_range_for_deleted(tab, bin);
1685#ifdef ST_DEBUG
1686 st_check(tab);
1687#endif
1688 break;
1689 }
1690 }
1691 }
1692#ifdef ST_DEBUG
1693 st_check(tab);
1694#endif
1695 return 0;
1696}
1697
/* Traverses TAB through st_general_foreach with a replace callback and
   check_p set to TRUE.
   NOTE(review): source line 1699 (the declarator) is missing from this
   listing.  */
1698int
1700{
1701 return st_general_foreach(tab, func, replace, arg, TRUE);
1702}
1703
/* Bundles a callback with the argument to hand to it; apply_functor
   calls f->func(k, v, f->arg).
   NOTE(review): source line 1705, presumably the `func` member, is
   missing from this listing.  */
1704struct functor {
1706 st_data_t arg;
1707};
1708
1709static int
1710apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1711{
1712 const struct functor *f = (void *)d;
1713 return f->func(k, v, f->arg);
1714}
1715
/* Wraps FUNC and ARG in a struct functor and traverses TAB through
   st_general_foreach with apply_functor, no replace callback and
   check_p set to FALSE.
   NOTE(review): source line 1717 (the declarator) is missing from this
   listing.  */
1716int
1718{
1719 const struct functor f = { func, arg };
1720 return st_general_foreach(tab, apply_functor, NULL, (st_data_t)&f, FALSE);
1721}
1722
1723/* See comments for function st_delete_safe. */
/* Traverses TAB through st_general_foreach with no replace callback and
   check_p set to TRUE.
   NOTE(review): source lines 1725-1726 (the declarator) are missing
   from this listing.  */
1724int
1727{
1728 return st_general_foreach(tab, func, NULL, arg, TRUE);
1729}
1730
1731/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1732 Return the number of keys set up in array KEYS. */
1733static inline st_index_t
1734st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1735{
1736 st_index_t i, bound;
1737 st_data_t key, *keys_start, *keys_end;
1738 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1739
1740 bound = tab->entries_bound;
1741 keys_start = keys;
1742 keys_end = keys + size;
1743 for (i = tab->entries_start; i < bound; i++) {
1744 if (keys == keys_end)
1745 break;
1746 curr_entry_ptr = &entries[i];
1747 key = curr_entry_ptr->key;
1748 if (! DELETED_ENTRY_P(curr_entry_ptr))
1749 *keys++ = key;
1750 }
1751
1752 return keys - keys_start;
1753}
1754
/* Thin wrapper that forwards to st_general_keys.
   NOTE(review): source lines 1755-1756 (return type and declarator) are
   missing from this listing.  */
1757{
1758 return st_general_keys(tab, keys, size);
1759}
1760
1761/* See comments for function st_delete_safe. */
/* Thin wrapper that forwards to st_general_keys.
   NOTE(review): source lines 1762-1764 (return type and declarator) are
   missing from this listing.  */
1765{
1766 return st_general_keys(tab, keys, size);
1767}
1768
1769/* Set up array VALUES by at most SIZE values of head table TAB
1770 entries. Return the number of values set up in array VALUES. */
1771static inline st_index_t
1772st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1773{
1774 st_index_t i, bound;
1775 st_data_t *values_start, *values_end;
1776 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1777
1778 values_start = values;
1779 values_end = values + size;
1780 bound = tab->entries_bound;
1781 st_assert(bound != 0);
1782 for (i = tab->entries_start; i < bound; i++) {
1783 if (values == values_end)
1784 break;
1785 curr_entry_ptr = &entries[i];
1786 if (! DELETED_ENTRY_P(curr_entry_ptr))
1787 *values++ = curr_entry_ptr->record;
1788 }
1789
1790 return values - values_start;
1791}
1792
/* Thin wrapper that forwards to st_general_values.
   NOTE(review): source lines 1793-1794 (return type and declarator) are
   missing from this listing.  */
1795{
1796 return st_general_values(tab, values, size);
1797}
1798
1799/* See comments for function st_delete_safe. */
/* Thin wrapper that forwards to st_general_values.
   NOTE(review): source lines 1800-1802 (return type and declarator) are
   missing from this listing.  */
1803{
1804 return st_general_values(tab, values, size);
1805}
1806
/* Initial hash state used by strhash and strcasehash below. */
1807#define FNV1_32A_INIT 0x811c9dc5
1808
1809/*
1810 * 32 bit magic FNV-1a prime
1811 */
1812#define FNV_32_PRIME 0x01000193
1813
/* UNALIGNED_WORD_ACCESS defaults to 1 on the architectures tested below
   and to 0 elsewhere; a value defined before this point is kept.
   st_hash selects its word-reading strategy from it.  */
1814#ifndef UNALIGNED_WORD_ACCESS
1815# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1816 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1817 defined(__powerpc64__) || defined(__aarch64__) || \
1818 defined(__mc68020__)
1819# define UNALIGNED_WORD_ACCESS 1
1820# endif
1821#endif
1822#ifndef UNALIGNED_WORD_ACCESS
1823# define UNALIGNED_WORD_ACCESS 0
1824#endif
1825
/* This hash function is quite simplified MurmurHash3
 * Simplification is legal, cause most of magic still happens in finalizator.
 * And finalizator is almost the same as in MurmurHash3 */

/* BIG_CONSTANT(x,y): st_index_t whose upper part is X shifted left by
   32 bits and whose low 32 bits are Y.  */
#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
/* ROTL(x,n): rotate X left by N bits, treating X as a value of
   SIZEOF_ST_INDEX_T*CHAR_BIT bits.  N must be strictly between 0 and
   that width, otherwise one of the shifts is undefined.  */
#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))

/* Multipliers used by murmur_step and st_hash.  They are expressions
   without a trailing semicolon, so they can be used anywhere an
   expression is allowed, not only at the end of a statement.  */
#if ST_INDEX_BITS <= 32
#define C1 ((st_index_t)0xcc9e2d51)
#define C2 ((st_index_t)0x1b873593)
#else
#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5)
#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f)
#endif
1839NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1840NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1841NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1842
1843static inline st_index_t
1844murmur_step(st_index_t h, st_index_t k)
1845{
1846#if ST_INDEX_BITS <= 32
1847#define r1 (17)
1848#define r2 (11)
1849#else
1850#define r1 (33)
1851#define r2 (24)
1852#endif
1853 k *= C1;
1854 h ^= ROTL(k, r1);
1855 h *= C2;
1856 h = ROTL(h, r2);
1857 return h;
1858}
1859#undef r1
1860#undef r2
1861
1862static inline st_index_t
1863murmur_finish(st_index_t h)
1864{
1865#if ST_INDEX_BITS <= 32
1866#define r1 (16)
1867#define r2 (13)
1868#define r3 (16)
1869 const st_index_t c1 = 0x85ebca6b;
1870 const st_index_t c2 = 0xc2b2ae35;
1871#else
1872/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1873#define r1 (30)
1874#define r2 (27)
1875#define r3 (31)
1876 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1877 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1878#endif
1879#if ST_INDEX_BITS > 64
1880 h ^= h >> 64;
1881 h *= c2;
1882 h ^= h >> 65;
1883#endif
1884 h ^= h >> r1;
1885 h *= c1;
1886 h ^= h >> r2;
1887 h *= c2;
1888 h ^= h >> r3;
1889 return h;
1890}
1891#undef r1
1892#undef r2
1893#undef r3
1894
/* Hash the LEN bytes at PTR starting from state H: whole st_index_t
   words go through murmur_step, the remaining bytes are folded in
   separately, the original length is xor-ed in, and murmur_finish
   produces the result.
   NOTE(review): source lines 1895 (the return type), 1931, 1970, 2010
   and 2025 are missing from this listing.  Three of the switch
   statements below therefore show no case bodies and `case 4:` shows no
   statement before its goto -- confirm against the pristine file.  */
1896st_hash(const void *ptr, size_t len, st_index_t h)
1897{
1898 const char *data = ptr;
1899 st_index_t t = 0;
/* Keep the original length; `len` is consumed while hashing. */
1900 size_t l = len;
1901
1902#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1903#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1904#if SIZEOF_ST_INDEX_T > 4
1905#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1906#if SIZEOF_ST_INDEX_T > 8
1907#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1908 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1909#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1910#endif
/* NOTE(review): when SIZEOF_ST_INDEX_T > 8 the next line redefines
   UNALIGNED_ADD_ALL with a different body.  */
1911#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1912#else
1913#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1914#endif
1915#undef SKIP_TAIL
1916 if (len >= sizeof(st_index_t)) {
1917#if !UNALIGNED_WORD_ACCESS
/* Unaligned word loads are not allowed here: when DATA is misaligned,
   read aligned words and stitch them together with shifts.  */
1918 int align = (int)((st_data_t)data % sizeof(st_index_t));
1919 if (align) {
1920 st_index_t d = 0;
1921 int sl, sr, pack;
1922
1923 switch (align) {
1924#ifdef WORDS_BIGENDIAN
1925# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1926 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1927#else
1928# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1929 t |= data_at(n) << CHAR_BIT*(n)
1930#endif
1932#undef UNALIGNED_ADD
1933 }
1934
1935#ifdef WORDS_BIGENDIAN
1936 t >>= (CHAR_BIT * align) - CHAR_BIT;
1937#else
1938 t <<= (CHAR_BIT * align);
1939#endif
1940
/* Skip the leading bytes so that DATA is word-aligned from here on. */
1941 data += sizeof(st_index_t)-align;
1942 len -= sizeof(st_index_t)-align;
1943
1944 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1945 sr = CHAR_BIT * align;
1946
1947 while (len >= sizeof(st_index_t)) {
1948 d = *(st_index_t *)data;
1949#ifdef WORDS_BIGENDIAN
1950 t = (t << sr) | (d >> sl);
1951#else
1952 t = (t >> sr) | (d << sl);
1953#endif
1954 h = murmur_step(h, t);
1955 t = d;
1956 data += sizeof(st_index_t);
1957 len -= sizeof(st_index_t);
1958 }
1959
1960 pack = len < (size_t)align ? (int)len : align;
1961 d = 0;
1962 switch (pack) {
1963#ifdef WORDS_BIGENDIAN
1964# define UNALIGNED_ADD(n) case (n) + 1: \
1965 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1966#else
1967# define UNALIGNED_ADD(n) case (n) + 1: \
1968 d |= data_at(n) << CHAR_BIT*(n)
1969#endif
1971#undef UNALIGNED_ADD
1972 }
1973#ifdef WORDS_BIGENDIAN
1974 t = (t << sr) | (d >> sl);
1975#else
1976 t = (t >> sr) | (d << sl);
1977#endif
1978
1979 if (len < (size_t)align) goto skip_tail;
1980# define SKIP_TAIL 1
1981 h = murmur_step(h, t);
1982 data += pack;
1983 len -= pack;
1984 }
1985 else
1986#endif
1987#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1988#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1989#else
1990#define aligned_data data
1991#endif
1992 {
/* Direct path: feed one whole word per iteration to murmur_step. */
1993 do {
1994 h = murmur_step(h, *(st_index_t *)aligned_data);
1995 data += sizeof(st_index_t);
1996 len -= sizeof(st_index_t);
1997 } while (len >= sizeof(st_index_t));
1998 }
1999 }
2000
/* Tail: fewer than sizeof(st_index_t) bytes remain; collect them in T. */
2001 t = 0;
2002 switch (len) {
2003#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
2004 /* in this case byteorder doesn't really matter */
2005#if SIZEOF_ST_INDEX_T > 4
2006 case 7: t |= data_at(6) << 48;
2007 case 6: t |= data_at(5) << 40;
2008 case 5: t |= data_at(4) << 32;
2009 case 4:
2011 goto skip_tail;
2012# define SKIP_TAIL 1
2013#endif
2014 case 3: t |= data_at(2) << 16;
2015 case 2: t |= data_at(1) << 8;
2016 case 1: t |= data_at(0);
2017#else
2018#ifdef WORDS_BIGENDIAN
2019# define UNALIGNED_ADD(n) case (n) + 1: \
2020 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
2021#else
2022# define UNALIGNED_ADD(n) case (n) + 1: \
2023 t |= data_at(n) << CHAR_BIT*(n)
2024#endif
2026#undef UNALIGNED_ADD
2027#endif
2028#ifdef SKIP_TAIL
2029 skip_tail:
2030#endif
/* Fold the collected tail word into the state. */
2031 h ^= t; h -= ROTL(t, 7);
2032 h *= C2;
2033 }
/* Mix in the original byte count before finalizing. */
2034 h ^= l;
2035#undef aligned_data
2036
2037 return murmur_finish(h);
2038}
2039
/* Performs one murmur_step of `i` into state `h` and returns the result.
   NOTE(review): source lines 2040-2041 (return type and declarator) are
   missing from this listing.  */
2042{
2043 return murmur_step(h, i);
2044}
2045
2046NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
/* Mixes integer `i` into state `h`: `h` is first added to `i`, then `i`
   goes through murmur_step, preceded by a step for the bits above 64
   when st_index_t is wider than 64 bits.
   NOTE(review): source lines 2047-2048 (return type and declarator) are
   missing from this listing.  */
2049{
2050 i += h;
2051/* no matter if it is BigEndian or LittleEndian,
2052 * we hash just integers */
2053#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
2054 h = murmur_step(h, i >> 8*8);
2055#endif
2056 h = murmur_step(h, i);
2057 return h;
2058}
2059
/* Applies murmur_finish to state `h` and returns the result.
   NOTE(review): source lines 2060-2061 (return type and declarator) are
   missing from this listing.  */
2062{
2063 h = murmur_finish(h);
2064 return h;
2065}
2066
2067#undef st_hash_start
/* Returns `h` unchanged.
   NOTE(review): source lines 2068-2069 (return type and declarator) are
   missing from this listing; the preceding `#undef st_hash_start`
   suggests the name is also defined as a macro elsewhere -- confirm.  */
2070{
2071 return h;
2072}
2073
2074static st_index_t
2075strhash(st_data_t arg)
2076{
2077 register const char *string = (const char *)arg;
2078 return st_hash(string, strlen(string), FNV1_32A_INIT);
2079}
2080
/* Compare S1 and S2 ignoring the case of the ASCII letters 'A'..'Z'
   only, independently of the current locale.  Returns 0 when equal, 1
   when S1 sorts after S2 (or S2 is a proper prefix of S1), and -1
   otherwise.  Characters are compared as plain `char`.  */
int
st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
{
    for (;; s1++, s2++) {
        char a = *s1;
        char b = *s2;

        /* End of either string decides the result. */
        if (a == '\0')
            return b == '\0' ? 0 : -1;
        if (b == '\0')
            return 1;

        /* Fold upper-case ASCII letters to lower case. */
        if (a >= 'A' && a <= 'Z') a += 'a' - 'A';
        if (b >= 'A' && b <= 'Z') b += 'a' - 'A';

        if (a != b)
            return a > b ? 1 : -1;
    }
}
2104
/* Like st_locale_insensitive_strcasecmp, but at most the first N
   characters of S1 and S2 are compared.  Returns 0 when those
   characters are equal ignoring ASCII case (always so for N == 0),
   otherwise 1 or -1.  */
int
st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
{
    size_t remaining;

    for (remaining = n; remaining > 0; remaining--, s1++, s2++) {
        char a = *s1;
        char b = *s2;

        /* End of either string decides the result. */
        if (a == '\0')
            return b == '\0' ? 0 : -1;
        if (b == '\0')
            return 1;

        /* Fold upper-case ASCII letters to lower case. */
        if (a >= 'A' && a <= 'Z') a += 'a' - 'A';
        if (b >= 'A' && b <= 'Z') b += 'a' - 'A';

        if (a != b)
            return a > b ? 1 : -1;
    }
    return 0;
}
2130
2131static int
2132st_strcmp(st_data_t lhs, st_data_t rhs)
2133{
2134 const char *s1 = (char *)lhs;
2135 const char *s2 = (char *)rhs;
2136 return strcmp(s1, s2);
2137}
2138
/* Compare callback taking st_data_t arguments that are converted to
   string pointers.
   NOTE(review): source line 2144, presumably the return statement that
   uses s1 and s2, is missing from this listing.  */
2139static int
2140st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2141{
2142 const char *s1 = (char *)lhs;
2143 const char *s2 = (char *)rhs;
2145}
2146
2147NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2148static st_index_t
2149strcasehash(st_data_t arg)
2150{
2151 register const char *string = (const char *)arg;
2152 register st_index_t hval = FNV1_32A_INIT;
2153
2154 /*
2155 * FNV-1a hash each octet in the buffer
2156 */
2157 while (*string) {
2158 unsigned int c = (unsigned char)*string++;
2159 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2160 hval ^= c;
2161
2162 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2163 hval *= FNV_32_PRIME;
2164 }
2165 return hval;
2166}
2167
/* Compare callback for numeric keys: returns zero when x equals y and
   non-zero otherwise.
   NOTE(review): source line 2169 (the declarator) is missing from this
   listing.  */
2168int
2170{
2171 return x != y;
2172}
2173
/* Hash callback for numeric keys: combines `n` with shifted copies of
   itself.
   NOTE(review): source lines 2174-2175 (return type and declarator) are
   missing from this listing.  */
2176{
2177 enum {s1 = 11, s2 = 3};
2178 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2179}
2180
2181/* Expand TAB to be suitable for holding SIZ entries in total.
2182 Pre-existing entries remain not deleted inside of TAB, but its bins
2183 are cleared to expect future reconstruction. See rehash below. */
2184static void
2185st_expand_table(st_table *tab, st_index_t siz)
2186{
2187 st_table *tmp;
2188 st_index_t n;
2189
2190 if (siz <= get_allocated_entries(tab))
2191 return; /* enough room already */
2192
2193 tmp = st_init_table_with_size(tab->type, siz);
2194 n = get_allocated_entries(tab);
2195 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2196 free(tab->entries);
2197 if (tab->bins != NULL)
2198 free(tab->bins);
2199 if (tmp->bins != NULL)
2200 free(tmp->bins);
2201 tab->entry_power = tmp->entry_power;
2202 tab->bin_power = tmp->bin_power;
2203 tab->size_ind = tmp->size_ind;
2204 tab->entries = tmp->entries;
2205 tab->bins = NULL;
2206 tab->rebuilds_num++;
2207 free(tmp);
2208}
2209
2210/* Rehash using linear search. Return TRUE if we found that the table
2211 was rebuilt. */
/* NOTE(review): source line 2236 is missing from this listing.  In the
   duplicate-key branch below nothing visibly marks Q as deleted although
   the entry count is decremented -- presumably the dropped line does
   that; confirm against the pristine file.  */
2212static int
2213st_rehash_linear(st_table *tab)
2214{
2215 int eq_p, rebuilt_p;
2216 st_index_t i, j;
2217 st_table_entry *p, *q;
/* This variant works without bins: drop them if present. */
2218 if (tab->bins) {
2219 free(tab->bins);
2220 tab->bins = NULL;
2221 }
/* Compare every live entry P with every later live entry Q. */
2222 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2223 p = &tab->entries[i];
2224 if (DELETED_ENTRY_P(p))
2225 continue;
2226 for (j = i + 1; j < tab->entries_bound; j++) {
2227 q = &tab->entries[j];
2228 if (DELETED_ENTRY_P(q))
2229 continue;
2230 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2231 if (EXPECT(rebuilt_p, 0))
2232 return TRUE;
2233 if (eq_p) {
/* Same key: the later entry's contents replace the earlier one. */
2234 st_assert(p < q);
2235 *p = *q;
2237 tab->num_entries--;
2238 update_range_for_deleted(tab, j);
2239 }
2240 }
2241 }
2242 return FALSE;
2243}
2244
2245/* Rehash using index. Return TRUE if we found that the table was
2246 rebuilt. */
/* NOTE(review): source lines 2273 and 2287 are missing from this
   listing.  The `if` that opens the "new room" branch and one statement
   of the duplicate-key branch are not shown -- confirm against the
   pristine file.  */
2247static int
2248st_rehash_indexed(st_table *tab)
2249{
2250 int eq_p, rebuilt_p;
2251 st_index_t i;
2252 st_index_t const n = bins_size(tab);
2253 unsigned int const size_ind = get_size_ind(tab);
/* (Re)allocate the bins array at the size the table calls for and reset
   it.  The realloc result is only checked through st_assert.  */
2254 st_index_t *bins = realloc(tab->bins, n);
2255 st_assert(bins != NULL);
2256 tab->bins = bins;
2257 initialize_bins(tab);
2258 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2259 st_table_entry *p = &tab->entries[i];
2260 st_index_t ind;
2261#ifdef QUADRATIC_PROBE
2262 st_index_t d = 1;
2263#else
2264 st_index_t peterb = p->hash;
2265#endif
2266
2267 if (DELETED_ENTRY_P(p))
2268 continue;
2269
/* Probe for a bin for entry P, starting at the bin selected by its hash. */
2270 ind = hash_bin(p->hash, tab);
2271 for(;;) {
2272 st_index_t bin = get_bin(bins, size_ind, ind);
2274 /* ok, new room */
2275 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2276 break;
2277 }
2278 else {
2279 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2280 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2281 if (EXPECT(rebuilt_p, 0))
2282 return TRUE;
2283 if (eq_p) {
2284 /* duplicated key; delete it */
2285 st_assert(q < p);
/* The earlier entry Q keeps its slot and takes over P's record. */
2286 q->record = p->record;
2288 tab->num_entries--;
2289 update_range_for_deleted(tab, bin);
2290 break;
2291 }
2292 else {
2293 /* hash collision; skip it */
2294#ifdef QUADRATIC_PROBE
2295 ind = hash_bin(ind + d, tab);
2296 d++;
2297#else
2298 ind = secondary_hash(ind, tab, &peterb);
2299#endif
2300 }
2301 }
2302 }
2303 }
2304 return FALSE;
2305}
2306
2307/* Reconstruct TAB's bins according to TAB's entries. This function
2308 permits conflicting keys inside of entries. No errors are reported
2309 then. All but one of them are discarded silently. */
/* Repeats the rehash until a pass completes without reporting that the
   table was rebuilt.
   NOTE(review): source line 2316, the `if` that chooses between the
   linear and the indexed variant, is missing from this listing.  */
2310static void
2311st_rehash(st_table *tab)
2312{
2313 int rebuilt_p;
2314
2315 do {
2317 rebuilt_p = st_rehash_linear(tab);
2318 else
2319 rebuilt_p = st_rehash_indexed(tab);
2320 } while (rebuilt_p);
2321}
2322
2323#ifdef RUBY
/* Chooses the value to store as key depending on whether KEY is an
   unfrozen object whose class is exactly rb_cString.
   NOTE(review): source line 2328, holding both operands of the `?:`, is
   missing from this listing.  */
2324static st_data_t
2325st_stringify(VALUE key)
2326{
2327 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2329}
2330
/* Appends one (KEY, VAL) pair to the entries array of TAB without
   touching the bins and without checking capacity or duplicates.
   NOTE(review): source line 2335, presumably the declaration of `e`, is
   missing from this listing.  */
2331static void
2332st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2333{
2334 st_data_t k = st_stringify(key);
2336 e.hash = do_hash(k, tab);
2337 e.key = k;
2338 e.record = val;
2339
2340 tab->entries[tab->entries_bound++] = e;
2341 tab->num_entries++;
/* Tell the write barrier that HASH now refers to the key and the value. */
2342 RB_OBJ_WRITTEN(hash, Qundef, k);
2343 RB_OBJ_WRITTEN(hash, Qundef, val);
2344}
2345
2346static void
2347st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2348{
2349 long i;
2350
2351 for (i = 0; i < argc; /* */) {
2352 st_data_t k = st_stringify(argv[i++]);
2353 st_data_t v = argv[i++];
2354 st_insert(tab, k, v);
2355 RB_OBJ_WRITTEN(hash, Qundef, k);
2356 RB_OBJ_WRITTEN(hash, Qundef, v);
2357 }
2358}
2359
2360static void
2361st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2362{
2363 long i;
2364
2365 /* push elems */
2366 for (i = 0; i < argc; /* */) {
2367 VALUE key = argv[i++];
2368 VALUE val = argv[i++];
2369 st_insert_single(tab, hash, key, val);
2370 }
2371
2372 /* reindex */
2373 st_rehash(tab);
2374}
2375
2376/* Mimics ruby's { foo => bar } syntax. This function is subpart
2377 of rb_hash_bulk_insert. */
/* NOTE(review): source lines 2379 (the declarator) and 2391 (the
   condition that selects st_insert_linear) are missing from this
   listing -- confirm against the pristine file.  */
2378void
2380{
2381 st_index_t n, size = argc / 2;
2382 st_table *tab = RHASH_ST_TABLE(hash);
2383
2384 tab = RHASH_TBL_RAW(hash);
/* Make room for all pairs up front; this also drops the bins. */
2385 n = tab->entries_bound + size;
2386 st_expand_table(tab, n);
/* A non-empty table always takes the generic path. */
2387 if (UNLIKELY(tab->num_entries))
2388 st_insert_generic(tab, argc, argv, hash);
2389 else if (argc <= 2)
2390 st_insert_single(tab, hash, argv[0], argv[1]);
2392 st_insert_linear(tab, argc, argv, hash);
2393 else
2394 st_insert_generic(tab, argc, argv, hash);
2395}
2396#endif
struct RIMemo * ptr
Definition: debug.c:65
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
int st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
Definition: st.c:2082
VALUE rb_cString
Definition: ruby.h:2046
int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: st.c:2106
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2671
VALUE rb_eRuntimeError
Definition: error.c:922
VALUE rb_obj_class(VALUE)
Equivalent to Object#class in Ruby.
Definition: object.c:217
st_index_t st_hash_t
Definition: hash.c:327
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:39
__uint32_t uint32_t
#define MEMCPY(p1, p2, type, n)
#define NULL
#define _(args)
#define RHASH_TBL_RAW(h)
unsigned long st_data_t
size_t strlen(const char *)
int strcmp(const char *, const char *)
int st_foreach_check_callback_func(st_data_t, st_data_t, st_data_t, int)
#define Qundef
#define CHAR_BIT
int snprintf(char *__restrict__, size_t, const char *__restrict__,...) __attribute__((__format__(__printf__
struct st_table_entry st_table_entry
int fprintf(FILE *__restrict__, const char *__restrict__,...) __attribute__((__format__(__printf__
const char size_t n
VALUE rb_hash_key_str(VALUE)
Definition: hash.c:2802
unsigned long VALUE
int fclose(FILE *)
const rb_iseq_t const char const VALUE keys
uint32_t i
#define PUREFUNC(x)
#define char
__inline__ const void *__restrict__ size_t len
#define long
void * memset(void *, int, size_t)
#define ST_INDEX_BITS
const char * s2
int VALUE v
#define short
#define TRUE
#define FALSE
#define SIZEOF_ST_INDEX_T
#define RHASH_ST_TABLE(hash)
unsigned int size
long unsigned int size_t
#define UNLIKELY(x)
int st_update_callback_func(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
FILE * fopen(const char *__restrict__ _name, const char *__restrict__ _type)
st_data_t st_index_t
pid_t getpid(void)
const VALUE * argv
__inline__ int
int st_foreach_callback_func(st_data_t, st_data_t, st_data_t)
#define RB_OBJ_FROZEN(x)
int atexit(void(*__func)(void))
size_t st_index_t h
#define RB_OBJ_WRITTEN(a, oldv, b)
struct iseq_catch_table_entry entries[]
char bin[32]
Definition: siphash.c:135
#define f
#define RESERVED_HASH_SUBSTITUTION_VAL
Definition: st.c:317
#define UNDEFINED_BIN_IND
Definition: st.c:396
st_index_t st_numhash(st_data_t n)
Definition: st.c:2175
#define r2
#define type_numhash
Definition: st.c:142
void st_free_table(st_table *tab)
Definition: st.c:709
st_table * st_init_numtable_with_size(st_index_t size)
Definition: st.c:660
#define realloc
Definition: st.c:175
#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS
Definition: st.c:342
#define ENTRY_BASE
Definition: st.c:387
#define MARK_BIN_DELETED(tab, i)
Definition: st.c:406
#define MINIMAL_POWER2
Definition: st.c:334
#define free
Definition: st.c:176
size_t st_memsize(const st_table *tab)
Definition: st.c:719
int st_delete(st_table *tab, st_data_t *key, st_data_t *value)
Definition: st.c:1418
int st_insert2(st_table *tab, st_data_t key, st_data_t value, st_data_t(*func)(st_data_t))
Definition: st.c:1263
#define IND_EMPTY_BIN_P(tab, i)
Definition: st.c:421
st_index_t st_keys(st_table *tab, st_data_t *keys, st_index_t size)
Definition: st.c:1756
#define MAX_POWER2
Definition: st.c:277
#define IND_DELETED_BIN_P(tab, i)
Definition: st.c:422
st_index_t st_hash_uint32(st_index_t h, uint32_t i)
Definition: st.c:2041
#define FNV1_32A_INIT
Definition: st.c:1807
#define C1
Definition: st.c:1833
int st_shift(st_table *tab, st_data_t *key, st_data_t *value)
Definition: st.c:1440
#define MARK_BIN_EMPTY(tab, i)
Definition: st.c:391
#define REBUILT_TABLE_BIN_IND
Definition: st.c:401
int st_numcmp(st_data_t x, st_data_t y)
Definition: st.c:2169
void st_add_direct(st_table *tab, st_data_t key, st_data_t value)
Definition: st.c:1251
#define DELETED_ENTRY_P(e_ptr)
Definition: st.c:428
#define ST_INIT_VAL
Definition: st.c:165
st_index_t st_keys_check(st_table *tab, st_data_t *keys, st_index_t size, st_data_t never ATTRIBUTE_UNUSED)
Definition: st.c:1763
#define EMPTY_BIN_P(b)
Definition: st.c:415
st_index_t rb_st_hash_start(st_index_t h)
Definition: st.c:2069
#define data_at(n)
#define UNALIGNED_ADD_ALL
#define FOUND_BIN
Definition: st.c:760
#define REBUILD_THRESHOLD
Definition: st.c:766
#define ATTRIBUTE_UNUSED
Definition: st.c:124
st_table * st_init_numtable(void)
Definition: st.c:653
st_table * st_init_strtable(void)
Definition: st.c:668
#define UNDEFINED_ENTRY_IND
Definition: st.c:395
st_index_t st_hash_uint(st_index_t h, st_index_t i)
Definition: st.c:2048
NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k))
const st_hash_t st_reserved_hash_substitution_val
Definition: st.c:320
#define r1
#define aligned_data
int st_insert(st_table *tab, st_data_t key, st_data_t value)
Definition: st.c:1171
st_index_t st_values(st_table *tab, st_data_t *values, st_index_t size)
Definition: st.c:1794
void st_clear(st_table *tab)
Definition: st.c:698
#define ROTL(x, n)
Definition: st.c:1830
void rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
Definition: st.c:2379
#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p)
Definition: st.c:185
st_index_t st_hash(const void *ptr, size_t len, st_index_t h)
Definition: st.c:1896
#define RESERVED_HASH_VAL
Definition: st.c:316
int st_lookup(st_table *tab, st_data_t key, st_data_t *value)
Definition: st.c:1101
#define COLLISION
Definition: st.c:759
int st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
Definition: st.c:1717
#define EXPECT(expr, val)
Definition: st.c:123
void st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED, st_data_t never ATTRIBUTE_UNUSED)
Definition: st.c:1495
#define st_assert(cond)
Definition: st.c:130
st_index_t st_values_check(st_table *tab, st_data_t *values, st_index_t size, st_data_t never ATTRIBUTE_UNUSED)
Definition: st.c:1801
st_table * st_init_table(const struct st_hash_type *type)
Definition: st.c:645
#define EMPTY_OR_DELETED_BIN_P(b)
Definition: st.c:417
int st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
Definition: st.c:1699
int st_update(st_table *tab, st_data_t key, st_update_callback_func *func, st_data_t arg)
Definition: st.c:1509
int st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg, st_data_t never ATTRIBUTE_UNUSED)
Definition: st.c:1725
#define REBUILT_TABLE_ENTRY_IND
Definition: st.c:400
st_index_t st_hash_t
Definition: st.c:134
st_table * st_copy(st_table *old_tab)
Definition: st.c:1320
int st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value, st_data_t never ATTRIBUTE_UNUSED)
Definition: st.c:1429
#define ST_INIT_VAL_BYTE
Definition: st.c:166
#define BIG_CONSTANT(x, y)
Definition: st.c:1829
#define malloc
Definition: st.c:173
#define C2
Definition: st.c:1834
#define PREFETCH(addr, write_p)
Definition: st.c:122
#define FNV_32_PRIME
Definition: st.c:1812
st_table * st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
Definition: st.c:577
st_index_t st_hash_end(st_index_t h)
Definition: st.c:2061
#define DELETED_BIN_P(b)
Definition: st.c:416
#define r3
const st_hash_t st_reserved_hash_val
Definition: st.c:319
int st_get_key(st_table *tab, st_data_t key, st_data_t *result)
Definition: st.c:1130
st_table * st_init_strcasetable(void)
Definition: st.c:683
st_table * st_init_strtable_with_size(st_index_t size)
Definition: st.c:675
#define MARK_ENTRY_DELETED(e_ptr)
Definition: st.c:427
st_table * st_init_strcasetable_with_size(st_index_t size)
Definition: st.c:691
Definition: hash.c:909
st_foreach_callback_func * func
Definition: hash.c:910
st_data_t arg
Definition: hash.c:911
unsigned char entry_power
Definition: st.c:195
unsigned char size_ind
Definition: st.c:201
unsigned char bin_power
Definition: st.c:199
st_index_t bins_words
Definition: st.c:204
st_index_t(* hash)(st_data_t)
Definition: st.c:136
st_hash_t hash
Definition: st.c:137
st_data_t record
Definition: st.c:139
st_data_t key
Definition: st.c:138
unsigned char bin_power
st_table_entry * entries
unsigned int rebuilds_num
st_index_t entries_bound
unsigned char entry_power
const struct st_hash_type * type
unsigned char size_ind
st_index_t entries_start
#define getenv(name)
Definition: win32.c:73