Ruby  2.5.0dev(2017-10-22revision60238)
st.c
Go to the documentation of this file.
1 /* This is a public domain general purpose hash table package
2  originally written by Peter Moore @ UCB.
3 
4  The hash table data structures were redesigned and the package was
5  rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6 
7 /* The original package implemented classic bucket-based hash tables
8  with entries doubly linked for an access by their insertion order.
9  To decrease pointer chasing and as a consequence to improve a data
10  locality the current implementation is based on storing entries in
11  an array and using hash tables with open addressing. The current
12  entries are more compact in comparison with the original ones and
13  this also improves the data locality.
14 
15  The hash table has two arrays called *bins* and *entries*.
16 
17  bins:
18  -------
19  | | entries array:
20  |-------| --------------------------------
21  | index | | | entry: | | |
22  |-------| | | | | |
23  | ... | | ... | hash | ... | ... |
24  |-------| | | key | | |
25  | empty | | | record | | |
26  |-------| --------------------------------
27  | ... | ^ ^
28  |-------| |_ entries start |_ entries bound
29  |deleted|
30  -------
31 
32  o The entry array contains table entries in the same order as they
33  were inserted.
34 
35  When the first entry is deleted, a variable containing index of
36  the current first entry (*entries start*) is changed. In all
37  other cases of the deletion, we just mark the entry as deleted by
38  using a reserved hash value.
39 
40  Such organization of the entry storage makes operations of the
41  table shift and the entries traversal very fast.
42 
43  o The bins provide access to the entries by their keys. The
44  key hash is mapped to a bin containing *index* of the
45  corresponding entry in the entry array.
46 
47  The bin array size is always power of two, it makes mapping very
48  fast by using the corresponding lower bits of the hash.
49  Generally it is not a good idea to ignore some part of the hash.
50  But alternative approach is worse. For example, we could use a
51  modulo operation for mapping and a prime number for the size of
52  the bin array. Unfortunately, the modulo operation for big
53  64-bit numbers are extremely slow (it takes more than 100 cycles
54  on modern Intel CPUs).
55 
56  Still other bits of the hash value are used when the mapping
57  results in a collision. In this case we use a secondary hash
58  value which is a result of a function of the collision bin
59  index and the original hash value. The function choice
60  guarantees that we can traverse all bins and finally find the
61  corresponding bin as after several iterations the function
62  becomes a full cycle linear congruential generator because it
63  satisfies requirements of the Hull-Dobell theorem.
64 
65  When an entry is removed from the table besides marking the
66  hash in the corresponding entry described above, we also mark
67  the bin by a special value in order to find entries which had
68  a collision with the removed entries.
69 
70  There are two reserved values for the bins. One denotes an
71  empty bin, another one denotes a bin for a deleted entry.
72 
73  o The length of the bin array is at least two times more than the
74  entry array length. This keeps the table load factor healthy.
75  The trigger of rebuilding the table is always a case when we can
76  not insert an entry anymore at the entries bound. We could
77  change the entries bound too in case of deletion but than we need
78  a special code to count bins with corresponding deleted entries
79  and reset the bin values when there are too many bins
80  corresponding deleted entries
81 
82  Table rebuilding is done by creation of a new entry array and
83  bins of an appropriate size. We also try to reuse the arrays
84  in some cases by compacting the array and removing deleted
85  entries.
86 
87  o To save memory very small tables have no allocated arrays
88  bins. We use a linear search for an access by a key.
89 
90  o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91  bins depending on the current hash table size.
92 
93  This implementation speeds up the Ruby hash table benchmarks in
94  average by more 40% on Intel Haswell CPU.
95 
96 */
97 
98 #ifdef NOT_RUBY
99 #include "regint.h"
100 #include "st.h"
101 #else
102 #include "internal.h"
103 #endif
104 
105 #include <stdio.h>
106 #ifdef HAVE_STDLIB_H
107 #include <stdlib.h>
108 #endif
109 #include <string.h>
110 #include <assert.h>
111 
112 #ifdef __GNUC__
113 #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
114 #define EXPECT(expr, val) __builtin_expect(expr, val)
115 #define ATTRIBUTE_UNUSED __attribute__((unused))
116 #else
117 #define PREFETCH(addr, write_p)
118 #define EXPECT(expr, val) (expr)
119 #define ATTRIBUTE_UNUSED
120 #endif
121 
122 #ifdef ST_DEBUG
123 #define st_assert assert
124 #else
125 #define st_assert(cond) ((void)(0 && (cond)))
126 #endif
127 
128 /* The type of hashes. */
130 
135 };
136 
137 #define type_numhash st_hashtype_num
139  st_numcmp,
140  st_numhash,
141 };
142 
143 /* extern int strcmp(const char *, const char *); */
144 static st_index_t strhash(st_data_t);
145 static const struct st_hash_type type_strhash = {
146  strcmp,
147  strhash,
148 };
149 
150 static st_index_t strcasehash(st_data_t);
151 static const struct st_hash_type type_strcasehash = {
153  strcasehash,
154 };
155 
156 /* Value used to catch uninitialized entries/bins during debugging.
157  There is a possibility for a false alarm, but its probability is
158  extremely small. */
159 #define ST_INIT_VAL 0xafafafafafafafaf
160 #define ST_INIT_VAL_BYTE 0xafa
161 
162 #ifdef RUBY
163 #undef malloc
164 #undef realloc
165 #undef calloc
166 #undef free
167 #define malloc ruby_xmalloc
168 #define calloc ruby_xcalloc
169 #define realloc ruby_xrealloc
170 #define free ruby_xfree
171 #endif
172 
173 #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
174 #define PTR_EQUAL(tab, ptr, hash_val, key_) \
175  ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
176 
177 /* Features of a table. */
178 struct st_features {
179  /* Power of 2 used for number of allocated entries. */
180  unsigned char entry_power;
181  /* Power of 2 used for number of allocated bins. Depending on the
182  table size, the number of bins is 2-4 times more than the
183  number of entries. */
184  unsigned char bin_power;
185  /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
186  unsigned char size_ind;
187  /* Bins are packed in words of type st_index_t. The following is
188  a size of bins counted by words. */
190 };
191 
192 /* Features of all possible size tables. */
193 #if SIZEOF_ST_INDEX_T == 8
194 #define MAX_POWER2 62
195 static const struct st_features features[] = {
196  {0, 1, 0, 0x0},
197  {1, 2, 0, 0x1},
198  {2, 3, 0, 0x1},
199  {3, 4, 0, 0x2},
200  {4, 5, 0, 0x4},
201  {5, 6, 0, 0x8},
202  {6, 7, 0, 0x10},
203  {7, 8, 0, 0x20},
204  {8, 9, 1, 0x80},
205  {9, 10, 1, 0x100},
206  {10, 11, 1, 0x200},
207  {11, 12, 1, 0x400},
208  {12, 13, 1, 0x800},
209  {13, 14, 1, 0x1000},
210  {14, 15, 1, 0x2000},
211  {15, 16, 1, 0x4000},
212  {16, 17, 2, 0x10000},
213  {17, 18, 2, 0x20000},
214  {18, 19, 2, 0x40000},
215  {19, 20, 2, 0x80000},
216  {20, 21, 2, 0x100000},
217  {21, 22, 2, 0x200000},
218  {22, 23, 2, 0x400000},
219  {23, 24, 2, 0x800000},
220  {24, 25, 2, 0x1000000},
221  {25, 26, 2, 0x2000000},
222  {26, 27, 2, 0x4000000},
223  {27, 28, 2, 0x8000000},
224  {28, 29, 2, 0x10000000},
225  {29, 30, 2, 0x20000000},
226  {30, 31, 2, 0x40000000},
227  {31, 32, 2, 0x80000000},
228  {32, 33, 3, 0x200000000},
229  {33, 34, 3, 0x400000000},
230  {34, 35, 3, 0x800000000},
231  {35, 36, 3, 0x1000000000},
232  {36, 37, 3, 0x2000000000},
233  {37, 38, 3, 0x4000000000},
234  {38, 39, 3, 0x8000000000},
235  {39, 40, 3, 0x10000000000},
236  {40, 41, 3, 0x20000000000},
237  {41, 42, 3, 0x40000000000},
238  {42, 43, 3, 0x80000000000},
239  {43, 44, 3, 0x100000000000},
240  {44, 45, 3, 0x200000000000},
241  {45, 46, 3, 0x400000000000},
242  {46, 47, 3, 0x800000000000},
243  {47, 48, 3, 0x1000000000000},
244  {48, 49, 3, 0x2000000000000},
245  {49, 50, 3, 0x4000000000000},
246  {50, 51, 3, 0x8000000000000},
247  {51, 52, 3, 0x10000000000000},
248  {52, 53, 3, 0x20000000000000},
249  {53, 54, 3, 0x40000000000000},
250  {54, 55, 3, 0x80000000000000},
251  {55, 56, 3, 0x100000000000000},
252  {56, 57, 3, 0x200000000000000},
253  {57, 58, 3, 0x400000000000000},
254  {58, 59, 3, 0x800000000000000},
255  {59, 60, 3, 0x1000000000000000},
256  {60, 61, 3, 0x2000000000000000},
257  {61, 62, 3, 0x4000000000000000},
258  {62, 63, 3, 0x8000000000000000},
259 };
260 
261 #else
262 #define MAX_POWER2 30
263 
264 static const struct st_features features[] = {
265  {0, 1, 0, 0x1},
266  {1, 2, 0, 0x1},
267  {2, 3, 0, 0x2},
268  {3, 4, 0, 0x4},
269  {4, 5, 0, 0x8},
270  {5, 6, 0, 0x10},
271  {6, 7, 0, 0x20},
272  {7, 8, 0, 0x40},
273  {8, 9, 1, 0x100},
274  {9, 10, 1, 0x200},
275  {10, 11, 1, 0x400},
276  {11, 12, 1, 0x800},
277  {12, 13, 1, 0x1000},
278  {13, 14, 1, 0x2000},
279  {14, 15, 1, 0x4000},
280  {15, 16, 1, 0x8000},
281  {16, 17, 2, 0x20000},
282  {17, 18, 2, 0x40000},
283  {18, 19, 2, 0x80000},
284  {19, 20, 2, 0x100000},
285  {20, 21, 2, 0x200000},
286  {21, 22, 2, 0x400000},
287  {22, 23, 2, 0x800000},
288  {23, 24, 2, 0x1000000},
289  {24, 25, 2, 0x2000000},
290  {25, 26, 2, 0x4000000},
291  {26, 27, 2, 0x8000000},
292  {27, 28, 2, 0x10000000},
293  {28, 29, 2, 0x20000000},
294  {29, 30, 2, 0x40000000},
295  {30, 31, 2, 0x80000000},
296 };
297 
298 #endif
299 
300 /* The reserved hash value and its substitution. */
301 #define RESERVED_HASH_VAL (~(st_hash_t) 0)
302 #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
303 
304 /* Return hash value of KEY for table TAB. */
305 static inline st_hash_t
306 do_hash(st_data_t key, st_table *tab)
307 {
308  st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
309 
310  /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
311  another value. Such mapping should be extremely rare. */
313 }
314 
315 /* Power of 2 defining the minimal number of allocated entries. */
316 #define MINIMAL_POWER2 2
317 
318 #if MINIMAL_POWER2 < 2
319 #error "MINIMAL_POWER2 should be >= 2"
320 #endif
321 
322 /* If the power2 of the allocated `entries` is less than the following
323  value, don't allocate bins and use a linear search. */
324 #define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
325 
326 /* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
327 static int
328 get_power2(st_index_t size)
329 {
330  unsigned int n;
331 
332  for (n = 0; size != 0; n++)
333  size >>= 1;
334  if (n <= MAX_POWER2)
335  return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
336 #ifndef NOT_RUBY
337  /* Ran out of the table entries */
338  rb_raise(rb_eRuntimeError, "st_table too big");
339 #endif
340  /* should raise exception */
341  return -1;
342 }
343 
344 /* Return value of N-th bin in array BINS of table with bins size
345  index S. */
346 static inline st_index_t
347 get_bin(st_index_t *bins, int s, st_index_t n)
348 {
349  return (s == 0 ? ((unsigned char *) bins)[n]
350  : s == 1 ? ((unsigned short *) bins)[n]
351  : s == 2 ? ((unsigned int *) bins)[n]
352  : ((st_index_t *) bins)[n]);
353 }
354 
355 /* Set up N-th bin in array BINS of table with bins size index S to
356  value V. */
357 static inline void
358 set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
359 {
360  if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
361  else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
362  else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
363  else ((st_index_t *) bins)[n] = v;
364 }
365 
366 /* These macros define reserved values for empty table bin and table
367  bin which contains a deleted entry. We will never use such values
368  for an entry index in bins. */
369 #define EMPTY_BIN 0
370 #define DELETED_BIN 1
371 /* Base of a real entry index in the bins. */
372 #define ENTRY_BASE 2
373 
374 /* Mark I-th bin of table TAB as empty, in other words not
375  corresponding to any entry. */
376 #define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
377 
378 /* Values used for not found entry and bin with given
379  characteristics. */
380 #define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
381 #define UNDEFINED_BIN_IND (~(st_index_t) 0)
382 
383 /* Mark I-th bin of table TAB as corresponding to a deleted table
384  entry. Update number of entries in the table and number of bins
385  corresponding to deleted entries. */
386 #define MARK_BIN_DELETED(tab, i) \
387  do { \
388  st_assert(i != UNDEFINED_BIN_IND); \
389  st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i)); \
390  set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
391  } while (0)
392 
393 /* Macros to check that value B is used empty bins and bins
394  corresponding deleted entries. */
395 #define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
396 #define DELETED_BIN_P(b) ((b) == DELETED_BIN)
397 #define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
398 
399 /* Macros to check empty bins and bins corresponding to deleted
400  entries. Bins are given by their index I in table TAB. */
401 #define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
402 #define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
403 #define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
404 
405 /* Macros for marking and checking deleted entries given by their
406  pointer E_PTR. */
407 #define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
408 #define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
409 
410 /* Return bin size index of table TAB. */
411 static inline unsigned int
412 get_size_ind(const st_table *tab)
413 {
414  return tab->size_ind;
415 }
416 
417 /* Return the number of allocated bins of table TAB. */
418 static inline st_index_t
419 get_bins_num(const st_table *tab)
420 {
421  return ((st_index_t) 1)<<tab->bin_power;
422 }
423 
424 /* Return mask for a bin index in table TAB. */
425 static inline st_index_t
426 bins_mask(const st_table *tab)
427 {
428  return get_bins_num(tab) - 1;
429 }
430 
431 /* Return the index of table TAB bin corresponding to
432  HASH_VALUE. */
433 static inline st_index_t
434 hash_bin(st_hash_t hash_value, st_table *tab)
435 {
436  return hash_value & bins_mask(tab);
437 }
438 
439 /* Return the number of allocated entries of table TAB. */
440 static inline st_index_t
441 get_allocated_entries(const st_table *tab)
442 {
443  return ((st_index_t) 1)<<tab->entry_power;
444 }
445 
446 /* Return size of the allocated bins of table TAB. */
447 static inline st_index_t
448 bins_size(const st_table *tab)
449 {
450  return features[tab->entry_power].bins_words * sizeof (st_index_t);
451 }
452 
453 /* Mark all bins of table TAB as empty. */
454 static void
455 initialize_bins(st_table *tab)
456 {
457  memset(tab->bins, 0, bins_size(tab));
458 }
459 
460 /* Make table TAB empty. */
461 static void
462 make_tab_empty(st_table *tab)
463 {
464  tab->num_entries = 0;
465  tab->entries_start = tab->entries_bound = 0;
466  if (tab->bins != NULL)
467  initialize_bins(tab);
468 }
469 
470 #ifdef ST_DEBUG
471 #define st_assert_notinitial(ent) \
472  do { \
473  st_assert(ent.hash != (st_hash_t) ST_INIT_VAL); \
474  st_assert(ent.key != ST_INIT_VAL); \
475  st_assert(ent.record != ST_INIT_VAL); \
476  } while (0)
477 /* Check the table T consistency. It can be extremely slow. So use
478  it only for debugging. */
479 static void
480 st_check(st_table *tab)
481 {
482  st_index_t d, e, i, n, p;
483 
484  for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1)
485  ;
486  p = i;
488  st_assert(tab->entries_bound <= get_allocated_entries(tab));
489  st_assert(tab->entries_start <= tab->entries_bound);
490  n = 0;
491  return;
492  if (tab->entries_bound != 0)
493  for (i = tab->entries_start; i < tab->entries_bound; i++) {
494  st_assert_notinitial(tab->entries[i]);
495  if (! DELETED_ENTRY_P(&tab->entries[i]))
496  n++;
497  }
498  st_assert(n == tab->num_entries);
499  if (tab->bins == NULL)
501  else {
503  for (n = d = i = 0; i < get_bins_num(tab); i++) {
504  st_assert(get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL);
505  if (IND_DELETED_BIN_P(tab, i)) {
506  d++;
507  continue;
508  }
509  else if (IND_EMPTY_BIN_P(tab, i))
510  continue;
511  n++;
512  e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE;
513  st_assert(tab->entries_start <= e && e < tab->entries_bound);
514  st_assert(! DELETED_ENTRY_P(&tab->entries[e]));
515  st_assert_notinitial(tab->entries[e]);
516  }
517  st_assert(n == tab->num_entries);
518  st_assert(n + d < get_bins_num(tab));
519  }
520 }
521 #endif
522 
523 #ifdef HASH_LOG
524 #ifdef HAVE_UNISTD_H
525 #include <unistd.h>
526 #endif
527 static struct {
528  int all, total, num, str, strcase;
529 } collision;
530 
531 /* Flag switching off output of package statistics at the end of
532  program. */
533 static int init_st = 0;
534 
535 /* Output overall number of table searches and collisions into a
536  temporary file. */
537 static void
538 stat_col(void)
539 {
540  char fname[10+sizeof(long)*3];
541  FILE *f;
542  if (!collision.total) return;
543  f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
544  fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
545  ((double)collision.all / (collision.total)) * 100);
546  fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
547  fclose(f);
548 }
549 #endif
550 
551 /* Create and return table with TYPE which can hold at least SIZE
552  entries. The real number of entries which the table can hold is
553  the nearest power of two for SIZE. */
554 st_table *
556 {
557  st_table *tab;
558  int n;
559 
560 #ifdef HASH_LOG
561 #if HASH_LOG+0 < 0
562  {
563  const char *e = getenv("ST_HASH_LOG");
564  if (!e || !*e) init_st = 1;
565  }
566 #endif
567  if (init_st == 0) {
568  init_st = 1;
569  atexit(stat_col);
570  }
571 #endif
572 
573  n = get_power2(size);
574  tab = (st_table *) malloc(sizeof (st_table));
575  tab->type = type;
576  tab->entry_power = n;
577  tab->bin_power = features[n].bin_power;
578  tab->size_ind = features[n].size_ind;
580  tab->bins = NULL;
581  else
582  tab->bins = (st_index_t *) malloc(bins_size(tab));
583  tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
584  * sizeof(st_table_entry));
585 #ifdef ST_DEBUG
586  memset(tab->entries, ST_INIT_VAL_BYTE,
587  get_allocated_entries(tab) * sizeof(st_table_entry));
588  if (tab->bins != NULL)
589  memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab));
590 #endif
591  make_tab_empty(tab);
592  tab->rebuilds_num = 0;
593 #ifdef ST_DEBUG
594  st_check(tab);
595 #endif
596  return tab;
597 }
598 
599 /* Create and return table with TYPE which can hold a minimal number
600  of entries (see comments for get_power2). */
601 st_table *
602 st_init_table(const struct st_hash_type *type)
603 {
604  return st_init_table_with_size(type, 0);
605 }
606 
607 /* Create and return table which can hold a minimal number of
608  numbers. */
609 st_table *
611 {
612  return st_init_table(&type_numhash);
613 }
614 
615 /* Create and return table which can hold SIZE numbers. */
616 st_table *
618 {
619  return st_init_table_with_size(&type_numhash, size);
620 }
621 
622 /* Create and return table which can hold a minimal number of
623  strings. */
624 st_table *
626 {
627  return st_init_table(&type_strhash);
628 }
629 
630 /* Create and return table which can hold SIZE strings. */
631 st_table *
633 {
634  return st_init_table_with_size(&type_strhash, size);
635 }
636 
637 /* Create and return table which can hold a minimal number of strings
638  whose character case is ignored. */
639 st_table *
641 {
642  return st_init_table(&type_strcasehash);
643 }
644 
645 /* Create and return table which can hold SIZE strings whose character
646  case is ignored. */
647 st_table *
649 {
650  return st_init_table_with_size(&type_strcasehash, size);
651 }
652 
653 /* Make table TAB empty. */
654 void
656 {
657  make_tab_empty(tab);
658  tab->rebuilds_num++;
659 #ifdef ST_DEBUG
660  st_check(tab);
661 #endif
662 }
663 
664 /* Free table TAB space. */
665 void
667 {
668  if (tab->bins != NULL)
669  free(tab->bins);
670  free(tab->entries);
671  free(tab);
672 }
673 
674 /* Return byte size of memory allocted for table TAB. */
675 size_t
676 st_memsize(const st_table *tab)
677 {
678  return(sizeof(st_table)
679  + (tab->bins == NULL ? 0 : bins_size(tab))
680  + get_allocated_entries(tab) * sizeof(st_table_entry));
681 }
682 
683 static st_index_t
684 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
685 
686 static st_index_t
687 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
688 
689 static st_index_t
690 find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
691 
692 static st_index_t
693 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
694  st_data_t key, st_index_t *bin_ind);
695 
696 #ifdef HASH_LOG
697 static void
698 count_collision(const struct st_hash_type *type)
699 {
700  collision.all++;
701  if (type == &type_numhash) {
702  collision.num++;
703  }
704  else if (type == &type_strhash) {
705  collision.strcase++;
706  }
707  else if (type == &type_strcasehash) {
708  collision.str++;
709  }
710 }
711 
712 #define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
713 #define FOUND_BIN (collision_check ? collision.total++ : (void)0)
714 #define collision_check 0
715 #else
716 #define COLLISION
717 #define FOUND_BIN
718 #endif
719 
720 /* If the number of entries in the table is at least REBUILD_THRESHOLD
721  times less than the entry array length, decrease the table
722  size. */
723 #define REBUILD_THRESHOLD 4
724 
725 #if REBUILD_THRESHOLD < 2
726 #error "REBUILD_THRESHOLD should be >= 2"
727 #endif
728 
729 /* Rebuild table TAB. Rebuilding removes all deleted bins and entries
730  and can change size of the table entries and bins arrays.
731  Rebuilding is implemented by creation of a new table or by
732  compaction of the existing one. */
733 static void
734 rebuild_table(st_table *tab)
735 {
736  st_index_t i, ni, bound;
737  unsigned int size_ind;
738  st_table *new_tab;
739  st_table_entry *entries, *new_entries;
740  st_table_entry *curr_entry_ptr;
741  st_index_t *bins;
742  st_index_t bin_ind;
743 
744  st_assert(tab != NULL);
745  bound = tab->entries_bound;
746  entries = tab->entries;
747  if ((2 * tab->num_entries <= get_allocated_entries(tab)
748  && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
749  || tab->num_entries < (1 << MINIMAL_POWER2)) {
750  /* Compaction: */
751  tab->num_entries = 0;
752  if (tab->bins != NULL)
753  initialize_bins(tab);
754  new_tab = tab;
755  new_entries = entries;
756  }
757  else {
758  new_tab = st_init_table_with_size(tab->type,
759  2 * tab->num_entries - 1);
760  new_entries = new_tab->entries;
761  }
762  ni = 0;
763  bins = new_tab->bins;
764  size_ind = get_size_ind(new_tab);
765  for (i = tab->entries_start; i < bound; i++) {
766  curr_entry_ptr = &entries[i];
767  PREFETCH(entries + i + 1, 0);
768  if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
769  continue;
770  if (&new_entries[ni] != curr_entry_ptr)
771  new_entries[ni] = *curr_entry_ptr;
772  if (EXPECT(bins != NULL, 1)) {
773  bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
774  curr_entry_ptr->key);
775  st_assert(bin_ind != UNDEFINED_BIN_IND);
776  st_assert(tab == new_tab || new_tab->rebuilds_num == 0);
777  st_assert(IND_EMPTY_BIN_P(new_tab, bin_ind));
778  set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
779  }
780  new_tab->num_entries++;
781  ni++;
782  }
783  if (new_tab != tab) {
784  tab->entry_power = new_tab->entry_power;
785  tab->bin_power = new_tab->bin_power;
786  tab->size_ind = new_tab->size_ind;
787  st_assert(tab->num_entries == ni);
788  st_assert(new_tab->num_entries == ni);
789  if (tab->bins != NULL)
790  free(tab->bins);
791  tab->bins = new_tab->bins;
792  free(tab->entries);
793  tab->entries = new_tab->entries;
794  free(new_tab);
795  }
796  tab->entries_start = 0;
797  tab->entries_bound = tab->num_entries;
798  tab->rebuilds_num++;
799 #ifdef ST_DEBUG
800  st_check(tab);
801 #endif
802 }
803 
804 /* Return the next secondary hash index for table TAB using previous
805  index IND and PERTERB. Finally modulo of the function becomes a
806  full *cycle linear congruential generator*, in other words it
807  guarantees traversing all table bins in extreme case.
808 
809  According the Hull-Dobell theorem a generator
810  "Xnext = (a*Xprev + c) mod m" is a full cycle generator iff
811  o m and c are relatively prime
812  o a-1 is divisible by all prime factors of m
813  o a-1 is divisible by 4 if m is divisible by 4.
814 
815  For our case a is 5, c is 1, and m is a power of two. */
816 static inline st_index_t
817 secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
818 {
819  *perterb >>= 11;
820  ind = (ind << 2) + ind + *perterb + 1;
821  return hash_bin(ind, tab);
822 }
823 
824 /* Find an entry with HASH_VALUE and KEY in TABLE using a linear
825  search. Return the index of the found entry in array `entries`.
826  If it is not found, return UNDEFINED_ENTRY_IND. */
827 static inline st_index_t
828 find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
829 {
830  st_index_t i, bound;
831  st_table_entry *entries;
832 
833  bound = tab->entries_bound;
834  entries = tab->entries;
835  for (i = tab->entries_start; i < bound; i++) {
836  if (PTR_EQUAL(tab, &entries[i], hash_value, key))
837  return i;
838  }
839  return UNDEFINED_ENTRY_IND;
840 }
841 
842 /* Use the quadratic probing. The method has a better data locality
843  but more collisions than the current approach. In average it
844  results in a bit slower search. */
845 /*#define QUADRATIC_PROBE*/
846 
847 /* Return index of entry with HASH_VALUE and KEY in table TAB. If
848  there is no such entry, return UNDEFINED_ENTRY_IND. */
849 static st_index_t
850 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
851 {
852  st_index_t ind;
853 #ifdef QUADRATIC_PROBE
854  st_index_t d;
855 #else
856  st_index_t peterb;
857 #endif
858  st_index_t bin;
859  st_table_entry *entries = tab->entries;
860 
861  st_assert(tab != NULL);
862  st_assert(tab->bins != NULL);
863  ind = hash_bin(hash_value, tab);
864 #ifdef QUADRATIC_PROBE
865  d = 1;
866 #else
867  peterb = hash_value;
868 #endif
869  FOUND_BIN;
870  for (;;) {
871  bin = get_bin(tab->bins, get_size_ind(tab), ind);
872  if (! EMPTY_OR_DELETED_BIN_P(bin)
873  && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
874  break;
875  else if (EMPTY_BIN_P(bin))
876  return UNDEFINED_ENTRY_IND;
877 #ifdef QUADRATIC_PROBE
878  ind = hash_bin(ind + d, tab);
879  d++;
880 #else
881  ind = secondary_hash(ind, tab, &peterb);
882 #endif
883  COLLISION;
884  }
885  return bin;
886 }
887 
888 /* Find and return index of table TAB bin corresponding to an entry
889  with HASH_VALUE and KEY. If there is no such bin, return
890  UNDEFINED_BIN_IND. */
891 static st_index_t
892 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
893 {
894  st_index_t ind;
895 #ifdef QUADRATIC_PROBE
896  st_index_t d;
897 #else
898  st_index_t peterb;
899 #endif
900  st_index_t bin;
901  st_table_entry *entries = tab->entries;
902 
903  st_assert(tab != NULL);
904  st_assert(tab->bins != NULL);
905  ind = hash_bin(hash_value, tab);
906 #ifdef QUADRATIC_PROBE
907  d = 1;
908 #else
909  peterb = hash_value;
910 #endif
911  FOUND_BIN;
912  for (;;) {
913  bin = get_bin(tab->bins, get_size_ind(tab), ind);
914  if (! EMPTY_OR_DELETED_BIN_P(bin)
915  && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
916  break;
917  else if (EMPTY_BIN_P(bin))
918  return UNDEFINED_BIN_IND;
919 #ifdef QUADRATIC_PROBE
920  ind = hash_bin(ind + d, tab);
921  d++;
922 #else
923  ind = secondary_hash(ind, tab, &peterb);
924 #endif
925  COLLISION;
926  }
927  return ind;
928 }
929 
930 /* Find and return index of table TAB bin corresponding to an entry
931  with HASH_VALUE and KEY. The entry should be in the table
932  already. */
933 static st_index_t
934 find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
935 {
936  st_index_t ind;
937 #ifdef QUADRATIC_PROBE
938  st_index_t d;
939 #else
940  st_index_t peterb;
941 #endif
942  st_index_t bin;
943  st_table_entry *entries = tab->entries;
944 
945  st_assert(tab != NULL);
946  st_assert(tab->bins != NULL);
947  ind = hash_bin(hash_value, tab);
948 #ifdef QUADRATIC_PROBE
949  d = 1;
950 #else
951  peterb = hash_value;
952 #endif
953  FOUND_BIN;
954  for (;;) {
955  bin = get_bin(tab->bins, get_size_ind(tab), ind);
956  if (EMPTY_OR_DELETED_BIN_P(bin))
957  return ind;
958  st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key));
959 #ifdef QUADRATIC_PROBE
960  ind = hash_bin(ind + d, tab);
961  d++;
962 #else
963  ind = secondary_hash(ind, tab, &peterb);
964 #endif
965  COLLISION;
966  }
967 }
968 
969 /* Return index of table TAB bin for HASH_VALUE and KEY through
970  BIN_IND and the pointed value as the function result. Reserve the
971  bin for inclusion of the corresponding entry into the table if it
972  is not there yet. We always find such bin as bins array length is
973  bigger entries array. Although we can reuse a deleted bin, the
974  result bin value is always empty if the table has no entry with
975  KEY. Return the entries array index of the found entry or
976  UNDEFINED_ENTRY_IND if it is not found. */
977 static st_index_t
978 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
979  st_data_t key, st_index_t *bin_ind)
980 {
981  st_index_t ind;
982  st_hash_t curr_hash_value = *hash_value;
983 #ifdef QUADRATIC_PROBE
984  st_index_t d;
985 #else
986  st_index_t peterb;
987 #endif
988  st_index_t entry_index;
989  st_index_t first_deleted_bin_ind;
990  st_table_entry *entries;
991 
992  st_assert(tab != NULL);
993  st_assert(tab->bins != NULL);
994  st_assert(tab->entries_bound <= get_allocated_entries(tab));
995  st_assert(tab->entries_start <= tab->entries_bound);
996  ind = hash_bin(curr_hash_value, tab);
997 #ifdef QUADRATIC_PROBE
998  d = 1;
999 #else
1000  peterb = curr_hash_value;
1001 #endif
1002  FOUND_BIN;
1003  first_deleted_bin_ind = UNDEFINED_BIN_IND;
1004  entries = tab->entries;
1005  for (;;) {
1006  entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1007  if (EMPTY_BIN_P(entry_index)) {
1008  tab->num_entries++;
1009  entry_index = UNDEFINED_ENTRY_IND;
1010  if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1011  /* We can reuse bin of a deleted entry. */
1012  ind = first_deleted_bin_ind;
1013  MARK_BIN_EMPTY(tab, ind);
1014  }
1015  break;
1016  }
1017  else if (! DELETED_BIN_P(entry_index)) {
1018  if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
1019  break;
1020  }
1021  else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1022  first_deleted_bin_ind = ind;
1023 #ifdef QUADRATIC_PROBE
1024  ind = hash_bin(ind + d, tab);
1025  d++;
1026 #else
1027  ind = secondary_hash(ind, tab, &peterb);
1028 #endif
1029  COLLISION;
1030  }
1031  *bin_ind = ind;
1032  return entry_index;
1033 }
1034 
1035 /* Find an entry with KEY in table TAB. Return non-zero if we found
1036  it. Set up *RECORD to the found entry record. */
1037 int
1039 {
1040  st_index_t bin;
1041  st_hash_t hash = do_hash(key, tab);
1042 
1043  if (tab->bins == NULL) {
1044  bin = find_entry(tab, hash, key);
1045  if (bin == UNDEFINED_ENTRY_IND)
1046  return 0;
1047  }
1048  else {
1049  bin = find_table_entry_ind(tab, hash, key);
1050  if (bin == UNDEFINED_ENTRY_IND)
1051  return 0;
1052  bin -= ENTRY_BASE;
1053  }
1054  if (value != 0)
1055  *value = tab->entries[bin].record;
1056  return 1;
1057 }
1058 
1059 /* Find an entry with KEY in table TAB. Return non-zero if we found
1060  it. Set up *RESULT to the found table entry key. */
1061 int
1063 {
1064  st_index_t bin;
1065  st_hash_t hash = do_hash(key, tab);
1066 
1067  if (tab->bins == NULL) {
1068  bin = find_entry(tab, hash, key);
1069  if (bin == UNDEFINED_ENTRY_IND)
1070  return 0;
1071  }
1072  else {
1073  bin = find_table_entry_ind(tab, hash, key);
1074  if (bin == UNDEFINED_ENTRY_IND)
1075  return 0;
1076  bin -= ENTRY_BASE;
1077  }
1078  if (result != 0)
1079  *result = tab->entries[bin].key;
1080  return 1;
1081 }
1082 
1083 /* Check the table and rebuild it if it is necessary. */
1084 static inline void
1085 rebuild_table_if_necessary (st_table *tab)
1086 {
1087  st_index_t bound = tab->entries_bound;
1088 
1089  if (bound == get_allocated_entries(tab))
1090  rebuild_table(tab);
1091  st_assert(tab->entries_bound < get_allocated_entries(tab));
1092 }
1093 
1094 /* Insert (KEY, VALUE) into table TAB and return zero. If there is
1095  already entry with KEY in the table, return nonzero and and update
1096  the value of the found entry. */
1097 int
1099 {
1100  st_table_entry *entry;
1101  st_index_t bin;
1102  st_index_t ind;
1103  st_hash_t hash_value;
1104  st_index_t bin_ind;
1105  int new_p;
1106 
1107  rebuild_table_if_necessary(tab);
1108  hash_value = do_hash(key, tab);
1109  if (tab->bins == NULL) {
1110  bin = find_entry(tab, hash_value, key);
1111  new_p = bin == UNDEFINED_ENTRY_IND;
1112  if (new_p)
1113  tab->num_entries++;
1114  bin_ind = UNDEFINED_BIN_IND;
1115  }
1116  else {
1117  bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1118  key, &bin_ind);
1119  new_p = bin == UNDEFINED_ENTRY_IND;
1120  bin -= ENTRY_BASE;
1121  }
1122  if (new_p) {
1123  st_assert(tab->entries_bound < get_allocated_entries(tab));
1124  ind = tab->entries_bound++;
1125  entry = &tab->entries[ind];
1126  entry->hash = hash_value;
1127  entry->key = key;
1128  entry->record = value;
1129  if (bin_ind != UNDEFINED_BIN_IND)
1130  set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1131 #ifdef ST_DEBUG
1132  st_check(tab);
1133 #endif
1134  return 0;
1135  }
1136  tab->entries[bin].record = value;
1137 #ifdef ST_DEBUG
1138  st_check(tab);
1139 #endif
1140  return 1;
1141 }
1142 
1143 /* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1144  entry with KEY before the insertion. */
1145 static inline void
1146 st_add_direct_with_hash(st_table *tab,
1148 {
1149  st_table_entry *entry;
1150  st_index_t ind;
1151  st_index_t bin_ind;
1152 
1153  rebuild_table_if_necessary(tab);
1154  ind = tab->entries_bound++;
1155  entry = &tab->entries[ind];
1156  entry->hash = hash;
1157  entry->key = key;
1158  entry->record = value;
1159  tab->num_entries++;
1160  if (tab->bins != NULL) {
1161  bin_ind = find_table_bin_ind_direct(tab, hash, key);
1162  st_assert (bin_ind != UNDEFINED_BIN_IND);
1163  set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1164  }
1165 #ifdef ST_DEBUG
1166  st_check(tab);
1167 #endif
1168 }
1169 
1170 /* Insert (KEY, VALUE) into table TAB. The table should not have
1171  entry with KEY before the insertion. */
1172 void
1174 {
1175  st_hash_t hash_value;
1176 
1177  hash_value = do_hash(key, tab);
1178  st_add_direct_with_hash(tab, key, value, hash_value);
1179 }
1180 
1181 /* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1182  there is already entry with KEY in the table, return nonzero and
1183  and update the value of the found entry. */
1184 int
1186  st_data_t (*func)(st_data_t))
1187 {
1188  st_table_entry *entry;
1189  st_index_t bin;
1190  st_index_t ind, check;
1191  st_hash_t hash_value;
1192  st_index_t bin_ind;
1193  int new_p;
1194 
1195  rebuild_table_if_necessary (tab);
1196  hash_value = do_hash(key, tab);
1197  if (tab->bins == NULL) {
1198  bin = find_entry(tab, hash_value, key);
1199  new_p = bin == UNDEFINED_ENTRY_IND;
1200  if (new_p)
1201  tab->num_entries++;
1202  bin_ind = UNDEFINED_BIN_IND;
1203  }
1204  else {
1205  bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1206  key, &bin_ind);
1207  new_p = bin == UNDEFINED_ENTRY_IND;
1208  bin -= ENTRY_BASE;
1209  }
1210  if (new_p) {
1211  st_assert(tab->entries_bound < get_allocated_entries(tab));
1212  check = tab->rebuilds_num;
1213  key = (*func)(key);
1214  st_assert(check == tab->rebuilds_num);
1215  st_assert(do_hash(key, tab) == hash_value);
1216  ind = tab->entries_bound++;
1217  entry = &tab->entries[ind];
1218  entry->hash = hash_value;
1219  entry->key = key;
1220  entry->record = value;
1221  if (bin_ind != UNDEFINED_BIN_IND)
1222  set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1223 #ifdef ST_DEBUG
1224  st_check(tab);
1225 #endif
1226  return 0;
1227  }
1228  tab->entries[bin].record = value;
1229 #ifdef ST_DEBUG
1230  st_check(tab);
1231 #endif
1232  return 1;
1233 }
1234 
1235 /* Create and return a copy of table OLD_TAB. */
1236 st_table *
1238 {
1239  st_table *new_tab;
1240 
1241  new_tab = (st_table *) malloc(sizeof(st_table));
1242  *new_tab = *old_tab;
1243  if (old_tab->bins == NULL)
1244  new_tab->bins = NULL;
1245  else
1246  new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1247  new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1248  * sizeof(st_table_entry));
1249  MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1250  get_allocated_entries(old_tab));
1251  if (old_tab->bins != NULL)
1252  MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1253 #ifdef ST_DEBUG
1254  st_check(new_tab);
1255 #endif
1256  return new_tab;
1257 }
1258 
1259 /* Update the entries start of table TAB after removing an entry
1260  with index N in the array entries. */
1261 static inline void
1262 update_range_for_deleted(st_table *tab, st_index_t n)
1263 {
1264  /* Do not update entries_bound here. Otherwise, we can fill all
1265  bins by deleted entry value before rebuilding the table. */
1266  if (tab->entries_start == n)
1267  tab->entries_start = n + 1;
1268 }
1269 
1270 /* Delete entry with KEY from table TAB, set up *VALUE (unless
1271  VALUE is zero) from deleted table entry, and return non-zero. If
1272  there is no entry with KEY in the table, clear *VALUE (unless VALUE
1273  is zero), and return zero. */
1274 static int
1275 st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1276 {
1277  st_table_entry *entry;
1278  st_index_t bin;
1279  st_index_t bin_ind;
1280  st_hash_t hash;
1281 
1282  st_assert(tab != NULL);
1283  hash = do_hash(*key, tab);
1284  if (tab->bins == NULL) {
1285  bin = find_entry(tab, hash, *key);
1286  if (bin == UNDEFINED_ENTRY_IND) {
1287  if (value != 0) *value = 0;
1288  return 0;
1289  }
1290  }
1291  else {
1292  bin_ind = find_table_bin_ind(tab, hash, *key);
1293  if (bin_ind == UNDEFINED_BIN_IND) {
1294  if (value != 0) *value = 0;
1295  return 0;
1296  }
1297  bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1298  MARK_BIN_DELETED(tab, bin_ind);
1299  }
1300  entry = &tab->entries[bin];
1301  *key = entry->key;
1302  if (value != 0) *value = entry->record;
1303  MARK_ENTRY_DELETED(entry);
1304  tab->num_entries--;
1305  update_range_for_deleted(tab, bin);
1306 #ifdef ST_DEBUG
1307  st_check(tab);
1308 #endif
1309  return 1;
1310 }
1311 
1312 int
1314 {
1315  return st_general_delete(tab, key, value);
1316 }
1317 
1318 /* The function and other functions with suffix '_safe' or '_check'
1319  are originated from the previous implementation of the hash tables.
1320  It was necessary for correct deleting entries during traversing
1321  tables. The current implementation permits deletion during
1322  traversing without a specific way to do this. */
1323 int
1326 {
1327  return st_general_delete(tab, key, value);
1328 }
1329 
1330 /* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1331  return zero. Otherwise, remove the first entry in the table.
1332  Return its key through KEY and its record through VALUE (unless
1333  VALUE is zero). */
1334 int
1336 {
1337  st_index_t i, bound;
1338  st_index_t bin;
1339  st_table_entry *entries, *curr_entry_ptr;
1340  st_index_t bin_ind;
1341 
1342  entries = tab->entries;
1343  bound = tab->entries_bound;
1344  for (i = tab->entries_start; i < bound; i++) {
1345  curr_entry_ptr = &entries[i];
1346  if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1347  if (value != 0) *value = curr_entry_ptr->record;
1348  *key = curr_entry_ptr->key;
1349  if (tab->bins == NULL) {
1350  bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key);
1352  st_assert(&entries[bin] == curr_entry_ptr);
1353  }
1354  else {
1355  bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash,
1356  curr_entry_ptr->key);
1357  st_assert(bin_ind != UNDEFINED_BIN_IND);
1358  st_assert(&entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1359  - ENTRY_BASE] == curr_entry_ptr);
1360  MARK_BIN_DELETED(tab, bin_ind);
1361  }
1362  MARK_ENTRY_DELETED(curr_entry_ptr);
1363  tab->num_entries--;
1364  update_range_for_deleted(tab, i);
1365 #ifdef ST_DEBUG
1366  st_check(tab);
1367 #endif
1368  return 1;
1369  }
1370  }
1371  st_assert(tab->num_entries == 0);
1372  tab->entries_start = tab->entries_bound = 0;
1373  if (value != 0) *value = 0;
1374  return 0;
1375 }
1376 
1377 /* See comments for function st_delete_safe. */
1378 void
1380  st_data_t never ATTRIBUTE_UNUSED)
1381 {
1382 }
1383 
1384 /* Find entry with KEY in table TAB, call FUNC with the key and the
1385  value of the found entry, and non-zero as the 3rd argument. If the
1386  entry is not found, call FUNC with KEY, and 2 zero arguments. If
1387  the call returns ST_CONTINUE, the table will have an entry with key
1388  and value returned by FUNC through the 1st and 2nd parameters. If
1389  the call of FUNC returns ST_DELETE, the table will not have entry
1390  with KEY. The function returns flag of that the entry with KEY was
1391  in the table before the call. */
1392 int
1395 {
1396  st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1397  st_index_t bin = 0; /* Ditto */
1398  st_table_entry *entries;
1399  st_index_t bin_ind;
1400  st_data_t value = 0, old_key;
1401  st_index_t check;
1402  int retval, existing;
1403  st_hash_t hash = do_hash(key, tab);
1404 
1405  entries = tab->entries;
1406  if (tab->bins == NULL) {
1407  bin = find_entry(tab, hash, key);
1408  existing = bin != UNDEFINED_ENTRY_IND;
1409  entry = &entries[bin];
1410  bin_ind = UNDEFINED_BIN_IND;
1411  }
1412  else {
1413  bin_ind = find_table_bin_ind(tab, hash, key);
1414  existing = bin_ind != UNDEFINED_BIN_IND;
1415  if (existing) {
1416  bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1417  entry = &entries[bin];
1418  }
1419  }
1420  if (existing) {
1421  key = entry->key;
1422  value = entry->record;
1423  }
1424  old_key = key;
1425  check = tab->rebuilds_num;
1426  retval = (*func)(&key, &value, arg, existing);
1427  st_assert(check == tab->rebuilds_num);
1428  switch (retval) {
1429  case ST_CONTINUE:
1430  if (! existing) {
1431  st_add_direct_with_hash(tab, key, value, hash);
1432  break;
1433  }
1434  if (old_key != key) {
1435  entry->key = key;
1436  }
1437  entry->record = value;
1438  break;
1439  case ST_DELETE:
1440  if (existing) {
1441  if (bin_ind != UNDEFINED_BIN_IND)
1442  MARK_BIN_DELETED(tab, bin_ind);
1443  MARK_ENTRY_DELETED(entry);
1444  tab->num_entries--;
1445  update_range_for_deleted(tab, bin);
1446 #ifdef ST_DEBUG
1447  st_check(tab);
1448 #endif
1449  }
1450  break;
1451  }
1452 #ifdef ST_DEBUG
1453  st_check(tab);
1454 #endif
1455  return existing;
1456 }
1457 
1458 /* Traverse all entries in table TAB calling FUNC with current entry
1459  key and value and zero. If the call returns ST_STOP, stop
1460  traversing. If the call returns ST_DELETE, delete the current
1461  entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1462  traversing. The function returns zero unless an error is found.
1463  CHECK_P is flag of st_foreach_check call. The behavior is a bit
1464  different for ST_CHECK and when the current element is removed
1465  during traversing. */
1466 static inline int
1467 st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
1468  int check_p)
1469 {
1470  st_index_t bin;
1471  st_index_t bin_ind;
1472  st_table_entry *entries, *curr_entry_ptr;
1473  enum st_retval retval;
1474  st_index_t i, rebuilds_num;
1475  st_hash_t hash;
1476  st_data_t key;
1477  int error_p, packed_p = tab->bins == NULL;
1478 
1479  st_assert(tab->entries_start <= tab->entries_bound);
1480  entries = tab->entries;
1481  /* The bound can change inside the loop even without rebuilding
1482  the table, e.g. by an entry inesrtion. */
1483  for (i = tab->entries_start; i < tab->entries_bound; i++) {
1484  curr_entry_ptr = &entries[i];
1485  if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1486  continue;
1487  key = curr_entry_ptr->key;
1488  rebuilds_num = tab->rebuilds_num;
1489  hash = curr_entry_ptr->hash;
1490  retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1491  if (rebuilds_num != tab->rebuilds_num) {
1492  entries = tab->entries;
1493  packed_p = tab->bins == NULL;
1494  if (packed_p) {
1495  i = find_entry(tab, hash, key);
1496  error_p = i == UNDEFINED_ENTRY_IND;
1497  }
1498  else {
1499  i = find_table_entry_ind(tab, hash, key);
1500  error_p = i == UNDEFINED_ENTRY_IND;
1501  i -= ENTRY_BASE;
1502  }
1503  if (error_p && check_p) {
1504  /* call func with error notice */
1505  retval = (*func)(0, 0, arg, 1);
1506 #ifdef ST_DEBUG
1507  st_check(tab);
1508 #endif
1509  return 1;
1510  }
1511  curr_entry_ptr = &entries[i];
1512  }
1513  switch (retval) {
1514  case ST_CONTINUE:
1515  break;
1516  case ST_CHECK:
1517  if (check_p)
1518  break;
1519  case ST_STOP:
1520 #ifdef ST_DEBUG
1521  st_check(tab);
1522 #endif
1523  return 0;
1524  case ST_DELETE:
1525  if (packed_p) {
1526  bin = find_entry(tab, hash, curr_entry_ptr->key);
1527  if (bin == UNDEFINED_ENTRY_IND)
1528  break;
1529  }
1530  else {
1531  bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key);
1532  if (bin_ind == UNDEFINED_BIN_IND)
1533  break;
1534  bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1535  MARK_BIN_DELETED(tab, bin_ind);
1536  }
1537  st_assert(&entries[bin] == curr_entry_ptr);
1538  MARK_ENTRY_DELETED(curr_entry_ptr);
1539  tab->num_entries--;
1540  update_range_for_deleted(tab, bin);
1541 #ifdef ST_DEBUG
1542  st_check(tab);
1543 #endif
1544  break;
1545  }
1546  }
1547 #ifdef ST_DEBUG
1548  st_check(tab);
1549 #endif
1550  return 0;
1551 }
1552 
1553 int
1554 st_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg)
1555 {
1556  return st_general_foreach(tab, func, arg, FALSE);
1557 }
1558 
1559 /* See comments for function st_delete_safe. */
1560 int
1561 st_foreach_check(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
1563 {
1564  return st_general_foreach(tab, func, arg, TRUE);
1565 }
1566 
1567 /* Set up array KEYS by at most SIZE keys of head table TAB entries.
1568  Return the number of keys set up in array KEYS. */
1569 static inline st_index_t
1570 st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1571 {
1572  st_index_t i, bound;
1573  st_data_t key, *keys_start, *keys_end;
1574  st_table_entry *curr_entry_ptr, *entries = tab->entries;
1575 
1576  bound = tab->entries_bound;
1577  keys_start = keys;
1578  keys_end = keys + size;
1579  for (i = tab->entries_start; i < bound; i++) {
1580  if (keys == keys_end)
1581  break;
1582  curr_entry_ptr = &entries[i];
1583  key = curr_entry_ptr->key;
1584  if (! DELETED_ENTRY_P(curr_entry_ptr))
1585  *keys++ = key;
1586  }
1587 
1588  return keys - keys_start;
1589 }
1590 
1591 st_index_t
1593 {
1594  return st_general_keys(tab, keys, size);
1595 }
1596 
1597 /* See comments for function st_delete_safe. */
1598 st_index_t
1601 {
1602  return st_general_keys(tab, keys, size);
1603 }
1604 
1605 /* Set up array VALUES by at most SIZE values of head table TAB
1606  entries. Return the number of values set up in array VALUES. */
1607 static inline st_index_t
1608 st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1609 {
1610  st_index_t i, bound;
1611  st_data_t *values_start, *values_end;
1612  st_table_entry *curr_entry_ptr, *entries = tab->entries;
1613 
1614  values_start = values;
1615  values_end = values + size;
1616  bound = tab->entries_bound;
1617  st_assert(bound != 0);
1618  for (i = tab->entries_start; i < bound; i++) {
1619  if (values == values_end)
1620  break;
1621  curr_entry_ptr = &entries[i];
1622  if (! DELETED_ENTRY_P(curr_entry_ptr))
1623  *values++ = curr_entry_ptr->record;
1624  }
1625 
1626  return values - values_start;
1627 }
1628 
1629 st_index_t
1631 {
1632  return st_general_values(tab, values, size);
1633 }
1634 
1635 /* See comments for function st_delete_safe. */
1636 st_index_t
1639 {
1640  return st_general_values(tab, values, size);
1641 }
1642 
1643 #define FNV1_32A_INIT 0x811c9dc5
1644 
1645 /*
1646  * 32 bit magic FNV-1a prime
1647  */
1648 #define FNV_32_PRIME 0x01000193
1649 
1650 #ifndef UNALIGNED_WORD_ACCESS
1651 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1652  defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1653  defined(__powerpc64__) || \
1654  defined(__mc68020__)
1655 # define UNALIGNED_WORD_ACCESS 1
1656 # endif
1657 #endif
1658 #ifndef UNALIGNED_WORD_ACCESS
1659 # define UNALIGNED_WORD_ACCESS 0
1660 #endif
1661 
1662 /* This hash function is quite simplified MurmurHash3
1663  * Simplification is legal, cause most of magic still happens in finalizator.
1664  * And finalizator is almost the same as in MurmurHash3 */
1665 #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1666 #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1667 
1668 #if ST_INDEX_BITS <= 32
1669 #define C1 (st_index_t)0xcc9e2d51
1670 #define C2 (st_index_t)0x1b873593
1671 #else
1672 #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1673 #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1674 #endif
1675 static inline st_index_t
1676 murmur_step(st_index_t h, st_index_t k)
1677 {
1678 #if ST_INDEX_BITS <= 32
1679 #define r1 (17)
1680 #define r2 (11)
1681 #else
1682 #define r1 (33)
1683 #define r2 (24)
1684 #endif
1685  k *= C1;
1686  h ^= ROTL(k, r1);
1687  h *= C2;
1688  h = ROTL(h, r2);
1689  return h;
1690 }
1691 #undef r1
1692 #undef r2
1693 
1694 static inline st_index_t
1695 murmur_finish(st_index_t h)
1696 {
1697 #if ST_INDEX_BITS <= 32
1698 #define r1 (16)
1699 #define r2 (13)
1700 #define r3 (16)
1701  const st_index_t c1 = 0x85ebca6b;
1702  const st_index_t c2 = 0xc2b2ae35;
1703 #else
1704 /* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1705 #define r1 (30)
1706 #define r2 (27)
1707 #define r3 (31)
1708  const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1709  const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1710 #endif
1711 #if ST_INDEX_BITS > 64
1712  h ^= h >> 64;
1713  h *= c2;
1714  h ^= h >> 65;
1715 #endif
1716  h ^= h >> r1;
1717  h *= c1;
1718  h ^= h >> r2;
1719  h *= c2;
1720  h ^= h >> r3;
1721  return h;
1722 }
1723 #undef r1
1724 #undef r2
1725 #undef r3
1726 
1727 st_index_t
1728 st_hash(const void *ptr, size_t len, st_index_t h)
1729 {
1730  const char *data = ptr;
1731  st_index_t t = 0;
1732  size_t l = len;
1733 
1734 #define data_at(n) (st_index_t)((unsigned char)data[(n)])
1735 #define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1736 #if SIZEOF_ST_INDEX_T > 4
1737 #define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1738 #if SIZEOF_ST_INDEX_T > 8
1739 #define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1740  UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1741 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1742 #endif
1743 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1744 #else
1745 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1746 #endif
1747 #undef SKIP_TAIL
1748  if (len >= sizeof(st_index_t)) {
1749 #if !UNALIGNED_WORD_ACCESS
1750  int align = (int)((st_data_t)data % sizeof(st_index_t));
1751  if (align) {
1752  st_index_t d = 0;
1753  int sl, sr, pack;
1754 
1755  switch (align) {
1756 #ifdef WORDS_BIGENDIAN
1757 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1758  t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1759 #else
1760 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1761  t |= data_at(n) << CHAR_BIT*(n)
1762 #endif
1764 #undef UNALIGNED_ADD
1765  }
1766 
1767 #ifdef WORDS_BIGENDIAN
1768  t >>= (CHAR_BIT * align) - CHAR_BIT;
1769 #else
1770  t <<= (CHAR_BIT * align);
1771 #endif
1772 
1773  data += sizeof(st_index_t)-align;
1774  len -= sizeof(st_index_t)-align;
1775 
1776  sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1777  sr = CHAR_BIT * align;
1778 
1779  while (len >= sizeof(st_index_t)) {
1780  d = *(st_index_t *)data;
1781 #ifdef WORDS_BIGENDIAN
1782  t = (t << sr) | (d >> sl);
1783 #else
1784  t = (t >> sr) | (d << sl);
1785 #endif
1786  h = murmur_step(h, t);
1787  t = d;
1788  data += sizeof(st_index_t);
1789  len -= sizeof(st_index_t);
1790  }
1791 
1792  pack = len < (size_t)align ? (int)len : align;
1793  d = 0;
1794  switch (pack) {
1795 #ifdef WORDS_BIGENDIAN
1796 # define UNALIGNED_ADD(n) case (n) + 1: \
1797  d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1798 #else
1799 # define UNALIGNED_ADD(n) case (n) + 1: \
1800  d |= data_at(n) << CHAR_BIT*(n)
1801 #endif
1803 #undef UNALIGNED_ADD
1804  }
1805 #ifdef WORDS_BIGENDIAN
1806  t = (t << sr) | (d >> sl);
1807 #else
1808  t = (t >> sr) | (d << sl);
1809 #endif
1810 
1811  if (len < (size_t)align) goto skip_tail;
1812 # define SKIP_TAIL 1
1813  h = murmur_step(h, t);
1814  data += pack;
1815  len -= pack;
1816  }
1817  else
1818 #endif
1819  {
1820  do {
1821  h = murmur_step(h, *(st_index_t *)data);
1822  data += sizeof(st_index_t);
1823  len -= sizeof(st_index_t);
1824  } while (len >= sizeof(st_index_t));
1825  }
1826  }
1827 
1828  t = 0;
1829  switch (len) {
1830 #if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1831  /* in this case byteorder doesn't really matter */
1832 #if SIZEOF_ST_INDEX_T > 4
1833  case 7: t |= data_at(6) << 48;
1834  case 6: t |= data_at(5) << 40;
1835  case 5: t |= data_at(4) << 32;
1836  case 4:
1837  t |= (st_index_t)*(uint32_t*)data;
1838  goto skip_tail;
1839 # define SKIP_TAIL 1
1840 #endif
1841  case 3: t |= data_at(2) << 16;
1842  case 2: t |= data_at(1) << 8;
1843  case 1: t |= data_at(0);
1844 #else
1845 #ifdef WORDS_BIGENDIAN
1846 # define UNALIGNED_ADD(n) case (n) + 1: \
1847  t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1848 #else
1849 # define UNALIGNED_ADD(n) case (n) + 1: \
1850  t |= data_at(n) << CHAR_BIT*(n)
1851 #endif
1853 #undef UNALIGNED_ADD
1854 #endif
1855 #ifdef SKIP_TAIL
1856  skip_tail:
1857 #endif
1858  h ^= t; h -= ROTL(t, 7);
1859  h *= C2;
1860  }
1861  h ^= l;
1862 
1863  return murmur_finish(h);
1864 }
1865 
1866 st_index_t
1868 {
1869  return murmur_step(h, i);
1870 }
1871 
1872 st_index_t
1874 {
1875  i += h;
1876 /* no matter if it is BigEndian or LittleEndian,
1877  * we hash just integers */
1878 #if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1879  h = murmur_step(h, i >> 8*8);
1880 #endif
1881  h = murmur_step(h, i);
1882  return h;
1883 }
1884 
1885 st_index_t
1887 {
1888  h = murmur_finish(h);
1889  return h;
1890 }
1891 
1892 #undef st_hash_start
1893 st_index_t
1895 {
1896  return h;
1897 }
1898 
1899 static st_index_t
1900 strhash(st_data_t arg)
1901 {
1902  register const char *string = (const char *)arg;
1903  return st_hash(string, strlen(string), FNV1_32A_INIT);
1904 }
1905 
1906 int
1907 st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
1908 {
1909  unsigned int c1, c2;
1910 
1911  while (1) {
1912  c1 = (unsigned char)*s1++;
1913  c2 = (unsigned char)*s2++;
1914  if (c1 == '\0' || c2 == '\0') {
1915  if (c1 != '\0') return 1;
1916  if (c2 != '\0') return -1;
1917  return 0;
1918  }
1919  if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A';
1920  if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A';
1921  if (c1 != c2) {
1922  if (c1 > c2)
1923  return 1;
1924  else
1925  return -1;
1926  }
1927  }
1928 }
1929 
1930 int
1931 st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
1932 {
1933  unsigned int c1, c2;
1934 
1935  while (n--) {
1936  c1 = (unsigned char)*s1++;
1937  c2 = (unsigned char)*s2++;
1938  if (c1 == '\0' || c2 == '\0') {
1939  if (c1 != '\0') return 1;
1940  if (c2 != '\0') return -1;
1941  return 0;
1942  }
1943  if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A';
1944  if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A';
1945  if (c1 != c2) {
1946  if (c1 > c2)
1947  return 1;
1948  else
1949  return -1;
1950  }
1951  }
1952  return 0;
1953 }
1954 
1955 PUREFUNC(static st_index_t strcasehash(st_data_t));
1956 static st_index_t
1957 strcasehash(st_data_t arg)
1958 {
1959  register const char *string = (const char *)arg;
1960  register st_index_t hval = FNV1_32A_INIT;
1961 
1962  /*
1963  * FNV-1a hash each octet in the buffer
1964  */
1965  while (*string) {
1966  unsigned int c = (unsigned char)*string++;
1967  if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
1968  hval ^= c;
1969 
1970  /* multiply by the 32 bit FNV magic prime mod 2^32 */
1971  hval *= FNV_32_PRIME;
1972  }
1973  return hval;
1974 }
1975 
1976 int
1978 {
1979  return x != y;
1980 }
1981 
1982 st_index_t
1984 {
1985  enum {s1 = 11, s2 = 3};
1986  return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
1987 }
1988 
1989 /* Expand TAB to be suitable for holding SIZ entries in total.
1990  Pre-existing entries remain not deleted inside of TAB, but its bins
1991  are cleared to expect future reconstruction. See rehash below. */
1992 static void
1993 st_expand_table(st_table *tab, st_index_t siz)
1994 {
1995  st_table *tmp;
1996  st_index_t n;
1997 
1998  if (siz <= get_allocated_entries(tab))
1999  return; /* enough room already */
2000 
2001  tmp = st_init_table_with_size(tab->type, siz);
2002  n = get_allocated_entries(tab);
2003  MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2004  free(tab->entries);
2005  if (tab->bins != NULL)
2006  free(tab->bins);
2007  if (tmp->bins != NULL)
2008  free(tmp->bins);
2009  tab->entry_power = tmp->entry_power;
2010  tab->bin_power = tmp->bin_power;
2011  tab->size_ind = tmp->size_ind;
2012  tab->entries = tmp->entries;
2013  tab->bins = NULL;
2014  tab->rebuilds_num++;
2015  free(tmp);
2016 }
2017 
2018 /* Rehash using linear search. */
2019 static void
2020 st_rehash_linear(st_table *tab)
2021 {
2022  st_index_t i, j;
2023  st_table_entry *p, *q;
2024  if (tab->bins) {
2025  free(tab->bins);
2026  tab->bins = NULL;
2027  }
2028  for (i = tab->entries_start; i < tab->entries_bound; i++) {
2029  p = &tab->entries[i];
2030  if (DELETED_ENTRY_P(p))
2031  continue;
2032  for (j = i + 1; j < tab->entries_bound; j++) {
2033  q = &tab->entries[j];
2034  if (DELETED_ENTRY_P(q))
2035  continue;
2036  if (PTR_EQUAL(tab, p, q->hash, q->key)) {
2037  st_assert(p < q);
2038  *p = *q;
2039  MARK_ENTRY_DELETED(q);
2040  tab->num_entries--;
2041  update_range_for_deleted(tab, j);
2042  }
2043  }
2044  }
2045 }
2046 
2047 /* Rehash using index */
2048 static void
2049 st_rehash_indexed(st_table *tab)
2050 {
2051  st_index_t i;
2052  st_index_t const n = bins_size(tab);
2053  unsigned int const size_ind = get_size_ind(tab);
2054  st_index_t *bins = realloc(tab->bins, n);
2055  st_assert(bins != NULL);
2056  tab->bins = bins;
2057  initialize_bins(tab);
2058  for (i = tab->entries_start; i < tab->entries_bound; i++) {
2059  st_table_entry *p = &tab->entries[i];
2060  st_index_t ind;
2061 #ifdef QUADRATIC_PROBE
2062  st_index_t d = 1;
2063 #else
2064  st_index_t peterb = p->hash;
2065 #endif
2066 
2067  if (DELETED_ENTRY_P(p))
2068  continue;
2069 
2070  ind = hash_bin(p->hash, tab);
2071  for(;;) {
2072  st_index_t bin = get_bin(bins, size_ind, ind);
2073  st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2074  if (EMPTY_OR_DELETED_BIN_P(bin)) {
2075  /* ok, new room */
2076  set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2077  break;
2078  }
2079  else if (PTR_EQUAL(tab, q, p->hash, p->key)) {
2080  /* duplicated key; delete it */
2081  st_assert(q < p);
2082  q->record = p->record;
2083  MARK_ENTRY_DELETED(p);
2084  tab->num_entries--;
2085  update_range_for_deleted(tab, bin);
2086  break;
2087  }
2088  else {
2089  /* hash collision; skip it */
2090 #ifdef QUADRATIC_PROBE
2091  ind = hash_bin(ind + d, tab);
2092  d++;
2093 #else
2094  ind = secondary_hash(ind, tab, &peterb);
2095 #endif
2096  }
2097  }
2098  }
2099 }
2100 
2101 /* Reconstruct TAB's bins according to TAB's entries. This function
2102  permits conflicting keys inside of entries. No errors are reported
2103  then. All but one of them are discarded silently. */
2104 static void
2105 st_rehash(st_table *tab)
2106 {
2108  st_rehash_linear(tab);
2109  else
2110  st_rehash_indexed(tab);
2111 }
2112 
2113 #ifdef RUBY
2114 static st_data_t
2115 st_stringify(VALUE key)
2116 {
2117  return (rb_obj_class(key) == rb_cString) ?
2118  rb_str_new_frozen(key) : key;
2119 }
2120 
2121 static void
2122 st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2123 {
2124  st_data_t k = st_stringify(key);
2125  st_table_entry e;
2126  e.hash = do_hash(k, tab);
2127  e.key = k;
2128  e.record = val;
2129 
2130  tab->entries[tab->entries_bound++] = e;
2131  tab->num_entries++;
2132  RB_OBJ_WRITTEN(hash, Qundef, k);
2133  RB_OBJ_WRITTEN(hash, Qundef, val);
2134 }
2135 
2136 static void
2137 st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2138 {
2139  long i;
2140 
2141  for (i = 0; i < argc; /* */) {
2142  st_data_t k = st_stringify(argv[i++]);
2143  st_data_t v = argv[i++];
2144  st_insert(tab, k, v);
2145  RB_OBJ_WRITTEN(hash, Qundef, k);
2146  RB_OBJ_WRITTEN(hash, Qundef, v);
2147  }
2148 }
2149 
2150 static void
2151 st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2152 {
2153  int i;
2154 
2155  /* push elems */
2156  for (i = 0; i < argc; /* */) {
2157  VALUE key = argv[i++];
2158  VALUE val = argv[i++];
2159  st_insert_single(tab, hash, key, val);
2160  }
2161 
2162  /* reindex */
2163  st_rehash(tab);
2164 }
2165 
2166 /* Mimics ruby's { foo => bar } syntax. This function is placed here
2167  because it touches table internals and write barriers at once. */
2168 void
2169 rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash)
2170 {
2171  st_index_t n;
2172  st_table *tab = RHASH(hash)->ntbl;
2173 
2174  st_assert(argc % 2 == 0);
2175  if (! argc)
2176  return;
2177  if (! tab) {
2178  VALUE tmp = rb_hash_new_with_size(argc / 2);
2179  RBASIC_CLEAR_CLASS(tmp);
2180  RHASH(hash)->ntbl = tab = RHASH(tmp)->ntbl;
2181  RHASH(tmp)->ntbl = NULL;
2182  }
2183  n = tab->num_entries + argc / 2;
2184  st_expand_table(tab, n);
2185  if (UNLIKELY(tab->num_entries))
2186  st_insert_generic(tab, argc, argv, hash);
2187  else if (argc <= 2)
2188  st_insert_single(tab, hash, argv[0], argv[1]);
2190  st_insert_linear(tab, argc, argv, hash);
2191  else
2192  st_insert_generic(tab, argc, argv, hash);
2193 }
2194 #endif
RUBY_EXTERN VALUE rb_cString
Definition: ruby.h:1927
#define RBASIC_CLEAR_CLASS(obj)
Definition: internal.h:1469
Definition: st.h:99
int st_insert2(st_table *tab, st_data_t key, st_data_t value, st_data_t(*func)(st_data_t))
Definition: st.c:1185
#define FALSE
Definition: nkf.h:174
#define free
Definition: st.c:170
size_t strlen(const char *)
int st_foreach_check(st_table *tab, int(*func)(ANYARGS), st_data_t arg, st_data_t never ATTRIBUTE_UNUSED)
Definition: st.c:1561
Definition: st.h:79
Definition: st.h:99
#define RB_OBJ_WRITTEN(a, oldv, b)
Definition: ruby.h:1438
st_index_t st_hash_uint32(st_index_t h, uint32_t i)
Definition: st.c:1867
int st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
Definition: st.c:1907
#define IND_DELETED_BIN_P(tab, i)
Definition: st.c:402
#define r1
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2284
#define FNV_32_PRIME
Definition: st.c:1648
st_index_t st_numhash(st_data_t n)
Definition: st.c:1983
void st_add_direct(st_table *tab, st_data_t key, st_data_t value)
Definition: st.c:1173
Definition: st.h:99
#define RESERVED_HASH_SUBSTITUTION_VAL
Definition: st.c:302
PUREFUNC(static st_index_t strcasehash(st_data_t))
#define BIG_CONSTANT(x, y)
Definition: st.c:1665
#define PTR_EQUAL(tab, ptr, hash_val, key_)
Definition: st.c:174
st_table * st_init_strcasetable_with_size(st_index_t size)
Definition: st.c:648
int st_update(st_table *tab, st_data_t key, st_update_callback_func *func, st_data_t arg)
Definition: st.c:1393
unsigned char size_ind
Definition: st.h:81
st_index_t st_hash_start(st_index_t h)
Definition: st.c:1894
#define r3
st_data_t record
Definition: st.c:134
st_data_t st_index_t
Definition: st.h:50
st_table * st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
Definition: st.c:555
st_table * st_init_numtable(void)
Definition: st.c:610
unsigned char entry_power
Definition: st.c:180
VALUE rb_hash_new_with_size(st_index_t size)
Definition: hash.c:430
#define C2
Definition: st.c:1670
st_index_t bins_words
Definition: st.c:189
#define PREFETCH(addr, write_p)
Definition: st.c:117
st_index_t * bins
Definition: st.h:88
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
Definition: st.h:22
#define RHASH(obj)
Definition: internal.h:663
int st_lookup(st_table *tab, st_data_t key, st_data_t *value)
Definition: st.c:1038
VALUE rb_obj_class(VALUE)
call-seq: obj.class -> class
Definition: object.c:277
st_table * st_init_strtable_with_size(st_index_t size)
Definition: st.c:632
st_table * st_init_strtable(void)
Definition: st.c:625
#define UNLIKELY(x)
Definition: internal.h:43
void st_free_table(st_table *tab)
Definition: st.c:666
#define SIZEOF_ST_INDEX_T
Definition: st.h:59
st_table * st_init_strcasetable(void)
Definition: st.c:640
st_index_t st_hash_end(st_index_t h)
Definition: st.c:1886
#define val
size_t st_memsize(const st_table *tab)
Definition: st.c:676
#define UNALIGNED_ADD_ALL
unsigned char size_ind
Definition: st.c:186
Definition: st.c:131
void rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash)
Definition: st.c:2169
#define FOUND_BIN
Definition: st.c:717
#define EXPECT(expr, val)
Definition: st.c:118
#define snprintf
Definition: subst.h:6
#define UNDEFINED_ENTRY_IND
Definition: st.c:380
int st_foreach(st_table *tab, int(*func)(ANYARGS), st_data_t arg)
Definition: st.c:1554
#define ENTRY_BASE
Definition: st.c:372
register int hval
Definition: zonetab.h:82
#define EMPTY_BIN_P(b)
Definition: st.c:395
#define RESERVED_HASH_VAL
Definition: st.c:301
#define IND_EMPTY_BIN_P(tab, i)
Definition: st.c:401
int argc
Definition: ruby.c:187
st_index_t st_hash_t
Definition: st.c:129
st_index_t st_keys(st_table *tab, st_data_t *keys, st_index_t size)
Definition: st.c:1592
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1661
st_index_t(* hash)(ANYARGS)
Definition: st.h:63
#define realloc
Definition: st.c:169
#define ATTRIBUTE_UNUSED
Definition: st.c:119
#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS
Definition: st.c:324
#define malloc
Definition: st.c:167
unsigned int rebuilds_num
Definition: st.h:83
st_hash_t hash
Definition: st.c:132
st_index_t st_hash(const void *ptr, size_t len, st_index_t h)
Definition: st.c:1728
#define TRUE
Definition: nkf.h:175
#define MINIMAL_POWER2
Definition: st.c:316
st_index_t st_keys_check(st_table *tab, st_data_t *keys, st_index_t size, st_data_t never ATTRIBUTE_UNUSED)
Definition: st.c:1599
#define MARK_BIN_DELETED(tab, i)
Definition: st.c:386
#define DELETED_BIN_P(b)
Definition: st.c:396
st_index_t st_values(st_table *tab, st_data_t *values, st_index_t size)
Definition: st.c:1630
st_index_t entries_bound
Definition: st.h:92
st_retval
Definition: st.h:99
#define MARK_ENTRY_DELETED(e_ptr)
Definition: st.c:407
unsigned long VALUE
Definition: ruby.h:85
#define FNV1_32A_INIT
Definition: st.c:1643
int st_delete(st_table *tab, st_data_t *key, st_data_t *value)
Definition: st.c:1313
unsigned char bin_power
Definition: st.h:81
st_data_t key
Definition: st.c:133
char bin[32]
Definition: siphash.c:135
#define CHAR_BIT
Definition: ruby.h:196
int st_shift(st_table *tab, st_data_t *key, st_data_t *value)
Definition: st.c:1335
#define REBUILD_THRESHOLD
Definition: st.c:723
unsigned int uint32_t
Definition: sha2.h:101
register unsigned int len
Definition: zonetab.h:51
#define getenv(name)
Definition: win32.c:71
unsigned char entry_power
Definition: st.h:81
#define ROTL(x, n)
Definition: st.c:1666
int size
Definition: encoding.c:57
st_table * st_copy(st_table *old_tab)
Definition: st.c:1237
#define f
int st_insert(st_table *tab, st_data_t key, st_data_t value)
Definition: st.c:1098
#define type_numhash
Definition: st.c:137
#define ANYARGS
Definition: defines.h:173
VALUE rb_eRuntimeError
Definition: error.c:800
#define st_assert(cond)
Definition: st.c:125
int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: st.c:1931
const struct st_hash_type * type
Definition: st.h:84
Definition: st.h:99
int st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value, st_data_t never ATTRIBUTE_UNUSED)
Definition: st.c:1324
#define ST_INIT_VAL
Definition: st.c:159
int st_numcmp(st_data_t x, st_data_t y)
Definition: st.c:1977
#define ST_INIT_VAL_BYTE
Definition: st.c:160
VALUE rb_str_new_frozen(VALUE)
Definition: string.c:1158
st_index_t st_values_check(st_table *tab, st_data_t *values, st_index_t size, st_data_t never ATTRIBUTE_UNUSED)
Definition: st.c:1637
void st_clear(st_table *tab)
Definition: st.c:655
#define r2
#define data_at(n)
const struct st_hash_type st_hashtype_num
Definition: st.c:138
#define UNDEFINED_BIN_IND
Definition: st.c:381
struct st_table_entry st_table_entry
Definition: st.h:75
#define MAX_POWER2
Definition: st.c:262
#define C1
Definition: st.c:1669
#define NULL
Definition: _sdbm.c:102
#define Qundef
Definition: ruby.h:439
#define MARK_BIN_EMPTY(tab, i)
Definition: st.c:376
st_table_entry * entries
Definition: st.h:94
st_index_t st_hash_uint(st_index_t h, st_index_t i)
Definition: st.c:1873
void st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED, st_data_t never ATTRIBUTE_UNUSED)
Definition: st.c:1379
st_index_t num_entries
Definition: st.h:86
int st_update_callback_func(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
Definition: st.h:116
#define DELETED_ENTRY_P(e_ptr)
Definition: st.c:408
st_table * st_init_table(const struct st_hash_type *type)
Definition: st.c:602
st_index_t entries_start
Definition: st.h:92
st_table * st_init_numtable_with_size(st_index_t size)
Definition: st.c:617
#define COLLISION
Definition: st.c:716
#define EMPTY_OR_DELETED_BIN_P(b)
Definition: st.c:397
unsigned char bin_power
Definition: st.c:184
char ** argv
Definition: ruby.c:188
int st_get_key(st_table *tab, st_data_t key, st_data_t *result)
Definition: st.c:1062