BAM
Abstract Machine for Bottom-Up Evaluation with the Push Method
set_e.h
Go to the documentation of this file.
1 // ============================================================================
2 // Project: Deductive Database
3 // Filename: set_e.h
4 // Purpose: Set of tuples (binding pattern b..b): Using Extendible Hashing
5 // Last Change: 04.08.2017
6 // Language: C++
7 // EMail: brass@informatik.uni-halle.de
8 // WWW: http://www.informatik.uni-halle.de/~brass/
9 // Address: Feldschloesschen 15, D-06120 Halle (Saale), GERMANY
10 // Copyright: (c) 2016-2017 by Stefan Brass
11 // License: See file "LICENSE" for copying conditions.
12 // Note: There is no warranty at all - this code may contain bugs.
13 // ============================================================================
14 
15 
22 //=============================================================================
23 // Include File Frame:
24 //=============================================================================
25 
26 #ifndef SET_E_INCLUDED
27 #define SET_E_INCLUDED
28 
29 //=============================================================================
30 // Used Types and Macros:
31 //=============================================================================
32 
33 #ifndef VER_INCLUDED
34 #include "../base/ver.h"
35 #endif
36 
37 #ifndef STR_INCLUDED
38 #include "../base/str.h"
39 #endif
40 
41 #ifndef CHECK_INCLUDED
42 #include "../base/check.h"
43 #endif
44 
45 #ifndef PAGE_INCLUDED
46 #include "../mem/page.h"
47 #endif
48 
49 #ifndef MPOOL_INCLUDED
50 #include "../mem/mpool.h"
51 #endif
52 
53 #ifndef RELSIZE_INCLUDED
54 #include "../rel/relsize.h"
55 #endif
56 
57 #ifndef REL_INCLUDED
58 #include "../rel/rel.h"
59 #endif
60 
61 
62 //=============================================================================
63 // Private Constants:
64 //=============================================================================
65 
66 //-----------------------------------------------------------------------------
67 // SET_E_MAXPAGES: Maximal hash table size in memory pages.
68 //-----------------------------------------------------------------------------
69 
70 // static const int SET_E_MAXPAGES = 1024;
71 
72  // Since the hash table size is doubled when necessary,
73  // SET_E_MAXPAGES should be a power of 2
74  // (assuming that a memory page contains a power of 2 of pointers).
75  // For 4 KB pages and 64 bit pointers, one page contains 512 pointers,
76  // so the total size of the hash table is SET_E_MAXPAGES * 512.
77 
78  // The number of memory pages used for buckets is unlimited.
79  // If unavoidable, overflow buckets are linked to a bucket.
80  // So there is no hard limit on the number of entries in the hash table
81  // (but the performance decreases, of course).
82 
83 //-----------------------------------------------------------------------------
84 // SET_E_BUCKETSIZE: Number of entries that fit into one bucket.
85 //-----------------------------------------------------------------------------
86 
87 static const int SET_E_BUCKETSIZE = 16;
88 
89 //-----------------------------------------------------------------------------
90 // SET_E_MAGIC: Magic number (identifies objects of this class).
91 //-----------------------------------------------------------------------------
92 
93 static const long SET_E_MAGIC = 0x5354450AL; // 'STE\n'
94 
95 
96 //=============================================================================
97 // Private Class for the Buckets in the Hash Table:
98 //=============================================================================
99 
100 
101 template<class T> class set_e_bucket_c {
102  public:
103  int NumElems;
104  int LocalBits;
105  set_e_bucket_c<T> *Overflow;
106  T Elems[SET_E_BUCKETSIZE];
107 };
108 
109 #define SET_E_BUCKET_T set_e_bucket_c<T>*
110 #define SET_E_BUCKET_NULL (static_cast<set_e_bucket_c<T> *>(0))
111 #define SET_E_BUCKET_L0_NULL (static_cast<set_e_bucket_c<T> *>(0))
112 #define SET_E_BUCKET_L1_NULL (static_cast<set_e_bucket_c<T> **>(0))
113 #define SET_E_BUCKET_L2_NULL (static_cast<set_e_bucket_c<T> ***>(0))
114 
115 // The following page sizes on different levels must be a power of 2.
116 // With a normal value for VER_PAGESIZE like 4KB or 8KB
117 // and a normal pointer size like 32 Bit or 64 Bit the following will work.
118 // In unusual cases, one must explizitly select a page size that is a power
119 // of 2, such that the pointers fit into a page of VER_PAGESIZE bytes.
120 // I.e., some memory will be unused in this case.
121 
122 #define SET_E_PAGESIZE_L0 PAGE_SIZE(set_e_bucket_c<T> *)
123 #define SET_E_PAGESIZE_L1 PAGE_SIZE(set_e_bucket_c<T> **)
124 #define SET_E_PAGESIZE_L2 PAGE_SIZE(set_e_bucket_c<T> ***)
125 
126 //=============================================================================
127 // Class Template for Sets (for Duplicate Check):
128 //=============================================================================
129 
177 template<class T> class set_e_c: public rel_c {
178  public:
179 
180 //-----------------------------------------------------------------------------
181 // Constructor, Destructor:
182 //-----------------------------------------------------------------------------
183 
184  // Constructor:
185  set_e_c(str_t set_e_name);
186 
187  // Destructor:
188  ~set_e_c();
189 
190 //-----------------------------------------------------------------------------
191 // Copy-constructor and assignment operator are not supported for this class:
192 //-----------------------------------------------------------------------------
193 
194  private:
195 
196  set_e_c(const set_e_c& obj); // Not implemented
197  set_e_c& operator=(const set_e_c& obj); // Not implemented
198 
199 
200 //=============================================================================
201 // (Object) Methods:
202 //=============================================================================
203 
204  public:
205 
206 //-----------------------------------------------------------------------------
207 // insert: Insert an element into the set (returns true if successful).
208 //-----------------------------------------------------------------------------
209 
210  // This method returns true if the element was not already in the set
211  // and was successfully inserted (i.e. if it is not a duplicate).
212  // It returns false if the element was already in the set (duplicate).
213  // It also returns false if it was impossible to insert the element
214  // because there was not enough memory.
215  // By pretending that it was a duplicate, recursion at least comes
216  // to an end and the program terminates.
217  // One should check the method mem_err() from time to time
218  // (at least at the end) to see whether this problem occurred
219  // or the result is reliable.
220  // A memory error is also logged in the class err_c.
221  // So even if one does not check mem_err() for every set object,
222  // one will notice the error if one at least checks the global error
223  // log or has enabled output of error messages.
224 
225  bool insert(T elem);
226 
227 
228 //-----------------------------------------------------------------------------
229 // contains: Check whether an element is contained in the set.
230 //-----------------------------------------------------------------------------
231 
232  bool contains(T elem) const {
233 
234  // Check this object:
235  CHECK_VALID("set_e_c::contains");
236 
237  // Search in hash table. First compute hash function:
238  int hash_val = elem.hash();
239  CHECK(hash_val >= 0, "set_e_c::contains: hash_val is negative");
240  hash_val &= BitMask;
241 
242  // Get hash table entry:
243  // Note: This is basically the same code as below in function
244  // hash_entry(). However, the const qualifier for contains
245  // makes it impossible to call this function.
246 
247  SET_E_BUCKET_T bucket;
248  // The following code sets bucket the the hash table entry:
249  switch(Level) {
250  case 0:
251  CHECK(hash_val < SET_E_PAGESIZE_L0,
252  "set_e_c::contains: hash_val invalid (L0)");
253  bucket = HashTab.Level0[hash_val];
254  break;
255 
256  case 1:
257  CHECK(hash_val < SET_E_PAGESIZE_L1 * SET_E_PAGESIZE_L0,
258  "set_e_c::contains: hash_val invalid (L1)");
259  {
260  SET_E_BUCKET_T *page =
261  HashTab.Level1[hash_val / SET_E_PAGESIZE_L0];
262  bucket = page[hash_val % SET_E_PAGESIZE_L0];
263  }
264  break;
265 
266  case 2:
267  CHECK(hash_val < SET_E_PAGESIZE_L2 * SET_E_PAGESIZE_L1 *
268  SET_E_PAGESIZE_L0,
269  "set_e_c::contains: hash_val invalid (L2)");
270  {
271  int index2 = hash_val /
272  (SET_E_PAGESIZE_L1 * SET_E_PAGESIZE_L0);
273  int index1 = hash_val %
274  (SET_E_PAGESIZE_L1 * SET_E_PAGESIZE_L0);
275  int index0 = index1 % SET_E_PAGESIZE_L0;
276  index1 = index1 / SET_E_PAGESIZE_L0;
277  SET_E_BUCKET_T **page_level1 = HashTab.Level2[index2];
278  SET_E_BUCKET_T *page_level0 = page_level1[index1];
279  bucket = page_level0[index0];
280  }
281  break;
282 
283  default:
284  CHECK_IMPOSSIBLE("set_e_c::contains: illegal level");
285  return false;
286  }
287 
288  // Now search the linked list of buckets in that entry:
289  for(; bucket != SET_E_BUCKET_NULL; bucket = bucket->Overflow) {
290  T *elem_ptr = bucket->Elems;
291  for(int i = 0; i < bucket->NumElems; i++)
292  if(*elem_ptr++ == elem)
293  return true;
294  }
295  return false;
296  }
297 
298 //-----------------------------------------------------------------------------
299 // hash_size: Size of the hash table.
300 //-----------------------------------------------------------------------------
301 
302  int hash_size() const {
303  CHECK_VALID("set_e_c::hash_size");
304  return HashSize;
305  }
306 
307 //-----------------------------------------------------------------------------
308 // num_buckets: Number of distinct buckets that are used (includes overflow).
309 //-----------------------------------------------------------------------------
310 
311  int num_buckets() const {
312  CHECK_VALID("set_e_c::num_buckets");
313  return NumBuckets;
314  }
315 
316 //-----------------------------------------------------------------------------
317 // num_overflow: Number of overflow buckets that are used.
318 //-----------------------------------------------------------------------------
319 
320  int num_overflow() const {
321  CHECK_VALID("set_e_c::num_overflow");
322  return NumOverflow;
323  }
324 
325 //-----------------------------------------------------------------------------
326 // size_value: Values of data-structure specific size measures.
327 //-----------------------------------------------------------------------------
328 
329  int size_value(relsize_t relsize) const {
330  CHECK_VALID("set_e_c::size_value");
331  CHECK(relsize_valid(relsize),
332  "set_e_c::size_value: invalid size measure");
333 
334  // Return value of selected size measure:
335  switch(relsize) {
336  case RELSIZE_OBJ_SIZE:
337  return static_cast<int>(sizeof(set_e_c<T>));
338  case RELSIZE_HASHTAB_SIZE:
339  return HashSize;
340  case RELSIZE_BUCKET_SIZE:
341  return SET_E_BUCKETSIZE;
342  case RELSIZE_BUCKET_BYTES:
343  return static_cast<int>(
344  sizeof(set_e_bucket_c<T>));
345  case RELSIZE_TOTAL_BUCKETS:
346  return NumBuckets;
347  case RELSIZE_OVERFLOW_BUCKETS:
348  return NumOverflow;
349  default:
350  return -1;
351  }
352  }
353 
354 //=============================================================================
355 // Debugging Support:
356 //=============================================================================
357 
358 #if VER_DEBUG
359  public:
360  // Integrity check:
361  str_t check() const;
362 
363  private:
364  // Magic number (identifies objects of this class for debugging).
365  long Magic; // Must be "SET_E_MAGIC".
366 #endif
367 
368 #if VER_DUMP
369  private:
370  // Auxiliary Function to display bucket data in a data structure dump:
371  void dump_bucket(int i, SET_E_BUCKET_T bucket) const;
372 
373  public:
374  // Display data structure:
375  void dump(str_t headline = STR_NULL) const;
376 #endif
377 
378 
379 //=============================================================================
380 // Private Object Members:
381 //=============================================================================
382 
383  private:
384 
385  // Hash table size (in number of bits used):
386  int GlobalBits;
387 
388  // Corresponding bit mask for filtering part of hash value used:
389  int BitMask;
390 
391  // Hash table size (total number of entries):
392  int HashSize;
393 
394  // Hash table size (entries in the HashTab array, top level):
395  int HashTopSize;
396 
397  // Number of distinct buckets in hash table (includes overflow ones):
398  int NumBuckets;
399 
400  // Number of overflow buckets:
401  int NumOverflow;
402 
403  // Current Memory Page for Storing Bucket Objects:
404  page_t MemPage;
405 
406  // Pointer to next free place in current memory page:
407  set_e_bucket_c<T> *MemPtr;
408 
409  // Number of free places in current page:
410  int MemFree;
411 
412  // Current indirection level of array HashTab (0, 1, 2):
413  int Level;
414 
415  // Hash table:
416  union {
417  SET_E_BUCKET_T Level0[SET_E_PAGESIZE_L0];
418  SET_E_BUCKET_T * Level1[SET_E_PAGESIZE_L1];
419  SET_E_BUCKET_T ** Level2[SET_E_PAGESIZE_L2];
420  } HashTab;
421 
422 
423 //=============================================================================
424 // Auxiliary Functions:
425 //=============================================================================
426 
427  private:
428 
429  //---------------------------------------------------------------------
430  // Get new hashtable bucket:
431  //---------------------------------------------------------------------
432 
433  inline SET_E_BUCKET_T get_new_bucket(int bits)
434  {
435  // Get new memory page if current page has not enough space:
436  if(MemFree <= 0) {
437  // Allocate memory page, handle failure:
438  page_t page = MemPool.alloc_page();
439  if(page == PAGE_NULL) {
440  MemErr = true;
441  return SET_E_BUCKET_NULL;
442  }
443 
444  // Make new page available:
445  MemPage = page;
446  MemPtr = reinterpret_cast<SET_E_BUCKET_T>(MemPage);
447  MemFree = PAGE_SIZE(set_e_bucket_c<T>);
448  }
449 
450  // Cut new hashtable bucket from memory page:
451  CHECK(MemFree > 0, "set_e_c::get_new_bucket: Page too small");
452  SET_E_BUCKET_T bucket = MemPtr++;
453  MemFree--;
454 
455  // Increase number of buckets used (for statistics):
456  NumBuckets++;
457 
458  // Initialize and return new bucket:
459  bucket->NumElems = 0;
460  bucket->LocalBits = bits;
461  bucket->Overflow = SET_E_BUCKET_NULL;
462  return bucket;
463  }
464 
465 
466  //---------------------------------------------------------------------
467  // Get a pointer to the i-th entry of the hash table.
468  //---------------------------------------------------------------------
469 
470  //inline SET_E_BUCKET_T const * hashtab_entry(int i) const {
471  inline SET_E_BUCKET_T * hashtab_entry(int i) {
472 
473  // Check this object:
474  CHECK_VALID("set_e_c::hashtab_entry");
475 
476  // CHECK parameter:
477  CHECK(i >= 0, "set_e_c::hashtab_entry: i is negative");
478  CHECK(i < HashSize,
479  "set_e_c::hashtab_entry: i is too large");
480 
481  // Get hash table entry:
482  switch(Level) {
483  case 0:
484  CHECK(i < SET_E_PAGESIZE_L0,
485  "set_e_c::hashtab_entry: i invalid (L0)");
486  return &(HashTab.Level0[i]);
487  case 1:
488  CHECK(i < SET_E_PAGESIZE_L1 * SET_E_PAGESIZE_L0,
489  "set_e_c::hashtab_entry: i invalid (L1)");
490  {
491  SET_E_BUCKET_T *page =
492  HashTab.Level1[i / SET_E_PAGESIZE_L0];
493  return page + (i % SET_E_PAGESIZE_L0);
494  }
495  case 2:
496  CHECK(i < SET_E_PAGESIZE_L2 * SET_E_PAGESIZE_L1 *
497  SET_E_PAGESIZE_L0,
498  "set_e_c::hashtab_entry: i invalid (L2)");
499  {
500  int index2 = i / (SET_E_PAGESIZE_L1*SET_E_PAGESIZE_L0);
501  int index1 = i % (SET_E_PAGESIZE_L1*SET_E_PAGESIZE_L0);
502  int index0 = index1 % SET_E_PAGESIZE_L0;
503  index1 = index1 / SET_E_PAGESIZE_L0;
504  SET_E_BUCKET_T **indirect_page = HashTab.Level2[index2];
505  SET_E_BUCKET_T *page = indirect_page[index1];
506  return page + index0;
507  }
508  default:
510  "set_e_c::hashtab_entry: illegal level");
511  return 0;
512  }
513  }
514 
515  //---------------------------------------------------------------------
516  // Set a hash table entry:
517  //---------------------------------------------------------------------
518 
519  inline void set_hashtab_entry(int i, SET_E_BUCKET_T bucket)
520  {
521  // CHECK parameter:
522  CHECK(i >= 0, "set_e_c::set_hashtab_entry: i is negative");
523  CHECK(i < HashSize,
524  "set_e_c::set_hashtab_entry: i is too large");
525 
526  // Determine pointer:
527  SET_E_BUCKET_T *entry = hashtab_entry(i);
528 
529  // Set entry:
530  *entry = bucket;
531  }
532 
533 
534  //---------------------------------------------------------------------
535  // Copy Level 0 page:
536  //---------------------------------------------------------------------
537 
538  inline SET_E_BUCKET_T *copy_page0(SET_E_BUCKET_T *input_page);
539 
540  //---------------------------------------------------------------------
541  // Copy Level 1 page including all linked level 0 pages:
542  //---------------------------------------------------------------------
543 
544  inline SET_E_BUCKET_T **copy_page1(SET_E_BUCKET_T **input_page);
545 
546  //---------------------------------------------------------------------
547  // Double Hash Table:
548  //---------------------------------------------------------------------
549 
550  inline bool double_table();
551 
552 //=============================================================================
553 // End of Class Template:
554 //=============================================================================
555 
556 };
557 
558 
559 //=============================================================================
560 // End of Include File:
561 //=============================================================================
562 
563 #endif
564 
Definition: set_e.h:101
#define CHECK_IMPOSSIBLE(MSG)
Definition: check.h:151
Definition: set_e.h:177
Definition: rel.h:72
#define CHECK_VALID(EX)
Definition: check.h:85
const char * str_t
Definition: str.h:41
#define STR_NULL
Definition: str.h:52
#define CHECK(EX, MSG)
Definition: check.h:69