DEADSOFTWARE

some optimizations in hash table
[d2df-sdl.git] / src / shared / hashtable.pas
1 (* Copyright (C) DooM 2D:Forever Developers
2 *
3 * This program is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 *)
16 {$INCLUDE a_modes.inc}
17 {.$DEFINE RBHASH_DEBUG_RESIZE}
18 {.$DEFINE RBHASH_DEBUG_INSERT}
19 {.$DEFINE RBHASH_DEBUG_DELETE}
20 {.$DEFINE RBHASH_DEBUG_COMPACT}
21 {$IF DEFINED(D2F_DEBUG)}
22 {$DEFINE RBHASH_SANITY_CHECKS}
23 {$ENDIF}
24 // hash table (robin hood)
25 unit hashtable;
27 interface
30 type
31 // WARNING! don't put structures into hash, use ponters or ids!
32 generic THashBase<KeyT, ValueT> = class(TObject)
33 private
34 const InitSize = {$IF DEFINED(D2F_DEBUG)}16{$ELSE}512{$ENDIF}; // *MUST* be power of two
35 const LoadFactorPrc = 90; // it is ok for robin hood hashes
37 public
38 type THashFn = function (constref o: KeyT): LongWord;
39 type TEquFn = function (constref a, b: KeyT): Boolean;
41 private
42 type
43 PEntry = ^TEntry;
44 TEntry = record
45 key: KeyT;
46 value: ValueT;
47 hash: LongWord; // key hash or 0
48 nextFree: Integer;
49 end;
51 private
52 hashfn: THashFn;
53 equfn: TEquFn;
54 mBuckets: array of PEntry; // entries, points to mEntries elements
55 mBucketsUsed: Integer;
56 mEntries: array of TEntry;
57 mEntriesUsed: Integer;
58 mFreeEntryHead: Integer;
59 mSeed: LongWord;
61 private
62 function allocEntry (): PEntry;
63 procedure releaseEntry (e: PEntry);
65 //function distToStIdx (idx: LongWord): LongWord; inline;
67 procedure putEntryInternal (swpe: PEntry);
69 function getCapacity (): Integer; inline;
71 public
72 constructor Create (ahashfn: THashFn; aequfn: TEquFn);
73 destructor Destroy (); override;
75 procedure clear ();
77 procedure rehash ();
78 procedure compact (); // call this instead of `rehash()` after alot of deletions
80 function get (constref akey: KeyT; out rval: ValueT): Boolean; // `true`: found
81 function put (constref akey: KeyT; constref aval: ValueT): Boolean; // `true`: replaced
82 function has (constref akey: KeyT): Boolean; // `true`: found
83 function del (constref akey: KeyT): Boolean; // `true`: deleted
85 property count: Integer read mBucketsUsed;
86 property capacity: Integer read getCapacity;
87 end;
90 type
91 TJoaatHasher = record
92 private
93 seed: LongWord; // initial seed value; MUST BE FIRST
94 hash: LongWord; // current value
96 public
97 constructor Create (aseed: LongWord);
99 procedure reset (); inline; overload;
100 procedure reset (aseed: LongWord); inline; overload;
102 procedure put (const buf; len: LongWord);
104 // current hash value
105 // you can continue putting data, as this is not destructive
106 function value: LongWord; inline;
107 end;
110 type
111 THashIntInt = specialize THashBase<Integer, Integer>;
113 function hashNewIntInt (): THashIntInt;
116 function u32Hash (a: LongWord): LongWord; inline;
117 function fnvHash (const buf; len: LongWord): LongWord;
118 function joaatHash (const buf; len: LongWord): LongWord;
120 function nextPOT (x: LongWord): LongWord; inline;
123 implementation
125 uses
126 SysUtils;
129 // ////////////////////////////////////////////////////////////////////////// //
130 {$PUSH}
131 {$RANGECHECKS OFF}
132 function nextPOT (x: LongWord): LongWord; inline;
133 begin
134 result := x;
135 result := result or (result shr 1);
136 result := result or (result shr 2);
137 result := result or (result shr 4);
138 result := result or (result shr 8);
139 result := result or (result shr 16);
140 // already pot?
141 if (x <> 0) and ((x and (x-1)) = 0) then result := result and (not (result shr 1)) else result += 1;
142 end;
143 {$POP}
146 // ////////////////////////////////////////////////////////////////////////// //
147 function hiiequ (constref a, b: Integer): Boolean; begin result := (a = b); end;
149 {$PUSH}
150 {$RANGECHECKS OFF}
151 function hiihash (constref k: Integer): LongWord;
152 begin
153 result := k;
154 result -= (result shl 6);
155 result := result xor (result shr 17);
156 result -= (result shl 9);
157 result := result xor (result shl 4);
158 result -= (result shl 3);
159 result := result xor (result shl 10);
160 result := result xor (result shr 15);
161 end;
162 {$POP}
165 function hashNewIntInt (): THashIntInt;
166 begin
167 result := THashIntInt.Create(hiihash, hiiequ);
168 end;
171 // ////////////////////////////////////////////////////////////////////////// //
172 {$PUSH}
173 {$RANGECHECKS OFF}
174 constructor TJoaatHasher.Create (aseed: LongWord);
175 begin
176 reset(aseed);
177 end;
180 procedure TJoaatHasher.reset (); inline; overload;
181 begin
182 hash := seed;
183 end;
186 procedure TJoaatHasher.reset (aseed: LongWord); inline; overload;
187 begin
188 seed := aseed;
189 hash := aseed;
190 end;
193 procedure TJoaatHasher.put (const buf; len: LongWord);
194 var
195 bytes: PByte;
196 h: LongWord;
197 begin
198 if (len < 1) then exit;
199 bytes := PByte(@buf);
200 h := hash;
201 while (len > 0) do
202 begin
203 h += bytes^;
204 h += (h shl 10);
205 h := h xor (h shr 6);
206 Dec(len);
207 Inc(bytes);
208 end;
209 hash := h;
210 end;
213 function TJoaatHasher.value: LongWord; inline;
214 begin
215 result := hash;
216 result += (result shl 3);
217 result := result xor (result shr 11);
218 result += (result shl 15);
219 end;
220 {$POP}
223 function joaatHash (const buf; len: LongWord): LongWord;
224 var
225 h: TJoaatHasher;
226 begin
227 h := TJoaatHasher.Create(0);
228 h.put(buf, len);
229 result := h.value;
230 end;
233 // ////////////////////////////////////////////////////////////////////////// //
234 {$PUSH}
235 {$RANGECHECKS OFF}
236 // fnv-1a: http://www.isthe.com/chongo/tech/comp/fnv/
237 function fnvHash (const buf; len: LongWord): LongWord;
238 var
239 b: PByte;
240 begin
241 b := @buf;
242 result := 2166136261; // fnv offset basis
243 while (len > 0) do
244 begin
245 result := result xor b^;
246 result := result*16777619; // 32-bit fnv prime
247 Inc(b);
248 Dec(len);
249 end;
250 end;
251 {$POP}
254 {$PUSH}
255 {$RANGECHECKS OFF}
256 function u32Hash (a: LongWord): LongWord; inline;
257 begin
258 result := a;
259 result -= (result shl 6);
260 result := result xor (result shr 17);
261 result -= (result shl 9);
262 result := result xor (result shl 4);
263 result -= (result shl 3);
264 result := result xor (result shl 10);
265 result := result xor (result shr 15);
266 end;
267 {$POP}
270 // ////////////////////////////////////////////////////////////////////////// //
271 constructor THashBase.Create (ahashfn: THashFn; aequfn: TEquFn);
272 begin
273 if not assigned(ahashfn) then raise Exception.Create('cannot create hash without hash function');
274 if not assigned(aequfn) then raise Exception.Create('cannot create hash without equality function');
276 hashfn := ahashfn;
277 equfn := aequfn;
278 mSeed := u32Hash($29a);
280 clear();
281 end;
284 destructor THashBase.Destroy ();
285 begin
286 mBuckets := nil;
287 mEntries := nil;
288 inherited;
289 end;
292 function THashBase.getCapacity (): Integer; inline; begin result := Length(mBuckets); end;
295 function THashBase.allocEntry (): PEntry;
296 begin
297 {$IFDEF RBHASH_SANITY_CHECKS}
298 if (mFreeEntryHead = -1) then raise Exception.Create('internal error in hash entry allocator (0)');
299 if (mEntries[mFreeEntryHead].hash <> 0) then raise Exception.Create('internal error in hash entry allocator (1)');
300 {$ENDIF}
301 result := @mEntries[mFreeEntryHead];
302 mFreeEntryHead := result.nextFree;
303 Inc(mEntriesUsed);
304 result.nextFree := -1;
305 end;
308 procedure THashBase.releaseEntry (e: PEntry);
309 var
310 idx: LongWord;
311 begin
312 {$IFDEF RBHASH_SANITY_CHECKS}
313 if (mEntriesUsed = 0) then raise Exception.Create('internal error in hash entry allocator');
314 if (e = nil) then raise Exception.Create('internal error in hash entry allocator (trying to release nil entry)');
315 if (e.nextFree <> -1) or (e.hash = 0) then raise Exception.Create('internal error in hash entry allocator (trying to release unallocated entry)');
316 {$ENDIF}
317 idx := LongWord((PtrUInt(e)-PtrUInt(@mEntries[0])) div sizeof(mEntries[0]));
318 {$IFDEF RBHASH_SANITY_CHECKS}
319 if (idx >= Length(mEntries)) then raise Exception.Create('internal error in hash entry allocator (invalid calculated index)');
320 {$ENDIF}
321 e.hash := 0;
322 e.nextFree := mFreeEntryHead;
323 mFreeEntryHead := idx;
324 Dec(mEntriesUsed);
325 end;
328 (*
329 function THashBase.distToStIdx (idx: LongWord): LongWord; inline;
330 begin
331 {$IFDEF RBHASH_SANITY_CHECKS}
332 assert(idx < Length(mBuckets));
333 assert(mBuckets[idx] <> nil);
334 {$ENDIF}
335 result := mBuckets[idx].hash and High(mBuckets);
336 if (result <= idx) then result := idx-result else result := idx+(Length(mBuckets)-result);
337 end;
338 *)
341 function THashBase.has (constref akey: KeyT): Boolean;
342 var
343 khash, idx: LongWord;
344 dist, pdist: LongWord;
345 bhigh: LongWord;
346 begin
347 result := false;
348 if (mBucketsUsed = 0) then exit;
350 bhigh := High(mBuckets);
351 khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
352 idx := khash and bhigh;
353 if (mBuckets[idx] = nil) then exit;
355 for dist := 0 to bhigh do
356 begin
357 if (mBuckets[idx] = nil) then break;
358 //pdist := distToStIdx(idx);
359 pdist := mBuckets[idx].hash and bhigh;
360 if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
361 //
362 if (dist > pdist) then break;
363 result := (mBuckets[idx].hash = khash) and equfn(mBuckets[idx].key, akey);
364 if result then break;
365 idx := (idx+1) and bhigh;
366 end;
367 end;
370 function THashBase.get (constref akey: KeyT; out rval: ValueT): Boolean;
371 var
372 khash, idx: LongWord;
373 dist, pdist: LongWord;
374 bhigh: LongWord;
375 begin
376 result := false;
377 if (mBucketsUsed = 0) then begin rval := Default(ValueT); exit; end;
379 bhigh := High(mBuckets);
380 khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
381 idx := khash and bhigh;
382 if (mBuckets[idx] = nil) then begin rval := Default(ValueT); exit; end;
384 for dist := 0 to bhigh do
385 begin
386 if (mBuckets[idx] = nil) then break;
387 //pdist := distToStIdx(idx);
388 pdist := mBuckets[idx].hash and bhigh;
389 if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
390 //
391 if (dist > pdist) then break;
392 result := (mBuckets[idx].hash = khash) and equfn(mBuckets[idx].key, akey);
393 if result then
394 begin
395 rval := mBuckets[idx].value;
396 break;
397 end;
398 idx := (idx+1) and bhigh;
399 end;
401 if not result then rval := Default(ValueT); // just in case
402 end;
405 procedure THashBase.putEntryInternal (swpe: PEntry);
406 var
407 idx, dist, pcur, pdist: LongWord;
408 tmpe: PEntry; // current entry to swap (or nothing)
409 bhigh: LongWord;
410 begin
411 bhigh := High(mBuckets);
412 idx := swpe.hash and bhigh;
413 {$IFDEF RBHASH_DEBUG_INSERT}writeln('inserting key ', swpe.key, '; value=', swpe.value, '; wantidx=', idx, '; bhigh=', bhigh);{$ENDIF}
414 pcur := 0;
415 for dist := 0 to bhigh do
416 begin
417 if (mBuckets[idx] = nil) then
418 begin
419 // put entry
420 {$IFDEF RBHASH_DEBUG_INSERT}writeln(' inserted to ', idx);{$ENDIF}
421 mBuckets[idx] := swpe;
422 Inc(mBucketsUsed);
423 break;
424 end;
425 //pdist := distToStIdx(idx);
426 pdist := mBuckets[idx].hash and bhigh;
427 if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
428 //
429 if (pcur > pdist) then
430 begin
431 // swapping the current bucket with the one to insert
432 tmpe := mBuckets[idx];
433 mBuckets[idx] := swpe;
434 swpe := tmpe;
435 pcur := pdist;
436 end;
437 idx := (idx+1) and bhigh;
438 Inc(pcur);
439 end;
440 end;
443 function THashBase.put (constref akey: KeyT; constref aval: ValueT): Boolean;
444 var
445 khash, idx, dist, pdist: LongWord;
446 swpe: PEntry = nil; // current entry to swap (or nothing)
447 bhigh: LongWord;
448 newsz, eidx: Integer;
449 begin
450 result := false;
452 bhigh := High(mBuckets);
453 khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
454 idx := khash and bhigh;
456 // check if we already have this key
457 if (mBucketsUsed <> 0) and (mBuckets[idx] <> nil) then
458 begin
459 for dist := 0 to bhigh do
460 begin
461 if (mBuckets[idx] = nil) then break;
462 //pdist := distToStIdx(idx);
463 pdist := mBuckets[idx].hash and bhigh;
464 if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
465 //
466 if (dist > pdist) then break;
467 result := (mBuckets[idx].hash = khash) and equfn(mBuckets[idx].key, akey);
468 if result then
469 begin
470 // replace element
471 //mBuckets[idx].key := akey;
472 mBuckets[idx].value := aval;
473 exit;
474 end;
475 idx := (idx+1) and bhigh;
476 end;
477 end;
479 // need to resize hash?
480 if (mBucketsUsed >= (bhigh+1)*LoadFactorPrc div 100) then
481 begin
482 newsz := Length(mBuckets);
483 if (Length(mEntries) <> newsz) then raise Exception.Create('internal error in hash table (resize)');
484 if (newsz <= 1024*1024*1024) then newsz *= 2 else raise Exception.Create('hash table too big');
485 {$IFDEF RBHASH_DEBUG_RESIZE}
486 writeln('resizing hash; used=', mBucketsUsed, '; total=', (bhigh+1), '; maxload=', (bhigh+1)*LoadFactorPrc div 100, '; newsz=', newsz);
487 {$ENDIF}
488 SetLength(mBuckets, newsz);
489 // resize entries array
490 eidx := newsz;
491 SetLength(mEntries, newsz);
492 while (eidx < Length(mEntries)) do begin mEntries[eidx].hash := 0; Inc(eidx); end;
493 // mFreeEntryHead will be fixed in `rehash()`
494 // reinsert entries
495 rehash();
496 // as seed was changed, recalc hash
497 khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
498 end;
500 // create new entry
501 swpe := allocEntry();
502 swpe.key := akey;
503 swpe.value := aval;
504 swpe.hash := khash;
506 putEntryInternal(swpe);
507 end;
510 // see http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
511 function THashBase.del (constref akey: KeyT): Boolean;
512 var
513 khash, idx, idxnext, pdist, dist: LongWord;
514 bhigh: LongWord;
515 begin
516 result := false;
517 if (mBucketsUsed = 0) then exit;
519 bhigh := High(mBuckets);
520 khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
521 idx := khash and bhigh;
523 // find key
524 if (mBuckets[idx] = nil) then exit; // no key
525 for dist := 0 to bhigh do
526 begin
527 if (mBuckets[idx] = nil) then break;
528 //pdist := distToStIdx(idxcur);
529 pdist := mBuckets[idx].hash and bhigh;
530 if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
531 //
532 if (dist > pdist) then break;
533 result := (mBuckets[idx].hash = khash) and equfn(mBuckets[idx].key, akey);
534 if result then break;
535 idx := (idx+1) and bhigh;
536 end;
538 if not result then
539 begin
540 // key not found
541 {$IFDEF RBHASH_DEBUG_DELETE}
542 writeln('del: key ', akey, ': not found');
543 {$ENDIF}
544 exit;
545 end;
547 {$IFDEF RBHASH_DEBUG_DELETE}
548 writeln('del: key ', akey, ': found at ', idx, '; ek=', mBuckets[idx].key, '; ev=', mBuckets[idx].value);
549 {$ENDIF}
550 releaseEntry(mBuckets[idx]);
552 idxnext := (idx+1) and bhigh;
553 for dist := 0 to bhigh do
554 begin
555 {$IFDEF RBHASH_DEBUG_DELETE}
556 writeln(' dist=', dist, '; idx=', idx, '; idxnext=', idxnext, '; ce=', (mBuckets[idx] <> nil), '; ne=', (mBuckets[idxnext] <> nil));
557 {$ENDIF}
558 if (mBuckets[idxnext] = nil) then begin {$IFDEF RBHASH_DEBUG_DELETE}writeln(' idxnext nil');{$ENDIF} mBuckets[idx] := nil; break; end;
559 //pdist := distToStIdx(idxnext);
560 pdist := mBuckets[idxnext].hash and bhigh;
561 if (pdist <= idxnext) then pdist := idxnext-pdist else pdist := idxnext+((bhigh+1)-pdist);
562 //
563 if (pdist = 0) then begin {$IFDEF RBHASH_DEBUG_DELETE}writeln(' pdist is zero');{$ENDIF} mBuckets[idx] := nil; break; end;
564 {$IFDEF RBHASH_DEBUG_DELETE}writeln(' pdist=', pdist);{$ENDIF}
565 mBuckets[idx] := mBuckets[idxnext];
566 idx := (idx+1) and bhigh;
567 idxnext := (idxnext+1) and bhigh;
568 end;
570 Dec(mBucketsUsed);
571 end;
574 procedure THashBase.clear ();
575 var
576 idx: Integer;
577 begin
578 SetLength(mBuckets, InitSize);
579 for idx := 0 to High(mBuckets) do mBuckets[idx] := nil;
581 SetLength(mEntries, Length(mBuckets));
582 for idx := 0 to High(mEntries) do
583 begin
584 mEntries[idx].hash := 0;
585 mEntries[idx].nextFree := idx+1;
586 end;
587 mEntries[High(mEntries)].nextFree := -1;
589 mBucketsUsed := 0;
590 mEntriesUsed := 0;
591 mFreeEntryHead := 0;
592 end;
595 procedure THashBase.rehash ();
596 var
597 idx, lastfree: Integer;
598 e: PEntry;
599 begin
600 // change seed, to minimize pathological cases
601 if (mSeed = 0) then mSeed := $29a;
602 mSeed := u32Hash(mSeed);
603 // clear buckets
604 for idx := 0 to High(mBuckets) do mBuckets[idx] := nil;
605 mBucketsUsed := 0;
606 // reinsert entries
607 mFreeEntryHead := -1;
608 lastfree := -1;
609 for idx := 0 to High(mEntries) do
610 begin
611 e := @mEntries[idx];
612 if (e.hash <> 0) then
613 begin
614 e.hash := hashfn(e.key) xor mSeed; if (e.hash = 0) then e.hash := $29a;
615 putEntryInternal(e);
616 end
617 else
618 begin
619 if (lastfree <> -1) then mEntries[lastfree].nextFree := idx else mFreeEntryHead := idx;
620 lastfree := idx;
621 end;
622 end;
623 end;
626 procedure THashBase.compact ();
627 var
628 newsz, didx, f: Integer;
629 begin
630 newsz := nextPOT(LongWord(mBucketsUsed));
631 if (newsz >= 1024*1024*1024) then exit;
632 if (newsz*2 >= Length(mBuckets)) then exit;
633 {$IFDEF RBHASH_DEBUG_COMPACT}writeln('compacting; used=', mBucketsUsed, '; oldsizePOT=', newsz, '; newsize=', newsz*2);{$ENDIF}
634 newsz *= 2;
635 // move all entries to top
636 didx := 0;
637 while (didx < Length(mEntries)) do if (mEntries[didx].hash <> 0) then Inc(didx) else break;
638 f := didx+1;
639 while (f < Length(mEntries)) do if (mEntries[f].hash = 0) then Inc(f) else break;
640 // copy entries
641 while (f < Length(mEntries)) do
642 begin
643 if (mEntries[f].hash <> 0) then
644 begin
645 if (didx >= f) then raise Exception.Create('internal error in hash: inconsistent');
646 mEntries[didx] := mEntries[f];
647 mEntries[f].hash := 0;
648 Inc(didx);
649 while (didx < Length(mEntries)) do if (mEntries[didx].hash <> 0) then Inc(didx) else break;
650 Inc(f);
651 while (f < Length(mEntries)) do if (mEntries[f].hash = 0) then Inc(f) else break;
652 end
653 else
654 begin
655 Inc(f);
656 end;
657 end;
658 // shrink
659 SetLength(mBuckets, newsz);
660 SetLength(mEntries, newsz);
661 // mFreeEntryHead will be fixed in `rehash()`
662 // reinsert entries
663 rehash();
664 end;
667 end.