summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 630b58e)
raw | patch | inline | side by side (parent: 630b58e)
author | Ketmar Dark <ketmar@ketmar.no-ip.org> | |
Mon, 21 Aug 2017 11:50:26 +0000 (14:50 +0300) | ||
committer | Ketmar Dark <ketmar@ketmar.no-ip.org> | |
Mon, 21 Aug 2017 11:57:11 +0000 (14:57 +0300) |
src/shared/a_modes.inc | patch | blob | history | |
src/shared/hashtable.pas | patch | blob | history |
diff --git a/src/shared/a_modes.inc b/src/shared/a_modes.inc
index 9d1da17825a7b8580b5ed4239eaeaca7a4619060..9c5c74c08774122fa8a2e705e89f63d449807b3a 100644 (file)
--- a/src/shared/a_modes.inc
+++ b/src/shared/a_modes.inc
{$MMX-} // get lost, mmx
{$IF DEFINED(D2F_DEBUG)}
- {$RANGECHECKS ON}
{$STACKFRAMES ON}
{$ELSE}
{$STACKFRAMES OFF}
index 7dbbb260beb8d2b8dcdd44f2812da19992e04dad..955e599bf9f1ba6bb1516f2a5545362b82a6a37b 100644 (file)
--- a/src/shared/hashtable.pas
+++ b/src/shared/hashtable.pas
*)
{$INCLUDE a_modes.inc}
{.$DEFINE RBHASH_DEBUG_RESIZE}
+{.$DEFINE RBHASH_DEBUG_INSERT}
{.$DEFINE RBHASH_DEBUG_DELETE}
+{.$DEFINE RBHASH_DEBUG_COMPACT}
{$IF DEFINED(D2F_DEBUG)}
{$DEFINE RBHASH_SANITY_CHECKS}
{$ENDIF}
// WARNING! don't put structures into hash, use ponters or ids!
generic THashBase<KeyT, ValueT> = class(TObject)
private
- const InitSize = {$IF DEFINED(D2F_DEBUG)}16{$ELSE}512{$ENDIF};
+ const InitSize = {$IF DEFINED(D2F_DEBUG)}16{$ELSE}512{$ENDIF}; // *MUST* be power of two
const LoadFactorPrc = 90; // it is ok for robin hood hashes
public
mEntries: array of TEntry;
mEntriesUsed: Integer;
mFreeEntryHead: Integer;
+ mSeed: LongWord;
private
function allocEntry (): PEntry;
procedure releaseEntry (e: PEntry);
- function distToStIdx (idx: LongWord): LongWord; inline;
+ //function distToStIdx (idx: LongWord): LongWord; inline;
procedure putEntryInternal (swpe: PEntry);
+ function getCapacity (): Integer; inline;
+
public
constructor Create (ahashfn: THashFn; aequfn: TEquFn);
destructor Destroy (); override;
procedure clear ();
procedure rehash ();
+ procedure compact (); // call this instead of `rehash()` after alot of deletions
function get (constref akey: KeyT; out rval: ValueT): Boolean; // `true`: found
function put (constref akey: KeyT; constref aval: ValueT): Boolean; // `true`: replaced
function del (constref akey: KeyT): Boolean; // `true`: deleted
property count: Integer read mBucketsUsed;
+ property capacity: Integer read getCapacity;
+ end;
+
+
+type
+ TJoaatHasher = record
+ private
+ seed: LongWord; // initial seed value; MUST BE FIRST
+ hash: LongWord; // current value
+
+ public
+ constructor Create (aseed: LongWord);
+
+ procedure reset (); inline; overload;
+ procedure reset (aseed: LongWord); inline; overload;
+
+ procedure put (const buf; len: LongWord);
+
+ // current hash value
+ // you can continue putting data, as this is not destructive
+ function value: LongWord; inline;
end;
+type
+ THashIntInt = specialize THashBase<Integer, Integer>;
+
+function hashNewIntInt (): THashIntInt;
+
+
+function u32Hash (a: LongWord): LongWord; inline;
+function fnvHash (const buf; len: LongWord): LongWord;
+function joaatHash (const buf; len: LongWord): LongWord;
+
+function nextPOT (x: LongWord): LongWord; inline;
+
+
implementation
uses
SysUtils;
+// ////////////////////////////////////////////////////////////////////////// //
+{$PUSH}
+{$RANGECHECKS OFF}
+function nextPOT (x: LongWord): LongWord; inline;
+begin
+ result := x;
+ result := result or (result shr 1);
+ result := result or (result shr 2);
+ result := result or (result shr 4);
+ result := result or (result shr 8);
+ result := result or (result shr 16);
+ // already pot?
+ if (x <> 0) and ((x and (x-1)) = 0) then result := result and (not (result shr 1)) else result += 1;
+end;
+{$POP}
+
+
+// ////////////////////////////////////////////////////////////////////////// //
+function hiiequ (constref a, b: Integer): Boolean; begin result := (a = b); end;
+
+{$PUSH}
+{$RANGECHECKS OFF}
+function hiihash (constref k: Integer): LongWord;
+begin
+ result := k;
+ result -= (result shl 6);
+ result := result xor (result shr 17);
+ result -= (result shl 9);
+ result := result xor (result shl 4);
+ result -= (result shl 3);
+ result := result xor (result shl 10);
+ result := result xor (result shr 15);
+end;
+{$POP}
+
+
+function hashNewIntInt (): THashIntInt;
+begin
+ result := THashIntInt.Create(hiihash, hiiequ);
+end;
+
+
+// ////////////////////////////////////////////////////////////////////////// //
+{$PUSH}
+{$RANGECHECKS OFF}
+constructor TJoaatHasher.Create (aseed: LongWord);
+begin
+ reset(aseed);
+end;
+
+
+procedure TJoaatHasher.reset (); inline; overload;
+begin
+ hash := seed;
+end;
+
+
+procedure TJoaatHasher.reset (aseed: LongWord); inline; overload;
+begin
+ seed := aseed;
+ hash := aseed;
+end;
+
+
+procedure TJoaatHasher.put (const buf; len: LongWord);
+var
+ bytes: PByte;
+ h: LongWord;
+begin
+ if (len < 1) then exit;
+ bytes := PByte(@buf);
+ h := hash;
+ while (len > 0) do
+ begin
+ h += bytes^;
+ h += (h shl 10);
+ h := h xor (h shr 6);
+ Dec(len);
+ Inc(bytes);
+ end;
+ hash := h;
+end;
+
+
+function TJoaatHasher.value: LongWord; inline;
+begin
+ result := hash;
+ result += (result shl 3);
+ result := result xor (result shr 11);
+ result += (result shl 15);
+end;
+{$POP}
+
+
+function joaatHash (const buf; len: LongWord): LongWord;
+var
+ h: TJoaatHasher;
+begin
+ h := TJoaatHasher.Create(0);
+ h.put(buf, len);
+ result := h.value;
+end;
+
+
+// ////////////////////////////////////////////////////////////////////////// //
+{$PUSH}
+{$RANGECHECKS OFF}
+// fnv-1a: http://www.isthe.com/chongo/tech/comp/fnv/
+function fnvHash (const buf; len: LongWord): LongWord;
+var
+ b: PByte;
+begin
+ b := @buf;
+ result := 2166136261; // fnv offset basis
+ while (len > 0) do
+ begin
+ result := result xor b^;
+ result := result*16777619; // 32-bit fnv prime
+ Inc(b);
+ Dec(len);
+ end;
+end;
+{$POP}
+
+
+{$PUSH}
+{$RANGECHECKS OFF}
+function u32Hash (a: LongWord): LongWord; inline;
+begin
+ result := a;
+ result -= (result shl 6);
+ result := result xor (result shr 17);
+ result -= (result shl 9);
+ result := result xor (result shl 4);
+ result -= (result shl 3);
+ result := result xor (result shl 10);
+ result := result xor (result shr 15);
+end;
+{$POP}
+
+
// ////////////////////////////////////////////////////////////////////////// //
constructor THashBase.Create (ahashfn: THashFn; aequfn: TEquFn);
begin
hashfn := ahashfn;
equfn := aequfn;
+ mSeed := u32Hash($29a);
clear();
end;
end;
+function THashBase.getCapacity (): Integer; inline; begin result := Length(mBuckets); end;
+
+
function THashBase.allocEntry (): PEntry;
begin
{$IFDEF RBHASH_SANITY_CHECKS}
end;
+(*
function THashBase.distToStIdx (idx: LongWord): LongWord; inline;
-var
- stidx: LongWord;
begin
{$IFDEF RBHASH_SANITY_CHECKS}
assert(idx < Length(mBuckets));
assert(mBuckets[idx] <> nil);
{$ENDIF}
- stidx := mBuckets[idx].hash mod Length(mBuckets);
- if (stidx <= idx) then result := idx-stidx else result := idx+(Length(mBuckets)-stidx);
+ result := mBuckets[idx].hash and High(mBuckets);
+ if (result <= idx) then result := idx-result else result := idx+(Length(mBuckets)-result);
end;
+*)
function THashBase.has (constref akey: KeyT): Boolean;
var
khash, idx: LongWord;
dist, pdist: LongWord;
- blen: LongWord;
+ bhigh: LongWord;
begin
result := false;
if (mBucketsUsed = 0) then exit;
- blen := Length(mBuckets);
- khash := hashfn(akey); if (khash = 0) then khash := $29a;
- idx := khash mod blen;
+ bhigh := High(mBuckets);
+ khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
+ idx := khash and bhigh;
if (mBuckets[idx] = nil) then exit;
- for dist := 0 to blen-1 do
+ for dist := 0 to bhigh do
begin
if (mBuckets[idx] = nil) then break;
- pdist := distToStIdx(idx);
+ //pdist := distToStIdx(idx);
+ pdist := mBuckets[idx].hash and bhigh;
+ if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
+ //
if (dist > pdist) then break;
result := (mBuckets[idx].hash = khash) and equfn(mBuckets[idx].key, akey);
if result then break;
- idx := (idx+1) mod blen;
+ idx := (idx+1) and bhigh;
end;
end;
var
khash, idx: LongWord;
dist, pdist: LongWord;
- blen: LongWord;
+ bhigh: LongWord;
begin
result := false;
if (mBucketsUsed = 0) then begin rval := Default(ValueT); exit; end;
- blen := Length(mBuckets);
- khash := hashfn(akey); if (khash = 0) then khash := $29a;
- idx := khash mod blen;
+ bhigh := High(mBuckets);
+ khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
+ idx := khash and bhigh;
if (mBuckets[idx] = nil) then begin rval := Default(ValueT); exit; end;
- for dist := 0 to blen-1 do
+ for dist := 0 to bhigh do
begin
if (mBuckets[idx] = nil) then break;
- pdist := distToStIdx(idx);
+ //pdist := distToStIdx(idx);
+ pdist := mBuckets[idx].hash and bhigh;
+ if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
+ //
if (dist > pdist) then break;
result := (mBuckets[idx].hash = khash) and equfn(mBuckets[idx].key, akey);
if result then
rval := mBuckets[idx].value;
break;
end;
- idx := (idx+1) mod blen;
+ idx := (idx+1) and bhigh;
end;
if not result then rval := Default(ValueT); // just in case
var
idx, dist, pcur, pdist: LongWord;
tmpe: PEntry; // current entry to swap (or nothing)
- blen: LongWord;
+ bhigh: LongWord;
begin
- blen := Length(mBuckets);
- idx := swpe.hash mod blen;
+ bhigh := High(mBuckets);
+ idx := swpe.hash and bhigh;
+ {$IFDEF RBHASH_DEBUG_INSERT}writeln('inserting key ', swpe.key, '; value=', swpe.value, '; wantidx=', idx, '; bhigh=', bhigh);{$ENDIF}
pcur := 0;
- for dist := 0 to blen-1 do
+ for dist := 0 to bhigh do
begin
if (mBuckets[idx] = nil) then
begin
// put entry
+ {$IFDEF RBHASH_DEBUG_INSERT}writeln(' inserted to ', idx);{$ENDIF}
mBuckets[idx] := swpe;
Inc(mBucketsUsed);
break;
end;
- pdist := distToStIdx(idx);
+ //pdist := distToStIdx(idx);
+ pdist := mBuckets[idx].hash and bhigh;
+ if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
+ //
if (pcur > pdist) then
begin
// swapping the current bucket with the one to insert
swpe := tmpe;
pcur := pdist;
end;
- idx := (idx+1) mod blen;
+ idx := (idx+1) and bhigh;
Inc(pcur);
end;
end;
function THashBase.put (constref akey: KeyT; constref aval: ValueT): Boolean;
var
- khash, stidx, idx, dist, pdist: LongWord;
+ khash, idx, dist, pdist: LongWord;
swpe: PEntry = nil; // current entry to swap (or nothing)
- blen: LongWord;
+ bhigh: LongWord;
newsz, eidx: Integer;
begin
result := false;
- blen := Length(mBuckets);
- khash := hashfn(akey); if (khash = 0) then khash := $29a;
- stidx := khash mod blen;
+ bhigh := High(mBuckets);
+ khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
+ idx := khash and bhigh;
// check if we already have this key
- idx := stidx;
if (mBucketsUsed <> 0) and (mBuckets[idx] <> nil) then
begin
- for dist := 0 to blen-1 do
+ for dist := 0 to bhigh do
begin
if (mBuckets[idx] = nil) then break;
- pdist := distToStIdx(idx);
+ //pdist := distToStIdx(idx);
+ pdist := mBuckets[idx].hash and bhigh;
+ if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
+ //
if (dist > pdist) then break;
result := (mBuckets[idx].hash = khash) and equfn(mBuckets[idx].key, akey);
if result then
mBuckets[idx].value := aval;
exit;
end;
- idx := (idx+1) mod blen;
+ idx := (idx+1) and bhigh;
end;
end;
// need to resize hash?
- if (mBucketsUsed >= blen*LoadFactorPrc div 100) then
+ if (mBucketsUsed >= (bhigh+1)*LoadFactorPrc div 100) then
begin
newsz := Length(mBuckets);
if (Length(mEntries) <> newsz) then raise Exception.Create('internal error in hash table (resize)');
if (newsz <= 1024*1024*1024) then newsz *= 2 else raise Exception.Create('hash table too big');
{$IFDEF RBHASH_DEBUG_RESIZE}
- writeln('resizing hash; used=', mBucketsUsed, '; total=', blen, '; maxload=', blen*LoadFactorPrc div 100, '; newsz=', newsz);
+ writeln('resizing hash; used=', mBucketsUsed, '; total=', (bhigh+1), '; maxload=', (bhigh+1)*LoadFactorPrc div 100, '; newsz=', newsz);
{$ENDIF}
SetLength(mBuckets, newsz);
// resize entries array
// mFreeEntryHead will be fixed in `rehash()`
// reinsert entries
rehash();
+ // as seed was changed, recalc hash
+ khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
end;
// create new entry
// see http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
function THashBase.del (constref akey: KeyT): Boolean;
var
- khash, stidx, idxcur, idxnext, pdist, dist: LongWord;
- blen: LongWord;
+ khash, idx, idxnext, pdist, dist: LongWord;
+ bhigh: LongWord;
begin
result := false;
if (mBucketsUsed = 0) then exit;
- blen := Length(mBuckets);
- khash := hashfn(akey); if (khash = 0) then khash := $29a;
- stidx := khash mod blen;
+ bhigh := High(mBuckets);
+ khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
+ idx := khash and bhigh;
// find key
- if (mBuckets[stidx] = nil) then exit; // no key
-
- idxcur := stidx;
- for dist := 0 to blen-1 do
+ if (mBuckets[idx] = nil) then exit; // no key
+ for dist := 0 to bhigh do
begin
- if (mBuckets[idxcur] = nil) then break;
- pdist := distToStIdx(idxcur);
+ if (mBuckets[idx] = nil) then break;
+ //pdist := distToStIdx(idxcur);
+ pdist := mBuckets[idx].hash and bhigh;
+ if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
+ //
if (dist > pdist) then break;
- result := (mBuckets[idxcur].hash = khash) and equfn(mBuckets[idxcur].key, akey);
+ result := (mBuckets[idx].hash = khash) and equfn(mBuckets[idx].key, akey);
if result then break;
- idxcur := (idxcur+1) mod blen;
+ idx := (idx+1) and bhigh;
end;
if not result then
end;
{$IFDEF RBHASH_DEBUG_DELETE}
- writeln('del: key ', akey, ': found at ', idxcur, '(', stidx, '); ek=', mBuckets[idxcur].key, '; ev=', mBuckets[idxcur].value);
+ writeln('del: key ', akey, ': found at ', idx, '; ek=', mBuckets[idx].key, '; ev=', mBuckets[idx].value);
{$ENDIF}
- releaseEntry(mBuckets[idxcur]);
+ releaseEntry(mBuckets[idx]);
- idxnext := (idxcur+1) mod blen;
- for dist := 0 to blen-1 do
+ idxnext := (idx+1) and bhigh;
+ for dist := 0 to bhigh do
begin
{$IFDEF RBHASH_DEBUG_DELETE}
- writeln(' dist=', dist, '; idxcur=', idxcur, '; idxnext=', idxnext, '; ce=', (mBuckets[idxcur] <> nil), '; ne=', (mBuckets[idxnext] <> nil));
+ writeln(' dist=', dist, '; idx=', idx, '; idxnext=', idxnext, '; ce=', (mBuckets[idx] <> nil), '; ne=', (mBuckets[idxnext] <> nil));
{$ENDIF}
- if (mBuckets[idxnext] = nil) then begin {$IFDEF RBHASH_DEBUG_DELETE}writeln(' idxnext nil');{$ENDIF} mBuckets[idxcur] := nil; break; end;
- pdist := distToStIdx(idxnext);
- if (pdist = 0) then begin {$IFDEF RBHASH_DEBUG_DELETE}writeln(' pdist is zero');{$ENDIF} mBuckets[idxcur] := nil; break; end;
+ if (mBuckets[idxnext] = nil) then begin {$IFDEF RBHASH_DEBUG_DELETE}writeln(' idxnext nil');{$ENDIF} mBuckets[idx] := nil; break; end;
+ //pdist := distToStIdx(idxnext);
+ pdist := mBuckets[idxnext].hash and bhigh;
+ if (pdist <= idxnext) then pdist := idxnext-pdist else pdist := idxnext+((bhigh+1)-pdist);
+ //
+ if (pdist = 0) then begin {$IFDEF RBHASH_DEBUG_DELETE}writeln(' pdist is zero');{$ENDIF} mBuckets[idx] := nil; break; end;
{$IFDEF RBHASH_DEBUG_DELETE}writeln(' pdist=', pdist);{$ENDIF}
- mBuckets[idxcur] := mBuckets[idxnext];
- idxcur := (idxcur+1) mod blen;
- idxnext := (idxnext+1) mod blen;
+ mBuckets[idx] := mBuckets[idxnext];
+ idx := (idx+1) and bhigh;
+ idxnext := (idxnext+1) and bhigh;
end;
Dec(mBucketsUsed);
idx, lastfree: Integer;
e: PEntry;
begin
+ // change seed, to minimize pathological cases
+ if (mSeed = 0) then mSeed := $29a;
+ mSeed := u32Hash(mSeed);
// clear buckets
for idx := 0 to High(mBuckets) do mBuckets[idx] := nil;
mBucketsUsed := 0;
e := @mEntries[idx];
if (e.hash <> 0) then
begin
+ e.hash := hashfn(e.key) xor mSeed; if (e.hash = 0) then e.hash := $29a;
putEntryInternal(e);
end
else
end;
+procedure THashBase.compact ();
+var
+ newsz, didx, f: Integer;
+begin
+ newsz := nextPOT(LongWord(mBucketsUsed));
+ if (newsz >= 1024*1024*1024) then exit;
+ if (newsz*2 >= Length(mBuckets)) then exit;
+ {$IFDEF RBHASH_DEBUG_COMPACT}writeln('compacting; used=', mBucketsUsed, '; oldsizePOT=', newsz, '; newsize=', newsz*2);{$ENDIF}
+ newsz *= 2;
+ // move all entries to top
+ didx := 0;
+ while (didx < Length(mEntries)) do if (mEntries[didx].hash <> 0) then Inc(didx) else break;
+ f := didx+1;
+ while (f < Length(mEntries)) do if (mEntries[f].hash = 0) then Inc(f) else break;
+ // copy entries
+ while (f < Length(mEntries)) do
+ begin
+ if (mEntries[f].hash <> 0) then
+ begin
+ if (didx >= f) then raise Exception.Create('internal error in hash: inconsistent');
+ mEntries[didx] := mEntries[f];
+ mEntries[f].hash := 0;
+ Inc(didx);
+ while (didx < Length(mEntries)) do if (mEntries[didx].hash <> 0) then Inc(didx) else break;
+ Inc(f);
+ while (f < Length(mEntries)) do if (mEntries[f].hash = 0) then Inc(f) else break;
+ end
+ else
+ begin
+ Inc(f);
+ end;
+ end;
+ // shrink
+ SetLength(mBuckets, newsz);
+ SetLength(mEntries, newsz);
+ // mFreeEntryHead will be fixed in `rehash()`
+ // reinsert entries
+ rehash();
+end;
+
+
end.