index b847720ba90a3ed86977b6adbc2531a4c4ce4b9c..a51fc91d27519874d875a6b2799ab85b7230c599 100644 (file)
--- a/src/shared/hashtable.pas
+++ b/src/shared/hashtable.pas
{.$DEFINE RBHASH_DEBUG_DELETE}
{.$DEFINE RBHASH_DEBUG_COMPACT}
{$IF DEFINED(D2F_DEBUG)}
- {$DEFINE RBHASH_SANITY_CHECKS}
+ {.$DEFINE RBHASH_SANITY_CHECKS}
{$ENDIF}
// hash table (robin hood)
unit hashtable;
// WARNING! don't put structures into hash, use ponters or ids!
generic THashBase<KeyT, ValueT> = class(TObject)
private
- const InitSize = {$IF DEFINED(D2F_DEBUG)}16{$ELSE}512{$ENDIF}; // *MUST* be power of two
+ const InitSize = {$IF DEFINED(RBHASH_SANITY_CHECKS)}16{$ELSE}256{$ENDIF}; // *MUST* be power of two
const LoadFactorPrc = 90; // it is ok for robin hood hashes
public
type THashFn = function (constref o: KeyT): LongWord;
type TEquFn = function (constref a, b: KeyT): Boolean;
type TIteratorFn = function (constref k: KeyT; constref v: ValueT): Boolean is nested; // return `true` to stop
+ type TIteratorExFn = function (constref k: KeyT; constref v: ValueT; keyhash: LongWord): Boolean is nested; // return `true` to stop
- private
type
PEntry = ^TEntry;
TEntry = record
+ public
key: KeyT;
value: ValueT;
+ private
hash: LongWord; // key hash or 0
nextFree: PEntry; // next free entry
+ public
+ property keyhash: LongWord read hash;
+ end;
+
+ private
+ type
+ TEntryArray = array of TEntry;
+
+ public
+ type
+ TValEnumerator = record
+ private
+ mEntries: TEntryArray;
+ mFirstEntry, mLastEntry, cur: Integer;
+ public
+ constructor Create (const aents: TEntryArray; afirst, alast: Integer);
+ function MoveNext (): Boolean; inline;
+ function getCurrent (): ValueT; inline;
+ function GetEnumerator (): TValEnumerator; inline;
+ property Current: ValueT read getCurrent;
+ end;
+
+ TKeyEnumerator = record
+ private
+ mEntries: TEntryArray;
+ mFirstEntry, mLastEntry, cur: Integer;
+ public
+ constructor Create (const aents: TEntryArray; afirst, alast: Integer);
+ function MoveNext (): Boolean; inline;
+ function getCurrent (): KeyT; inline;
+ function GetEnumerator (): TKeyEnumerator; inline;
+ property Current: KeyT read getCurrent;
+ end;
+
+ TKeyValEnumerator = record
+ private
+ mEntries: TEntryArray;
+ mFirstEntry, mLastEntry, cur: Integer;
+ public
+ constructor Create (const aents: TEntryArray; afirst, alast: Integer);
+ function MoveNext (): Boolean; inline;
+ function getCurrent (): PEntry; inline;
+ function GetEnumerator (): TKeyValEnumerator; inline;
+ property Current: PEntry read getCurrent;
end;
private
equfn: TEquFn;
mBuckets: array of PEntry; // entries, points to mEntries elements
mBucketsUsed: Integer;
- mEntries: array of TEntry;
+ mEntries: TEntryArray;
{$IFDEF RBHASH_SANITY_CHECKS}
mEntriesUsed: Integer;
{$ENDIF}
procedure rehash ();
procedure compact (); // call this instead of `rehash()` after alot of deletions
- function get (constref akey: KeyT; out rval: ValueT): Boolean; // `true`: found
- function put (constref akey: KeyT; constref aval: ValueT): Boolean; // `true`: replaced
- function has (constref akey: KeyT): Boolean; // `true`: found
- function del (constref akey: KeyT): Boolean; // `true`: deleted
+ // you may pass `keyhash` to bypass hash calculation
+ function get (constref akey: KeyT; out rval: ValueT; keyhashin: PLongWord=nil): Boolean; // `true`: found
+ // the function may return calculated value hash in `keyhash`
+ function put (constref akey: KeyT; constref aval: ValueT; keyhashout: PLongWord=nil): Boolean; // `true`: replaced
+ // you may pass `keyhash` to bypass hash calculation
+ function has (constref akey: KeyT; keyhashin: PLongWord=nil): Boolean; // `true`: found
+ // you may pass `keyhash` to bypass hash calculation
+ function del (constref akey: KeyT; keyhashin: PLongWord=nil): Boolean; // `true`: deleted
//WARNING! don't modify table in iterator (queries are ok, though)
- function forEach (it: TIteratorFn): Boolean;
+ function forEach (it: TIteratorFn): Boolean; overload;
+ function forEach (it: TIteratorExFn): Boolean; overload;
+
+ // default `for ... in` enums values
+ function GetEnumerator (): TValEnumerator;
+ function byKey (): TKeyEnumerator;
+ function byValue (): TValEnumerator;
+ function byKeyValue (): TKeyValEnumerator; // PEntry
property count: Integer read mBucketsUsed;
property capacity: Integer read getCapacity;
end;
-
type
TJoaatHasher = record
private
procedure reset (); inline; overload;
procedure reset (aseed: LongWord); inline; overload;
- procedure put (const buf; len: LongWord);
+ procedure put (constref buf; len: LongWord);
// current hash value
// you can continue putting data, as this is not destructive
type
THashIntInt = specialize THashBase<Integer, Integer>;
+ THashStrInt = specialize THashBase<AnsiString, Integer>;
+ THashStrStr = specialize THashBase<AnsiString, AnsiString>;
function hashNewIntInt (): THashIntInt;
+function hashNewStrInt (): THashStrInt;
+function hashNewStrStr (): THashStrStr;
function u32Hash (a: LongWord): LongWord; inline;
-function fnvHash (const buf; len: LongWord): LongWord;
-function joaatHash (const buf; len: LongWord): LongWord;
+function fnvHash (constref buf; len: LongWord): LongWord;
+function joaatHash (constref buf; len: LongWord): LongWord;
function nextPOT (x: LongWord): LongWord; inline;
+// for integer keys
+function hiiequ (constref a, b: Integer): Boolean;
+function hiihash (constref k: Integer): LongWord;
+function hsiequ (constref a, b: AnsiString): Boolean;
+function hsihash (constref k: AnsiString): LongWord;
+
+
implementation
uses
// ////////////////////////////////////////////////////////////////////////// //
function hiiequ (constref a, b: Integer): Boolean; begin result := (a = b); end;
+function hsiequ (constref a, b: AnsiString): Boolean; begin result := (a = b); end;
{$PUSH}
{$RANGECHECKS OFF}
function hiihash (constref k: Integer): LongWord;
begin
- result := k;
+ result := LongWord(k);
result -= (result shl 6);
result := result xor (result shr 17);
result -= (result shl 9);
result := result xor (result shl 10);
result := result xor (result shr 15);
end;
+
+function hsihash (constref k: AnsiString): LongWord;
+begin
+ if (Length(k) > 0) then result := fnvHash(PAnsiChar(k)^, Length(k)) else result := 0;
+end;
{$POP}
end;
+function hashNewStrInt (): THashStrInt;
+begin
+ result := THashStrInt.Create(hsihash, hsiequ);
+end;
+
+
+function hashNewStrStr (): THashStrStr;
+begin
+ result := THashStrStr.Create(hsihash, hsiequ);
+end;
+
+
// ////////////////////////////////////////////////////////////////////////// //
{$PUSH}
{$RANGECHECKS OFF}
end;
-procedure TJoaatHasher.put (const buf; len: LongWord);
+procedure TJoaatHasher.put (constref buf; len: LongWord);
var
bytes: PByte;
h: LongWord;
{$POP}
-function joaatHash (const buf; len: LongWord): LongWord;
+function joaatHash (constref buf; len: LongWord): LongWord;
var
h: TJoaatHasher;
begin
h := TJoaatHasher.Create(0);
- h.put(buf, len);
+ h.put(PByte(@buf)^, len);
result := h.value;
end;
{$PUSH}
{$RANGECHECKS OFF}
// fnv-1a: http://www.isthe.com/chongo/tech/comp/fnv/
-function fnvHash (const buf; len: LongWord): LongWord;
+function fnvHash (constref buf; len: LongWord): LongWord;
var
b: PByte;
begin
begin
SetLength(mBuckets, InitSize);
for idx := 0 to High(mBuckets) do mBuckets[idx] := nil;
-
SetLength(mEntries, Length(mBuckets));
- {
- for idx := 0 to High(mEntries)-1 do
- begin
- mEntries[idx].hash := 0;
- mEntries[idx].nextFree := @mEntries[idx+1]; //idx+1;
- end;
- mEntries[High(mEntries)].hash := 0;
- mEntries[High(mEntries)].nextFree := nil;
- }
- {
- for idx := 0 to High(mEntries) do
- begin
- mEntries[idx].hash := 0;
- mEntries[idx].nextFree := nil;
- end;
- }
-
mBucketsUsed := 0;
{$IFDEF RBHASH_SANITY_CHECKS}
mEntriesUsed := 0;
if (mBucketsUsed > 0) then
begin
for idx := 0 to High(mBuckets) do mBuckets[idx] := nil;
- {
- for idx := 0 to High(mEntries)-1 do
- begin
- mEntries[idx].hash := 0;
- mEntries[idx].nextFree := @mEntries[idx+1]; //idx+1;
- end;
- mEntries[High(mEntries)].hash := 0;
- mEntries[High(mEntries)].nextFree := nil;
- }
- {
- if (mFirstEntry >= 0) then
- begin
- for idx := mFirstEntry to mLastEntry do
- begin
- mEntries[idx].hash := 0;
- mEntries[idx].nextFree := nil;
- end;
- end;
- }
-
mBucketsUsed := 0;
{$IFDEF RBHASH_SANITY_CHECKS}
mEntriesUsed := 0;
assert(idx < Length(mBuckets));
assert(mBuckets[idx] <> nil);
{$ENDIF}
- result := mBuckets[idx].hash and High(mBuckets);
+ result := (mBuckets[idx].hash xor mSeed) and High(mBuckets);
if (result <= idx) then result := idx-result else result := idx+(Length(mBuckets)-result);
end;
*)
-function THashBase.has (constref akey: KeyT): Boolean;
+function THashBase.has (constref akey: KeyT; keyhashin: PLongWord=nil): Boolean;
var
khash, idx: LongWord;
dist, pdist: LongWord;
- bhigh: LongWord;
+ bhigh, xseed: LongWord;
begin
result := false;
if (mBucketsUsed = 0) then exit;
bhigh := High(mBuckets);
- khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
- idx := khash and bhigh;
+ xseed := mSeed;
+
+ if (keyhashin <> nil) then
+ begin
+ khash := keyhashin^;
+ if (khash = 0) then khash := hashfn(akey);
+ end
+ else
+ begin
+ khash := hashfn(akey);
+ end;
+ if (khash = 0) then khash := $29a;
+
+ idx := (khash xor xseed) and bhigh;
if (mBuckets[idx] = nil) then exit;
for dist := 0 to bhigh do
begin
if (mBuckets[idx] = nil) then break;
//pdist := distToStIdx(idx);
- pdist := mBuckets[idx].hash and bhigh;
+ pdist := (mBuckets[idx].hash xor xseed) and bhigh;
if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
//
if (dist > pdist) then break;
end;
-function THashBase.get (constref akey: KeyT; out rval: ValueT): Boolean;
+function THashBase.get (constref akey: KeyT; out rval: ValueT; keyhashin: PLongWord=nil): Boolean;
var
khash, idx: LongWord;
dist, pdist: LongWord;
- bhigh: LongWord;
+ bhigh, xseed: LongWord;
begin
result := false;
if (mBucketsUsed = 0) then begin rval := Default(ValueT); exit; end;
bhigh := High(mBuckets);
- khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
- idx := khash and bhigh;
- if (mBuckets[idx] = nil) then begin rval := Default(ValueT); exit; end;
+ xseed := mSeed;
+
+ if (keyhashin <> nil) then
+ begin
+ khash := keyhashin^;
+ if (khash = 0) then khash := hashfn(akey);
+ end
+ else
+ begin
+ khash := hashfn(akey);
+ end;
+ if (khash = 0) then khash := $29a;
+
+ idx := (khash xor xseed) and bhigh;
for dist := 0 to bhigh do
begin
if (mBuckets[idx] = nil) then break;
//pdist := distToStIdx(idx);
- pdist := mBuckets[idx].hash and bhigh;
+ pdist := (mBuckets[idx].hash xor xseed) and bhigh;
if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
//
if (dist > pdist) then break;
result := (mBuckets[idx].hash = khash) and equfn(mBuckets[idx].key, akey);
- if result then
- begin
- rval := mBuckets[idx].value;
- break;
- end;
+ if result then begin rval := mBuckets[idx].value; break; end;
idx := (idx+1) and bhigh;
end;
var
idx, dist, pcur, pdist: LongWord;
tmpe: PEntry; // current entry to swap (or nothing)
- bhigh: LongWord;
+ bhigh, xseed: LongWord;
begin
bhigh := High(mBuckets);
- idx := swpe.hash and bhigh;
+ xseed := mSeed;
+ idx := (swpe.hash xor xseed) and bhigh;
{$IFDEF RBHASH_DEBUG_INSERT}writeln('inserting key ', swpe.key, '; value=', swpe.value, '; wantidx=', idx, '; bhigh=', bhigh);{$ENDIF}
pcur := 0;
for dist := 0 to bhigh do
break;
end;
//pdist := distToStIdx(idx);
- pdist := mBuckets[idx].hash and bhigh;
+ pdist := (mBuckets[idx].hash xor xseed) and bhigh;
if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
//
if (pcur > pdist) then
end;
-function THashBase.put (constref akey: KeyT; constref aval: ValueT): Boolean;
+function THashBase.put (constref akey: KeyT; constref aval: ValueT; keyhashout: PLongWord=nil): Boolean;
var
khash, idx, dist, pdist: LongWord;
swpe: PEntry = nil; // current entry to swap (or nothing)
- bhigh: LongWord;
+ bhigh, xseed: LongWord;
newsz, eidx: Integer;
begin
result := false;
bhigh := High(mBuckets);
- khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
- idx := khash and bhigh;
+ xseed := mSeed;
+ khash := hashfn(akey);
+ if (khash = 0) then khash := $29a;
+ if (keyhashout <> nil) then keyhashout^ := khash;
+ idx := (khash xor xseed) and bhigh;
// check if we already have this key
if (mBucketsUsed <> 0) and (mBuckets[idx] <> nil) then
begin
if (mBuckets[idx] = nil) then break;
//pdist := distToStIdx(idx);
- pdist := mBuckets[idx].hash and bhigh;
+ pdist := (mBuckets[idx].hash xor xseed) and bhigh;
if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
//
if (dist > pdist) then break;
// mFreeEntryHead will be fixed in `rehash()`
// reinsert entries
rehash();
- // as seed was changed, recalc hash
- khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
end;
// create new entry
// see http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
-function THashBase.del (constref akey: KeyT): Boolean;
+function THashBase.del (constref akey: KeyT; keyhashin: PLongWord=nil): Boolean;
var
khash, idx, idxnext, pdist, dist: LongWord;
- bhigh: LongWord;
+ bhigh, xseed: LongWord;
begin
result := false;
if (mBucketsUsed = 0) then exit;
bhigh := High(mBuckets);
- khash := hashfn(akey) xor mSeed; if (khash = 0) then khash := $29a;
- idx := khash and bhigh;
+ xseed := mSeed;
+
+ if (keyhashin <> nil) then
+ begin
+ khash := keyhashin^;
+ if (khash = 0) then khash := hashfn(akey);
+ end
+ else
+ begin
+ khash := hashfn(akey);
+ end;
+ if (khash = 0) then khash := $29a;
+
+ idx := (khash xor xseed) and bhigh;
// find key
if (mBuckets[idx] = nil) then exit; // no key
begin
if (mBuckets[idx] = nil) then break;
//pdist := distToStIdx(idxcur);
- pdist := mBuckets[idx].hash and bhigh;
+ pdist := (mBuckets[idx].hash xor xseed) and bhigh;
if (pdist <= idx) then pdist := idx-pdist else pdist := idx+((bhigh+1)-pdist);
//
if (dist > pdist) then break;
{$ENDIF}
if (mBuckets[idxnext] = nil) then begin {$IFDEF RBHASH_DEBUG_DELETE}writeln(' idxnext nil');{$ENDIF} mBuckets[idx] := nil; break; end;
//pdist := distToStIdx(idxnext);
- pdist := mBuckets[idxnext].hash and bhigh;
+ pdist := (mBuckets[idxnext].hash xor xseed) and bhigh;
if (pdist <= idxnext) then pdist := idxnext-pdist else pdist := idxnext+((bhigh+1)-pdist);
//
if (pdist = 0) then begin {$IFDEF RBHASH_DEBUG_DELETE}writeln(' pdist is zero');{$ENDIF} mBuckets[idx] := nil; break; end;
{$ENDIF}
begin
// change seed, to minimize pathological cases
+ //TODO: use prng to generate new hash
if (mSeed = 0) then mSeed := $29a;
mSeed := u32Hash(mSeed);
// clear buckets
Inc(cnt);
if (cnt = mBucketsUsed) and (idx <> mLastEntry) then raise Exception.Create('internal error in rehash: inconsistent (2)');
{$ENDIF}
- e.hash := hashfn(e.key) xor mSeed; if (e.hash = 0) then e.hash := $29a;
+ // no need to recalculate hash
+ //e.hash := hashfn(e.key) xor mSeed; if (e.hash = 0) then e.hash := $29a;
putEntryInternal(e);
end
else
end;
-function THashBase.forEach (it: TIteratorFn): Boolean;
+function THashBase.forEach (it: TIteratorFn): Boolean; overload;
var
i: Integer;
begin
end;
end;
+function THashBase.forEach (it: TIteratorExFn): Boolean; overload;
+var
+ i: Integer;
+begin
+ result := false;
+ if not assigned(it) then exit;
+ i := mFirstEntry;
+ if (i < 0) then exit;
+ while (i <= mLastEntry) do
+ begin
+ if (mEntries[i].hash <> 0) then
+ begin
+ result := it(mEntries[i].key, mEntries[i].value, mEntries[i].hash);
+ if result then exit;
+ end;
+ Inc(i);
+ end;
+end;
+
+
+// enumerators
+function THashBase.GetEnumerator (): TValEnumerator;
+begin
+ if (Length(mEntries) > 0) then result := TValEnumerator.Create(mEntries, mFirstEntry, mLastEntry)
+ else result := TValEnumerator.Create(nil, -1, -1);
+end;
+
+function THashBase.byKey (): TKeyEnumerator;
+begin
+ if (Length(mEntries) > 0) then result := TKeyEnumerator.Create(mEntries, mFirstEntry, mLastEntry)
+ else result := TKeyEnumerator.Create(nil, -1, -1);
+end;
+
+function THashBase.byValue (): TValEnumerator;
+begin
+ if (Length(mEntries) > 0) then result := TValEnumerator.Create(mEntries, mFirstEntry, mLastEntry)
+ else result := TValEnumerator.Create(nil, -1, -1);
+end;
+
+function THashBase.byKeyValue (): TKeyValEnumerator; // PEntry
+begin
+ if (Length(mEntries) > 0) then result := TKeyValEnumerator.Create(mEntries, mFirstEntry, mLastEntry)
+ else result := TKeyValEnumerator.Create(nil, -1, -1);
+end;
+
+
+function THashBase.TValEnumerator.GetEnumerator (): TValEnumerator; inline; begin result.mEntries := self.mEntries; result.mFirstEntry := self.mFirstEntry; result.mLastEntry := self.mLastEntry; result.cur := self.cur; end;
+function THashBase.TKeyEnumerator.GetEnumerator (): TKeyEnumerator; inline; begin result.mEntries := self.mEntries; result.mFirstEntry := self.mFirstEntry; result.mLastEntry := self.mLastEntry; result.cur := self.cur; end;
+function THashBase.TKeyValEnumerator.GetEnumerator (): TKeyValEnumerator; inline; begin result.mEntries := self.mEntries; result.mFirstEntry := self.mFirstEntry; result.mLastEntry := self.mLastEntry; result.cur := self.cur; end;
+
+
+// ////////////////////////////////////////////////////////////////////////// //
+constructor THashBase.TValEnumerator.Create (const aents: TEntryArray; afirst, alast: Integer);
+begin
+ mEntries := aents;
+ mFirstEntry := afirst;
+ mLastEntry := alast;
+ cur := mFirstEntry-1;
+end;
+
+function THashBase.TValEnumerator.MoveNext (): Boolean; inline;
+begin
+ Inc(cur);
+ while (cur <= mLastEntry) do
+ begin
+ if (mEntries[cur].hash <> 0) then begin result := true; exit; end;
+ end;
+ result := false;
+end;
+
+function THashBase.TValEnumerator.getCurrent (): ValueT; inline;
+begin
+ result := mEntries[cur].value;
+end;
+
+
+// ////////////////////////////////////////////////////////////////////////// //
+constructor THashBase.TKeyEnumerator.Create (const aents: TEntryArray; afirst, alast: Integer);
+begin
+ mEntries := aents;
+ mFirstEntry := afirst;
+ mLastEntry := alast;
+ cur := mFirstEntry-1;
+end;
+
+function THashBase.TKeyEnumerator.MoveNext (): Boolean; inline;
+begin
+ Inc(cur);
+ while (cur <= mLastEntry) do
+ begin
+ if (mEntries[cur].hash <> 0) then begin result := true; exit; end;
+ end;
+ result := false;
+end;
+
+function THashBase.TKeyEnumerator.getCurrent (): KeyT; inline;
+begin
+ result := mEntries[cur].key;
+end;
+
+
+// ////////////////////////////////////////////////////////////////////////// //
+constructor THashBase.TKeyValEnumerator.Create (const aents: TEntryArray; afirst, alast: Integer);
+begin
+ mEntries := aents;
+ mFirstEntry := afirst;
+ mLastEntry := alast;
+ cur := mFirstEntry-1;
+end;
+
+function THashBase.TKeyValEnumerator.MoveNext (): Boolean; inline;
+begin
+ Inc(cur);
+ while (cur <= mLastEntry) do
+ begin
+ if (mEntries[cur].hash <> 0) then begin result := true; exit; end;
+ end;
+ result := false;
+end;
+
+function THashBase.TKeyValEnumerator.getCurrent (): PEntry; inline;
+begin
+ result := @mEntries[cur];
+end;
+
end.