X-Git-Url: https://deadsoftware.ru/gitweb?a=blobdiff_plain;ds=sidebyside;f=src%2Fshared%2Futils.pas;h=9e136495bc8792dc661c3e4fa254ee58d24974a6;hb=223356cbae3197afc861efa6241c4ae91bd92885;hp=36d59ff8bd1d5c22606d8b62e4b0727c0e389cba;hpb=7f60063e21b5764c01cf86883d2e1808af8b04ef;p=d2df-sdl.git diff --git a/src/shared/utils.pas b/src/shared/utils.pas index 36d59ff..9e13649 100644 --- a/src/shared/utils.pas +++ b/src/shared/utils.pas @@ -22,6 +22,36 @@ uses SysUtils, Classes; +// ////////////////////////////////////////////////////////////////////////// // +type + TUtf8DecoderFast = packed record + public + const Replacement = $FFFD; // replacement char for invalid unicode + const Accept = 0; + const Reject = 12; + + private + state: LongWord; + + public + codepoint: LongWord; // decoded codepoint (valid only when decoder is in "complete" state) + + public + constructor Create (v: Boolean{fuck you, fpc}); + + procedure reset (); inline; + + function complete (): Boolean; inline; // is current character complete? take `codepoint` then + function invalid (): Boolean; inline; + function completeOrInvalid (): Boolean; inline; + + // process one byte, return `true` if codepoint is ready + function decode (b: Byte): Boolean; inline; overload; + function decode (c: AnsiChar): Boolean; inline; overload; + end; + + +// ////////////////////////////////////////////////////////////////////////// // // does filename have one of ".wad", ".pk3", ".zip" extensions? function hasWadExtension (fn: AnsiString): Boolean; @@ -98,10 +128,423 @@ type // returns formatted string if `writerCB` is `nil`, empty string otherwise function formatstrf (const fmt: AnsiString; args: array of const; writerCB: TFormatStrFCallback=nil): AnsiString; +function wchar2win (wc: WideChar): AnsiChar; inline; +function utf2win (const s: AnsiString): AnsiString; +function win2utf (const s: AnsiString): AnsiString; +function digitInBase (ch: AnsiChar; base: Integer): Integer; + +// returns string in single or double quotes +// single quotes supports only pascal-style '' for single quote char +// double quotes supports c-style escapes +// function will select quote mode automatically +function quoteStr (const s: AnsiString): AnsiString; + + +type + generic TSimpleList = class + private + type PItemT = ^ItemT; + + public + type + TEnumerator = record + private + mItems: PItemT; + mCount: Integer; + mCurrent: Integer; + public + constructor Create (aitems: PItemT; acount: Integer); + function MoveNext: Boolean; + function getCurrent (): ItemT; + property Current: ItemT read getCurrent; + end; + + private + mItems: array of ItemT; + mCount: Integer; // can be less than `mItems` size + + private + function getAt (idx: Integer): ItemT; inline; + procedure setAt (idx: Integer; const it: ItemT); inline; + + function getCapacity (): Integer; inline; + procedure setCapacity (v: Integer); inline; + + public + constructor Create (acapacity: Integer=-1); + destructor Destroy (); override; + + //WARNING! don't change list contents in `for ... in`! + function GetEnumerator (): TEnumerator; + + procedure reset (); inline; // won't resize `mItems` + procedure clear (); inline; + + procedure append (constref it: ItemT); inline; + + public + property count: Integer read mCount; + property capacity: Integer read getCapacity write setCapacity; + property at[idx: Integer]: ItemT read getAt write setAt; default; + end; + implementation +// ////////////////////////////////////////////////////////////////////////// // +constructor TSimpleList.TEnumerator.Create (aitems: PItemT; acount: Integer); +begin + mItems := aitems; + mCount := acount; + mCurrent := -1; +end; + +function TSimpleList.TEnumerator.MoveNext: Boolean; +begin + Inc(mCurrent); + result := (mCurrent < mCount); +end; + +function TSimpleList.TEnumerator.getCurrent (): ItemT; +begin + result := mItems[mCurrent]; +end; + + +// ////////////////////////////////////////////////////////////////////////// // +constructor TSimpleList.Create (acapacity: Integer=-1); +begin + mItems := nil; + if (acapacity > 0) then SetLength(mItems, acapacity); + mCount := 0; +end; + + +destructor TSimpleList.Destroy (); +begin + mItems := nil; + inherited; +end; + + +function TSimpleList.getCapacity (): Integer; inline; +begin + result := Length(mItems); +end; + + +procedure TSimpleList.setCapacity (v: Integer); inline; +begin + if (v < mCount) then v := mCount; + if (v <> Length(mItems)) then SetLength(mItems, v); +end; + + +function TSimpleList.GetEnumerator (): TEnumerator; +begin + if (Length(mItems) > 0) then result := TEnumerator.Create(@mItems[0], mCount) + else result := TEnumerator.Create(nil, -1); +end; + + +procedure TSimpleList.reset (); inline; +begin + mCount := 0; +end; + + +procedure TSimpleList.clear (); inline; +begin + mItems := nil; + mCount := 0; +end; + + +function TSimpleList.getAt (idx: Integer): ItemT; inline; +begin + if (idx >= 0) and (idx < mCount) then result := mItems[idx] else result := Default(ItemT); +end; + + +procedure TSimpleList.setAt (idx: Integer; const it: ItemT); inline; +begin + if (idx >= 0) and (idx < mCount) then mItems[idx] := it; +end; + + +procedure TSimpleList.append (constref it: ItemT); inline; +begin + if (mCount = Length(mItems)) then + begin + if (mCount = 0) then SetLength(mItems, 128) else SetLength(mItems, mCount*2); + end; + mItems[mCount] := it; + Inc(mCount); +end; + + +// ////////////////////////////////////////////////////////////////////////// // +var + wc2shitmap: array[0..65535] of AnsiChar; + wc2shitmapInited: Boolean = false; + + +// ////////////////////////////////////////////////////////////////////////// // +const + cp1251: array[0..127] of Word = ( + $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F, + $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F, + $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407, + $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457, + $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F, + $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F, + $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F, + $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F + ); + + +procedure initShitMap (); +var + f: Integer; +begin + for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?'; + for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f); + for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128); + wc2shitmapInited := true; +end; + + +// ////////////////////////////////////////////////////////////////////////// // +// fast state-machine based UTF-8 decoder; using 8 bytes of memory +// code points from invalid range will never be valid, this is the property of the state machine +const + // see http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + utf8dfa: array[0..$16c-1] of Byte = ( + // maps bytes to character classes + $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 00-0f + $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 10-1f + $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 20-2f + $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 30-3f + $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 40-4f + $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 50-5f + $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 60-6f + $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 70-7f + $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01, // 80-8f + $09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09, // 90-9f + $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // a0-af + $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // b0-bf + $08,$08,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // c0-cf + $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // d0-df + $0a,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$04,$03,$03, // e0-ef + $0b,$06,$06,$06,$05,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08, // f0-ff + // maps a combination of a state of the automaton and a character class to a state + $00,$0c,$18,$24,$3c,$60,$54,$0c,$0c,$0c,$30,$48,$0c,$0c,$0c,$0c, // 100-10f + $0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$00,$0c,$0c,$0c,$0c,$0c,$00, // 110-11f + $0c,$00,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$18,$0c,$0c, // 120-12f + $0c,$0c,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c, // 130-13f + $0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$24, // 140-14f + $0c,$24,$0c,$0c,$0c,$24,$0c,$0c,$0c,$0c,$0c,$24,$0c,$24,$0c,$0c, // 150-15f + $0c,$24,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c); + + +// ////////////////////////////////////////////////////////////////////////// // +constructor TUtf8DecoderFast.Create (v: Boolean{fuck you, fpc}); begin state := Accept; codepoint := 0; end; + +procedure TUtf8DecoderFast.reset (); inline; begin state := Accept; codepoint := 0; end; + +function TUtf8DecoderFast.complete (): Boolean; inline; begin result := (state = Accept); end; +function TUtf8DecoderFast.invalid (): Boolean; inline; begin result := (state = Reject); end; +function TUtf8DecoderFast.completeOrInvalid (): Boolean; inline; begin result := (state = Accept) or (state = Reject); end; + +function TUtf8DecoderFast.decode (c: AnsiChar): Boolean; inline; overload; begin result := decode(Byte(c)); end; + +function TUtf8DecoderFast.decode (b: Byte): Boolean; inline; overload; +var + tp: LongWord; +begin + if (state = Reject) then begin state := Accept; codepoint := 0; end; + tp := utf8dfa[b]; + if (state <> Accept) then codepoint := (b and $3f) or (codepoint shl 6) else codepoint := ($ff shr tp) and b; + state := utf8dfa[256+state+tp]; + if (state = Reject) then begin codepoint := Replacement; state := Accept; end; + result := (state = Accept); +end; + + +// ////////////////////////////////////////////////////////////////////////// // +function wchar2win (wc: WideChar): AnsiChar; inline; +begin + if not wc2shitmapInited then initShitMap(); + if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)]; +end; + + +// ////////////////////////////////////////////////////////////////////////// // +function utf2win (const s: AnsiString): AnsiString; +var + f, c: Integer; + ud: TUtf8DecoderFast; +begin + for f := 1 to Length(s) do + begin + if (Byte(s[f]) > 127) then + begin + ud := TUtf8DecoderFast.Create(true); + result := ''; + for c := 1 to Length(s) do + begin + if ud.decode(s[c]) then result += wchar2win(WideChar(ud.codepoint)); + end; + exit; + end; + end; + result := s; +end; + + +function win2utf (const s: AnsiString): AnsiString; +var + f, c: Integer; + + function utf8Encode (code: Integer): AnsiString; + begin + if (code < 0) or (code > $10FFFF) then begin result := '?'; exit; end; + if (code <= $7f) then + begin + result := Char(code and $ff); + end + else if (code <= $7FF) then + begin + result := Char($C0 or (code shr 6)); + result += Char($80 or (code and $3F)); + end + else if (code <= $FFFF) then + begin + result := Char($E0 or (code shr 12)); + result += Char($80 or ((code shr 6) and $3F)); + result += Char($80 or (code and $3F)); + end + else if (code <= $10FFFF) then + begin + result := Char($F0 or (code shr 18)); + result += Char($80 or ((code shr 12) and $3F)); + result += Char($80 or ((code shr 6) and $3F)); + result += Char($80 or (code and $3F)); + end + else + begin + result := '?'; + end; + end; + +begin + for f := 1 to Length(s) do + begin + if (Byte(s[f]) > 127) then + begin + result := ''; + for c := 1 to Length(s) do + begin + if (Byte(s[c]) < 128) then + begin + result += s[c]; + end + else + begin + result += utf8Encode(cp1251[Byte(s[c])-128]) + end; + end; + exit; + end; + end; + result := s; +end; + + +// ////////////////////////////////////////////////////////////////////////// // +function digitInBase (ch: AnsiChar; base: Integer): Integer; +begin + result := -1; + if (base < 1) or (base > 36) then exit; + if (ch < '0') then exit; + if (base <= 10) then + begin + if (Integer(ch) >= 48+base) then exit; + result := Integer(ch)-48; + end + else + begin + if (ch >= '0') and (ch <= '9') then begin result := Integer(ch)-48; exit; end; + if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32); // poor man's tolower() + if (ch < 'A') or (Integer(ch) >= 65+(base-10)) then exit; + result := Integer(ch)-65+10; + end; +end; + + +// ////////////////////////////////////////////////////////////////////////// // +function quoteStr (const s: AnsiString): AnsiString; + + function squote (const s: AnsiString): AnsiString; + var + f: Integer; + begin + result := ''''; + for f := 1 to Length(s) do + begin + if (s[f] = '''') then result += ''''; + result += s[f]; + end; + result += ''''; + end; + + function dquote (const s: AnsiString): AnsiString; + var + f: Integer; + ch: AnsiChar; + begin + result := '"'; + for f := 1 to Length(s) do + begin + ch := s[f]; + if (ch = #0) then result += '\z' + else if (ch = #9) then result += '\t' + else if (ch = #10) then result += '\n' + else if (ch = #13) then result += '\r' + else if (ch = #27) then result += '\e' + else if (ch < ' ') or (ch = #127) then + begin + result += '\x'; + result += LowerCase(IntToHex(Integer(ch), 2)); + end + else if (ch = '"') or (ch = '\') then + begin + result += '\'; + result += ch; + end + else + begin + result += ch; + end; + end; + result += '"'; + end; + +var + needSingle: Boolean = false; + f: Integer; +begin + for f := 1 to Length(s) do + begin + if (s[f] = '''') then begin needSingle := true; continue; end; + if (s[f] < ' ') or (s[f] = #127) then begin result := dquote(s); exit; end; + end; + if needSingle then result := squote(s) else result := ''''+s+''''; +end; + + +// ////////////////////////////////////////////////////////////////////////// // function hasWadExtension (fn: AnsiString): Boolean; begin fn := ExtractFileExt(fn); @@ -596,7 +1039,7 @@ var while (len > 0) do begin if (len > 255) then slen := 255 else slen := Integer(len); - Move(b^, ss[1], len); + Move(b^, ss[1], slen); ss[0] := AnsiChar(slen); result += ss; b += slen;