X-Git-Url: http://deadsoftware.ru/gitweb?a=blobdiff_plain;ds=sidebyside;f=src%2Fshared%2Fxparser.pas;h=76ac3c0165483ef90d4066904540d1eef5b7a2dd;hb=3f758d569aeb250474aece9c4051f05ad7362805;hp=503a9a0017a56be690c88195c420bb339e1b1666;hpb=cead2891e0ba7e60639a60af7142eb144ab88ee4;p=d2df-sdl.git diff --git a/src/shared/xparser.pas b/src/shared/xparser.pas index 503a9a0..76ac3c0 100644 --- a/src/shared/xparser.pas +++ b/src/shared/xparser.pas @@ -18,34 +18,8 @@ unit xparser; interface - -// ////////////////////////////////////////////////////////////////////////// // -type - TUtf8DecoderFast = packed record - public - const Replacement = $FFFD; // replacement char for invalid unicode - const Accept = 0; - const Reject = 12; - - private - state: LongWord; - - public - codepoint: LongWord; // decoded codepoint (valid only when decoder is in "complete" state) - - public - constructor Create (v: Boolean{fuck you, fpc}); - - procedure reset (); inline; - - function complete (): Boolean; inline; // is current character complete? take `codepoint` then - function invalid (): Boolean; inline; - function completeOrInvalid (): Boolean; inline; - - // process one byte, return `true` if codepoint is ready - function decode (b: Byte): Boolean; inline; overload; - function decode (c: AnsiChar): Boolean; inline; overload; - end; +uses + Classes; // ////////////////////////////////////////////////////////////////////////// // @@ -79,14 +53,11 @@ type mTokInt: Integer; protected - procedure warmup (); virtual; abstract; // called in constructor to warm up the system + procedure warmup (); virtual; // called in constructor to warm up the system procedure loadNextChar (); virtual; abstract; // loads next char into mNextChar; #0 means 'eof' public - class function quote (const s: AnsiString): AnsiString; - - public - constructor Create (loadToken: Boolean=true); + constructor Create (); destructor Destroy (); override; function isEOF (): Boolean; inline; @@ -132,14 +103,21 @@ type type TFileTextParser = class(TTextParser) private - mFile: File; + const BufSize = 16384; + + private + mFile: TStream; + mStreamOwned: Boolean; + mBuffer: PChar; + mBufLen: Integer; + mBufPos: Integer; protected - procedure warmup (); override; // called in constructor to warm up the system procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof' public - constructor Create (const fname: AnsiString; loadToken: Boolean=true); + constructor Create (const fname: AnsiString); + constructor Create (st: TStream; astOwned: Boolean=true); // will take ownership on st by default destructor Destroy (); override; end; @@ -149,11 +127,10 @@ type mPos: Integer; protected - procedure warmup (); override; // called in constructor to warm up the system procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof' public - constructor Create (const astr: AnsiString; loadToken: Boolean=true); + constructor Create (const astr: AnsiString); destructor Destroy (); override; end; @@ -170,6 +147,8 @@ type public constructor Create (); + procedure flush (); virtual; + procedure put (const s: AnsiString); overload; procedure put (v: Byte); overload; procedure put (v: Integer); overload; @@ -184,22 +163,38 @@ type type TFileTextWriter = class(TTextWriter) private - mFile: File; + const BufSize = 16384; + + private + mFile: TStream; + mStreamOwned: Boolean; + mBuffer: PAnsiChar; + mBufUsed: Integer; protected procedure putBuf (constref buf; len: SizeUInt); override; public constructor Create (const fname: AnsiString); + constructor Create (ast: TStream; astOwned: Boolean=true); // will own the stream by default destructor Destroy (); override; + + procedure flush (); override; end; + TStrTextWriter = class(TTextWriter) + private + mStr: AnsiString; + + protected + procedure putBuf (constref buf; len: SizeUInt); override; -// ////////////////////////////////////////////////////////////////////////// // -function wcharTo1251 (wc: WideChar): AnsiChar; inline; -function utfTo1251 (const s: AnsiString): AnsiString; + public + constructor Create (); + destructor Destroy (); override; -function digitInBase (ch: AnsiChar; base: Integer): Integer; + property str: AnsiString read mStr; + end; implementation @@ -208,207 +203,12 @@ uses SysUtils, utils; -var - wc2shitmap: array[0..65535] of AnsiChar; - wc2shitmapInited: Boolean = false; - - -// ////////////////////////////////////////////////////////////////////////// // -procedure initShitMap (); -const - cp1251: array[0..127] of Word = ( - $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F, - $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F, - $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407, - $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457, - $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F, - $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F, - $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F, - $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F - ); -var - f: Integer; -begin - for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?'; - for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f); - for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128); - wc2shitmapInited := true; -end; - - // ////////////////////////////////////////////////////////////////////////// // -// TODO: make a hash or something -function wcharTo1251 (wc: WideChar): AnsiChar; inline; -begin - if not wc2shitmapInited then initShitMap(); - if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)]; -end; +function StrEqu (const a, b: AnsiString): Boolean; inline; begin result := (a = b); end; // ////////////////////////////////////////////////////////////////////////// // -// fast state-machine based UTF-8 decoder; using 8 bytes of memory -// code points from invalid range will never be valid, this is the property of the state machine -const - // see http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ - utf8dfa: array[0..$16c-1] of Byte = ( - // maps bytes to character classes - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 00-0f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 10-1f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 20-2f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 30-3f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 40-4f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 50-5f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 60-6f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 70-7f - $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01, // 80-8f - $09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09, // 90-9f - $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // a0-af - $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // b0-bf - $08,$08,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // c0-cf - $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // d0-df - $0a,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$04,$03,$03, // e0-ef - $0b,$06,$06,$06,$05,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08, // f0-ff - // maps a combination of a state of the automaton and a character class to a state - $00,$0c,$18,$24,$3c,$60,$54,$0c,$0c,$0c,$30,$48,$0c,$0c,$0c,$0c, // 100-10f - $0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$00,$0c,$0c,$0c,$0c,$0c,$00, // 110-11f - $0c,$00,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$18,$0c,$0c, // 120-12f - $0c,$0c,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c, // 130-13f - $0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$24, // 140-14f - $0c,$24,$0c,$0c,$0c,$24,$0c,$0c,$0c,$0c,$0c,$24,$0c,$24,$0c,$0c, // 150-15f - $0c,$24,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c); - - -// ////////////////////////////////////////////////////////////////////////// // -constructor TUtf8DecoderFast.Create (v: Boolean{fuck you, fpc}); begin state := Accept; codepoint := 0; end; - -procedure TUtf8DecoderFast.reset (); inline; begin state := Accept; codepoint := 0; end; - -function TUtf8DecoderFast.complete (): Boolean; inline; begin result := (state = Accept); end; -function TUtf8DecoderFast.invalid (): Boolean; inline; begin result := (state = Reject); end; -function TUtf8DecoderFast.completeOrInvalid (): Boolean; inline; begin result := (state = Accept) or (state = Reject); end; - -function TUtf8DecoderFast.decode (c: AnsiChar): Boolean; inline; overload; begin result := decode(Byte(c)); end; - -function TUtf8DecoderFast.decode (b: Byte): Boolean; inline; overload; -var - tp: LongWord; -begin - if (state = Reject) then begin state := Accept; codepoint := 0; end; - tp := utf8dfa[b]; - if (state <> Accept) then codepoint := (b and $3f) or (codepoint shl 6) else codepoint := ($ff shr tp) and b; - state := utf8dfa[256+state+tp]; - if (state = Reject) then begin codepoint := Replacement; state := Accept; end; - result := (state = Accept); -end; - - -// ////////////////////////////////////////////////////////////////////////// // -function utfTo1251 (const s: AnsiString): AnsiString; -var - f, c: Integer; - ud: TUtf8DecoderFast; -begin - for f := 1 to Length(s) do - begin - if (Byte(s[f]) > 127) then - begin - ud := TUtf8DecoderFast.Create(true); - result := ''; - for c := 1 to Length(s) do - begin - if ud.decode(s[c]) then result += wcharTo1251(WideChar(ud.codepoint)); - end; - exit; - end; - end; - result := s; -end; - - -// ////////////////////////////////////////////////////////////////////////// // -function digitInBase (ch: AnsiChar; base: Integer): Integer; -begin - result := -1; - if (base < 1) or (base > 36) then exit; - if (ch < '0') then exit; - if (base <= 10) then - begin - if (Integer(ch) >= 48+base) then exit; - result := Integer(ch)-48; - end - else - begin - if (ch >= '0') and (ch <= '9') then begin result := Integer(ch)-48; exit; end; - if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32); // poor man's tolower() - if (ch < 'A') or (Integer(ch) >= 65+(base-10)) then exit; - result := Integer(ch)-65+10; - end; -end; - - -// ////////////////////////////////////////////////////////////////////////// // -class function TTextParser.quote (const s: AnsiString): AnsiString; - - function squote (const s: AnsiString): AnsiString; - var - f: Integer; - begin - result := ''''; - for f := 1 to Length(s) do - begin - if (s[f] = '''') then result += ''''; - result += s[f]; - end; - result += ''''; - end; - - function dquote (const s: AnsiString): AnsiString; - var - f: Integer; - ch: AnsiChar; - begin - result := '"'; - for f := 1 to Length(s) do - begin - ch := s[f]; - if (ch = #0) then result += '\z' - else if (ch = #9) then result += '\t' - else if (ch = #10) then result += '\n' - else if (ch = #13) then result += '\r' - else if (ch = #27) then result += '\e' - else if (ch < ' ') or (ch = #127) then - begin - result += '\x'; - result += LowerCase(IntToHex(Integer(ch), 2)); - end - else if (ch = '"') or (ch = '\') then - begin - result += '\'; - result += ch; - end - else - begin - result += ch; - end; - end; - result += '"'; - end; - -var - needSingle: Boolean = false; - f: Integer; -begin - for f := 1 to Length(s) do - begin - if (s[f] = '''') then begin needSingle := true; continue; end; - if (s[f] < ' ') or (s[f] = #127) then begin result := dquote(s); exit; end; - end; - if needSingle then result := squote(s) else result := ''''+s+''''; -end; - - -// ////////////////////////////////////////////////////////////////////////// // -constructor TTextParser.Create (loadToken: Boolean=true); +constructor TTextParser.Create (); begin mLine := 1; mCol := 1; @@ -420,7 +220,7 @@ begin mTokInt := 0; mAllowSignedNumbers := true; warmup(); // change `mAllowSignedNumbers` there, if necessary - if loadToken then skipToken(); + skipToken(); end; @@ -433,6 +233,15 @@ end; function TTextParser.isEOF (): Boolean; inline; begin result := (mCurChar = #0); end; +procedure TTextParser.warmup (); +begin + mNextChar := ' '; + loadNextChar(); + mCurChar := mNextChar; + if (mNextChar <> #0) then loadNextChar(); +end; + + function TTextParser.skipChar (): Boolean; begin if (mCurChar = #0) then begin result := false; exit; end; @@ -712,7 +521,7 @@ end; procedure TTextParser.expectId (const aid: AnsiString); begin - if (mTokType <> TTId) or (CompareText(mTokStr, aid) <> 0) then raise Exception.Create('identifier '''+aid+''' expected'); + if (mTokType <> TTId) or (not StrEqu(mTokStr, aid)) then raise Exception.Create('identifier '''+aid+''' expected'); skipToken(); end; @@ -720,7 +529,7 @@ end; function TTextParser.eatId (const aid: AnsiString): Boolean; begin result := false; - if (mTokType <> TTId) or (CompareText(mTokStr, aid) <> 0) then exit; + if (mTokType <> TTId) or (not StrEqu(mTokStr, aid)) then exit; result := true; skipToken(); end; @@ -775,48 +584,67 @@ end; // ////////////////////////////////////////////////////////////////////////// // -constructor TFileTextParser.Create (const fname: AnsiString; loadToken: Boolean=true); +constructor TFileTextParser.Create (const fname: AnsiString); begin - AssignFile(mFile, fname); - Reset(mFile, 1); - inherited Create(loadToken); + mBuffer := nil; + mFile := openDiskFileRO(fname); + mStreamOwned := true; + GetMem(mBuffer, BufSize); + mBufPos := 0; + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); + inherited Create(); end; -destructor TFileTextParser.Destroy (); +constructor TFileTextParser.Create (st: TStream; astOwned: Boolean=true); begin - CloseFile(mFile); - inherited; + if (st = nil) then raise Exception.Create('cannot create parser for nil stream'); + mFile := st; + mStreamOwned := astOwned; + GetMem(mBuffer, BufSize); + mBufPos := 0; + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); + inherited Create(); end; -procedure TFileTextParser.warmup (); -var - rd: Integer; +destructor TFileTextParser.Destroy (); begin - blockRead(mFile, mCurChar, 1, rd); - if (rd = 0) then begin mCurChar := #0; exit; end; - if (mCurChar = #0) then mCurChar := ' '; - loadNextChar(); + if (mBuffer <> nil) then FreeMem(mBuffer); + mBuffer := nil; + mBufPos := 0; + mBufLen := 0; + if mStreamOwned then mFile.Free(); + mFile := nil; + inherited; end; procedure TFileTextParser.loadNextChar (); -var - rd: Integer; begin - blockRead(mFile, mNextChar, 1, rd); - if (rd = 0) then begin mNextChar := #0; exit; end; + if (mBufLen = 0) then begin mNextChar := #0; exit; end; + if (mBufPos >= mBufLen) then + begin + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); + if (mBufLen = 0) then begin mNextChar := #0; exit; end; + mBufPos := 0; + end; + assert(mBufPos < mBufLen); + mNextChar := mBuffer[mBufPos]; + Inc(mBufPos); if (mNextChar = #0) then mNextChar := ' '; end; // ////////////////////////////////////////////////////////////////////////// // -constructor TStrTextParser.Create (const astr: AnsiString; loadToken: Boolean=true); +constructor TStrTextParser.Create (const astr: AnsiString); begin mStr := astr; mPos := 1; - inherited Create(loadToken); + inherited Create(); end; @@ -827,20 +655,6 @@ begin end; -procedure TStrTextParser.warmup (); -begin - if (mPos > Length(mStr)) then - begin - mCurChar := #0; - mNextChar := #0; - exit; - end; - mCurChar := mStr[mPos]; Inc(mPos); - if (mCurChar = #0) then mCurChar := ' '; - loadNextChar(); -end; - - procedure TStrTextParser.loadNextChar (); begin mNextChar := #0; @@ -852,6 +666,7 @@ end; // ////////////////////////////////////////////////////////////////////////// // constructor TTextWriter.Create (); begin mIndent := 0; end; +procedure TTextWriter.flush (); begin end; procedure TTextWriter.put (const s: AnsiString); overload; begin if (Length(s) > 0) then putBuf((@(s[1]))^, Length(s)); end; procedure TTextWriter.put (v: Byte); overload; begin put('%d', [v]); end; procedure TTextWriter.put (v: Integer); overload; begin put('%d', [v]); end; @@ -864,36 +679,97 @@ procedure TTextWriter.unindent (); begin Dec(mIndent, 2); end; // ////////////////////////////////////////////////////////////////////////// // constructor TFileTextWriter.Create (const fname: AnsiString); begin - AssignFile(mFile, fname); - Rewrite(mFile, 1); + mFile := createDiskFile(fname); + mStreamOwned := true; + mBufUsed := 0; + GetMem(mBuffer, BufSize); + assert(mBuffer <> nil); inherited Create(); end; +constructor TFileTextWriter.Create (ast: TStream; astOwned: Boolean=true); +begin + if (ast = nil) then raise Exception.Create('cannot write to nil stream'); + mFile := ast; + mStreamOwned := astOwned; + mBufUsed := 0; + GetMem(mBuffer, BufSize); + assert(mBuffer <> nil); +end; + + destructor TFileTextWriter.Destroy (); begin - CloseFile(mFile); + flush(); + if (mBuffer <> nil) then FreeMem(mBuffer); + mBufUsed := 0; + mBuffer := nil; + if (mStreamOwned) then mFile.Free(); + mFile := nil; + inherited; +end; + + +procedure TFileTextWriter.flush (); +begin + if (mFile <> nil) and (mBufUsed > 0) then + begin + mFile.WriteBuffer(mBuffer^, mBufUsed); + end; + mBufUsed := 0; end; procedure TFileTextWriter.putBuf (constref buf; len: SizeUInt); var - wr: SizeUInt; pc: PChar; + left: Integer; begin - if (len > 0) then + if (len = 0) then exit; + pc := @buf; + while (len > 0) do begin - pc := @buf; - BlockWrite(mFile, pc^, len, wr); - if (wr <> len) then raise Exception.Create('write error'); - { - while (len > 0) do + left := BufSize-mBufUsed; + if (left = 0) then begin - write(pc^); - Inc(pc); - Dec(len); + flush(); + left := BufSize-mBufUsed; + assert(left > 0); end; - } + if (left > len) then left := Integer(len); + Move(pc^, (mBuffer+mBufUsed)^, left); + Inc(mBufUsed, left); + pc += left; + len -= left; + end; +end; + + +// ////////////////////////////////////////////////////////////////////////// // +constructor TStrTextWriter.Create (); +begin + mStr := ''; +end; + + +destructor TStrTextWriter.Destroy (); +begin + mStr := ''; + inherited; +end; + + +procedure TStrTextWriter.putBuf (constref buf; len: SizeUInt); +var + st: AnsiString = ''; +begin + if (len > 0) then + begin + SetLength(st, Integer(len)); + Move(buf, PChar(st)^, Integer(len)); + mStr += st; + st := ''; end; end;