X-Git-Url: http://deadsoftware.ru/gitweb?a=blobdiff_plain;f=src%2Fshared%2Fxparser.pas;h=7263b7db60037eef69dba6c49ffa9b6cac5a3108;hb=5db5b1f141c10ebe97a0e06404cb7118fb412a37;hp=503a9a0017a56be690c88195c420bb339e1b1666;hpb=cead2891e0ba7e60639a60af7142eb144ab88ee4;p=d2df-sdl.git diff --git a/src/shared/xparser.pas b/src/shared/xparser.pas index 503a9a0..7263b7d 100644 --- a/src/shared/xparser.pas +++ b/src/shared/xparser.pas @@ -18,34 +18,8 @@ unit xparser; interface - -// ////////////////////////////////////////////////////////////////////////// // -type - TUtf8DecoderFast = packed record - public - const Replacement = $FFFD; // replacement char for invalid unicode - const Accept = 0; - const Reject = 12; - - private - state: LongWord; - - public - codepoint: LongWord; // decoded codepoint (valid only when decoder is in "complete" state) - - public - constructor Create (v: Boolean{fuck you, fpc}); - - procedure reset (); inline; - - function complete (): Boolean; inline; // is current character complete? take `codepoint` then - function invalid (): Boolean; inline; - function completeOrInvalid (): Boolean; inline; - - // process one byte, return `true` if codepoint is ready - function decode (b: Byte): Boolean; inline; overload; - function decode (c: AnsiChar): Boolean; inline; overload; - end; +uses + Classes; // ////////////////////////////////////////////////////////////////////////// // @@ -79,12 +53,9 @@ type mTokInt: Integer; protected - procedure warmup (); virtual; abstract; // called in constructor to warm up the system + procedure warmup (); virtual; // called in constructor to warm up the system procedure loadNextChar (); virtual; abstract; // loads next char into mNextChar; #0 means 'eof' - public - class function quote (const s: AnsiString): AnsiString; - public constructor Create (loadToken: Boolean=true); destructor Destroy (); override; @@ -132,14 +103,20 @@ type type TFileTextParser = class(TTextParser) private - mFile: File; + const BufSize = 65536; + + private + mFile: TStream; + mBuffer: PChar; + mBufLen: Integer; + mBufPos: Integer; protected - procedure warmup (); override; // called in constructor to warm up the system procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof' public constructor Create (const fname: AnsiString; loadToken: Boolean=true); + constructor Create (st: TStream; loadToken: Boolean=true); // will take ownership on st destructor Destroy (); override; end; @@ -149,7 +126,6 @@ type mPos: Integer; protected - procedure warmup (); override; // called in constructor to warm up the system procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof' public @@ -184,7 +160,7 @@ type type TFileTextWriter = class(TTextWriter) private - mFile: File; + mFile: TStream; protected procedure putBuf (constref buf; len: SizeUInt); override; @@ -195,19 +171,17 @@ type end; -// ////////////////////////////////////////////////////////////////////////// // -function wcharTo1251 (wc: WideChar): AnsiChar; inline; -function utfTo1251 (const s: AnsiString): AnsiString; - -function digitInBase (ch: AnsiChar; base: Integer): Integer; - - implementation uses SysUtils, utils; +// ////////////////////////////////////////////////////////////////////////// // +function StrEqu (const a, b: AnsiString): Boolean; inline; begin result := (a = b); end; + + +// ////////////////////////////////////////////////////////////////////////// // var wc2shitmap: array[0..65535] of AnsiChar; wc2shitmapInited: Boolean = false; @@ -245,168 +219,6 @@ begin end; -// ////////////////////////////////////////////////////////////////////////// // -// fast state-machine based UTF-8 decoder; using 8 bytes of memory -// code points from invalid range will never be valid, this is the property of the state machine -const - // see http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ - utf8dfa: array[0..$16c-1] of Byte = ( - // maps bytes to character classes - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 00-0f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 10-1f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 20-2f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 30-3f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 40-4f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 50-5f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 60-6f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 70-7f - $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01, // 80-8f - $09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09, // 90-9f - $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // a0-af - $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // b0-bf - $08,$08,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // c0-cf - $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // d0-df - $0a,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$04,$03,$03, // e0-ef - $0b,$06,$06,$06,$05,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08, // f0-ff - // maps a combination of a state of the automaton and a character class to a state - $00,$0c,$18,$24,$3c,$60,$54,$0c,$0c,$0c,$30,$48,$0c,$0c,$0c,$0c, // 100-10f - $0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$00,$0c,$0c,$0c,$0c,$0c,$00, // 110-11f - $0c,$00,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$18,$0c,$0c, // 120-12f - $0c,$0c,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c, // 130-13f - $0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$24, // 140-14f - $0c,$24,$0c,$0c,$0c,$24,$0c,$0c,$0c,$0c,$0c,$24,$0c,$24,$0c,$0c, // 150-15f - $0c,$24,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c); - - -// ////////////////////////////////////////////////////////////////////////// // -constructor TUtf8DecoderFast.Create (v: Boolean{fuck you, fpc}); begin state := Accept; codepoint := 0; end; - -procedure TUtf8DecoderFast.reset (); inline; begin state := Accept; codepoint := 0; end; - -function TUtf8DecoderFast.complete (): Boolean; inline; begin result := (state = Accept); end; -function TUtf8DecoderFast.invalid (): Boolean; inline; begin result := (state = Reject); end; -function TUtf8DecoderFast.completeOrInvalid (): Boolean; inline; begin result := (state = Accept) or (state = Reject); end; - -function TUtf8DecoderFast.decode (c: AnsiChar): Boolean; inline; overload; begin result := decode(Byte(c)); end; - -function TUtf8DecoderFast.decode (b: Byte): Boolean; inline; overload; -var - tp: LongWord; -begin - if (state = Reject) then begin state := Accept; codepoint := 0; end; - tp := utf8dfa[b]; - if (state <> Accept) then codepoint := (b and $3f) or (codepoint shl 6) else codepoint := ($ff shr tp) and b; - state := utf8dfa[256+state+tp]; - if (state = Reject) then begin codepoint := Replacement; state := Accept; end; - result := (state = Accept); -end; - - -// ////////////////////////////////////////////////////////////////////////// // -function utfTo1251 (const s: AnsiString): AnsiString; -var - f, c: Integer; - ud: TUtf8DecoderFast; -begin - for f := 1 to Length(s) do - begin - if (Byte(s[f]) > 127) then - begin - ud := TUtf8DecoderFast.Create(true); - result := ''; - for c := 1 to Length(s) do - begin - if ud.decode(s[c]) then result += wcharTo1251(WideChar(ud.codepoint)); - end; - exit; - end; - end; - result := s; -end; - - -// ////////////////////////////////////////////////////////////////////////// // -function digitInBase (ch: AnsiChar; base: Integer): Integer; -begin - result := -1; - if (base < 1) or (base > 36) then exit; - if (ch < '0') then exit; - if (base <= 10) then - begin - if (Integer(ch) >= 48+base) then exit; - result := Integer(ch)-48; - end - else - begin - if (ch >= '0') and (ch <= '9') then begin result := Integer(ch)-48; exit; end; - if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32); // poor man's tolower() - if (ch < 'A') or (Integer(ch) >= 65+(base-10)) then exit; - result := Integer(ch)-65+10; - end; -end; - - -// ////////////////////////////////////////////////////////////////////////// // -class function TTextParser.quote (const s: AnsiString): AnsiString; - - function squote (const s: AnsiString): AnsiString; - var - f: Integer; - begin - result := ''''; - for f := 1 to Length(s) do - begin - if (s[f] = '''') then result += ''''; - result += s[f]; - end; - result += ''''; - end; - - function dquote (const s: AnsiString): AnsiString; - var - f: Integer; - ch: AnsiChar; - begin - result := '"'; - for f := 1 to Length(s) do - begin - ch := s[f]; - if (ch = #0) then result += '\z' - else if (ch = #9) then result += '\t' - else if (ch = #10) then result += '\n' - else if (ch = #13) then result += '\r' - else if (ch = #27) then result += '\e' - else if (ch < ' ') or (ch = #127) then - begin - result += '\x'; - result += LowerCase(IntToHex(Integer(ch), 2)); - end - else if (ch = '"') or (ch = '\') then - begin - result += '\'; - result += ch; - end - else - begin - result += ch; - end; - end; - result += '"'; - end; - -var - needSingle: Boolean = false; - f: Integer; -begin - for f := 1 to Length(s) do - begin - if (s[f] = '''') then begin needSingle := true; continue; end; - if (s[f] < ' ') or (s[f] = #127) then begin result := dquote(s); exit; end; - end; - if needSingle then result := squote(s) else result := ''''+s+''''; -end; - - // ////////////////////////////////////////////////////////////////////////// // constructor TTextParser.Create (loadToken: Boolean=true); begin @@ -433,6 +245,15 @@ end; function TTextParser.isEOF (): Boolean; inline; begin result := (mCurChar = #0); end; +procedure TTextParser.warmup (); +begin + mNextChar := ' '; + loadNextChar(); + mCurChar := mNextChar; + if (mNextChar <> #0) then loadNextChar(); +end; + + function TTextParser.skipChar (): Boolean; begin if (mCurChar = #0) then begin result := false; exit; end; @@ -712,7 +533,7 @@ end; procedure TTextParser.expectId (const aid: AnsiString); begin - if (mTokType <> TTId) or (CompareText(mTokStr, aid) <> 0) then raise Exception.Create('identifier '''+aid+''' expected'); + if (mTokType <> TTId) or (not StrEqu(mTokStr, aid)) then raise Exception.Create('identifier '''+aid+''' expected'); skipToken(); end; @@ -720,7 +541,7 @@ end; function TTextParser.eatId (const aid: AnsiString): Boolean; begin result := false; - if (mTokType <> TTId) or (CompareText(mTokStr, aid) <> 0) then exit; + if (mTokType <> TTId) or (not StrEqu(mTokStr, aid)) then exit; result := true; skipToken(); end; @@ -777,36 +598,49 @@ end; // ////////////////////////////////////////////////////////////////////////// // constructor TFileTextParser.Create (const fname: AnsiString; loadToken: Boolean=true); begin - AssignFile(mFile, fname); - Reset(mFile, 1); + mBuffer := nil; + mFile := openDiskFileRO(fname); + GetMem(mBuffer, BufSize); + mBufPos := 0; + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); inherited Create(loadToken); end; -destructor TFileTextParser.Destroy (); +constructor TFileTextParser.Create (st: TStream; loadToken: Boolean=true); begin - CloseFile(mFile); - inherited; + if (st = nil) then raise Exception.Create('cannot create parser for nil stream'); + mFile := st; + GetMem(mBuffer, BufSize); + mBufPos := 0; + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); + inherited Create(loadToken); end; -procedure TFileTextParser.warmup (); -var - rd: Integer; +destructor TFileTextParser.Destroy (); begin - blockRead(mFile, mCurChar, 1, rd); - if (rd = 0) then begin mCurChar := #0; exit; end; - if (mCurChar = #0) then mCurChar := ' '; - loadNextChar(); + if (mBuffer <> nil) then FreeMem(mBuffer); + mFile.Free(); + inherited; end; procedure TFileTextParser.loadNextChar (); -var - rd: Integer; begin - blockRead(mFile, mNextChar, 1, rd); - if (rd = 0) then begin mNextChar := #0; exit; end; + if (mBufLen = 0) then begin mNextChar := #0; exit; end; + if (mBufPos >= mBufLen) then + begin + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); + if (mBufLen = 0) then begin mNextChar := #0; exit; end; + mBufPos := 0; + end; + assert(mBufPos < mBufLen); + mNextChar := mBuffer[mBufPos]; + Inc(mBufPos); if (mNextChar = #0) then mNextChar := ' '; end; @@ -827,20 +661,6 @@ begin end; -procedure TStrTextParser.warmup (); -begin - if (mPos > Length(mStr)) then - begin - mCurChar := #0; - mNextChar := #0; - exit; - end; - mCurChar := mStr[mPos]; Inc(mPos); - if (mCurChar = #0) then mCurChar := ' '; - loadNextChar(); -end; - - procedure TStrTextParser.loadNextChar (); begin mNextChar := #0; @@ -864,28 +684,26 @@ procedure TTextWriter.unindent (); begin Dec(mIndent, 2); end; // ////////////////////////////////////////////////////////////////////////// // constructor TFileTextWriter.Create (const fname: AnsiString); begin - AssignFile(mFile, fname); - Rewrite(mFile, 1); + mFile := createDiskFile(fname); inherited Create(); end; destructor TFileTextWriter.Destroy (); begin - CloseFile(mFile); + mFile.Free(); + inherited; end; procedure TFileTextWriter.putBuf (constref buf; len: SizeUInt); var - wr: SizeUInt; pc: PChar; begin if (len > 0) then begin pc := @buf; - BlockWrite(mFile, pc^, len, wr); - if (wr <> len) then raise Exception.Create('write error'); + mFile.WriteBuffer(pc^, len); { while (len > 0) do begin