X-Git-Url: http://deadsoftware.ru/gitweb?a=blobdiff_plain;f=src%2Fshared%2Fxparser.pas;h=595d300f99f32291ae9af1da261b3a8907f95106;hb=05494fe1320ebc427c3b5c688c18669bf3abc260;hp=503a9a0017a56be690c88195c420bb339e1b1666;hpb=cead2891e0ba7e60639a60af7142eb144ab88ee4;p=d2df-sdl.git diff --git a/src/shared/xparser.pas b/src/shared/xparser.pas index 503a9a0..595d300 100644 --- a/src/shared/xparser.pas +++ b/src/shared/xparser.pas @@ -1,4 +1,5 @@ -(* Copyright (C) DooM 2D:Forever Developers +(* coded by Ketmar // Invisible Vector + * Understanding is not required. Only obedience. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -11,46 +12,32 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program. If not, see . + * along with this program. If not, see . *) {$INCLUDE a_modes.inc} +{.$DEFINE XPARSER_DEBUG} unit xparser; interface +uses + SysUtils, Classes{$IFDEF USE_MEMPOOL}, mempool{$ENDIF}; + // ////////////////////////////////////////////////////////////////////////// // type - TUtf8DecoderFast = packed record - public - const Replacement = $FFFD; // replacement char for invalid unicode - const Accept = 0; - const Reject = 12; - - private - state: LongWord; + TTextParser = class; + TParserException = class(Exception) public - codepoint: LongWord; // decoded codepoint (valid only when decoder is in "complete" state) + tokLine, tokCol: Integer; public - constructor Create (v: Boolean{fuck you, fpc}); - - procedure reset (); inline; - - function complete (): Boolean; inline; // is current character complete? take `codepoint` then - function invalid (): Boolean; inline; - function completeOrInvalid (): Boolean; inline; - - // process one byte, return `true` if codepoint is ready - function decode (b: Byte): Boolean; inline; overload; - function decode (c: AnsiChar): Boolean; inline; overload; + constructor Create (pr: TTextParser; const amsg: AnsiString); + constructor CreateFmt (pr: TTextParser; const afmt: AnsiString; const args: array of const); end; - -// ////////////////////////////////////////////////////////////////////////// // -type - TTextParser = class + TTextParser = class{$IFDEF USE_MEMPOOL}(TPoolObject){$ENDIF} public const TTNone = -1; @@ -59,18 +46,46 @@ type TTInt = 2; //TTFloat = 3; // not yet TTStr = 4; // string - TTComma = 5; // ',' - TTColon = 6; // ':' - TTSemi = 7; // ';' - TTBegin = 8; // left curly - TTEnd = 9; // right curly - TTDelim = 10; // other delimiters + TTDelim = 5; // one-char delimiters + // + TTLogAnd = 11; // && + TTLogOr = 12; // || + TTLessEqu = 13; // <= + TTGreatEqu = 14; // >= + TTNotEqu = 15; // != + TTEqu = 16; // == or <> + TTAss = 17; // := + TTShl = 18; // << + TTShr = 19; // >> + TTDotDot = 19; // .. + + public + type + TOption = ( + SignedNumbers, // allow signed numbers; otherwise sign will be TTDelim + DollarIsId, // allow dollar in identifiers; otherwise dollar will be TTDelim + DotIsId, // allow dot in identifiers; otherwise dot will be TTDelim + DashIsId, // '-' can be part of identifier (but identifier cannot start with '-') + HtmlColors, // #rgb or #rrggbb colors + PascalComments // allow `{}` pascal comments + ); + TOptions = set of TOption; + + private + type + TAnsiCharSet = set of AnsiChar; + const + CharBufSize = 8; private mLine, mCol: Integer; - mCurChar, mNextChar: AnsiChar; + // chars for 'unget' + mCharBuf: packed array [0..CharBufSize-1] of AnsiChar; + mCharBufUsed: Integer; + mCharBufPos: Integer; + mEofHit: Boolean; // no more chars to load into mCharBuf - mAllowSignedNumbers: Boolean; // internal control + mOptions: TOptions; mTokLine, mTokCol: Integer; // token start mTokType: Integer; @@ -78,44 +93,72 @@ type mTokChar: AnsiChar; // for delimiters mTokInt: Integer; + private + procedure fillCharBuf (); + function popFrontChar (): AnsiChar; inline; // never drains char buffer (except on "total EOF") + function peekCurChar (): AnsiChar; inline; + function peekNextChar (): AnsiChar; inline; + function peekChar (dest: Integer): AnsiChar; inline; + protected - procedure warmup (); virtual; abstract; // called in constructor to warm up the system - procedure loadNextChar (); virtual; abstract; // loads next char into mNextChar; #0 means 'eof' + function loadChar (): AnsiChar; virtual; abstract; // loads next char; #0 means 'eof' public - class function quote (const s: AnsiString): AnsiString; + function isIdStartChar (ch: AnsiChar): Boolean; inline; + function isIdMidChar (ch: AnsiChar): Boolean; inline; public - constructor Create (loadToken: Boolean=true); + constructor Create (aopts: TOptions=[TOption.SignedNumbers]); destructor Destroy (); override; - function isEOF (): Boolean; inline; + procedure error (const amsg: AnsiString); noreturn; + procedure errorfmt (const afmt: AnsiString; const args: array of const); noreturn; function skipChar (): Boolean; // returns `false` on eof function skipBlanks (): Boolean; // ...and comments; returns `false` on eof function skipToken (): Boolean; // returns `false` on eof + {$IFDEF XPARSER_DEBUG} + function skipToken1 (): Boolean; + {$ENDIF} + + function isEOF (): Boolean; inline; + function isId (): Boolean; inline; + function isInt (): Boolean; inline; + function isStr (): Boolean; inline; + function isDelim (): Boolean; inline; + function isIdOrStr (): Boolean; inline; function expectId (): AnsiString; - procedure expectId (const aid: AnsiString); - function eatId (const aid: AnsiString): Boolean; + procedure expectId (const aid: AnsiString; caseSens: Boolean=true); + function eatId (const aid: AnsiString; caseSens: Boolean=true): Boolean; + function eatIdOrStr (const aid: AnsiString; caseSens: Boolean=true): Boolean; + function eatIdOrStrCI (const aid: AnsiString): Boolean; inline; function expectStr (allowEmpty: Boolean=false): AnsiString; function expectInt (): Integer; + function expectIdOrStr (allowEmpty: Boolean=false): AnsiString; + procedure expectTT (ttype: Integer); function eatTT (ttype: Integer): Boolean; - function expectDelim (const ch: AnsiChar): AnsiChar; + procedure expectDelim (const ch: AnsiChar); + function expectDelims (const ch: TAnsiCharSet): AnsiChar; function eatDelim (const ch: AnsiChar): Boolean; + function isDelim (const ch: AnsiChar): Boolean; inline; + + public + property options: TOptions read mOptions write mOptions; + public property col: Integer read mCol; property line: Integer read mLine; - property curChar: AnsiChar read mCurChar; - property nextChar: AnsiChar read mNextChar; + property curChar: AnsiChar read peekCurChar; + property nextChar: AnsiChar read peekNextChar; // token start property tokCol: Integer read mTokCol; @@ -132,14 +175,21 @@ type type TFileTextParser = class(TTextParser) private - mFile: File; + const BufSize = 16384; + + private + mFile: TStream; + mStreamOwned: Boolean; + mBuffer: PChar; + mBufLen: Integer; + mBufPos: Integer; protected - procedure warmup (); override; // called in constructor to warm up the system - procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof' + function loadChar (): AnsiChar; override; // loads next char; #0 means 'eof' public - constructor Create (const fname: AnsiString; loadToken: Boolean=true); + constructor Create (const fname: AnsiString; aopts: TOptions=[TOption.SignedNumbers]); + constructor Create (st: TStream; astOwned: Boolean=true; aopts: TOptions=[TOption.SignedNumbers]); destructor Destroy (); override; end; @@ -149,11 +199,10 @@ type mPos: Integer; protected - procedure warmup (); override; // called in constructor to warm up the system - procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof' + function loadChar (): AnsiChar; override; // loads next char; #0 means 'eof' public - constructor Create (const astr: AnsiString; loadToken: Boolean=true); + constructor Create (const astr: AnsiString; aopts: TOptions=[TOption.SignedNumbers]); destructor Destroy (); override; end; @@ -170,6 +219,8 @@ type public constructor Create (); + procedure flush (); virtual; + procedure put (const s: AnsiString); overload; procedure put (v: Byte); overload; procedure put (v: Integer); overload; @@ -177,6 +228,9 @@ type procedure putIndent (); procedure indent (); procedure unindent (); + + public + property curIndent: Integer read mIndent; end; @@ -184,269 +238,195 @@ type type TFileTextWriter = class(TTextWriter) private - mFile: File; + const BufSize = 16384; + + private + mFile: TStream; + mStreamOwned: Boolean; + mBuffer: PAnsiChar; + mBufUsed: Integer; protected procedure putBuf (constref buf; len: SizeUInt); override; public constructor Create (const fname: AnsiString); + constructor Create (ast: TStream; astOwned: Boolean=true); // will own the stream by default destructor Destroy (); override; + + procedure flush (); override; end; + TStrTextWriter = class(TTextWriter) + private + mStr: AnsiString; + + protected + procedure putBuf (constref buf; len: SizeUInt); override; -// ////////////////////////////////////////////////////////////////////////// // -function wcharTo1251 (wc: WideChar): AnsiChar; inline; -function utfTo1251 (const s: AnsiString): AnsiString; + public + constructor Create (); + destructor Destroy (); override; -function digitInBase (ch: AnsiChar; base: Integer): Integer; + property str: AnsiString read mStr; + end; implementation uses - SysUtils, utils; - - -var - wc2shitmap: array[0..65535] of AnsiChar; - wc2shitmapInited: Boolean = false; + utils; // ////////////////////////////////////////////////////////////////////////// // -procedure initShitMap (); -const - cp1251: array[0..127] of Word = ( - $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F, - $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F, - $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407, - $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457, - $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F, - $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F, - $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F, - $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F - ); -var - f: Integer; +constructor TParserException.Create (pr: TTextParser; const amsg: AnsiString); begin - for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?'; - for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f); - for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128); - wc2shitmapInited := true; + if (pr <> nil) then begin tokLine := pr.tokLine; tokCol := pr.tokCol; end; + inherited Create(amsg); end; - -// ////////////////////////////////////////////////////////////////////////// // -// TODO: make a hash or something -function wcharTo1251 (wc: WideChar): AnsiChar; inline; +constructor TParserException.CreateFmt (pr: TTextParser; const afmt: AnsiString; const args: array of const); begin - if not wc2shitmapInited then initShitMap(); - if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)]; + if (pr <> nil) then begin tokLine := pr.tokLine; tokCol := pr.tokCol; end; + inherited Create(formatstrf(afmt, args)); end; // ////////////////////////////////////////////////////////////////////////// // -// fast state-machine based UTF-8 decoder; using 8 bytes of memory -// code points from invalid range will never be valid, this is the property of the state machine -const - // see http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ - utf8dfa: array[0..$16c-1] of Byte = ( - // maps bytes to character classes - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 00-0f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 10-1f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 20-2f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 30-3f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 40-4f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 50-5f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 60-6f - $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 70-7f - $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01, // 80-8f - $09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09, // 90-9f - $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // a0-af - $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // b0-bf - $08,$08,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // c0-cf - $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // d0-df - $0a,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$04,$03,$03, // e0-ef - $0b,$06,$06,$06,$05,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08, // f0-ff - // maps a combination of a state of the automaton and a character class to a state - $00,$0c,$18,$24,$3c,$60,$54,$0c,$0c,$0c,$30,$48,$0c,$0c,$0c,$0c, // 100-10f - $0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$00,$0c,$0c,$0c,$0c,$0c,$00, // 110-11f - $0c,$00,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$18,$0c,$0c, // 120-12f - $0c,$0c,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c, // 130-13f - $0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$24, // 140-14f - $0c,$24,$0c,$0c,$0c,$24,$0c,$0c,$0c,$0c,$0c,$24,$0c,$24,$0c,$0c, // 150-15f - $0c,$24,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c); - - -// ////////////////////////////////////////////////////////////////////////// // -constructor TUtf8DecoderFast.Create (v: Boolean{fuck you, fpc}); begin state := Accept; codepoint := 0; end; - -procedure TUtf8DecoderFast.reset (); inline; begin state := Accept; codepoint := 0; end; - -function TUtf8DecoderFast.complete (): Boolean; inline; begin result := (state = Accept); end; -function TUtf8DecoderFast.invalid (): Boolean; inline; begin result := (state = Reject); end; -function TUtf8DecoderFast.completeOrInvalid (): Boolean; inline; begin result := (state = Accept) or (state = Reject); end; +constructor TTextParser.Create (aopts: TOptions=[TOption.SignedNumbers]); +begin + mLine := 1; + mCol := 1; + mCharBufUsed := 0; + mCharBufPos := 0; + mEofHit := false; + mTokType := TTNone; + mTokStr := ''; + mTokChar := #0; + mTokInt := 0; + mOptions := aopts; + skipToken(); + // fuck you, BOM! + { + if (mBufLen >= 3) and (mBuffer[0] = #$EF) and (mBuffer[1] = #$BB) and (mBuffer[2] = #$BF) then + begin + for f := 3 to mBufLen-1 do mBuffer[f-3] := mBuffer[f]; + Dec(mBufLen, 3); + end; + } +end; -function TUtf8DecoderFast.decode (c: AnsiChar): Boolean; inline; overload; begin result := decode(Byte(c)); end; -function TUtf8DecoderFast.decode (b: Byte): Boolean; inline; overload; -var - tp: LongWord; +destructor TTextParser.Destroy (); begin - if (state = Reject) then begin state := Accept; codepoint := 0; end; - tp := utf8dfa[b]; - if (state <> Accept) then codepoint := (b and $3f) or (codepoint shl 6) else codepoint := ($ff shr tp) and b; - state := utf8dfa[256+state+tp]; - if (state = Reject) then begin codepoint := Replacement; state := Accept; end; - result := (state = Accept); + inherited; end; -// ////////////////////////////////////////////////////////////////////////// // -function utfTo1251 (const s: AnsiString): AnsiString; -var - f, c: Integer; - ud: TUtf8DecoderFast; +procedure TTextParser.error (const amsg: AnsiString); noreturn; begin - for f := 1 to Length(s) do - begin - if (Byte(s[f]) > 127) then - begin - ud := TUtf8DecoderFast.Create(true); - result := ''; - for c := 1 to Length(s) do - begin - if ud.decode(s[c]) then result += wcharTo1251(WideChar(ud.codepoint)); - end; - exit; - end; - end; - result := s; + raise TParserException.Create(self, amsg); end; -// ////////////////////////////////////////////////////////////////////////// // -function digitInBase (ch: AnsiChar; base: Integer): Integer; +procedure TTextParser.errorfmt (const afmt: AnsiString; const args: array of const); noreturn; begin - result := -1; - if (base < 1) or (base > 36) then exit; - if (ch < '0') then exit; - if (base <= 10) then - begin - if (Integer(ch) >= 48+base) then exit; - result := Integer(ch)-48; - end - else - begin - if (ch >= '0') and (ch <= '9') then begin result := Integer(ch)-48; exit; end; - if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32); // poor man's tolower() - if (ch < 'A') or (Integer(ch) >= 65+(base-10)) then exit; - result := Integer(ch)-65+10; - end; + raise TParserException.CreateFmt(self, afmt, args); end; -// ////////////////////////////////////////////////////////////////////////// // -class function TTextParser.quote (const s: AnsiString): AnsiString; +function TTextParser.isIdStartChar (ch: AnsiChar): Boolean; inline; +begin + result := + (ch = '_') or + ((ch >= 'A') and (ch <= 'Z')) or + ((ch >= 'a') and (ch <= 'z')) or + (ch >= #128) or + ((ch = '$') and (TOption.DollarIsId in mOptions)) or + ((ch = '.') and (TOption.DotIsId in mOptions)); +end; - function squote (const s: AnsiString): AnsiString; - var - f: Integer; - begin - result := ''''; - for f := 1 to Length(s) do - begin - if (s[f] = '''') then result += ''''; - result += s[f]; - end; - result += ''''; - end; +function TTextParser.isIdMidChar (ch: AnsiChar): Boolean; inline; +begin + result := + ((ch >= '0') and (ch <= '9')) or + ((ch = '-') and (TOption.DashIsId in mOptions)) or + isIdStartChar(ch); +end; - function dquote (const s: AnsiString): AnsiString; - var - f: Integer; - ch: AnsiChar; - begin - result := '"'; - for f := 1 to Length(s) do - begin - ch := s[f]; - if (ch = #0) then result += '\z' - else if (ch = #9) then result += '\t' - else if (ch = #10) then result += '\n' - else if (ch = #13) then result += '\r' - else if (ch = #27) then result += '\e' - else if (ch < ' ') or (ch = #127) then - begin - result += '\x'; - result += LowerCase(IntToHex(Integer(ch), 2)); - end - else if (ch = '"') or (ch = '\') then - begin - result += '\'; - result += ch; - end - else - begin - result += ch; - end; - end; - result += '"'; - end; +procedure TTextParser.fillCharBuf (); var - needSingle: Boolean = false; - f: Integer; + ch: AnsiChar; begin - for f := 1 to Length(s) do + if (mEofHit) then begin mCharBuf[mCharBufPos] := #0; exit; end; + while (not mEofHit) and (mCharBufUsed < CharBufSize) do begin - if (s[f] = '''') then begin needSingle := true; continue; end; - if (s[f] < ' ') or (s[f] = #127) then begin result := dquote(s); exit; end; + ch := loadChar(); + mCharBuf[(mCharBufPos+mCharBufUsed) mod CharBufSize] := ch; + if (ch = #0) then begin mEofHit := true; break; end; + Inc(mCharBufUsed); end; - if needSingle then result := squote(s) else result := ''''+s+''''; end; -// ////////////////////////////////////////////////////////////////////////// // -constructor TTextParser.Create (loadToken: Boolean=true); +// never drains char buffer (except on "total EOF") +function TTextParser.popFrontChar (): AnsiChar; inline; begin - mLine := 1; - mCol := 1; - mCurChar := #0; - mNextChar := #0; - mTokType := TTNone; - mTokStr := ''; - mTokChar := #0; - mTokInt := 0; - mAllowSignedNumbers := true; - warmup(); // change `mAllowSignedNumbers` there, if necessary - if loadToken then skipToken(); + if (mEofHit) and (mCharBufUsed = 0) then begin result := #0; exit; end; + assert(mCharBufUsed > 0); + result := mCharBuf[mCharBufPos]; + mCharBufPos := (mCharBufPos+1) mod CharBufSize; + Dec(mCharBufUsed); + if (not mEofHit) and (mCharBufUsed = 0) then fillCharBuf(); end; - -destructor TTextParser.Destroy (); +function TTextParser.peekCurChar (): AnsiChar; inline; begin - inherited; + if (mCharBufUsed = 0) and (not mEofHit) then fillCharBuf(); + result := mCharBuf[mCharBufPos]; // it is safe, 'cause `fillCharBuf()` will put #0 on "total EOF" end; +function TTextParser.peekNextChar (): AnsiChar; inline; +begin + if (mCharBufUsed < 2) and (not mEofHit) then fillCharBuf(); + if (mCharBufUsed < 2) then result := #0 else result := mCharBuf[(mCharBufPos+1) mod CharBufSize]; +end; -function TTextParser.isEOF (): Boolean; inline; begin result := (mCurChar = #0); end; +function TTextParser.peekChar (dest: Integer): AnsiChar; inline; +begin + if (dest < 0) or (dest >= CharBufSize) then error('internal text parser error'); + if (mCharBufUsed < dest+1) then fillCharBuf(); + if (mCharBufUsed < dest+1) then result := #0 else result := mCharBuf[(mCharBufPos+dest) mod CharBufSize]; +end; function TTextParser.skipChar (): Boolean; +var + ch: AnsiChar; begin - if (mCurChar = #0) then begin result := false; exit; end; - if (mCurChar = #10) then begin mCol := 1; Inc(mLine); end else Inc(mCol); - mCurChar := mNextChar; - if (mCurChar = #0) then begin result := false; exit; end; - loadNextChar(); - // skip CR in CR/LF - if (mCurChar = #13) then - begin - if (mNextChar = #10) then loadNextChar(); - mCurChar := #10; - end; + ch := popFrontChar(); + if (ch = #0) then begin result := false; exit; end; result := true; + // CR? + case ch of + #10: + begin + mCol := 1; + Inc(mLine); + end; + #13: + begin + mCol := 1; + Inc(mLine); + if (mCharBufUsed > 0) and (mCharBuf[0] = #10) then + begin + if (popFrontChar() = #0) then result := false; + end; + end; + else + Inc(mCol); + end; end; @@ -454,15 +434,29 @@ function TTextParser.skipBlanks (): Boolean; var level: Integer; begin - while not isEOF do + //writeln('line=', mLine, '; col=', mCol, '; char0=', Integer(peekChar(0))); + if (mLine = 1) and (mCol = 1) and + (peekChar(0) = #$EF) and + (peekChar(1) = #$BB) and + (peekChar(2) = #$BF) then + begin + skipChar(); + skipChar(); + skipChar(); + end; + + while (curChar <> #0) do begin if (curChar = '/') then begin // single-line comment if (nextChar = '/') then begin - while not isEOF and (curChar <> #10) do skipChar(); + //writeln('spos=(', mLine, ',', mCol, ')'); + while (curChar <> #0) and (curChar <> #10) and (curChar <> #13) do skipChar(); skipChar(); // skip EOL + //writeln('{', curChar, '}'); + //writeln('epos=(', mLine, ',', mCol, ')'); continue; end; // multline comment @@ -471,7 +465,7 @@ begin // skip comment start skipChar(); skipChar(); - while not isEOF do + while (curChar <> #0) do begin if (curChar = '*') and (nextChar = '/') then begin @@ -491,7 +485,7 @@ begin skipChar(); skipChar(); level := 1; - while not isEOF do + while (curChar <> #0) do begin if (curChar = '+') and (nextChar = '/') then begin @@ -514,23 +508,67 @@ begin end; continue; end; + end + else if (curChar = '(') and (nextChar = '*') then + begin + // pascal comment; skip comment start + skipChar(); + skipChar(); + while (curChar <> #0) do + begin + if (curChar = '*') and (nextChar = ')') then + begin + // skip comment end + skipChar(); + skipChar(); + break; + end; + skipChar(); + end; + continue; + end + else if (curChar = '{') and (TOption.PascalComments in mOptions) then + begin + // pascal comment; skip comment start + skipChar(); + while (curChar <> #0) do + begin + if (curChar = '}') then + begin + // skip comment end + skipChar(); + break; + end; + skipChar(); + end; + continue; end; if (curChar > ' ') then break; skipChar(); // skip blank end; - result := not isEOF; + result := (curChar <> #0); end; +{$IFDEF XPARSER_DEBUG} function TTextParser.skipToken (): Boolean; +begin + writeln('getting token...'); + result := skipToken1(); + writeln(' got token: ', mTokType, ' <', mTokStr, '> : <', mTokChar, '>'); +end; +function TTextParser.skipToken1 (): Boolean; +{$ELSE} +function TTextParser.skipToken (): Boolean; +{$ENDIF} procedure parseInt (); var neg: Boolean = false; base: Integer = -1; n: Integer; begin - if mAllowSignedNumbers then + if (TOption.SignedNumbers in mOptions) then begin if (curChar = '+') or (curChar = '-') then begin @@ -561,26 +599,28 @@ function TTextParser.skipToken (): Boolean; end; // default base if (base < 0) then base := 10; - if (digitInBase(curChar, base) < 0) then raise Exception.Create('invalid number'); + if (digitInBase(curChar, base) < 0) then error('invalid number'); mTokType := TTInt; mTokInt := 0; // just in case - while not isEOF do + while (curChar <> #0) do begin + if (curChar = '_') then + begin + skipChar(); + if (curChar = #0) then break; + end; n := digitInBase(curChar, base); if (n < 0) then break; n := mTokInt*10+n; - if (n < 0) or (n < mTokInt) then raise Exception.Create('integer overflow'); + if (n < 0) or (n < mTokInt) then error('integer overflow'); mTokInt := n; skipChar(); end; // check for valid number end - if not isEOF then + if (curChar <> #0) then begin - if (curChar = '.') then raise Exception.Create('floating numbers aren''t supported yet'); - if (curChar = '_') or ((curChar >= 'A') and (curChar <= 'Z')) or ((curChar >= 'a') and (curChar <= 'z')) or (curChar >= #128) then - begin - raise Exception.Create('invalid number'); - end; + if (curChar = '.') then error('floating numbers aren''t supported yet'); + if (isIdMidChar(curChar)) then error('invalid number'); end; if neg then mTokInt := -mTokInt; end; @@ -594,12 +634,12 @@ function TTextParser.skipToken (): Boolean; mTokStr := ''; // just in case qch := curChar; skipChar(); // skip starting quote - while not isEOF do + while (curChar <> #0) do begin // escape if (qch = '"') and (curChar = '\') then begin - if (nextChar = #0) then raise Exception.Create('unterminated string escape'); + if (nextChar = #0) then error('unterminated string escape'); ch := nextChar; // skip backslash and escape type skipChar(); @@ -613,7 +653,7 @@ function TTextParser.skipToken (): Boolean; 'x', 'X': // hex escape begin n := digitInBase(curChar, 16); - if (n < 0) then raise Exception.Create('invalid hexstr escape'); + if (n < 0) then error('invalid hexstr escape'); skipChar(); if (digitInBase(curChar, 16) > 0) then begin @@ -649,18 +689,18 @@ function TTextParser.skipToken (): Boolean; begin mTokType := TTId; mTokStr := ''; // just in case - while (curChar = '_') or ((curChar >= '0') and (curChar <= '9')) or - ((curChar >= 'A') and (curChar <= 'Z')) or - ((curChar >= 'a') and (curChar <= 'z')) or - (curChar >= #128) do + while (isIdMidChar(curChar)) do begin + if (curChar = '.') and (nextChar = '.') then break; // dotdot is a token by itself mTokStr += curChar; skipChar(); end; end; +var + xpos: Integer; begin - mTokType := TTEOF; + mTokType := TTNone; mTokStr := ''; mTokChar := #0; mTokInt := 0; @@ -668,6 +708,7 @@ begin if not skipBlanks() then begin result := false; + mTokType := TTEOF; mTokLine := mLine; mTokCol := mCol; exit; @@ -679,57 +720,165 @@ begin result := true; // number? - if mAllowSignedNumbers and ((curChar = '+') or (curChar = '-')) then begin parseInt(); exit; end; + if (TOption.SignedNumbers in mOptions) and ((curChar = '+') or (curChar = '-')) then begin parseInt(); exit; end; if (curChar >= '0') and (curChar <= '9') then begin parseInt(); exit; end; // string? - if (curChar = '"') or (curChar = '''') then begin parseString(); exit; end; + if (curChar = '"') or (curChar = '''') or (curChar = '`') then begin parseString(); exit; end; + + // html color? + if (curChar = '#') and (TOption.HtmlColors in mOptions) then + begin + if (digitInBase(peekChar(1), 16) >= 0) and (digitInBase(peekChar(2), 16) >= 0) and (digitInBase(peekChar(3), 16) >= 0) then + begin + if (digitInBase(peekChar(4), 16) >= 0) and (digitInBase(peekChar(5), 16) >= 0) and (digitInBase(peekChar(6), 16) >= 0) then xpos := 7 else xpos := 4; + if (not isIdMidChar(peekChar(xpos))) then + begin + mTokType := TTId; + mTokStr := ''; + while (xpos > 0) do + begin + mTokStr += curChar; + skipChar(); + Dec(xpos); + end; + exit; + end; + end; + end; // identifier? - if (curChar = '_') or ((curChar >= 'A') and (curChar <= 'Z')) or ((curChar >= 'a') and (curChar <= 'z')) or (curChar >= #128) then begin parseId(); exit; end; + if (isIdStartChar(curChar)) then + begin + if (curChar = '.') and (nextChar = '.') then + begin + // nothing to do here, as dotdot is a token by itself + end + else + begin + parseId(); + exit; + end; + end; // known delimiters? - case curChar of - ',': mTokType := TTComma; - ':': mTokType := TTColon; - ';': mTokType := TTSemi; - '{': mTokType := TTBegin; - '}': mTokType := TTEnd; - else mTokType := TTDelim; - end; mTokChar := curChar; + mTokType := TTDelim; skipChar(); + if (curChar = '=') then + begin + case mTokChar of + '<': begin mTokType := TTLessEqu; mTokStr := '<='; skipChar(); exit; end; + '>': begin mTokType := TTGreatEqu; mTokStr := '>='; skipChar(); exit; end; + '!': begin mTokType := TTNotEqu; mTokStr := '!='; skipChar(); exit; end; + '=': begin mTokType := TTEqu; mTokStr := '=='; skipChar(); exit; end; + ':': begin mTokType := TTAss; mTokStr := ':='; skipChar(); exit; end; + end; + end + else if (mTokChar = curChar) then + begin + case mTokChar of + '<': begin mTokType := TTShl; mTokStr := '<<'; skipChar(); exit; end; + '>': begin mTokType := TTShr; mTokStr := '>>'; skipChar(); exit; end; + '&': begin mTokType := TTLogAnd; mTokStr := '&&'; skipChar(); exit; end; + '|': begin mTokType := TTLogOr; mTokStr := '||'; skipChar(); exit; end; + end; + end + else + begin + case mTokChar of + '<': if (curChar = '>') then begin mTokType := TTNotEqu; mTokStr := '<>'; skipChar(); exit; end; + '.': if (curChar = '.') then begin mTokType := TTDotDot; mTokStr := '..'; skipChar(); exit; end; + end; + end; end; +function TTextParser.isEOF (): Boolean; inline; begin result := (mTokType = TTEOF); end; +function TTextParser.isId (): Boolean; inline; begin result := (mTokType = TTId); end; +function TTextParser.isInt (): Boolean; inline; begin result := (mTokType = TTInt); end; +function TTextParser.isStr (): Boolean; inline; begin result := (mTokType = TTStr); end; +function TTextParser.isDelim (): Boolean; inline; begin result := (mTokType = TTDelim); end; +function TTextParser.isIdOrStr (): Boolean; inline; begin result := (mTokType = TTId) or (mTokType = TTStr); end; + + function TTextParser.expectId (): AnsiString; begin - if (mTokType <> TTId) then raise Exception.Create('identifier expected'); + if (mTokType <> TTId) then error('identifier expected'); result := mTokStr; skipToken(); end; -procedure TTextParser.expectId (const aid: AnsiString); +procedure TTextParser.expectId (const aid: AnsiString; caseSens: Boolean=true); begin - if (mTokType <> TTId) or (CompareText(mTokStr, aid) <> 0) then raise Exception.Create('identifier '''+aid+''' expected'); + if caseSens then + begin + if (mTokType <> TTId) or (mTokStr <> aid) then error('identifier '''+aid+''' expected'); + end + else + begin + if (mTokType <> TTId) or (not strEquCI1251(mTokStr, aid)) then error('identifier '''+aid+''' expected'); + end; skipToken(); end; -function TTextParser.eatId (const aid: AnsiString): Boolean; +function TTextParser.eatId (const aid: AnsiString; caseSens: Boolean=true): Boolean; begin - result := false; - if (mTokType <> TTId) or (CompareText(mTokStr, aid) <> 0) then exit; - result := true; - skipToken(); + if caseSens then + begin + result := (mTokType = TTId) and (mTokStr = aid); + end + else + begin + result := (mTokType = TTId) and strEquCI1251(mTokStr, aid); + end; + if result then skipToken(); +end; + + +function TTextParser.eatIdOrStr (const aid: AnsiString; caseSens: Boolean=true): Boolean; +begin + if caseSens then + begin + result := (mTokType = TTId) and (mTokStr = aid); + if not result then result := (mTokType = TTStr) and (mTokStr = aid); + end + else + begin + result := (mTokType = TTId) and strEquCI1251(mTokStr, aid); + if not result then result := (mTokType = TTStr) and strEquCI1251(mTokStr, aid); + end; + if result then skipToken(); +end; + + +function TTextParser.eatIdOrStrCI (const aid: AnsiString): Boolean; inline; +begin + result := eatIdOrStr(aid, false); end; function TTextParser.expectStr (allowEmpty: Boolean=false): AnsiString; begin - if (mTokType <> TTStr) then raise Exception.Create('string expected'); - if (not allowEmpty) and (Length(mTokStr) = 0) then raise Exception.Create('non-empty string expected'); + if (mTokType <> TTStr) then error('string expected'); + if (not allowEmpty) and (Length(mTokStr) = 0) then error('non-empty string expected'); + result := mTokStr; + skipToken(); +end; + + +function TTextParser.expectIdOrStr (allowEmpty: Boolean=false): AnsiString; +begin + case mTokType of + TTStr: + if (not allowEmpty) and (Length(mTokStr) = 0) then error('non-empty string expected'); + TTId: + begin end; + else + error('string or identifier expected'); + end; result := mTokStr; skipToken(); end; @@ -737,7 +886,7 @@ end; function TTextParser.expectInt (): Integer; begin - if (mTokType <> TTInt) then raise Exception.Create('string expected'); + if (mTokType <> TTInt) then error('string expected'); result := mTokInt; skipToken(); end; @@ -745,7 +894,7 @@ end; procedure TTextParser.expectTT (ttype: Integer); begin - if (mTokType <> ttype) then raise Exception.Create('unexpected token'); + if (mTokType <> ttype) then error('unexpected token'); skipToken(); end; @@ -757,9 +906,17 @@ begin end; -function TTextParser.expectDelim (const ch: AnsiChar): AnsiChar; +procedure TTextParser.expectDelim (const ch: AnsiChar); begin - if (mTokType <> TTDelim) then raise Exception.Create(Format('delimiter ''%s'' expected', [ch])); + if (mTokType <> TTDelim) or (mTokChar <> ch) then errorfmt('delimiter ''%s'' expected', [ch]); + skipToken(); +end; + + +function TTextParser.expectDelims (const ch: TAnsiCharSet): AnsiChar; +begin + if (mTokType <> TTDelim) then error('delimiter expected'); + if not (mTokChar in ch) then error('delimiter expected'); result := mTokChar; skipToken(); end; @@ -767,56 +924,78 @@ end; function TTextParser.eatDelim (const ch: AnsiChar): Boolean; begin - result := false; - if (mTokType <> TTDelim) or (mTokChar <> ch) then exit; - result := true; - skipToken(); + result := (mTokType = TTDelim) and (mTokChar = ch); + if result then skipToken(); +end; + + +function TTextParser.isDelim (const ch: AnsiChar): Boolean; inline; +begin + result := (mTokType = TTDelim) and (mTokChar = ch); end; // ////////////////////////////////////////////////////////////////////////// // -constructor TFileTextParser.Create (const fname: AnsiString; loadToken: Boolean=true); +constructor TFileTextParser.Create (const fname: AnsiString; aopts: TOptions=[TOption.SignedNumbers]); begin - AssignFile(mFile, fname); - Reset(mFile, 1); - inherited Create(loadToken); + mBuffer := nil; + mFile := openDiskFileRO(fname); + mStreamOwned := true; + GetMem(mBuffer, BufSize); + mBufPos := 0; + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then error('TFileTextParser: read error'); + inherited Create(aopts); end; -destructor TFileTextParser.Destroy (); +constructor TFileTextParser.Create (st: TStream; astOwned: Boolean=true; aopts: TOptions=[TOption.SignedNumbers]); begin - CloseFile(mFile); - inherited; + if (st = nil) then error('cannot create parser for nil stream'); + mFile := st; + mStreamOwned := astOwned; + GetMem(mBuffer, BufSize); + mBufPos := 0; + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then error('TFileTextParser: read error'); + inherited Create(aopts); end; -procedure TFileTextParser.warmup (); -var - rd: Integer; +destructor TFileTextParser.Destroy (); begin - blockRead(mFile, mCurChar, 1, rd); - if (rd = 0) then begin mCurChar := #0; exit; end; - if (mCurChar = #0) then mCurChar := ' '; - loadNextChar(); + if (mBuffer <> nil) then FreeMem(mBuffer); + mBuffer := nil; + mBufPos := 0; + mBufLen := 0; + if (mStreamOwned) then FreeAndNil(mFile) else mFile := nil; + inherited; end; -procedure TFileTextParser.loadNextChar (); -var - rd: Integer; +function TFileTextParser.loadChar (): AnsiChar; begin - blockRead(mFile, mNextChar, 1, rd); - if (rd = 0) then begin mNextChar := #0; exit; end; - if (mNextChar = #0) then mNextChar := ' '; + if (mBufLen = 0) then begin result := #0; exit; end; + if (mBufPos >= mBufLen) then + begin + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then error('TFileTextParser: read error'); + if (mBufLen = 0) then begin result := #0; exit; end; + mBufPos := 0; + end; + assert(mBufPos < mBufLen); + result := mBuffer[mBufPos]; + Inc(mBufPos); + if (result = #0) then result := ' '; end; // ////////////////////////////////////////////////////////////////////////// // -constructor TStrTextParser.Create (const astr: AnsiString; loadToken: Boolean=true); +constructor TStrTextParser.Create (const astr: AnsiString; aopts: TOptions=[TOption.SignedNumbers]); begin mStr := astr; mPos := 1; - inherited Create(loadToken); + inherited Create(aopts); end; @@ -827,31 +1006,19 @@ begin end; -procedure TStrTextParser.warmup (); +function TStrTextParser.loadChar (): AnsiChar; begin - if (mPos > Length(mStr)) then - begin - mCurChar := #0; - mNextChar := #0; - exit; - end; - mCurChar := mStr[mPos]; Inc(mPos); - if (mCurChar = #0) then mCurChar := ' '; - loadNextChar(); -end; - - -procedure TStrTextParser.loadNextChar (); -begin - mNextChar := #0; + result := #0; if (mPos > Length(mStr)) then exit; - mNextChar := mStr[mPos]; Inc(mPos); - if (mNextChar = #0) then mNextChar := ' '; + result := mStr[mPos]; + Inc(mPos); + if (result = #0) then result := ' '; end; // ////////////////////////////////////////////////////////////////////////// // constructor TTextWriter.Create (); begin mIndent := 0; end; +procedure TTextWriter.flush (); begin end; procedure TTextWriter.put (const s: AnsiString); overload; begin if (Length(s) > 0) then putBuf((@(s[1]))^, Length(s)); end; procedure TTextWriter.put (v: Byte); overload; begin put('%d', [v]); end; procedure TTextWriter.put (v: Integer); overload; begin put('%d', [v]); end; @@ -864,36 +1031,97 @@ procedure TTextWriter.unindent (); begin Dec(mIndent, 2); end; // ////////////////////////////////////////////////////////////////////////// // constructor TFileTextWriter.Create (const fname: AnsiString); begin - AssignFile(mFile, fname); - Rewrite(mFile, 1); + mFile := createDiskFile(fname); + mStreamOwned := true; + mBufUsed := 0; + GetMem(mBuffer, BufSize); + assert(mBuffer <> nil); inherited Create(); end; +constructor TFileTextWriter.Create (ast: TStream; astOwned: Boolean=true); +begin + if (ast = nil) then raise Exception.Create('cannot write to nil stream'); + mFile := ast; + mStreamOwned := astOwned; + mBufUsed := 0; + GetMem(mBuffer, BufSize); + assert(mBuffer <> nil); +end; + + destructor TFileTextWriter.Destroy (); begin - CloseFile(mFile); + flush(); + if (mBuffer <> nil) then FreeMem(mBuffer); + mBufUsed := 0; + mBuffer := nil; + if (mStreamOwned) then mFile.Free(); + mFile := nil; + inherited; +end; + + +procedure TFileTextWriter.flush (); +begin + if (mFile <> nil) and (mBufUsed > 0) then + begin + mFile.WriteBuffer(mBuffer^, mBufUsed); + end; + mBufUsed := 0; end; procedure TFileTextWriter.putBuf (constref buf; len: SizeUInt); var - wr: SizeUInt; pc: PChar; + left: Integer; begin - if (len > 0) then + if (len = 0) then exit; + pc := @buf; + while (len > 0) do begin - pc := @buf; - BlockWrite(mFile, pc^, len, wr); - if (wr <> len) then raise Exception.Create('write error'); - { - while (len > 0) do + left := BufSize-mBufUsed; + if (left = 0) then begin - write(pc^); - Inc(pc); - Dec(len); + flush(); + left := BufSize-mBufUsed; + assert(left > 0); end; - } + if (left > len) then left := Integer(len); + Move(pc^, (mBuffer+mBufUsed)^, left); + Inc(mBufUsed, left); + pc += left; + len -= left; + end; +end; + + +// ////////////////////////////////////////////////////////////////////////// // +constructor TStrTextWriter.Create (); +begin + mStr := ''; +end; + + +destructor TStrTextWriter.Destroy (); +begin + mStr := ''; + inherited; +end; + + +procedure TStrTextWriter.putBuf (constref buf; len: SizeUInt); +var + st: AnsiString = ''; +begin + if (len > 0) then + begin + SetLength(st, Integer(len)); + Move(buf, PChar(st)^, Integer(len)); + mStr += st; + st := ''; end; end;