X-Git-Url: http://deadsoftware.ru/gitweb?a=blobdiff_plain;f=src%2Fshared%2Fxparser.pas;h=595d300f99f32291ae9af1da261b3a8907f95106;hb=05494fe1320ebc427c3b5c688c18669bf3abc260;hp=a3e39cef4d5ac642c2aab3ee8abfa33f253b707c;hpb=313f52c372a4fe70cdfb3fdfaf845b95e05be9d4;p=d2df-sdl.git diff --git a/src/shared/xparser.pas b/src/shared/xparser.pas index a3e39ce..595d300 100644 --- a/src/shared/xparser.pas +++ b/src/shared/xparser.pas @@ -1,4 +1,5 @@ -(* Copyright (C) DooM 2D:Forever Developers +(* coded by Ketmar // Invisible Vector + * Understanding is not required. Only obedience. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -11,7 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program. If not, see . + * along with this program. If not, see . *) {$INCLUDE a_modes.inc} {.$DEFINE XPARSER_DEBUG} @@ -64,6 +65,8 @@ type SignedNumbers, // allow signed numbers; otherwise sign will be TTDelim DollarIsId, // allow dollar in identifiers; otherwise dollar will be TTDelim DotIsId, // allow dot in identifiers; otherwise dot will be TTDelim + DashIsId, // '-' can be part of identifier (but identifier cannot start with '-') + HtmlColors, // #rgb or #rrggbb colors PascalComments // allow `{}` pascal comments ); TOptions = set of TOption; @@ -71,10 +74,16 @@ type private type TAnsiCharSet = set of AnsiChar; + const + CharBufSize = 8; private mLine, mCol: Integer; - mCurChar, mNextChar: AnsiChar; + // chars for 'unget' + mCharBuf: packed array [0..CharBufSize-1] of AnsiChar; + mCharBufUsed: Integer; + mCharBufPos: Integer; + mEofHit: Boolean; // no more chars to load into mCharBuf mOptions: TOptions; @@ -84,9 +93,19 @@ type mTokChar: AnsiChar; // for delimiters mTokInt: Integer; + private + procedure fillCharBuf (); + function popFrontChar (): AnsiChar; inline; // never drains char buffer (except on "total EOF") + function peekCurChar (): AnsiChar; inline; + function peekNextChar (): AnsiChar; inline; + function peekChar (dest: Integer): AnsiChar; inline; + protected - procedure warmup (); // called in constructor to warm up the system - procedure loadNextChar (); virtual; abstract; // loads next char into mNextChar; #0 means 'eof' + function loadChar (): AnsiChar; virtual; abstract; // loads next char; #0 means 'eof' + + public + function isIdStartChar (ch: AnsiChar): Boolean; inline; + function isIdMidChar (ch: AnsiChar): Boolean; inline; public constructor Create (aopts: TOptions=[TOption.SignedNumbers]); @@ -95,8 +114,6 @@ type procedure error (const amsg: AnsiString); noreturn; procedure errorfmt (const afmt: AnsiString; const args: array of const); noreturn; - function isEOF (): Boolean; inline; - function skipChar (): Boolean; // returns `false` on eof function skipBlanks (): Boolean; // ...and comments; returns `false` on eof @@ -106,6 +123,11 @@ type function skipToken1 (): Boolean; {$ENDIF} + function isEOF (): Boolean; inline; + function isId (): Boolean; inline; + function isInt (): Boolean; inline; + function isStr (): Boolean; inline; + function isDelim (): Boolean; inline; function isIdOrStr (): Boolean; inline; function expectId (): AnsiString; @@ -117,7 +139,7 @@ type function expectStr (allowEmpty: Boolean=false): AnsiString; function expectInt (): Integer; - function expectStrOrId (allowEmpty: Boolean=false): AnsiString; + function expectIdOrStr (allowEmpty: Boolean=false): AnsiString; procedure expectTT (ttype: Integer); function eatTT (ttype: Integer): Boolean; @@ -135,8 +157,8 @@ type property col: Integer read mCol; property line: Integer read mLine; - property curChar: AnsiChar read mCurChar; - property nextChar: AnsiChar read mNextChar; + property curChar: AnsiChar read peekCurChar; + property nextChar: AnsiChar read peekNextChar; // token start property tokCol: Integer read mTokCol; @@ -163,7 +185,7 @@ type mBufPos: Integer; protected - procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof' + function loadChar (): AnsiChar; override; // loads next char; #0 means 'eof' public constructor Create (const fname: AnsiString; aopts: TOptions=[TOption.SignedNumbers]); @@ -177,7 +199,7 @@ type mPos: Integer; protected - procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof' + function loadChar (): AnsiChar; override; // loads next char; #0 means 'eof' public constructor Create (const astr: AnsiString; aopts: TOptions=[TOption.SignedNumbers]); @@ -275,15 +297,23 @@ constructor TTextParser.Create (aopts: TOptions=[TOption.SignedNumbers]); begin mLine := 1; mCol := 1; - mCurChar := #0; - mNextChar := #0; + mCharBufUsed := 0; + mCharBufPos := 0; + mEofHit := false; mTokType := TTNone; mTokStr := ''; mTokChar := #0; mTokInt := 0; mOptions := aopts; - warmup(); skipToken(); + // fuck you, BOM! + { + if (mBufLen >= 3) and (mBuffer[0] = #$EF) and (mBuffer[1] = #$BB) and (mBuffer[2] = #$BF) then + begin + for f := 3 to mBufLen-1 do mBuffer[f-3] := mBuffer[f]; + Dec(mBufLen, 3); + end; + } end; @@ -305,32 +335,98 @@ begin end; -function TTextParser.isEOF (): Boolean; inline; begin result := (mCurChar = #0); end; - +function TTextParser.isIdStartChar (ch: AnsiChar): Boolean; inline; +begin + result := + (ch = '_') or + ((ch >= 'A') and (ch <= 'Z')) or + ((ch >= 'a') and (ch <= 'z')) or + (ch >= #128) or + ((ch = '$') and (TOption.DollarIsId in mOptions)) or + ((ch = '.') and (TOption.DotIsId in mOptions)); +end; -procedure TTextParser.warmup (); +function TTextParser.isIdMidChar (ch: AnsiChar): Boolean; inline; begin - mNextChar := ' '; - loadNextChar(); - mCurChar := mNextChar; - if (mNextChar <> #0) then loadNextChar(); + result := + ((ch >= '0') and (ch <= '9')) or + ((ch = '-') and (TOption.DashIsId in mOptions)) or + isIdStartChar(ch); end; -function TTextParser.skipChar (): Boolean; +procedure TTextParser.fillCharBuf (); +var + ch: AnsiChar; begin - if (mCurChar = #0) then begin result := false; exit; end; - if (mCurChar = #10) then begin mCol := 1; Inc(mLine); end else Inc(mCol); - mCurChar := mNextChar; - if (mCurChar = #0) then begin result := false; exit; end; - loadNextChar(); - // skip CR in CR/LF - if (mCurChar = #13) then + if (mEofHit) then begin mCharBuf[mCharBufPos] := #0; exit; end; + while (not mEofHit) and (mCharBufUsed < CharBufSize) do begin - if (mNextChar = #10) then loadNextChar(); - mCurChar := #10; + ch := loadChar(); + mCharBuf[(mCharBufPos+mCharBufUsed) mod CharBufSize] := ch; + if (ch = #0) then begin mEofHit := true; break; end; + Inc(mCharBufUsed); end; +end; + + +// never drains char buffer (except on "total EOF") +function TTextParser.popFrontChar (): AnsiChar; inline; +begin + if (mEofHit) and (mCharBufUsed = 0) then begin result := #0; exit; end; + assert(mCharBufUsed > 0); + result := mCharBuf[mCharBufPos]; + mCharBufPos := (mCharBufPos+1) mod CharBufSize; + Dec(mCharBufUsed); + if (not mEofHit) and (mCharBufUsed = 0) then fillCharBuf(); +end; + +function TTextParser.peekCurChar (): AnsiChar; inline; +begin + if (mCharBufUsed = 0) and (not mEofHit) then fillCharBuf(); + result := mCharBuf[mCharBufPos]; // it is safe, 'cause `fillCharBuf()` will put #0 on "total EOF" +end; + +function TTextParser.peekNextChar (): AnsiChar; inline; +begin + if (mCharBufUsed < 2) and (not mEofHit) then fillCharBuf(); + if (mCharBufUsed < 2) then result := #0 else result := mCharBuf[(mCharBufPos+1) mod CharBufSize]; +end; + +function TTextParser.peekChar (dest: Integer): AnsiChar; inline; +begin + if (dest < 0) or (dest >= CharBufSize) then error('internal text parser error'); + if (mCharBufUsed < dest+1) then fillCharBuf(); + if (mCharBufUsed < dest+1) then result := #0 else result := mCharBuf[(mCharBufPos+dest) mod CharBufSize]; +end; + + +function TTextParser.skipChar (): Boolean; +var + ch: AnsiChar; +begin + ch := popFrontChar(); + if (ch = #0) then begin result := false; exit; end; result := true; + // CR? + case ch of + #10: + begin + mCol := 1; + Inc(mLine); + end; + #13: + begin + mCol := 1; + Inc(mLine); + if (mCharBufUsed > 0) and (mCharBuf[0] = #10) then + begin + if (popFrontChar() = #0) then result := false; + end; + end; + else + Inc(mCol); + end; end; @@ -338,15 +434,29 @@ function TTextParser.skipBlanks (): Boolean; var level: Integer; begin - while not isEOF do + //writeln('line=', mLine, '; col=', mCol, '; char0=', Integer(peekChar(0))); + if (mLine = 1) and (mCol = 1) and + (peekChar(0) = #$EF) and + (peekChar(1) = #$BB) and + (peekChar(2) = #$BF) then + begin + skipChar(); + skipChar(); + skipChar(); + end; + + while (curChar <> #0) do begin if (curChar = '/') then begin // single-line comment if (nextChar = '/') then begin - while not isEOF and (curChar <> #10) do skipChar(); + //writeln('spos=(', mLine, ',', mCol, ')'); + while (curChar <> #0) and (curChar <> #10) and (curChar <> #13) do skipChar(); skipChar(); // skip EOL + //writeln('{', curChar, '}'); + //writeln('epos=(', mLine, ',', mCol, ')'); continue; end; // multline comment @@ -355,7 +465,7 @@ begin // skip comment start skipChar(); skipChar(); - while not isEOF do + while (curChar <> #0) do begin if (curChar = '*') and (nextChar = '/') then begin @@ -375,7 +485,7 @@ begin skipChar(); skipChar(); level := 1; - while not isEOF do + while (curChar <> #0) do begin if (curChar = '+') and (nextChar = '/') then begin @@ -404,7 +514,7 @@ begin // pascal comment; skip comment start skipChar(); skipChar(); - while not isEOF do + while (curChar <> #0) do begin if (curChar = '*') and (nextChar = ')') then begin @@ -421,7 +531,7 @@ begin begin // pascal comment; skip comment start skipChar(); - while not isEOF do + while (curChar <> #0) do begin if (curChar = '}') then begin @@ -436,7 +546,7 @@ begin if (curChar > ' ') then break; skipChar(); // skip blank end; - result := not isEOF; + result := (curChar <> #0); end; @@ -489,26 +599,28 @@ function TTextParser.skipToken (): Boolean; end; // default base if (base < 0) then base := 10; - if (digitInBase(curChar, base) < 0) then raise Exception.Create('invalid number'); + if (digitInBase(curChar, base) < 0) then error('invalid number'); mTokType := TTInt; mTokInt := 0; // just in case - while not isEOF do + while (curChar <> #0) do begin + if (curChar = '_') then + begin + skipChar(); + if (curChar = #0) then break; + end; n := digitInBase(curChar, base); if (n < 0) then break; n := mTokInt*10+n; - if (n < 0) or (n < mTokInt) then raise Exception.Create('integer overflow'); + if (n < 0) or (n < mTokInt) then error('integer overflow'); mTokInt := n; skipChar(); end; // check for valid number end - if not isEOF then + if (curChar <> #0) then begin - if (curChar = '.') then raise Exception.Create('floating numbers aren''t supported yet'); - if (curChar = '_') or ((curChar >= 'A') and (curChar <= 'Z')) or ((curChar >= 'a') and (curChar <= 'z')) or (curChar >= #128) then - begin - raise Exception.Create('invalid number'); - end; + if (curChar = '.') then error('floating numbers aren''t supported yet'); + if (isIdMidChar(curChar)) then error('invalid number'); end; if neg then mTokInt := -mTokInt; end; @@ -522,12 +634,12 @@ function TTextParser.skipToken (): Boolean; mTokStr := ''; // just in case qch := curChar; skipChar(); // skip starting quote - while not isEOF do + while (curChar <> #0) do begin // escape if (qch = '"') and (curChar = '\') then begin - if (nextChar = #0) then raise Exception.Create('unterminated string escape'); + if (nextChar = #0) then error('unterminated string escape'); ch := nextChar; // skip backslash and escape type skipChar(); @@ -541,7 +653,7 @@ function TTextParser.skipToken (): Boolean; 'x', 'X': // hex escape begin n := digitInBase(curChar, 16); - if (n < 0) then raise Exception.Create('invalid hexstr escape'); + if (n < 0) then error('invalid hexstr escape'); skipChar(); if (digitInBase(curChar, 16) > 0) then begin @@ -577,20 +689,18 @@ function TTextParser.skipToken (): Boolean; begin mTokType := TTId; mTokStr := ''; // just in case - while (curChar = '_') or ((curChar >= '0') and (curChar <= '9')) or - ((curChar >= 'A') and (curChar <= 'Z')) or - ((curChar >= 'a') and (curChar <= 'z')) or - (curChar >= #128) or - ((TOption.DollarIsId in mOptions) and (curChar = '$')) or - ((TOption.DotIsId in mOptions) and (curChar = '.') and (nextChar <> '.')) do + while (isIdMidChar(curChar)) do begin + if (curChar = '.') and (nextChar = '.') then break; // dotdot is a token by itself mTokStr += curChar; skipChar(); end; end; +var + xpos: Integer; begin - mTokType := TTEOF; + mTokType := TTNone; mTokStr := ''; mTokChar := #0; mTokInt := 0; @@ -598,6 +708,7 @@ begin if not skipBlanks() then begin result := false; + mTokType := TTEOF; mTokLine := mLine; mTokCol := mCol; exit; @@ -613,12 +724,42 @@ begin if (curChar >= '0') and (curChar <= '9') then begin parseInt(); exit; end; // string? - if (curChar = '"') or (curChar = '''') then begin parseString(); exit; end; + if (curChar = '"') or (curChar = '''') or (curChar = '`') then begin parseString(); exit; end; + + // html color? + if (curChar = '#') and (TOption.HtmlColors in mOptions) then + begin + if (digitInBase(peekChar(1), 16) >= 0) and (digitInBase(peekChar(2), 16) >= 0) and (digitInBase(peekChar(3), 16) >= 0) then + begin + if (digitInBase(peekChar(4), 16) >= 0) and (digitInBase(peekChar(5), 16) >= 0) and (digitInBase(peekChar(6), 16) >= 0) then xpos := 7 else xpos := 4; + if (not isIdMidChar(peekChar(xpos))) then + begin + mTokType := TTId; + mTokStr := ''; + while (xpos > 0) do + begin + mTokStr += curChar; + skipChar(); + Dec(xpos); + end; + exit; + end; + end; + end; // identifier? - if (curChar = '_') or ((curChar >= 'A') and (curChar <= 'Z')) or ((curChar >= 'a') and (curChar <= 'z')) or (curChar >= #128) then begin parseId(); exit; end; - if (TOption.DollarIsId in mOptions) and (curChar = '$') then begin parseId(); exit; end; - if (TOption.DotIsId in mOptions) and (curChar = '.') and (nextChar <> '.') then begin parseId(); exit; end; + if (isIdStartChar(curChar)) then + begin + if (curChar = '.') and (nextChar = '.') then + begin + // nothing to do here, as dotdot is a token by itself + end + else + begin + parseId(); + exit; + end; + end; // known delimiters? mTokChar := curChar; @@ -653,15 +794,17 @@ begin end; -function TTextParser.isIdOrStr (): Boolean; inline; -begin - result := (mTokType = TTId) or (mTokType = TTStr); -end; +function TTextParser.isEOF (): Boolean; inline; begin result := (mTokType = TTEOF); end; +function TTextParser.isId (): Boolean; inline; begin result := (mTokType = TTId); end; +function TTextParser.isInt (): Boolean; inline; begin result := (mTokType = TTInt); end; +function TTextParser.isStr (): Boolean; inline; begin result := (mTokType = TTStr); end; +function TTextParser.isDelim (): Boolean; inline; begin result := (mTokType = TTDelim); end; +function TTextParser.isIdOrStr (): Boolean; inline; begin result := (mTokType = TTId) or (mTokType = TTStr); end; function TTextParser.expectId (): AnsiString; begin - if (mTokType <> TTId) then raise Exception.Create('identifier expected'); + if (mTokType <> TTId) then error('identifier expected'); result := mTokStr; skipToken(); end; @@ -671,11 +814,11 @@ procedure TTextParser.expectId (const aid: AnsiString; caseSens: Boolean=true); begin if caseSens then begin - if (mTokType <> TTId) or (mTokStr <> aid) then raise Exception.Create('identifier '''+aid+''' expected'); + if (mTokType <> TTId) or (mTokStr <> aid) then error('identifier '''+aid+''' expected'); end else begin - if (mTokType <> TTId) or (not strEquCI1251(mTokStr, aid)) then raise Exception.Create('identifier '''+aid+''' expected'); + if (mTokType <> TTId) or (not strEquCI1251(mTokStr, aid)) then error('identifier '''+aid+''' expected'); end; skipToken(); end; @@ -719,22 +862,22 @@ end; function TTextParser.expectStr (allowEmpty: Boolean=false): AnsiString; begin - if (mTokType <> TTStr) then raise Exception.Create('string expected'); - if (not allowEmpty) and (Length(mTokStr) = 0) then raise Exception.Create('non-empty string expected'); + if (mTokType <> TTStr) then error('string expected'); + if (not allowEmpty) and (Length(mTokStr) = 0) then error('non-empty string expected'); result := mTokStr; skipToken(); end; -function TTextParser.expectStrOrId (allowEmpty: Boolean=false): AnsiString; +function TTextParser.expectIdOrStr (allowEmpty: Boolean=false): AnsiString; begin case mTokType of TTStr: - if (not allowEmpty) and (Length(mTokStr) = 0) then raise Exception.Create('non-empty string expected'); + if (not allowEmpty) and (Length(mTokStr) = 0) then error('non-empty string expected'); TTId: begin end; else - raise Exception.Create('string or identifier expected'); + error('string or identifier expected'); end; result := mTokStr; skipToken(); @@ -743,7 +886,7 @@ end; function TTextParser.expectInt (): Integer; begin - if (mTokType <> TTInt) then raise Exception.Create('string expected'); + if (mTokType <> TTInt) then error('string expected'); result := mTokInt; skipToken(); end; @@ -751,7 +894,7 @@ end; procedure TTextParser.expectTT (ttype: Integer); begin - if (mTokType <> ttype) then raise Exception.Create('unexpected token'); + if (mTokType <> ttype) then error('unexpected token'); skipToken(); end; @@ -765,15 +908,15 @@ end; procedure TTextParser.expectDelim (const ch: AnsiChar); begin - if (mTokType <> TTDelim) or (mTokChar <> ch) then raise Exception.CreateFmt('delimiter ''%s'' expected', [ch]); + if (mTokType <> TTDelim) or (mTokChar <> ch) then errorfmt('delimiter ''%s'' expected', [ch]); skipToken(); end; function TTextParser.expectDelims (const ch: TAnsiCharSet): AnsiChar; begin - if (mTokType <> TTDelim) then raise Exception.Create('delimiter expected'); - if not (mTokChar in ch) then raise Exception.Create('delimiter expected'); + if (mTokType <> TTDelim) then error('delimiter expected'); + if not (mTokChar in ch) then error('delimiter expected'); result := mTokChar; skipToken(); end; @@ -801,20 +944,20 @@ begin GetMem(mBuffer, BufSize); mBufPos := 0; mBufLen := mFile.Read(mBuffer^, BufSize); - if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); + if (mBufLen < 0) then error('TFileTextParser: read error'); inherited Create(aopts); end; constructor TFileTextParser.Create (st: TStream; astOwned: Boolean=true; aopts: TOptions=[TOption.SignedNumbers]); begin - if (st = nil) then raise Exception.Create('cannot create parser for nil stream'); + if (st = nil) then error('cannot create parser for nil stream'); mFile := st; mStreamOwned := astOwned; GetMem(mBuffer, BufSize); mBufPos := 0; mBufLen := mFile.Read(mBuffer^, BufSize); - if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); + if (mBufLen < 0) then error('TFileTextParser: read error'); inherited Create(aopts); end; @@ -825,26 +968,25 @@ begin mBuffer := nil; mBufPos := 0; mBufLen := 0; - if mStreamOwned then mFile.Free(); - mFile := nil; + if (mStreamOwned) then FreeAndNil(mFile) else mFile := nil; inherited; end; -procedure TFileTextParser.loadNextChar (); +function TFileTextParser.loadChar (): AnsiChar; begin - if (mBufLen = 0) then begin mNextChar := #0; exit; end; + if (mBufLen = 0) then begin result := #0; exit; end; if (mBufPos >= mBufLen) then begin mBufLen := mFile.Read(mBuffer^, BufSize); - if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); - if (mBufLen = 0) then begin mNextChar := #0; exit; end; + if (mBufLen < 0) then error('TFileTextParser: read error'); + if (mBufLen = 0) then begin result := #0; exit; end; mBufPos := 0; end; assert(mBufPos < mBufLen); - mNextChar := mBuffer[mBufPos]; + result := mBuffer[mBufPos]; Inc(mBufPos); - if (mNextChar = #0) then mNextChar := ' '; + if (result = #0) then result := ' '; end; @@ -864,12 +1006,13 @@ begin end; -procedure TStrTextParser.loadNextChar (); +function TStrTextParser.loadChar (): AnsiChar; begin - mNextChar := #0; + result := #0; if (mPos > Length(mStr)) then exit; - mNextChar := mStr[mPos]; Inc(mPos); - if (mNextChar = #0) then mNextChar := ' '; + result := mStr[mPos]; + Inc(mPos); + if (result = #0) then result := ' '; end;