X-Git-Url: http://deadsoftware.ru/gitweb?a=blobdiff_plain;f=src%2Fshared%2Fxparser.pas;h=6ecf7f029f90777e46e0382783100f0cef9aabe7;hb=ae58c60b09a12df74e69717546235c3e5bf3c992;hp=d8b600dbc1d6340a0a16c9b8017dfde7bd9f401a;hpb=e540bbf4ebb46a3c4b3ae2fd69ba64a149189a32;p=d2df-sdl.git diff --git a/src/shared/xparser.pas b/src/shared/xparser.pas index d8b600d..6ecf7f0 100644 --- a/src/shared/xparser.pas +++ b/src/shared/xparser.pas @@ -14,17 +14,18 @@ * along with this program. If not, see . *) {$INCLUDE a_modes.inc} +{.$DEFINE XPARSER_DEBUG} unit xparser; interface uses - Classes; + Classes, mempool; // ////////////////////////////////////////////////////////////////////////// // type - TTextParser = class + TTextParser = class(TPoolObject) public const TTNone = -1; @@ -33,18 +34,38 @@ type TTInt = 2; //TTFloat = 3; // not yet TTStr = 4; // string - TTComma = 5; // ',' - TTColon = 6; // ':' - TTSemi = 7; // ';' - TTBegin = 8; // left curly - TTEnd = 9; // right curly - TTDelim = 10; // other delimiters + TTDelim = 5; // one-char delimiters + // + TTLogAnd = 11; // && + TTLogOr = 12; // || + TTLessEqu = 13; // <= + TTGreatEqu = 14; // >= + TTNotEqu = 15; // != + TTEqu = 16; // == or <> + TTAss = 17; // := + TTShl = 18; // << + TTShr = 19; // >> + TTDotDot = 19; // .. + + public + type + TOption = ( + SignedNumbers, // allow signed numbers; otherwise sign will be TTDelim + DollarIsId, // allow dollar in identifiers; otherwise dollar will be TTDelim + DotIsId, // allow dot in identifiers; otherwise dot will be TTDelim + PascalComments // allow `{}` pascal comments + ); + TOptions = set of TOption; + + private + type + TAnsiCharSet = set of AnsiChar; private mLine, mCol: Integer; mCurChar, mNextChar: AnsiChar; - mAllowSignedNumbers: Boolean; // internal control + mOptions: TOptions; mTokLine, mTokCol: Integer; // token start mTokType: Integer; @@ -53,11 +74,11 @@ type mTokInt: Integer; protected - procedure warmup (); virtual; // called in constructor to warm up the system + procedure warmup (); // called in constructor to warm up the system procedure loadNextChar (); virtual; abstract; // loads next char into mNextChar; #0 means 'eof' public - constructor Create (loadToken: Boolean=true); + constructor Create (aopts: TOptions=[TOption.SignedNumbers]); destructor Destroy (); override; function isEOF (): Boolean; inline; @@ -67,6 +88,9 @@ type function skipBlanks (): Boolean; // ...and comments; returns `false` on eof function skipToken (): Boolean; // returns `false` on eof + {$IFDEF XPARSER_DEBUG} + function skipToken1 (): Boolean; + {$ENDIF} function expectId (): AnsiString; procedure expectId (const aid: AnsiString); @@ -75,12 +99,20 @@ type function expectStr (allowEmpty: Boolean=false): AnsiString; function expectInt (): Integer; + function expectStrOrId (allowEmpty: Boolean=false): AnsiString; + procedure expectTT (ttype: Integer); function eatTT (ttype: Integer): Boolean; - function expectDelim (const ch: AnsiChar): AnsiChar; + procedure expectDelim (const ch: AnsiChar); + function expectDelims (const ch: TAnsiCharSet): AnsiChar; function eatDelim (const ch: AnsiChar): Boolean; + function isDelim (const ch: AnsiChar): Boolean; inline; + + public + property options: TOptions read mOptions write mOptions; + public property col: Integer read mCol; property line: Integer read mLine; @@ -102,15 +134,22 @@ type // ////////////////////////////////////////////////////////////////////////// // type TFileTextParser = class(TTextParser) + private + const BufSize = 16384; + private mFile: TStream; + mStreamOwned: Boolean; + mBuffer: PChar; + mBufLen: Integer; + mBufPos: Integer; protected procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof' public - constructor Create (const fname: AnsiString; loadToken: Boolean=true); - constructor Create (st: TStream; loadToken: Boolean=true); // will take ownership on st + constructor Create (const fname: AnsiString; aopts: TOptions=[TOption.SignedNumbers]); + constructor Create (st: TStream; astOwned: Boolean=true; aopts: TOptions=[TOption.SignedNumbers]); destructor Destroy (); override; end; @@ -123,7 +162,7 @@ type procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof' public - constructor Create (const astr: AnsiString; loadToken: Boolean=true); + constructor Create (const astr: AnsiString; aopts: TOptions=[TOption.SignedNumbers]); destructor Destroy (); override; end; @@ -140,6 +179,8 @@ type public constructor Create (); + procedure flush (); virtual; + procedure put (const s: AnsiString); overload; procedure put (v: Byte); overload; procedure put (v: Integer); overload; @@ -147,69 +188,58 @@ type procedure putIndent (); procedure indent (); procedure unindent (); + + public + property curIndent: Integer read mIndent; end; // ////////////////////////////////////////////////////////////////////////// // type TFileTextWriter = class(TTextWriter) + private + const BufSize = 16384; + private mFile: TStream; + mStreamOwned: Boolean; + mBuffer: PAnsiChar; + mBufUsed: Integer; protected procedure putBuf (constref buf; len: SizeUInt); override; public constructor Create (const fname: AnsiString); + constructor Create (ast: TStream; astOwned: Boolean=true); // will own the stream by default destructor Destroy (); override; - end; + procedure flush (); override; + end; -implementation + TStrTextWriter = class(TTextWriter) + private + mStr: AnsiString; -uses - SysUtils, utils; + protected + procedure putBuf (constref buf; len: SizeUInt); override; + public + constructor Create (); + destructor Destroy (); override; -var - wc2shitmap: array[0..65535] of AnsiChar; - wc2shitmapInited: Boolean = false; + property str: AnsiString read mStr; + end; -// ////////////////////////////////////////////////////////////////////////// // -procedure initShitMap (); -const - cp1251: array[0..127] of Word = ( - $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F, - $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F, - $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407, - $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457, - $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F, - $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F, - $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F, - $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F - ); -var - f: Integer; -begin - for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?'; - for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f); - for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128); - wc2shitmapInited := true; -end; - +implementation -// ////////////////////////////////////////////////////////////////////////// // -// TODO: make a hash or something -function wcharTo1251 (wc: WideChar): AnsiChar; inline; -begin - if not wc2shitmapInited then initShitMap(); - if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)]; -end; +uses + SysUtils, utils; // ////////////////////////////////////////////////////////////////////////// // -constructor TTextParser.Create (loadToken: Boolean=true); +constructor TTextParser.Create (aopts: TOptions=[TOption.SignedNumbers]); begin mLine := 1; mCol := 1; @@ -219,9 +249,9 @@ begin mTokStr := ''; mTokChar := #0; mTokInt := 0; - mAllowSignedNumbers := true; - warmup(); // change `mAllowSignedNumbers` there, if necessary - if loadToken then skipToken(); + mOptions := aopts; + warmup(); + skipToken(); end; @@ -324,6 +354,40 @@ begin end; continue; end; + end + else if (curChar = '(') and (nextChar = '*') then + begin + // pascal comment; skip comment start + skipChar(); + skipChar(); + while not isEOF do + begin + if (curChar = '*') and (nextChar = ')') then + begin + // skip comment end + skipChar(); + skipChar(); + break; + end; + skipChar(); + end; + continue; + end + else if (curChar = '{') and (TOption.PascalComments in mOptions) then + begin + // pascal comment; skip comment start + skipChar(); + while not isEOF do + begin + if (curChar = '}') then + begin + // skip comment end + skipChar(); + break; + end; + skipChar(); + end; + continue; end; if (curChar > ' ') then break; skipChar(); // skip blank @@ -332,15 +396,25 @@ begin end; +{$IFDEF XPARSER_DEBUG} function TTextParser.skipToken (): Boolean; +begin + writeln('getting token...'); + result := skipToken1(); + writeln(' got token: ', mTokType, ' <', mTokStr, '> : <', mTokChar, '>'); +end; +function TTextParser.skipToken1 (): Boolean; +{$ELSE} +function TTextParser.skipToken (): Boolean; +{$ENDIF} procedure parseInt (); var neg: Boolean = false; base: Integer = -1; n: Integer; begin - if mAllowSignedNumbers then + if (TOption.SignedNumbers in mOptions) then begin if (curChar = '+') or (curChar = '-') then begin @@ -462,7 +536,9 @@ function TTextParser.skipToken (): Boolean; while (curChar = '_') or ((curChar >= '0') and (curChar <= '9')) or ((curChar >= 'A') and (curChar <= 'Z')) or ((curChar >= 'a') and (curChar <= 'z')) or - (curChar >= #128) do + (curChar >= #128) or + ((TOption.DollarIsId in mOptions) and (curChar = '$')) or + ((TOption.DotIsId in mOptions) and (curChar = '.') and (nextChar <> '.')) do begin mTokStr += curChar; skipChar(); @@ -489,7 +565,7 @@ begin result := true; // number? - if mAllowSignedNumbers and ((curChar = '+') or (curChar = '-')) then begin parseInt(); exit; end; + if (TOption.SignedNumbers in mOptions) and ((curChar = '+') or (curChar = '-')) then begin parseInt(); exit; end; if (curChar >= '0') and (curChar <= '9') then begin parseInt(); exit; end; // string? @@ -497,18 +573,39 @@ begin // identifier? if (curChar = '_') or ((curChar >= 'A') and (curChar <= 'Z')) or ((curChar >= 'a') and (curChar <= 'z')) or (curChar >= #128) then begin parseId(); exit; end; + if (TOption.DollarIsId in mOptions) and (curChar = '$') then begin parseId(); exit; end; + if (TOption.DotIsId in mOptions) and (curChar = '.') and (nextChar <> '.') then begin parseId(); exit; end; // known delimiters? - case curChar of - ',': mTokType := TTComma; - ':': mTokType := TTColon; - ';': mTokType := TTSemi; - '{': mTokType := TTBegin; - '}': mTokType := TTEnd; - else mTokType := TTDelim; - end; mTokChar := curChar; + mTokType := TTDelim; skipChar(); + if (curChar = '=') then + begin + case mTokChar of + '<': begin mTokType := TTLessEqu; mTokStr := '<='; skipChar(); exit; end; + '>': begin mTokType := TTGreatEqu; mTokStr := '>='; skipChar(); exit; end; + '!': begin mTokType := TTNotEqu; mTokStr := '!='; skipChar(); exit; end; + '=': begin mTokType := TTEqu; mTokStr := '=='; skipChar(); exit; end; + ':': begin mTokType := TTAss; mTokStr := ':='; skipChar(); exit; end; + end; + end + else if (mTokChar = curChar) then + begin + case mTokChar of + '<': begin mTokType := TTShl; mTokStr := '<<'; skipChar(); exit; end; + '>': begin mTokType := TTShr; mTokStr := '>>'; skipChar(); exit; end; + '&': begin mTokType := TTLogAnd; mTokStr := '&&'; skipChar(); exit; end; + '|': begin mTokType := TTLogOr; mTokStr := '||'; skipChar(); exit; end; + end; + end + else + begin + case mTokChar of + '<': if (curChar = '>') then begin mTokType := TTNotEqu; mTokStr := '<>'; skipChar(); exit; end; + '.': if (curChar = '.') then begin mTokType := TTDotDot; mTokStr := '..'; skipChar(); exit; end; + end; + end; end; @@ -522,17 +619,15 @@ end; procedure TTextParser.expectId (const aid: AnsiString); begin - if (mTokType <> TTId) or (CompareText(mTokStr, aid) <> 0) then raise Exception.Create('identifier '''+aid+''' expected'); + if (mTokType <> TTId) or (mTokStr <> aid) then raise Exception.Create('identifier '''+aid+''' expected'); skipToken(); end; function TTextParser.eatId (const aid: AnsiString): Boolean; begin - result := false; - if (mTokType <> TTId) or (CompareText(mTokStr, aid) <> 0) then exit; - result := true; - skipToken(); + result := (mTokType = TTId) and (mTokStr = aid); + if result then skipToken(); end; @@ -545,6 +640,21 @@ begin end; +function TTextParser.expectStrOrId (allowEmpty: Boolean=false): AnsiString; +begin + case mTokType of + TTStr: + if (not allowEmpty) and (Length(mTokStr) = 0) then raise Exception.Create('non-empty string expected'); + TTId: + begin end; + else + raise Exception.Create('string or identifier expected'); + end; + result := mTokStr; + skipToken(); +end; + + function TTextParser.expectInt (): Integer; begin if (mTokType <> TTInt) then raise Exception.Create('string expected'); @@ -567,9 +677,17 @@ begin end; -function TTextParser.expectDelim (const ch: AnsiChar): AnsiChar; +procedure TTextParser.expectDelim (const ch: AnsiChar); +begin + if (mTokType <> TTDelim) or (mTokChar <> ch) then raise Exception.CreateFmt('delimiter ''%s'' expected', [ch]); + skipToken(); +end; + + +function TTextParser.expectDelims (const ch: TAnsiCharSet): AnsiChar; begin - if (mTokType <> TTDelim) then raise Exception.Create(Format('delimiter ''%s'' expected', [ch])); + if (mTokType <> TTDelim) then raise Exception.Create('delimiter expected'); + if not (mTokChar in ch) then raise Exception.Create('delimiter expected'); result := mTokChar; skipToken(); end; @@ -577,52 +695,79 @@ end; function TTextParser.eatDelim (const ch: AnsiChar): Boolean; begin - result := false; - if (mTokType <> TTDelim) or (mTokChar <> ch) then exit; - result := true; - skipToken(); + result := (mTokType = TTDelim) and (mTokChar = ch); + if result then skipToken(); +end; + + +function TTextParser.isDelim (const ch: AnsiChar): Boolean; inline; +begin + result := (mTokType = TTDelim) and (mTokChar = ch); end; // ////////////////////////////////////////////////////////////////////////// // -constructor TFileTextParser.Create (const fname: AnsiString; loadToken: Boolean=true); +constructor TFileTextParser.Create (const fname: AnsiString; aopts: TOptions=[TOption.SignedNumbers]); begin + mBuffer := nil; mFile := openDiskFileRO(fname); - inherited Create(loadToken); + mStreamOwned := true; + GetMem(mBuffer, BufSize); + mBufPos := 0; + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); + inherited Create(aopts); end; -constructor TFileTextParser.Create (st: TStream; loadToken: Boolean=true); +constructor TFileTextParser.Create (st: TStream; astOwned: Boolean=true; aopts: TOptions=[TOption.SignedNumbers]); begin if (st = nil) then raise Exception.Create('cannot create parser for nil stream'); mFile := st; - inherited Create(loadToken); + mStreamOwned := astOwned; + GetMem(mBuffer, BufSize); + mBufPos := 0; + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); + inherited Create(aopts); end; destructor TFileTextParser.Destroy (); begin - mFile.Free(); + if (mBuffer <> nil) then FreeMem(mBuffer); + mBuffer := nil; + mBufPos := 0; + mBufLen := 0; + if mStreamOwned then mFile.Free(); + mFile := nil; inherited; end; procedure TFileTextParser.loadNextChar (); -var - rd: Integer; begin - rd := mFile.Read(mNextChar, 1); - if (rd = 0) then begin mNextChar := #0; exit; end; + if (mBufLen = 0) then begin mNextChar := #0; exit; end; + if (mBufPos >= mBufLen) then + begin + mBufLen := mFile.Read(mBuffer^, BufSize); + if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error'); + if (mBufLen = 0) then begin mNextChar := #0; exit; end; + mBufPos := 0; + end; + assert(mBufPos < mBufLen); + mNextChar := mBuffer[mBufPos]; + Inc(mBufPos); if (mNextChar = #0) then mNextChar := ' '; end; // ////////////////////////////////////////////////////////////////////////// // -constructor TStrTextParser.Create (const astr: AnsiString; loadToken: Boolean=true); +constructor TStrTextParser.Create (const astr: AnsiString; aopts: TOptions=[TOption.SignedNumbers]); begin mStr := astr; mPos := 1; - inherited Create(loadToken); + inherited Create(aopts); end; @@ -644,6 +789,7 @@ end; // ////////////////////////////////////////////////////////////////////////// // constructor TTextWriter.Create (); begin mIndent := 0; end; +procedure TTextWriter.flush (); begin end; procedure TTextWriter.put (const s: AnsiString); overload; begin if (Length(s) > 0) then putBuf((@(s[1]))^, Length(s)); end; procedure TTextWriter.put (v: Byte); overload; begin put('%d', [v]); end; procedure TTextWriter.put (v: Integer); overload; begin put('%d', [v]); end; @@ -657,33 +803,96 @@ procedure TTextWriter.unindent (); begin Dec(mIndent, 2); end; constructor TFileTextWriter.Create (const fname: AnsiString); begin mFile := createDiskFile(fname); + mStreamOwned := true; + mBufUsed := 0; + GetMem(mBuffer, BufSize); + assert(mBuffer <> nil); inherited Create(); end; +constructor TFileTextWriter.Create (ast: TStream; astOwned: Boolean=true); +begin + if (ast = nil) then raise Exception.Create('cannot write to nil stream'); + mFile := ast; + mStreamOwned := astOwned; + mBufUsed := 0; + GetMem(mBuffer, BufSize); + assert(mBuffer <> nil); +end; + + destructor TFileTextWriter.Destroy (); begin - mFile.Free(); + flush(); + if (mBuffer <> nil) then FreeMem(mBuffer); + mBufUsed := 0; + mBuffer := nil; + if (mStreamOwned) then mFile.Free(); + mFile := nil; inherited; end; +procedure TFileTextWriter.flush (); +begin + if (mFile <> nil) and (mBufUsed > 0) then + begin + mFile.WriteBuffer(mBuffer^, mBufUsed); + end; + mBufUsed := 0; +end; + + procedure TFileTextWriter.putBuf (constref buf; len: SizeUInt); var pc: PChar; + left: Integer; begin - if (len > 0) then + if (len = 0) then exit; + pc := @buf; + while (len > 0) do begin - pc := @buf; - mFile.WriteBuffer(pc^, len); - { - while (len > 0) do + left := BufSize-mBufUsed; + if (left = 0) then begin - write(pc^); - Inc(pc); - Dec(len); + flush(); + left := BufSize-mBufUsed; + assert(left > 0); end; - } + if (left > len) then left := Integer(len); + Move(pc^, (mBuffer+mBufUsed)^, left); + Inc(mBufUsed, left); + pc += left; + len -= left; + end; +end; + + +// ////////////////////////////////////////////////////////////////////////// // +constructor TStrTextWriter.Create (); +begin + mStr := ''; +end; + + +destructor TStrTextWriter.Destroy (); +begin + mStr := ''; + inherited; +end; + + +procedure TStrTextWriter.putBuf (constref buf; len: SizeUInt); +var + st: AnsiString = ''; +begin + if (len > 0) then + begin + SetLength(st, Integer(len)); + Move(buf, PChar(st)^, Integer(len)); + mStr += st; + st := ''; end; end;