1 (* coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
2 * Understanding is not required. Only obedience.
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 {$INCLUDE a_modes.inc}
18 {.$DEFINE XPARSER_DEBUG}
24 SysUtils
, Classes
{$IFDEF USE_MEMPOOL}, mempool
{$ENDIF};
27 // ////////////////////////////////////////////////////////////////////////// //
31 TParserException
= class(Exception
)
33 tokLine
, tokCol
: Integer;
36 constructor Create (pr
: TTextParser
; const amsg
: AnsiString);
37 constructor CreateFmt (pr
: TTextParser
; const afmt
: AnsiString; const args
: array of const);
40 TTextParser
= class{$IFDEF USE_MEMPOOL}(TPoolObject
){$ENDIF}
47 //TTFloat = 3; // not yet
49 TTDelim
= 5; // one-char delimiters
54 TTGreatEqu
= 14; // >=
56 TTEqu
= 16; // == or <>
65 SignedNumbers
, // allow signed numbers; otherwise sign will be TTDelim
66 DollarIsId
, // allow dollar in identifiers; otherwise dollar will be TTDelim
67 DotIsId
, // allow dot in identifiers; otherwise dot will be TTDelim
68 DashIsId
, // '-' can be part of identifier (but identifier cannot start with '-')
69 PascalComments
// allow `{}` pascal comments
71 TOptions
= set of TOption
;
75 TAnsiCharSet
= set of AnsiChar;
79 mCurChar
, mNextChar
: AnsiChar;
83 mTokLine
, mTokCol
: Integer; // token start
85 mTokStr
: AnsiString; // string or identifier
86 mTokChar
: AnsiChar; // for delimiters
90 procedure warmup (); // called in constructor to warm up the system
91 procedure loadNextChar (); virtual; abstract; // loads next char into mNextChar; #0 means 'eof'
94 constructor Create (aopts
: TOptions
=[TOption
.SignedNumbers
]);
95 destructor Destroy (); override;
97 procedure error (const amsg
: AnsiString); noreturn
;
98 procedure errorfmt (const afmt
: AnsiString; const args
: array of const); noreturn
;
100 function isEOF (): Boolean; inline;
102 function skipChar (): Boolean; // returns `false` on eof
104 function skipBlanks (): Boolean; // ...and comments; returns `false` on eof
106 function skipToken (): Boolean; // returns `false` on eof
107 {$IFDEF XPARSER_DEBUG}
108 function skipToken1 (): Boolean;
111 function isIdOrStr (): Boolean; inline;
113 function expectId (): AnsiString;
114 procedure expectId (const aid
: AnsiString; caseSens
: Boolean=true);
115 function eatId (const aid
: AnsiString; caseSens
: Boolean=true): Boolean;
116 function eatIdOrStr (const aid
: AnsiString; caseSens
: Boolean=true): Boolean;
117 function eatIdOrStrCI (const aid
: AnsiString): Boolean; inline;
119 function expectStr (allowEmpty
: Boolean=false): AnsiString;
120 function expectInt (): Integer;
122 function expectIdOrStr (allowEmpty
: Boolean=false): AnsiString;
124 procedure expectTT (ttype
: Integer);
125 function eatTT (ttype
: Integer): Boolean;
127 procedure expectDelim (const ch
: AnsiChar);
128 function expectDelims (const ch
: TAnsiCharSet
): AnsiChar;
129 function eatDelim (const ch
: AnsiChar): Boolean;
131 function isDelim (const ch
: AnsiChar): Boolean; inline;
134 property options
: TOptions read mOptions write mOptions
;
137 property col
: Integer read mCol
;
138 property line
: Integer read mLine
;
140 property curChar
: AnsiChar read mCurChar
;
141 property nextChar
: AnsiChar read mNextChar
;
144 property tokCol
: Integer read mTokCol
;
145 property tokLine
: Integer read mTokLine
;
147 property tokType
: Integer read mTokType
; // see TTXXX constants
148 property tokStr
: AnsiString read mTokStr
; // string or identifier
149 property tokChar
: AnsiChar read mTokChar
; // for delimiters
150 property tokInt
: Integer read mTokInt
;
154 // ////////////////////////////////////////////////////////////////////////// //
156 TFileTextParser
= class(TTextParser
)
158 const BufSize
= 16384;
162 mStreamOwned
: Boolean;
168 procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof'
171 constructor Create (const fname
: AnsiString; aopts
: TOptions
=[TOption
.SignedNumbers
]);
172 constructor Create (st
: TStream
; astOwned
: Boolean=true; aopts
: TOptions
=[TOption
.SignedNumbers
]);
173 destructor Destroy (); override;
176 TStrTextParser
= class(TTextParser
)
182 procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof'
185 constructor Create (const astr
: AnsiString; aopts
: TOptions
=[TOption
.SignedNumbers
]);
186 destructor Destroy (); override;
190 // ////////////////////////////////////////////////////////////////////////// //
197 procedure putBuf (constref buf
; len
: SizeUInt
); virtual; abstract;
200 constructor Create ();
202 procedure flush (); virtual;
204 procedure put (const s
: AnsiString); overload
;
205 procedure put (v
: Byte); overload
;
206 procedure put (v
: Integer); overload
;
207 procedure put (const fmt
: AnsiString; args
: array of const); overload
;
208 procedure putIndent ();
210 procedure unindent ();
213 property curIndent
: Integer read mIndent
;
217 // ////////////////////////////////////////////////////////////////////////// //
219 TFileTextWriter
= class(TTextWriter
)
221 const BufSize
= 16384;
225 mStreamOwned
: Boolean;
230 procedure putBuf (constref buf
; len
: SizeUInt
); override;
233 constructor Create (const fname
: AnsiString);
234 constructor Create (ast
: TStream
; astOwned
: Boolean=true); // will own the stream by default
235 destructor Destroy (); override;
237 procedure flush (); override;
240 TStrTextWriter
= class(TTextWriter
)
245 procedure putBuf (constref buf
; len
: SizeUInt
); override;
248 constructor Create ();
249 destructor Destroy (); override;
251 property str
: AnsiString read mStr
;
261 // ////////////////////////////////////////////////////////////////////////// //
262 constructor TParserException
.Create (pr
: TTextParser
; const amsg
: AnsiString);
264 if (pr
<> nil) then begin tokLine
:= pr
.tokLine
; tokCol
:= pr
.tokCol
; end;
265 inherited Create(amsg
);
268 constructor TParserException
.CreateFmt (pr
: TTextParser
; const afmt
: AnsiString; const args
: array of const);
270 if (pr
<> nil) then begin tokLine
:= pr
.tokLine
; tokCol
:= pr
.tokCol
; end;
271 inherited Create(formatstrf(afmt
, args
));
275 // ////////////////////////////////////////////////////////////////////////// //
276 constructor TTextParser
.Create (aopts
: TOptions
=[TOption
.SignedNumbers
]);
292 destructor TTextParser
.Destroy ();
298 procedure TTextParser
.error (const amsg
: AnsiString); noreturn
;
300 raise TParserException
.Create(self
, amsg
);
304 procedure TTextParser
.errorfmt (const afmt
: AnsiString; const args
: array of const); noreturn
;
306 raise TParserException
.CreateFmt(self
, afmt
, args
);
310 function TTextParser
.isEOF (): Boolean; inline; begin {result := (mCurChar = #0);} result
:= (mTokType
= TTEOF
); end;
313 procedure TTextParser
.warmup ();
317 mCurChar
:= mNextChar
;
318 if (mNextChar
<> #0) then loadNextChar();
322 function TTextParser
.skipChar (): Boolean;
324 if (mCurChar
= #0) then begin result
:= false; exit
; end;
325 if (mCurChar
= #10) then begin mCol
:= 1; Inc(mLine
); end else Inc(mCol
);
326 mCurChar
:= mNextChar
;
327 if (mCurChar
= #0) then begin result
:= false; exit
; end;
330 if (mCurChar
= #13) then
332 if (mNextChar
= #10) then loadNextChar();
339 function TTextParser
.skipBlanks (): Boolean;
343 while (mCurChar
<> #0) do
345 if (mCurChar
= '/') then
347 // single-line comment
348 if (mNextChar
= '/') then
350 while (mCurChar
<> #0) and (mCurChar
<> #10) do skipChar();
351 skipChar(); // skip EOL
355 if (mNextChar
= '*') then
357 // skip comment start
360 while (mCurChar
<> #0) do
362 if (mCurChar
= '*') and (mNextChar
= '/') then
373 // nesting multline comment
374 if (mNextChar
= '+') then
376 // skip comment start
380 while (mCurChar
<> #0) do
382 if (mCurChar
= '+') and (mNextChar
= '/') then
388 if (level
= 0) then break
;
391 if (mCurChar
= '/') and (mNextChar
= '+') then
393 // skip comment start
404 else if (mCurChar
= '(') and (mNextChar
= '*') then
406 // pascal comment; skip comment start
409 while (mCurChar
<> #0) do
411 if (mCurChar
= '*') and (mNextChar
= ')') then
422 else if (mCurChar
= '{') and (TOption
.PascalComments
in mOptions
) then
424 // pascal comment; skip comment start
426 while (mCurChar
<> #0) do
428 if (mCurChar
= '}') then
438 if (mCurChar
> ' ') then break
;
439 skipChar(); // skip blank
441 result
:= (mCurChar
<> #0);
445 {$IFDEF XPARSER_DEBUG}
446 function TTextParser
.skipToken (): Boolean;
448 writeln('getting token...');
449 result
:= skipToken1();
450 writeln(' got token: ', mTokType
, ' <', mTokStr
, '> : <', mTokChar
, '>');
453 function TTextParser
.skipToken1 (): Boolean;
455 function TTextParser
.skipToken (): Boolean;
457 procedure parseInt ();
459 neg
: Boolean = false;
463 if (TOption
.SignedNumbers
in mOptions
) then
465 if (mCurChar
= '+') or (mCurChar
= '-') then
467 neg
:= (mCurChar
= '-');
469 if (mCurChar
< '0') or (mCurChar
> '9') then
472 if (neg
) then mTokChar
:= '-' else mTokChar
:= '+';
477 if (mCurChar
= '0') then
493 if (base
< 0) then base
:= 10;
494 if (digitInBase(mCurChar
, base
) < 0) then raise Exception
.Create('invalid number');
496 mTokInt
:= 0; // just in case
497 while (mCurChar
<> #0) do
499 n
:= digitInBase(mCurChar
, base
);
500 if (n
< 0) then break
;
502 if (n
< 0) or (n
< mTokInt
) then raise Exception
.Create('integer overflow');
506 // check for valid number end
507 if (mCurChar
<> #0) then
509 if (mCurChar
= '.') then raise Exception
.Create('floating numbers aren''t supported yet');
510 if (mCurChar
= '_') or ((mCurChar
>= 'A') and (mCurChar
<= 'Z')) or ((mCurChar
>= 'a') and (mCurChar
<= 'z')) or (mCurChar
>= #128) then
512 raise Exception
.Create('invalid number');
515 if neg
then mTokInt
:= -mTokInt
;
518 procedure parseString ();
524 mTokStr
:= ''; // just in case
526 skipChar(); // skip starting quote
527 while (mCurChar
<> #0) do
530 if (qch
= '"') and (mCurChar
= '\') then
532 if (mNextChar
= #0) then raise Exception
.Create('unterminated string escape');
534 // skip backslash and escape type
543 'x', 'X': // hex escape
545 n
:= digitInBase(mCurChar
, 16);
546 if (n
< 0) then raise Exception
.Create('invalid hexstr escape');
548 if (digitInBase(mCurChar
, 16) > 0) then
550 n
:= n
*16+digitInBase(mCurChar
, 16);
553 mTokStr
+= AnsiChar(n
);
559 // duplicate single quote (pascal style)
560 if (qch
= '''') and (mCurChar
= '''') and (mNextChar
= '''') then
568 if (mCurChar
= qch
) then
570 skipChar(); // skip ending quote
578 procedure parseId ();
581 mTokStr
:= ''; // just in case
582 while (mCurChar
= '_') or ((mCurChar
>= '0') and (mCurChar
<= '9')) or
583 ((mCurChar
>= 'A') and (mCurChar
<= 'Z')) or
584 ((mCurChar
>= 'a') and (mCurChar
<= 'z')) or
585 (mCurChar
>= #128) or
586 ((TOption
.DollarIsId
in mOptions
) and (mCurChar
= '$')) or
587 ((TOption
.DotIsId
in mOptions
) and (mCurChar
= '.') and (mNextChar
<> '.')) or
588 ((TOption
.DashIsId
in mOptions
) and (mCurChar
= '-')) do
601 if not skipBlanks() then
616 if (TOption
.SignedNumbers
in mOptions
) and ((mCurChar
= '+') or (mCurChar
= '-')) then begin parseInt(); exit
; end;
617 if (mCurChar
>= '0') and (mCurChar
<= '9') then begin parseInt(); exit
; end;
620 if (mCurChar
= '"') or (mCurChar
= '''') then begin parseString(); exit
; end;
623 if (mCurChar
= '_') or ((mCurChar
>= 'A') and (mCurChar
<= 'Z')) or ((mCurChar
>= 'a') and (mCurChar
<= 'z')) or (mCurChar
>= #128) then begin parseId(); exit
; end;
624 if (TOption
.DollarIsId
in mOptions
) and (mCurChar
= '$') then begin parseId(); exit
; end;
625 if (TOption
.DotIsId
in mOptions
) and (mCurChar
= '.') and (mNextChar
<> '.') then begin parseId(); exit
; end;
628 mTokChar
:= mCurChar
;
631 if (mCurChar
= '=') then
634 '<': begin mTokType
:= TTLessEqu
; mTokStr
:= '<='; skipChar(); exit
; end;
635 '>': begin mTokType
:= TTGreatEqu
; mTokStr
:= '>='; skipChar(); exit
; end;
636 '!': begin mTokType
:= TTNotEqu
; mTokStr
:= '!='; skipChar(); exit
; end;
637 '=': begin mTokType
:= TTEqu
; mTokStr
:= '=='; skipChar(); exit
; end;
638 ':': begin mTokType
:= TTAss
; mTokStr
:= ':='; skipChar(); exit
; end;
641 else if (mTokChar
= mCurChar
) then
644 '<': begin mTokType
:= TTShl
; mTokStr
:= '<<'; skipChar(); exit
; end;
645 '>': begin mTokType
:= TTShr
; mTokStr
:= '>>'; skipChar(); exit
; end;
646 '&': begin mTokType
:= TTLogAnd
; mTokStr
:= '&&'; skipChar(); exit
; end;
647 '|': begin mTokType
:= TTLogOr
; mTokStr
:= '||'; skipChar(); exit
; end;
653 '<': if (mCurChar
= '>') then begin mTokType
:= TTNotEqu
; mTokStr
:= '<>'; skipChar(); exit
; end;
654 '.': if (mCurChar
= '.') then begin mTokType
:= TTDotDot
; mTokStr
:= '..'; skipChar(); exit
; end;
660 function TTextParser
.isIdOrStr (): Boolean; inline;
662 result
:= (mTokType
= TTId
) or (mTokType
= TTStr
);
666 function TTextParser
.expectId (): AnsiString;
668 if (mTokType
<> TTId
) then raise Exception
.Create('identifier expected');
674 procedure TTextParser
.expectId (const aid
: AnsiString; caseSens
: Boolean=true);
678 if (mTokType
<> TTId
) or (mTokStr
<> aid
) then raise Exception
.Create('identifier '''+aid
+''' expected');
682 if (mTokType
<> TTId
) or (not strEquCI1251(mTokStr
, aid
)) then raise Exception
.Create('identifier '''+aid
+''' expected');
688 function TTextParser
.eatId (const aid
: AnsiString; caseSens
: Boolean=true): Boolean;
692 result
:= (mTokType
= TTId
) and (mTokStr
= aid
);
696 result
:= (mTokType
= TTId
) and strEquCI1251(mTokStr
, aid
);
698 if result
then skipToken();
702 function TTextParser
.eatIdOrStr (const aid
: AnsiString; caseSens
: Boolean=true): Boolean;
706 result
:= (mTokType
= TTId
) and (mTokStr
= aid
);
707 if not result
then result
:= (mTokType
= TTStr
) and (mTokStr
= aid
);
711 result
:= (mTokType
= TTId
) and strEquCI1251(mTokStr
, aid
);
712 if not result
then result
:= (mTokType
= TTStr
) and strEquCI1251(mTokStr
, aid
);
714 if result
then skipToken();
718 function TTextParser
.eatIdOrStrCI (const aid
: AnsiString): Boolean; inline;
720 result
:= eatIdOrStr(aid
, false);
724 function TTextParser
.expectStr (allowEmpty
: Boolean=false): AnsiString;
726 if (mTokType
<> TTStr
) then raise Exception
.Create('string expected');
727 if (not allowEmpty
) and (Length(mTokStr
) = 0) then raise Exception
.Create('non-empty string expected');
733 function TTextParser
.expectIdOrStr (allowEmpty
: Boolean=false): AnsiString;
737 if (not allowEmpty
) and (Length(mTokStr
) = 0) then raise Exception
.Create('non-empty string expected');
741 raise Exception
.Create('string or identifier expected');
748 function TTextParser
.expectInt (): Integer;
750 if (mTokType
<> TTInt
) then raise Exception
.Create('string expected');
756 procedure TTextParser
.expectTT (ttype
: Integer);
758 if (mTokType
<> ttype
) then raise Exception
.Create('unexpected token');
763 function TTextParser
.eatTT (ttype
: Integer): Boolean;
765 result
:= (mTokType
= ttype
);
766 if result
then skipToken();
770 procedure TTextParser
.expectDelim (const ch
: AnsiChar);
772 if (mTokType
<> TTDelim
) or (mTokChar
<> ch
) then raise Exception
.CreateFmt('delimiter ''%s'' expected', [ch
]);
777 function TTextParser
.expectDelims (const ch
: TAnsiCharSet
): AnsiChar;
779 if (mTokType
<> TTDelim
) then raise Exception
.Create('delimiter expected');
780 if not (mTokChar
in ch
) then raise Exception
.Create('delimiter expected');
786 function TTextParser
.eatDelim (const ch
: AnsiChar): Boolean;
788 result
:= (mTokType
= TTDelim
) and (mTokChar
= ch
);
789 if result
then skipToken();
793 function TTextParser
.isDelim (const ch
: AnsiChar): Boolean; inline;
795 result
:= (mTokType
= TTDelim
) and (mTokChar
= ch
);
799 // ////////////////////////////////////////////////////////////////////////// //
800 constructor TFileTextParser
.Create (const fname
: AnsiString; aopts
: TOptions
=[TOption
.SignedNumbers
]);
803 mFile
:= openDiskFileRO(fname
);
804 mStreamOwned
:= true;
805 GetMem(mBuffer
, BufSize
);
807 mBufLen
:= mFile
.Read(mBuffer
^, BufSize
);
808 if (mBufLen
< 0) then raise Exception
.Create('TFileTextParser: read error');
809 inherited Create(aopts
);
813 constructor TFileTextParser
.Create (st
: TStream
; astOwned
: Boolean=true; aopts
: TOptions
=[TOption
.SignedNumbers
]);
815 if (st
= nil) then raise Exception
.Create('cannot create parser for nil stream');
817 mStreamOwned
:= astOwned
;
818 GetMem(mBuffer
, BufSize
);
820 mBufLen
:= mFile
.Read(mBuffer
^, BufSize
);
821 if (mBufLen
< 0) then raise Exception
.Create('TFileTextParser: read error');
822 inherited Create(aopts
);
826 destructor TFileTextParser
.Destroy ();
828 if (mBuffer
<> nil) then FreeMem(mBuffer
);
832 if mStreamOwned
then mFile
.Free();
838 procedure TFileTextParser
.loadNextChar ();
840 if (mBufLen
= 0) then begin mNextChar
:= #0; exit
; end;
841 if (mBufPos
>= mBufLen
) then
843 mBufLen
:= mFile
.Read(mBuffer
^, BufSize
);
844 if (mBufLen
< 0) then raise Exception
.Create('TFileTextParser: read error');
845 if (mBufLen
= 0) then begin mNextChar
:= #0; exit
; end;
848 assert(mBufPos
< mBufLen
);
849 mNextChar
:= mBuffer
[mBufPos
];
851 if (mNextChar
= #0) then mNextChar
:= ' ';
855 // ////////////////////////////////////////////////////////////////////////// //
856 constructor TStrTextParser
.Create (const astr
: AnsiString; aopts
: TOptions
=[TOption
.SignedNumbers
]);
860 inherited Create(aopts
);
864 destructor TStrTextParser
.Destroy ();
871 procedure TStrTextParser
.loadNextChar ();
874 if (mPos
> Length(mStr
)) then exit
;
875 mNextChar
:= mStr
[mPos
]; Inc(mPos
);
876 if (mNextChar
= #0) then mNextChar
:= ' ';
880 // ////////////////////////////////////////////////////////////////////////// //
881 constructor TTextWriter
.Create (); begin mIndent
:= 0; end;
882 procedure TTextWriter
.flush (); begin end;
883 procedure TTextWriter
.put (const s
: AnsiString); overload
; begin if (Length(s
) > 0) then putBuf((@(s
[1]))^, Length(s
)); end;
884 procedure TTextWriter
.put (v
: Byte); overload
; begin put('%d', [v
]); end;
885 procedure TTextWriter
.put (v
: Integer); overload
; begin put('%d', [v
]); end;
886 procedure TTextWriter
.put (const fmt
: AnsiString; args
: array of const); overload
; begin put(formatstrf(fmt
, args
)); end;
887 procedure TTextWriter
.putIndent (); var f
: Integer; begin for f
:= 1 to mIndent
do put(' '); end;
888 procedure TTextWriter
.indent (); begin Inc(mIndent
, 2); end;
889 procedure TTextWriter
.unindent (); begin Dec(mIndent
, 2); end;
892 // ////////////////////////////////////////////////////////////////////////// //
893 constructor TFileTextWriter
.Create (const fname
: AnsiString);
895 mFile
:= createDiskFile(fname
);
896 mStreamOwned
:= true;
898 GetMem(mBuffer
, BufSize
);
899 assert(mBuffer
<> nil);
904 constructor TFileTextWriter
.Create (ast
: TStream
; astOwned
: Boolean=true);
906 if (ast
= nil) then raise Exception
.Create('cannot write to nil stream');
908 mStreamOwned
:= astOwned
;
910 GetMem(mBuffer
, BufSize
);
911 assert(mBuffer
<> nil);
915 destructor TFileTextWriter
.Destroy ();
918 if (mBuffer
<> nil) then FreeMem(mBuffer
);
921 if (mStreamOwned
) then mFile
.Free();
927 procedure TFileTextWriter
.flush ();
929 if (mFile
<> nil) and (mBufUsed
> 0) then
931 mFile
.WriteBuffer(mBuffer
^, mBufUsed
);
937 procedure TFileTextWriter
.putBuf (constref buf
; len
: SizeUInt
);
942 if (len
= 0) then exit
;
946 left
:= BufSize
-mBufUsed
;
950 left
:= BufSize
-mBufUsed
;
953 if (left
> len
) then left
:= Integer(len
);
954 Move(pc
^, (mBuffer
+mBufUsed
)^, left
);
962 // ////////////////////////////////////////////////////////////////////////// //
963 constructor TStrTextWriter
.Create ();
969 destructor TStrTextWriter
.Destroy ();
976 procedure TStrTextWriter
.putBuf (constref buf
; len
: SizeUInt
);
982 SetLength(st
, Integer(len
));
983 Move(buf
, PChar(st
)^, Integer(len
));