DEADSOFTWARE

no more old mapreader: use textmap reader both for text and for binary maps
[d2df-sdl.git] / src / shared / xparser.pas
1 (* Copyright (C) DooM 2D:Forever Developers
2 *
3 * This program is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 *)
16 {$INCLUDE a_modes.inc}
17 unit xparser;
19 interface
21 uses
22 Classes;
25 // ////////////////////////////////////////////////////////////////////////// //
26 type
27 TTextParser = class
28 public
29 const
30 TTNone = -1;
31 TTEOF = 0;
32 TTId = 1;
33 TTInt = 2;
34 //TTFloat = 3; // not yet
35 TTStr = 4; // string
36 TTComma = 5; // ','
37 TTColon = 6; // ':'
38 TTSemi = 7; // ';'
39 TTBegin = 8; // left curly
40 TTEnd = 9; // right curly
41 TTDelim = 10; // other delimiters
43 private
44 mLine, mCol: Integer;
45 mCurChar, mNextChar: AnsiChar;
47 mAllowSignedNumbers: Boolean; // internal control
49 mTokLine, mTokCol: Integer; // token start
50 mTokType: Integer;
51 mTokStr: AnsiString; // string or identifier
52 mTokChar: AnsiChar; // for delimiters
53 mTokInt: Integer;
55 protected
56 procedure warmup (); virtual; // called in constructor to warm up the system
57 procedure loadNextChar (); virtual; abstract; // loads next char into mNextChar; #0 means 'eof'
59 public
60 constructor Create (loadToken: Boolean=true);
61 destructor Destroy (); override;
63 function isEOF (): Boolean; inline;
65 function skipChar (): Boolean; // returns `false` on eof
67 function skipBlanks (): Boolean; // ...and comments; returns `false` on eof
69 function skipToken (): Boolean; // returns `false` on eof
71 function expectId (): AnsiString;
72 procedure expectId (const aid: AnsiString);
73 function eatId (const aid: AnsiString): Boolean;
75 function expectStr (allowEmpty: Boolean=false): AnsiString;
76 function expectInt (): Integer;
78 procedure expectTT (ttype: Integer);
79 function eatTT (ttype: Integer): Boolean;
81 function expectDelim (const ch: AnsiChar): AnsiChar;
82 function eatDelim (const ch: AnsiChar): Boolean;
84 public
85 property col: Integer read mCol;
86 property line: Integer read mLine;
88 property curChar: AnsiChar read mCurChar;
89 property nextChar: AnsiChar read mNextChar;
91 // token start
92 property tokCol: Integer read mTokCol;
93 property tokLine: Integer read mTokLine;
95 property tokType: Integer read mTokType; // see TTXXX constants
96 property tokStr: AnsiString read mTokStr; // string or identifier
97 property tokChar: AnsiChar read mTokChar; // for delimiters
98 property tokInt: Integer read mTokInt;
99 end;
102 // ////////////////////////////////////////////////////////////////////////// //
103 type
104 TFileTextParser = class(TTextParser)
105 private
106 const BufSize = 65536;
108 private
109 mFile: TStream;
110 mBuffer: PChar;
111 mBufLen: Integer;
112 mBufPos: Integer;
114 protected
115 procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof'
117 public
118 constructor Create (const fname: AnsiString; loadToken: Boolean=true);
119 constructor Create (st: TStream; loadToken: Boolean=true); // will take ownership on st
120 destructor Destroy (); override;
121 end;
123 TStrTextParser = class(TTextParser)
124 private
125 mStr: AnsiString;
126 mPos: Integer;
128 protected
129 procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof'
131 public
132 constructor Create (const astr: AnsiString; loadToken: Boolean=true);
133 destructor Destroy (); override;
134 end;
137 // ////////////////////////////////////////////////////////////////////////// //
138 type
139 TTextWriter = class
140 protected
141 mIndent: Integer;
143 protected
144 procedure putBuf (constref buf; len: SizeUInt); virtual; abstract;
146 public
147 constructor Create ();
149 procedure put (const s: AnsiString); overload;
150 procedure put (v: Byte); overload;
151 procedure put (v: Integer); overload;
152 procedure put (const fmt: AnsiString; args: array of const); overload;
153 procedure putIndent ();
154 procedure indent ();
155 procedure unindent ();
156 end;
159 // ////////////////////////////////////////////////////////////////////////// //
160 type
161 TFileTextWriter = class(TTextWriter)
162 private
163 mFile: TStream;
165 protected
166 procedure putBuf (constref buf; len: SizeUInt); override;
168 public
169 constructor Create (const fname: AnsiString);
170 destructor Destroy (); override;
171 end;
174 implementation
176 uses
177 SysUtils, utils;
180 // ////////////////////////////////////////////////////////////////////////// //
181 function StrEqu (const a, b: AnsiString): Boolean; inline; begin result := (a = b); end;
184 // ////////////////////////////////////////////////////////////////////////// //
185 var
186 wc2shitmap: array[0..65535] of AnsiChar;
187 wc2shitmapInited: Boolean = false;
190 // ////////////////////////////////////////////////////////////////////////// //
191 procedure initShitMap ();
192 const
193 cp1251: array[0..127] of Word = (
194 $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F,
195 $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F,
196 $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407,
197 $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457,
198 $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F,
199 $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F,
200 $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F,
201 $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F
202 );
203 var
204 f: Integer;
205 begin
206 for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?';
207 for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f);
208 for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128);
209 wc2shitmapInited := true;
210 end;
213 // ////////////////////////////////////////////////////////////////////////// //
214 // TODO: make a hash or something
215 function wcharTo1251 (wc: WideChar): AnsiChar; inline;
216 begin
217 if not wc2shitmapInited then initShitMap();
218 if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)];
219 end;
222 // ////////////////////////////////////////////////////////////////////////// //
223 constructor TTextParser.Create (loadToken: Boolean=true);
224 begin
225 mLine := 1;
226 mCol := 1;
227 mCurChar := #0;
228 mNextChar := #0;
229 mTokType := TTNone;
230 mTokStr := '';
231 mTokChar := #0;
232 mTokInt := 0;
233 mAllowSignedNumbers := true;
234 warmup(); // change `mAllowSignedNumbers` there, if necessary
235 if loadToken then skipToken();
236 end;
239 destructor TTextParser.Destroy ();
240 begin
241 inherited;
242 end;
245 function TTextParser.isEOF (): Boolean; inline; begin result := (mCurChar = #0); end;
248 procedure TTextParser.warmup ();
249 begin
250 mNextChar := ' ';
251 loadNextChar();
252 mCurChar := mNextChar;
253 if (mNextChar <> #0) then loadNextChar();
254 end;
257 function TTextParser.skipChar (): Boolean;
258 begin
259 if (mCurChar = #0) then begin result := false; exit; end;
260 if (mCurChar = #10) then begin mCol := 1; Inc(mLine); end else Inc(mCol);
261 mCurChar := mNextChar;
262 if (mCurChar = #0) then begin result := false; exit; end;
263 loadNextChar();
264 // skip CR in CR/LF
265 if (mCurChar = #13) then
266 begin
267 if (mNextChar = #10) then loadNextChar();
268 mCurChar := #10;
269 end;
270 result := true;
271 end;
274 function TTextParser.skipBlanks (): Boolean;
275 var
276 level: Integer;
277 begin
278 while not isEOF do
279 begin
280 if (curChar = '/') then
281 begin
282 // single-line comment
283 if (nextChar = '/') then
284 begin
285 while not isEOF and (curChar <> #10) do skipChar();
286 skipChar(); // skip EOL
287 continue;
288 end;
289 // multline comment
290 if (nextChar = '*') then
291 begin
292 // skip comment start
293 skipChar();
294 skipChar();
295 while not isEOF do
296 begin
297 if (curChar = '*') and (nextChar = '/') then
298 begin
299 // skip comment end
300 skipChar();
301 skipChar();
302 break;
303 end;
304 skipChar();
305 end;
306 continue;
307 end;
308 // nesting multline comment
309 if (nextChar = '+') then
310 begin
311 // skip comment start
312 skipChar();
313 skipChar();
314 level := 1;
315 while not isEOF do
316 begin
317 if (curChar = '+') and (nextChar = '/') then
318 begin
319 // skip comment end
320 skipChar();
321 skipChar();
322 Dec(level);
323 if (level = 0) then break;
324 continue;
325 end;
326 if (curChar = '/') and (nextChar = '+') then
327 begin
328 // skip comment start
329 skipChar();
330 skipChar();
331 Inc(level);
332 continue;
333 end;
334 skipChar();
335 end;
336 continue;
337 end;
338 end;
339 if (curChar > ' ') then break;
340 skipChar(); // skip blank
341 end;
342 result := not isEOF;
343 end;
346 function TTextParser.skipToken (): Boolean;
348 procedure parseInt ();
349 var
350 neg: Boolean = false;
351 base: Integer = -1;
352 n: Integer;
353 begin
354 if mAllowSignedNumbers then
355 begin
356 if (curChar = '+') or (curChar = '-') then
357 begin
358 neg := (curChar = '-');
359 skipChar();
360 if (curChar < '0') or (curChar > '9') then
361 begin
362 mTokType := TTDelim;
363 if (neg) then mTokChar := '-' else mTokChar := '+';
364 exit;
365 end;
366 end;
367 end;
368 if (curChar = '0') then
369 begin
370 case nextChar of
371 'b','B': base := 2;
372 'o','O': base := 8;
373 'd','D': base := 10;
374 'h','H': base := 16;
375 end;
376 if (base > 0) then
377 begin
378 // skip prefix
379 skipChar();
380 skipChar();
381 end;
382 end;
383 // default base
384 if (base < 0) then base := 10;
385 if (digitInBase(curChar, base) < 0) then raise Exception.Create('invalid number');
386 mTokType := TTInt;
387 mTokInt := 0; // just in case
388 while not isEOF do
389 begin
390 n := digitInBase(curChar, base);
391 if (n < 0) then break;
392 n := mTokInt*10+n;
393 if (n < 0) or (n < mTokInt) then raise Exception.Create('integer overflow');
394 mTokInt := n;
395 skipChar();
396 end;
397 // check for valid number end
398 if not isEOF then
399 begin
400 if (curChar = '.') then raise Exception.Create('floating numbers aren''t supported yet');
401 if (curChar = '_') or ((curChar >= 'A') and (curChar <= 'Z')) or ((curChar >= 'a') and (curChar <= 'z')) or (curChar >= #128) then
402 begin
403 raise Exception.Create('invalid number');
404 end;
405 end;
406 if neg then mTokInt := -mTokInt;
407 end;
409 procedure parseString ();
410 var
411 qch, ch: AnsiChar;
412 n: Integer;
413 begin
414 mTokType := TTStr;
415 mTokStr := ''; // just in case
416 qch := curChar;
417 skipChar(); // skip starting quote
418 while not isEOF do
419 begin
420 // escape
421 if (qch = '"') and (curChar = '\') then
422 begin
423 if (nextChar = #0) then raise Exception.Create('unterminated string escape');
424 ch := nextChar;
425 // skip backslash and escape type
426 skipChar();
427 skipChar();
428 case ch of
429 't': mTokStr += #9;
430 'n': mTokStr += #10;
431 'r': mTokStr += #13;
432 'z': mTokStr += #0;
433 'e': mTokStr += #27;
434 'x', 'X': // hex escape
435 begin
436 n := digitInBase(curChar, 16);
437 if (n < 0) then raise Exception.Create('invalid hexstr escape');
438 skipChar();
439 if (digitInBase(curChar, 16) > 0) then
440 begin
441 n := n*16+digitInBase(curChar, 16);
442 skipChar();
443 end;
444 mTokStr += AnsiChar(n);
445 end;
446 else mTokStr += ch;
447 end;
448 continue;
449 end;
450 // duplicate single quote (pascal style)
451 if (qch = '''') and (curChar = '''') and (nextChar = '''') then
452 begin
453 // skip both quotes
454 skipChar();
455 skipChar();
456 mTokStr += '''';
457 continue;
458 end;
459 if (curChar = qch) then
460 begin
461 skipChar(); // skip ending quote
462 break;
463 end;
464 mTokStr += curChar;
465 skipChar();
466 end;
467 end;
469 procedure parseId ();
470 begin
471 mTokType := TTId;
472 mTokStr := ''; // just in case
473 while (curChar = '_') or ((curChar >= '0') and (curChar <= '9')) or
474 ((curChar >= 'A') and (curChar <= 'Z')) or
475 ((curChar >= 'a') and (curChar <= 'z')) or
476 (curChar >= #128) do
477 begin
478 mTokStr += curChar;
479 skipChar();
480 end;
481 end;
483 begin
484 mTokType := TTEOF;
485 mTokStr := '';
486 mTokChar := #0;
487 mTokInt := 0;
489 if not skipBlanks() then
490 begin
491 result := false;
492 mTokLine := mLine;
493 mTokCol := mCol;
494 exit;
495 end;
497 mTokLine := mLine;
498 mTokCol := mCol;
500 result := true;
502 // number?
503 if mAllowSignedNumbers and ((curChar = '+') or (curChar = '-')) then begin parseInt(); exit; end;
504 if (curChar >= '0') and (curChar <= '9') then begin parseInt(); exit; end;
506 // string?
507 if (curChar = '"') or (curChar = '''') then begin parseString(); exit; end;
509 // identifier?
510 if (curChar = '_') or ((curChar >= 'A') and (curChar <= 'Z')) or ((curChar >= 'a') and (curChar <= 'z')) or (curChar >= #128) then begin parseId(); exit; end;
512 // known delimiters?
513 case curChar of
514 ',': mTokType := TTComma;
515 ':': mTokType := TTColon;
516 ';': mTokType := TTSemi;
517 '{': mTokType := TTBegin;
518 '}': mTokType := TTEnd;
519 else mTokType := TTDelim;
520 end;
521 mTokChar := curChar;
522 skipChar();
523 end;
526 function TTextParser.expectId (): AnsiString;
527 begin
528 if (mTokType <> TTId) then raise Exception.Create('identifier expected');
529 result := mTokStr;
530 skipToken();
531 end;
534 procedure TTextParser.expectId (const aid: AnsiString);
535 begin
536 if (mTokType <> TTId) or (not StrEqu(mTokStr, aid)) then raise Exception.Create('identifier '''+aid+''' expected');
537 skipToken();
538 end;
541 function TTextParser.eatId (const aid: AnsiString): Boolean;
542 begin
543 result := false;
544 if (mTokType <> TTId) or (not StrEqu(mTokStr, aid)) then exit;
545 result := true;
546 skipToken();
547 end;
550 function TTextParser.expectStr (allowEmpty: Boolean=false): AnsiString;
551 begin
552 if (mTokType <> TTStr) then raise Exception.Create('string expected');
553 if (not allowEmpty) and (Length(mTokStr) = 0) then raise Exception.Create('non-empty string expected');
554 result := mTokStr;
555 skipToken();
556 end;
559 function TTextParser.expectInt (): Integer;
560 begin
561 if (mTokType <> TTInt) then raise Exception.Create('string expected');
562 result := mTokInt;
563 skipToken();
564 end;
567 procedure TTextParser.expectTT (ttype: Integer);
568 begin
569 if (mTokType <> ttype) then raise Exception.Create('unexpected token');
570 skipToken();
571 end;
574 function TTextParser.eatTT (ttype: Integer): Boolean;
575 begin
576 result := (mTokType = ttype);
577 if result then skipToken();
578 end;
581 function TTextParser.expectDelim (const ch: AnsiChar): AnsiChar;
582 begin
583 if (mTokType <> TTDelim) then raise Exception.Create(Format('delimiter ''%s'' expected', [ch]));
584 result := mTokChar;
585 skipToken();
586 end;
589 function TTextParser.eatDelim (const ch: AnsiChar): Boolean;
590 begin
591 result := false;
592 if (mTokType <> TTDelim) or (mTokChar <> ch) then exit;
593 result := true;
594 skipToken();
595 end;
598 // ////////////////////////////////////////////////////////////////////////// //
599 constructor TFileTextParser.Create (const fname: AnsiString; loadToken: Boolean=true);
600 begin
601 mBuffer := nil;
602 mFile := openDiskFileRO(fname);
603 GetMem(mBuffer, BufSize);
604 mBufPos := 0;
605 mBufLen := mFile.Read(mBuffer^, BufSize);
606 if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error');
607 inherited Create(loadToken);
608 end;
611 constructor TFileTextParser.Create (st: TStream; loadToken: Boolean=true);
612 begin
613 if (st = nil) then raise Exception.Create('cannot create parser for nil stream');
614 mFile := st;
615 GetMem(mBuffer, BufSize);
616 mBufPos := 0;
617 mBufLen := mFile.Read(mBuffer^, BufSize);
618 if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error');
619 inherited Create(loadToken);
620 end;
623 destructor TFileTextParser.Destroy ();
624 begin
625 if (mBuffer <> nil) then FreeMem(mBuffer);
626 mFile.Free();
627 inherited;
628 end;
631 procedure TFileTextParser.loadNextChar ();
632 begin
633 if (mBufLen = 0) then begin mNextChar := #0; exit; end;
634 if (mBufPos >= mBufLen) then
635 begin
636 mBufLen := mFile.Read(mBuffer^, BufSize);
637 if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error');
638 if (mBufLen = 0) then begin mNextChar := #0; exit; end;
639 mBufPos := 0;
640 end;
641 assert(mBufPos < mBufLen);
642 mNextChar := mBuffer[mBufPos];
643 Inc(mBufPos);
644 if (mNextChar = #0) then mNextChar := ' ';
645 end;
648 // ////////////////////////////////////////////////////////////////////////// //
649 constructor TStrTextParser.Create (const astr: AnsiString; loadToken: Boolean=true);
650 begin
651 mStr := astr;
652 mPos := 1;
653 inherited Create(loadToken);
654 end;
657 destructor TStrTextParser.Destroy ();
658 begin
659 mStr := '';
660 inherited;
661 end;
664 procedure TStrTextParser.loadNextChar ();
665 begin
666 mNextChar := #0;
667 if (mPos > Length(mStr)) then exit;
668 mNextChar := mStr[mPos]; Inc(mPos);
669 if (mNextChar = #0) then mNextChar := ' ';
670 end;
673 // ////////////////////////////////////////////////////////////////////////// //
674 constructor TTextWriter.Create (); begin mIndent := 0; end;
675 procedure TTextWriter.put (const s: AnsiString); overload; begin if (Length(s) > 0) then putBuf((@(s[1]))^, Length(s)); end;
676 procedure TTextWriter.put (v: Byte); overload; begin put('%d', [v]); end;
677 procedure TTextWriter.put (v: Integer); overload; begin put('%d', [v]); end;
678 procedure TTextWriter.put (const fmt: AnsiString; args: array of const); overload; begin put(formatstrf(fmt, args)); end;
679 procedure TTextWriter.putIndent (); var f: Integer; begin for f := 1 to mIndent do put(' '); end;
680 procedure TTextWriter.indent (); begin Inc(mIndent, 2); end;
681 procedure TTextWriter.unindent (); begin Dec(mIndent, 2); end;
684 // ////////////////////////////////////////////////////////////////////////// //
685 constructor TFileTextWriter.Create (const fname: AnsiString);
686 begin
687 mFile := createDiskFile(fname);
688 inherited Create();
689 end;
692 destructor TFileTextWriter.Destroy ();
693 begin
694 mFile.Free();
695 inherited;
696 end;
699 procedure TFileTextWriter.putBuf (constref buf; len: SizeUInt);
700 var
701 pc: PChar;
702 begin
703 if (len > 0) then
704 begin
705 pc := @buf;
706 mFile.WriteBuffer(pc^, len);
708 while (len > 0) do
709 begin
710 write(pc^);
711 Inc(pc);
712 Dec(len);
713 end;
715 end;
716 end;
719 end.