diff --git a/src/shared/xparser.pas b/src/shared/xparser.pas
index a7b8a51c1bbec667d0f61cc89480186141c3e591..76ac3c0165483ef90d4066904540d1eef5b7a2dd 100644 (file)
--- a/src/shared/xparser.pas
+++ b/src/shared/xparser.pas
Classes;
-// ////////////////////////////////////////////////////////////////////////// //
-type
- TUtf8DecoderFast = packed record
- public
- const Replacement = $FFFD; // replacement char for invalid unicode
- const Accept = 0;
- const Reject = 12;
-
- private
- state: LongWord;
-
- public
- codepoint: LongWord; // decoded codepoint (valid only when decoder is in "complete" state)
-
- public
- constructor Create (v: Boolean{fuck you, fpc});
-
- procedure reset (); inline;
-
- function complete (): Boolean; inline; // is current character complete? take `codepoint` then
- function invalid (): Boolean; inline;
- function completeOrInvalid (): Boolean; inline;
-
- // process one byte, return `true` if codepoint is ready
- function decode (b: Byte): Boolean; inline; overload;
- function decode (c: AnsiChar): Boolean; inline; overload;
- end;
-
-
// ////////////////////////////////////////////////////////////////////////// //
type
TTextParser = class
procedure loadNextChar (); virtual; abstract; // loads next char into mNextChar; #0 means 'eof'
public
- class function quote (const s: AnsiString): AnsiString;
-
- public
- constructor Create (loadToken: Boolean=true);
+ constructor Create ();
destructor Destroy (); override;
function isEOF (): Boolean; inline;
// ////////////////////////////////////////////////////////////////////////// //
type
TFileTextParser = class(TTextParser)
+ private
+ const BufSize = 16384; // size in bytes of the read-ahead buffer
+
private
mFile: TStream;
+ mStreamOwned: Boolean; // when true, the destructor frees `mFile`
+ mBuffer: PChar; // read-ahead buffer of `BufSize` bytes
+ mBufLen: Integer; // number of bytes the last `mFile.Read` put into `mBuffer`
+ mBufPos: Integer; // index in `mBuffer` of the next byte to hand out
protected
procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof'
public
- constructor Create (const fname: AnsiString; loadToken: Boolean=true);
+ constructor Create (const fname: AnsiString);
+ constructor Create (st: TStream; astOwned: Boolean=true); // will take ownership on st by default
destructor Destroy (); override;
end;
procedure loadNextChar (); override; // loads next char into mNextChar; #0 means 'eof'
public
- constructor Create (const astr: AnsiString; loadToken: Boolean=true);
+ constructor Create (const astr: AnsiString);
destructor Destroy (); override;
end;
public
constructor Create ();
+ procedure flush (); virtual;
+
procedure put (const s: AnsiString); overload;
procedure put (v: Byte); overload;
procedure put (v: Integer); overload;
// ////////////////////////////////////////////////////////////////////////// //
type
TFileTextWriter = class(TTextWriter)
+ private
+ const BufSize = 16384; // size in bytes of the output buffer
+
private
mFile: TStream;
+ mStreamOwned: Boolean; // when true, the destructor frees `mFile`
+ mBuffer: PAnsiChar; // output buffer of `BufSize` bytes
+ mBufUsed: Integer; // number of bytes in `mBuffer` not yet written to `mFile`
protected
procedure putBuf (constref buf; len: SizeUInt); override;
public
constructor Create (const fname: AnsiString);
+ constructor Create (ast: TStream; astOwned: Boolean=true); // will own the stream by default
destructor Destroy (); override;
+
+ procedure flush (); override; // writes the buffered bytes to `mFile`
end;
+ // text writer that accumulates its output in a string instead of a stream
+ TStrTextWriter = class(TTextWriter)
+ private
+ mStr: AnsiString; // everything written so far
-// ////////////////////////////////////////////////////////////////////////// //
-function wcharTo1251 (wc: WideChar): AnsiChar; inline;
-function utfTo1251 (const s: AnsiString): AnsiString;
+ protected
+ procedure putBuf (constref buf; len: SizeUInt); override; // appends `len` bytes to `mStr`
-function digitInBase (ch: AnsiChar; base: Integer): Integer;
+ public
+ constructor Create ();
+ destructor Destroy (); override;
+
+ property str: AnsiString read mStr; // read-only access to the accumulated text
+ end;
implementation
SysUtils, utils;
-var
- wc2shitmap: array[0..65535] of AnsiChar;
- wc2shitmapInited: Boolean = false;
-
-
-// ////////////////////////////////////////////////////////////////////////// //
-procedure initShitMap ();
-const
- cp1251: array[0..127] of Word = (
- $0402,$0403,$201A,$0453,$201E,$2026,$2020,$2021,$20AC,$2030,$0409,$2039,$040A,$040C,$040B,$040F,
- $0452,$2018,$2019,$201C,$201D,$2022,$2013,$2014,$003F,$2122,$0459,$203A,$045A,$045C,$045B,$045F,
- $00A0,$040E,$045E,$0408,$00A4,$0490,$00A6,$00A7,$0401,$00A9,$0404,$00AB,$00AC,$00AD,$00AE,$0407,
- $00B0,$00B1,$0406,$0456,$0491,$00B5,$00B6,$00B7,$0451,$2116,$0454,$00BB,$0458,$0405,$0455,$0457,
- $0410,$0411,$0412,$0413,$0414,$0415,$0416,$0417,$0418,$0419,$041A,$041B,$041C,$041D,$041E,$041F,
- $0420,$0421,$0422,$0423,$0424,$0425,$0426,$0427,$0428,$0429,$042A,$042B,$042C,$042D,$042E,$042F,
- $0430,$0431,$0432,$0433,$0434,$0435,$0436,$0437,$0438,$0439,$043A,$043B,$043C,$043D,$043E,$043F,
- $0440,$0441,$0442,$0443,$0444,$0445,$0446,$0447,$0448,$0449,$044A,$044B,$044C,$044D,$044E,$044F
- );
-var
- f: Integer;
-begin
- for f := 0 to High(wc2shitmap) do wc2shitmap[f] := '?';
- for f := 0 to 127 do wc2shitmap[f] := AnsiChar(f);
- for f := 0 to 127 do wc2shitmap[cp1251[f]] := AnsiChar(f+128);
- wc2shitmapInited := true;
-end;
-
-
-// ////////////////////////////////////////////////////////////////////////// //
-// TODO: make a hash or something
-function wcharTo1251 (wc: WideChar): AnsiChar; inline;
-begin
- if not wc2shitmapInited then initShitMap();
- if (LongWord(wc) > 65535) then result := '?' else result := wc2shitmap[LongWord(wc)];
-end;
-
-
-// ////////////////////////////////////////////////////////////////////////// //
-// fast state-machine based UTF-8 decoder; using 8 bytes of memory
-// code points from invalid range will never be valid, this is the property of the state machine
-const
- // see http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
- utf8dfa: array[0..$16c-1] of Byte = (
- // maps bytes to character classes
- $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 00-0f
- $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 10-1f
- $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 20-2f
- $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 30-3f
- $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 40-4f
- $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 50-5f
- $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 60-6f
- $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00, // 70-7f
- $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01, // 80-8f
- $09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09,$09, // 90-9f
- $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // a0-af
- $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07, // b0-bf
- $08,$08,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // c0-cf
- $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02, // d0-df
- $0a,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$04,$03,$03, // e0-ef
- $0b,$06,$06,$06,$05,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08, // f0-ff
- // maps a combination of a state of the automaton and a character class to a state
- $00,$0c,$18,$24,$3c,$60,$54,$0c,$0c,$0c,$30,$48,$0c,$0c,$0c,$0c, // 100-10f
- $0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$00,$0c,$0c,$0c,$0c,$0c,$00, // 110-11f
- $0c,$00,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$18,$0c,$0c, // 120-12f
- $0c,$0c,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$18,$0c,$0c, // 130-13f
- $0c,$0c,$0c,$0c,$0c,$18,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$24, // 140-14f
- $0c,$24,$0c,$0c,$0c,$24,$0c,$0c,$0c,$0c,$0c,$24,$0c,$24,$0c,$0c, // 150-15f
- $0c,$24,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c,$0c);
-
-
-// ////////////////////////////////////////////////////////////////////////// //
-constructor TUtf8DecoderFast.Create (v: Boolean{fuck you, fpc}); begin state := Accept; codepoint := 0; end;
-
-procedure TUtf8DecoderFast.reset (); inline; begin state := Accept; codepoint := 0; end;
-
-function TUtf8DecoderFast.complete (): Boolean; inline; begin result := (state = Accept); end;
-function TUtf8DecoderFast.invalid (): Boolean; inline; begin result := (state = Reject); end;
-function TUtf8DecoderFast.completeOrInvalid (): Boolean; inline; begin result := (state = Accept) or (state = Reject); end;
-
-function TUtf8DecoderFast.decode (c: AnsiChar): Boolean; inline; overload; begin result := decode(Byte(c)); end;
-
-function TUtf8DecoderFast.decode (b: Byte): Boolean; inline; overload;
-var
- tp: LongWord;
-begin
- if (state = Reject) then begin state := Accept; codepoint := 0; end;
- tp := utf8dfa[b];
- if (state <> Accept) then codepoint := (b and $3f) or (codepoint shl 6) else codepoint := ($ff shr tp) and b;
- state := utf8dfa[256+state+tp];
- if (state = Reject) then begin codepoint := Replacement; state := Accept; end;
- result := (state = Accept);
-end;
-
-
-// ////////////////////////////////////////////////////////////////////////// //
-function utfTo1251 (const s: AnsiString): AnsiString;
-var
- f, c: Integer;
- ud: TUtf8DecoderFast;
-begin
- for f := 1 to Length(s) do
- begin
- if (Byte(s[f]) > 127) then
- begin
- ud := TUtf8DecoderFast.Create(true);
- result := '';
- for c := 1 to Length(s) do
- begin
- if ud.decode(s[c]) then result += wcharTo1251(WideChar(ud.codepoint));
- end;
- exit;
- end;
- end;
- result := s;
-end;
-
-
// ////////////////////////////////////////////////////////////////////////// //
-function digitInBase (ch: AnsiChar; base: Integer): Integer;
-begin
- result := -1;
- if (base < 1) or (base > 36) then exit;
- if (ch < '0') then exit;
- if (base <= 10) then
- begin
- if (Integer(ch) >= 48+base) then exit;
- result := Integer(ch)-48;
- end
- else
- begin
- if (ch >= '0') and (ch <= '9') then begin result := Integer(ch)-48; exit; end;
- if (ch >= 'a') and (ch <= 'z') then Dec(ch, 32); // poor man's tolower()
- if (ch < 'A') or (Integer(ch) >= 65+(base-10)) then exit;
- result := Integer(ch)-65+10;
- end;
-end;
+// plain `=` comparison of two strings; used by the parser's identifier checks
+function StrEqu (const a, b: AnsiString): Boolean; inline;
+begin
+  result := (a = b);
+end;
// ////////////////////////////////////////////////////////////////////////// //
-class function TTextParser.quote (const s: AnsiString): AnsiString;
-
- function squote (const s: AnsiString): AnsiString;
- var
- f: Integer;
- begin
- result := '''';
- for f := 1 to Length(s) do
- begin
- if (s[f] = '''') then result += '''';
- result += s[f];
- end;
- result += '''';
- end;
-
- function dquote (const s: AnsiString): AnsiString;
- var
- f: Integer;
- ch: AnsiChar;
- begin
- result := '"';
- for f := 1 to Length(s) do
- begin
- ch := s[f];
- if (ch = #0) then result += '\z'
- else if (ch = #9) then result += '\t'
- else if (ch = #10) then result += '\n'
- else if (ch = #13) then result += '\r'
- else if (ch = #27) then result += '\e'
- else if (ch < ' ') or (ch = #127) then
- begin
- result += '\x';
- result += LowerCase(IntToHex(Integer(ch), 2));
- end
- else if (ch = '"') or (ch = '\') then
- begin
- result += '\';
- result += ch;
- end
- else
- begin
- result += ch;
- end;
- end;
- result += '"';
- end;
-
-var
- needSingle: Boolean = false;
- f: Integer;
-begin
- for f := 1 to Length(s) do
- begin
- if (s[f] = '''') then begin needSingle := true; continue; end;
- if (s[f] < ' ') or (s[f] = #127) then begin result := dquote(s); exit; end;
- end;
- if needSingle then result := squote(s) else result := ''''+s+'''';
-end;
-
-
-// ////////////////////////////////////////////////////////////////////////// //
-constructor TTextParser.Create (loadToken: Boolean=true);
+constructor TTextParser.Create ();
begin
mLine := 1;
mCol := 1;
mTokInt := 0;
mAllowSignedNumbers := true;
warmup(); // change `mAllowSignedNumbers` there, if necessary
- if loadToken then skipToken();
+ skipToken(); // the first token is now always loaded; the optional `loadToken` switch is gone
end;
procedure TTextParser.expectId (const aid: AnsiString);
begin
- if (mTokType <> TTId) or (CompareText(mTokStr, aid) <> 0) then raise Exception.Create('identifier '''+aid+''' expected');
+ // the current token must be an identifier equal to `aid`; StrEqu compares with `=`, so the match is exact
+ if (mTokType <> TTId) or (not StrEqu(mTokStr, aid)) then raise Exception.Create('identifier '''+aid+''' expected');
skipToken();
end;
function TTextParser.eatId (const aid: AnsiString): Boolean;
begin
result := false;
- if (mTokType <> TTId) or (CompareText(mTokStr, aid) <> 0) then exit;
+ // leave the token in place unless it is an identifier equal to `aid`; StrEqu compares with `=`, so the match is exact
+ if (mTokType <> TTId) or (not StrEqu(mTokStr, aid)) then exit;
result := true;
skipToken();
end;
// ////////////////////////////////////////////////////////////////////////// //
-constructor TFileTextParser.Create (const fname: AnsiString; loadToken: Boolean=true);
+// opens `fname` through openDiskFileRO, owns the resulting stream, and pre-reads the first buffer
+constructor TFileTextParser.Create (const fname: AnsiString);
begin
+ mBuffer := nil;
mFile := openDiskFileRO(fname);
- inherited Create(loadToken);
+ mStreamOwned := true; // the stream was opened here, so the destructor frees it
+ GetMem(mBuffer, BufSize);
+ mBufPos := 0;
+ mBufLen := mFile.Read(mBuffer^, BufSize); // pre-read the first chunk
+ if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error');
+ inherited Create(); // TTextParser.Create ends with skipToken(), so the buffer is filled first
+end;
+
+
+// parses from an already opened stream; when `astOwned` is true the destructor frees the stream
+constructor TFileTextParser.Create (st: TStream; astOwned: Boolean=true);
+begin
+ if (st = nil) then raise Exception.Create('cannot create parser for nil stream');
+ mFile := st;
+ mStreamOwned := astOwned;
+ GetMem(mBuffer, BufSize);
+ mBufPos := 0;
+ mBufLen := mFile.Read(mBuffer^, BufSize); // pre-read the first chunk
+ if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error');
+ inherited Create(); // TTextParser.Create ends with skipToken(), so the buffer is filled first
end;
destructor TFileTextParser.Destroy ();
begin
- mFile.Free();
+ if (mBuffer <> nil) then FreeMem(mBuffer); // release the read-ahead buffer, if one was allocated
+ mBuffer := nil;
+ mBufPos := 0;
+ mBufLen := 0;
+ if mStreamOwned then mFile.Free(); // a stream that is not owned is not freed here
+ mFile := nil;
inherited;
end;
procedure TFileTextParser.loadNextChar ();
-var
- rd: Integer;
begin
- rd := mFile.Read(mNextChar, 1);
- if (rd = 0) then begin mNextChar := #0; exit; end;
+ if (mBufLen = 0) then begin mNextChar := #0; exit; end; // the last read returned nothing: report 'eof'
+ if (mBufPos >= mBufLen) then
+ begin
+ // buffer exhausted: read the next chunk from the stream
+ mBufLen := mFile.Read(mBuffer^, BufSize);
+ if (mBufLen < 0) then raise Exception.Create('TFileTextParser: read error');
+ if (mBufLen = 0) then begin mNextChar := #0; exit; end;
+ mBufPos := 0;
+ end;
+ assert(mBufPos < mBufLen);
+ mNextChar := mBuffer[mBufPos];
+ Inc(mBufPos);
+ // a literal #0 in the data is turned into a space, because #0 is reserved for 'eof'
if (mNextChar = #0) then mNextChar := ' ';
end;
// ////////////////////////////////////////////////////////////////////////// //
-constructor TStrTextParser.Create (const astr: AnsiString; loadToken: Boolean=true);
+constructor TStrTextParser.Create (const astr: AnsiString);
begin
mStr := astr;
mPos := 1;
- inherited Create(loadToken);
+ inherited Create(); // source string and position are set before the inherited constructor runs
end;
// ////////////////////////////////////////////////////////////////////////// //
constructor TTextWriter.Create (); begin mIndent := 0; end;
+procedure TTextWriter.flush (); begin end; // no-op in the base class; TFileTextWriter overrides it
procedure TTextWriter.put (const s: AnsiString); overload; begin if (Length(s) > 0) then putBuf((@(s[1]))^, Length(s)); end;
procedure TTextWriter.put (v: Byte); overload; begin put('%d', [v]); end;
procedure TTextWriter.put (v: Integer); overload; begin put('%d', [v]); end;
constructor TFileTextWriter.Create (const fname: AnsiString);
begin
mFile := createDiskFile(fname);
+ mStreamOwned := true; // the stream was created here, so the destructor frees it
+ mBufUsed := 0; // the output buffer starts empty
+ GetMem(mBuffer, BufSize);
+ assert(mBuffer <> nil);
inherited Create();
end;
+// writes to an already opened stream; when `astOwned` is true the destructor frees the stream
+// raises Exception when `ast` is nil
+constructor TFileTextWriter.Create (ast: TStream; astOwned: Boolean=true);
+begin
+  if (ast = nil) then raise Exception.Create('cannot write to nil stream');
+  mFile := ast;
+  mStreamOwned := astOwned;
+  mBufUsed := 0; // the output buffer starts empty
+  GetMem(mBuffer, BufSize);
+  assert(mBuffer <> nil);
+  // run the base constructor too, exactly like the file-name overload does
+  inherited Create();
+end;
+
+
destructor TFileTextWriter.Destroy ();
begin
- mFile.Free();
+  // flush() can raise on a write error; the buffer and the stream must be released anyway,
+  // and the inherited destructor must still run, so everything after the flush sits in `finally`
+  try
+    flush();
+  finally
+    if (mBuffer <> nil) then FreeMem(mBuffer);
+    mBufUsed := 0;
+    mBuffer := nil;
+    if (mStreamOwned) then mFile.Free(); // a stream that is not owned is not freed here
+    mFile := nil;
inherited;
+  end;
end;
+// writes the pending bytes of `mBuffer` to `mFile` and marks the buffer empty
+procedure TFileTextWriter.flush ();
+begin
+  // nothing pending, or no stream to receive it: only reset the fill counter
+  if (mFile = nil) or (mBufUsed <= 0) then
+  begin
+    mBufUsed := 0;
+    exit;
+  end;
+  mFile.WriteBuffer(mBuffer^, mBufUsed);
+  mBufUsed := 0;
+end;
+
+
procedure TFileTextWriter.putBuf (constref buf; len: SizeUInt);
var
pc: PChar;
+ left: Integer; // free space in `mBuffer`, then the size of the chunk copied in this round
begin
- if (len > 0) then
+ if (len = 0) then exit;
+ pc := @buf;
+ while (len > 0) do // copy the input into the buffer chunk by chunk
begin
- pc := @buf;
- mFile.WriteBuffer(pc^, len);
- {
- while (len > 0) do
+ left := BufSize-mBufUsed;
+ if (left = 0) then // buffer is full: write it out to make room
begin
- write(pc^);
- Inc(pc);
- Dec(len);
+ flush();
+ left := BufSize-mBufUsed;
+ assert(left > 0);
end;
- }
+ if (left > len) then left := Integer(len); // never copy more than what is left of the input
+ Move(pc^, (mBuffer+mBufUsed)^, left);
+ Inc(mBufUsed, left);
+ pc += left;
+ len -= left;
+ end;
+end;
+
+
+// ////////////////////////////////////////////////////////////////////////// //
+// starts with an empty result string
+constructor TStrTextWriter.Create ();
+begin
+  mStr := '';
+  // run the base constructor as well, like TFileTextWriter.Create (fname) does
+  inherited Create();
+end;
+
+
+// drops the accumulated text, then runs the inherited destructor
+destructor TStrTextWriter.Destroy ();
+begin
+  mStr := '';
+  inherited Destroy();
+end;
+
+
+// appends `len` bytes starting at `buf` to the accumulated string; does nothing when `len` is 0
+procedure TStrTextWriter.putBuf (constref buf; len: SizeUInt);
+var
+  olen: Integer; // length of `mStr` before this append
+begin
+  if (len > 0) then
+  begin
+    // grow `mStr` in place and copy straight into the new tail: one copy, no temporary string
+    olen := Length(mStr);
+    SetLength(mStr, olen+Integer(len));
+    Move(buf, mStr[olen+1], Integer(len));
end;
end;