1 (* ==================================================================== *)
3 (* Scanner Module for the Gardens Point Component Pascal Compiler. *)
4 (* Copyright (c) John Gough 1999, 2000. *)
5 (* This module was extensively modified from the scanner *)
6 (* automatically produced by the M2 version of COCO/R, using *)
7 (* the CPascal.atg grammar used for the JVM version of GPCP. *)
9 (* ==================================================================== *)
13 (* This is a modified version for Mburg --- it computes column positions *)
14 (* Scanner generated by Coco/R *)
26 noSym
= Tok
.NOSYM
; (*error token code*)
27 (* not only for errors but also for not finished states of scanner analysis *)
37 listAlways
* = 2; (* listing control constants *)
42 BufBlk
= ARRAY BlkSize
OF UBYTE
;
43 Buffer
= ARRAY BlkNmbr
OF POINTER TO BufBlk
;
44 StartTable
= ARRAY 256 OF INTEGER;
46 (* ======================== EXPORTS ========================= *)
48 ErrorHandler
* = POINTER TO ABSTRACT
RECORD END;
50 Token
* = POINTER TO RECORD
59 Span
* = POINTER TO RECORD
60 sLin
*, sCol
*, eLin
*, eCol
* : INTEGER
63 (* ====================== END EXPORTS ======================= *)
66 ch
: CHAR; (*current input character*)
67 curLine
: INTEGER; (*current input line (may be higher than line)*)
68 lineStart
: INTEGER; (*start position of current line*)
69 apx
: INTEGER; (*length of appendix (CONTEXT phrase)*)
70 oldEols
: INTEGER; (*number of EOLs in a comment*)
71 bp
: INTEGER; (*current position in buf*)
72 bp0
: INTEGER; (*position of current token)*)
73 LBlkSize
: INTEGER; (*BlkSize*)
74 inputLen
: INTEGER; (*source file size*)
75 buf
: Buffer
; (*source buffer for low-level access*)
78 start
: StartTable
; (*start state for every character*)
79 nextLine
: INTEGER; (*line of lookahead symbol*)
80 nextCol
: INTEGER; (*column of lookahead symbol*)
81 nextLen
: INTEGER; (*length of lookahead symbol*)
82 nextPos
: INTEGER; (*file position of lookahead symbol*)
84 spaces
: INTEGER; (* ############# NEW ############## *)
86 (* ======================== EXPORTS ========================= *)
88 src
*: GPBinFiles
.FILE
; (*source file. To be opened by main *)
89 lst
*: GPTextFiles
.FILE
; (*list file. To be opened by main *)
90 line
*, col
*: INTEGER; (*line and column of current symbol*)
91 len
*: INTEGER; (*length of current symbol*)
92 pos
*: INTEGER; (*file position of current symbol*)
93 errors
*: INTEGER; (*number of detected errors*)
94 warnings
*: INTEGER; (*number of detected warnings*)
96 ParseErr
*: ErrorHandler
;
97 SemError
*: ErrorHandler
;
98 (* ====================== END EXPORTS ======================= *)
100 (* ======================== EXPORTS ========================= *)
101 PROCEDURE (s
: ErrorHandler
)Report
*(num
: INTEGER;
103 col
: INTEGER) ,NEW,ABSTRACT
;
105 PROCEDURE (s
: ErrorHandler
)RepSt1
*(num
: INTEGER;
106 IN str
: ARRAY OF CHAR;
108 col
: INTEGER) ,NEW,ABSTRACT
;
110 PROCEDURE (s
: ErrorHandler
)RepSt2
*(num
: INTEGER;
111 IN st1
: ARRAY OF CHAR;
112 IN st2
: ARRAY OF CHAR;
114 col
: INTEGER) ,NEW,ABSTRACT
;
116 PROCEDURE (s
: Span
)SpanSS
*(e
: Span
) : Span
,NEW;
119 IF e
= NIL THEN RETURN s
;
122 res
.sLin
:= s
.sLin
; res
.eLin
:= e
.eLin
;
123 res
.sCol
:= s
.sCol
; res
.eCol
:= e
.eCol
;
128 PROCEDURE mkSpanTT
*(s
, e
: Token
) : Span
;
132 res
.sLin
:= s
.lin
; res
.eLin
:= e
.lin
;
133 res
.sCol
:= s
.col
; res
.eCol
:= e
.col
+ e
.len
;
137 PROCEDURE mkSpanT
*(t
: Token
) : Span
;
141 res
.sLin
:= t
.lin
; res
.eLin
:= t
.lin
;
142 res
.sCol
:= t
.col
; res
.eCol
:= t
.col
+ t
.len
;
146 PROCEDURE Merge
*(s
, e
: Span
) : Span
;
148 IF s
# NIL THEN RETURN s
.SpanSS(e
) ELSE RETURN NIL END;
151 (* ====================== END EXPORTS ======================= *)
153 PROCEDURE^ get
*() : Token
;
154 (* Gets next symbol from source file *)
156 PROCEDURE^ GetString
*(pos
: INTEGER; len
: INTEGER; OUT name
: ARRAY OF CHAR);
157 (* Retrieves exact string of max length len from position pos in source file *)
159 PROCEDURE^ charAt
*(pos
: INTEGER): CHAR;
160 (* Returns exact character at position pos in source file *)
163 (* Reads and stores source file internally *)
165 PROCEDURE^ SkipAndGetLine
*(i
: INTEGER; (* indent to skip *)
166 e
: INTEGER; (* end file-pos *)
167 VAR p
: INTEGER; (* crnt file-pos *)
168 OUT l
: INTEGER; (* fetched length *)
169 VAR s
: ARRAY OF CHAR); (* output string *)
171 (* ==================================================================== *)
173 PROCEDURE (t
: Token
)DiagToken
*(),NEW;
176 Console
.Write("l"); Console
.WriteInt(t
.lin
,1); Console
.Write(":");
177 Console
.Write("c"); Console
.WriteInt(t
.col
,1); Console
.WriteString(" '");
178 FOR i
:= 0 TO t
.len
- 1 DO Console
.Write(charAt(t
.pos
+i
)) END;
179 Console
.Write("'"); Console
.WriteLn
;
182 PROCEDURE digitAt(pos
: INTEGER) : INTEGER;
186 IF (ch
>= '
0'
) & (ch
<= '
9'
) THEN RETURN ORD(ch
) - ORD('
0'
);
187 ELSE RETURN ORD(ch
) + (10 - ORD('A'
));
191 PROCEDURE getHex
*(pos
, len
: INTEGER) : INTEGER;
197 FOR ix
:= pos
TO pos
+ len
- 1 DO
199 IF (ch
>= '
0'
) & (ch
<= '
9'
) THEN rslt
:= rslt
* 16 + ORD(ch
) - ORD('
0'
);
200 ELSIF (ch
>= 'a'
) & (ch
<= 'f'
) THEN rslt
:= rslt
* 16 + ORD(ch
) + (10 - ORD('a'
));
201 ELSIF (ch
>= 'A'
) & (ch
<= 'F'
) THEN rslt
:= rslt
* 16 + ORD(ch
) + (10 - ORD('A'
));
208 PROCEDURE tokToLong
*(t
: Token
) : LONGINT;
215 BEGIN [UNCHECKED_ARITHMETIC
]
217 * This code requires special care.
218 * For the CLR it would be simplest to catch overflows
219 * in the per-character loop, and put in a rescue clause
220 * that reported the Error-233. Unfortunately this does
221 * not work on the JVM, so we have to catch the overflow
222 * manually by detecting the sum wrapping to negative.
224 limt
:= t
.pos
+ t
.len
- 1;
227 IF (ch
= "H") OR (ch
= "L") THEN
229 FOR indx
:= t
.pos
TO limt
DO
230 hexD
:= digitAt(indx
);
231 long
:= long
* 16 + hexD
;
232 IF long
< 0 THEN SemError
.Report(233, t
.lin
, t
.col
); RETURN 0 END;
235 IF RTS
.hiInt(long
) # 0 THEN
236 SemError
.Report(232, t
.lin
, t
.col
); RETURN 0;
238 long
:= LONG(RTS
.loInt(long
));
242 FOR indx
:= t
.pos
TO limt
DO
244 long
:= long
* 10 + (ORD(ch
) - ORD('
0'
));
245 IF long
< 0 THEN SemError
.Report(233, t
.lin
, t
.col
); RETURN 0 END;
251 PROCEDURE tokToReal
*(t
: Token
) : REAL;
252 VAR str
: ARRAY 256 OF CHAR;
256 GetString(t
.pos
, t
.len
, str
);
257 RTS
.StrToRealInvar(str$
, num
, pOk
);
259 SemError
.Report(45, t
.lin
, t
.col
); RETURN 0.0;
265 PROCEDURE tokToChar
*(t
: Token
) : CHAR;
272 limt
:= t
.pos
+ t
.len
- 2;
274 FOR indx
:= t
.pos
TO limt
DO
275 hexD
:= digitAt(indx
);
276 cOrd
:= cOrd
* 16 + hexD
;
278 (* RANGE CHECK HERE *)
282 (* ====================== END EXPORTS ======================= *)
285 (* Return global variable ch *)
287 INC(bp
); ch
:= charAt(bp
);
289 INC(spaces
,8); DEC(spaces
,spaces
MOD 8);
293 IF ch
= EOL
THEN INC(curLine
); lineStart
:= bp
; spaces
:= 0 END
296 (* ==================================================================== *)
298 PROCEDURE comment (): BOOLEAN;
300 level
, startLine
: INTEGER;
301 oldLineStart
: INTEGER;
304 level
:= 1; startLine
:= curLine
;
305 oldLineStart
:= lineStart
; oldSpaces
:= spaces
;
315 IF level
= 0 THEN RETURN TRUE
END
317 ELSIF (ch
= "(") THEN
319 IF (ch
= "*") THEN INC(level
); NextCh
END;
320 ELSIF ch
= eof
THEN RETURN FALSE
324 IF ch
= EOL
THEN DEC(curLine
); lineStart
:= oldLineStart
END;
325 DEC(bp
, 2); NextCh
; spaces
:= oldSpaces
; RETURN FALSE
331 (* ==================================================================== *)
333 PROCEDURE get() : Token
;
338 PROCEDURE equal (IN s
: ARRAY OF CHAR): BOOLEAN;
343 (* Assert: only called with literals ==> LEN(s$) = LEN(s)-1 *)
344 IF nextLen
# LEN(s
)-1 THEN RETURN FALSE
END;
345 i
:= 1; q
:= bp0
; INC(q
);
347 IF charAt(q
) # s
[i
] THEN RETURN FALSE
END;
353 PROCEDURE CheckLiteral(VAR sym
: INTEGER);
356 "A": IF equal("ABSTRACT") THEN sym
:= Tok
.ABSTRACTSym
;
357 ELSIF equal("ARRAY") THEN sym
:= Tok
.ARRAYSym
;
359 |
"B": IF equal("BEGIN") THEN sym
:= Tok
.BEGINSym
;
360 ELSIF equal("BY") THEN sym
:= Tok
.BYSym
;
362 |
"C": IF equal("CASE") THEN sym
:= Tok
.CASESym
;
363 ELSIF equal("CLOSE") THEN sym
:= Tok
.CLOSESym
;
364 ELSIF equal("CONST") THEN sym
:= Tok
.CONSTSym
;
366 |
"D": IF equal("DO") THEN sym
:= Tok
.DOSym
;
367 ELSIF equal("DIV") THEN sym
:= Tok
.DIVSym
;
368 ELSIF equal("DIV0") THEN sym
:= Tok
.DIV0Sym
;
370 |
"E": IF equal("ELSE") THEN sym
:= Tok
.ELSESym
;
371 ELSIF equal("ELSIF") THEN sym
:= Tok
.ELSIFSym
;
372 ELSIF equal("EMPTY") THEN sym
:= Tok
.EMPTYSym
;
373 ELSIF equal("END") THEN sym
:= Tok
.ENDSym
;
374 ELSIF equal("EXIT") THEN sym
:= Tok
.EXITSym
;
375 ELSIF equal("EXTENSIBLE") THEN sym
:= Tok
.EXTENSIBLESym
;
376 ELSIF equal("ENUM") THEN sym
:= Tok
.ENUMSym
;
377 ELSIF equal("EVENT") THEN sym
:= Tok
.EVENTSym
;
379 |
"F": IF equal("FOR") THEN sym
:= Tok
.FORSym
;
381 |
"I": IF equal("IF") THEN sym
:= Tok
.IFSym
;
382 ELSIF equal("IMPORT") THEN sym
:= Tok
.IMPORTSym
;
383 ELSIF equal("IN") THEN sym
:= Tok
.INSym
;
384 ELSIF equal("IS") THEN sym
:= Tok
.ISSym
;
385 ELSIF equal("INTERFACE") THEN sym
:= Tok
.INTERFACESym
;
387 |
"L": IF equal("LIMITED") THEN sym
:= Tok
.LIMITEDSym
;
388 ELSIF equal("LOOP") THEN sym
:= Tok
.LOOPSym
;
390 |
"M": IF equal("MOD") THEN sym
:= Tok
.MODSym
;
391 ELSIF equal("MODULE") THEN sym
:= Tok
.MODULESym
;
393 |
"N": IF equal("NEW") THEN sym
:= Tok
.NEWSym
;
394 ELSIF equal("NIL") THEN sym
:= Tok
.NILSym
;
396 |
"O": IF equal("OF") THEN sym
:= Tok
.OFSym
;
397 ELSIF equal("OR") THEN sym
:= Tok
.ORSym
;
398 ELSIF equal("OUT") THEN sym
:= Tok
.OUTSym
;
400 |
"P": IF equal("POINTER") THEN sym
:= Tok
.POINTERSym
;
401 ELSIF equal("PROCEDURE") THEN sym
:= Tok
.PROCEDURESym
;
403 |
"R": IF equal("RECORD") THEN sym
:= Tok
.RECORDSym
;
404 ELSIF equal("REPEAT") THEN sym
:= Tok
.REPEATSym
;
405 ELSIF equal("RETURN") THEN sym
:= Tok
.RETURNSym
;
406 ELSIF equal("RESCUE") THEN sym
:= Tok
.RESCUESym
;
407 ELSIF equal("REM0") THEN sym
:= Tok
.REM0Sym
;
409 |
"S": IF equal("STATIC") THEN sym
:= Tok
.STATICSym
;
411 |
"T": IF equal("THEN") THEN sym
:= Tok
.THENSym
;
412 ELSIF equal("TO") THEN sym
:= Tok
.TOSym
;
413 ELSIF equal("TYPE") THEN sym
:= Tok
.TYPESym
;
415 |
"U": IF equal("UNTIL") THEN sym
:= Tok
.UNTILSym
;
417 |
"V": IF equal("VAR") THEN sym
:= Tok
.VARSym
;
418 ELSIF equal("VECTOR") THEN sym
:= Tok
.VECTORSym
;
420 |
"W": IF equal("WHILE") THEN sym
:= Tok
.WHILESym
;
421 ELSIF equal("WITH") THEN sym
:= Tok
.WITHSym
;
427 PROCEDURE mkToken(kind
: INTEGER) : Token
;
431 IF kind
= Tok
.idVariant
THEN kind
:= Tok
.identSym
; new
.dlr
:= TRUE
END;
433 new
.lin
:= nextLine
; new
.col
:= nextCol
;
434 new
.len
:= nextLen
; new
.pos
:= nextPos
;
440 (ch
>= CHR(9)) & (ch
<= CHR(10)) OR
441 (ch
= CHR(13)) DO NextCh
END;
442 IF ((ch
= "(")) & comment() THEN RETURN get() END;
443 pos
:= nextPos
; nextPos
:= bp
;
444 col
:= nextCol
; nextCol
:= spaces
;
445 line
:= nextLine
; nextLine
:= curLine
;
446 len
:= nextLen
; nextLen
:= 0;
447 apx
:= 0; state
:= start
[ORD(ch
)]; bp0
:= bp
;
449 NextCh
; INC(nextLen
);
451 (* ---------------------------------- *)
452 1: (* start of ordinary identifier *)
453 IF (ch
>= "0") & (ch
<= "9") OR
454 (ch
>= "A") & (ch
<= "Z") OR
455 (ch
>= "a") & (ch
<= "z") OR
456 (ch
>= 0C0X
) & (ch
<= 0D6X
) OR
457 (ch
>= 0D8X
) & (ch
<= 0F6X
) OR
458 (ch
>= 0F8X
) & (ch
<= 0FFX
) OR
459 (ch
= "_") THEN (* skip *)
460 ELSIF ch
= "@" THEN state
:= 45;
461 ELSIF ch
= "$" THEN state
:= 46;
462 ELSE sym
:= Tok
.identSym
; CheckLiteral(sym
); RETURN mkToken(sym
);
464 (* ---------------------------------- *)
465 |
44:(* start of ` escaped identifier *)
466 IF (ch
>= "0") & (ch
<= "9") OR
467 (ch
>= "A") & (ch
<= "Z") OR
468 (ch
>= "a") & (ch
<= "z") OR
469 (ch
>= 0C0X
) & (ch
<= 0D6X
) OR
470 (ch
>= 0D8X
) & (ch
<= 0F6X
) OR
471 (ch
>= 0F8X
) & (ch
<= 0FFX
) OR
472 (ch
= "_") THEN (* skip *)
474 SemError
.Report(187, nextLine
, spaces
);
475 RETURN mkToken(noSym
);
477 (* throw away the escape char *)
478 INC(nextPos
); INC(nextCol
); DEC(nextLen
);
480 (* ---------------------------------- *)
481 |
45:(* rest of ` escaped identifier *)
482 IF (ch
>= "0") & (ch
<= "9") OR
483 (ch
>= "A") & (ch
<= "Z") OR
484 (ch
>= "a") & (ch
<= "z") OR
486 (ch
= "_") THEN (* skip *)
487 ELSIF ch
= "$" THEN state
:= 47;
488 ELSE RETURN mkToken(Tok
.idVariant
); (* No check for reserved words *)
490 (* ---------------------------------- *)
491 |
46:(* check for $ at end of ident. *)
492 IF (ch
>= "0") & (ch
<= "9") OR
493 (ch
>= "A") & (ch
<= "Z") OR
494 (ch
>= "a") & (ch
<= "z") OR
495 (ch
= "_") THEN state
:= 45; (* embedded "$" *)
497 DEC(bp
, 2); DEC(nextLen
); NextCh
;
498 sym
:= Tok
.identSym
; CheckLiteral(sym
); RETURN mkToken(sym
);
500 (* ---------------------------------- *)
501 |
47:(* check for $ at end of idVar't *)
502 IF (ch
>= "0") & (ch
<= "9") OR
503 (ch
>= "A") & (ch
<= "Z") OR
504 (ch
>= "a") & (ch
<= "z") OR
505 (ch
= "_") THEN state
:= 45; (* embedded "$" *)
507 DEC(bp
, 2); DEC(nextLen
); NextCh
;
508 RETURN mkToken(Tok
.idVariant
); (* No check for reserved words *)
510 (* ---------------------------------- *)
511 |
49: (* !" ..." format string *)
512 IF ch
= '
"' THEN state := 51;
513 ELSIF ch = '\' THEN state := 50;
515 | 50: (* Last char was '\' inside bangStr *)
517 | 51: RETURN mkToken(Tok.bangStrSym);
518 (* ---------------------------------- *)
519 | 2: RETURN mkToken(Tok.integerSym);
520 | 3: DEC(bp, apx+1); DEC(nextLen, apx);
521 NextCh; RETURN mkToken(Tok.integerSym);
522 | 4: IF (ch >= "0") & (ch <= "9") THEN
523 ELSIF (ch = "E
") THEN state := 5;
524 ELSE RETURN mkToken(Tok.realSym);
526 | 5: IF (ch >= "0") & (ch <= "9") THEN state := 7;
528 (ch = "-") THEN state := 6;
529 ELSE RETURN mkToken(noSym);
531 | 6: IF (ch >= "0") & (ch <= "9") THEN state := 7;
532 ELSE RETURN mkToken(noSym);
534 | 7: IF (ch >= "0") & (ch <= "9") THEN
535 ELSE RETURN mkToken(Tok.realSym);
537 | 8: RETURN mkToken(Tok.CharConstantSym);
538 | 9: IF (ch <= CHR(9)) OR
539 (ch >= CHR(11)) & (ch <= CHR(12)) OR
540 (ch >= CHR(14)) & (ch <= "!") OR
542 ELSIF (ch = '"'
) THEN state
:= 10;
543 ELSE RETURN mkToken(noSym
);
545 |
10: RETURN mkToken(Tok
.stringSym
);
546 |
11: IF (ch
<= CHR(9)) OR
547 (ch
>= CHR(11)) & (ch
<= CHR(12)) OR
548 (ch
>= CHR(14)) & (ch
<= "&") OR
550 ELSIF (ch
= "'") THEN state
:= 10;
551 ELSE RETURN mkToken(noSym
);
553 |
12: IF (ch
>= "0") & (ch
<= "9") THEN
554 ELSIF (ch
>= "A") & (ch
<= "F") THEN state
:= 13;
556 (ch
= "L") THEN state
:= 2;
557 ELSIF (ch
= ".") THEN state
:= 14; INC(apx
)
558 ELSIF (ch
= "X") THEN state
:= 8;
559 ELSE RETURN mkToken(Tok
.integerSym
);
561 |
13: IF (ch
>= "0") & (ch
<= "9") OR
562 (ch
>= "A") & (ch
<= "F") THEN
564 (ch
= "L") THEN state
:= 2;
565 ELSIF (ch
= "X") THEN state
:= 8;
566 ELSE RETURN mkToken(noSym
);
568 |
14: IF (ch
>= "0") & (ch
<= "9") THEN state
:= 4; apx
:= 0
569 ELSIF (ch
= ".") THEN state
:= 3; INC(apx
)
570 ELSIF (ch
= "E") THEN state
:= 5; apx
:= 0
571 ELSE RETURN mkToken(Tok
.realSym
);
573 |
15: RETURN mkToken(Tok
.starSym
);
574 |
16: RETURN mkToken(Tok
.minusSym
);
575 |
17: IF (ch
= '
"') THEN state := 49;
576 ELSE RETURN mkToken(Tok.bangSym);
578 | 18: IF (ch = ".") THEN state := 40;
579 ELSE RETURN mkToken(Tok.pointSym);
581 | 19: RETURN mkToken(Tok.equalSym);
582 | 20: RETURN mkToken(Tok.commaSym);
583 | 21: RETURN mkToken(Tok.lparenSym);
584 | 22: RETURN mkToken(Tok.plusSym);
585 | 23: RETURN mkToken(Tok.rparenSym);
586 | 24: RETURN mkToken(Tok.semicolonSym);
587 | 25: IF (ch = "=") THEN state := 41;
588 ELSE RETURN mkToken(Tok.colonSym);
590 | 26: RETURN mkToken(Tok.lbrackSym);
591 | 27: RETURN mkToken(Tok.rbrackSym);
592 | 28: RETURN mkToken(Tok.uparrowSym);
593 | 29: RETURN mkToken(Tok.dollarSym);
594 | 30: RETURN mkToken(Tok.hashSym);
595 | 31: IF (ch = "=") THEN state := 32;
596 ELSE RETURN mkToken(Tok.lessSym);
598 | 32: RETURN mkToken(Tok.lessequalSym);
599 | 33: IF (ch = "=") THEN state := 34;
600 ELSE RETURN mkToken(Tok.greaterSym);
602 | 34: RETURN mkToken(Tok.greaterequalSym);
603 | 35: RETURN mkToken(Tok.slashSym);
604 | 36: RETURN mkToken(Tok.andSym);
605 | 37: RETURN mkToken(Tok.tildeSym);
606 | 38: RETURN mkToken(Tok.lbraceSym);
607 | 39: RETURN mkToken(Tok.rbraceSym);
608 | 40: RETURN mkToken(Tok.pointpointSym);
609 | 41: RETURN mkToken(Tok.colonequalSym);
610 | 42: RETURN mkToken(Tok.barSym);
611 | 43: ch := 0X; DEC(bp); RETURN mkToken(Tok.EOFSYM);
612 ELSE RETURN mkToken(noSym); (*NextCh already done*)
617 (* ==================================================================== *)
619 PROCEDURE SkipAndGetLine(i : INTEGER; (* indent to skip *)
620 e : INTEGER; (* end file-pos *)
621 VAR p : INTEGER; (* crnt file-pos *)
622 OUT l : INTEGER; (* fetched length *)
623 VAR s : ARRAY OF CHAR); (* output string *)
630 ch := charAt(p); INC(p);
631 (* skip i positions if possible *)
632 WHILE (sp < i) & (ch <= " ") & (p <= e) & (ch # asciiLF) DO
633 IF ch = asciiHT THEN INC(sp,8); DEC(sp,sp MOD 8) ELSE INC(sp) END;
634 ch := charAt(p); INC(p);
638 s[ix] := " "; INC(ix); DEC(sp);
640 WHILE (p <= e) & (ch # asciiLF) DO
641 s[ix] := ch; INC(ix);
642 ch := charAt(p); INC(p);
644 s[ix] := 0X; l := ix;
647 (* ==================================================================== *)
649 PROCEDURE GetString (pos: INTEGER; len: INTEGER; OUT name: ARRAY OF CHAR);
654 IF len >= LEN(name) THEN len := LEN(name)-1 END;
657 name[i] := charAt(p); INC(i); INC(p)
662 (* ==================================================================== *)
664 PROCEDURE charAt (pos: INTEGER): CHAR;
668 IF pos >= inputLen THEN RETURN eof END;
669 ch := buf[pos DIV LBlkSize][pos MOD LBlkSize];
670 IF ch # eof THEN RETURN ch ELSE RETURN eof END
673 (* ==================================================================== *)
679 BEGIN (*assert: src has been opened*)
680 FOR i := 0 TO BlkNmbr - 1 DO savedBuf[i] := NIL END; bufSaved := FALSE;
686 * Conserve memory by not deallocating the buffer.
687 * Reuse for later compilation, expanding if necessary.
689 IF buf[i] = NIL THEN NEW(buf[i]) END;
690 read := GPBinFiles.readNBytes(src, buf[i]^, BlkSize);
692 UNTIL read < BlkSize;
693 GPBinFiles.CloseFile(src);
694 buf[i][read] := eofByt;
695 curLine := 1; lineStart := -2; bp := -1;
696 oldEols := 0; apx := 0; errors := 0; warnings := 0;
697 spaces := 0; (* # new # *)
701 PROCEDURE NewReadBuffer*(source : ARRAY OF POINTER TO ARRAY OF CHAR);
702 VAR count, linIx, chrIx, index : INTEGER;
703 lineP : POINTER TO ARRAY OF CHAR;
708 WHILE (count < BlkNmbr) & (buf[count] # NIL) DO
709 savedBuf[count] := buf[count]; INC(count);
715 FOR linIx := 0 TO LEN(source) - 1 DO
716 lineP := source[linIx];
718 IF lineP = NIL THEN theCh := 0X ELSE theCh := lineP[0] END;
720 buf[0][index] := USHORT(ORD(theCh)); INC(index); INC(chrIx);
721 theCh := lineP[chrIx];
723 buf[0][index] := ORD(ASCII.LF); INC(index);
725 buf[0][index] := eofByt;
727 * Initialize the scanner state.
729 curLine := 1; lineStart := -2; bp := -1;
730 oldEols := 0; apx := 0;
731 spaces := 0; (* # new # *)
735 PROCEDURE RestoreFileBuffer*();
739 WHILE (count < BlkNmbr) & (savedBuf[count] # NIL) DO
740 buf[count] := savedBuf[count]; INC(count);
742 END RestoreFileBuffer;
744 (* ==================================================================== *)
747 start[ 0] := 43; start[ 1] := 48; start[ 2] := 48; start[ 3] := 48;
748 start[ 4] := 48; start[ 5] := 48; start[ 6] := 48; start[ 7] := 48;
749 start[ 8] := 48; start[ 9] := 48; start[ 10] := 48; start[ 11] := 48;
750 start[ 12] := 48; start[ 13] := 48; start[ 14] := 48; start[ 15] := 48;
751 start[ 16] := 48; start[ 17] := 48; start[ 18] := 48; start[ 19] := 48;
752 start[ 20] := 48; start[ 21] := 48; start[ 22] := 48; start[ 23] := 48;
753 start[ 24] := 48; start[ 25] := 48; start[ 26] := 48; start[ 27] := 48;
754 start[ 28] := 48; start[ 29] := 48; start[ 30] := 48; start[ 31] := 48;
755 start[ 32] := 48; start[ 33] := 17; start[ 34] := 9; start[ 35] := 30; (* '!' = 33 => state 17 *)
756 start[ 36] := 29; start[ 37] := 48; start[ 38] := 36; start[ 39] := 11; (* '%' = 37 => state 48 *)
757 start[ 40] := 21; start[ 41] := 23; start[ 42] := 15; start[ 43] := 22;
758 start[ 44] := 20; start[ 45] := 16; start[ 46] := 18; start[ 47] := 35;
759 start[ 48] := 12; start[ 49] := 12; start[ 50] := 12; start[ 51] := 12;
760 start[ 52] := 12; start[ 53] := 12; start[ 54] := 12; start[ 55] := 12;
761 start[ 56] := 12; start[ 57] := 12; start[ 58] := 25; start[ 59] := 24;
762 start[ 60] := 31; start[ 61] := 19; start[ 62] := 33; start[ 63] := 48;
763 start[ 64] := 48; start[ 65] := 1; start[ 66] := 1; start[ 67] := 1;
764 start[ 68] := 1; start[ 69] := 1; start[ 70] := 1; start[ 71] := 1;
765 start[ 72] := 1; start[ 73] := 1; start[ 74] := 1; start[ 75] := 1;
766 start[ 76] := 1; start[ 77] := 1; start[ 78] := 1; start[ 79] := 1;
767 start[ 80] := 1; start[ 81] := 1; start[ 82] := 1; start[ 83] := 1;
768 start[ 84] := 1; start[ 85] := 1; start[ 86] := 1; start[ 87] := 1;
769 start[ 88] := 1; start[ 89] := 1; start[ 90] := 1; start[ 91] := 26;
770 start[ 92] := 48; start[ 93] := 27; start[ 94] := 28;
771 (* ------------------------------------------- *)
772 (* Two special-case characters ... "_
" and "`
" *)
773 (* ------------------------------------------- *)
774 start[ 95] := 1; start[ 96] := 44;
775 (* ------------------------------------------- *)
776 start[ 97] := 1; start[ 98] := 1; start[ 99] := 1;
777 start[100] := 1; start[101] := 1; start[102] := 1; start[103] := 1;
778 start[104] := 1; start[105] := 1; start[106] := 1; start[107] := 1;
779 start[108] := 1; start[109] := 1; start[110] := 1; start[111] := 1;
780 start[112] := 1; start[113] := 1; start[114] := 1; start[115] := 1;
781 start[116] := 1; start[117] := 1; start[118] := 1; start[119] := 1;
782 start[120] := 1; start[121] := 1; start[122] := 1; start[123] := 38;
783 start[124] := 42; start[125] := 39; start[126] := 37; start[127] := 48;
784 start[128] := 48; start[129] := 48; start[130] := 48; start[131] := 48;
785 start[132] := 48; start[133] := 48; start[134] := 48; start[135] := 48;
786 start[136] := 48; start[137] := 48; start[138] := 48; start[139] := 48;
787 start[140] := 48; start[141] := 48; start[142] := 48; start[143] := 48;
788 start[144] := 48; start[145] := 48; start[148] := 48; start[147] := 48;
789 start[148] := 48; start[149] := 48; start[150] := 48; start[151] := 48;
790 start[152] := 48; start[153] := 48; start[154] := 48; start[155] := 48;
791 start[156] := 48; start[157] := 48; start[158] := 48; start[159] := 48;
792 start[160] := 48; start[161] := 48; start[162] := 48; start[163] := 48;
793 start[164] := 48; start[165] := 48; start[166] := 48; start[167] := 48;
794 start[168] := 48; start[169] := 48; start[170] := 48; start[171] := 48;
795 start[172] := 48; start[173] := 48; start[174] := 48; start[175] := 48;
796 start[176] := 48; start[177] := 48; start[178] := 48; start[179] := 48;
797 start[180] := 48; start[181] := 48; start[182] := 48; start[183] := 48;
798 start[184] := 48; start[185] := 48; start[186] := 48; start[187] := 48;
799 start[188] := 48; start[189] := 48; start[190] := 48; start[191] := 48;
800 (* ------------------------------------------- *)
801 (* Latin-8 alphabetics start here ... *)
802 (* ------------------------------------------- *)
803 start[192] := 1; start[193] := 1; start[194] := 1; start[195] := 1;
804 start[196] := 1; start[197] := 1; start[198] := 1; start[199] := 1;
805 start[200] := 1; start[201] := 1; start[202] := 1; start[203] := 1;
806 start[204] := 1; start[205] := 1; start[206] := 1; start[207] := 1;
807 start[208] := 1; start[209] := 1; start[210] := 1; start[211] := 1;
808 start[212] := 1; start[213] := 1; start[214] := 1;
810 (* odd character out *)
813 start[216] := 1; start[217] := 1; start[218] := 1; start[219] := 1;
814 start[220] := 1; start[221] := 1; start[222] := 1; start[223] := 1;
815 start[224] := 1; start[225] := 1; start[226] := 1; start[227] := 1;
816 start[228] := 1; start[229] := 1; start[230] := 1; start[231] := 1;
817 start[232] := 1; start[233] := 1; start[234] := 1; start[235] := 1;
818 start[236] := 1; start[237] := 1; start[238] := 1; start[239] := 1;
819 start[240] := 1; start[241] := 1; start[242] := 1; start[243] := 1;
820 start[244] := 1; start[245] := 1; start[246] := 1;
822 (* odd character out *)
825 start[248] := 1; start[249] := 1; start[250] := 1; start[251] := 1;
826 start[252] := 1; start[253] := 1; start[254] := 1; start[255] := 1;