From: DeaDDooMER Date: Sat, 16 Sep 2017 16:41:30 +0000 (+0300) Subject: Добавлен порт COCO/R X-Git-Url: http://deadsoftware.ru/gitweb?a=commitdiff_plain;h=b32ba7cd2d9c7ddd96e5504bd7bc7552228167e3;p=dsw-obn.git Добавлен порт COCO/R --- diff --git a/TextV4/make.sh b/TextV4/make.sh deleted file mode 100755 index 62688d1..0000000 --- a/TextV4/make.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -set -e - -PATH="..:$PATH" - -obn-compile.sh -I TextV4 TextV4 diff --git a/TextV4/voc/.gitignore b/TextV4/voc/.gitignore deleted file mode 100644 index 94818f5..0000000 --- a/TextV4/voc/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -TextV4Print -TextV4ToAscii -TextV4ToText -*.o -*.sym -*.h -*.c diff --git a/src/oberon.c b/src/oberon.c index 1d9e670..cd7e97a 100644 --- a/src/oberon.c +++ b/src/oberon.c @@ -2447,6 +2447,12 @@ oberon_proc_decl(oberon_context_t * ctx) char * name; int export; int read_only; + + if(ctx -> token == STAR) + { + oberon_assert_token(ctx, STAR); + } + name = oberon_assert_ident(ctx); oberon_def(ctx, &export, &read_only); diff --git a/TextV4/.gitignore b/tools/Coco/.gitignore similarity index 100% rename from TextV4/.gitignore rename to tools/Coco/.gitignore diff --git a/tools/Coco/CR.atg b/tools/Coco/CR.atg new file mode 100644 index 0000000..74d023b --- /dev/null +++ b/tools/Coco/CR.atg @@ -0,0 +1,376 @@ +COMPILER CR (*H.Moessenboeck 17.11.93, Coco/R*) + +(*---------------------- semantic declarations ----------------------------*) + +IMPORT CRT, CRA, CRX, Sets, Texts, Oberon; + +CONST + ident = 0; string = 1; (*symbol kind*) + +VAR + str: ARRAY 32 OF CHAR; + w: Texts.Writer; + genScanner: BOOLEAN; + + +PROCEDURE SemErr(nr: INTEGER); +BEGIN + CRS.Error(200+nr, CRS.pos); +END SemErr; + +PROCEDURE MatchLiteral(sp: INTEGER); (*store string either as token or as literal*) + VAR sn, sn1: CRT.SymbolNode; matchedSp: INTEGER; +BEGIN + CRT.GetSym(sp, sn); + CRA.MatchDFA(sn.name, sp, matchedSp); + IF matchedSp # CRT.noSym THEN + CRT.GetSym(matchedSp, sn1); sn1.struct := CRT.classLitToken; CRT.PutSym(matchedSp, sn1); + sn.struct := CRT.litToken + ELSE sn.struct := CRT.classToken; + END; + CRT.PutSym(sp, sn) +END MatchLiteral; + +PROCEDURE SetCtx(gp: INTEGER); (*set transition code to CRT.contextTrans*) + VAR gn: CRT.GraphNode; +BEGIN + WHILE gp > 0 DO + CRT.GetNode(gp, gn); + IF gn.typ IN {CRT.char, CRT.class} THEN + gn.p2 := CRT.contextTrans; CRT.PutNode(gp, gn) + ELSIF gn.typ IN {CRT.opt, CRT.iter} THEN SetCtx(gn.p1) + ELSIF gn.typ = CRT.alt THEN SetCtx(gn.p1); SetCtx(gn.p2) + END; + gp := gn.next + END +END SetCtx; + +PROCEDURE SetDDT(s: ARRAY OF CHAR); + VAR name: ARRAY 64 OF CHAR; i: INTEGER; ch: CHAR; +BEGIN + i := 1; + WHILE s[i] # 0X DO + ch := s[i]; INC(i); + IF (ch >= "0") & (ch <= "9") THEN CRT.ddt[ORD(ch)-ORD("0")] := TRUE END + END +END SetDDT; + +PROCEDURE FixString (VAR s: ARRAY OF CHAR; len: INTEGER); + VAR double: BOOLEAN; i: INTEGER; +BEGIN + double := FALSE; + FOR i := 0 TO len-2 DO + IF s[i] = '"' THEN double := TRUE END + END; + IF ~ double THEN s[0] := '"'; s[len-1] := '"' END +END FixString; + +(*-------------------------------------------------------------------------*) +CHARACTERS + letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz". + digit = "0123456789". + eol = CHR(13). + tab = CHR(9). + noQuote1 = ANY - '"' - eol. + noQuote2 = ANY - "'" - eol. + +IGNORE eol + tab + CHR(28) + + +TOKENS + ident = letter {letter | digit}. + string = '"' {noQuote1} '"' | "'" {noQuote2} "'". + number = digit {digit}. + + +PRAGMAS + ddtSym = "$" {digit}. (. CRS.GetName(CRS.nextPos, CRS.nextLen, str); SetDDT(str) .) + + +COMMENTS FROM "(*" TO "*)" NESTED + +(*-------------------------------------------------------------------------*) +PRODUCTIONS + +CR (. VAR undef, hasAttrs, ok, ok1: BOOLEAN; eofSy, gR: INTEGER; + gramLine, sp: INTEGER; + gn: CRT.GraphNode; sn: CRT.SymbolNode; + name, gramName: CRT.Name; .) += + "COMPILER" (. Texts.OpenWriter(w); + CRT.Init; CRX.Init; CRA.Init; + gramLine := CRS.line; + eofSy := CRT.NewSym(CRT.t, "EOF", 0); + genScanner := TRUE; + CRT.ignoreCase := FALSE; + ok := TRUE; + Sets.Clear(CRT.ignored) .) + ident (. CRS.GetName(CRS.pos, CRS.len, gramName); + CRT.semDeclPos.beg := CRS.nextPos; CRT.importPos.beg := -1; .) + { "IMPORT" (. CRT.importPos.beg := CRS.nextPos .) + {ANY} ";" (. CRT.importPos.len := SHORT(CRS.pos - CRT.importPos.beg); + CRT.importPos.col := 0; + CRT.semDeclPos.beg := CRS.nextPos .) + | ANY + } (. CRT.semDeclPos.len := SHORT(CRS.nextPos - CRT.semDeclPos.beg); + CRT.semDeclPos.col := 0 .) + { Declaration } + SYNC + "PRODUCTIONS" (. IF genScanner THEN CRA.MakeDeterministic(ok) END; + CRT.nNodes := 0 .) + { ident (. CRS.GetName(CRS.pos, CRS.len, name); + sp := CRT.FindSym(name); undef := sp = CRT.noSym; + IF undef THEN + sp := CRT.NewSym(CRT.nt, name, CRS.line); + CRT.GetSym(sp, sn); + ELSE + CRT.GetSym(sp, sn); + IF sn.typ = CRT.nt THEN + IF sn.struct > 0 THEN SemErr(7) END + ELSE SemErr(8) + END; + sn.line := CRS.line + END; + hasAttrs := sn.attrPos.beg >= 0 .) + ( Attribs (. IF ~undef & ~hasAttrs THEN SemErr(9) END; + CRT.PutSym(sp, sn) .) + | (. IF ~undef & hasAttrs THEN SemErr(10) END .) + ) + [ SemText ] + WEAK "=" + Expression (. CRT.CompleteGraph(gR); CRT.PutSym(sp, sn); + IF CRT.ddt[2] THEN CRT.PrintGraph END .) + WEAK "." + } (. sp := CRT.FindSym(gramName); + IF sp = CRT.noSym THEN SemErr(11); + ELSE + CRT.GetSym(sp, sn); + IF sn.attrPos.beg >= 0 THEN SemErr(12) END; + CRT.root := CRT.NewNode(CRT.nt, sp, gramLine); + END .) + "END" ident (. CRS.GetName(CRS.pos, CRS.len, name); + IF name # gramName THEN SemErr(17) END; + IF CRS.errors = 0 THEN + Texts.WriteString(w, " checking"); Texts.Append(Oberon.Log, w.buf); + CRT.CompSymbolSets; + IF ok THEN CRT.TestCompleteness(ok) END; + IF ok THEN + CRT.TestIfAllNtReached(ok1); CRT.FindCircularProductions(ok) + END; + IF ok THEN CRT.TestIfNtToTerm(ok) END; + IF ok THEN CRT.LL1Test(ok1) END; + IF CRT.ddt[0] THEN CRA.PrintStates END; + IF CRT.ddt[7] THEN CRT.XRef END; + IF ok THEN + Texts.WriteString(w, " +parser"); + Texts.Append(Oberon.Log, w.buf); + CRX.GenCompiler; + IF genScanner THEN + Texts.WriteString(w, " +scanner"); + Texts.Append(Oberon.Log, w.buf); + CRA.WriteScanner + END; + IF CRT.ddt[8] THEN CRX.WriteStatistics END + END + ELSE ok := FALSE + END; + IF CRT.ddt[6] THEN CRT.PrintSymbolTable END; + IF ok THEN Texts.WriteString(w, " done") END; + Texts.WriteLn(w); Texts.Append(Oberon.Log, w.buf) .) + ".". +(*------------------------------------------------------------------------------------*) +Declaration (. VAR gL1, gR1, gL2, gR2: INTEGER; nested: BOOLEAN; .) += + "CHARACTERS" { SetDecl } +| "TOKENS" { TokenDecl } +| "PRAGMAS" { TokenDecl } +| "COMMENTS" + "FROM" TokenExpr + "TO" TokenExpr + ( "NESTED" (. nested := TRUE .) + | (. nested := FALSE .) + ) (. CRA.NewComment(gL1, gL2, nested) .) +| "IGNORE" + ( "CASE" (. CRT.ignoreCase := TRUE .) + | Set + ) +. + +(*------------------------------------------------------------------------------------*) +SetDecl (. VAR c: INTEGER; set: CRT.Set; name: CRT.Name; .) += + ident (. CRS.GetName(CRS.pos, CRS.len, name); + c := CRT.ClassWithName(name); IF c >= 0 THEN SemErr(7) END .) + "=" Set (. c := CRT.NewClass(name, set) .) + ".". +(*------------------------------------------------------------------------------------*) +Set (. VAR set2: CRT.Set; .) += + SimSet + { "+" SimSet (. Sets.Unite(set, set2) .) + | "-" SimSet (. Sets.Differ(set, set2) .) + }. +(*------------------------------------------------------------------------------------*) +SimSet (. VAR c, n, i: INTEGER; name: CRT.Name; s: ARRAY 128 OF CHAR; .) += + ident (. CRS.GetName(CRS.pos, CRS.len, name); + c := CRT.ClassWithName(name); + IF c < 0 THEN SemErr(15); Sets.Clear(set) + ELSE CRT.GetClass(c, set) + END .) +| string (. CRS.GetName(CRS.pos, CRS.len, s); + Sets.Clear(set); i := 1; + WHILE s[i] # s[0] DO + Sets.Incl(set, ORD(s[i])); INC(i) + END .) +| "CHR" "(" number (. CRS.GetName(CRS.pos, CRS.len, name); + n := 0; i := 0; + WHILE name[i] # 0X DO + n := 10 * n + (ORD(name[i]) - ORD("0")); + INC(i) + END; + Sets.Clear(set); Sets.Incl(set, n) .) + ")" +| "ANY" (. Sets.Fill(set) .) +. +(*------------------------------------------------------------------------------------*) +TokenDecl (. VAR sp, kind, gL, gR: INTEGER; sn: CRT.SymbolNode; + pos: CRT.Position; name: CRT.Name; .) += + Symbol (. IF CRT.FindSym(name) # CRT.noSym THEN SemErr(7) + ELSE + sp := CRT.NewSym(typ, name, CRS.line); + CRT.GetSym(sp, sn); sn.struct := CRT.classToken; + CRT.PutSym(sp, sn) + END .) + SYNC + ( "=" TokenExpr "." (. IF kind # ident THEN SemErr(13) END; + CRT.CompleteGraph(gR); + CRA.ConvertToStates(gL, sp) .) + | (. IF kind = ident THEN genScanner := FALSE + ELSE MatchLiteral(sp) + END .) + ) + [ SemText (. IF typ = CRT.t THEN SemErr(14) END; + CRT.GetSym(sp, sn); sn.semPos := pos; CRT.PutSym(sp, sn) .) + ]. +(*------------------------------------------------------------------------------------*) +Expression (. VAR gL2, gR2: INTEGER; first: BOOLEAN; .) += + Term (. first := TRUE .) + { WEAK "|" + Term (. IF first THEN + CRT.MakeFirstAlt(gL, gR); first := FALSE + END; + CRT.ConcatAlt(gL, gR, gL2, gR2) .) + }. +(*------------------------------------------------------------------------------------*) +Term (. VAR gL2, gR2: INTEGER; .) += (. gL := 0; gR := 0 .) + ( Factor + { Factor (. CRT.ConcatSeq(gL, gR, gL2, gR2) .) + } + | (. gL := CRT.NewNode(CRT.eps, 0, 0); gR := gL .) + ). +(*------------------------------------------------------------------------------------*) +Factor (. VAR sp, kind, c: INTEGER; name: CRT.Name; + gn: CRT.GraphNode; sn: CRT.SymbolNode; + set: CRT.Set; + undef, weak: BOOLEAN; + pos: CRT.Position; .) += + (. gL :=0; gR := 0; weak := FALSE .) +( [ "WEAK" (. weak := TRUE .) + ] + Symbol (. sp := CRT.FindSym(name); undef := sp = CRT.noSym; + IF undef THEN + IF kind = ident THEN (*forward nt*) + sp := CRT.NewSym(CRT.nt, name, 0) + ELSE (*undefined string in production*) + sp := CRT.NewSym(CRT.t, name, CRS.line); + MatchLiteral(sp) + END + END; + CRT.GetSym(sp, sn); + IF ~(sn.typ IN {CRT.t,CRT.nt}) THEN SemErr(4) END; + IF weak THEN + IF sn.typ = CRT.t THEN sn.typ := CRT.wt ELSE SemErr(23) END + END; + gL := CRT.NewNode(sn.typ, sp, CRS.line); gR := gL .) + + ( Attribs (. CRT.GetNode(gL, gn); gn.pos := pos; CRT.PutNode(gL, gn); + CRT.GetSym(sp, sn); + IF undef THEN + sn.attrPos := pos; CRT.PutSym(sp, sn) + ELSIF sn.attrPos.beg < 0 THEN SemErr(5) + END; + IF kind # ident THEN SemErr(3) END .) + | (. CRT.GetSym(sp, sn); + IF sn.attrPos.beg >= 0 THEN SemErr(6) END .) + ) +| "(" Expression ")" +| "[" Expression "]" (. CRT.MakeOption(gL, gR) .) +| "{" Expression "}" (. CRT.MakeIteration(gL, gR) .) +| SemText (. gL := CRT.NewNode(CRT.sem, 0, 0); + gR := gL; + CRT.GetNode(gL, gn); gn.pos := pos; CRT.PutNode(gL, gn) .) +| "ANY" (. Sets.Fill(set); Sets.Excl(set, CRT.eofSy); + gL := CRT.NewNode(CRT.any, CRT.NewSet(set), 0); gR := gL .) +| "SYNC" (. gL := CRT.NewNode(CRT.sync, 0, 0); gR := gL .) +). +(*------------------------------------------------------------------------------------*) +TokenExpr (. VAR gL2, gR2: INTEGER; first: BOOLEAN; .) += + TokenTerm (. first := TRUE .) + { WEAK "|" + TokenTerm (. IF first THEN + CRT.MakeFirstAlt(gL, gR); first := FALSE + END; + CRT.ConcatAlt(gL, gR, gL2, gR2) .) + }. +(*------------------------------------------------------------------------------------*) +TokenTerm (. VAR gL2, gR2: INTEGER; .) += + TokenFactor + { TokenFactor (. CRT.ConcatSeq(gL, gR, gL2, gR2) .) + } + [ "CONTEXT" + "(" TokenExpr (. SetCtx(gL2); CRT.ConcatSeq(gL, gR, gL2, gR2) .) + ")" + ]. +(*------------------------------------------------------------------------------------*) +TokenFactor (. VAR kind, c: INTEGER; set: CRT.Set; name: CRT.Name; .) += + (. gL :=0; gR := 0 .) +( Symbol (. IF kind = ident THEN + c := CRT.ClassWithName(name); + IF c < 0 THEN + SemErr(15); + Sets.Clear(set); c := CRT.NewClass(name, set) + END; + gL := CRT.NewNode(CRT.class, c, 0); gR := gL + ELSE (*string*) + CRT.StrToGraph(name, gL, gR) + END .) +| "(" TokenExpr ")" +| "[" TokenExpr "]" (. CRT.MakeOption(gL, gR) .) +| "{" TokenExpr "}" (. CRT.MakeIteration(gL, gR) .) +). +(*------------------------------------------------------------------------------------*) +Symbol = + ( ident (. kind := ident .) + | string (. kind := string .) + ) (. CRS.GetName(CRS.pos, CRS.len, name); + IF kind = string THEN FixString(name, CRS.len) END .) . +(*------------------------------------------------------------------------------------*) +Attribs = + "<" (. attrPos.beg := CRS.nextPos; attrPos.col := CRS.nextCol .) + { ANY } + ">" (. attrPos.len := SHORT(CRS.pos - attrPos.beg) .). +(*------------------------------------------------------------------------------------*) +SemText = + "(." (. semPos.beg := CRS.nextPos; semPos.col := CRS.nextCol .) + { ANY } + ".)" (. semPos.len := SHORT(CRS.pos - semPos.beg) .). + +END CR. diff --git a/tools/Coco/CRA.obn b/tools/Coco/CRA.obn new file mode 100644 index 0000000..5a7face --- /dev/null +++ b/tools/Coco/CRA.obn @@ -0,0 +1,930 @@ +MODULE CRA; (* handles the DFA *) + +IMPORT Oberon, Texts, Sets, CRS, CRT; + +CONST + maxStates = 300; + EOL = 0DX; + +TYPE + State = POINTER TO StateNode; + Action = POINTER TO ActionNode; + Target = POINTER TO TargetNode; + + StateNode = RECORD (*state of finite automaton*) + nr: INTEGER; (*state number*) + firstAction: Action; (*to first action of this state*) + endOf: INTEGER; (*nr. of recognized token if state is final*) + ctx: BOOLEAN; (*TRUE: state reached by contextTrans*) + next: State + END; + ActionNode = RECORD (*action of finite automaton*) + typ: INTEGER; (*type of action symbol: char, class*) + sym: INTEGER; (*action symbol*) + tc: INTEGER; (*transition code: normTrans, contextTrans*) + target: Target; (*states after transition with input symbol*) + next: Action; + END; + TargetNode = RECORD (*state after transition with input symbol*) + state: State; (*target state*) + next: Target; + END; + + Comment = POINTER TO CommentNode; + CommentNode = RECORD (* info about a comment syntax *) + start,stop: ARRAY 2 OF CHAR; + nested: BOOLEAN; + next: Comment; + END; + + Melted = POINTER TO MeltedNode; + MeltedNode = RECORD (* info about melted states *) + set: CRT.Set; (* set of old states *) + state: State; (* new state *) + next: Melted; + END; + + +VAR + firstState: State; + lastState: State; (* last allocated state *) + rootState: State; (* start state of DFA *) + lastSimState: INTEGER; (* last non melted state *) + stateNr: INTEGER; (*number of last allocated state*) + firstMelted: Melted; (* list of melted states *) + firstComment: Comment; (* list of comments *) + out: Texts.Writer; (* current output *) + fram: Texts.Reader; (* scanner frame input *) + + +PROCEDURE SemErr(nr: INTEGER); +BEGIN CRS.Error(200+nr, CRS.pos) +END SemErr; + +PROCEDURE Put(ch: CHAR); +BEGIN Texts.Write(out, ch) END Put; + +PROCEDURE PutS(s: ARRAY OF CHAR); + VAR i: INTEGER; +BEGIN i := 0; + WHILE (i < LEN(s)) & (s[i] # 0X) DO + IF s[i] = "$" THEN Texts.WriteLn(out) ELSE Texts.Write(out, s[i]) END; + INC(i) + END +END PutS; + +PROCEDURE PutI(i: INTEGER); +BEGIN Texts.WriteInt(out, i, 0) END PutI; + +PROCEDURE PutI2(i, n: INTEGER); +BEGIN Texts.WriteInt(out, i, n) END PutI2; + +PROCEDURE PutC(ch: CHAR); +BEGIN + IF (ch < " ") OR (ORD(ch) = 34) THEN PutS("CHR("); PutI(ORD(ch)); Put(")") + ELSE Put(CHR(34)); Put(ch); Put(CHR(34)) + END +END PutC; + +PROCEDURE PutRange(s: CRT.Set); + VAR lo, hi: ARRAY 32 OF CHAR; top, i: INTEGER; s1: CRT.Set; +BEGIN + (*----- fill lo and hi *) + top := -1; i := 0; + WHILE i < 128 DO + IF Sets.In(s, i) THEN + INC(top); lo[top] := CHR(i); INC(i); + WHILE (i < 128) & Sets.In(s, i) DO INC(i) END; + hi[top] := CHR(i - 1) + ELSE INC(i) + END + END; + (*----- print ranges *) + IF (top = 1) & (lo[0] = 0X) & (hi[1] = 7FX) & (CHR(ORD(hi[0]) + 2) = lo[1]) THEN + Sets.Fill(s1); Sets.Differ(s1, s); PutS("~ ("); PutRange(s1); Put(")") + ELSE + i := 0; + WHILE i <= top DO + IF hi[i] = lo[i] THEN PutS("(ch="); PutC(lo[i]) + ELSIF lo[i] = 0X THEN PutS("(ch<="); PutC(hi[i]) + ELSIF hi[i] = 7FX THEN PutS("(ch>="); PutC(lo[i]) + ELSE PutS("(ch>="); PutC(lo[i]); PutS(") & (ch<="); PutC(hi[i]) + END; + Put(")"); + IF i < top THEN PutS(" OR ") END; + INC(i) + END + END +END PutRange; + +PROCEDURE PutChCond(ch: CHAR); +BEGIN + PutS("(ch ="); PutC(ch); Put(")") +END PutChCond; + +PROCEDURE Length*(s: ARRAY OF CHAR): INTEGER; + VAR i: INTEGER; +BEGIN + i:=0; WHILE (i < LEN(s)) & (s[i] # 0X) DO INC(i) END; + RETURN i +END Length; + + +PROCEDURE AddAction(act:Action; VAR head:Action); +VAR a,lasta: Action; +BEGIN + a := head; lasta := NIL; + LOOP + IF (a = NIL) (*collecting classes at the front gives better*) + OR (act^.typ < a^.typ) THEN (*performance*) + act^.next := a; + IF lasta = NIL THEN head := act ELSE lasta^.next := act END; + EXIT; + END; + lasta := a; a := a^.next; + END; +END AddAction; + + +PROCEDURE DetachAction(a:Action; VAR L:Action); +BEGIN + IF L = a THEN L := a^.next ELSIF L # NIL THEN DetachAction(a, L^.next) END +END DetachAction; + + +PROCEDURE TheAction (state: State; ch: CHAR): Action; + VAR a: Action; set: CRT.Set; +BEGIN + a := state.firstAction; + WHILE a # NIL DO + IF a.typ = CRT.char THEN + IF ORD(ch) = a.sym THEN RETURN a END + ELSIF a.typ = CRT.class THEN + CRT.GetClass(a^.sym, set); + IF Sets.In(set, ORD(ch)) THEN RETURN a END + END; + a := a.next + END; + RETURN NIL +END TheAction; + + +PROCEDURE AddTargetList(VAR lista, listb: Target); +VAR p,t: Target; + + PROCEDURE AddTarget(t: Target; VAR list:Target); + VAR p,lastp: Target; + BEGIN + p:=list; lastp:=NIL; + LOOP + IF (p = NIL) OR (t^.state.nr < p^.state.nr) THEN EXIT END; + IF p^.state = t^.state THEN RETURN END; + lastp := p; p := p^.next + END; + t^.next:=p; + IF lastp=NIL THEN list:=t ELSE lastp^.next:=t END + END AddTarget; + +BEGIN + p := lista; + WHILE p # NIL DO + NEW(t); t^.state:=p^.state; AddTarget(t, listb); + p := p^.next + END +END AddTargetList; + + +PROCEDURE NewMelted(set: CRT.Set; state: State): Melted; +VAR melt: Melted; +BEGIN + NEW(melt); melt^.set := set; melt^.state := state; + melt^.next := firstMelted; firstMelted := melt; + RETURN melt +END NewMelted; + + +PROCEDURE NewState(): State; + VAR state: State; +BEGIN + NEW(state); INC(stateNr); state.nr := stateNr; + state.firstAction := NIL; state.endOf := CRT.noSym; state.ctx := FALSE; state.next := NIL; + IF firstState = NIL THEN firstState := state ELSE lastState.next := state END; + lastState := state; + RETURN state +END NewState; + + +PROCEDURE NewTransition(from, to: State; typ, sym, tc: INTEGER); + VAR a: Action; t: Target; +BEGIN + NEW(t); t^.state := to; t^.next := NIL; + NEW(a); a^.typ := typ; a^.sym := sym; a^.tc := tc; a^.target := t; + AddAction(a, from.firstAction) +END NewTransition; + + +PROCEDURE NewComment*(from, to: INTEGER; nested: BOOLEAN); + VAR com: Comment; + + PROCEDURE MakeStr(gp: INTEGER; VAR s: ARRAY OF CHAR); + VAR i, n: INTEGER; gn: CRT.GraphNode; set: CRT.Set; + BEGIN + i := 0; + WHILE gp # 0 DO + CRT.GetNode(gp, gn); + IF gn.typ = CRT.char THEN + IF i < 2 THEN s[i] := CHR(gn.p1) END; INC(i) + ELSIF gn.typ = CRT.class THEN + CRT.GetClass(gn.p1, set); + IF Sets.Elements(set, n) # 1 THEN SemErr(26) END; + IF i < 2 THEN s[i] := CHR(n) END; INC(i) + ELSE SemErr(22) + END; + gp := gn.next + END; + IF i > 2 THEN SemErr(25) ELSIF i < 2 THEN s[i] := 0X END + END MakeStr; + +BEGIN + NEW(com); + MakeStr(from, com^.start); MakeStr(to, com^.stop); + com^.nested := nested; + com^.next := firstComment; firstComment := com +END NewComment; + + +PROCEDURE MakeSet(p: Action; VAR set: CRT.Set); +BEGIN + IF p^.typ = CRT.class THEN CRT.GetClass(p^.sym, set) + ELSE Sets.Clear(set); Sets.Incl(set, p^.sym) + END +END MakeSet; + + +PROCEDURE ChangeAction(a: Action; set: CRT.Set); +VAR nr: INTEGER; +BEGIN + IF Sets.Elements(set, nr) = 1 THEN a^.typ := CRT.char; a^.sym := nr + ELSE + nr := CRT.ClassWithSet(set); + IF nr < 0 THEN nr := CRT.NewClass("#0", set) END; (*class with dummy name*) + a^.typ := CRT.class; a^.sym := nr + END +END ChangeAction; + + +PROCEDURE CombineShifts; + VAR state: State; n: INTEGER; a, b, c: Action; seta, setb: CRT.Set; +BEGIN + state := firstState; + WHILE state # NIL DO + a := state.firstAction; + WHILE a # NIL DO + b := a^.next; + WHILE b # NIL DO + IF (a^.target^.state = b^.target^.state) & (a^.tc = b^.tc) THEN + MakeSet(a, seta); MakeSet(b, setb); Sets.Unite(seta, setb); + ChangeAction(a, seta); + c := b; b := b^.next; DetachAction(c, a) + ELSE b := b^.next + END + END; + a := a^.next + END; + state := state.next + END +END CombineShifts; + + +PROCEDURE DeleteRedundantStates; +VAR + action: Action; + state, s1, s2: State; + used: CRT.Set; + newState: ARRAY maxStates OF State; + + PROCEDURE FindUsedStates(state: State); + VAR action: Action; + BEGIN + IF Sets.In(used, state.nr) THEN RETURN END; + Sets.Incl(used, state.nr); + action := state.firstAction; + WHILE action # NIL DO + FindUsedStates(action^.target^.state); + action:=action^.next + END + END FindUsedStates; + + PROCEDURE DelUnused; + VAR state: State; + BEGIN + state := firstState.next; lastState := firstState; stateNr := 0; (*firstState has number 0*) + WHILE state # NIL DO + IF Sets.In(used, state.nr) THEN INC(stateNr); state.nr := stateNr; lastState := state + ELSE lastState.next := state.next + END; + state := state.next + END + END DelUnused; + +BEGIN + Sets.Clear(used); FindUsedStates(firstState); + (*---------- combine equal final states ------------*) + s1 := firstState.next; (*first state cannot be final*) + WHILE s1 # NIL DO + IF Sets.In(used, s1.nr) & (s1.endOf # CRT.noSym) & (s1.firstAction = NIL) & ~ s1.ctx THEN + s2 := s1.next; + WHILE s2 # NIL DO + IF Sets.In(used, s2.nr) & (s1.endOf = s2.endOf) & (s2.firstAction = NIL) & ~ s2.ctx THEN + Sets.Excl(used, s2.nr); newState[s2.nr] := s1 + END; + s2 := s2.next + END + END; + s1 := s1.next + END; + state := firstState; (*> state := firstState.next*) + WHILE state # NIL DO + IF Sets.In(used, state.nr) THEN + action := state.firstAction; + WHILE action # NIL DO + IF ~ Sets.In(used, action.target.state.nr) THEN + action^.target^.state := newState[action.target.state.nr] + END; + action := action^.next + END + END; + state := state.next + END; + DelUnused +END DeleteRedundantStates; + + +PROCEDURE ConvertToStates*(gp0, sp: INTEGER); +(*note: gn.line is abused as a state number!*) + VAR n: INTEGER; S: ARRAY maxStates OF State; gn: CRT.GraphNode; + + PROCEDURE TheState(gp: INTEGER): State; + VAR state: State; gn: CRT.GraphNode; + BEGIN + IF gp = 0 THEN state := NewState(); state.endOf := sp; RETURN state + ELSE CRT.GetNode(gp, gn); RETURN S[gn.line] + END + END TheState; + + PROCEDURE Step(from: State; gp: INTEGER); + VAR gn: CRT.GraphNode; + BEGIN + IF gp = 0 THEN RETURN END; + CRT.GetNode(gp, gn); + CASE gn.typ OF + CRT.class, CRT.char: NewTransition(from, TheState(ABS(gn.next)), gn.typ, gn.p1, gn.p2) + | CRT.alt: Step(from, gn.p1); Step(from, gn.p2) + | CRT.opt, CRT.iter: Step(from, ABS(gn.next)); Step(from, gn.p1) + END + END Step; + + PROCEDURE FindTrans(gp: INTEGER; state: State); + VAR gn: CRT.GraphNode; new: BOOLEAN; + BEGIN + IF gp = 0 THEN RETURN END; (*end of graph*) + CRT.GetNode(gp, gn); + IF gn.line # 0 THEN RETURN END; (*already visited*) + new := state = NIL; + IF new THEN state := NewState() END; + INC(n); S[n] := state; gn.line := n; CRT.PutNode(gp, gn); + IF CRT.DelGraph(gp) THEN state.endOf := sp END; (*state is end state*) + CASE gn.typ OF + CRT.class, CRT.char: FindTrans(ABS(gn.next), NIL); + | CRT.opt: FindTrans(ABS(gn.next), NIL); FindTrans(gn.p1, state) + | CRT.iter: FindTrans(ABS(gn.next), state); FindTrans(gn.p1, state) + | CRT.alt: FindTrans(gn.p1, state); FindTrans(gn.p2, state) + END; + IF new OR (state = firstState) & (gp = gp0) THEN (*start of a group of equally numbered nodes*) + Step(state, gp) + END + END FindTrans; + +BEGIN + IF CRT.DelGraph(gp0) THEN SemErr(20) END; + CRT.GetNode(gp0, gn); + IF gn.typ = CRT.iter THEN SemErr(21) END; + n := 0; FindTrans(gp0, firstState) +END ConvertToStates; + + +PROCEDURE MatchDFA* (s: ARRAY OF CHAR; sp: INTEGER; VAR matchedSp: INTEGER); + VAR state, to: State; a: Action; i, len: INTEGER; +BEGIN (*s with quotes*) + state := firstState; i := 1; len := Length(s) - 1; + LOOP (*try to match s against existing DFA*) + IF i = len THEN EXIT END; + a := TheAction(state, s[i]); + IF a = NIL THEN EXIT END; + state := a.target.state; INC(i) + END; + WHILE i < len DO (*make new DFA for s[i..len-1]*) + to := NewState(); + NewTransition(state, to, CRT.char, ORD(s[i]), CRT.normTrans); + state := to; INC(i) + END; + matchedSp := state.endOf; + IF state.endOf = CRT.noSym THEN state.endOf := sp END +END MatchDFA; + + +PROCEDURE SplitActions(a, b: Action); +VAR c: Action; seta, setb, setc: CRT.Set; + + PROCEDURE CombineTransCodes(t1, t2: INTEGER; VAR result:INTEGER); + BEGIN + IF t1 = CRT.contextTrans THEN result := t1 ELSE result := t2 END + END CombineTransCodes; + +BEGIN + MakeSet(a, seta); MakeSet(b, setb); + IF Sets.Equal(seta, setb) THEN + AddTargetList(b^.target, a^.target); + CombineTransCodes(a^.tc, b^.tc, a^.tc); + DetachAction(b, a) + ELSIF Sets.Includes(seta, setb) THEN + setc := seta; Sets.Differ(setc, setb); + AddTargetList(a^.target, b^.target); + CombineTransCodes(a^.tc, b^.tc, b^.tc); + ChangeAction(a, setc) + ELSIF Sets.Includes(setb, seta) THEN + setc := setb; Sets.Differ(setc, seta); + AddTargetList(b^.target, a^.target); + CombineTransCodes(a^.tc, b^.tc, a^.tc); + ChangeAction(b, setc) + ELSE + Sets.Intersect(seta, setb, setc); + Sets.Differ(seta, setc); + Sets.Differ(setb, setc); + ChangeAction(a, seta); + ChangeAction(b, setb); + NEW(c); c^.target:=NIL; + CombineTransCodes(a^.tc, b^.tc, c^.tc); + AddTargetList(a^.target, c^.target); + AddTargetList(b^.target, c^.target); + ChangeAction(c, setc); + AddAction(c, a) + END +END SplitActions; + + +PROCEDURE MakeUnique(state: State; VAR changed:BOOLEAN); +VAR a, b: Action; + + PROCEDURE Overlap(a, b: Action): BOOLEAN; + VAR seta, setb: CRT.Set; + BEGIN + IF a^.typ = CRT.char THEN + IF b^.typ = CRT.char THEN RETURN a^.sym = b^.sym + ELSE CRT.GetClass(b^.sym, setb); RETURN Sets.In(setb, a^.sym) + END + ELSE + CRT.GetClass(a^.sym, seta); + IF b^.typ = CRT.char THEN RETURN Sets.In(seta, b^.sym) + ELSE CRT.GetClass(b^.sym, setb); RETURN ~ Sets.Different(seta, setb) + END + END + END Overlap; + +BEGIN + a := state.firstAction; changed := FALSE; + WHILE a # NIL DO + b := a^.next; + WHILE b # NIL DO + IF Overlap(a, b) THEN SplitActions(a, b); changed:=TRUE END; + b := b^.next; + END; + a:=a^.next + END +END MakeUnique; + + +PROCEDURE MeltStates(state: State; VAR correct:BOOLEAN); +VAR + action: Action; + ctx: BOOLEAN; + endOf: INTEGER; + melt: Melted; + set: CRT.Set; + s: State; + changed: BOOLEAN; + + PROCEDURE AddMeltedSet(nr: INTEGER; VAR set: CRT.Set); + VAR m: Melted; + BEGIN + m := firstMelted; + WHILE (m # NIL) & (m^.state.nr # nr) DO m := m^.next END; + IF m = NIL THEN HALT(98) END; + Sets.Unite(set, m^.set); + END AddMeltedSet; + + PROCEDURE GetStateSet(t: Target; VAR set: CRT.Set; VAR endOf: INTEGER; VAR ctx:BOOLEAN); + VAR statenr: INTEGER; (*lastS: State;*) + BEGIN + Sets.Clear(set); endOf := CRT.noSym; ctx := FALSE; (*lastS := NIL;*) + WHILE t # NIL DO + statenr := t.state.nr; + IF statenr <= lastSimState THEN Sets.Incl(set, statenr) + ELSE AddMeltedSet(statenr, set) + END; + IF t^.state^.endOf # CRT.noSym THEN + IF (endOf = CRT.noSym) OR (endOf = t^.state^.endOf) + (*OR (lastS^.firstAction # NIL) & (t^.state^.firstAction = NIL)*) THEN + endOf := t^.state.endOf; (*lastS := t^.state*) + ELSE + PutS("$Tokens "); PutI(endOf); PutS(" and "); PutI(t^.state.endOf); + PutS(" cannot be distinguished.$"); + correct:=FALSE + END + END; + IF t^.state.ctx THEN ctx := TRUE; + IF t.state.endOf # CRT.noSym THEN + PutS("$Ambiguous CONTEXT clause.$"); correct := FALSE + END + END; + t := t^.next + END + END GetStateSet; + + PROCEDURE FillWithActions(state: State; targ: Target); + VAR action,a: Action; + BEGIN + WHILE targ # NIL DO + action := targ^.state.firstAction; + WHILE action # NIL DO + NEW(a); a^ := action^; a^.target := NIL; + AddTargetList(action^.target, a^.target); + AddAction(a, state.firstAction); + action:=action^.next + END; + targ:=targ^.next + END; + END FillWithActions; + + PROCEDURE KnownMelted(set:CRT.Set; VAR melt: Melted): BOOLEAN; + BEGIN + melt := firstMelted; + WHILE melt # NIL DO + IF Sets.Equal(set, melt^.set) THEN RETURN TRUE END; + melt := melt^.next + END; + RETURN FALSE + END KnownMelted; + +BEGIN + action := state.firstAction; + WHILE action # NIL DO + IF action^.target^.next # NIL THEN (*more than one target state*) + GetStateSet(action^.target, set, endOf, ctx); + IF ~ KnownMelted(set, melt) THEN + s := NewState(); s.endOf := endOf; s.ctx := ctx; + FillWithActions(s, action^.target); + REPEAT MakeUnique(s, changed) UNTIL ~ changed; + melt := NewMelted(set, s); + END; + action^.target^.next:=NIL; + action^.target^.state := melt^.state + END; + action := action^.next + END; + Texts.Append(Oberon.Log, out.buf) +END MeltStates; + + +PROCEDURE MakeDeterministic*(VAR correct: BOOLEAN); + VAR state: State; changed: BOOLEAN; + + PROCEDURE FindCtxStates; (*find states reached by a context transition*) + VAR a: Action; state: State; + BEGIN + state := firstState; + WHILE state # NIL DO + a := state.firstAction; + WHILE a # NIL DO + IF a^.tc = CRT.contextTrans THEN a^.target^.state.ctx := TRUE END; + a := a^.next + END; + state := state.next + END; + END FindCtxStates; + +BEGIN + IF lastState = NIL THEN lastSimState := 0 ELSE lastSimState := lastState.nr END; + FindCtxStates; + state := firstState; + WHILE state # NIL DO + REPEAT MakeUnique(state, changed) UNTIL ~ changed; + state := state.next + END; + correct := TRUE; + state := firstState; + WHILE state # NIL DO MeltStates(state, correct); state := state.next END; + DeleteRedundantStates; + CombineShifts +END MakeDeterministic; + + +PROCEDURE PrintSymbol(typ, val, width: INTEGER); +VAR name: CRT.Name; len: INTEGER; +BEGIN + IF typ = CRT.class THEN + CRT.GetClassName(val, name); PutS(name); len := Length(name) + ELSIF (val >= ORD(" ")) & (val < 127) & (val # 34) THEN + Put(CHR(34)); Put(CHR(val)); Put(CHR(34)); len:=3 + ELSE + PutS("CHR("); PutI2(val, 2); Put(")"); len:=7 + END; + WHILE len < width DO Put(" "); INC(len) END +END PrintSymbol; + + +PROCEDURE PrintStates*; +VAR action: Action; first: BOOLEAN; state: State; i: INTEGER; targ: Target; set: CRT.Set; name: CRT.Name; +BEGIN + PutS("$-------- states ---------$"); + state := firstState; + WHILE state # NIL DO + action := state.firstAction; first:=TRUE; + IF state.endOf = CRT.noSym THEN PutS(" ") + ELSE PutS("E("); PutI2(state.endOf, 2); Put(")") + END; + PutI2(state.nr, 3); Put(":"); IF action = NIL THEN PutS(" $") END; + WHILE action # NIL DO + IF first THEN Put(" "); first:=FALSE ELSE PutS(" ") END; + PrintSymbol(action^.typ, action^.sym, 0); Put(" "); + targ := action^.target; + WHILE targ # NIL DO + PutI(targ^.state.nr); Put(" "); targ := targ^.next; + END; + IF action^.tc = CRT.contextTrans THEN PutS(" context$") ELSE PutS(" $") END; + action := action^.next + END; + state := state.next + END; + PutS("$-------- character classes ---------$"); + i := 0; + WHILE i <= CRT.maxC DO + CRT.GetClass(i, set); CRT.GetClassName(i, name); PutS(name); PutS(": "); + Sets.Print(out, set, 80, 13); Texts.WriteLn(out); + INC(i) + END; + Texts.Append(Oberon.Log, out.buf) +END PrintStates; + + +PROCEDURE GenComment(com:Comment); + + PROCEDURE GenBody; + BEGIN + PutS(" LOOP$"); + PutS(" IF "); PutChCond(com^.stop[0]); PutS(" THEN$"); + IF Length(com^.stop) = 1 THEN + PutS(" DEC(level); oldEols := chLine - startLine; NextCh;$"); + PutS(" IF level = 0 THEN RETURN TRUE END;$"); + ELSE + PutS(" NextCh;$"); + PutS(" IF "); PutChCond(com^.stop[1]); PutS(" THEN$"); + PutS(" DEC(level); oldEols := chLine - startLine; NextCh;$"); + PutS(" IF level=0 THEN RETURN TRUE END$"); + PutS(" END;$"); + END; + IF com^.nested THEN + PutS(" ELSIF "); PutChCond(com^.start[0]); PutS(" THEN$"); + IF Length(com^.start) = 1 THEN + PutS(" INC(level); NextCh;$"); + ELSE + PutS(" NextCh;$"); + PutS(" IF "); PutChCond(com^.start[1]); PutS(" THEN$"); + PutS(" INC(level); NextCh;$"); + PutS(" END;$"); + END; + END; + PutS(" ELSIF ch = EOF THEN RETURN FALSE$"); + PutS(" ELSE NextCh END;$"); + PutS(" END;$"); + END GenBody; + +BEGIN + PutS(" IF "); PutChCond(com^.start[0]); PutS(" THEN$"); + IF Length(com^.start) = 1 THEN + PutS(" NextCh;$"); + GenBody; + PutS(" END;"); + ELSE + PutS(" NextCh;$"); + PutS(" IF "); PutChCond(com^.start[1]); PutS(" THEN$"); + PutS(" NextCh;$"); + GenBody; + PutS(" ELSE$"); + PutS(" IF ch = EOL THEN DEC(chLine); lineStart := oldLineStart END;$"); + PutS(" DEC(chPos, 2); Texts.OpenReader(r, src, chPos+1); NextCh; RETURN FALSE$"); + PutS(" END$"); + PutS(" END;"); + END; + END GenComment; + + +PROCEDURE CopyFramePart (stopStr: ARRAY OF CHAR); (*Copy from file to file until *) + VAR ch, startCh: CHAR; i, j, high: INTEGER; +BEGIN + startCh := stopStr[0]; high := Length(stopStr) - 1; Texts.Read (fram, ch); + WHILE ch # 0X DO + IF ch = startCh THEN (* check if stopString occurs *) + i := 0; + REPEAT + IF i = high THEN RETURN END; (*stopStr[0..i] found; no unrecognized character*) + Texts.Read (fram, ch); INC(i); + UNTIL ch # stopStr[i]; + (*stopStr[0..i-1] found; 1 unrecognized character*) + j := 0; WHILE j < i DO Texts.Write(out, stopStr[j]); INC(j) END + ELSE Texts.Write (out, ch); Texts.Read(fram, ch) + END + END +END CopyFramePart; + +PROCEDURE GenLiterals; + VAR + i, j, k, l: INTEGER; + key: ARRAY 128 OF CRT.Name; + knr: ARRAY 128 OF INTEGER; + ch: CHAR; + sn: CRT.SymbolNode; +BEGIN + (*-- sort literal list*) + i := 0; k := 0; + WHILE i <= CRT.maxT DO + CRT.GetSym(i, sn); + IF sn.struct = CRT.litToken THEN + j := k-1; WHILE (j >= 0) & (sn.name < key[j]) DO key[j+1] := key[j]; knr[j+1] := knr[j]; DEC(j) END; + key[j+1] := sn.name; knr[j+1] := i; INC(k) + END; + INC(i) + END; + (*-- print case statement*) + IF k > 0 THEN + PutS(" IF (lexeme[0] >= "); PutC(key[0, 1]); PutS(") & (lexeme[0] <= "); PutC(key[k-1, 1]); PutS(") THEN$"); + PutS(" CASE lexeme[0] OF$"); + i := 0; + WHILE i < k DO + ch := key[i, 1]; (*key[i, 0] = quote*) + PutS(" | "); PutC(ch); j := i; + REPEAT + IF i = j THEN PutS(": IF lexeme = ") ELSE PutS(" ELSIF lexeme = ") END; + PutS(key[i]); PutS(" THEN sym := "); PutI(knr[i]); Put(CHR(13)); + INC(i) + UNTIL (i = k) OR (key[i, 1] # ch); + PutS(" END$"); + END; + PutS(" ELSE$ END$ END;$") + END +END GenLiterals; + + +PROCEDURE WriteState(state: State); + VAR action: Action; first, ctxEnd: BOOLEAN; sn: CRT.SymbolNode; endOf: INTEGER; + set: CRT.Set; +BEGIN + endOf := state.endOf; + IF (endOf > CRT.maxT) & (endOf # CRT.noSym) THEN (*pragmas have been moved*) + endOf := CRT.maxT + CRT.maxSymbols - endOf + END; + PutS(" | "); PutI2(state.nr, 2); PutS(": "); + first:=TRUE; ctxEnd := state.ctx; + action := state.firstAction; + WHILE action # NIL DO + IF first THEN PutS("IF "); first:=FALSE ELSE PutS(" ELSIF ") END; + IF action^.typ = CRT.char THEN PutChCond(CHR(action^.sym)) + ELSE CRT.GetClass(action^.sym, set); PutRange(set) + END; + PutS(" THEN"); + IF action.target.state.nr # state.nr THEN + PutS(" state := "); PutI(action.target.state.nr); Put(";") + END; + IF action^.tc = CRT.contextTrans THEN PutS(" INC(apx)"); ctxEnd := FALSE + ELSIF state.ctx THEN PutS(" apx := 0") + END; + PutS(" $"); + action := action^.next + END; + IF state.firstAction # NIL THEN PutS(" ELSE ") END; + IF endOf = CRT.noSym THEN PutS("sym := noSym; ") + ELSE (*final state*) + CRT.GetSym(endOf, sn); + IF ctxEnd THEN (*final context state: cut appendix*) + PutS("chPos := chPos - apx - 1; Texts.OpenReader(r, src, chPos+1); NextCh; ") + END; + PutS("sym := "); PutI(endOf); PutS("; "); + IF sn.struct = CRT.classLitToken THEN PutS("CheckLiteral; ") END + END; + PutS("RETURN$"); + IF state.firstAction # NIL THEN PutS(" END;$") END +END WriteState; + +PROCEDURE *Show (t: Texts.Text; op: INTEGER; beg, end: LONGINT); +END Show; + + +PROCEDURE WriteScanner*; +VAR + scanner: ARRAY 32 OF CHAR; + name: ARRAY 64 OF CHAR; + startTab: ARRAY 128 OF INTEGER; + com: Comment; + i, j, l: INTEGER; + gn: CRT.GraphNode; + sn: CRT.SymbolNode; + state: State; + t: Texts.Text; + + PROCEDURE FillStartTab; + VAR action: Action; i, targetState: INTEGER; class: CRT.Set; + BEGIN + startTab[0] := stateNr + 1; (*eof*) + i := 1; WHILE i < 128 DO startTab[i] := 0; INC(i) END; + action := firstState.firstAction; + WHILE action # NIL DO + targetState := action.target.state.nr; + IF action^.typ = CRT.char THEN + startTab[action^.sym] := targetState + ELSE + CRT.GetClass(action^.sym, class); i := 0; + WHILE i < 128 DO + IF Sets.In(class, i) THEN startTab[i] := targetState END; + INC(i) + END + END; + action := action^.next + END + END FillStartTab; + +BEGIN + FillStartTab; + CRT.GetNode(CRT.root, gn); CRT.GetSym(gn.p1, sn); + COPY(sn.name, scanner); l := Length(scanner); scanner[l] := "S"; scanner[l+1] := 0X; + NEW(t); Texts.Open(t, "Scanner.FRM"); Texts.OpenReader(fram, t, 0); + IF t.len = 0 THEN + Texts.WriteString(out, "Scanner.FRM not found"); Texts.WriteLn(out); + Texts.Append(Oberon.Log, out.buf); HALT(99) + END; + Texts.Append(Oberon.Log, out.buf); + + (*------- *S.MOD -------*) + CopyFramePart("-->modulename"); PutS(scanner); + CopyFramePart("-->declarations"); PutS(" noSym = "); PutI(CRT.maxT); Put(";"); + CopyFramePart("-->comment"); + com := firstComment; + WHILE com # NIL DO GenComment(com); com := com^.next END; + CopyFramePart("-->literals"); GenLiterals; + + CopyFramePart("-->GetSy1"); + IF ~ Sets.In(CRT.ignored, ORD(EOL)) THEN PutS(" IF oldEols > 0 THEN DEC(oldEols); ch := EOL END;$") END; + PutS(" WHILE (ch=20X)"); IF ~ Sets.Empty(CRT.ignored) THEN PutS(" OR ") END; + PutRange(CRT.ignored); PutS(" DO NextCh END;"); + IF firstComment # NIL THEN + PutS("$ IF ("); com := firstComment; + WHILE com # NIL DO + PutChCond(com^.start[0]); + IF com^.next # NIL THEN PutS(" OR ") END; + com := com^.next + END; + PutS(") & Comment() THEN Get(sym); RETURN END;") + END; + CopyFramePart("-->GetSy2"); + state := firstState.next; + WHILE state # NIL DO WriteState(state); state := state.next END; + PutS(" | "); PutI2(stateNr + 1, 2); PutS(": sym := 0; ch := 0X; RETURN$"); + + CopyFramePart("-->initialization"); + i := 0; + WHILE i < 32 DO + j := 0; PutS(" "); + WHILE j < 4 DO + PutS("start["); PutI(4*i+j); PutS("]:="); PutI(startTab[4*i+j]); PutS("; "); + INC(j) + END; + Texts.WriteLn(out); + INC(i) + END; + + CopyFramePart("-->modulename"); PutS(scanner); Put("."); + NEW(t); t.notify := Show; Texts.Open(t, ""); Texts.Append(t, out.buf); + l := Length(scanner); scanner[l] := "."; scanner[l+1] := "M"; scanner[l+2] := "o"; scanner[l+3] := "d"; scanner[l+4] := 0X; + Texts.Close(t, scanner) +END WriteScanner; + + +PROCEDURE Init*; +BEGIN + firstState := NIL; lastState := NIL; stateNr := -1; + rootState := NewState(); + firstMelted := NIL; firstComment := NIL +END Init; + +BEGIN + Texts.OpenWriter(out) +END CRA. diff --git a/tools/Coco/CRP.obn b/tools/Coco/CRP.obn new file mode 100644 index 0000000..a021a31 --- /dev/null +++ b/tools/Coco/CRP.obn @@ -0,0 +1,703 @@ +(* parser module generated by Coco-R *) +MODULE CRP; + +IMPORT CRS, CRT, CRA, CRX, Sets, Texts, Oberon; + +CONST + maxP = 39; + maxT = 38; + nrSets = 18; + + setSize = 32; nSets = (maxT DIV setSize) + 1; + +TYPE + SymbolSet = ARRAY nSets OF SET; + +VAR + sym: INTEGER; (* current input symbol *) + symSet: ARRAY nrSets OF SymbolSet; + +CONST + ident = 0; string = 1; (*symbol kind*) + +VAR + str: ARRAY 32 OF CHAR; + w: Texts.Writer; + genScanner: BOOLEAN; + + +PROCEDURE SemErr(nr: INTEGER); +BEGIN + CRS.Error(200+nr, CRS.pos); +END SemErr; + +PROCEDURE MatchLiteral(sp: INTEGER); (*store string either as token or as literal*) + VAR sn, sn1: CRT.SymbolNode; matchedSp: INTEGER; +BEGIN + CRT.GetSym(sp, sn); + CRA.MatchDFA(sn.name, sp, matchedSp); + IF matchedSp # CRT.noSym THEN + CRT.GetSym(matchedSp, sn1); sn1.struct := CRT.classLitToken; CRT.PutSym(matchedSp, sn1); + sn.struct := CRT.litToken + ELSE sn.struct := CRT.classToken; + END; + CRT.PutSym(sp, sn) +END MatchLiteral; + +PROCEDURE SetCtx(gp: INTEGER); (*set transition code to CRT.contextTrans*) + VAR gn: CRT.GraphNode; +BEGIN + WHILE gp > 0 DO + CRT.GetNode(gp, gn); + IF gn.typ IN {CRT.char, CRT.class} THEN + gn.p2 := CRT.contextTrans; CRT.PutNode(gp, gn) + ELSIF gn.typ IN {CRT.opt, CRT.iter} THEN SetCtx(gn.p1) + ELSIF gn.typ = CRT.alt THEN SetCtx(gn.p1); SetCtx(gn.p2) + END; + gp := gn.next + END +END SetCtx; + +PROCEDURE SetDDT(s: ARRAY OF CHAR); + VAR name: ARRAY 64 OF CHAR; i: INTEGER; ch: CHAR; +BEGIN + i := 1; + WHILE s[i] # 0X DO + ch := s[i]; INC(i); + IF (ch >= "0") & (ch <= "9") THEN CRT.ddt[ORD(ch)-ORD("0")] := TRUE END + END +END SetDDT; + +PROCEDURE FixString (VAR s: ARRAY OF CHAR; len: INTEGER); + VAR double: BOOLEAN; i: INTEGER; +BEGIN + double := FALSE; + FOR i := 0 TO len-2 DO + IF s[i] = '"' THEN double := TRUE END + END; + IF ~ double THEN s[0] := '"'; s[len-1] := '"' END +END FixString; + +(*-------------------------------------------------------------------------*) + + +PROCEDURE Error (n: INTEGER); +BEGIN CRS.Error(n, CRS.nextPos) +END Error; + +PROCEDURE Get; +BEGIN + LOOP CRS.Get(sym); + IF sym > maxT THEN + IF sym = 39 THEN + CRS.GetName(CRS.nextPos, CRS.nextLen, str); SetDDT(str) + END; + CRS.nextPos := CRS.pos; + CRS.nextCol := CRS.col; + CRS.nextLine := CRS.line; + CRS.nextLen := CRS.len; + ELSE EXIT + END +END + +END Get; + +PROCEDURE Expect(n: INTEGER); +BEGIN IF sym = n THEN Get ELSE Error(n) END +END Expect; + +PROCEDURE StartOf(s: INTEGER): BOOLEAN; +BEGIN RETURN (sym MOD setSize) IN symSet[s, sym DIV setSize] +END StartOf; + +PROCEDURE ExpectWeak(n, follow: INTEGER); +BEGIN + IF sym = n THEN Get + ELSE Error(n); WHILE ~ StartOf(follow) DO Get END + END +END ExpectWeak; + +PROCEDURE WeakSeparator(n, syFol, repFol: INTEGER): BOOLEAN; + VAR s: SymbolSet; i: INTEGER; +BEGIN + IF sym = n THEN Get; RETURN TRUE + ELSIF StartOf(repFol) THEN RETURN FALSE + ELSE + i := 0; WHILE i < nSets DO s[i] := symSet[syFol, i] + symSet[repFol, i] + symSet[0, i]; INC(i) END; + Error(n); WHILE ~ ((sym MOD setSize) IN s[sym DIV setSize]) DO Get END; + RETURN StartOf(syFol) + END +END WeakSeparator; + +PROCEDURE ^TokenFactor(VAR gL, gR: INTEGER); +PROCEDURE ^TokenTerm(VAR gL, gR: INTEGER); +PROCEDURE ^Factor(VAR gL, gR: INTEGER); +PROCEDURE ^Term(VAR gL, gR: INTEGER); +PROCEDURE ^Symbol(VAR name: CRT.Name; VAR kind: INTEGER); +PROCEDURE ^SimSet(VAR set: CRT.Set); +PROCEDURE ^Set(VAR set: CRT.Set); +PROCEDURE ^TokenExpr(VAR gL, gR: INTEGER); +PROCEDURE ^TokenDecl(typ: INTEGER); +PROCEDURE ^SetDecl; +PROCEDURE ^Expression(VAR gL, gR: INTEGER); +PROCEDURE ^SemText(VAR semPos: CRT.Position); +PROCEDURE ^Attribs(VAR attrPos: CRT.Position); +PROCEDURE ^Declaration; +PROCEDURE ^CR; + +PROCEDURE TokenFactor(VAR gL, gR: INTEGER); + VAR kind, c: INTEGER; set: CRT.Set; name: CRT.Name; +BEGIN + gL :=0; gR := 0 ; + IF (sym = 1) OR (sym = 2) THEN + Symbol(name, kind); + IF kind = ident THEN + c := CRT.ClassWithName(name); + IF c < 0 THEN + SemErr(15); + Sets.Clear(set); c := CRT.NewClass(name, set) + END; + gL := CRT.NewNode(CRT.class, c, 0); gR := gL + ELSE (*string*) + CRT.StrToGraph(name, gL, gR) + END ; + ELSIF (sym = 23) THEN + Get; + TokenExpr(gL, gR); + Expect(24); + ELSIF (sym = 28) THEN + Get; + TokenExpr(gL, gR); + Expect(29); + CRT.MakeOption(gL, gR) ; + ELSIF (sym = 30) THEN + Get; + TokenExpr(gL, gR); + Expect(31); + CRT.MakeIteration(gL, gR) ; + ELSE Error(39) + END; +END TokenFactor; + +PROCEDURE TokenTerm(VAR gL, gR: INTEGER); + VAR gL2, gR2: INTEGER; +BEGIN + TokenFactor(gL, gR); + WHILE StartOf(1) DO + TokenFactor(gL2, gR2); + CRT.ConcatSeq(gL, gR, gL2, gR2) ; + END; + IF (sym = 33) THEN + Get; + Expect(23); + TokenExpr(gL2, gR2); + SetCtx(gL2); CRT.ConcatSeq(gL, gR, gL2, gR2) ; + Expect(24); + END; +END TokenTerm; + +PROCEDURE Factor(VAR gL, gR: INTEGER); + VAR sp, kind, c: INTEGER; name: CRT.Name; + gn: CRT.GraphNode; sn: CRT.SymbolNode; + set: CRT.Set; + undef, weak: BOOLEAN; + pos: CRT.Position; +BEGIN + gL :=0; gR := 0; weak := FALSE ; + CASE sym OF + | 1,2,27: IF (sym = 27) THEN + Get; + weak := TRUE ; + END; + Symbol(name, kind); + sp := CRT.FindSym(name); undef := sp = CRT.noSym; + IF undef THEN + IF kind = ident THEN (*forward nt*) + sp := CRT.NewSym(CRT.nt, name, 0) + ELSE (*undefined string in production*) + sp := CRT.NewSym(CRT.t, name, CRS.line); + MatchLiteral(sp) + END + END; + CRT.GetSym(sp, sn); + IF ~(sn.typ IN {CRT.t,CRT.nt}) THEN SemErr(4) END; + IF weak THEN + IF sn.typ = CRT.t THEN sn.typ := CRT.wt ELSE SemErr(23) END + END; + gL := CRT.NewNode(sn.typ, sp, CRS.line); gR := gL ; + IF (sym = 34) THEN + Attribs(pos); + CRT.GetNode(gL, gn); gn.pos := pos; CRT.PutNode(gL, gn); + CRT.GetSym(sp, sn); + IF undef THEN + sn.attrPos := pos; CRT.PutSym(sp, sn) + ELSIF sn.attrPos.beg < 0 THEN SemErr(5) + END; + IF kind # ident THEN SemErr(3) END ; + ELSIF StartOf(2) THEN + CRT.GetSym(sp, sn); + IF sn.attrPos.beg >= 0 THEN SemErr(6) END ; + ELSE Error(40) + END; + | 23: Get; + Expression(gL, gR); + Expect(24); + | 28: Get; + Expression(gL, gR); + Expect(29); + CRT.MakeOption(gL, gR) ; + | 30: Get; + Expression(gL, gR); + Expect(31); + CRT.MakeIteration(gL, gR) ; + | 36: SemText(pos); + gL := CRT.NewNode(CRT.sem, 0, 0); + gR := gL; + CRT.GetNode(gL, gn); gn.pos := pos; CRT.PutNode(gL, gn) ; + | 25: Get; + Sets.Fill(set); Sets.Excl(set, CRT.eofSy); + gL := CRT.NewNode(CRT.any, CRT.NewSet(set), 0); gR := gL ; + | 32: Get; + gL := CRT.NewNode(CRT.sync, 0, 0); gR := gL ; + ELSE Error(41) + END; +END Factor; + +PROCEDURE Term(VAR gL, gR: INTEGER); + VAR gL2, gR2: INTEGER; +BEGIN + gL := 0; gR := 0 ; + IF StartOf(3) THEN + Factor(gL, gR); + WHILE StartOf(3) DO + Factor(gL2, gR2); + CRT.ConcatSeq(gL, gR, gL2, gR2) ; + END; + ELSIF StartOf(4) THEN + gL := CRT.NewNode(CRT.eps, 0, 0); gR := gL ; + ELSE Error(42) + END; +END Term; + +PROCEDURE Symbol(VAR name: CRT.Name; VAR kind: INTEGER); +BEGIN + IF (sym = 1) THEN + Get; + kind := ident ; + ELSIF (sym = 2) THEN + Get; + kind := string ; + ELSE Error(43) + END; + CRS.GetName(CRS.pos, CRS.len, name); + IF kind = string THEN FixString(name, CRS.len) END ; +END Symbol; + +PROCEDURE SimSet(VAR set: CRT.Set); + VAR c, n, i: INTEGER; name: CRT.Name; s: ARRAY 128 OF CHAR; +BEGIN + IF (sym = 1) THEN + Get; + CRS.GetName(CRS.pos, CRS.len, name); + c := CRT.ClassWithName(name); + IF c < 0 THEN SemErr(15); Sets.Clear(set) + ELSE CRT.GetClass(c, set) + END ; + ELSIF (sym = 2) THEN + Get; + CRS.GetName(CRS.pos, CRS.len, s); + Sets.Clear(set); i := 1; + WHILE s[i] # s[0] DO + Sets.Incl(set, ORD(s[i])); INC(i) + END ; + ELSIF (sym = 22) THEN + Get; + Expect(23); + Expect(3); + CRS.GetName(CRS.pos, CRS.len, name); + n := 0; i := 0; + WHILE name[i] # 0X DO + n := 10 * n + (ORD(name[i]) - ORD("0")); + INC(i) + END; + Sets.Clear(set); Sets.Incl(set, n) ; + Expect(24); + ELSIF (sym = 25) THEN + Get; + Sets.Fill(set) ; + ELSE Error(44) + END; +END SimSet; + +PROCEDURE Set(VAR set: CRT.Set); + VAR set2: CRT.Set; +BEGIN + SimSet(set); + WHILE (sym = 20) OR (sym = 21) DO + IF (sym = 20) THEN + Get; + SimSet(set2); + Sets.Unite(set, set2) ; + ELSE + Get; + SimSet(set2); + Sets.Differ(set, set2) ; + END; + END; +END Set; + +PROCEDURE TokenExpr(VAR gL, gR: INTEGER); + VAR gL2, gR2: INTEGER; first: BOOLEAN; +BEGIN + TokenTerm(gL, gR); + first := TRUE ; + WHILE WeakSeparator(26, 1, 5) DO + TokenTerm(gL2, gR2); + IF first THEN + CRT.MakeFirstAlt(gL, gR); first := FALSE + END; + CRT.ConcatAlt(gL, gR, gL2, gR2) ; + END; +END TokenExpr; + +PROCEDURE TokenDecl(typ: INTEGER); + VAR sp, kind, gL, gR: INTEGER; sn: CRT.SymbolNode; + pos: CRT.Position; name: CRT.Name; +BEGIN + Symbol(name, kind); + IF CRT.FindSym(name) # CRT.noSym THEN SemErr(7) + ELSE + sp := CRT.NewSym(typ, name, CRS.line); + CRT.GetSym(sp, sn); sn.struct := CRT.classToken; + CRT.PutSym(sp, sn) + END ; + WHILE ~( StartOf(6) ) DO Error(45); Get END; + IF (sym = 8) THEN + Get; + TokenExpr(gL, gR); + Expect(9); + IF kind # ident THEN SemErr(13) END; + CRT.CompleteGraph(gR); + CRA.ConvertToStates(gL, sp) ; + ELSIF StartOf(7) THEN + IF kind = ident THEN genScanner := FALSE + ELSE MatchLiteral(sp) + END ; + ELSE Error(46) + END; + IF (sym = 36) THEN + SemText(pos); + IF typ = CRT.t THEN SemErr(14) END; + CRT.GetSym(sp, sn); sn.semPos := pos; CRT.PutSym(sp, sn) ; + END; +END TokenDecl; + +PROCEDURE SetDecl; + VAR c: INTEGER; set: CRT.Set; name: CRT.Name; +BEGIN + Expect(1); + CRS.GetName(CRS.pos, CRS.len, name); + c := CRT.ClassWithName(name); IF c >= 0 THEN SemErr(7) END ; + Expect(8); + Set(set); + c := CRT.NewClass(name, set) ; + Expect(9); +END SetDecl; + +PROCEDURE Expression(VAR gL, gR: INTEGER); + VAR gL2, gR2: INTEGER; first: BOOLEAN; +BEGIN + Term(gL, gR); + first := TRUE ; + WHILE WeakSeparator(26, 2, 8) DO + Term(gL2, gR2); + IF first THEN + CRT.MakeFirstAlt(gL, gR); first := FALSE + END; + CRT.ConcatAlt(gL, gR, gL2, gR2) ; + END; +END Expression; + +PROCEDURE SemText(VAR semPos: CRT.Position); +BEGIN + Expect(36); + semPos.beg := CRS.nextPos; semPos.col := CRS.nextCol ; + WHILE StartOf(9) DO + Get; + END; + Expect(37); + semPos.len := SHORT(CRS.pos - semPos.beg) ; +END SemText; + +PROCEDURE Attribs(VAR attrPos: CRT.Position); +BEGIN + Expect(34); + attrPos.beg := CRS.nextPos; attrPos.col := CRS.nextCol ; + WHILE StartOf(10) DO + Get; + END; + Expect(35); + attrPos.len := SHORT(CRS.pos - attrPos.beg) ; +END Attribs; + +PROCEDURE Declaration; + VAR gL1, gR1, gL2, gR2: INTEGER; nested: BOOLEAN; +BEGIN + IF (sym = 11) THEN + Get; + WHILE (sym = 1) DO + SetDecl; + END; + ELSIF (sym = 12) THEN + Get; + WHILE (sym = 1) OR (sym = 2) DO + TokenDecl(CRT.t); + END; + ELSIF (sym = 13) THEN + Get; + WHILE (sym = 1) OR (sym = 2) DO + TokenDecl(CRT.pr); + END; + ELSIF (sym = 14) THEN + Get; + Expect(15); + TokenExpr(gL1, gR1); + Expect(16); + TokenExpr(gL2, gR2); + IF (sym = 17) THEN + Get; + nested := TRUE ; + ELSIF StartOf(11) THEN + nested := FALSE ; + ELSE Error(47) + END; + CRA.NewComment(gL1, gL2, nested) ; + ELSIF (sym = 18) THEN + Get; + IF (sym = 19) THEN + Get; + CRT.ignoreCase := TRUE ; + ELSIF StartOf(12) THEN + Set(CRT.ignored); + ELSE Error(48) + END; + ELSE Error(49) + END; +END Declaration; + +PROCEDURE CR; + VAR undef, hasAttrs, ok, ok1: BOOLEAN; eofSy, gR: INTEGER; + gramLine, sp: INTEGER; + gn: CRT.GraphNode; sn: CRT.SymbolNode; + name, gramName: CRT.Name; +BEGIN + Expect(4); + Texts.OpenWriter(w); + CRT.Init; CRX.Init; CRA.Init; + gramLine := CRS.line; + eofSy := CRT.NewSym(CRT.t, "EOF", 0); + genScanner := TRUE; + CRT.ignoreCase := FALSE; + ok := TRUE; + Sets.Clear(CRT.ignored) ; + Expect(1); + CRS.GetName(CRS.pos, CRS.len, gramName); + CRT.semDeclPos.beg := CRS.nextPos; CRT.importPos.beg := -1; ; + WHILE StartOf(13) DO + IF (sym = 5) THEN + Get; + CRT.importPos.beg := CRS.nextPos ; + WHILE StartOf(14) DO + Get; + END; + Expect(6); + CRT.importPos.len := SHORT(CRS.pos - CRT.importPos.beg); + CRT.importPos.col := 0; + CRT.semDeclPos.beg := CRS.nextPos ; + ELSE + Get; + END; + END; + CRT.semDeclPos.len := SHORT(CRS.nextPos - CRT.semDeclPos.beg); + CRT.semDeclPos.col := 0 ; + WHILE StartOf(15) DO + Declaration; + END; + WHILE ~( (sym = 0) OR (sym = 7)) DO Error(50); Get END; + Expect(7); + IF genScanner THEN CRA.MakeDeterministic(ok) END; + CRT.nNodes := 0 ; + WHILE (sym = 1) DO + Get; + CRS.GetName(CRS.pos, CRS.len, name); + sp := CRT.FindSym(name); undef := sp = CRT.noSym; + IF undef THEN + sp := CRT.NewSym(CRT.nt, name, CRS.line); + CRT.GetSym(sp, sn); + ELSE + CRT.GetSym(sp, sn); + IF sn.typ = CRT.nt THEN + IF sn.struct > 0 THEN SemErr(7) END + ELSE SemErr(8) + END; + sn.line := CRS.line + END; + hasAttrs := sn.attrPos.beg >= 0 ; + IF (sym = 34) THEN + Attribs(sn.attrPos); + IF ~undef & ~hasAttrs THEN SemErr(9) END; + CRT.PutSym(sp, sn) ; + ELSIF (sym = 8) OR (sym = 36) THEN + IF ~undef & hasAttrs THEN SemErr(10) END ; + ELSE Error(51) + END; + IF (sym = 36) THEN + SemText(sn.semPos); + END; + ExpectWeak(8, 16); + Expression(sn.struct, gR); + CRT.CompleteGraph(gR); CRT.PutSym(sp, sn); + IF CRT.ddt[2] THEN CRT.PrintGraph END ; + ExpectWeak(9, 17); + END; + sp := CRT.FindSym(gramName); + IF sp = CRT.noSym THEN SemErr(11); + ELSE + CRT.GetSym(sp, sn); + IF sn.attrPos.beg >= 0 THEN SemErr(12) END; + CRT.root := CRT.NewNode(CRT.nt, sp, gramLine); + END ; + Expect(10); + Expect(1); + CRS.GetName(CRS.pos, CRS.len, name); + IF name # gramName THEN SemErr(17) END; + IF CRS.errors = 0 THEN + Texts.WriteString(w, " checking"); Texts.Append(Oberon.Log, w.buf); + CRT.CompSymbolSets; + IF ok THEN CRT.TestCompleteness(ok) END; + IF ok THEN + CRT.TestIfAllNtReached(ok1); CRT.FindCircularProductions(ok) + END; + IF ok THEN CRT.TestIfNtToTerm(ok) END; + IF ok THEN CRT.LL1Test(ok1) END; + IF CRT.ddt[0] THEN CRA.PrintStates END; + IF CRT.ddt[7] THEN CRT.XRef END; + IF ok THEN + Texts.WriteString(w, " +parser"); + Texts.Append(Oberon.Log, w.buf); + CRX.GenCompiler; + IF genScanner THEN + Texts.WriteString(w, " +scanner"); + Texts.Append(Oberon.Log, w.buf); + CRA.WriteScanner + END; + IF CRT.ddt[8] THEN CRX.WriteStatistics END + END + ELSE ok := FALSE + END; + IF CRT.ddt[6] THEN CRT.PrintSymbolTable END; + IF ok THEN Texts.WriteString(w, " done") END; + Texts.WriteLn(w); Texts.Append(Oberon.Log, w.buf) ; + Expect(9); +END CR; + + + +PROCEDURE Parse*; +BEGIN + Get; + CR; + +END Parse; + +BEGIN + symSet[0, 0] := {0,1,2,7,8,11,12,13,14,18}; + symSet[0, 1] := {4}; + symSet[1, 0] := {1,2,23,28,30}; + symSet[1, 1] := {}; + symSet[2, 0] := {1,2,9,23,24,25,26,27,28,29,30,31}; + symSet[2, 1] := {0,4}; + symSet[3, 0] := {1,2,23,25,27,28,30}; + symSet[3, 1] := {0,4}; + symSet[4, 0] := {9,24,26,29,31}; + symSet[4, 1] := {}; + symSet[5, 0] := {7,9,11,12,13,14,16,17,18,24,29,31}; + symSet[5, 1] := {}; + symSet[6, 0] := {0,1,2,7,8,11,12,13,14,18}; + symSet[6, 1] := {4}; + symSet[7, 0] := {1,2,7,11,12,13,14,18}; + symSet[7, 1] := {4}; + symSet[8, 0] := {9,24,29,31}; + symSet[8, 1] := {}; + symSet[9, 0] := {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + symSet[9, 1] := {0,1,2,3,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + symSet[10, 0] := {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + symSet[10, 1] := {0,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + symSet[11, 0] := {7,11,12,13,14,18}; + symSet[11, 1] := {}; + symSet[12, 0] := {1,2,22,25}; + symSet[12, 1] := {}; + symSet[13, 0] := {1,2,3,4,5,6,8,9,10,15,16,17,19,20,21,22,23,24,25,26,27,28,29,30,31}; + symSet[13, 1] := {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + symSet[14, 0] := {1,2,3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + symSet[14, 1] := {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + symSet[15, 0] := {11,12,13,14,18}; + symSet[15, 1] := {}; + symSet[16, 0] := {0,1,2,7,8,9,11,12,13,14,18,23,25,26,27,28,30}; + symSet[16, 1] := {0,4}; + symSet[17, 0] := {0,1,2,7,8,10,11,12,13,14,18}; + symSet[17, 1] := {4}; + +END CRP. + | 0: Msg("EOF expected") + | 1: Msg("ident expected") + | 2: Msg("string expected") + | 3: Msg("number expected") + | 4: Msg("'COMPILER' expected") + | 5: Msg("'IMPORT' expected") + | 6: Msg("';' expected") + | 7: Msg("'PRODUCTIONS' expected") + | 8: Msg("'=' expected") + | 9: Msg("'.' expected") + | 10: Msg("'END' expected") + | 11: Msg("'CHARACTERS' expected") + | 12: Msg("'TOKENS' expected") + | 13: Msg("'PRAGMAS' expected") + | 14: Msg("'COMMENTS' expected") + | 15: Msg("'FROM' expected") + | 16: Msg("'TO' expected") + | 17: Msg("'NESTED' expected") + | 18: Msg("'IGNORE' expected") + | 19: Msg("'CASE' expected") + | 20: Msg("'+' expected") + | 21: Msg("'-' expected") + | 22: Msg("'CHR' expected") + | 23: Msg("'(' expected") + | 24: Msg("')' expected") + | 25: Msg("'ANY' expected") + | 26: Msg("'|' expected") + | 27: Msg("'WEAK' expected") + | 28: Msg("'[' expected") + | 29: Msg("']' expected") + | 30: Msg("'{' expected") + | 31: Msg("'}' expected") + | 32: Msg("'SYNC' expected") + | 33: Msg("'CONTEXT' expected") + | 34: Msg("'<' expected") + | 35: Msg("'>' expected") + | 36: Msg("'(.' expected") + | 37: Msg("'.)' expected") + | 38: Msg("??? expected") + | 39: Msg("invalid TokenFactor") + | 40: Msg("invalid Factor") + | 41: Msg("invalid Factor") + | 42: Msg("invalid Term") + | 43: Msg("invalid Symbol") + | 44: Msg("invalid SimSet") + | 45: Msg("this symbol not expected in TokenDecl") + | 46: Msg("invalid TokenDecl") + | 47: Msg("invalid Declaration") + | 48: Msg("invalid Declaration") + | 49: Msg("invalid Declaration") + | 50: Msg("this symbol not expected in CR") + | 51: Msg("invalid CR") diff --git a/tools/Coco/CRS.obn b/tools/Coco/CRS.obn new file mode 100644 index 0000000..f5f4508 --- /dev/null +++ b/tools/Coco/CRS.obn @@ -0,0 +1,231 @@ +(* scanner module generated by Coco-R *) +MODULE CRS; + +IMPORT Texts, SYSTEM; + +CONST + EOL = 0DX; + EOF = 0X; + maxLexLen = 127; + noSym = 38; + +TYPE + ErrorProc* = PROCEDURE (n: INTEGER; pos: LONGINT); + StartTable = ARRAY 128 OF INTEGER; + +VAR + src*: Texts.Text; (*source text. To be set by the main pgm*) + pos*: LONGINT; (*position of current symbol*) + line*, col*, len*: INTEGER; (*line, column, length of current symbol*) + nextPos*: LONGINT; (*position of lookahead symbol*) + nextLine*, nextCol*, nextLen*: INTEGER; (*line, column, length of lookahead symbol*) + errors*: INTEGER; (*number of errors detected*) + Error*: ErrorProc; + + ch: CHAR; (*current input character*) + r: Texts.Reader; (*global reader*) + chPos: LONGINT; (*position of current character*) + chLine: INTEGER; (*current line number*) + lineStart: LONGINT; (*start position of current line*) + apx: INTEGER; (*length of appendix*) + oldEols: INTEGER; (*nr. of EOLs in a comment*) + + start: StartTable; (*start state for every character*) + + +PROCEDURE NextCh; (*return global variable ch*) +BEGIN + Texts.Read(r, ch); INC(chPos); + IF ch = EOL THEN INC(chLine); lineStart := chPos + 1 END +END NextCh; + + +PROCEDURE Comment(): BOOLEAN; + VAR level, startLine: INTEGER; oldLineStart: LONGINT; +BEGIN (*Comment*) + level := 1; startLine := chLine; oldLineStart := lineStart; + IF (ch ="(") THEN + NextCh; + IF (ch ="*") THEN + NextCh; + LOOP + IF (ch ="*") THEN + NextCh; + IF (ch =")") THEN + DEC(level); oldEols := chLine - startLine; NextCh; + IF level=0 THEN RETURN TRUE END + END; + ELSIF (ch ="(") THEN + NextCh; + IF (ch ="*") THEN + INC(level); NextCh; + END; + ELSIF ch = EOF THEN RETURN FALSE + ELSE NextCh END; + END; + ELSE + IF ch = EOL THEN DEC(chLine); lineStart := oldLineStart END; + DEC(chPos, 2); Texts.OpenReader(r, src, chPos+1); NextCh; RETURN FALSE + END + END; +END Comment; + + +PROCEDURE Get*(VAR sym: INTEGER); +VAR state: INTEGER; lexeme: ARRAY maxLexLen+1 OF CHAR; + + PROCEDURE CheckLiteral; + BEGIN + IF nextLen < maxLexLen THEN lexeme[nextLen] := 0X END; + IF (lexeme[0] >= "A") & (lexeme[0] <= "W") THEN + CASE lexeme[0] OF + | "A": IF lexeme = "ANY" THEN sym := 25 + END + | "C": IF lexeme = "CASE" THEN sym := 19 + ELSIF lexeme = "CHARACTERS" THEN sym := 11 + ELSIF lexeme = "CHR" THEN sym := 22 + ELSIF lexeme = "COMMENTS" THEN sym := 14 + ELSIF lexeme = "COMPILER" THEN sym := 4 + ELSIF lexeme = "CONTEXT" THEN sym := 33 + END + | "E": IF lexeme = "END" THEN sym := 10 + END + | "F": IF lexeme = "FROM" THEN sym := 15 + END + | "I": IF lexeme = "IGNORE" THEN sym := 18 + ELSIF lexeme = "IMPORT" THEN sym := 5 + END + | "N": IF lexeme = "NESTED" THEN sym := 17 + END + | "P": IF lexeme = "PRAGMAS" THEN sym := 13 + ELSIF lexeme = "PRODUCTIONS" THEN sym := 7 + END + | "S": IF lexeme = "SYNC" THEN sym := 32 + END + | "T": IF lexeme = "TO" THEN sym := 16 + ELSIF lexeme = "TOKENS" THEN sym := 12 + END + | "W": IF lexeme = "WEAK" THEN sym := 27 + END + ELSE + END + END; + + END CheckLiteral; + +BEGIN + WHILE (ch=20X) OR (ch=CHR(9)) OR (ch=CHR(13)) OR (ch=CHR(28)) DO NextCh END; + IF ((ch ="(")) & Comment() THEN Get(sym); RETURN END; + IF ch > 7FX THEN ch := " " END; + pos := nextPos; col := nextCol; line := nextLine; len := nextLen; + nextPos := chPos; nextCol := SHORT(chPos - lineStart); nextLine := chLine; nextLen := 0; + state := start[ORD(ch)]; apx := 0; + LOOP + IF nextLen < maxLexLen THEN lexeme[nextLen] := ch END; + INC(nextLen); + NextCh; + IF state > 0 THEN + CASE state OF + | 1: IF (ch>="0") & (ch<="9") OR (ch>="A") & (ch<="Z") OR (ch>="a") & (ch<="z") THEN + ELSE sym := 1; CheckLiteral; RETURN + END; + | 2: IF (ch<=CHR(12)) OR (ch>=CHR(14)) & (ch<="!") OR (ch>="#") THEN + ELSIF (ch =CHR(34)) THEN state := 3; + ELSE sym := noSym; RETURN + END; + | 3: sym := 2; RETURN + | 4: IF (ch<=CHR(12)) OR (ch>=CHR(14)) & (ch<="&") OR (ch>="(") THEN + ELSIF (ch ="'") THEN state := 3; + ELSE sym := noSym; RETURN + END; + | 5: IF (ch>="0") & (ch<="9") THEN + ELSE sym := 3; RETURN + END; + | 6: IF (ch>="0") & (ch<="9") THEN + ELSE sym := 39; RETURN + END; + | 7: sym := 6; RETURN + | 8: sym := 8; RETURN + | 9: IF (ch =")") THEN state := 22; + ELSE sym := 9; RETURN + END; + | 10: sym := 20; RETURN + | 11: sym := 21; RETURN + | 12: IF (ch =".") THEN state := 21; + ELSE sym := 23; RETURN + END; + | 13: sym := 24; RETURN + | 14: sym := 26; RETURN + | 15: sym := 28; RETURN + | 16: sym := 29; RETURN + | 17: sym := 30; RETURN + | 18: sym := 31; RETURN + | 19: sym := 34; RETURN + | 20: sym := 35; RETURN + | 21: sym := 36; RETURN + | 22: sym := 37; RETURN + | 23: sym := 0; ch := 0X; RETURN + + END (*CASE*) + ELSE sym := noSym; RETURN (*NextCh already done*) + END (*IF*) + END (*LOOP*) +END Get; + + +PROCEDURE GetName*(pos: LONGINT; len: INTEGER; VAR s: ARRAY OF CHAR); + VAR i: INTEGER; r: Texts.Reader; +BEGIN + Texts.OpenReader(r, src, pos); + IF len >= LEN(s) THEN len := SHORT(LEN(s)) - 1 END; + i := 0; WHILE i < len DO Texts.Read(r, s[i]); INC(i) END; + s[i] := 0X +END GetName; + +PROCEDURE StdErrorProc* (n: INTEGER; pos: LONGINT); +BEGIN INC(errors) END StdErrorProc; + +PROCEDURE Reset* (t: Texts.Text; pos: LONGINT; errProc: ErrorProc); +BEGIN + src := t; Error := errProc; + Texts.OpenReader(r, src, pos); + chPos := pos - 1; chLine := 1; lineStart := 0; + oldEols := 0; apx := 0; errors := 0; + NextCh +END Reset; + +BEGIN + start[0]:=23; start[1]:=0; start[2]:=0; start[3]:=0; + start[4]:=0; start[5]:=0; start[6]:=0; start[7]:=0; + start[8]:=0; start[9]:=0; start[10]:=0; start[11]:=0; + start[12]:=0; start[13]:=0; start[14]:=0; start[15]:=0; + start[16]:=0; start[17]:=0; start[18]:=0; start[19]:=0; + start[20]:=0; start[21]:=0; start[22]:=0; start[23]:=0; + start[24]:=0; start[25]:=0; start[26]:=0; start[27]:=0; + start[28]:=0; start[29]:=0; start[30]:=0; start[31]:=0; + start[32]:=0; start[33]:=0; start[34]:=2; start[35]:=0; + start[36]:=6; start[37]:=0; start[38]:=0; start[39]:=4; + start[40]:=12; start[41]:=13; start[42]:=0; start[43]:=10; + start[44]:=0; start[45]:=11; start[46]:=9; start[47]:=0; + start[48]:=5; start[49]:=5; start[50]:=5; start[51]:=5; + start[52]:=5; start[53]:=5; start[54]:=5; start[55]:=5; + start[56]:=5; start[57]:=5; start[58]:=0; start[59]:=7; + start[60]:=19; start[61]:=8; start[62]:=20; start[63]:=0; + start[64]:=0; start[65]:=1; start[66]:=1; start[67]:=1; + start[68]:=1; start[69]:=1; start[70]:=1; start[71]:=1; + start[72]:=1; start[73]:=1; start[74]:=1; start[75]:=1; + start[76]:=1; start[77]:=1; start[78]:=1; start[79]:=1; + start[80]:=1; start[81]:=1; start[82]:=1; start[83]:=1; + start[84]:=1; start[85]:=1; start[86]:=1; start[87]:=1; + start[88]:=1; start[89]:=1; start[90]:=1; start[91]:=15; + start[92]:=0; start[93]:=16; start[94]:=0; start[95]:=0; + start[96]:=0; start[97]:=1; start[98]:=1; start[99]:=1; + start[100]:=1; start[101]:=1; start[102]:=1; start[103]:=1; + start[104]:=1; start[105]:=1; start[106]:=1; start[107]:=1; + start[108]:=1; start[109]:=1; start[110]:=1; start[111]:=1; + start[112]:=1; start[113]:=1; start[114]:=1; start[115]:=1; + start[116]:=1; start[117]:=1; start[118]:=1; start[119]:=1; + start[120]:=1; start[121]:=1; start[122]:=1; start[123]:=17; + start[124]:=14; start[125]:=18; start[126]:=0; start[127]:=0; + +END CRS. \ No newline at end of file diff --git a/tools/Coco/CRT.obn b/tools/Coco/CRT.obn new file mode 100644 index 0000000..b73cd76 --- /dev/null +++ b/tools/Coco/CRT.obn @@ -0,0 +1,994 @@ +MODULE CRT; (* H.Moessenboeck 17.11.93, Cocol-R Tables *) + +IMPORT Texts, Oberon, Sets; + +CONST + maxSymbols* = 300; (*max nr of t, nt, and pragmas*) + maxTerminals* = 256; (*max nr of terminals*) + maxNt* = 128; (*max nr of nonterminals*) + maxNodes* = 1500; (*max nr of graph nodes*) + normTrans* = 0; contextTrans* = 1; (*transition codes*) + maxSetNr = 128; (* max. number of symbol sets *) + maxClasses = 50; (* max. number of character classes *) + + (* node types *) + t* = 1; pr* = 2; nt* = 3; class* = 4; char* = 5; wt* = 6; any* = 7; eps* = 8; sync* = 9; sem* = 10; + alt* = 11; iter* = 12; opt* = 13; + + noSym* = -1; + eofSy* = 0; + + (* token kinds *) + classToken* = 0; (*token class*) + litToken* = 1; (*literal (e.g. keyword) not recognized by DFA*) + classLitToken* = 2; (*token class that can also match a literal*) + +TYPE + Name* = ARRAY 16 OF CHAR; (*symbol name*) + Position* = RECORD (*position of stretch of source text*) + beg*: LONGINT; (*start relative to beginning of file*) + len*: INTEGER; (*length*) + col*: INTEGER; (*column number of start position*) + END; + + SymbolNode* = RECORD + typ*: INTEGER; (*nt, t, pr, unknown*) + name*: Name; (*symbol name*) + struct*: INTEGER; (*typ = nt: index of 1st node of syntax graph*) + (*typ = t: token kind: literal, class, ...*) + deletable*: BOOLEAN; (*typ = nt: TRUE, if nonterminal is deletable*) + attrPos*: Position; (*position of attributes in source text*) + semPos*: Position; (*typ = pr: pos of sem action in source text*) + (*typ = nt: pos of local decls in source text *) + line*: INTEGER; (*source text line number of item in this node*) + END; + + Set* = ARRAY maxTerminals DIV Sets.size OF SET; + + GraphNode* = RECORD + typ* : INTEGER; (* nt,sts,wts,char,class,any,eps,sem,sync,alt,iter,opt*) + next*: INTEGER; (* index of successor node *) + (* next < 0: to successor in enclosing structure *) + p1*: INTEGER; (* typ IN {nt, t, wt}: index to symbol list *) + (* typ = any: index to anyset *) + (* typ = sync: index to syncset *) + (* typ = alt: index of 1st node of 1st alternative*) + (* typ IN {iter, opt}: 1st node in subexpression *) + (* typ = char: ordinal character value *) + (* typ = class: index of character class *) + p2*: INTEGER; (* typ = alt: index of 1st node of 2nd alternative*) + (* typ IN {char, class}: transition code *) + pos*: Position; (* typ IN {nt, t, wt}: pos of actual attribs *) + (* typ = sem: pos of sem action in source text. *) + line*: INTEGER; (* source text line number of item in this node *) + END; + + MarkList* = ARRAY maxNodes DIV Sets.size OF SET; + + FirstSets = ARRAY maxNt OF RECORD + ts: Set; (*terminal symbols*) + ready: BOOLEAN; (*TRUE = ts is complete*) + END; + FollowSets = ARRAY maxNt OF RECORD + ts: Set; (*terminal symbols*) + nts: Set; (*nts whose start set is to be included*) + END; + CharClass = RECORD + name: Name; (*class name*) + set: INTEGER (* ptr to set representing the class*) + END; + SymbolTable = ARRAY maxSymbols OF SymbolNode; + ClassTable = ARRAY maxClasses OF CharClass; + GraphList = ARRAY maxNodes OF GraphNode; + +VAR + maxSet*: INTEGER; (* index of last set *) + maxT*: INTEGER; (* terminals stored from 0 .. maxT *) + maxP*: INTEGER; (* pragmas stored from maxT+1 .. maxP *) + firstNt*: INTEGER; (* index of first nt: available after CompSymbolSets *) + lastNt*: INTEGER; (* index of last nt: available after CompSymbolSets *) + maxC*: INTEGER; (* index of last character class *) + semDeclPos*: Position; (*position of global semantic declarations*) + importPos*: Position; (*position of imported identifiers*) + ignored*: Set; (* characters ignored by the scanner *) + ignoreCase*: BOOLEAN; (* TRUE: scanner treats lower case as upper case*) + ddt*: ARRAY 10 OF BOOLEAN; (* debug and test switches *) + nNodes*: INTEGER; (* index of last graph node *) + root*: INTEGER; (* index of root node, filled by ATG *) + + w: Texts.Writer; + st: SymbolTable; + gn: GraphList; + first: FirstSets; (*first[i] = first symbols of st[i+firstNt]*) + follow: FollowSets; (*follow[i] = followers of st[i+firstNt]*) + chClass: ClassTable; (*character classes*) + set: ARRAY 128 OF Set; (*set[0] reserved for union of all synchronisation sets*) + dummyName: INTEGER; (*for unnamed character classes*) + +PROCEDURE ^MovePragmas; +PROCEDURE ^DelNode*(gn: GraphNode): BOOLEAN; + +PROCEDURE Str(s: ARRAY OF CHAR); +BEGIN Texts.WriteString(w, s) +END Str; + +PROCEDURE NL; +BEGIN Texts.WriteLn(w) +END NL; + +PROCEDURE Length(s: ARRAY OF CHAR): INTEGER; + VAR i: INTEGER; +BEGIN + i:=0; WHILE (i < LEN(s)) & (s[i] # 0X) DO INC(i) END; + RETURN i +END Length; + +PROCEDURE Restriction(n: INTEGER); +BEGIN + NL; Str("Restriction "); Texts.WriteInt(w, n, 0); NL; Texts.Append(Oberon.Log, w.buf); + HALT(99) +END Restriction; + +PROCEDURE ClearMarkList(VAR m: MarkList); + VAR i: INTEGER; +BEGIN + i := 0; WHILE i < maxNodes DIV Sets.size DO m[i] := {}; INC(i) END; +END ClearMarkList; + +PROCEDURE GetNode*(gp: INTEGER; VAR n: GraphNode); +BEGIN + n := gn[gp] +END GetNode; + +PROCEDURE PutNode*(gp: INTEGER; n: GraphNode); +BEGIN gn[gp] := n +END PutNode; + +PROCEDURE DelGraph*(gp: INTEGER): BOOLEAN; + VAR gn: GraphNode; +BEGIN + IF gp = 0 THEN RETURN TRUE END; (*end of graph found*) + GetNode(gp, gn); + RETURN DelNode(gn) & DelGraph(ABS(gn.next)); +END DelGraph; + +PROCEDURE NewSym*(typ: INTEGER; name: Name; line: INTEGER): INTEGER; + VAR i: INTEGER; +BEGIN + IF maxT + 1 = firstNt THEN Restriction(6) + ELSE + CASE typ OF + | t: INC(maxT); i := maxT + | pr: DEC(maxP); DEC(firstNt); DEC(lastNt); i := maxP + | nt: DEC(firstNt); i := firstNt + END; + IF maxT >= maxTerminals THEN Restriction(6) END; + st[i].typ := typ; st[i].name := name; + st[i].struct := 0; st[i].deletable := FALSE; + st[i].attrPos.beg := -1; + st[i].semPos.beg := -1; + st[i].line := line + END; + RETURN i +END NewSym; + +PROCEDURE GetSym*(sp: INTEGER; VAR sn: SymbolNode); +BEGIN sn := st[sp] +END GetSym; + +PROCEDURE PutSym*(sp: INTEGER; sn: SymbolNode); +BEGIN st[sp] := sn +END PutSym; + +PROCEDURE FindSym*(name: Name): INTEGER; + VAR i: INTEGER; +BEGIN + i := 0; (*search in terminal list*) + WHILE (i <= maxT) & (st[i].name # name) DO INC(i) END; + IF i <= maxT THEN RETURN i END; + i := firstNt; (*search in nonterminal/pragma list*) + WHILE (i < maxSymbols) & (st[i].name # name) DO INC(i) END; + IF i < maxSymbols THEN RETURN i ELSE RETURN noSym END +END FindSym; + +PROCEDURE NewSet*(s: Set): INTEGER; +BEGIN + INC(maxSet); IF maxSet > maxSetNr THEN Restriction(4) END; + set[maxSet] := s; + RETURN maxSet +END NewSet; + +PROCEDURE PrintSet(s: ARRAY OF SET; indent: INTEGER); + CONST maxLineLen = 80; + VAR col, i, len: INTEGER; empty: BOOLEAN; sn: SymbolNode; +BEGIN + i := 0; col := indent; empty := TRUE; + WHILE i <= maxT DO + IF Sets.In(s, i) THEN + empty := FALSE; GetSym(i, sn); len := Length(sn.name); + IF col + len + 2 > maxLineLen THEN + NL; col := 1; + WHILE col < indent DO Texts.Write(w, " "); INC(col) END + END; + Str(sn.name); Str(" "); + INC(col, len + 2) + END; + INC(i) + END; + IF empty THEN Str("-- empty set --") END; + NL; Texts.Append(Oberon.Log, w.buf) +END PrintSet; + +PROCEDURE CompFirstSet*(gp: INTEGER; VAR fs: Set); + VAR visited: MarkList; + + PROCEDURE CompFirst(gp: INTEGER; VAR fs: Set); + VAR s: Set; gn: GraphNode; sn: SymbolNode; + BEGIN + Sets.Clear(fs); + WHILE (gp # 0) & ~ Sets.In(visited, gp) DO + GetNode(gp, gn); Sets.Incl(visited, gp); + CASE gn.typ OF + | nt: + IF first[gn.p1 - firstNt].ready THEN + Sets.Unite(fs, first[gn.p1 - firstNt].ts); + ELSE + GetSym(gn.p1, sn); CompFirst(sn.struct, s); Sets.Unite(fs, s); + END; + | t, wt: Sets.Incl(fs, gn.p1); + | any: Sets.Unite(fs, set[gn.p1]) + | alt, iter, opt: + CompFirst(gn.p1, s); Sets.Unite(fs, s); + IF gn.typ = alt THEN CompFirst(gn.p2, s); Sets.Unite(fs, s) END + ELSE (* eps, sem, sync: nothing *) + END; + IF ~ DelNode(gn) THEN RETURN END; + gp := ABS(gn.next) + END + END CompFirst; + +BEGIN (* ComputeFirstSet *) + ClearMarkList(visited); + CompFirst(gp, fs); + IF ddt[3] THEN + NL; Str("ComputeFirstSet: gp = "); Texts.WriteInt(w, gp, 0); NL; + PrintSet(fs, 0); + END; +END CompFirstSet; + +PROCEDURE CompFirstSets; + VAR i: INTEGER; sn: SymbolNode; +BEGIN + i := firstNt; WHILE i <= lastNt DO first[i-firstNt].ready := FALSE; INC(i) END; + i := firstNt; + WHILE i <= lastNt DO (* for all nonterminals *) + GetSym(i, sn); CompFirstSet(sn.struct, first[i - firstNt].ts); + first[i - firstNt].ready := TRUE; + INC(i) + END; +END CompFirstSets; + +PROCEDURE CompExpected*(gp, sp: INTEGER; VAR exp: Set); +BEGIN + CompFirstSet(gp, exp); + IF DelGraph(gp) THEN Sets.Unite(exp, follow[sp - firstNt].ts) END +END CompExpected; + +PROCEDURE CompFollowSets; + VAR sn: SymbolNode; gn: GraphNode; curSy, i, size: INTEGER; visited: MarkList; + + PROCEDURE CompFol(gp: INTEGER); + VAR s: Set; gn: GraphNode; + BEGIN + WHILE (gp > 0) & ~ Sets.In(visited, gp) DO + GetNode(gp, gn); Sets.Incl(visited, gp); + IF gn.typ = nt THEN + CompFirstSet(ABS(gn.next), s); Sets.Unite(follow[gn.p1 - firstNt].ts, s); + IF DelGraph(ABS(gn.next)) THEN + Sets.Incl(follow[gn.p1 - firstNt].nts, curSy - firstNt) + END + ELSIF gn.typ IN {opt, iter} THEN CompFol(gn.p1) + ELSIF gn.typ = alt THEN CompFol(gn.p1); CompFol(gn.p2) + END; + gp := gn.next + END + END CompFol; + + PROCEDURE Complete(i: INTEGER); + VAR j: INTEGER; + BEGIN + IF Sets.In(visited, i) THEN RETURN END; + Sets.Incl(visited, i); + j := 0; + WHILE j <= lastNt - firstNt DO (* for all nonterminals *) + IF Sets.In(follow[i].nts, j) THEN + Complete(j); Sets.Unite(follow[i].ts, follow[j].ts); + Sets.Excl(follow[i].nts, j) + END; + INC(j) + END; + END Complete; + +BEGIN (* CompFollowSets *) + curSy := firstNt; size := (lastNt - firstNt + 2) DIV Sets.size; + WHILE curSy <= lastNt + 1 DO (* also for dummy root nt*) + Sets.Clear(follow[curSy - firstNt].ts); + i := 0; WHILE i <= size DO follow[curSy - firstNt].nts[i] := {}; INC(i) END; + INC(curSy) + END; + + curSy := firstNt; (*get direct successors of nonterminals*) + WHILE curSy <= lastNt DO + GetSym(curSy, sn); ClearMarkList(visited); CompFol(sn.struct); + INC(curSy) + END; + CompFol(root); (*curSy=lastNt+1*) + + curSy := 0; (*add indirect successors to follow.ts*) + WHILE curSy <= lastNt - firstNt DO + ClearMarkList(visited); Complete(curSy); + INC(curSy); + END; +END CompFollowSets; + + +PROCEDURE CompAnySets; + VAR curSy, i: INTEGER; sn: SymbolNode; + + PROCEDURE LeadingAny(gp: INTEGER; VAR a: GraphNode): BOOLEAN; + VAR gn: GraphNode; + BEGIN + IF gp <= 0 THEN RETURN FALSE END; + GetNode(gp, gn); + IF (gn.typ = any) THEN a := gn; RETURN TRUE + ELSE RETURN (gn.typ = alt) & (LeadingAny(gn.p1, a) OR LeadingAny(gn.p2, a)) + OR (gn.typ IN {opt, iter}) & LeadingAny(gn.p1, a) + OR DelNode(gn) & LeadingAny(gn.next, a) + END + END LeadingAny; + + PROCEDURE FindAS(gp: INTEGER); + VAR gn, gn2, a: GraphNode; s1, s2: Set; p: INTEGER; + BEGIN + WHILE gp > 0 DO + GetNode(gp, gn); + IF gn.typ IN {opt, iter} THEN + FindAS(gn.p1); + IF LeadingAny(gn.p1, a) THEN + CompFirstSet(ABS(gn.next), s1); Sets.Differ(set[a.p1], s1) + END + ELSIF gn.typ = alt THEN + p := gp; Sets.Clear(s1); + WHILE p # 0 DO + GetNode(p, gn2); FindAS(gn2.p1); + IF LeadingAny(gn2.p1, a) THEN + CompFirstSet(gn2.p2, s2); Sets.Unite(s2, s1); Sets.Differ(set[a.p1], s2) + ELSE + CompFirstSet(gn2.p1, s2); Sets.Unite(s1, s2) + END; + p := gn2.p2 + END + END; + gp := gn.next + END + END FindAS; + +BEGIN + curSy := firstNt; + WHILE curSy <= lastNt DO (* for all nonterminals *) + GetSym(curSy, sn); FindAS(sn.struct); + INC(curSy) + END +END CompAnySets; + + +PROCEDURE CompSyncSets; + VAR curSy, i: INTEGER; sn: SymbolNode; visited: MarkList; + + PROCEDURE CompSync(gp: INTEGER); + VAR s: Set; gn: GraphNode; + BEGIN + WHILE (gp > 0) & ~ Sets.In(visited, gp) DO + GetNode(gp, gn); Sets.Incl(visited, gp); + IF gn.typ = sync THEN + CompExpected(ABS(gn.next), curSy, s); + Sets.Incl(s, eofSy); Sets.Unite(set[0], s); + gn.p1 := NewSet(s); PutNode(gp, gn) + ELSIF gn.typ = alt THEN CompSync(gn.p1); CompSync(gn.p2) + ELSIF gn.typ IN {iter, opt} THEN CompSync(gn.p1) + END; + gp := gn.next + END + END CompSync; + +BEGIN + curSy := firstNt; ClearMarkList(visited); + WHILE curSy <= lastNt DO + GetSym(curSy, sn); CompSync(sn.struct); + INC(curSy); + END +END CompSyncSets; + + +PROCEDURE CompDeletableSymbols*; + VAR changed, del: BOOLEAN; i: INTEGER; sn: SymbolNode; +BEGIN + del := FALSE; + REPEAT + changed := FALSE; + i := firstNt; + WHILE i <= lastNt DO (*for all nonterminals*) + GetSym(i, sn); + IF ~sn.deletable & DelGraph(sn.struct) THEN + sn.deletable := TRUE; PutSym(i, sn); changed := TRUE; del := TRUE + END; + INC(i) + END; + UNTIL ~changed; + + i := firstNt; IF del THEN NL END; + WHILE i <= lastNt DO + GetSym(i, sn); + IF sn.deletable THEN Str(" "); Str(sn.name); Str(" deletable"); NL END; + INC(i); + END; + Texts.Append(Oberon.Log, w.buf) +END CompDeletableSymbols; + + +PROCEDURE CompSymbolSets*; + VAR i: INTEGER; sn: SymbolNode; +BEGIN + i := NewSym(t, "???", 0); (*unknown symbols get code maxT*) + MovePragmas; + CompDeletableSymbols; + CompFirstSets; + CompFollowSets; + CompAnySets; + CompSyncSets; + IF ddt[1] THEN + i := firstNt; Str("First & follow symbols:"); NL; + WHILE i <= lastNt DO (* for all nonterminals *) + GetSym(i, sn); Str(sn.name); NL; + Str("first: "); PrintSet(first[i - firstNt].ts, 10); + Str("follow: "); PrintSet(follow[i - firstNt].ts, 10); + NL; + INC(i); + END; + + IF maxSet >= 0 THEN NL; NL; Str("List of sets (ANY, SYNC): "); NL END; + i := 0; + WHILE i <= maxSet DO + Str(" set["); Texts.WriteInt (w, i, 2); Str("] = "); PrintSet(set[i], 16); + INC (i) + END; + NL; NL; Texts.Append(Oberon.Log, w.buf) + END; +END CompSymbolSets; + + +PROCEDURE GetFirstSet(sp: INTEGER; VAR s: Set); +BEGIN s := first[sp - firstNt].ts +END GetFirstSet; + +PROCEDURE GetFollowSet(sp: INTEGER; VAR s: Set); +BEGIN s := follow[sp - firstNt].ts +END GetFollowSet; + +PROCEDURE GetSet*(nr: INTEGER; VAR s: Set); +BEGIN s := set[nr] +END GetSet; + +PROCEDURE MovePragmas; + VAR i: INTEGER; +BEGIN + IF maxP > firstNt THEN + i := maxSymbols - 1; maxP := maxT; + WHILE i > lastNt DO + INC(maxP); IF maxP >= firstNt THEN Restriction(6) END; + st[maxP] := st[i]; DEC(i) + END; + END +END MovePragmas; + +PROCEDURE PrintSymbolTable*; + VAR i, j: INTEGER; + + PROCEDURE WriteTyp(typ: INTEGER); + BEGIN + CASE typ OF + | t : Str(" t "); + | pr : Str(" pr "); + | nt : Str(" nt "); + END; + END WriteTyp; + +BEGIN (* PrintSymbolTable *) + Str("Symbol Table:"); NL; NL; + Str("nr name typ hasAttribs struct del line"); NL; NL; + + i := 0; + WHILE i < maxSymbols DO + Texts.WriteInt(w, i, 3); Str(" "); + j := 0; WHILE (j < 8) & (st[i].name[j] # 0X) DO Texts.Write(w, st[i].name[j]); INC(j) END; + WHILE j < 8 DO Texts.Write(w, " "); INC(j) END; + WriteTyp(st[i].typ); + IF st[i].attrPos.beg >= 0 THEN Str(" TRUE ") ELSE Str(" FALSE") END; + Texts.WriteInt(w, st[i].struct, 10); + IF st[i].deletable THEN Str(" TRUE ") ELSE Str(" FALSE") END; + Texts.WriteInt(w, st[i].line, 6); NL; + IF i = maxT THEN i := firstNt ELSE INC(i) END + END; + NL; NL; Texts.Append(Oberon.Log, w.buf) +END PrintSymbolTable; + +PROCEDURE NewClass*(name: Name; set: Set): INTEGER; +BEGIN + INC(maxC); IF maxC >= maxClasses THEN Restriction(7) END; + IF name[0] = "#" THEN name[1] := CHR(ORD("A") + dummyName); INC(dummyName) END; + chClass[maxC].name := name; chClass[maxC].set := NewSet(set); + RETURN maxC +END NewClass; + +PROCEDURE ClassWithName*(name: Name): INTEGER; + VAR i: INTEGER; +BEGIN + i := maxC; WHILE (i >= 0) & (chClass[i].name # name) DO DEC(i) END; + RETURN i +END ClassWithName; + +PROCEDURE ClassWithSet*(s: Set): INTEGER; + VAR i: INTEGER; +BEGIN + i := maxC; WHILE (i >= 0) & ~ Sets.Equal(set[chClass[i].set], s) DO DEC(i) END; + RETURN i +END ClassWithSet; + +PROCEDURE GetClass*(n: INTEGER; VAR s: Set); +BEGIN + GetSet(chClass[n].set, s) +END GetClass; + +PROCEDURE GetClassName*(n: INTEGER; VAR name: Name); +BEGIN + name := chClass[n].name +END GetClassName; + +PROCEDURE XRef*; + CONST maxLineLen = 80; + TYPE ListPtr = POINTER TO ListNode; + ListNode = RECORD + next: ListPtr; + line: INTEGER; + END; + ListHdr = RECORD + name: Name; + lptr: ListPtr; + END; + VAR gn: GraphNode; col, i, j: INTEGER; l, p, q: ListPtr; + sn: SymbolNode; + xList: ARRAY maxSymbols OF ListHdr; + +BEGIN (* XRef *) + IF maxT <= 0 THEN RETURN END; + MovePragmas; + (* initialise cross reference list *) + i := 0; + WHILE i <= lastNt DO (* for all symbols *) + GetSym(i, sn); xList[i].name := sn.name; xList[i].lptr := NIL; + IF i = maxP THEN i := firstNt ELSE INC(i) END + END; + + (* search lines where symbol has been referenced *) + i := 1; + WHILE i <= nNodes DO (* for all graph nodes *) + GetNode(i, gn); + IF gn.typ IN {t, wt, nt} THEN + NEW(l); l^.next := xList[gn.p1].lptr; l^.line := gn.line; + xList[gn.p1].lptr := l + END; + INC(i); + END; + + (* search lines where symbol has been defined and insert in order *) + i := 1; + WHILE i <= lastNt DO (*for all symbols*) + GetSym(i, sn); p := xList[i].lptr; q := NIL; + WHILE (p # NIL) & (p^.line > sn.line) DO q := p; p := p^.next END; + NEW(l); l^.next := p; + l^.line := -sn.line; + IF q # NIL THEN q^.next := l ELSE xList[i].lptr := l END; + IF i = maxP THEN i := firstNt ELSE INC(i) END + END; + + (* print cross reference listing *) + NL; Str("Cross reference list:"); NL; NL; Str("Terminals:"); NL; Str(" 0 EOF"); NL; + i := 1; + WHILE i <= lastNt DO (*for all symbols*) + Texts.WriteInt(w, i, 3); Str(" "); + j := 0; WHILE (j < 15) & (xList[i].name[j] # 0X) DO Texts.Write(w, xList[i].name[j]); INC(j) END; + l := xList[i].lptr; col := 25; + WHILE l # NIL DO + IF col + 5 > maxLineLen THEN + NL; col := 0; WHILE col < 25 DO Texts.Write(w, " "); INC(col) END + END; + IF l^.line = 0 THEN Str("undef") ELSE Texts.WriteInt(w, l^.line, 5) END; + INC(col, 5); + l := l^.next + END; + NL; + IF i = maxT THEN NL; Str("Pragmas:"); NL END; + IF i = maxP THEN NL; Str("Nonterminals:"); NL; i := firstNt ELSE INC(i) END + END; + NL; NL; Texts.Append(Oberon.Log, w.buf) +END XRef; + + +PROCEDURE NewNode*(typ, p1, line: INTEGER): INTEGER; +BEGIN + INC(nNodes); IF nNodes > maxNodes THEN Restriction(3) END; + gn[nNodes].typ := typ; gn[nNodes].next := 0; + gn[nNodes].p1 := p1; gn[nNodes].p2 := 0; + gn[nNodes].pos.beg := -1; gn[nNodes].line := line; + RETURN nNodes; +END NewNode; + +PROCEDURE CompleteGraph*(gp: INTEGER); + VAR p: INTEGER; +BEGIN + WHILE gp # 0 DO + p := gn[gp].next; gn[gp].next := 0; gp := p + END +END CompleteGraph; + +PROCEDURE ConcatAlt*(VAR gL1, gR1: INTEGER; gL2, gR2: INTEGER); + VAR p: INTEGER; +BEGIN + gL2 := NewNode(alt, gL2, 0); + p := gL1; WHILE gn[p].p2 # 0 DO p := gn[p].p2 END; gn[p].p2 := gL2; + p := gR1; WHILE gn[p].next # 0 DO p := gn[p].next END; gn[p].next := gR2 +END ConcatAlt; + +PROCEDURE ConcatSeq*(VAR gL1, gR1: INTEGER; gL2, gR2: INTEGER); + VAR p, q: INTEGER; +BEGIN + p := gn[gR1].next; gn[gR1].next := gL2; (*head node*) + WHILE p # 0 DO (*substructure*) + q := gn[p].next; gn[p].next := -gL2; p := q + END; + gR1 := gR2 +END ConcatSeq; + +PROCEDURE MakeFirstAlt*(VAR gL, gR: INTEGER); +BEGIN + gL := NewNode(alt, gL, 0); gn[gL].next := gR; gR := gL +END MakeFirstAlt; + +PROCEDURE MakeIteration*(VAR gL, gR: INTEGER); + VAR p, q: INTEGER; +BEGIN + gL := NewNode(iter, gL, 0); p := gR; gR := gL; + WHILE p # 0 DO + q := gn[p].next; gn[p].next := - gL; p := q + END +END MakeIteration; + +PROCEDURE MakeOption*(VAR gL, gR: INTEGER); +BEGIN + gL := NewNode(opt, gL, 0); gn[gL].next := gR; gR := gL +END MakeOption; + +PROCEDURE StrToGraph*(str: ARRAY OF CHAR; VAR gL, gR: INTEGER); + VAR len, i: INTEGER; +BEGIN + gR := 0; i := 1; len := Length(str) - 1; + WHILE i < len DO + gn[gR].next := NewNode(char, ORD(str[i]), 0); gR := gn[gR].next; + INC(i) + END; + gL := gn[0].next; gn[0].next := 0 +END StrToGraph; + +PROCEDURE DelNode*(gn: GraphNode): BOOLEAN; + VAR sn: SymbolNode; + + PROCEDURE DelAlt(gp: INTEGER): BOOLEAN; + VAR gn: GraphNode; + BEGIN + IF gp <= 0 THEN RETURN TRUE END; (*end of graph found*) + GetNode(gp, gn); + RETURN DelNode(gn) & DelAlt(gn.next); + END DelAlt; + +BEGIN + IF gn.typ = nt THEN GetSym(gn.p1, sn); RETURN sn.deletable + ELSIF gn.typ = alt THEN RETURN DelAlt(gn.p1) OR (gn.p2 # 0) & DelAlt(gn.p2) + ELSE RETURN gn.typ IN {eps, iter, opt, sem, sync} + END +END DelNode; + +PROCEDURE PrintGraph*; + VAR i: INTEGER; + + PROCEDURE WriteTyp(typ: INTEGER); + BEGIN + CASE typ OF + | nt : Str("nt ") + | t : Str("t ") + | wt : Str("wt ") + | any : Str("any ") + | eps : Str("eps ") + | sem : Str("sem ") + | sync: Str("sync") + | alt : Str("alt ") + | iter: Str("iter") + | opt : Str("opt ") + ELSE Str("--- ") + END; + END WriteTyp; + +BEGIN (* PrintGraph *) + Str("GraphList:"); NL; NL; + Str(" nr typ next p1 p2 line"); NL; NL; + + i := 0; + WHILE i <= nNodes DO + Texts.WriteInt(w, i, 3); Str(" "); + WriteTyp(gn[i].typ); Texts.WriteInt(w, gn[i].next, 7); + Texts.WriteInt(w, gn[i].p1, 7); + Texts.WriteInt(w, gn[i].p2, 7); + Texts.WriteInt(w, gn[i].line, 7); + NL; + INC(i); + END; + NL; NL; Texts.Append(Oberon.Log, w.buf) +END PrintGraph; + +PROCEDURE FindCircularProductions* (VAR ok: BOOLEAN); + CONST maxList = 150; + TYPE ListEntry = RECORD + left : INTEGER; + right : INTEGER; + deleted: BOOLEAN; + END; + VAR changed, onLeftSide, onRightSide: BOOLEAN; i, j, listLength: INTEGER; + list: ARRAY maxList OF ListEntry; + singles: MarkList; + sn: SymbolNode; + + PROCEDURE GetSingles (gp: INTEGER; VAR singles: MarkList); + VAR gn: GraphNode; + BEGIN + IF gp <= 0 THEN RETURN END; (* end of graph found *) + GetNode (gp, gn); + IF gn.typ = nt THEN + IF DelGraph(ABS(gn.next)) THEN Sets.Incl(singles, gn.p1) END + ELSIF gn.typ IN {alt, iter, opt} THEN + IF DelGraph(ABS(gn.next)) THEN + GetSingles(gn.p1, singles); + IF gn.typ = alt THEN GetSingles(gn.p2, singles) END + END + END; + IF DelNode(gn) THEN GetSingles(gn.next, singles) END + END GetSingles; + +BEGIN (* FindCircularProductions *) + i := firstNt; listLength := 0; + WHILE i <= lastNt DO (* for all nonterminals i *) + ClearMarkList (singles); GetSym (i, sn); + GetSingles (sn.struct, singles); (* get nt's j such that i-->j *) + j := firstNt; + WHILE j <= lastNt DO (* for all nonterminals j *) + IF Sets.In(singles, j) THEN + list[listLength].left := i; list[listLength].right := j; + list[listLength].deleted := FALSE; + INC (listLength) + END; + INC(j) + END; + INC(i) + END; + + REPEAT + i := 0; changed := FALSE; + WHILE i < listLength DO + IF ~ list[i].deleted THEN + j := 0; onLeftSide := FALSE; onRightSide := FALSE; + WHILE j < listLength DO + IF ~ list[j].deleted THEN + IF list[i].left = list[j].right THEN onRightSide := TRUE END; + IF list[j].left = list[i].right THEN onLeftSide := TRUE END + END; + INC(j) + END; + IF ~ onRightSide OR ~ onLeftSide THEN + list[i].deleted := TRUE; changed := TRUE + END + END; + INC(i) + END + UNTIL ~ changed; + + i := 0; ok := TRUE; + WHILE i < listLength DO + IF ~ list[i].deleted THEN + ok := FALSE; + GetSym(list[i].left, sn); NL; Str(" "); Str(sn.name); Str(" --> "); + GetSym(list[i].right, sn); Str(sn.name) + END; + INC(i) + END; + Texts.Append(Oberon.Log, w.buf) +END FindCircularProductions; + + +PROCEDURE LL1Test* (VAR ll1: BOOLEAN); + VAR sn: SymbolNode; curSy: INTEGER; + + PROCEDURE LL1Error (cond, ts: INTEGER); + VAR sn: SymbolNode; + BEGIN + ll1 := FALSE; + GetSym (curSy, sn); Str(" LL1 error in "); Str(sn.name); Str(": "); + IF ts > 0 THEN GetSym (ts, sn); Str(sn.name); Str(" is ") END; + CASE cond OF + 1: Str(" start of several alternatives.") + | 2: Str(" start & successor of deletable structure") + | 3: Str(" an ANY node that matchs no symbol") + END; + NL; Texts.Append(Oberon.Log, w.buf) + END LL1Error; + + PROCEDURE Check (cond: INTEGER; VAR s1, s2: Set); + VAR i: INTEGER; + BEGIN + i := 0; + WHILE i <= maxT DO + IF Sets.In(s1, i) & Sets.In(s2, i) THEN LL1Error(cond, i) END; + INC(i) + END + END Check; + + PROCEDURE CheckAlternatives (gp: INTEGER); + VAR gn, gn1: GraphNode; s1, s2: Set; p: INTEGER; + BEGIN + WHILE gp > 0 DO + GetNode(gp, gn); + IF gn.typ = alt THEN + p := gp; Sets.Clear(s1); + WHILE p # 0 DO (*for all alternatives*) + GetNode(p, gn1); CompExpected(gn1.p1, curSy, s2); + Check(1, s1, s2); Sets.Unite(s1, s2); + CheckAlternatives(gn1.p1); + p := gn1.p2 + END + ELSIF gn.typ IN {opt, iter} THEN + CompExpected(gn.p1, curSy, s1); + CompExpected(ABS(gn.next), curSy, s2); + Check(2, s1, s2); + CheckAlternatives(gn.p1) + ELSIF gn.typ = any THEN + GetSet(gn.p1, s1); + IF Sets.Empty(s1) THEN LL1Error(3, 0) END (*e.g. {ANY} ANY or [ANY] ANY*) + END; + gp := gn.next + END + END CheckAlternatives; + +BEGIN (* LL1Test *) + curSy := firstNt; ll1 := TRUE; + WHILE curSy <= lastNt DO (*for all nonterminals*) + GetSym(curSy, sn); CheckAlternatives (sn.struct); + INC (curSy) + END; +END LL1Test; + + +PROCEDURE TestCompleteness* (VAR ok: BOOLEAN); + VAR sp: INTEGER; sn: SymbolNode; +BEGIN + sp := firstNt; ok := TRUE; + WHILE sp <= lastNt DO (*for all nonterminals*) + GetSym (sp, sn); + IF sn.struct = 0 THEN + ok := FALSE; NL; Str(" No production for "); Str(sn.name); Texts.Append(Oberon.Log, w.buf) + END; + INC(sp) + END +END TestCompleteness; + + +PROCEDURE TestIfAllNtReached* (VAR ok: BOOLEAN); + VAR gn: GraphNode; sp: INTEGER; reached: MarkList; sn: SymbolNode; + + PROCEDURE MarkReachedNts (gp: INTEGER); + VAR gn: GraphNode; sn: SymbolNode; + BEGIN + WHILE gp > 0 DO + GetNode(gp, gn); + IF gn.typ = nt THEN + IF ~ Sets.In(reached, gn.p1) THEN (*new nt reached*) + Sets.Incl(reached, gn.p1); + GetSym(gn.p1, sn); MarkReachedNts(sn.struct) + END + ELSIF gn.typ IN {alt, iter, opt} THEN + MarkReachedNts(gn.p1); + IF gn.typ = alt THEN MarkReachedNts(gn.p2) END + END; + gp := gn.next + END + END MarkReachedNts; + +BEGIN (* TestIfAllNtReached *) + ClearMarkList(reached); + GetNode(root, gn); Sets.Incl(reached, gn.p1); + GetSym(gn.p1, sn); MarkReachedNts(sn.struct); + + sp := firstNt; ok := TRUE; + WHILE sp <= lastNt DO (*for all nonterminals*) + IF ~ Sets.In(reached, sp) THEN + ok := FALSE; GetSym(sp, sn); NL; Str(" "); Str(sn.name); Str(" cannot be reached") + END; + INC(sp) + END; + Texts.Append(Oberon.Log, w.buf) +END TestIfAllNtReached; + + +PROCEDURE TestIfNtToTerm* (VAR ok: BOOLEAN); + VAR changed: BOOLEAN; gn: GraphNode; sp: INTEGER; + sn: SymbolNode; + termList: MarkList; + + PROCEDURE IsTerm (gp: INTEGER): BOOLEAN; + VAR gn: GraphNode; + BEGIN + WHILE gp > 0 DO + GetNode(gp, gn); + IF (gn.typ = nt) & ~ Sets.In(termList, gn.p1) + OR (gn.typ = alt) & ~ IsTerm(gn.p1) & ~ IsTerm(gn.p2) THEN RETURN FALSE + END; + gp := gn.next + END; + RETURN TRUE + END IsTerm; + +BEGIN (* TestIfNtToTerm *) + ClearMarkList(termList); + REPEAT + sp := firstNt; changed := FALSE; + WHILE sp <= lastNt DO + IF ~ Sets.In(termList, sp) THEN + GetSym(sp, sn); + IF IsTerm(sn.struct) THEN Sets.Incl(termList, sp); changed := TRUE END + END; + INC(sp) + END + UNTIL ~changed; + + sp := firstNt; ok := TRUE; + WHILE sp <= lastNt DO + IF ~ Sets.In(termList, sp) THEN + ok := FALSE; GetSym(sp, sn); NL; Str(" "); Str(sn.name); Str(" cannot be derived to terminals") + END; + INC(sp) + END; + Texts.Append(Oberon.Log, w.buf) +END TestIfNtToTerm; + +PROCEDURE Init*; +BEGIN + maxSet := 0; Sets.Clear(set[0]); Sets.Incl(set[0], eofSy); + firstNt := maxSymbols; maxP := maxSymbols; maxT := -1; maxC := -1; + lastNt := maxP - 1; + dummyName := 0; + nNodes := 0 +END Init; + +BEGIN (* CRT *) + (* The dummy node gn[0] ensures that none of the procedures + above have to check for 0 indices. *) + nNodes := 0; + gn[0].typ := -1; gn[0].p1 := 0; gn[0].p2 := 0; gn[0].next := 0; gn[0].line := 0; + Texts.OpenWriter(w) +END CRT. diff --git a/tools/Coco/CRX.obn b/tools/Coco/CRX.obn new file mode 100644 index 0000000..967d19a --- /dev/null +++ b/tools/Coco/CRX.obn @@ -0,0 +1,474 @@ +MODULE CRX; (* H.Moessenboeck 17.11.93 *) + +IMPORT Oberon, Texts, Sets, CRS, CRT, SYSTEM; + +CONST + symSetSize = 100; + maxTerm = 3; (* sets of size < maxTerm are enumerated *) + + tErr = 0; altErr = 1; syncErr = 2; + EOL = 0DX; + +VAR + maxSS: INTEGER; (* number of symbol sets *) + errorNr: INTEGER; (* highest parser error number *) + curSy: INTEGER; (* symbol whose production is currently generated *) + err, w: Texts.Writer; + fram: Texts.Reader; + src: Texts.Reader; + syn: Texts.Writer; + scanner: ARRAY 32 OF CHAR; + symSet: ARRAY symSetSize OF CRT.Set; + + +PROCEDURE Restriction(n: INTEGER); +BEGIN + Texts.WriteLn(w); Texts.WriteString(w, "Restriction "); + Texts.WriteInt(w, n, 0); Texts.WriteLn(w); Texts.Append(Oberon.Log, w.buf); + HALT(99) +END Restriction; + +PROCEDURE PutS(s: ARRAY OF CHAR); + VAR i: INTEGER; +BEGIN i := 0; + WHILE (i < LEN(s)) & (s[i] # 0X) DO + IF s[i] = "$" THEN Texts.WriteLn(syn) ELSE Texts.Write(syn, s[i]) END; + INC(i) + END +END PutS; + +PROCEDURE PutI(i: INTEGER); +BEGIN Texts.WriteInt(syn, i, 0) +END PutI; + +PROCEDURE Indent(n: INTEGER); + VAR i: INTEGER; +BEGIN i := 0; WHILE i < n DO Texts.Write(syn, " "); INC(i) END +END Indent; + +PROCEDURE PutSet(s: SET); + VAR i: INTEGER; first: BOOLEAN; +BEGIN + i := 0; first := TRUE; + WHILE i < Sets.size DO + IF i IN s THEN + IF first THEN first := FALSE ELSE Texts.Write(syn, ",") END; + PutI(i) + END; + INC(i) + END +END PutSet; + +PROCEDURE PutSet1(s: CRT.Set); + VAR i: INTEGER; first: BOOLEAN; +BEGIN + i := 0; first := TRUE; + WHILE i <= CRT.maxT DO + IF Sets.In(s, i) THEN + IF first THEN first := FALSE ELSE Texts.Write(syn, ",") END; + PutI(i) + END; + INC(i) + END +END PutSet1; + +PROCEDURE Length*(s: ARRAY OF CHAR): INTEGER; + VAR i: INTEGER; +BEGIN + i:=0; WHILE (i < LEN(s)) & (s[i] # 0X) DO INC(i) END; + RETURN i +END Length; + +PROCEDURE Alternatives(gp: INTEGER): INTEGER; + VAR gn: CRT.GraphNode; n: INTEGER; +BEGIN + n := 0; + WHILE gp > 0 DO + CRT.GetNode(gp, gn); gp := gn.p2; INC(n) + END; + RETURN n +END Alternatives; + +PROCEDURE CopyFramePart (stopStr: ARRAY OF CHAR); (*Copy from file to file until *) + VAR ch, startCh: CHAR; i, j, high: INTEGER; +BEGIN + startCh := stopStr[0]; high := Length(stopStr) - 1; Texts.Read (fram, ch); + WHILE ch # 0X DO + IF ch = startCh THEN (* check if stopString occurs *) + i := 0; + REPEAT + IF i = high THEN RETURN END; (*stopStr[0..i] found; no unrecognized character*) + Texts.Read (fram, ch); INC(i); + UNTIL ch # stopStr[i]; + (*stopStr[0..i-1] found; 1 unrecognized character*) + j := 0; WHILE j < i DO Texts.Write(syn, stopStr[j]); INC(j) END + ELSE Texts.Write (syn, ch); Texts.Read(fram, ch) + END + END +END CopyFramePart; + +PROCEDURE CopySourcePart (pos: CRT.Position; indent: INTEGER); +(*Copy sequence from to *) + VAR ch: CHAR; i: INTEGER; nChars: LONGINT; r: Texts.Reader; +BEGIN + IF (pos.beg >= 0) & (pos.len > 0) THEN + Texts.OpenReader(r, CRS.src, pos.beg); Texts.Read(r, ch); + nChars := pos.len - 1; + Indent(indent); + LOOP + WHILE ch = EOL DO + Texts.WriteLn(syn); Indent(indent); + IF nChars > 0 THEN Texts.Read(r, ch); DEC(nChars) ELSE EXIT END; + i := pos.col; + WHILE (ch = " ") & (i > 0) DO (* skip blanks at beginning of line *) + IF nChars > 0 THEN Texts.Read(r, ch); DEC (nChars) ELSE EXIT END; + DEC(i) + END + END; + Texts.Write (syn, ch); + IF nChars > 0 THEN Texts.Read(r, ch); DEC (nChars) ELSE EXIT END + END + END + +(* IF pos.beg >= 0 THEN + Texts.OpenReader(r, CRS.src, pos.beg); + nChars := pos.len; col := pos.col - 1; ch := " "; + WHILE (nChars > 0) & (ch = " ") DO (*skip leading blanks*) + Texts.Read(r, ch); DEC(nChars); INC(col) + END; + Indent(indent); + LOOP + WHILE ch = EOL DO + Texts.WriteLn(syn); Indent(indent); + IF nChars > 0 THEN Texts.Read(r, ch); DEC(nChars) ELSE EXIT END; + i := col - 1; + WHILE (ch = " ") & (i > 0) DO (* skip blanks at beginning of line *) + IF nChars > 0 THEN Texts.Read(r, ch); DEC (nChars) ELSE EXIT END; + DEC(i) + END + END; + Texts.Write (syn, ch); + IF nChars > 0 THEN Texts.Read(r, ch); DEC (nChars) ELSE EXIT END + END (* LOOP *) + END *) +END CopySourcePart; + +PROCEDURE GenErrorMsg (errTyp, errSym: INTEGER; VAR errNr: INTEGER); + VAR i: INTEGER; name: ARRAY 32 OF CHAR; sn: CRT.SymbolNode; +BEGIN + INC (errorNr); errNr := errorNr; + CRT.GetSym (errSym, sn); COPY(sn.name, name); + i := 0; WHILE name[i] # 0X DO IF name[i] = CHR(34) THEN name[i] := "'" END; INC(i) END; + Texts.WriteString(err, " |"); + Texts.WriteInt (err, errNr, 3); Texts.WriteString (err, ": Msg("); Texts.Write(err, CHR(34)); + CASE errTyp OF + | tErr : Texts.WriteString (err, name); Texts.WriteString (err, " expected") + | altErr : Texts.WriteString (err, "invalid "); Texts.WriteString (err, name) + | syncErr: Texts.WriteString (err, "this symbol not expected in "); Texts.WriteString (err, name) + END; + Texts.Write(err, CHR(34)); Texts.Write(err, ")"); Texts.WriteLn(err) +END GenErrorMsg; + +PROCEDURE NewCondSet (set: CRT.Set): INTEGER; + VAR i: INTEGER; +BEGIN + i := 1; (*skip symSet[0]*) + WHILE i <= maxSS DO + IF Sets.Equal(set, symSet[i]) THEN RETURN i END; + INC(i) + END; + INC(maxSS); IF maxSS > symSetSize THEN Restriction (9) END; + symSet[maxSS] := set; + RETURN maxSS +END NewCondSet; + +PROCEDURE GenCond (set: CRT.Set); + VAR sx, i, n: INTEGER; + + PROCEDURE Small(s: CRT.Set): BOOLEAN; + BEGIN + i := Sets.size; + WHILE i <= CRT.maxT DO + IF Sets.In(set, i) THEN RETURN FALSE END; + INC(i) + END; + RETURN TRUE + END Small; + +BEGIN + n := Sets.Elements(set, i); + (*IF n = 0 THEN PutS(" FALSE") (*this branch should never be taken*) + ELSIF (n > 1) & Small(set) THEN + PutS(" sym IN {"); PutSet(set[0]); PutS("} ") + ELSIF n <= maxTerm THEN + i := 0; + WHILE i <= CRT.maxT DO + IF Sets.In (set, i) THEN + PutS(" (sym = "); PutI(i); Texts.Write(syn, ")"); + DEC(n); IF n > 0 THEN PutS(" OR") END + END; + INC(i) + END + ELSE PutS(" sym IN symSet["); PutI(NewCondSet(set)); PutS(",0]") + END;*) + IF n = 0 THEN PutS(" FALSE") (*this branch should never be taken*) + ELSIF n <= maxTerm THEN + i := 0; + WHILE i <= CRT.maxT DO + IF Sets.In (set, i) THEN + PutS(" (sym = "); PutI(i); Texts.Write(syn, ")"); + DEC(n); IF n > 0 THEN PutS(" OR") END + END; + INC(i) + END + ELSE PutS(" StartOf("); PutI(NewCondSet(set)); PutS(") ") + END; + +END GenCond; + +PROCEDURE GenCode (gp, indent: INTEGER; checked: CRT.Set); + VAR gn, gn2: CRT.GraphNode; sn: CRT.SymbolNode; gp2: INTEGER; + s1, s2: CRT.Set; errNr, alts: INTEGER; equal: BOOLEAN; +BEGIN + WHILE gp > 0 DO + CRT.GetNode (gp, gn); + CASE gn.typ OF + + | CRT.nt: + Indent(indent); + CRT.GetSym(gn.p1, sn); PutS(sn.name); + IF gn.pos.beg >= 0 THEN + Texts.Write(syn, "("); CopySourcePart(gn.pos, 0); Texts.Write(syn, ")") + END; + PutS(";$") + + | CRT.t: + CRT.GetSym(gn.p1, sn); Indent(indent); + IF Sets.In(checked, gn.p1) THEN + PutS("Get;$") + ELSE + PutS("Expect("); PutI(gn.p1); PutS(");$") + END + + | CRT.wt: + CRT.CompExpected(ABS(gn.next), curSy, s1); + CRT.GetSet(0, s2); Sets.Unite(s1, s2); + CRT.GetSym(gn.p1, sn); Indent(indent); + PutS("ExpectWeak("); PutI(gn.p1); PutS(", "); PutI(NewCondSet(s1)); PutS(");$") + + | CRT.any: + Indent(indent); PutS("Get;$") + + | CRT.eps: (* nothing *) + + | CRT.sem: + CopySourcePart(gn.pos, indent); PutS(";$"); + + | CRT.sync: + CRT.GetSet(gn.p1, s1); + GenErrorMsg (syncErr, curSy, errNr); + Indent(indent); + PutS("WHILE ~("); GenCond(s1); PutS(") DO Error("); + PutI(errNr); PutS("); Get END;$") + + | CRT.alt: + CRT.CompFirstSet(gp, s1); equal := Sets.Equal(s1, checked); + alts := Alternatives(gp); + IF alts > 5 THEN Indent(indent); PutS("CASE sym OF$") END; + gp2 := gp; + WHILE gp2 # 0 DO + CRT.GetNode(gp2, gn2); + CRT.CompExpected(gn2.p1, curSy, s1); + Indent(indent); + IF alts > 5 THEN PutS("| "); PutSet1(s1); PutS(": ") (*case labels*) + ELSIF gp2 = gp THEN PutS("IF"); GenCond(s1); PutS(" THEN$") + ELSIF (gn2.p2 = 0) & equal THEN PutS("ELSE$") + ELSE PutS("ELSIF"); GenCond(s1); PutS(" THEN$") + END; + Sets.Unite(s1, checked); + GenCode(gn2.p1, indent + 2, s1); + gp2 := gn2.p2 + END; + IF ~ equal THEN + GenErrorMsg(altErr, curSy, errNr); + Indent(indent); PutS("ELSE Error("); PutI(errNr); PutS(")$") + END; + Indent(indent); PutS("END;$") + + | CRT.iter: + CRT.GetNode(gn.p1, gn2); + Indent(indent); PutS("WHILE"); + IF gn2.typ = CRT.wt THEN + CRT.CompExpected(ABS(gn2.next), curSy, s1); + CRT.CompExpected(ABS(gn.next), curSy, s2); + CRT.GetSym(gn2.p1, sn); + PutS(" WeakSeparator("); PutI(gn2.p1); PutS(", "); PutI(NewCondSet(s1)); + PutS(", "); PutI(NewCondSet(s2)); PutS(") "); + Sets.Clear(s1); (*for inner structure*) + IF gn2.next > 0 THEN gp2 := gn2.next ELSE gp2 := 0 END + ELSE + gp2 := gn.p1; CRT.CompFirstSet(gp2, s1); GenCond(s1) + END; + PutS(" DO$"); + GenCode(gp2, indent + 2, s1); + Indent(indent); PutS("END;$") + + | CRT.opt: + CRT.CompFirstSet(gn.p1, s1); + IF ~ Sets.Equal(checked, s1) THEN + Indent(indent); PutS("IF"); GenCond(s1); PutS(" THEN$"); + GenCode(gn.p1, indent + 2, s1); + Indent(indent); PutS("END;$") + ELSE GenCode(gn.p1, indent, checked) + END + + END; (*CASE*) + IF ~ (gn.typ IN {CRT.eps, CRT.sem, CRT.sync}) THEN Sets.Clear(checked) END; + gp := gn.next + END +END GenCode; + +PROCEDURE GenCodePragmas; + VAR i, p: INTEGER; sn: CRT.SymbolNode; + + PROCEDURE P(s1, s2: ARRAY OF CHAR); + BEGIN + PutS(" "); PutS(scanner); PutS(s1); PutS(" := "); PutS(scanner); PutS(s2); PutS(";$") + END P; + +BEGIN + i := CRT.maxT + 1; + WHILE i <= CRT.maxP DO + CRT.GetSym(i, sn); + PutS(" IF sym = "); PutI(i); PutS(" THEN$"); CopySourcePart(sn.semPos, 9); PutS("$ END;$"); + INC(i) + END; + P(".nextPos", ".pos"); P(".nextCol", ".col"); P(".nextLine", ".line"); P(".nextLen", ".len") +END GenCodePragmas; + +PROCEDURE GenProcedureHeading (sn: CRT.SymbolNode; forward: BOOLEAN); +BEGIN + PutS("PROCEDURE "); + IF forward THEN Texts.Write(syn, "^") END; + PutS(sn.name); + IF sn.attrPos.beg >= 0 THEN + Texts.Write(syn, "("); CopySourcePart(sn.attrPos, 0); Texts.Write(syn, ")") + END; + PutS(";$") +END GenProcedureHeading; + +PROCEDURE GenForwardRefs; + VAR sp: INTEGER; sn: CRT.SymbolNode; +BEGIN + IF ~ CRT.ddt[5] THEN + sp := CRT.firstNt; + WHILE sp <= CRT.lastNt DO (* for all nonterminals *) + CRT.GetSym (sp, sn); GenProcedureHeading(sn, TRUE); + INC(sp) + END; + Texts.WriteLn(syn) + END +END GenForwardRefs; + +PROCEDURE GenProductions; + VAR sn: CRT.SymbolNode; checked: CRT.Set; +BEGIN + curSy := CRT.firstNt; + WHILE curSy <= CRT.lastNt DO (* for all nonterminals *) + CRT.GetSym (curSy, sn); GenProcedureHeading (sn, FALSE); + IF sn.semPos.beg >= 0 THEN CopySourcePart(sn.semPos, 2); PutS(" $") END; + PutS("BEGIN$"); Sets.Clear(checked); + GenCode (sn.struct, 2, checked); + PutS("END "); PutS(sn.name); PutS(";$$"); + INC (curSy); + END; +END GenProductions; + +PROCEDURE InitSets; + VAR i, j: INTEGER; +BEGIN + i := 0; CRT.GetSet(0, symSet[0]); + WHILE i <= maxSS DO + j := 0; + WHILE j <= CRT.maxT DIV Sets.size DO + PutS(" symSet["); PutI(i); PutS(", ");PutI(j); + PutS("] := {"); PutSet(symSet[i, j]); PutS("};$"); + INC(j) + END; + INC(i) + END +END InitSets; + +PROCEDURE *Show(t: Texts.Text; op: INTEGER; beg, end: LONGINT); +BEGIN END Show; + +PROCEDURE GenCompiler*; + VAR errNr, i: INTEGER; checked: CRT.Set; + gn: CRT.GraphNode; sn: CRT.SymbolNode; + parser: ARRAY 32 OF CHAR; + t: Texts.Text; pos: LONGINT; + ch1, ch2: CHAR; +BEGIN + CRT.GetNode(CRT.root, gn); CRT.GetSym(gn.p1, sn); + COPY(sn.name, parser); i := Length(parser); parser[i] := "P"; parser[i+1] := 0X; + COPY(parser, scanner); scanner[i] := "S"; + + NEW(t); Texts.Open(t, "Parser.FRM"); Texts.OpenReader(fram, t, 0); + IF t.len = 0 THEN + Texts.WriteString(w, "Parser.FRM not found"); Texts.WriteLn(w); + Texts.Append(Oberon.Log, w.buf); HALT(99) + END; + + Texts.OpenWriter(err); Texts.WriteLn(err); + i := 0; + WHILE i <= CRT.maxT DO GenErrorMsg(tErr, i, errNr); INC(i) END; + + (*----- write *P.Mod -----*) + Texts.OpenWriter(syn); + NEW(t); t.notify := Show; Texts.Open(t, ""); + CopyFramePart("-->modulename"); PutS(parser); + CopyFramePart("-->scanner"); PutS(scanner); + IF CRT.importPos.beg >= 0 THEN PutS(", "); CopySourcePart(CRT.importPos, 0) END; + CopyFramePart("-->constants"); + PutS("maxP = "); PutI(CRT.maxP); PutS(";$"); + PutS(" maxT = "); PutI(CRT.maxT); PutS(";$"); + PutS(" nrSets = ;$"); Texts.Append(t, syn.buf); pos := t.len - 2; + CopyFramePart("-->declarations"); CopySourcePart(CRT.semDeclPos, 0); + CopyFramePart("-->errors"); PutS(scanner); PutS(".Error(n, "); PutS(scanner); PutS(".nextPos)"); + CopyFramePart("-->scanProc"); + IF CRT.maxT = CRT.maxP THEN PutS(scanner); PutS(".Get(sym)") + ELSE + PutS("LOOP "); PutS(scanner); PutS(".Get(sym);$"); + PutS(" IF sym > maxT THEN$"); + GenCodePragmas; + PutS(" ELSE EXIT$"); + PutS(" END$"); + PutS("END$") + END; + CopyFramePart("-->productions"); GenForwardRefs; GenProductions; + CopyFramePart("-->parseRoot"); Sets.Clear(checked); GenCode (CRT.root, 2, checked); + CopyFramePart("-->initialization"); InitSets; + CopyFramePart("-->modulename"); PutS(parser); Texts.Write(syn, "."); + Texts.Append(t, syn.buf); Texts.Append(t, err.buf); + PutI(maxSS+1); (*if no set, maxSS = -1*) Texts.Insert(t, pos, syn.buf); + i := Length(parser); parser[i] := "."; parser[i+1] := "M"; parser[i+2] := "o"; parser[i+3] := "d"; parser[i+4] := 0X; + Texts.Close(t, parser) +END GenCompiler; + +PROCEDURE WriteStatistics*; +BEGIN + Texts.WriteInt (w, CRT.maxT + 1, 0); Texts.WriteString(w, " t, "); + Texts.WriteInt (w, CRT.maxSymbols - CRT.firstNt + CRT.maxT + 1, 0); Texts.WriteString(w, " syms, "); + Texts.WriteInt (w, CRT.nNodes, 0); Texts.WriteString(w, " nodes, "); + Texts.WriteInt (w, maxSS, 0); Texts.WriteString(w, "sets"); + Texts.WriteLn(w); Texts.Append(Oberon.Log, w.buf) +END WriteStatistics; + +PROCEDURE Init*; +BEGIN + errorNr := -1; maxSS := 0 (*symSet[0] reserved for all SYNC sets*) +END Init; + +BEGIN + Texts.OpenWriter(w) +END CRX. diff --git a/tools/Coco/Coco b/tools/Coco/Coco new file mode 100755 index 0000000..653b35f --- /dev/null +++ b/tools/Coco/Coco @@ -0,0 +1,8 @@ +#!/bin/sh + +set -e + +DIR=$(dirname "$0") +PATH="../..:$PATH" + +java -ea -cp "$DIR/classes" Launcher Coco.$* diff --git a/tools/Coco/Coco.Tool b/tools/Coco/Coco.Tool new file mode 100644 index 0000000..643019f --- /dev/null +++ b/tools/Coco/Coco.Tool @@ -0,0 +1,83 @@ +Coco/R - the Oberon scanner and parser generator + +For a complete documentation see the postscript file Coco.Report.ps. + +Compiler.Compile + Sets.Mod CRS.Mod CRT.Mod CRA.Mod CRX.Mod CRP.Mod Coco.Mod ~ + +NOTE: the option character should be changed to "\" in Coco.Mod for Unix implementations. + + +Coco.Compile * +Coco.Compile ~ +Coco.Compile ^ +Coco.Compile @ + +(*________________________ usage ________________________*) + +Coco.Compile [options] + +The file CR.ATG is an example of an input file to Coco. If the grammar in the input file has the name X +the generated scanner has the name XS.Mod and the generated parser has the name XP.Mod. + +Options: + + /X generates a cross reference list of all syntax symbols + /S generates a list of all terminal start symbols and successors of nonterminal symbols. + +Interface of the generated scanner: + + DEFINITION XS; + IMPORT Texts; + TYPE + ErrorProc = PROCEDURE (n: INTEGER; pos: LONGINT); + VAR + Error: ErrorProc; + col, errors, len, line, nextCol, nextLen, nextLine: INTEGER; + nextPos, pos: LONGINT; + src: Texts.Text; + PROCEDURE Reset (t: Texts.Text; pos: LONGINT; errProc: ErrorProc); + PROCEDURE Get(VAR sym: INTEGER); + PROCEDURE GetName(pos: LONGINT; len: INTEGER; VAR name: ARRAY OF CHAR); + PROCEDURE StdErrorProc (n: INTEGER; pos: LONGINT); + END XS. + +Interface of the generated parser: + + DEFINITION XP; + PROCEDURE Parse; + END XP. + +Example how to use the generated parts; + + Texts.OpenScanner(s, Oberon.Par.Text, Oberon.Par.Pos); Texts.Scan(s); + IF s.class = Texts.Name THEN + NEW(text); Texts.Open(text, s.s); + XS.Reset(text, 0, MyErrorHandler); + XP.Parse; + END + + +Error handling in the generated parser: + +The grammar has to contain hints, from which Coco can generate appropriate error handling. +The hints can be placed arbitrarily on the right-hand side of a production: + + SYNC Denotes a synchronisation point. At such points symbols are skipped until a symbol + is found which is a legal continuation symbol at that point (or eof). SYNC is usually + placed at points where particularly "safe" symbols are expected, i.e., symbols that + are rarely missing or misspelled. + + WEAK s s is an arbitrary terminal symbol (e.g., ";") which is considered "weak", because it is + frequently missing or misspelled (e.g., a semicolon between statements). + +Example: + + Statement = + SYNC + ( ident WEAK ":=" Expression + | "IF" Expression "THEN" StatSeq ["ELSE" StatSeq] "END" + | "WHILE" Expression "DO" StatSeq "END" + ). + StatSeq = + Statement { WEAK ";" Statement}.þ diff --git a/tools/Coco/Coco.obn b/tools/Coco/Coco.obn new file mode 100644 index 0000000..5f43695 --- /dev/null +++ b/tools/Coco/Coco.obn @@ -0,0 +1,172 @@ +(* Implementation restrictions + 3 too many nodes in graph (>1500) CRG.NewNode + 4 too many sets (ANY-symbols or SYNC symbols) CRT.NewAnySet, + CRT.ComputeSyncSet + 6 too many symbols (>300) CRT.NewSym + 7 too many character classes (>50) CRT.NewClass + 9 too many conditions in generated code (>100) CRX.NewCondSet + + Trace output (ddt settings: ${digit}) + 0 Prints states of automaton + 1 Prints start symbols and followers of nonterminals (also option /s) + 2 Prints the internal graph + 3 Trace of start symbol set computation + 4 Trace of follow set computation + 5 suppresses FORWARD declarations in parser (for multipass compilers) + 6 Prints the symbol list + 7 Prints a cross reference list (also option /x) + 8 Write statistics +==========================================================================*) +MODULE Coco; + +IMPORT Oberon, Texts, CRS, CRP, CRT; + +CONST minErrDist = 8; + +VAR w: Texts.Writer; lastErrPos: LONGINT; + + +PROCEDURE Error (n: INTEGER; pos: LONGINT); + + PROCEDURE Msg (s: ARRAY OF CHAR); + BEGIN Texts.WriteString(w, s) + END Msg; + +BEGIN + INC(CRS.errors); + IF pos < lastErrPos + minErrDist THEN lastErrPos := pos; RETURN END; + lastErrPos := pos; + Texts.WriteInt(w, pos, 3); Texts.WriteString(w, ": "); + IF n < 200 THEN + CASE n OF + | 0: Msg("EOF expected") + | 1: Msg("ident expected") + | 2: Msg("string expected") + | 3: Msg("number expected") + | 4: Msg("'COMPILER' expected") + | 5: Msg("'IMPORT' expected") + | 6: Msg("';' expected") + | 7: Msg("'PRODUCTIONS' expected") + | 8: Msg("'=' expected") + | 9: Msg("'.' expected") + | 10: Msg("'END' expected") + | 11: Msg("'CHARACTERS' expected") + | 12: Msg("'TOKENS' expected") + | 13: Msg("'PRAGMAS' expected") + | 14: Msg("'COMMENTS' expected") + | 15: Msg("'FROM' expected") + | 16: Msg("'TO' expected") + | 17: Msg("'NESTED' expected") + | 18: Msg("'IGNORE' expected") + | 19: Msg("'CASE' expected") + | 20: Msg("'+' expected") + | 21: Msg("'-' expected") + | 22: Msg("'CHR' expected") + | 23: Msg("'(' expected") + | 24: Msg("')' expected") + | 25: Msg("'ANY' expected") + | 26: Msg("'|' expected") + | 27: Msg("'WEAK' expected") + | 28: Msg("'[' expected") + | 29: Msg("']' expected") + | 30: Msg("'{' expected") + | 31: Msg("'}' expected") + | 32: Msg("'SYNC' expected") + | 33: Msg("'CONTEXT' expected") + | 34: Msg("'<' expected") + | 35: Msg("'>' expected") + | 36: Msg("'(.' expected") + | 37: Msg("'.)' expected") + | 38: Msg("??? expected") + | 39: Msg("invalid TokenFactor") + | 40: Msg("invalid Factor") + | 41: Msg("invalid Factor") + | 42: Msg("invalid Term") + | 43: Msg("invalid Symbol") + | 44: Msg("invalid SimSet") + | 45: Msg("this symbol not expected in TokenDecl") + | 46: Msg("invalid TokenDecl") + | 47: Msg("invalid Declaration") + | 48: Msg("invalid Declaration") + | 49: Msg("invalid Declaration") + | 50: Msg("this symbol not expected in Coco") + | 51: Msg("invalid start of the program") + ELSE Texts.WriteString(w, "error "); Texts.WriteInt(w, n, 0) + END + ELSE + CASE n OF + | 201: Msg("unexpected end of file"); + | 202: Msg("string terminator not on this line"); + | 203: Msg("a literal must not have attributes"); + | 204: Msg("this symbol kind not allowed in production"); + | 205: Msg("symbol declared without attributes"); + | 206: Msg("symbol declared with attributes"); + | 207: Msg("name declared twice"); + | 208: Msg("this type not allowed on left side of production"); + | 209: Msg("symbol earlier referenced without attributes"); + | 210: Msg("symbol earlier referenced with attributes"); + | 211: Msg("missing production for grammar name"); + | 212: Msg("grammar symbol must not have attributes"); + | 213: Msg("a literal must not be declared with a structure") + | 214: Msg("semantic action not allowed here") + | 215: Msg("undefined name") + | 216: Msg("attributes not allowed in token declaration") + | 217: Msg("name does not match name in heading") + | 220: Msg("token may be empty") + | 221: Msg("token must not start with an iteration") + | 222: Msg("only characters allowed in comment declaration") + | 223: Msg("only terminals may be weak") + | 224: + | 225: Msg("comment delimiter must not exceed 2 characters") + | 226: Msg("character set contains more than one character") + ELSE Texts.WriteString(w, "error "); Texts.WriteInt(w, n, 0) + END + END; +Texts.WriteLn(w); Texts.Append(Oberon.Log, w.buf) +END Error; + +PROCEDURE Options(VAR s: Texts.Scanner); + VAR i: INTEGER; +BEGIN + IF s.nextCh = "/" THEN Texts.Scan(s); Texts.Scan(s); + IF s.class = Texts.Name THEN i := 0; + WHILE s.s[i] # 0X DO + IF CAP(s.s[i]) = "X" THEN CRT.ddt[7] := TRUE + ELSIF CAP(s.s[i]) = "S" THEN CRT.ddt[1] := TRUE + END; + INC(i) + END + END + END; +END Options; + + +PROCEDURE Compile*; + VAR s: Texts.Scanner; src, t: Texts.Text; + pos, beg, end, time: LONGINT; i: INTEGER; +BEGIN + Texts.OpenScanner(s, Oberon.Par.text, Oberon.Par.pos); Texts.Scan(s); + src := NIL; pos := 0; + IF (s.class = Texts.Char) & (s.c = "^") THEN + Oberon.GetSelection(t, beg, end, time); + IF time >= 0 THEN Texts.OpenScanner(s, t, beg); Texts.Scan(s) END + END; + IF s.class = Texts.Name THEN + NEW(src); Texts.Open(src, s.s); + ELSIF (s.class = Texts.Char) & (s.c = "@") THEN + Oberon.GetSelection(t, beg, end, time); + IF time >= 0 THEN src := t; pos := beg; s.s := " " END + END; + IF src # NIL THEN + Texts.WriteString (w, "Coco/R - Compiler-Compiler V2.2"); Texts.Append(Oberon.Log, w.buf); + i := 0; WHILE i < 10 DO CRT.ddt[i] := FALSE; INC(i) END; + Options(s); + Texts.WriteLn(w); Texts.WriteString(w, s.s); Texts.Append(Oberon.Log, w.buf); + CRS.Reset(src, pos, Error); lastErrPos := -10; + CRP.Parse + END +END Compile; + +BEGIN + Texts.OpenWriter(w) +END Coco. diff --git a/tools/Coco/Parser.FRM b/tools/Coco/Parser.FRM new file mode 100644 index 0000000..dc35235 --- /dev/null +++ b/tools/Coco/Parser.FRM @@ -0,0 +1,65 @@ +(* parser module generated by Coco-R *) +MODULE -->modulename; + +IMPORT -->scanner; + +CONST + -->constants + setSize = 32; nSets = (maxT DIV setSize) + 1; + +TYPE + SymbolSet = ARRAY nSets OF SET; + +VAR + sym: INTEGER; (* current input symbol *) + symSet: ARRAY nrSets OF SymbolSet; + +-->declarations + +PROCEDURE Error (n: INTEGER); +BEGIN -->errors +END Error; + +PROCEDURE Get; +BEGIN + -->scanProc +END Get; + +PROCEDURE Expect(n: INTEGER); +BEGIN IF sym = n THEN Get ELSE Error(n) END +END Expect; + +PROCEDURE StartOf(s: INTEGER): BOOLEAN; +BEGIN RETURN (sym MOD setSize) IN symSet[s, sym DIV setSize] +END StartOf; + +PROCEDURE ExpectWeak(n, follow: INTEGER); +BEGIN + IF sym = n THEN Get + ELSE Error(n); WHILE ~ StartOf(follow) DO Get END + END +END ExpectWeak; + +PROCEDURE WeakSeparator(n, syFol, repFol: INTEGER): BOOLEAN; + VAR s: SymbolSet; i: INTEGER; +BEGIN + IF sym = n THEN Get; RETURN TRUE + ELSIF StartOf(repFol) THEN RETURN FALSE + ELSE + i := 0; WHILE i < nSets DO s[i] := symSet[syFol, i] + symSet[repFol, i] + symSet[0, i]; INC(i) END; + Error(n); WHILE ~ ((sym MOD setSize) IN s[sym DIV setSize]) DO Get END; + RETURN StartOf(syFol) + END +END WeakSeparator; + +-->productions + +PROCEDURE Parse*; +BEGIN + Get; +-->parseRoot +END Parse; + +BEGIN +-->initialization +END -->modulename. diff --git a/tools/Coco/README b/tools/Coco/README new file mode 100644 index 0000000..4ebde52 --- /dev/null +++ b/tools/Coco/README @@ -0,0 +1 @@ +ftp://ftp.ethoberon.ethz.ch/Oberon/OberonV4/Sources/Coco/ diff --git a/tools/Coco/Scanner.FRM b/tools/Coco/Scanner.FRM new file mode 100644 index 0000000..103ad86 --- /dev/null +++ b/tools/Coco/Scanner.FRM @@ -0,0 +1,103 @@ +(* scanner module generated by Coco-R *) +MODULE -->modulename; + +IMPORT Texts, SYSTEM; + +CONST + EOL = 0DX; + EOF = 0X; + maxLexLen = 127; +-->declarations + +TYPE + ErrorProc* = PROCEDURE (n: INTEGER; pos: LONGINT); + StartTable = ARRAY 128 OF INTEGER; + +VAR + src*: Texts.Text; (*source text. To be set by the main pgm*) + pos*: LONGINT; (*position of current symbol*) + line*, col*, len*: INTEGER; (*line, column, length of current symbol*) + nextPos*: LONGINT; (*position of lookahead symbol*) + nextLine*, nextCol*, nextLen*: INTEGER; (*line, column, length of lookahead symbol*) + errors*: INTEGER; (*number of errors detected*) + Error*: ErrorProc; + + ch: CHAR; (*current input character*) + r: Texts.Reader; (*global reader*) + chPos: LONGINT; (*position of current character*) + chLine: INTEGER; (*current line number*) + lineStart: LONGINT; (*start position of current line*) + apx: INTEGER; (*length of appendix*) + oldEols: INTEGER; (*nr. of EOLs in a comment*) + + start: StartTable; (*start state for every character*) + + +PROCEDURE NextCh; (*return global variable ch*) +BEGIN + Texts.Read(r, ch); INC(chPos); + IF ch = EOL THEN INC(chLine); lineStart := chPos + 1 END +END NextCh; + + +PROCEDURE Comment(): BOOLEAN; + VAR level, startLine: INTEGER; oldLineStart: LONGINT; +BEGIN (*Comment*) + level := 1; startLine := chLine; oldLineStart := lineStart; +-->comment +END Comment; + + +PROCEDURE Get*(VAR sym: INTEGER); +VAR state: INTEGER; lexeme: ARRAY maxLexLen+1 OF CHAR; + + PROCEDURE CheckLiteral; + BEGIN + IF nextLen < maxLexLen THEN lexeme[nextLen] := 0X END; +-->literals + END CheckLiteral; + +BEGIN +-->GetSy1 + IF ch > 7FX THEN ch := " " END; + pos := nextPos; col := nextCol; line := nextLine; len := nextLen; + nextPos := chPos; nextCol := SHORT(chPos - lineStart); nextLine := chLine; nextLen := 0; + state := start[ORD(ch)]; apx := 0; + LOOP + IF nextLen < maxLexLen THEN lexeme[nextLen] := ch END; + INC(nextLen); + NextCh; + IF state > 0 THEN + CASE state OF +-->GetSy2 + END (*CASE*) + ELSE sym := noSym; RETURN (*NextCh already done*) + END (*IF*) + END (*LOOP*) +END Get; + + +PROCEDURE GetName*(pos: LONGINT; len: INTEGER; VAR s: ARRAY OF CHAR); + VAR i: INTEGER; r: Texts.Reader; +BEGIN + Texts.OpenReader(r, src, pos); + IF len >= LEN(s) THEN len := SHORT(LEN(s)) - 1 END; + i := 0; WHILE i < len DO Texts.Read(r, s[i]); INC(i) END; + s[i] := 0X +END GetName; + +PROCEDURE StdErrorProc* (n: INTEGER; pos: LONGINT); +BEGIN INC(errors) END StdErrorProc; + +PROCEDURE Reset* (t: Texts.Text; pos: LONGINT; errProc: ErrorProc); +BEGIN + src := t; Error := errProc; + Texts.OpenReader(r, src, pos); + chPos := pos - 1; chLine := 1; lineStart := 0; + oldEols := 0; apx := 0; errors := 0; + NextCh +END Reset; + +BEGIN +-->initialization +END -->modulename. diff --git a/tools/Coco/Sets.obn b/tools/Coco/Sets.obn new file mode 100644 index 0000000..4b94318 --- /dev/null +++ b/tools/Coco/Sets.obn @@ -0,0 +1,138 @@ +MODULE Sets; + +IMPORT Texts; + +CONST size* = 32; + + +PROCEDURE Clear*(VAR s: ARRAY OF SET); + VAR i: INTEGER; +BEGIN + i := 0; WHILE i < LEN(s) DO s[i] := {}; INC(i) END +END Clear; + + +PROCEDURE Fill*(VAR s: ARRAY OF SET); + VAR i: INTEGER; +BEGIN + i := 0; WHILE i < LEN(s) DO s[i] := {0 .. size-1}; INC(i) END +END Fill; + + +PROCEDURE Incl*(VAR s: ARRAY OF SET; x: INTEGER); +BEGIN INCL(s[x DIV size], x MOD size) +END Incl; + + +PROCEDURE Excl*(VAR s: ARRAY OF SET; x: INTEGER); +BEGIN EXCL(s[x DIV size], x MOD size) +END Excl; + + +PROCEDURE In*(VAR s: ARRAY OF SET; x: INTEGER): BOOLEAN; +BEGIN RETURN x MOD size IN s[x DIV size] +END In; + + +PROCEDURE Includes*(VAR s1, s2: ARRAY OF SET): BOOLEAN; + VAR i: INTEGER; +BEGIN + i := 0; + WHILE i < LEN(s1) DO + IF s1[i] + s2[i] # s1[i] THEN RETURN FALSE END; + INC(i) + END; + RETURN TRUE; +END Includes; + + +PROCEDURE Elements*(VAR s: ARRAY OF SET; VAR lastElem: INTEGER): INTEGER; + VAR i, n, max: INTEGER; +BEGIN + i := 0; n := 0; max := SHORT(LEN(s)) * size; + WHILE i < max DO + IF (i MOD size) IN s[i DIV size] THEN INC(n); lastElem := i END; + INC(i) + END; + RETURN n +END Elements; + + +PROCEDURE Empty*(VAR s: ARRAY OF SET): BOOLEAN; + VAR i: INTEGER; +BEGIN + i := 0; + WHILE i < LEN(s) DO + IF s[i] # {} THEN RETURN FALSE END; + INC(i) + END; + RETURN TRUE +END Empty; + + +PROCEDURE Equal*(VAR s1, s2: ARRAY OF SET): BOOLEAN; + VAR i: INTEGER; +BEGIN + i := 0; + WHILE i < LEN(s1) DO + IF s1[i] # s2[i] THEN RETURN FALSE END; + INC(i) + END; + RETURN TRUE +END Equal; + + +PROCEDURE Different*(VAR s1, s2: ARRAY OF SET): BOOLEAN; + VAR i: INTEGER; +BEGIN + i := 0; + WHILE i < LEN(s1) DO + IF s1[i] * s2[i] # {} THEN RETURN FALSE END; + INC(i) + END; + RETURN TRUE +END Different; + + +PROCEDURE Unite*(VAR s1, s2: ARRAY OF SET); + VAR i: INTEGER; +BEGIN + i := 0; WHILE i < LEN(s1) DO s1[i] := s1[i] + s2[i]; INC(i) END +END Unite; + + +PROCEDURE Differ*(VAR s1, s2: ARRAY OF SET); + VAR i: INTEGER; +BEGIN + i := 0; WHILE i < LEN(s1) DO s1[i] := s1[i] - s2[i]; INC(i) END +END Differ; + + +PROCEDURE Intersect*(VAR s1, s2, s3: ARRAY OF SET); + VAR i: INTEGER; +BEGIN + i := 0; WHILE i < LEN(s1) DO s3[i] := s1[i] * s2[i]; INC(i) END +END Intersect; + + +PROCEDURE Print*(VAR f: Texts.Writer; s: ARRAY OF SET; w, indent: INTEGER); + VAR col, i, max: INTEGER; +BEGIN + i := 0; col := indent; max := SHORT(LEN(s)) * size; + Texts.Write(f, "{"); + WHILE i < max DO + IF In(s, i) THEN + IF col + 4 > w THEN + Texts.WriteLn(f); + col := 0; WHILE col < indent DO Texts.Write(f, " "); INC(col) END + END; + Texts.WriteInt(f, i, 3); Texts.Write(f, ","); + INC(col, 4) + END; + INC(i) + END; + Texts.Write(f, "}") +END Print; + + +END Sets. diff --git a/tools/Coco/make.sh b/tools/Coco/make.sh new file mode 100755 index 0000000..a8437c4 --- /dev/null +++ b/tools/Coco/make.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +PATH="../..:$PATH" + +obn-compile.sh Coco diff --git a/tools/Coco/voc/.gitignore b/tools/Coco/voc/.gitignore new file mode 100644 index 0000000..8c32a7e --- /dev/null +++ b/tools/Coco/voc/.gitignore @@ -0,0 +1,5 @@ +Coco.Compile +*.o +*.sym +*.h +*.c diff --git a/tools/Coco/voc/CocoCompile.Mod b/tools/Coco/voc/CocoCompile.Mod new file mode 100644 index 0000000..3466192 --- /dev/null +++ b/tools/Coco/voc/CocoCompile.Mod @@ -0,0 +1,4 @@ +MODULE CocoCompile; +IMPORT Coco; +BEGIN Coco.Compile; +END CocoCompile. diff --git a/tools/Coco/voc/make.sh b/tools/Coco/voc/make.sh new file mode 100755 index 0000000..6690356 --- /dev/null +++ b/tools/Coco/voc/make.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +voc ../Sets.obn ../CRS.obn ../CRT.obn ../CRX.obn ../CRA.obn ../CRP.obn ../Coco.obn CocoCompile.Mod -m +mv CocoCompile Coco.Compile +rm -f *.o *.sym *.h *.c diff --git a/tools/TextV4/.gitignore b/tools/TextV4/.gitignore new file mode 100644 index 0000000..d9da078 --- /dev/null +++ b/tools/TextV4/.gitignore @@ -0,0 +1,2 @@ +classes +tmp diff --git a/TextV4/TextV4 b/tools/TextV4/TextV4 similarity index 100% rename from TextV4/TextV4 rename to tools/TextV4/TextV4 diff --git a/TextV4/TextV4.obn b/tools/TextV4/TextV4.obn similarity index 100% rename from TextV4/TextV4.obn rename to tools/TextV4/TextV4.obn diff --git a/tools/TextV4/make.sh b/tools/TextV4/make.sh new file mode 100755 index 0000000..5f45bb9 --- /dev/null +++ b/tools/TextV4/make.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +PATH="../..:$PATH" + +obn-compile.sh TextV4 diff --git a/tools/TextV4/voc/.gitignore b/tools/TextV4/voc/.gitignore new file mode 100644 index 0000000..7008097 --- /dev/null +++ b/tools/TextV4/voc/.gitignore @@ -0,0 +1,7 @@ +TextV4.Print +TextV4.ToAscii +TextV4.ToText +*.o +*.sym +*.h +*.c diff --git a/TextV4/voc/TextV4Print.Mod b/tools/TextV4/voc/TextV4Print.Mod similarity index 100% rename from TextV4/voc/TextV4Print.Mod rename to tools/TextV4/voc/TextV4Print.Mod diff --git a/TextV4/voc/TextV4ToAscii.Mod b/tools/TextV4/voc/TextV4ToAscii.Mod similarity index 100% rename from TextV4/voc/TextV4ToAscii.Mod rename to tools/TextV4/voc/TextV4ToAscii.Mod diff --git a/TextV4/voc/TextV4ToText.Mod b/tools/TextV4/voc/TextV4ToText.Mod similarity index 100% rename from TextV4/voc/TextV4ToText.Mod rename to tools/TextV4/voc/TextV4ToText.Mod diff --git a/TextV4/voc/make.sh b/tools/TextV4/voc/make.sh similarity index 57% rename from TextV4/voc/make.sh rename to tools/TextV4/voc/make.sh index fb63176..50e6814 100755 --- a/TextV4/voc/make.sh +++ b/tools/TextV4/voc/make.sh @@ -3,4 +3,7 @@ set -e voc -F ../TextV4.obn TextV4ToAscii.Mod -m TextV4ToText.Mod -m TextV4Print.Mod -m +mv TextV4ToAscii TextV4.ToAscii +mv TextV4ToText TextV4.ToText +mv TextV4Print TextV4.Print rm -f *.o *.sym *.h *.c