2 (* ==================================================================== *)
3 (* *)
4 (* SymFileRW: Symbol-file reading and writing for GPCP. *)
5 (* Copyright (c) John Gough 1999 -- 2011. *)
6 (* *)
7 (* ==================================================================== *)
11 IMPORT
12 GPCPcopyright,
13 RTS,
14 Error,
15 Console,
21 Visitor,
22 ExprDesc,
28 FileNames;
30 (* ========================================================================= *
31 // Collected syntax ---
32 //
33 // SymFile = Header [String (falSy | truSy | <other attribute>)]
34 // [ VersionName ]
35 // {Import | Constant | Variable | Type | Procedure}
36 // TypeList Key.
37 // -- optional String is external name.
38 // -- falSy ==> Java class
39 // -- truSy ==> Java interface
40 // -- others ...
41 // Header = magic modSy Name.
42 // VersionName= numSy longint numSy longint numSy longint.
43 // -- mj# mn# bld rv# 8xbyte extract
44 // Import = impSy Name [String] Key.
45 // -- optional string is explicit external name of class
46 // Constant = conSy Name Literal.
47 // Variable = varSy Name TypeOrd.
48 // Type = typSy Name TypeOrd.
49 // Procedure = prcSy Name [String] FormalType.
50 // -- optional string is explicit external name of procedure
51 // Method = mthSy Name byte byte TypeOrd [String] [Name] FormalType.
52 // -- optional string is explicit external name of method
53 // FormalType = [retSy TypeOrd] frmSy {parSy byte TypeOrd [String]} endFm.
54 // -- optional phrase is return type for function procedures
55 // TypeOrd = ordinal.
56 // TypeHeader = tDefS Ord [fromS Ord Name].
57 // -- optional phrase occurs if:
58 // -- type not from this module, i.e. indirect export
59 // TypeList = start { Array | Record | Pointer | ProcType |
60 // Enum | Vector | NamedType } close.
61 // Array = TypeHeader arrSy TypeOrd (Byte | Number | <empty>) endAr.
62 // -- nullable phrase is array length for fixed length arrays
63 // Vector = TypeHeader vecSy TypeOrd endAr.
64 // Pointer = TypeHeader ptrSy TypeOrd.
65 // Event = TypeHeader evtSy FormalType.
66 // ProcType = TypeHeader pTpSy FormalType.
67 // Record = TypeHeader recSy recAtt [truSy | falSy]
68 // [basSy TypeOrd] [iFcSy {basSy TypeOrd}]
69 // {Name TypeOrd} {Method} {Statics} endRc.
70 // -- truSy ==> is an extension of external interface
71 // -- falSy ==> is an extension of external class
72 // -- basSy option defines base type, if not ANY / j.l.Object
73 // Statics = ( Constant | Variable | Procedure ).
74 // Enum = TypeHeader eTpSy { Constant } endRc.
75 // NamedType = TypeHeader.
76 // Name = namSy byte UTFstring.
77 // Literal = Number | String | Set | Char | Real | falSy | truSy.
78 // Byte = bytSy byte.
79 // String = strSy UTFstring.
80 // Number = numSy longint.
81 // Real = fltSy ieee-double.
82 // Set = setSy integer.
83 // Key = keySy integer.
84 // Char = chrSy unicode character.
85 //
86 // Notes on the syntax:
87 // All record types must have a Name field, even though this is often
88 // redundant. The issue is that every record type (including those that
89 // are anonymous in CP) corresponds to an IR class, and the definer
90 // and the user of the class _must_ agree on the IR name of the class.
91 // The same reasoning applies to procedure types, which must have equal
92 // interface names in all modules.
93 //
94 // Notes on the fine print about UTFstring --- November 2011 clarification.
95 // The character sequence in the symbol file is modified UTF-8, that is
96 // it may represent CHR(0), U+0000, by the bytes 0xC0, 0x80. String
97 // constants may thus contain embedded nulls.
98 //
99 // ======================================================================== *)
101 CONST
115 CONST
121 (* ============================================================ *)
123 TYPE
131 (* Recycled scratch area *)
135 TYPE
153 (* ============================================================ *)
160 (* ============================================================ *)
166 (* ============================================================ *)
171 (* ============================================================ *)
174 BEGIN
178 (* ============================================================ *)
179 (* ======== Various writing utility procedures ======= *)
180 (* ============================================================ *)
184 BEGIN
187 (*
188 * Initialization: cSum starts at zero. Since impOrd of
189 * the module is zero, impOrd of the imports starts at 1.
190 *)
199 (* ======================================= *)
204 (* need to turn off overflow checking here *)
211 (* ======================================= *
212 * This method writes a UTF-8 byte sequence that
213 * represents the input string up to but not
214 * including the terminating null character.
215 *)
220 BEGIN
247 (* ======================================= *
248 * This method writes a UTF-8 byte sequence that
249 * represents the input string up to but not
250 * including the final null character. The
251 * string may include embedded null characters.
252 * Thus if the last meaningful character is null
253 * there will be two nulls at the end.
254 *)
260 BEGIN
288 (* ======================================= *)
291 BEGIN
296 (* ======================================= *)
299 BEGIN
304 (* ======================================= *)
307 BEGIN
313 (* ======================================= *)
318 BEGIN
327 (* ======================================= *)
332 BEGIN
343 (* ======================================= *)
346 BEGIN
351 (* ======================================= *)
354 BEGIN
359 (* ======================================= *)
363 BEGIN
369 (* ======================================= *)
372 BEGIN
378 ELSE
383 (* ======================================= *)
386 (*
387 * This procedure facilitates the naming rules
388 * for records and (runtime) classes: -
389 *
390 * (1) Classes derived from named record types have
391 * names synthesized from the record typename.
392 * (2) If a named pointer is bound to an anon record
393 * the class takes its name from the pointer name.
394 * (3) If both the pointer and the record types have
395 * names, the class is named from the record.
396 *)
398 (* ------------------------------------ *)
400 BEGIN
408 (* ------------------------------------ *)
409 BEGIN
412 (*
413 * We wish to ensure that anonymous records are
414 * never emitted before their binding pointer
415 * types. This ensures that we do not need to
416 * merge types when reading the files.
417 *)
424 (*
425 * If a pointer to record is being emitted, and
426 * the pointer is NOT anonymous, then the class
427 * is known by the name of the record. Thus the
428 * record name must be emitted, at least opaquely.
429 * Furthermore, we must indicate the binding
430 * relationship between the pointer and record.
431 * (It is possible that DCode needs record size.)
432 *)
449 (* ============================================================ *)
450 (* ======== Various writing procedures ======= *)
451 (* ============================================================ *)
454 (*
455 ** FormalType = [retSy TypeOrd] frmSy {parSy Byte TypeOrd [String]} endFm.
456 *)
459 BEGIN
470 (*
471 * Emit Optional Parameter name
472 *)
480 (* ======================================= *)
486 (*
487 ** Constant = conSy Name Literal.
488 ** Literal = Number | String | Set | Char | Real | falSy | truSy.
489 *)
490 BEGIN
509 (* ======================================= *)
512 (*
513 ** Type = typSy Name TypeOrd.
514 *)
515 BEGIN
521 (* ======================================= *)
524 (*
525 ** Variable = varSy Name TypeOrd.
526 *)
527 BEGIN
533 (* ======================================= *)
536 (*
537 ** Import = impSy Name [String] Key.
538 *)
539 BEGIN
550 (* ======================================= *)
553 (*
554 ** Procedure = prcSy Name [String] FormalType.
555 *)
556 BEGIN
564 (* ======================================= *)
567 (*
568 ** Method = mthSy Name Byte Byte TypeOrd [String] [Name] FormalType.
569 *)
570 BEGIN
582 (* ======================================= *)
586 BEGIN
591 ELSE
597 (* ======================================= *)
600 BEGIN
607 BEGIN
612 ELSE
618 BEGIN
624 (* ======================================= *)
627 (*
628 ** TypeHeader = tDefS Ord [fromS Ord Name].
629 *)
632 (* =================================== *)
634 BEGIN
642 (* =================================== *)
643 BEGIN
650 ELSE
653 ELSE
656 (*
657 * mod := moduleOrd(t.idnt);
658 *)
662 (*
663 * Convert native types back to RTS.nativeXXX, if necessary.
664 * That is ... if the native module is not explicitly imported.
665 *)
674 (* ======================================= *)
677 BEGIN
680 (*
681 * IF t.force # D.noEmit THEN (* Don't emit structure unless forced *)
682 *)
696 (* ======================================= *)
702 (*
703 ** Record = TypeHeader recSy recAtt [truSy | falSy | <others>]
704 ** [basSy TypeOrd] [iFcSy {basSy TypeOrd}]
705 ** {Name TypeOrd} {Method} {Statics} endRc.
706 *)
707 BEGIN
710 (*
711 * IF t.force # D.noEmit THEN (* Don't emit structure unless forced *)
712 *)
718 (* ########## *)
724 (* ########## *)
729 (* ########## *)
737 (* ########## *)
767 (* ======================================= *)
772 (*
773 ** Enum = TypeHeader eTpSy { constant } endRc.
774 *)
775 BEGIN
783 (* D.AppendType(f.modS.expRecs, t); *)
786 (* ======================================= *)
789 BEGIN
793 (* ======================================= *)
796 BEGIN
799 (*
800 * IF (t.force # D.noEmit) OR (* Only emit structure if *)
802 *)
808 (* ======================================= *)
811 BEGIN
818 (* ======================================= *)
823 BEGIN
824 (*
825 * We cannot use a FOR loop here, as the tide changes
826 * during evaluation, as a result of reaching new types.
827 *)
842 (* ======================================= *)
850 (* ----------------------------------- *)
853 BEGIN
857 ELSE
862 (* ----------------------------------- *)
863 (*
864 ** SymFile = Header [String (falSy | truSy | <others>)]
865 ** [ VersionName]
866 ** {Import | Constant | Variable
867 ** | Type | Procedure | Method} TypeList.
868 ** Header = magic modSy Name.
869 ** VersionName= numSy longint numSy longint numSy longint.
870 ** -- mj# mn# bld rv# 8xbyte extract
871 *)
872 BEGIN
873 (*
874 * Create the SymFile structure, and open the output file.
875 *)
877 (* Start of alternative gpcp1.2 code *)
881 ELSE
890 ELSE
891 (*
892 * Emit the symbol file header
893 *)
895 (* End of alternative gpcp1.2 code *)
898 ELSE
908 (*
909 * Emit the optional VersionName, if required.
910 *
911 * VersionName= numSy longint numSy longint numSy longint.
912 * -- mj# mn# bld rv# 8xbyte extract
913 *)
919 (*
920 * Create the symbol table visitor, an extension of
921 * Symbols.SymForAll type. Emit symbols from the scope.
922 *)
926 (*
927 * Now emit the types on the worklist.
928 *)
932 (*
933 * Now emit the accumulated checksum key symbol.
934 *)
942 (* ============================================================ *)
943 (* ======== Various reading utility procedures ======= *)
944 (* ============================================================ *)
947 BEGIN
951 (* ======================================= *)
954 CONST
962 BEGIN
965 (*
966 * len is the length in bytes of the UTF8 representation
967 *)
969 (*
970 * Worst case the number of chars will equal byte-number.
971 *)
986 ELSE
997 ELSE
1000 ELSE
1003 ELSE
1011 (* ======================================= *)
1014 BEGIN
1018 (* ======================================= *)
1022 (* overflow checking off here *)
1026 (* ======================================= *)
1032 (* overflow checking off here *)
1040 (* ======================================= *)
1044 BEGIN
1049 (* ======================================= *)
1053 BEGIN
1056 ELSE
1062 (* ============================================================ *)
1063 (* ======== Symbol File Reader ======= *)
1064 (* ============================================================ *)
1068 BEGIN
1078 (* ======================================= *)
1080 (* ======================================= *)
1083 BEGIN
1088 (* ======================================= *)
1092 BEGIN
1096 | namSy :
1098 | strSy :
1102 | bytSy :
1106 | numSy :
1108 | fltSy :
1110 | chrSy :
1116 (* ======================================= *)
1119 BEGIN
1124 (* ======================================= *)
1137 BEGIN
1141 ELSE
1146 BEGIN
1156 (* #### *)
1166 (* #### *)
1168 (* S.SemError.Report(129, token.lin, token.col); *)
1171 ELSE
1175 ELSE
1181 (* normal case, nothing to do *)
1184 ELSE
1185 (* S.SemError.Report(130, token.lin, token.col); *)
1201 (* ============================================ *)
1208 BEGIN
1214 BEGIN
1217 ELSE
1224 (* ============================================ *)
1231 BEGIN
1236 BEGIN
1243 (* ============================================ *)
1246 (* insert, taking into account possible overloaded methods. *)
1247 VAR
1253 BEGIN
1255 (*
1256 * D.getName.Of(s, sS);
1257 * S.SemError.RepSt2(172, iS, sS, S.line, S.col);
1258 *)
1262 BEGIN
1268 (* ============================================ *)
1272 BEGIN
1286 (* ============================================ *)
1291 BEGIN
1296 ELSE
1298 REPEAT
1307 (* ============================================ *)
1311 BEGIN
1317 (* ============================================ *)
1321 (*
1322 ** FormalType = [retSy TypeOrd] frmSy {parSy Byte TypeOrd [String]} endFm.
1323 // -- optional phrase is return type for function procedures
1324 *)
1327 BEGIN
1339 (* Skip over optional parameter name string *)
1350 (* ============================================ *)
1353 (* Assert: the current symbol ptrSy *)
1354 (* Pointer = TypeHeader ptrSy TypeOrd. *)
1359 BEGIN
1364 (*
1365 * Check if there is space in the tArray for this
1366 * element, otherwise expand using typeOf().
1367 *)
1372 ELSE
1381 (* ============================================ *)
1384 (* Assert: the current symbol is pTpSy. *)
1385 (* ProcType = TypeHeader pTpSy FormalType. *)
1386 BEGIN
1391 (* ============================================ *)
1394 (* Assert: the current symbol is evtSy. *)
1395 (* EventType = TypeHeader evtSy FormalType. *)
1396 BEGIN
1401 (* ============================================ *)
1404 (* Assert: at entry the current symbol is arrSy. *)
1405 (* Array = TypeHeader arrSy TypeOrd (Byte | Number | ) endAr. *)
1406 (* -- nullable phrase is array length for fixed length arrays *)
1409 BEGIN
1419 (* ELSE length := 0 *)
1425 (* ============================================ *)
1428 (* Assert: at entry the current symbol is vecSy. *)
1429 (* Vector = TypeHeader vecSy TypeOrd endAr. *)
1432 BEGIN
1440 (* ============================================ *)
1445 (* ============================================ *)
1448 (* Assert: at entry the current symbol is recSy. *)
1449 (* Record = TypeHeader recSy recAtt [truSy | falSy | <others>] *)
1450 (* [basSy TypeOrd] [iFcSy {basSy TypeOrd}] *)
1451 (* {Name TypeOrd} {Method} {Statics} endRc. *)
1452 CONST
1464 BEGIN
1468 (*
1469 * The recAtt field has two other bits piggy-backed onto it.
1470 * The noNew Field of xAttr is just added on in the writing
1471 * and is stripped off here. The valRc field is used to lock
1472 * in foreign value classes, even though they have basTp # NIL.
1473 *)
1488 (*
1489 * Do not override extrnNm values set
1490 * by *Maker.Init for Native* types.
1491 *)
1497 (*
1498 * Do not override baseTp values set
1499 * by *Maker.Init for Native* types.
1500 *)
1553 ELSE
1557 (* #### *)
1563 (* #### *)
1568 (* ============================================ *)
1571 (* Assert: at entry the current symbol is eTpSy. *)
1572 (* Enum = TypeHeader eTpSy { Constant} endRc. *)
1575 BEGIN
1588 (* ============================================ *)
1591 (* Type = typSy Name TypeOrd. *)
1595 BEGIN
1596 (*
1597 * Post: every previously unknown typId 'id'
1598 * has the property: id.type.idnt = id.
1599 * If oldI # newT, then the new typId has
1600 * newT.type.idnt = oldI.
1601 *)
1616 (* ============================================ *)
1619 (* Import = impSy Name [String] Key. *)
1620 (* -- optional string is external name *)
1621 (* first symbol should be namSy here. *)
1625 BEGIN
1634 (* Shouldn't this be an error? *)
1637 (* probably don't need to do anything here ... *)
1654 ELSE
1662 (* ============================================ *)
1665 (* Constant = conSy Name Literal. *)
1666 (* Name = namSy byte UTFstring. *)
1667 (* Assert: f.sSym = namSy. *)
1670 BEGIN
1681 (* ============================================ *)
1684 (* Variable = varSy Name TypeOrd. *)
1687 BEGIN
1696 (* ============================================ *)
1699 (* Procedure = prcSy Name[String]FormalType. *)
1700 (* This is a static proc, mths come with Recs *)
1703 BEGIN
1712 (* and leave scopeNm = NIL *)
1720 (* IF this is a java module, do some semantic checks *)
1721 (* ... *)
1725 (* ============================================ *)
1728 (* Method = mthSy Name byte byte TypeOrd [String][Name] FormalType. *)
1733 BEGIN
1742 (* byte1 is the method attributes *)
1744 (* byte2 is param form of receiver *)
1746 (* next 1 or 2 bytes are rcv-type *)
1752 (* and leave scopeNm = NIL *)
1755 (* Skip over optional receiver name string *)
1759 (* End skip over optional receiver name *)
1764 (* IF this is a java module, do some semantic checks *)
1768 (* ============================================ *)
1771 (* TypeList = start { Array | Record | Pointer *)
1772 (* | ProcType | Vector} close. *)
1773 (* TypeHeader = tDefS Ord [fromS Ord Name]. *)
1786 (* ================================ *)
1788 BEGIN
1797 ELSE
1801 (* ================================ *)
1802 BEGIN
1808 (* Do type header *)
1813 (*
1814 * The [fromS modOrd typNam] appears if the type is imported.
1815 * There are two cases:
1816 * (1) this is the first time that "mod.typNam" has been
1817 * seen during this compilation
1818 * ==> insert a new typId descriptor in mod.symTb
1819 * (2) this name is already in the mod.symTb table
1820 * ==> fetch the previous descriptor
1821 *)
1833 (*
1834 * In the new symbol table format we do not wish
1835 * to include details of indirectly imported types.
1836 * However, there may be a reference to the bound
1837 * type of an indirectly imported pointer. In this
1838 * case we need to make sure that the otherwise
1839 * bound type declaration catches the same opaque
1840 * type descriptor.
1841 *)
1857 (*
1858 * A name has been declared for this type, tpIdnt is
1859 * the (possibly previously known) id descriptor, and
1860 * tpDesc is the newly parsed descriptor of the type.
1861 *)
1864 ELSE
1868 ELSE
1875 (*
1876 * This is the special case of an anonymous
1877 * bound type of an imported pointer. In the
1878 * new type resolver we want this to remain
1879 * as an opaque type until *all* symbol files
1880 * have been fully processed.
1881 * So ... override the parsed type.
1882 *)
1884 ELSE
1889 (*
1890 * This is the normal case
1891 *)
1908 (*
1909 * First we fix up all symbolic references in the
1910 * type array. Postcondition is: no element
1911 * of the type array directly or indirectly refers
1912 * to a temporary type.
1913 *)
1918 (*
1919 * At this stage we want to check the base types
1920 * of every defined record type. If the base type
1921 * is imported then we check.
1922 * Define 'set' := dfScp.xAttr * {weak, need}; then ...
1923 *
1924 * set = {D.need} ==> module is explicitly imported
1925 *
1926 * set = {D.weak} ==> module must be imported, but is not
1927 * on the import worklist at this stage
1928 * set = {D.weak, D.need} ==> module must be imported, and is
1929 * already on the import worklist.
1930 *)
1949 (*
1950 * We now fix up all references in the symbol table
1951 * that still refer to temporary symbol-file types.
1952 *)
1957 (*
1958 * Now check that all overloaded ids are necessary
1959 *)
1966 (* ============================================ *)
1969 (*
1970 // SymFile = Header [String (falSy | truSy | <others>)]
1971 // {Import | Constant | Variable | Type | Procedure}
1972 // TypeList Key.
1973 // Header = magic modSy Name.
1974 //
1975 // magic has already been recognized.
1976 *)
1978 BEGIN
2024 (*
2025 // The CPS format only provides for version information if
2026 // there is also a strong key token. Do not propagate random
2027 // junk with PeToCps from assemblies with version info only
2028 *)
2033 LOOP
2046 (*
2047 * Now read the typelist.
2048 *)
2050 (*
2051 * Now check the module key.
2052 *)
2063 (* ============================================================ *)
2064 (* ======== SymFileSFA visitor method ======= *)
2065 (* ============================================================ *)
2068 BEGIN
2081 (* ============================================================ *)
2082 (* ======== TypeLinker visitor method ======= *)
2083 (* ============================================================ *)
2086 BEGIN
2090 ELSE
2097 (* ============================================================ *)
2098 (* ======== ResolveAll visitor method ======= *)
2099 (* ============================================================ *)
2102 BEGIN
2106 (* ============================================================ *)
2107 (* ======== Symbol file parser method ======= *)
2108 (* ============================================================ *)
2112 BEGIN
2119 (* ============================================ *)
2126 BEGIN
2127 (*
2128 * The list of scopes has been constructed by
2129 * the parser, while reading the import list.
2130 * In the case of already known scopes the list
2131 * references the original descriptor.
2132 *
2133 * Unlike the previous version (SymFileRW) this
2134 * routine may mutate the length of the sequence.
2135 *)
2137 (*
2138 * Copy the incoming sequence.
2139 *)
2142 (*
2143 * Now import modules on the list.
2144 *)
2155 (*
2156 * If sysLib has NOT been explicitly imported, then
2157 * insert dummy definitions for the native object methods
2158 * so that user code may explicitly extend RTS.NativeObject
2159 * and override these methods.
2160 *)
2169 (*
2170 * Copy the (possibly mutated) sequence out.
2171 *)
2175 (* ============================================================ *)
2176 BEGIN
2180 (* ============================================================ *)