1-- Copyright (c) 1990 Regents of the University of California. 2-- All rights reserved. 3-- 4-- This software was developed by John Self of the Arcadia project 5-- at the University of California, Irvine. 6-- 7-- Redistribution and use in source and binary forms are permitted 8-- provided that the above copyright notice and this paragraph are 9-- duplicated in all such forms and that any documentation, 10-- advertising materials, and other materials related to such 11-- distribution and use acknowledge that the software was developed 12-- by the University of California, Irvine. The name of the 13-- University may not be used to endorse or promote products derived 14-- from this software without specific prior written permission. 15-- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 16-- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 17-- WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 18 19-- TITLE equivalence class 20-- AUTHOR: John Self (UCI) 21-- DESCRIPTION finds equivalence classes so DFA will be smaller 22-- $Header: /co/ua/self/arcadia/aflex/ada/src/RCS/ecsB.a,v 1.7 90/01/12 15:19:54 self Exp Locker: self $ 23 24with Misc; 25with Unicode; 26 27package body ECS is 28 29 use Unicode; 30 31-- ccl2ecl - convert character classes to set of equivalence classes 32 33 procedure CCL2ECL is 34 ICH, NEWLEN, CCLP, CCLMEC : INTEGER; 35 begin 36 for I in 1 .. LASTCCL loop 37 38 -- we loop through each character class, and for each character 39 -- in the class, add the character's equivalence class to the 40 -- new "character" class we are creating. Thus when we are all 41 -- done, character classes will really consist of collections 42 -- of equivalence classes 43 NEWLEN := 0; 44 CCLP := CCLMAP(I); 45 46 for CCLS in 0 .. CCLLEN(I) - 1 loop 47 ICH := Unicode_Character'Pos (CCLTBL (CCLP + CCLS)); 48 CCLMEC := ECGROUP(ICH); 49 if (CCLMEC > 0) then 50 CCLTBL(CCLP + NEWLEN) := Unicode_Character'Val (CCLMEC); 51 NEWLEN := NEWLEN + 1; 52 end if; 53 end loop; 54 55 CCLLEN(I) := NEWLEN; 56 end loop; 57 end CCL2ECL; 58 59 60 -- cre8ecs - associate equivalence class numbers with class members 61 -- fwd is the forward linked-list of equivalence class members. bck 62 -- is the backward linked-list, and num is the number of class members. 63 -- Returned is the number of classes. 64 65 procedure CRE8ECS(FWD : C_SIZE_ARRAY; 66 BCK : in out C_SIZE_ARRAY; 67 NUM : INTEGER; 68 RESULT : out INTEGER) is 69 J, NUMCL : INTEGER; 70 begin 71 NUMCL := 0; 72 73 -- create equivalence class numbers. From now on, abs( bck(x) ) 74 -- is the equivalence class number for object x. If bck(x) 75 -- is positive, then x is the representative of its equivalence 76 -- class. 77 for I in 1 .. NUM loop 78 if (BCK(I) = NIL) then 79 NUMCL := NUMCL + 1; 80 BCK(I) := NUMCL; 81 J := FWD(I); 82 while (J /= NIL) loop 83 BCK(J) := -NUMCL; 84 J := FWD(J); 85 end loop; 86 end if; 87 end loop; 88 RESULT := NUMCL; 89 return; 90 end CRE8ECS; 91 92 93 -- mkeccl - update equivalence classes based on character class xtions 94 -- where ccls contains the elements of the character class, lenccl is the 95 -- number of elements in the ccl, fwd is the forward link-list of equivalent 96 -- characters, bck is the backward link-list, and llsiz size of the link-list 97 98 procedure MKECCL 99 (CCLS : Unicode_Character_Array; 100 LENCCL : Integer; 101 FWD, BCK : in out UNBOUNDED_INT_ARRAY; 102 LLSIZ : Integer) 103 is 104 CCLP, OLDEC, NEWEC, CCLM, I, J : INTEGER; 105 PROC_ARRAY : BOOLEAN_PTR; 106 begin 107 108 -- note that it doesn't matter whether or not the character class is 109 -- negated. The same results will be obtained in either case. 110 CCLP := CCLS'FIRST; 111 112 -- this array tells whether or not a character class has been processed. 113 PROC_ARRAY := new BOOLEAN_ARRAY(CCLS'FIRST .. CCLS'LAST); 114 for CCL_INDEX in CCLS'FIRST .. CCLS'LAST loop 115 PROC_ARRAY(CCL_INDEX) := FALSE; 116 end loop; 117 118 while (CCLP < LENCCL + CCLS'FIRST) loop 119 CCLM := Unicode_Character'Pos (CCLS (CCLP)); 120 OLDEC := BCK(CCLM); 121 NEWEC := CCLM; 122 123 J := CCLP + 1; 124 125 I := FWD(CCLM); 126 while ((I /= NIL) and (I <= LLSIZ)) loop 127 128 -- look for the symbol in the character class 129 130 while ((J < LENCCL + CCLS'FIRST) 131 and ((CCLS(J) <= Unicode_Character'Val(I)) or PROC_ARRAY(J))) 132 loop 133 if (CCLS(J) = Unicode_Character'Val (I)) then 134 135 -- we found an old companion of cclm in the ccl. 136 -- link it into the new equivalence class and flag it as 137 -- having been processed 138 BCK(I) := NEWEC; 139 FWD(NEWEC) := I; 140 NEWEC := I; 141 PROC_ARRAY(J) := TRUE; 142 143 -- set flag so we don't reprocess 144 145 -- get next equivalence class member 146 -- continue 2 147 goto NEXT_PT; 148 end if; 149 J := J + 1; 150 end loop; 151 152 -- symbol isn't in character class. Put it in the old equivalence 153 -- class 154 BCK(I) := OLDEC; 155 156 if (OLDEC /= NIL) then 157 FWD(OLDEC) := I; 158 end if; 159 160 OLDEC := I; 161 <<NEXT_PT>> I := FWD(I); 162 end loop; 163 164 if ((BCK(CCLM) /= NIL) or (OLDEC /= BCK(CCLM))) then 165 BCK(CCLM) := NIL; 166 FWD(OLDEC) := NIL; 167 end if; 168 169 FWD(NEWEC) := NIL; 170 171 -- find next ccl member to process 172 CCLP := CCLP + 1; 173 174 while ((CCLP < LENCCL + CCLS'FIRST) and PROC_ARRAY(CCLP)) loop 175 176 -- reset "doesn't need processing" flag 177 PROC_ARRAY(CCLP) := FALSE; 178 CCLP := CCLP + 1; 179 end loop; 180 end loop; 181 exception 182 when STORAGE_ERROR => 183 Misc.Aflex_Fatal ("dynamic memory failure in mkeccl()"); 184 end MKECCL; 185 186 187 -- mkechar - create equivalence class for single character 188 189 procedure MKECHAR(TCH : in INTEGER; 190 FWD, BCK : in out C_SIZE_ARRAY) is 191 begin 192 193 -- if until now the character has been a proper subset of 194 -- an equivalence class, break it away to create a new ec 195 if (FWD(TCH) /= NIL) then 196 BCK(FWD(TCH)) := BCK(TCH); 197 end if; 198 199 if (BCK(TCH) /= NIL) then 200 FWD(BCK(TCH)) := FWD(TCH); 201 end if; 202 203 FWD(TCH) := NIL; 204 BCK(TCH) := NIL; 205 end MKECHAR; 206 207end ECS; 208