1-- Copyright (c) 1990 Regents of the University of California.
2-- All rights reserved.
3--
4-- This software was developed by John Self of the Arcadia project
5-- at the University of California, Irvine.
6--
7-- Redistribution and use in source and binary forms are permitted
8-- provided that the above copyright notice and this paragraph are
9-- duplicated in all such forms and that any documentation,
10-- advertising materials, and other materials related to such
11-- distribution and use acknowledge that the software was developed
12-- by the University of California, Irvine.  The name of the
13-- University may not be used to endorse or promote products derived
14-- from this software without specific prior written permission.
15-- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
16-- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
17-- WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
18
19-- TITLE equivalence class
20-- AUTHOR: John Self (UCI)
21-- DESCRIPTION finds equivalence classes so DFA will be smaller
22-- $Header: /co/ua/self/arcadia/aflex/ada/src/RCS/ecsB.a,v 1.7 90/01/12 15:19:54 self Exp Locker: self $
23
24with Misc;
25with Unicode;
26
27package body ECS is
28
29   use Unicode;
30
31-- ccl2ecl - convert character classes to set of equivalence classes
32
33  procedure CCL2ECL is
34    ICH, NEWLEN, CCLP, CCLMEC : INTEGER;
35  begin
36    for I in 1 .. LASTCCL loop
37
38      -- we loop through each character class, and for each character
39      -- in the class, add the character's equivalence class to the
40      -- new "character" class we are creating.  Thus when we are all
41      -- done, character classes will really consist of collections
42      -- of equivalence classes
43      NEWLEN := 0;
44      CCLP := CCLMAP(I);
45
46      for CCLS in 0 .. CCLLEN(I) - 1 loop
47        ICH := Unicode_Character'Pos (CCLTBL (CCLP + CCLS));
48        CCLMEC := ECGROUP(ICH);
49        if (CCLMEC > 0) then
50          CCLTBL(CCLP + NEWLEN) := Unicode_Character'Val (CCLMEC);
51          NEWLEN := NEWLEN + 1;
52        end if;
53      end loop;
54
55      CCLLEN(I) := NEWLEN;
56    end loop;
57  end CCL2ECL;
58
59
60  -- cre8ecs - associate equivalence class numbers with class members
61  --  fwd is the forward linked-list of equivalence class members.  bck
62  --  is the backward linked-list, and num is the number of class members.
63  --  Returned is the number of classes.
64
65  procedure CRE8ECS(FWD    : C_SIZE_ARRAY;
66                    BCK    : in out C_SIZE_ARRAY;
67                    NUM    : INTEGER;
68                    RESULT : out INTEGER) is
69    J, NUMCL : INTEGER;
70  begin
71    NUMCL := 0;
72
73    -- create equivalence class numbers.  From now on, abs( bck(x) )
74    -- is the equivalence class number for object x.  If bck(x)
75    -- is positive, then x is the representative of its equivalence
76    -- class.
77    for I in 1 .. NUM loop
78      if (BCK(I) = NIL) then
79        NUMCL := NUMCL + 1;
80        BCK(I) := NUMCL;
81        J := FWD(I);
82        while (J /= NIL) loop
83          BCK(J) :=  -NUMCL;
84          J := FWD(J);
85        end loop;
86      end if;
87    end loop;
88    RESULT := NUMCL;
89    return;
90  end CRE8ECS;
91
92
93  -- mkeccl - update equivalence classes based on character class xtions
94  -- where ccls contains the elements of the character class, lenccl is the
95  -- number of elements in the ccl, fwd is the forward link-list of equivalent
96  -- characters, bck is the backward link-list, and llsiz size of the link-list
97
98   procedure MKECCL
99    (CCLS     : Unicode_Character_Array;
100     LENCCL   : Integer;
101     FWD, BCK : in out UNBOUNDED_INT_ARRAY;
102     LLSIZ    : Integer)
103   is
104    CCLP, OLDEC, NEWEC, CCLM, I, J : INTEGER;
105    PROC_ARRAY                     : BOOLEAN_PTR;
106  begin
107
108    -- note that it doesn't matter whether or not the character class is
109    -- negated.  The same results will be obtained in either case.
110    CCLP := CCLS'FIRST;
111
112    -- this array tells whether or not a character class has been processed.
113    PROC_ARRAY := new BOOLEAN_ARRAY(CCLS'FIRST .. CCLS'LAST);
114    for CCL_INDEX in CCLS'FIRST .. CCLS'LAST loop
115      PROC_ARRAY(CCL_INDEX) := FALSE;
116    end loop;
117
118    while (CCLP < LENCCL + CCLS'FIRST) loop
119      CCLM := Unicode_Character'Pos (CCLS (CCLP));
120      OLDEC := BCK(CCLM);
121      NEWEC := CCLM;
122
123      J := CCLP + 1;
124
125      I := FWD(CCLM);
126      while ((I /= NIL) and (I <= LLSIZ)) loop
127
128            -- look for the symbol in the character class
129
130            while ((J < LENCCL + CCLS'FIRST)
131                   and ((CCLS(J) <= Unicode_Character'Val(I)) or PROC_ARRAY(J)))
132            loop
133               if (CCLS(J) = Unicode_Character'Val (I)) then
134
135            -- we found an old companion of cclm in the ccl.
136            -- link it into the new equivalence class and flag it as
137            -- having been processed
138            BCK(I) := NEWEC;
139            FWD(NEWEC) := I;
140            NEWEC := I;
141            PROC_ARRAY(J) := TRUE;
142
143            -- set flag so we don't reprocess
144
145            -- get next equivalence class member
146            -- continue 2
147            goto NEXT_PT;
148          end if;
149          J := J + 1;
150        end loop;
151
152        -- symbol isn't in character class.  Put it in the old equivalence
153        -- class
154        BCK(I) := OLDEC;
155
156        if (OLDEC /= NIL) then
157          FWD(OLDEC) := I;
158        end if;
159
160        OLDEC := I;
161        <<NEXT_PT>> I := FWD(I);
162      end loop;
163
164      if ((BCK(CCLM) /= NIL) or (OLDEC /= BCK(CCLM))) then
165        BCK(CCLM) := NIL;
166        FWD(OLDEC) := NIL;
167      end if;
168
169      FWD(NEWEC) := NIL;
170
171      -- find next ccl member to process
172      CCLP := CCLP + 1;
173
174      while ((CCLP < LENCCL + CCLS'FIRST) and PROC_ARRAY(CCLP)) loop
175
176        -- reset "doesn't need processing" flag
177        PROC_ARRAY(CCLP) := FALSE;
178        CCLP := CCLP + 1;
179      end loop;
180    end loop;
181  exception
182    when STORAGE_ERROR =>
183      Misc.Aflex_Fatal ("dynamic memory failure in mkeccl()");
184  end MKECCL;
185
186
187  -- mkechar - create equivalence class for single character
188
189  procedure MKECHAR(TCH      : in INTEGER;
190                    FWD, BCK : in out C_SIZE_ARRAY) is
191  begin
192
193    -- if until now the character has been a proper subset of
194    -- an equivalence class, break it away to create a new ec
195    if (FWD(TCH) /= NIL) then
196      BCK(FWD(TCH)) := BCK(TCH);
197    end if;
198
199    if (BCK(TCH) /= NIL) then
200      FWD(BCK(TCH)) := FWD(TCH);
201    end if;
202
203    FWD(TCH) := NIL;
204    BCK(TCH) := NIL;
205  end MKECHAR;
206
207end ECS;
208