1----------------------------------------------------------------------
2--  Rules.Style.Keyword - Package body                              --
3--                                                                  --
4--  This software  is (c) The European Organisation  for the Safety --
5--  of Air  Navigation (EUROCONTROL) and Adalog  2004-2005. The Ada --
6--  Controller  is  free software;  you can redistribute  it and/or --
7--  modify  it under  terms of  the GNU  General Public  License as --
8--  published by the Free Software Foundation; either version 2, or --
9--  (at your  option) any later version.  This  unit is distributed --
10--  in the hope  that it will be useful,  but WITHOUT ANY WARRANTY; --
11--  without even the implied warranty of MERCHANTABILITY or FITNESS --
12--  FOR A  PARTICULAR PURPOSE.  See the GNU  General Public License --
13--  for more details.   You should have received a  copy of the GNU --
14--  General Public License distributed  with this program; see file --
15--  COPYING.   If not, write  to the  Free Software  Foundation, 59 --
16--  Temple Place - Suite 330, Boston, MA 02111-1307, USA.           --
17--                                                                  --
18--  As  a special  exception, if  other files  instantiate generics --
19--  from the units  of this program, or if you  link this unit with --
20--  other files  to produce  an executable, this  unit does  not by --
21--  itself cause the resulting executable  to be covered by the GNU --
22--  General  Public  License.   This  exception  does  not  however --
23--  invalidate any  other reasons why the executable  file might be --
24--  covered by the GNU Public License.                              --
25--                                                                  --
26--  This  software is  distributed  in  the hope  that  it will  be --
27--  useful,  but WITHOUT  ANY  WARRANTY; without  even the  implied --
28--  warranty  of  MERCHANTABILITY   or  FITNESS  FOR  A  PARTICULAR --
29--  PURPOSE.                                                        --
30----------------------------------------------------------------------
31
32-- Ada
33with
34  Ada.Characters.Handling,
35  Ada.Characters.Latin_1,
36  Ada.Strings.Wide_Maps.Wide_Constants;
37
38--ASIS
39with
40  Asis.Text;
41
42-- Adalog
43with
44  Utilities;
45
46-- AdaCtl
47with
48  Framework.Reports;
49
50package body Rules.Style.Keyword is
51
52   -- Algorithm
53   --
54   -- There is no way to manage keywords from the tree, since keywords have disappeared at that level!
55   -- Therefore, we need to scan the source line, which requires a kind of lexical analyzer.
56   --
57   -- We use an automat, where each "state" is the current letter. If the letter matches, the next state
58   -- is the next entry in the automat. If it does not match, the next possible state (if any) is given
59   -- by the "if_not_matched" entry.
60   --
61   -- Note that this algorithm is such that the source is scanned only once, with only one comparison
62   -- per letter. Efficiency is a concern here, since the whole text is parsed!
63
64   type Index is range 0 .. 318;
65   subtype Positive_Index is Index range 1 .. Index'Last;
66   type Node is
67      record
68         Char           : Wide_Character;
69         If_Not_Matched : Index;
70      end record;
71
72   Automat : constant array (Positive_Index) of Node :=
73                -------------'a'
74                --1 abort
75               (('b',14), ('o',6),  ('r',0), ('t',0),  ('.',0),
76                --6 abs
77                          ('s',0),  ('.',8),
78                --8 abstract
79                                    ('t',0), ('r',0),  ('a',0), ('c',0), ('t',0), ('.',0),
80                --14 accept
81                ('c',23), ('c',0),  ('e',0), ('p',20), ('t',0), ('.',0),
82                --20 access
83                                             ('s',0),  ('s',0), ('.',0),
84                --23 aliased
85                ('l',32), ('i',30), ('a',0), ('s',0),  ('e',0), ('d',0), ('.',0),
86                --30 all
87                          ('l',0),  ('.',0),
88                --32 and
89                ('n',35), ('d',0),  ('.',0),
90                --35 array
91                ('r',40), ('r',0),  ('a',0), ('y',0), ('.',0),
92                --40 at
93                ('t', 0),  ('.', 0),
94
95                -------------'b'
96                --42 begin
97                ('e',47), ('g',0),  ('i',0), ('n',0), ('.',0),
98                --47 body
99                ('o',0),  ('d',0),  ('y',0), ('.',0),
100
101                -------------'c'
102                --51 case
103                ('a',55), ('s',0),  ('e',0), ('.',0),
104                --55 constant
105                ('o',0),  ('n',0),  ('s',0), ('t',0), ('a',0), ('n',0), ('t',0), ('.',0),
106
107                -------------'d'
108                --63 declare
109                ('e',77), ('c',70), ('l',0), ('a',0), ('r',0), ('e',0), ('.',0),
110                --70 delay
111                          ('l',0),  ('a',74), ('y',0), ('.',0),
112                --74 delta
113                                    ('t',0), ('a',0), ('.',0),
114                --77 digits
115                ('i',83), ('g',0),  ('i',0), ('t',0), ('s',0), ('.',0),
116                --83 do
117                ('o',0),  ('.',0),
118
119                -------------'e'
120                --85 else
121                ('l',92),  ('s',0),   ('e',89), ('.',0),
122                --89 elsif
123                                      ('i',0),  ('f',0), ('.',0),
124                --92 end
125                ('n',99),  ('d',95),  ('.',0),
126                --95 entry
127                           ('t',0),   ('r',0), ('y',0), ('.',0),
128                --99 exception
129                ('x',0),   ('c',108), ('e',0), ('p',0), ('t',0), ('i',0), ('o',0), ('n',0), ('.',0),
130                --108 exit
131                           ('i',0),   ('t',0), ('.',0),
132
133                -------------'f'
134                --111 for
135                ('o',114), ('r',0),   ('.',0),
136                --114 function
137                ('u',0),   ('n',0),   ('c',0), ('t',0), ('i',0), ('o',0), ('n',0), ('.',0),
138
139                -------------'g'
140                --122 generic
141                ('e',129), ('n',0),   ('e',0), ('r',0), ('i',0), ('c',0), ('.',0),
142                --129 goto
143                ('o',0),   ('t',0),   ('o',0), ('.',0),
144
145                -------------'i'
146                --133 if
147                ('f',135), ('.',0),
148                --135 in
149                ('n',137), ('.',0),
150                --137 is
151                ('s',0),   ('.',0),
152
153                -------------'l'
154                --139 limited
155                ('i',146), ('m',0), ('i',0), ('t',0), ('e',0), ('d',0), ('.',0),
156                --146 loop
157                ('o',0),   ('o',0), ('p',0), ('.',0),
158
159                -------------'m'
160                --150 mod
161                ('o',0),   ('d',0), ('.',0),
162
163                -------------'n'
164                --153 new
165                ('e',156), ('w',0), ('.',0),
166                --156 not
167                ('o',159), ('t',0), ('.',0),
168                --159 null
169                ('u',0),   ('l',0), ('l',0), ('.',0),
170
171                -------------'o'
172                --163 of
173                ('f',165), ('.',0),
174                --165 or
175                ('r',167), ('.',0),
176                --167 others
177                ('t',173), ('h',0), ('e',0), ('r',0), ('s',0), ('.',0),
178                --173 out
179                ('u',0),   ('t',0), ('.',0),
180
181                -------------'p'
182                --176 package
183                ('a',183), ('c',0),   ('k',0),   ('a',0), ('g',0), ('e',0), ('.',0),
184                --183 pragma
185                ('r',0),   ('a',189), ('g',0),   ('m',0), ('a',0), ('.',0),
186                --189 private
187                           ('i',195), ('v',0),   ('a',0), ('t',0), ('e',0), ('.',0),
188                --195 procedure
189                           ('o',0),   ('c',203), ('e',0), ('d',0), ('u',0), ('r',0), ('e',0), ('.',0),
190                --203 protected
191                                      ('t',0), ('e',0), ('c',0), ('t',0), ('e',0), ('d',0), ('.',0),
192
193                -------------'r'
194                --210 raise
195                ('a',219), ('i',215), ('s',0), ('e',0), ('.',0),
196                --215 range
197                           ('n',0),   ('g',0), ('e',0), ('.',0),
198                --219 record
199                ('e',0),   ('c',225), ('o',0), ('r',0), ('d',0), ('.',0),
200                --225 rem
201                           ('m',227), ('.',0),
202                --227 renames
203                           ('n',233), ('a',0), ('m',0), ('e',0), ('s',0), ('.',0),
204                --233 requeue
205                           ('q',239), ('u',0), ('e',0), ('u',0), ('e',0), ('.',0),
206                --239 return
207                           ('t',244), ('u',0), ('r',0), ('n',0), ('.',0),
208                --244 reverse
209                           ('v',0),   ('e',0), ('r',0), ('s',0), ('e',0), ('.',0),
210
211                -------------'s'
212                --250 select
213                ('e',263), ('l',256), ('e',0), ('c',0), ('t',0), ('.',0),
214                --256 separate
215                           ('p',0),   ('a',0), ('r',0), ('a',0), ('t',0), ('e',0), ('.',0),
216                --263 subtype
217                ('u',0),   ('b',0),   ('t',0), ('y',0), ('p',0), ('e',0), ('.',0),
218
219                -------------'t'
220                --270 tagged
221                ('a',279), ('g',276), ('g',0), ('e',0), ('d',0), ('.',0),
222                --276 task
223                           ('s',0),   ('k',0), ('.',0),
224                --279 terminate
225                ('e',288), ('r',0),   ('m',0), ('i',0), ('n',0), ('a',0), ('t',0), ('e',0), ('.',0),
226                --288 then
227                ('h',292), ('e',0),   ('n',0), ('.',0),
228                --292 type
229                ('y',0),   ('p',0),   ('e',0), ('.',0),
230
231                -------------'u'
232                --296 until
233                ('n',301), ('t',0),   ('i',0), ('l',0), ('.',0),
234                --301 use
235                ('s',0),   ('e',0),   ('.',0),
236
237                -------------'w'
238                --304 when
239                ('h',312), ('e',308), ('n',0), ('.',0),
240                --308 while
241                           ('i',0),   ('l',0), ('e',0), ('.',0),
242                --312 with
243                ('i',0), ('t',0), ('h',0), ('.',0),
244
245                -------------'x'
246                --316 xor
247                ('o',0), ('r',0), ('.',0)
248               );
249   Start : constant array (Wide_Character range 'a' .. 'z') of Index :=
250             ('a' => 1,
251              'b' => 42,
252              'c' => 51,
253              'd' => 63,
254              'e' => 85,
255              'f' => 111,
256              'g' => 122,
257              'i' => 133,
258              'l' => 139,
259              'm' => 150,
260              'n' => 153,
261              'o' => 163,
262              'p' => 176,
263              'r' => 210,
264              's' => 250,
265              't' => 270,
266              'u' => 296,
267              'w' => 304,
268              'x' => 316,
269              others => 0);
270
271   use Ada.Characters.Handling, Ada.Strings.Wide_Maps;
272   Number_Set : constant Wide_Character_Set
273     := To_Set (Ranges => (('0', '9'), ('_', '_'), ('#', '#'), ('a', 'f'), ('A', 'F')));
274   Identifier_Set : constant Ada.Strings.Wide_Maps.Wide_Character_Set
275     := To_Set (Ranges => (('a', 'z'), ('A', 'Z'), ('_', '_'), ('0', '9'),
276                           (Wide_Character'Succ (To_Wide_Character (Character'Last)), Wide_Character'Last)));
277   ------------------
278   -- Process_Line --
279   ------------------
280
281   Previous_Is_Tick : Boolean := False;
282   -- If the character before the start of a word is a single quote, the word cannot be
283   -- a keyword. We need this special trick because of 'Access and 'Range. This needs to
284   -- be a global variable, because the quote is not necessarily on the same line as the
285   -- word; the following is legal Ada:
286   --    for I in S
287   --    '
288   --    Range loop ...
289   -- We need to be careful however, because 'in' is a keyword in:
290   --    if 'a' in character then ...
291   Wide_HT : constant Wide_Character := Wide_Character'Val(Character'Pos (Ada.Characters.Latin_1.HT));
292   procedure Process_Line (Line : in Asis.Program_Text; Loc : in Framework.Location; Expected : in Casing_Set) is
293      use Ada.Strings, Ada.Strings.Wide_Maps.Wide_Constants;
294      use Utilities;
295
296      type States is (Search_Begin, In_Quotes, In_Number, Skipping, Analyzing);
297      State    : States := Search_Begin;
298      Kw_State : Index;
299      Lower_C  : Wide_Character;
300      First    : Positive;
301      Last     : Natural := Line'Last;
302
303      type Casing is (Upper, Lower, Title, Mixed, Unknown);
304      Case_First, Case_Others : Casing;
305
306      procedure Do_Report (Kw_Start, Kw_Stop : Positive) is
307         use Framework, Framework.Reports;
308
309      begin  -- Do_Report
310         Report (Rule_Id,
311                 Corresponding_Context (St_Casing_Keyword),
312                 Create_Location (Get_File_Name (Loc), Get_First_Line (Loc), Asis.Text.Character_Position (Kw_Start)),
313                 "Wrong casing of """ & Line (Kw_Start .. Kw_Stop)
314                 & """, should be " & Should_Be (Line (Kw_Start .. Kw_Stop), Expected));
315      end Do_Report;
316
317   begin  -- Process_Line
318      for I in Line'Range loop
319         if Line (I) = '-' and then I /= Line'Last and then Line (I + 1) = '-' then
320            -- Comment
321            Last := I-1;
322            exit;
323         end if;
324
325         case State is
326            when Search_Begin =>
327               if Previous_Is_Tick and Is_In (Line (I), Identifier_Set) then
328                  -- Assume it is the beginning of an identifier, cannot be a keyword
329                  State := Skipping;
330               elsif Line (I) = '"' then
331                  -- beware of '"'
332                  if I /= Line'First and then Line (I - 1) /= ''' then
333                     State := In_Quotes;
334                  end if;
335               elsif Line (I) in '0' .. '9' then
336                  State := In_Number;
337               else
338                  Lower_C := Value (Lower_Case_Map, Line (I));
339                  if Lower_C in 'a' .. 'z' then
340                     Kw_State := Start (Lower_C);
341                     if Kw_State = 0 then
342                        State := Skipping;
343                     else
344                        State := Analyzing;
345                        if Line (I) = Lower_C then
346                           Case_First := Lower;
347                        else
348                           Case_First := Upper;
349                        end if;
350                        Case_Others := Unknown;
351                        First := I;
352                     end if;
353                  end if;
354               end if;
355
356            when In_Quotes =>
357               if Line (I) = '"' then
358                  State := Search_Begin;
359               end if;
360
361            when In_Number =>
362               if not Is_In (Line (I), Number_Set) then
363                  State := Search_Begin;
364               end if;
365
366            when Skipping =>
367               if not Is_In (Line (I), Identifier_Set) then
368                  State := Search_Begin;
369               end if;
370
371            when Analyzing =>
372               Lower_C := Value (Lower_Case_Map, Line (I));
373               if Lower_C in 'a' .. 'z' then
374                  loop
375                     if Lower_C = Automat (Kw_State).Char then
376                        Kw_State := Kw_State + 1;
377                        case Case_Others is
378                           when Upper =>
379                              if Line (I) = Lower_C then
380                                 Case_Others := Mixed;
381                              end if;
382                           when Lower =>
383                              if Line (I) /= Lower_C then
384                                 Case_Others := Mixed;
385                              end if;
386                           when Title =>
387                              Failure ("Case_Others is Title");
388                           when Mixed =>
389                              null;
390                           when Unknown =>
391                              if Line (I) = Lower_C then
392                                 Case_Others := Lower;
393                              else
394                                 Case_Others := Upper;
395                              end if;
396                        end case;
397                        exit;
398                     end if;
399                     Kw_State := Automat (Kw_State).If_Not_Matched;
400                     if Kw_State = 0 then
401                        State := Skipping;
402                        exit;
403                     end if;
404                  end loop;
405               elsif not Is_In (Lower_C, Identifier_Set) and then Automat (Kw_State).Char = '.' then
406                  -- Keyword found
407                  case Case_Others is
408                     when Upper =>
409                        if Case_First /= Upper or else not Expected (Ca_Uppercase) then
410                           Do_Report (First, I-1);
411                        end if;
412                     when Lower =>
413                        if Case_First = Upper then
414                           if not Expected (Ca_Titlecase) then
415                              Do_Report (First, I - 1);
416                           end if;
417                        else
418                           if not Expected (Ca_Lowercase) then
419                              Do_Report (First, I - 1);
420                           end if;
421                        end if;
422                     when Title =>
423                        Failure ("Case_Others is Title");
424                     when Mixed =>
425                        Do_Report (First, I - 1);
426                     when Unknown =>
427                        Failure ("Case_Others is Unknown");
428                  end case;
429
430                  State := Search_Begin;
431               elsif not Is_In (Line (I), Identifier_Set) then
432                  State := Search_Begin;
433               else
434                  State := Skipping;
435               end if;
436         end case;
437
438         if State /= In_Quotes then
439            case Line (I) is
440               when Wide_Space | Wide_HT =>
441                  null;
442               when ''' =>
443                  -- The following is not perfectly correct in a general parser to recognize
444                  -- a tick from an attribute, because of things like character'('a')
445                  -- However, it is sufficient here since we just want to protect against
446                  -- 'range and 'access
447                  Previous_Is_Tick := I > Line'First + 1 and then Line (I-2) /= ''';
448               when others =>
449                  Previous_Is_Tick := False;
450            end case;
451         end if;
452      end loop;
453
454      if State = Analyzing and then Automat (Kw_State).Char = '.' then
455         -- Line ended with keyword
456         case Case_Others is
457            when Upper =>
458               if Case_First /= Upper or else not Expected (Ca_Uppercase) then
459                  Do_Report (First, Last);
460               end if;
461            when Lower =>
462               if Case_First = Upper then
463                  if not Expected (Ca_Titlecase) then
464                     Do_Report (First, Last);
465                  end if;
466               else
467                  if not Expected (Ca_Lowercase) then
468                     Do_Report (First, Last);
469                  end if;
470               end if;
471            when Title =>
472               Failure ("Case_Others is Title");
473            when Mixed =>
474               Do_Report (First, Last);
475            when Unknown =>
476               Failure ("Case_Others is Unknown");
477         end case;
478      end if;
479   end Process_Line;
480
481end Rules.Style.Keyword;
482