1#!/usr/bin/mawk -f
2
3# parse a C declaration by recursive descent
4#
5#  decl.awk with extra escapes \
6
7################################################
8############################################
9
10
11#   lexical scanner -- gobble()
12#   input : string s -- treated as a regular expression
13#   gobble eats SPACE, then eats longest match of s off front
14#   of global variable line.
15#   Cuts the matched part off of line
16#
17
18
function gobble(s,  x)
{
  # Lexical scanner step.
  #   s : a regular expression (as a string)
  # Works on the global variable `line`: removes one leading space if
  # there is one, then tries to match s at the very front of `line`.
  # Returns the matched text (and removes it from `line`), or "" when
  # s does not match at the front.  The leading space is consumed even
  # when the match fails.

  if ( substr(line, 1, 1) == " " )
    line = substr(line, 2)

  # s is wrapped in parentheses so that ^ anchors every alternative
  # of s, not just the first one.
  if ( ! match(line, "^" "(" s ")") )
    return ""

  # split `line` at the end of the match: head is returned, tail kept
  x = substr(line, 1, RLENGTH)
  line = substr(line, RLENGTH + 1)
  return x
}
31
32
function ptr_to(n,  x)
{
  # Build the phrase "pointer to" repeated n times, separated by
  # single spaces.  n is truncated to an integer; a count of zero or
  # less yields the empty string.

  x = ""
  for ( n = int(n) ; n > 0 ; n-- )
    x = ( x == "" ) ? "pointer to" : x " pointer to"
  return x
}
40
41
42#recursively get a decl
43# returns an english description of the declaration or
44# "" if not a C declaration.
45
function  decl(   x, t, ptr_part)
{
  # Recursive-descent parse of one declarator taken from the front of
  # the global `line` (via gobble).  Returns an English description,
  # or "" if the text is not a declarator this parser recognizes.
  #
  #   x        : description built so far (first used for the star list)
  #   t        : text of the array suffix currently being handled
  #   ptr_part : "pointer to ..." phrase for the leading stars

  # leading run of '*' (possibly separated by spaces): one
  # "pointer to" per star
  x = gobble("[* ]+")
  gsub(/ /, "", x)
  ptr_part = ptr_to( length(x) )

  # next must come either '(' declarator ')' or a plain identifier
  if ( gobble("\\(") != "" )
  {
    # recursive descent: a nested declarator followed by ')'
    x = decl()
    if ( x == "" )  return ""
    if ( gobble( "\\)" ) == "" )  return ""
  }
  else
  {
    x = gobble(id)
    if ( x == "" )  return ""
    x = x ":"
  }

  # trailing suffixes, in any number:  ()  or  [ opt_size ]
  for ( ; ; )
  {
    if ( gobble( funct_mark ) != "" )
    {
      x = x " function returning"
      continue
    }

    t = gobble( array_mark )
    if ( t == "" )  break

    gsub(/ /, "", t)     # "[ 10 ]" -> "[10]"
    x = x " array" t " of"
  }

  # the pointer phrase comes after the suffixes, since they bind
  # tighter than the leading stars
  return x " "  ptr_part
}
87
88
BEGIN {
    # Regular expressions (as strings) handed to gobble().

    # a C identifier
    id = "[_A-Za-z][_A-Za-z0-9]*"

    # function suffix: an empty pair of parentheses
    funct_mark = "\\([ \t]*\\)"

    # array suffix: brackets around an optional size
    array_mark = "\\[[ \t]*[_A-Za-z0-9]*[ \t]*\\]"

    # What counts as a type name: a built-in keyword, a name written
    # entirely in capitals, or a name ending in _t.
    # Further conventions can be added as extra alternatives.
    type0 = "int|char|short|long|double|float|void"
    type1 = "[_A-Z][_A-Z0-9]*"
    type2 = "[_A-Za-z][_A-Za-z0-9]*_t"

    types = "(" type0 "|" type1 "|" type2 ")"
}
102
103
{
    # Main rule: try to read each input line as
    #     [extern|static] type declarator ;
    # and print an English description of it.  Lines that do not fit
    # produce no output.

    # Remove /* ... */ comments (an unterminated comment runs to end
    # of line).  Runs of '*' inside the comment are consumed as a unit
    # (\*+) so that a terminator such as "**/" is recognized and the
    # match cannot run on into code and a later comment.
    gsub( /\/\*([^*]|\*+[^*\/])*(\*+\/|\**$)/ , " ")
    gsub( /[ \t]+/, " ")  # squeeze white space to a single space


    line = $0

    scope = gobble( "extern|static" )

    # "extern"/"static" must be a whole word; if it was only the
    # prefix of a longer name (e.g. static_t), put it back.
    if ( scope != "" && line ~ /^[_A-Za-z0-9]/ )
    {
       line = scope line
       scope = ""
    }

    if ( type = gobble("(struct|union|enum) ") )
    		type = type gobble(id)  #  get the tag
    else
    {
       # Two separate statements: the order in which the operands of
       # a concatenation are evaluated is not guaranteed, and both
       # calls consume text from `line`.
       sign_part = gobble("(un)?signed ")
       base_type = gobble( types )

       # The type must be a whole word as well: reject a match that
       # stops in the middle of an identifier (e.g. "int" in "intx").
       if ( base_type != "" && line ~ /^[_A-Za-z0-9]/ )  next

       type = sign_part base_type
    }

    if ( ! type )  next

    if ( (x = decl()) && gobble( ";") )
    {
      x  =  x " " type
      if ( scope )  x = x " (" scope ")"
      gsub( /  +/, " ", x)  # collapse doubled spaces left by joining
      print x
    }

}
134