#!/usr/bin/mawk -f

# Parse a C declaration by recursive descent and print an English
# description of it, e.g.
#
#     char *argv[];      ->  argv: array[] of pointer to char
#     int (*fp)();       ->  fp: pointer to function returning int
#
# decl.awk, with extra backslash escapes in the regex strings.
#
# Input is processed one line at a time; comments that span several
# lines are not recognised as such.

################################################


# gobble(s) -- lexical scanner
#
#   s : string, treated as a regular expression
#
# Removes one leading SPACE (if any) from the global variable `line`,
# then removes the longest match of s anchored at the front of `line`.
# Returns the matched text, or "" when s does not match.  The leading
# space is consumed even when s does not match.
#
# `tok` is a local variable.

function gobble(s,    tok)
{
    sub(/^ /, "", line)                 # eat SPACE if any

    # Wrap s in parentheses so the ^ anchor applies to the whole
    # expression and not just its first alternative.
    if (!match(line, "^" "(" s ")"))
        return ""                       # no token: leave the rest of line alone

    tok  = substr(line, 1, RLENGTH)
    line = substr(line, RLENGTH + 1)
    return tok
}


# ptr_to(n) -- return "pointer to" repeated n times, separated by single
# spaces.  n is truncated to an integer; n <= 0 yields "".
#
# `out` and `i` are local variables.

function ptr_to(n,    out, i)
{
    n = int(n)
    if (n <= 0)
        return ""

    out = "pointer to"
    for (i = 2; i <= n; i++)
        out = out " pointer to"
    return out
}


# decl() -- recursively parse one declarator off the front of `line`.
#
# Returns an English description of the declarator (identifier, a colon,
# then the derived-type chain), or "" if the text is not a declarator.
# Uses the globals id, funct_mark and array_mark set up in BEGIN.
#
# `x`, `t` and `ptr_part` are local variables.

function decl(    x, t, ptr_part)
{
    # Leading run of '*' (possibly separated by spaces): each star is one
    # level of "pointer to".  It is appended last because prefix stars
    # bind less tightly than the () and [] suffixes.
    x = gobble("[* ]+")
    gsub(/ /, "", x)                    # remove all SPACES
    ptr_part = ptr_to(length(x))

    # We expect to see either '(' or an identifier.
    if (gobble("\\(") != "") {
        # The recursive descent part: a nested declarator followed by
        # the closing ')'.  Anything else is a failure.
        x = decl()
        if (x == "")
            return ""
        if (gobble("\\)") == "")
            return ""
    } else {
        x = gobble(id)
        if (x == "")
            return ""
        x = x ":"
    }

    # Finally collect any number of "()" or "[ opt_size ]" suffixes.
    while (1) {
        if (gobble(funct_mark) != "") {
            x = x " function returning"
        } else if ((t = gobble(array_mark)) != "") {
            gsub(/ /, "", t)
            x = x " array" t " of"
        } else {
            break
        }
    }

    return x " " ptr_part
}


# strip_comments(s) -- return s with every C comment replaced by one
# space.  A comment left open at the end of s is removed up to the end.
#
# The body alternative \*+[^*\/] consumes a run of stars only when it is
# NOT followed by '/', so terminators with any number of stars
# ("*/", "**/", ...) are recognised and the match cannot run past them.

function strip_comments(s)
{
    gsub(/\/\*([^*]|\*+[^*\/])*(\*+\/|\**$)/, " ", s)
    return s
}


BEGIN { id = "[_A-Za-z][_A-Za-z0-9]*"
        funct_mark = "\\([ \t]*\\)"
        array_mark = "\\[[ \t]*[_A-Za-z0-9]*[ \t]*\\]"

# I've assumed types are keywords or all CAPS or end in _t
# Other conventions could be added.

        type0 = "int|char|short|long|double|float|void"
        type1 = "[_A-Z][_A-Z0-9]*"              # types are CAPS
        type2 = "[_A-Za-z][_A-Za-z0-9]*_t"      # end in _t

        types = "(" type0 "|" type1 "|" type2 ")"
}


# Main rule: try to read each input line as
#     [extern|static] type declarator ;
# and print its description.  Lines that do not start with a recognised
# type are skipped; lines whose declarator fails to parse print nothing.

{
    $0 = strip_comments($0)             # remove comments
    gsub(/[ \t]+/, " ")                 # squeeze white space to a single space

    line = $0

    scope = gobble("extern|static")

    type = gobble("(struct|union|enum) ")
    if (type != "") {
        type = type gobble(id)          # get the tag
    } else {
        # Two separate statements: the operands of a concatenation are
        # not guaranteed to be evaluated left to right, and both calls
        # consume input from `line`.
        type = gobble("(un)?signed ")
        type = type gobble(types)
    }

    if (type == "") next

    if ((x = decl()) != "" && gobble(";") != "") {
        x = x " " type
        if (scope != "") x = x " (" scope ")"
        gsub(/ +/, " ", x)              # collapse runs of spaces left by empty parts
        print x
    }
}