%{
/*-
 * %sccs.include.proprietary.c%
 */

#ifndef lint
static char sccsid[] = "@(#)style1.l 8.2 (Berkeley) 01/28/94";
#endif /* not lint */

/*
 * break out words, output cap + word(inverted)
 *
 * Scanner that splits running text into tokens and writes one line per
 * token on standard output:
 *
 *	OUT1(c)	  "c:token"  - token tagged with the class code c
 *	OUT()	  the token spelled backwards, with no tag; used for words
 *		  that neither the dictionary lookup nor the suffix tables
 *		  could classify
 *	OUTN(s)	  the string s as is (used for the canned lines nt, qs, fin)
 *
 * The class codes (NOUN, VERB, ADJ, POS, END, ...) come from names.h,
 * which is not visible here; they are printed with %c, so they are
 * presumably single-character codes.  The dictionary routines (lookup,
 * abbrev, cy, fy, ..., getd, getab, ygetd) come from the included .c files.
 *
 * Every action works on my_yytext, a private copy of yytext made by
 * YY_USER_ACTION, because several actions call unput()/input() and still
 * need the token text afterwards.
 */

#include <stdio.h>
#include <ctype.h>
#include <string.h>

/* Write the current token reversed, followed by a newline. */
#define OUT()	do { \
		int k_; \
		for(k_ = yyleng-1; k_ >= 0; k_--) \
			putchar(my_yytext[k_]); \
		putchar('\n'); \
	} while(0)
#define OUT1(nam) printf("%c:%s\n",nam,my_yytext)
#define OUTN(string) printf("%s\n",string)
#include "names.h"
#include "nhash.c"
#include "dict.c"
#include "ydict.c"
#include "abbrev.c"
char nt[] = "D:n't";		/* emitted after a verb that carried n't */
char qs[] = "c:'s";		/* emitted after a word that carried 's */
char fin[] = "E:.";		/* emitted where a sentence is judged to end */
int NOCAPS = 0;	/* if set all caps are turned to lower case */
int i,j;	/* scratch variables shared by the actions */
int dot = 0;	/* previous token ended in a period that may end a sentence */
int first = 1;	/* next word is the first one after a sentence/clause break */
int qflg,nflg;	/* qflg: inside a quotation; nflg: an n't is pending output */
int cap = 0;	/* current word was capitalised other than by starting a sentence */

static char my_yytext[YY_BUF_SIZE];

/* Largest token length my_yytext can hold, leaving room for the NUL. */
#define TEXT_MAX	((int)sizeof(my_yytext) - 1)

/*
 * input() signals end of input with EOF or with 0, depending on the flex
 * version; neither value may be handed back to unput().
 */
#define AT_END(c)	((c) == EOF || (c) == 0)
#define PUTBACK(c)	do { if(!AT_END(c)) unput(c); } while(0)

/*
 * Copy the matched text into my_yytext before each action runs.  The copy
 * is bounded: an over-long token is truncated (and yyleng reduced to
 * match) instead of overrunning the array.
 */
#define YY_USER_ACTION \
	{ \
		size_t tok_len = (size_t)yyleng; \
		if(tok_len > sizeof(my_yytext) - 1) \
			tok_len = sizeof(my_yytext) - 1; \
		memcpy(my_yytext, yytext, tok_len); \
		my_yytext[tok_len] = '\0'; \
		yyleng = tok_len; \
	}

int look(char (*f)(), int n, int cc);
int pos(int flg);
int ahead(void);

/*
 * Lower-case the first n characters of s in place.  The cast keeps the
 * <ctype.h> call defined for bytes with the high bit set.
 */
static void
fold_case(char *s, int n)
{
	int k;

	for(k = 0; k < n; k++)
		s[k] = tolower((unsigned char)s[k]);
}
%}

L	[a-z]
N	[0-9]
C	[A-Z]
A	[a-zA-Z]
P	[a-zA-Z0-9]

%%
^[.!].+[\n]	{
		/* whole line starting with '.' or '!': copy it through after a ':' */
		if(dot){
			OUTN(fin);
			dot = 0;
			first = 1;
		}
		printf(":%s",my_yytext);
		}
May	{
		/* mid-sentence "May" is taken as a noun; sentence-initial goes to the dictionary */
		if(first == 0){
			OUT1(NOUN);
		}
		else {
			first = 0;
			my_yytext[0] = tolower(my_yytext[0]);
			cap = 1;
			goto wd;
		}
		}
"U.S."	{
		OUT1(NOUN);
		}
{C}{L}*'[s]	{
		pos(1);
		if(first==1)first=0;
		}
{C}+['][s]	{
		if(NOCAPS)
			fold_case(my_yytext, yyleng);
		OUT1(POS);
		}
{P}+([-]{P}+)+	{
		if(NOCAPS)
			fold_case(my_yytext, yyleng);
		OUT1(NOUN_ADJ);
		}
{C}{C}+	{
		/* all-capitals word: with a trailing 's' it is PNOUN, otherwise look it up in lower case */
		if(NOCAPS)
			fold_case(my_yytext, yyleng);
		i = input();
		if(i == 's'){
			if(yyleng < TEXT_MAX){
				my_yytext[yyleng++] = 's';
				my_yytext[yyleng] = '\0';
			}
			OUT1(PNOUN);
		}
		else {
			/* never push the end-of-input value back into the scanner */
			PUTBACK(i);
			if(!NOCAPS)
				fold_case(my_yytext, yyleng);
			goto wd;
		}
		}
[LD][']{C}{L}*	{
		if(NOCAPS){
			my_yytext[0] = tolower(my_yytext[0]);
			my_yytext[2] = tolower(my_yytext[2]);
		}
		OUT1(NOUN_ADJ);
		}
{C}{L}*	{
		/* a capital that does not start a sentence marks the word as capitalised */
		if(first==1)
			first=0;
		else cap = 1;
		if(yyleng==1 && my_yytext[0] == 'I'){
			cap = 0;
			goto wd;
		}
		my_yytext[0] = tolower(my_yytext[0]);
		goto wd;
		}
{N}":"{N}{N}	{
		OUT1(NOUN_ADJ);
		}
({N}*[,])*({N}+".")+[ \t\n]+{C}	{
		/* number, period, white space, capital: the period ends the sentence */
		for(i=yyleng-1;i>0;i--)
			if(my_yytext[i] == '.')break;
		unput(my_yytext[yyleng-1]);	/* give the capital back */
		my_yytext[i] = '\0';		/* cut the token at the last period */
		OUT1(NOUN_ADJ);
		OUTN(fin);
		first = 1;
		}
([hH]e"/"[sS]he)|([sS]he"/"[hH]e)	{
		if(NOCAPS)
			if(isupper(my_yytext[0]))my_yytext[0] = tolower(my_yytext[0]);
		OUT1(PRONS);
		}
([hH]is"/"[hH]er)|([hH]er"/"[hH]is)	{
		if(NOCAPS)
			if(isupper(my_yytext[0]))my_yytext[0] = tolower(my_yytext[0]);
		OUT1(POS);
		}
[ \t`]*[a-zA-Z0-9.]*("\/"[a-zA-Z0-9.]+)+[']*	{
		/*
		 * NOTE(review): the trailing '.' is still part of the token
		 * here, yet ahead() appends (or pushes back) another one.
		 * Left as found; confirm whether that is intended.
		 */
		if(my_yytext[yyleng-1] == '.'){
			if(ahead() == 0)dot=1;
		}
		if(NOCAPS)
			fold_case(my_yytext, yyleng);
		OUT1(NOUN_ADJ);
		}
{N}+([,]{N}+)*("."{N}+)*[']*[s]*	{
		OUT1(NOUN_ADJ);
		}
{N}*([,]{N}+)*("."{N}+)+[']*[s]*	{
		OUT1(NOUN_ADJ);
		}
{N}+([,]{N}+)*("."{N}*)*[']*[s]*	{
		if(my_yytext[yyleng-1] == '.')dot=1;
		OUT1(NOUN_ADJ);
		}
({A}*{N}+{A}*)+	{
		/*
		 * Peek at the next character.  A period may continue the
		 * token (ahead() decides); anything else is pushed back so
		 * that following punctuation is still scanned.
		 */
		i = input();
		if(i == '.')
			ahead();
		else
			PUTBACK(i);
		if(NOCAPS)
			fold_case(my_yytext, yyleng);
		OUT1(NOUN_ADJ);
		}
{N}+[%]	{
		OUT1(NOUN_ADJ);
		}
"$"{N}+([,]{N}+)*("."{N}*)*	{
		if(my_yytext[yyleng-1] == '.')dot=1;
		OUT1(NOUN);
		}
[Aa]"."[ ]*[Mm]"."	{
		OUT1(ADJ_ADV);
		}
[Pp]"."[ ]*[Mm]"."	{
		OUT1(ADJ_ADV);
		}
"a."[ ]*"d."	{
		OUT1(ADJ_ADV);
		}
"b."[ ]*"c."	{
		OUT1(ADJ_ADV);
		}
"i."[ ]*"e."	{
		OUT1(PREP);
		}
"e."[ ]*"g."	{
		OUT1(PREP);
		}
"etc."[ \n]*[,)]*	{
		/* print "etc." as a noun, then classify whatever character ended the match */
		i = my_yytext[4];
		my_yytext[4] = '\0';
		OUT1(NOUN);
		my_yytext[4] = i;
		my_yytext[0] = my_yytext[yyleng-1];
		my_yytext[1] = '\0';
		if(my_yytext[0] == ',' || my_yytext[0] == ')')
			OUT1(',');
		else {
			OUTN(fin);
			first = 1;
		}
		}
"et al."	{
		OUT1(NOUN);
		}
in"."[ \n]*{C}	{
		/* "in." followed by a capital: preposition at the end of a sentence */
		unput(my_yytext[yyleng-1]);
		my_yytext[2] = '\0';
		OUT1(PREP);
		OUTN(fin);
		first = 1;
		}
Ph"."[ ]*[Dd]"."	{
		OUT1(ADJ);
		}
[A-Z]"."	{
		dot=1;
		OUT1(NOUN);
		}
can't	{
		/* look up "can"; wd prints nt after a successful lookup */
		my_yytext[3]='\0';
		yyleng -= 2;
		nflg=1;
		goto wd;
		}
won't	{
		OUT1('X');
		}
ain't	{
		OUT1('g');
		}
{L}+n't	{
		nflg=1;
		my_yytext[yyleng-3]='\0';
		yyleng -= 3;
		goto wd;
		}
[A-Z]{L}+n't	{
		my_yytext[0] = tolower(my_yytext[0]);
		nflg=1;
		my_yytext[yyleng-3]='\0';
		yyleng -= 3;
		goto wd;
		}
o'clock	{
		OUT1(ADV);
		}
{L}+'[s]	{
		pos(0);
		}
'll	{
		OUT1(lookup("will",1,0));
		}
've	{
		OUT1(lookup("have",1,0));
		}
're	{
		OUT1(lookup("are",1,0));
		}
'd	{
		OUT1(lookup("had",1,0));
		}
'm	{
		OUT1(lookup("am",1,0));
		}
'ld	{
		OUT1(lookup("would",1,0));
		}
{L}+	{
	wd:
		/*
		 * Common word handling; other actions jump here with the
		 * word in my_yytext (lower case) and yyleng set to its length.
		 */
		if((j = lookup(my_yytext,1,0)) != 0){
			first=0;
			if(cap){
				if(!NOCAPS)
					my_yytext[0] = toupper(my_yytext[0]);
				cap = 0;
				if(dot)OUTN(fin);
			}
			dot=0;
			OUT1(j);
			if(nflg==1){
				nflg=0;
				OUTN(nt);
			}
		}
		else{
			first = dot=0;
			if(my_yytext[yyleng-1] == 'y' && cap == 0){
				/*
				 * Classify by the letters before the final
				 * 'y'.  Words too short to have them see '\0'
				 * rather than bytes in front of the buffer.
				 */
				int before_y = yyleng >= 2 ? my_yytext[yyleng-2] : '\0';
				int before_2 = yyleng >= 3 ? my_yytext[yyleng-3] : '\0';

				switch(before_y){
				case 'c': look(cy,yyleng-2,NOUN);
					break;
				case 'f': look(fy,yyleng-2,VERB);
					break;
				case 'l': look(ly,yyleng-2,ADV);
					break;
				case 'g': if(before_2 == 'o'){
						OUT1(NOUN);
						break;
					}
					look(gy,yyleng-2,ADJ);
					break;
				case 'r': switch(before_2){
					case 'a': look(ary,yyleng-3,ADJ);
						break;
					case 'o': look(ory,yyleng-3,ADJ);
						break;
					case 'e': look(ery,yyleng-3,NOUN);
						break;
					default: look(ry,yyleng-2,NOUN);
					}
					break;
				case 't': if(before_2 == 'i')look(ity,yyleng-3,NOUN);
					else look(ty,yyleng-2,ADJ);
					break;
				default: OUT();
				}
			}
			else {
				if(cap){
					if(!NOCAPS)my_yytext[0] = toupper(my_yytext[0]);
					cap = 0;
					OUT1(NOUN_ADJ);
				}
				else {
					OUT();
				}
			}
		}
		}
[\n]	;
[ ]+	;
[\t]+	;
";"	{
		OUT1(';');
		first=1;
		}
(\"|`|')+	{
		/* quote marks toggle qflg; an opening quote also starts a new sentence */
		if(dot){
			OUTN(fin);
			dot=0;
		}
		if(qflg==1){
			qflg=0;
			OUT1('"');
		}
		else {
			qflg=1;
			first=1;
			OUT1('"');
		}
		}
".\""	{
		qflg=0;
		first=1;
		OUT1(END);
		}
"..."	{
		OUT1(',');
		}
{A}{A}+"."	{
		/* word followed by a period: known abbreviation, or a word at the end of a sentence */
		my_yytext[yyleng-1] = '\0';
		if((j=abbrev(my_yytext,1,0)) != 0){
			if(isupper(my_yytext[0])){
				if(NOCAPS)my_yytext[0] = tolower(my_yytext[0]);
				if(first == 1)first=0;
			}
			my_yytext[yyleng-1] = '.';
			OUT1(j);
		}
		else {
			/*
			 * NOTE(review): when ahead() extends the token, the
			 * NUL stored above stays in front of the appended
			 * text, so OUT1 prints only the first word.  Left
			 * as found; confirm whether that is intended.
			 */
			j = ahead();
			if(j == 0)
				yyleng--;
			for(i=0;i<yyleng;i++)
				if(isupper((unsigned char)my_yytext[i])){
					my_yytext[i] = tolower((unsigned char)my_yytext[i]);
					if(i == 0)cap = 1;
					else cap = 0;
				}
			if(j == 0)goto wd;
			OUT1(NOUN_ADJ);
		}
		}
"."	{
		first=1;
		OUT1(END);
		}
"!\""	{
		qflg=0;
		first=1;
		OUT1(END);
		}
"!"	{
		first=1;
		OUT1(END);
		}
"?\""	{
		qflg=0;
		first=1;
		OUT1(END);
		}
"?"	{
		first=1;
		OUT1(END);
		}
":"	{
		OUT1(',');
		first=1;
		}
[-]+	{
		OUT1(',');
		first=1;
		}
","	{
		OUT1(',');
		}
(\[|\(|\{|\]|\)|\})	{
		OUT1(',');
		}
.	{
		/* fprintf(stderr,"nwords funny char: %c\n",my_yytext[0])*/ ;
		}
%%
/*
 * Classify the current word by suffix table.
 *
 * my_yytext is cut at offset n for the duration of the call to the table
 * routine f, then restored.  If f returns a non-zero class the whole word
 * is printed with that class, otherwise with the fallback class cc.
 *
 *	f	table lookup routine, called as f(my_yytext, 1, 0)
 *	n	offset at which the word is temporarily terminated
 *	cc	class code used when f finds nothing
 *
 * Always returns 0; no caller in this file uses the value.
 */
int
look(char (*f)(), int n, int cc)
{
	int found;
	char save;

	save = my_yytext[n];
	my_yytext[n] = '\0';
	found = (*f)(my_yytext,1,0);
	my_yytext[n] = save;
	if(found != 0){
		OUT1(found);
	}
	else {
		OUT1(cc);
	}
	return(0);
}

/*
 * Handle a word ending in 's.
 *
 * The part in front of the apostrophe is looked up.  If it is known, it
 * is printed with its class followed by the qs line; otherwise the whole
 * token, apostrophe included, is printed as POS.
 *
 *	flg	1 when the word started with a capital: the first letter
 *		is lower-cased for the lookup and, if the lookup fails and
 *		NOCAPS is not set, restored to upper case before printing.
 *
 * Always returns 0; no caller in this file uses the value.
 */
int
pos(int flg)
{
	int ii,j;

	if(flg == 1)my_yytext[0] = tolower(my_yytext[0]);
	/* the patterns that call pos() guarantee the token holds an apostrophe */
	for(ii=yyleng-1;my_yytext[ii] != '\''; ii--);
	my_yytext[ii] = '\0';
	if((j=lookup(my_yytext,1,0)) != 0){
		yyleng = ii;
		OUT1(j);
		OUTN(qs);
	}
	else{
		if(flg==1 && !NOCAPS)my_yytext[0] = toupper(my_yytext[0]);
		my_yytext[ii] = '\'';
		OUT1(POS);
	}
	return(0);
}

/* Name of the file being scanned; set by main() and not read in this file. */
char *filename="-";

/*
 * Scan standard input, or each file named on the command line in turn
 * (each one is reopened onto stdin).  A ":" line is written first, and
 * the fin line is written after each input that was scanned.
 *
 * Returns the number of files that could not be opened.
 */
int
main(int argc, char *argv[])
{
	int rc = 0;

	putchar(':'); putchar('\n');
	/* presumably these load the dictionary tables from the included files */
	getd();
	getab();
	ygetd();
	if(argc<=1) {
		yylex();
		OUTN(fin);
	}else{
		while(argc>1) {
			if(freopen(argv[1],"r",stdin)==NULL) {
				fprintf(stderr,"%s: cannot open\n", argv[1]);
				rc++;
			}else{
				filename=argv[1];
				yylex();
				OUTN(fin);
			}
			argc--; argv++;
		}
	}
	return(rc);
}

/*
 * Decide whether a period just read continues the current token.
 *
 * Looks at the character after the period.  If it is alphanumeric, a '.'
 * and every character up to the next white space (or end of input) are
 * appended to my_yytext, yyleng is advanced to match, the white-space
 * character is pushed back, and 1 is returned.  Otherwise the character
 * and a '.' are pushed back for the scanner to see again, and 0 is
 * returned.
 *
 * Characters that would not fit in my_yytext are read and discarded.
 */
int
ahead(void)
{
	int c;

	c = input();
	if(!AT_END(c) && isalnum(c)){
		if(yyleng < TEXT_MAX)
			my_yytext[yyleng++] = '.';
		/* keep the character just examined as well as the ones after it */
		do {
			if(yyleng < TEXT_MAX)
				my_yytext[yyleng++] = c;
			c = input();
		} while(!AT_END(c) && !isspace(c));
		my_yytext[yyleng] = '\0';
		PUTBACK(c);
		return(1);
	}
	PUTBACK(c);
	unput('.');
	return(0);
}