%{
/*-
 * %sccs.include.proprietary.c%
 */

#ifndef lint
static char sccsid[] = "@(#)style1.l 4.4 (Berkeley) 04/14/92";
#endif /* not lint */

/*
 * Break out words, output cap + word(inverted).
 *
 * Every matched token is first copied into my_yytext (see YY_USER_ACTION)
 * because the actions edit the text in place and call unput(), which may
 * clobber the scanner's own yytext.  Output goes to stdout, one item per
 * line, through the three macros below.
 */

#include <stdio.h>
#include <ctype.h>
#include <string.h>

/* Print the saved token text reversed, then a newline (uses global i). */
#define OUT()	do { \
		for (i = yyleng - 1; i >= 0; i--) \
			putchar(my_yytext[i]); \
		putchar('\n'); \
	} while (0)
/* Print "<tag>:<saved token text>" on its own line. */
#define OUT1(nam)	printf("%c:%s\n",nam,my_yytext)
/* Print a fixed string on its own line. */
#define OUTN(string)	printf("%s\n",string)

#include "names.h"
#include "nhash.c"
#include "dict.c"
#include "ydict.c"
#include "abbrev.c"

char nt[] = "D:n't";	/* emitted after a verb that carried a n't suffix */
char qs[] = "c:'s";	/* emitted after a word that carried a 's suffix */
char fin[] = "E:.";	/* emitted where a sentence end is inferred */
int NOCAPS = 0;		/* if set all caps are turned to lower case */
int i,j;		/* scratch index / lookup result shared by the actions */
int dot = 0;		/* set when the previous token ended in a period */
int first = 1;		/* set while the next word starts a sentence */
int qflg,nflg;		/* inside-quote flag, pending-n't flag */
int cap = 0;		/* set when the current word was capitalized */

/* Private, editable copy of the current token. */
static char my_yytext[YY_BUF_SIZE];

/*
 * Functions defined after the rules section.  They are declared without
 * parameter lists on purpose so that the declarations stay compatible
 * with the definitions at the end of this file.
 */
int look();
int pos();
int ahead();

/*
 * Save the token before each action runs.  The copy is bounded: a token
 * that does not fit is truncated and yyleng is clamped to match, so the
 * actions never index past the end of my_yytext.
 */
#define YY_USER_ACTION \
	do { \
		if ((size_t)yyleng >= sizeof my_yytext) \
			yyleng = (int)(sizeof my_yytext - 1); \
		memcpy(my_yytext, yytext, (size_t)yyleng); \
		my_yytext[yyleng] = '\0'; \
	} while (0);
%}

L	[a-z]
N	[0-9]
C	[A-Z]
A	[a-zA-Z]
P	[a-zA-Z0-9]

%%
^[.!].+[\n]	{
		/* line starting with . or ! is passed through after a colon */
		if(dot){
			OUTN(fin);
			dot = 0;
			first = 1;
		}
		printf(":%s",my_yytext);
	}
May	{
		if(first == 0){
			OUT1(NOUN);
		}
		else {
			/* sentence-initial: treat as the ordinary word */
			first = 0;
			my_yytext[0] = tolower((unsigned char)my_yytext[0]);
			cap = 1;
			goto wd;
		}
	}
"U.S."	{
		OUT1(NOUN);
	}
{C}{L}*'[s]	{
		pos(1);
		if(first==1)first=0;
	}
{C}+['][s]	{
		if(NOCAPS)
			for(i=0;i<yyleng;i++)
				my_yytext[i] = tolower((unsigned char)my_yytext[i]);
		OUT1(POS);
	}
{P}+([-]{P}+)+	{
		if(NOCAPS)
			for(i=0;i<yyleng;i++)
				my_yytext[i] = tolower((unsigned char)my_yytext[i]);
		OUT1(NOUN_ADJ);
	}
{C}{C}+	{
		if(NOCAPS)
			for(i=0;i<yyleng;i++)
				my_yytext[i] = tolower((unsigned char)my_yytext[i]);
		/* peek one character: a trailing s makes a plural acronym */
		i = input();
		if(i == 's' && (size_t)yyleng + 1 < sizeof my_yytext){
			my_yytext[yyleng++] = 's';
			my_yytext[yyleng] = '\0';
			OUT1(PNOUN);
		}
		else {
			/*
			 * Push the lookahead back, but never the end-of-input
			 * value (EOF, or 0 in newer flex releases).
			 */
			if(i != EOF && i != '\0')
				unput(i);
			if(!NOCAPS)
				for(i=0;i<yyleng;i++)
					my_yytext[i] = tolower((unsigned char)my_yytext[i]);
			goto wd;
		}
	}
[LD][']{C}{L}*	{
		if(NOCAPS){
			my_yytext[0] = tolower((unsigned char)my_yytext[0]);
			my_yytext[2] = tolower((unsigned char)my_yytext[2]);
		}
		OUT1(NOUN_ADJ);
	}
{C}{L}*	{
		if(first==1)
			first=0;
		else
			cap = 1;
		if(yyleng==1 && my_yytext[0] == 'I'){
			cap = 0;
			goto wd;
		}
		my_yytext[0] = tolower((unsigned char)my_yytext[0]);
		goto wd;
	}
{N}":"{N}{N}	{
		OUT1(NOUN_ADJ);
	}
({N}*[,])*({N}+".")+[ \t\n]+{C}	{
		/* number, period, then a capital: the period ends a sentence */
		for(i=yyleng-1;i>0;i--)
			if(my_yytext[i] == '.')break;
		unput(my_yytext[yyleng-1]);
		my_yytext[i] = '\0';
		OUT1(NOUN_ADJ);
		OUTN(fin);
		first = 1;
	}
([hH]e"/"[sS]he)|([sS]he"/"[hH]e)	{
		if(NOCAPS)
			my_yytext[0] = tolower((unsigned char)my_yytext[0]);
		OUT1(PRONS);
	}
([hH]is"/"[hH]er)|([hH]er"/"[hH]is)	{
		if(NOCAPS)
			my_yytext[0] = tolower((unsigned char)my_yytext[0]);
		OUT1(POS);
	}
[ \t`]*[a-zA-Z0-9.]*("\/"[a-zA-Z0-9.]+)+[']*	{
		if(my_yytext[yyleng-1] == '.'){
			if(ahead() == 0)dot=1;
		}
		if(NOCAPS)
			for(i=0;i<yyleng;i++)
				my_yytext[i] = tolower((unsigned char)my_yytext[i]);
		OUT1(NOUN_ADJ);
	}
{N}+([,]{N}+)*("."{N}+)*[']*[s]*	{
		OUT1(NOUN_ADJ);
	}
{N}*([,]{N}+)*("."{N}+)+[']*[s]*	{
		OUT1(NOUN_ADJ);
	}
{N}+([,]{N}+)*("."{N}*)*[']*[s]*	{
		if(my_yytext[yyleng-1] == '.')dot=1;
		OUT1(NOUN_ADJ);
	}
({A}*{N}+{A}*)+	{
		/*
		 * Peek at the next character.  A period may continue the
		 * token, which ahead() decides.  Anything else is pushed
		 * back so that following punctuation is not lost.
		 */
		i = input();
		if(i == '.')
			ahead();
		else if(i != EOF && i != '\0')
			unput(i);
		if(NOCAPS)
			for(i=0;i<yyleng;i++)
				my_yytext[i] = tolower((unsigned char)my_yytext[i]);
		OUT1(NOUN_ADJ);
	}
{N}+[%]	{
		OUT1(NOUN_ADJ);
	}
"$"{N}+([,]{N}+)*("."{N}*)*	{
		if(my_yytext[yyleng-1] == '.')dot=1;
		OUT1(NOUN);
	}
[Aa]"."[ ]*[Mm]"."	{
		OUT1(ADJ_ADV);
	}
[Pp]"."[ ]*[Mm]"."	{
		OUT1(ADJ_ADV);
	}
"a."[ ]*"d."	{
		OUT1(ADJ_ADV);
	}
"b."[ ]*"c."	{
		OUT1(ADJ_ADV);
	}
"i."[ ]*"e."	{
		OUT1(PREP);
	}
"e."[ ]*"g."	{
		OUT1(PREP);
	}
"etc."[ \n]*[,)]*	{
		/* print the abbreviation alone, then classify what followed it */
		i = my_yytext[4];
		my_yytext[4] = '\0';
		OUT1(NOUN);
		my_yytext[4] = i;
		my_yytext[0] = my_yytext[yyleng-1];
		my_yytext[1] = '\0';
		if(my_yytext[0] == ',' || my_yytext[0] == ')')
			OUT1(',');
		else {
			OUTN(fin);
			first = 1;
		}
	}
"et al."	{
		OUT1(NOUN);
	}
in"."[ \n]*{C}	{
		/* the capital belongs to the next sentence: give it back */
		unput(my_yytext[yyleng-1]);
		my_yytext[2] = '\0';
		OUT1(PREP);
		OUTN(fin);
		first = 1;
	}
Ph"."[ ]*[Dd]"."	{
		OUT1(ADJ);
	}
[A-Z]"."	{
		dot=1;
		OUT1(NOUN);
	}
can't	{
		my_yytext[3]='\0';
		yyleng -= 2;
		nflg=1;
		goto wd;
	}
won't	{
		OUT1('X');
	}
ain't	{
		OUT1('g');
	}
{L}+n't	{
		nflg=1;
		my_yytext[yyleng-3]='\0';
		yyleng -= 3;
		goto wd;
	}
[A-Z]{L}+n't	{
		my_yytext[0] = tolower((unsigned char)my_yytext[0]);
		nflg=1;
		my_yytext[yyleng-3]='\0';
		yyleng -= 3;
		goto wd;
	}
o'clock	{
		OUT1(ADV);
	}
{L}+'[s]	{
		pos(0);
	}
'll	{
		OUT1(lookup("will",1,0));
	}
've	{
		OUT1(lookup("have",1,0));
	}
're	{
		OUT1(lookup("are",1,0));
	}
'd	{
		OUT1(lookup("had",1,0));
	}
'm	{
		OUT1(lookup("am",1,0));
	}
'ld	{
		OUT1(lookup("would",1,0));
	}
{L}+	{
	wd:
		/* common exit for every rule that ends up with a plain word */
		if((j = lookup(my_yytext,1,0)) != 0){
			first=0;
			if(cap){
				if(!NOCAPS)
					my_yytext[0] = toupper((unsigned char)my_yytext[0]);
				cap = 0;
				if(dot)OUTN(fin);
			}
			dot=0;
			OUT1(j);
			if(nflg==1){
				nflg=0;
				OUTN(nt);
			}
		}
		else {
			first = dot = 0;
			/*
			 * Unknown word ending in y: classify by suffix.  The
			 * length checks keep the suffix tests inside the
			 * buffer for one and two letter words.
			 */
			if(yyleng >= 2 && my_yytext[yyleng-1] == 'y' && cap == 0){
				/* third character from the end, NUL if there is none */
				int c3 = yyleng >= 3 ? my_yytext[yyleng-3] : '\0';

				switch(my_yytext[yyleng-2]){
				case 'c':
					look(cy,yyleng-2,NOUN);
					break;
				case 'f':
					look(fy,yyleng-2,VERB);
					break;
				case 'l':
					look(ly,yyleng-2,ADV);
					break;
				case 'g':
					if(c3 == 'o'){
						OUT1(NOUN);
						break;
					}
					look(gy,yyleng-2,ADJ);
					break;
				case 'r':
					switch(c3){
					case 'a':
						look(ary,yyleng-3,ADJ);
						break;
					case 'o':
						look(ory,yyleng-3,ADJ);
						break;
					case 'e':
						look(ery,yyleng-3,NOUN);
						break;
					default:
						look(ry,yyleng-2,NOUN);
					}
					break;
				case 't':
					if(c3 == 'i')
						look(ity,yyleng-3,NOUN);
					else
						look(ty,yyleng-2,ADJ);
					break;
				default:
					OUT();
				}
			}
			else {
				if(cap){
					if(!NOCAPS)
						my_yytext[0] = toupper((unsigned char)my_yytext[0]);
					cap = 0;
					OUT1(NOUN_ADJ);
				}
				else {
					OUT();
				}
			}
		}
	}
[\n]	;
[ ]+	;
[\t]+	;
";"	{
		OUT1(';');
		first=1;
	}
(\"|`|')+	{
		if(dot){
			OUTN(fin);
			dot=0;
		}
		if(qflg==1){
			qflg=0;
			OUT1('"');
		}
		else {
			qflg=1;
			first=1;
			OUT1('"');
		}
	}
".\""	{
		qflg=0;
		first=1;
		OUT1(END);
	}
"..."	{
		OUT1(',');
	}
"/."	{
		first = 1;
		OUT1(END);
	}
{A}{A}+"."	{
		/* strip the period and try the abbreviation table first */
		my_yytext[yyleng-1] = '\0';
		if((j=abbrev(my_yytext,1,0)) != 0){
			if(isupper((unsigned char)my_yytext[0])){
				if(NOCAPS)
					my_yytext[0] = tolower((unsigned char)my_yytext[0]);
				if(first == 1)first=0;
			}
			my_yytext[yyleng-1] = '.';
			OUT1(j);
		}
		else {
			j = ahead();
			if(j == 0)
				yyleng--;
			for(i=0;i<yyleng;i++)
				if(isupper((unsigned char)my_yytext[i])){
					my_yytext[i] = tolower((unsigned char)my_yytext[i]);
					if(i == 0)cap = 1;
					else cap = 0;
				}
			if(j == 0)goto wd;
			OUT1(NOUN_ADJ);
		}
	}
"."	{
		first=1;
		OUT1(END);
	}
"!\""	{
		qflg=0;
		first=1;
		OUT1(END);
	}
"!"	{
		first=1;
		OUT1(END);
	}
"?\""	{
		qflg=0;
		first=1;
		OUT1(END);
	}
"?"	{
		first=1;
		OUT1(END);
	}
":"	{
		OUT1(',');
		first=1;
	}
[-]+	{
		OUT1(',');
		first=1;
	}
","	{
		OUT1(',');
	}
(\[|\(|\{|\]|\)|\})	{
		OUT1(',');
	}
.	{
		/* fprintf(stderr,"nwords funny char: %c\n",my_yytext[0])*/ ;
	}
%%
/*
 * look - classify the current word by looking up its stem.
 *
 * Temporarily cuts my_yytext at index n, calls the table function f on
 * the shortened text, then restores the cut character.  Prints the whole
 * word tagged with what f returned, or with the fallback tag cc when f
 * returned 0.  Always returns 0; callers ignore the value.
 */
int
look(char (*f)(), int n, int cc)
{
	int tag;
	char saved;

	saved = my_yytext[n];
	my_yytext[n] = '\0';
	tag = (*f)(my_yytext, 1, 0);
	my_yytext[n] = saved;
	OUT1(tag != 0 ? tag : cc);
	return 0;
}

/*
 * pos - handle a word that carries an apostrophe-s suffix.
 *
 * flg is 1 when the word was capitalized; the first letter is then
 * lowercased for the lookup.  If the part before the apostrophe is known
 * to lookup(), it is printed with its tag followed by the qs line.
 * Otherwise the original text is restored (first letter uppercased again
 * unless NOCAPS) and printed tagged POS.  Always returns 0; callers
 * ignore the value.
 */
int
pos(int flg)
{
	int ii, tag;

	if (flg == 1)
		my_yytext[0] = tolower((unsigned char)my_yytext[0]);
	/* find the last apostrophe; both calling rules guarantee there is one */
	for (ii = yyleng - 1; ii > 0 && my_yytext[ii] != '\''; ii--)
		;
	my_yytext[ii] = '\0';
	if ((tag = lookup(my_yytext, 1, 0)) != 0) {
		yyleng = ii;
		OUT1(tag);
		OUTN(qs);
	} else {
		if (flg == 1 && !NOCAPS)
			my_yytext[0] = toupper((unsigned char)my_yytext[0]);
		my_yytext[ii] = '\'';
		OUT1(POS);
	}
	return 0;
}

char *filename="-";

/*
 * main - tag standard input, or each file named on the command line.
 *
 * Prints a leading ":" line, loads the tables through getd(), getab()
 * and ygetd(), then runs the scanner once per input and prints the fin
 * line after each.  Returns the number of files that could not be opened.
 */
int
main(int argc, char *argv[])
{
	int rc = 0;

	putchar(':');
	putchar('\n');
	getd();
	getab();
	ygetd();
	if (argc <= 1) {
		yylex();
		OUTN(fin);
		return rc;
	}
	for (; argc > 1; argc--, argv++) {
		if (freopen(argv[1], "r", stdin) == NULL) {
			fprintf(stderr,"%s: cannot open\n", argv[1]);
			rc++;
			continue;
		}
		filename = argv[1];
		yylex();
		OUTN(fin);
	}
	return rc;
}

/*
 * Append one character to the saved token text, always leaving room for
 * the terminating NUL.  Characters that do not fit are dropped.
 */
static void
append_char(int ch)
{
	if ((size_t)yyleng + 1 < sizeof my_yytext)
		my_yytext[yyleng++] = (char)ch;
}

/*
 * ahead - decide whether a period continues the current token.
 *
 * Reads the character after the period.  If it is alphanumeric, the
 * period, that character and everything up to the next white space (or
 * end of input) are appended to my_yytext and 1 is returned; the white
 * space itself is pushed back.  Otherwise the character and the period
 * are pushed back for the scanner to see again and 0 is returned.
 *
 * input() reports end of input as EOF in older flex releases and as 0
 * in newer ones; neither value is ever pushed back or appended.
 */
int
ahead(void)
{
	int c;

	c = input();
	if (c != EOF && isalnum(c)) {
		append_char('.');
		append_char(c);	/* keep the character that was peeked at */
		while ((c = input()) != EOF && c != '\0' && !isspace(c))
			append_char(c);
		my_yytext[yyleng] = '\0';
		if (c != EOF && c != '\0')
			unput(c);
		return 1;
	}
	if (c != EOF && c != '\0')
		unput(c);
	unput('.');
	return 0;
}