%{
/*-
 * %sccs.include.proprietary.c%
 */

#ifndef lint
static char sccsid[] = "@(#)style1.l 4.3 (Berkeley) 04/17/91";
#endif /* not lint */

/*
 * break out words, output cap + word(inverted)
 *
 * The scanner reads text on stdin and writes one line per token:
 *
 *	OUT1(tag)	prints "<tag>:<yytext>"
 *	OUTN(str)	prints a fixed string (nt, qs, fin below)
 *	OUT()		prints yytext reversed; used by the word rule for
 *			tokens that were not found by lookup()
 *
 * The tag constants (NOUN, ADJ, ...) and the lookup()/abbrev()/suffix
 * functions are not defined in this file; presumably they come from the
 * files included below -- confirm there.
 */

#include <stdio.h>
#include <ctype.h>

/* Multi-statement macros are wrapped so they are safe as a single statement. */
#define OUT()		do { \
				for(i=yyleng-1;i>=0; i--)putchar(yytext[i]); \
				putchar('\n'); \
			} while (0)
#define OUT1(nam)	printf("%c:%s\n",nam,yytext)
#define OUTN(string)	printf("%s\n",string)

/*
 * Fold every upper-case letter of the current token to lower case.
 * Uses the global scratch index i, which equals yyleng afterwards.
 * The unsigned char casts keep the <ctype.h> calls defined for bytes
 * with the high bit set (ahead() can append arbitrary input bytes).
 */
#define LOWER_TOKEN()	do { \
				for(i=0;i<yyleng;i++) \
					if(isupper((unsigned char)yytext[i])) \
						yytext[i] = tolower((unsigned char)yytext[i]); \
			} while (0)

#include "names.h"
#include "nhash.c"
#include "dict.c"
#include "ydict.c"
#include "abbrev.c"

/* Subroutines defined after the rules; declared here because the rules use them. */
int look(), pos(), ahead();

char nt[] = "D:n't";	/* emitted after a word that had "n't" stripped */
char qs[] = "c:'s";	/* emitted after a word that had "'s" stripped */
char fin[] = "E:.";	/* end-of-sentence marker */
int NOCAPS = 0;		/* if set all caps are turned to lower case */
int i,j;		/* scratch, shared by the actions */
int dot = 0;		/* a token ending in '.' was just seen */
int first = 1;		/* next word is the first of a sentence */
int qflg,nflg;		/* inside a quotation / "n't" pending for the word rule */
int cap = 0;		/* current word had its leading capital folded */
%}
%p 3000
%a 3300
%o 4500

L	[a-z]
N	[0-9]
C	[A-Z]
A	[a-zA-Z]
P	[a-zA-Z0-9]

%%
^[.!].+[\n]	{
	/* a line starting with '.' or '!' is copied through behind a ':' */
	if(dot){
		OUTN(fin);
		dot = 0;
		first = 1;
	}
	printf(":%s",yytext);
}
May	{
	/* mid-sentence "May" is tagged NOUN; sentence-initial goes to the word rule */
	if(first == 0){
		OUT1(NOUN);
	}
	else {
		first = 0;
		yytext[0] = tolower(yytext[0]);
		cap = 1;
		goto wd;
	}
}
"U.S."	{
	OUT1(NOUN);
}
{C}{L}*'[s]	{
	pos(1);
	if(first==1)first=0;
}
{C}+['][s]	{
	if(NOCAPS)
		LOWER_TOKEN();
	OUT1(POS);
}
{P}+([-]{P}+)+	{
	if(NOCAPS)
		LOWER_TOKEN();
	OUT1(NOUN_ADJ);
}
{C}{C}+	{
	/* all-caps word: a following 's' makes it a plural, else treat as a word */
	if(NOCAPS)
		LOWER_TOKEN();
	if((i=input()) == 's'){
		yytext[yyleng++] = 's';
		yytext[yyleng] = '\0';
		OUT1(PNOUN);
	}
	else {
		/* do not push an end-of-input value back onto the input */
		if(i > 0)
			unput(i);
		if(!NOCAPS)
			LOWER_TOKEN();
		goto wd;
	}
}
[LD][']{C}{L}*	{
	if(NOCAPS){
		yytext[0] = tolower(yytext[0]);
		yytext[2] = tolower(yytext[2]);
	}
	OUT1(NOUN_ADJ);
}
{C}{L}*	{
	if(first==1)
		first=0;
	else cap = 1;
	if(yyleng==1 && yytext[0] == 'I'){
		cap = 0;
		goto wd;
	}
	yytext[0] = tolower(yytext[0]);
	goto wd;
}
{N}":"{N}{N}	{
	OUT1(NOUN_ADJ);
}
({N}*[,])*({N}+".")+[ \t\n]+{C}	{
	/*
	 * number ending a sentence: give the capital back to the input,
	 * cut the token at its last '.', and emit the sentence end
	 */
	for(i=yyleng-1;i>0;i--)
		if(yytext[i] == '.')break;
	unput(yytext[yyleng-1]);
	yytext[i] = '\0';
	OUT1(NOUN_ADJ);
	OUTN(fin);
	first = 1;
}
([hH]e"/"[sS]he)|([sS]he"/"[hH]e)	{
	if(NOCAPS)
		if(isupper(yytext[0]))yytext[0] = tolower(yytext[0]);
	OUT1(PRONS);
}
([hH]is"/"[hH]er)|([hH]er"/"[hH]is)	{
	if(NOCAPS)
		if(isupper(yytext[0]))yytext[0] = tolower(yytext[0]);
	OUT1(POS);
}
[ \t`]*[a-zA-Z0-9.]*("\/"[a-zA-Z0-9.]+)+[']*	{
	/*
	 * NOTE(review): here the trailing '.' is still part of yytext when
	 * ahead() is called, so on a 0 return the '.' is both printed with
	 * the token and pushed back onto the input -- confirm this is wanted.
	 */
	if(yytext[yyleng-1] == '.'){
		if(ahead() == 0)dot=1;
	}
	if(NOCAPS)
		LOWER_TOKEN();
	OUT1(NOUN_ADJ);
}
{N}+([,]{N}+)*("."{N}+)*[']*[s]*	{
	OUT1(NOUN_ADJ);
}
{N}*([,]{N}+)*("."{N}+)+[']*[s]*	{
	OUT1(NOUN_ADJ);
}
{N}+([,]{N}+)*("."{N}*)*[']*[s]*	{
	if(yytext[yyleng-1] == '.')dot=1;
	OUT1(NOUN_ADJ);
}
({A}*{N}+{A}*)+	{
	/*
	 * Peek at the next character: a '.' may continue the token
	 * (ahead() decides).  Anything else is pushed back so that the
	 * punctuation following the token is not lost.
	 */
	if((i = input()) == '.')
		ahead();
	else if(i > 0)
		unput(i);
	if(NOCAPS)
		LOWER_TOKEN();
	OUT1(NOUN_ADJ);
}
{N}+[%]	{
	OUT1(NOUN_ADJ);
}
"$"{N}+([,]{N}+)*("."{N}*)*	{
	if(yytext[yyleng-1] == '.')dot=1;
	OUT1(NOUN);
}
[Aa]"."[ ]*[Mm]"."	{
	OUT1(ADJ_ADV);
}
[Pp]"."[ ]*[Mm]"."	{
	OUT1(ADJ_ADV);
}
"a."[ ]*"d."	{
	OUT1(ADJ_ADV);
}
"b."[ ]*"c."	{
	OUT1(ADJ_ADV);
}
"i."[ ]*"e."	{
	OUT1(PREP);
}
"e."[ ]*"g."	{
	OUT1(PREP);
}
"etc."[ \n]*[,)]*	{
	/* print "etc." itself, then classify by the last character matched */
	i = yytext[4];
	yytext[4] = '\0';
	OUT1(NOUN);
	yytext[4] = i;
	yytext[0] = yytext[yyleng-1];
	yytext[1] = '\0';
	if(yytext[0] == ',' || yytext[0] == ')')
		OUT1(',');
	else {
		OUTN(fin);
		first = 1;
	}
}
"et al."	{
	OUT1(NOUN);
}
in"."[ \n]*{C}	{
	/* "in." before a capital: preposition that ends the sentence */
	unput(yytext[yyleng-1]);
	yytext[2] = '\0';
	OUT1(PREP);
	OUTN(fin);
	first = 1;
}
Ph"."[ ]*[Dd]"."	{
	OUT1(ADJ);
}
[A-Z]"."	{
	dot=1;
	OUT1(NOUN);
}
can't	{
	/* "can't" -> "can" + pending n't */
	yytext[3]='\0';
	yyleng -= 2;
	nflg=1;
	goto wd;
}
won't	{
	OUT1('X');
}
ain't	{
	OUT1('g');
}
{L}+n't	{
	nflg=1;
	yytext[yyleng-3]='\0';
	yyleng -= 3;
	goto wd;
}
[A-Z]{L}+n't	{
	yytext[0] = tolower(yytext[0]);
	nflg=1;
	yytext[yyleng-3]='\0';
	yyleng -= 3;
	goto wd;
}
o'clock	{
	OUT1(ADV);
}
{L}+'[s]	{
	pos(0);
}
'll	{
	OUT1(lookup("will",1,0));
}
've	{
	OUT1(lookup("have",1,0));
}
're	{
	OUT1(lookup("are",1,0));
}
'd	{
	OUT1(lookup("had",1,0));
}
'm	{
	OUT1(lookup("am",1,0));
}
'ld	{
	OUT1(lookup("would",1,0));
}
{L}+	{
wd:
	/* common word handling; several rules above jump here via goto wd */
	if((j = lookup(yytext,1,0)) != 0){
		first=0;
		if(cap){
			if(!NOCAPS)
				yytext[0] = toupper(yytext[0]);
			cap = 0;
			if(dot)OUTN(fin);
		}
		dot=0;
		OUT1(j);
		if(nflg==1){
			nflg=0;
			OUTN(nt);
		}
	}
	else{
		first = dot=0;
		if(yytext[yyleng-1] == 'y' && cap == 0){
			/*
			 * Letters preceding the final 'y'.  For one- and
			 * two-letter words these do not exist; use NUL
			 * instead of reading in front of yytext.
			 */
			int pen = yyleng >= 2 ? yytext[yyleng-2] : '\0';
			int ante = yyleng >= 3 ? yytext[yyleng-3] : '\0';

			switch(pen){
			case 'c': look(cy,yyleng-2,NOUN);
				break;
			case 'f': look(fy,yyleng-2,VERB);
				break;
			case 'l': look(ly,yyleng-2,ADV);
				break;
			case 'g': if(ante == 'o'){
					OUT1(NOUN);
					break;
				}
				look(gy,yyleng-2,ADJ);
				break;
			case 'r': switch(ante){
				case 'a': look(ary,yyleng-3,ADJ);
					break;
				case 'o': look(ory,yyleng-3,ADJ);
					break;
				case 'e': look(ery,yyleng-3,NOUN);
					break;
				default: look(ry,yyleng-2,NOUN);
				}
				break;
			case 't': if(ante == 'i')look(ity,yyleng-3,NOUN);
				else look(ty,yyleng-2,ADJ);
				break;
			default: OUT();
			}
		}
		else {
			if(cap){
				if(!NOCAPS)yytext[0] = toupper(yytext[0]);
				cap = 0;
				OUT1(NOUN_ADJ);
			}
			else {
				OUT();
			}
		}
	}
}
[\n]	;
[ ]+	;
[\t]+	;
";"	{
	OUT1(';');
	first=1;
}
(\"|`|')+	{
	/* any run of quote characters toggles the in-quotation flag */
	if(dot){
		OUTN(fin);
		dot=0;
	}
	if(qflg==1){
		qflg=0;
		OUT1('"');
	}
	else {
		qflg=1;
		first=1;
		OUT1('"');
	}
}
".\""	{
	qflg=0;
	first=1;
	OUT1(END);
}
"..."	{
	OUT1(',');
}
"/."	{
	first = 1;
	OUT1(END);
}
{A}{A}+"."	{
	/* word followed by '.': abbreviation, word + sentence end, or "abc.def" */
	yytext[yyleng-1] = '\0';
	if((j=abbrev(yytext,1,0)) != 0){
		if(isupper(yytext[0])){
			if(NOCAPS)yytext[0] = tolower(yytext[0]);
			if(first == 1)first=0;
		}
		yytext[yyleng-1] = '.';
		OUT1(j);
	}
	else {
		/*
		 * Drop the '.' from the token before looking ahead, so
		 * that ahead() appends its continuation directly after
		 * the word instead of after the NUL stored above.
		 */
		yyleng--;
		j = ahead();
		for(i=0;i<yyleng;i++)
			if(isupper((unsigned char)yytext[i])){
				yytext[i] = tolower((unsigned char)yytext[i]);
				if(i == 0)cap = 1;
				else cap = 0;
			}
		if(j == 0)goto wd;
		/* NOTE(review): cap is left as set above for the next token -- confirm */
		OUT1(NOUN_ADJ);
	}
}
"."	{
	first=1;
	OUT1(END);
}
"!\""	{
	qflg=0;
	first=1;
	OUT1(END);
}
"!"	{
	first=1;
	OUT1(END);
}
"?\""	{
	qflg=0;
	first=1;
	OUT1(END);
}
"?"	{
	first=1;
	OUT1(END);
}
":"	{
	OUT1(',');
	first=1;
}
[-]+	{
	OUT1(',');
	first=1;
}
","	{
	OUT1(',');
}
(\[|\(|\{|\]|\)|\})	{
	OUT1(',');
}
.	{
	/* any other character is dropped silently (diagnostic disabled) */
	;
}
%%
/*
 * look -- classify a word by suffix.
 *
 * Temporarily cuts yytext at index n, calls the function f on the stem
 * as f(yytext,1,0), restores the token, and prints it with the tag f
 * returned, or with the default tag cc when f returned 0.
 */
int
look(f,n,cc)
char (*f)();
int n;
char cc;
{
	int tag;
	char saved;

	saved = yytext[n];
	yytext[n] = '\0';
	tag = (*f)(yytext,1,0);
	yytext[n] = saved;
	if(tag != 0){
		OUT1(tag);
	}
	else {
		OUT1(cc);
	}
	return(0);
}

/*
 * pos -- handle a token of the form word's.
 *
 * flg == 1 means the word started with a capital, which is folded for
 * the lookup.  If lookup() knows the word it is printed with its tag
 * followed by the qs line; otherwise the whole token (capital restored
 * unless NOCAPS) is printed with tag POS.
 */
int
pos(flg)
int flg;
{
	int ii,j;

	if(flg == 1)yytext[0] = tolower(yytext[0]);
	/* the matching patterns guarantee an apostrophe is present */
	for(ii=yyleng-1;yytext[ii] != '\''; ii--);
	yytext[ii] = '\0';
	if((j=lookup(yytext,1,0)) != 0){
		yyleng = ii;
		OUT1(j);
		OUTN(qs);
	}
	else{
		if(flg==1 && !NOCAPS)yytext[0] = toupper(yytext[0]);
		yytext[ii] = '\'';
		OUT1(POS);
	}
	return(0);
}

char *filename="-";

/*
 * main -- print the leading ":" line, call getd(), getab() and ygetd()
 * (defined outside this file), then scan stdin or each named file in
 * turn.  Each scanned input is followed by the fin line.  Returns the
 * number of files that could not be opened.
 */
int
main(argc,argv)
int argc;
char *argv[];
{
	register int rc=0;

	putchar(':'); putchar('\n');
	getd();
	getab();
	ygetd();
	if(argc<=1) {
		yylex();
		OUTN(fin);
	}else{
		while(argc>1) {
			if(freopen(argv[1],"r",stdin)==NULL) {
				fprintf(stderr,"%s: cannot open\n", argv[1]);
				rc++;
			}else{
				filename=argv[1];
				yylex();
				OUTN(fin);
			}
			argc--; argv++;
		}
	}
	return(rc);
}

/*
 * True when no further character may be appended to yytext.  YYLMAX is
 * only known when lex keeps yytext in a fixed array; otherwise no bound
 * is available here.
 */
#ifdef YYLMAX
#define TOKEN_FULL()	(yyleng >= YYLMAX - 1)
#else
#define TOKEN_FULL()	0
#endif

/*
 * ahead -- decide whether a '.' just taken from the input continues
 * the current token.
 *
 * If the character after the '.' is alphanumeric, ".", that character
 * and everything up to the next white space (or end of input) are
 * appended to yytext and 1 is returned.  Otherwise the character and
 * the '.' are pushed back onto the input and 0 is returned.
 */
int
ahead()
{
	register int c;

	c = input();
	if(c > 0 && isalnum(c)){
		yytext[yyleng++] = '.';
		/* keep the character that was just tested */
		yytext[yyleng++] = c;
		/* stop at end of input as well as at white space */
		while((c = input()) > 0 && !isspace(c)){
			if(TOKEN_FULL())
				break;
			yytext[yyleng++] = c;
		}
		yytext[yyleng] = '\0';
		if(c > 0)
			unput(c);
		return(1);
	}
	if(c > 0)
		unput(c);
	unput('.');
	return(0);
}