%{
/*-
 * %sccs.include.proprietary.c%
 */

#ifndef lint
static char sccsid[] = "@(#)style1.l 8.2 (Berkeley) 01/28/94";
#endif /* not lint */

/*
 * break out words, output cap + word(inverted)
 *
 * Each recognized token is written on its own line.  OUT1 writes
 * "<tag char>:<text>", OUTN writes a fixed string, and OUT writes the
 * token text reversed, character by character.  The tag constants
 * (NOUN, ADJ, END, ...) are not defined in this file; presumably they
 * come from names.h.
 */

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#define OUT()	for(i=yyleng-1;i>=0; i--)putchar(my_yytext[i]); putchar('\n')
#define OUT1(nam)	printf("%c:%s\n",nam,my_yytext)
#define OUTN(string)	printf("%s\n",string)
#include "names.h"
#include "nhash.c"
#include "dict.c"
#include "ydict.c"
#include "abbrev.c"
char nt[] = "D:n't";
char qs[] = "c:'s";
char fin[] = "E:.";
int NOCAPS = 0;		/* if set all caps are turned to lower case */
int i,j;		/* scratch variables shared by the actions */
int dot = 0;		/* set when a token ended in a '.' that may end a sentence */
int first = 1;		/* set while the next word is the first of a sentence */
int qflg,nflg;		/* inside-quotation flag; pending "n't" flag */
int cap = 0;		/* set when the current word was capitalized in the input */

/*
 * Private, writable copy of the matched text.  The actions edit the
 * token in place and call unput(), so they work on this copy instead
 * of on yytext.
 */
static char my_yytext[YY_BUF_SIZE];

/*
 * Copy the match into my_yytext before every action.  The copy is
 * bounded (an over-long token is truncated) and leaves room for one
 * appended character plus the terminating NUL, which the {C}{C}+
 * action relies on.
 */
#define YY_USER_ACTION \
	if((size_t)yyleng > sizeof(my_yytext) - 2) \
		yyleng = sizeof(my_yytext) - 2; \
	memcpy(my_yytext, yytext, (size_t)yyleng); \
	my_yytext[yyleng] = '\0';
%}

L	[a-z]
N	[0-9]
C	[A-Z]
A	[a-zA-Z]
P	[a-zA-Z0-9]

%%
^[.!].+[\n]	{
	/* line starting with '.' or '!': close any open sentence, then echo it after ':' */
	if(dot){
		OUTN(fin);
		dot = 0;
		first = 1;
	}
	printf(":%s",my_yytext);
}
May	{
	/* mid-sentence "May" is tagged NOUN; sentence-initial goes through the word lookup */
	if(first == 0){
		OUT1(NOUN);
	}
	else {
		first = 0;
		my_yytext[0] = tolower(my_yytext[0]);
		cap = 1;
		goto wd;
	}
}
"U.S."	{
	OUT1(NOUN);
}
{C}{L}*'[s]	{
	pos(1);
	if(first==1)first=0;
}
{C}+['][s]	{
	if(NOCAPS)
		for(i=0;i<yyleng;i++)
			if(isupper(my_yytext[i]))my_yytext[i] = tolower(my_yytext[i]);
	OUT1(POS);
}
{P}+([-]{P}+)+	{
	if(NOCAPS)
		for(i=0;i<yyleng;i++)
			if(isupper(my_yytext[i]))my_yytext[i] = tolower(my_yytext[i]);
	OUT1(NOUN_ADJ);
}
{C}{C}+	{
	if(NOCAPS)
		for(i=0;i<yyleng;i++)
			my_yytext[i] = tolower(my_yytext[i]);
	/* all-caps word directly followed by 's' is tagged PNOUN with the 's' attached */
	if((i=input()) == 's'){
		my_yytext[yyleng++] = 's';
		my_yytext[yyleng] = '\0';
		OUT1(PNOUN);
	}
	else {
		/* only push back a real character, never the end-of-input value */
		if(i > 0)
			unput(i);
		if(!NOCAPS)
			for(i=0;i<yyleng;i++)my_yytext[i] = tolower(my_yytext[i]);
		goto wd;
	}
}
[LD][']{C}{L}*	{
	if(NOCAPS){
		my_yytext[0] = tolower(my_yytext[0]);
		my_yytext[2] = tolower(my_yytext[2]);
	}
	OUT1(NOUN_ADJ);
}
{C}{L}*	{
	if(first==1)
		first=0;
	else cap = 1;
	/* the pronoun "I" keeps its case and is never treated as capitalized */
	if(yyleng==1 && my_yytext[0] == 'I'){
		cap = 0;
		goto wd;
	}
	my_yytext[0] = tolower(my_yytext[0]);
	goto wd;
}
{N}":"{N}{N}	{
	OUT1(NOUN_ADJ);
}
({N}*[,])*({N}+".")+[ \t\n]+{C}	{
	/* number ending a sentence: cut at the last '.', give the capital back to the scanner */
	for(i=yyleng-1;i>0;i--)
		if(my_yytext[i] == '.')break;
	unput(my_yytext[yyleng-1]);
	my_yytext[i] = '\0';
	OUT1(NOUN_ADJ);
	OUTN(fin);
	first = 1;
}
([hH]e"/"[sS]he)|([sS]he"/"[hH]e)	{
	if(NOCAPS)
		if(isupper(my_yytext[0]))my_yytext[0] = tolower(my_yytext[0]);
	OUT1(PRONS);
}
([hH]is"/"[hH]er)|([hH]er"/"[hH]is)	{
	if(NOCAPS)
		if(isupper(my_yytext[0]))my_yytext[0] = tolower(my_yytext[0]);
	OUT1(POS);
}
[ \t`]*[a-zA-Z0-9.]*("\/"[a-zA-Z0-9.]+)+[']*	{
	if(my_yytext[yyleng-1] == '.'){
		if(ahead() == 0)dot=1;
	}
	if(NOCAPS)
		for(i=0;i<yyleng;i++)
			if(isupper(my_yytext[i]))my_yytext[i] = tolower(my_yytext[i]);
	OUT1(NOUN_ADJ);
}
{N}+([,]{N}+)*("."{N}+)*[']*[s]*	{
	OUT1(NOUN_ADJ);
}
{N}*([,]{N}+)*("."{N}+)+[']*[s]*	{
	OUT1(NOUN_ADJ);
}
{N}+([,]{N}+)*("."{N}*)*[']*[s]*	{
	if(my_yytext[yyleng-1] == '.')dot=1;
	OUT1(NOUN_ADJ);
}
({A}*{N}+{A}*)+	{
	/*
	 * Peek at the next character.  A '.' may continue the token
	 * (ahead() decides); anything else is handed back to the scanner
	 * instead of being silently discarded.
	 */
	j = input();
	if(j == '.')
		ahead();
	else if(j > 0)
		unput(j);
	if(NOCAPS)
		for(i=0;i<yyleng;i++)
			if(isupper(my_yytext[i]))my_yytext[i]=tolower(my_yytext[i]);
	OUT1(NOUN_ADJ);
}
{N}+[%]	{
	OUT1(NOUN_ADJ);
}
"$"{N}+([,]{N}+)*("."{N}*)*	{
	if(my_yytext[yyleng-1] == '.')dot=1;
	OUT1(NOUN);
}
[Aa]"."[ ]*[Mm]"."	{
	OUT1(ADJ_ADV);
}
[Pp]"."[ ]*[Mm]"."	{
	OUT1(ADJ_ADV);
}
"a."[ ]*"d."	{
	OUT1(ADJ_ADV);
}
"b."[ ]*"c."	{
	OUT1(ADJ_ADV);
}
"i."[ ]*"e."	{
	OUT1(PREP);
}
"e."[ ]*"g."	{
	OUT1(PREP);
}
"etc."[ \n]*[,)]*	{
	/* print "etc." as NOUN, then classify by the last matched character */
	i = my_yytext[4];
	my_yytext[4] = '\0';
	OUT1(NOUN);
	my_yytext[4] = i;
	my_yytext[0] = my_yytext[yyleng-1];
	my_yytext[1] = '\0';
	if(my_yytext[0] == ',' || my_yytext[0] == ')')
		OUT1(',');
	else {
		OUTN(fin);
		first = 1;
	}
}
"et al."	{
	OUT1(NOUN);
}
in"."[ \n]*{C}	{
	/* "in." followed by a capital: "in" ends the sentence, the capital is rescanned */
	unput(my_yytext[yyleng-1]);
	my_yytext[2] = '\0';
	OUT1(PREP);
	OUTN(fin);
	first = 1;
}
Ph"."[ ]*[Dd]"."	{
	OUT1(ADJ);
}
[A-Z]"."	{
	dot=1;
	OUT1(NOUN);
}
can't	{
	/* reduce to "can" and remember the negation for the wd: code */
	my_yytext[3]='\0';
	yyleng -= 2;
	nflg=1;
	goto wd;
}
won't	{
	OUT1('X');
}
ain't	{
	OUT1('g');
}
{L}+n't	{
	nflg=1;
	my_yytext[yyleng-3]='\0';
	yyleng -= 3;
	goto wd;
}
[A-Z]{L}+n't	{
	my_yytext[0] = tolower(my_yytext[0]);
	nflg=1;
	my_yytext[yyleng-3]='\0';
	yyleng -= 3;
	goto wd;
}
o'clock	{
	OUT1(ADV);
}
{L}+'[s]	{
	pos(0);
}
'll	{
	OUT1(lookup("will",1,0));
}
've	{
	OUT1(lookup("have",1,0));
}
're	{
	OUT1(lookup("are",1,0));
}
'd	{
	OUT1(lookup("had",1,0));
}
'm	{
	OUT1(lookup("am",1,0));
}
'ld	{
	OUT1(lookup("would",1,0));
}
{L}+	{
wd:
	/*
	 * Common word handling; several actions above jump here with the
	 * word lower-cased in my_yytext and cap/nflg set as needed.
	 */
	if((j = lookup(my_yytext,1,0)) != 0){
		/* lookup() gave a tag: print it with the word */
		first=0;
		if(cap){
			if(!NOCAPS)
				my_yytext[0] = toupper(my_yytext[0]);
			cap = 0;
			if(dot)OUTN(fin);
		}
		dot=0;
		OUT1(j);
		if(nflg==1){
			nflg=0;
			OUTN(nt);
		}
	}
	else{
		first = dot=0;
		/* the length test keeps the suffix checks inside the buffer */
		if(yyleng >= 2 && my_yytext[yyleng-1] == 'y' && cap == 0){
			/* third letter from the end, or NUL for a two-letter word */
			int third = yyleng >= 3 ? my_yytext[yyleng-3] : '\0';

			/* unknown word ending in 'y': choose a suffix routine by the preceding letters */
			switch(my_yytext[yyleng-2]){
			case 'c':
				look(cy,yyleng-2,NOUN);
				break;
			case 'f':
				look(fy,yyleng-2,VERB);
				break;
			case 'l':
				look(ly,yyleng-2,ADV);
				break;
			case 'g':
				if(third == 'o'){
					OUT1(NOUN);
					break;
				}
				look(gy,yyleng-2,ADJ);
				break;
			case 'r':
				switch(third){
				case 'a':
					look(ary,yyleng-3,ADJ);
					break;
				case 'o':
					look(ory,yyleng-3,ADJ);
					break;
				case 'e':
					look(ery,yyleng-3,NOUN);
					break;
				default:
					look(ry,yyleng-2,NOUN);
				}
				break;
			case 't':
				if(third == 'i')look(ity,yyleng-3,NOUN);
				else look(ty,yyleng-2,ADJ);
				break;
			default:
				OUT();
			}
		}
		else {
			if(cap){
				if(!NOCAPS)my_yytext[0] = toupper(my_yytext[0]);
				cap = 0;
				OUT1(NOUN_ADJ);
			}
			else {
				OUT();
			}
		}
	}
}
[\n]	;
[ ]+	;
[\t]+	;
";"	{
	OUT1(';');
	first=1;
}
(\"|`|')+	{
	/* a run of quote characters toggles the quotation state */
	if(dot){
		OUTN(fin);
		dot=0;
	}
	if(qflg==1){
		qflg=0;
		OUT1('"');
	}
	else {
		qflg=1;
		first=1;
		OUT1('"');
	}
}
".\""	{
	qflg=0;
	first=1;
	OUT1(END);
}
"..."	{
	OUT1(',');
}
{A}{A}+"."	{
	/* try the word without its trailing '.' against abbrev() */
	my_yytext[yyleng-1] = '\0';
	if((j=abbrev(my_yytext,1,0)) != 0){
		if(isupper(my_yytext[0])){
			if(NOCAPS)my_yytext[0] = tolower(my_yytext[0]);
			if(first == 1)first=0;
		}
		my_yytext[yyleng-1] = '.';
		OUT1(j);
	}
	else {
		/*
		 * Drop the '.' from the token before looking ahead.  If the
		 * text continues, ahead() writes the '.' back over the NUL and
		 * appends the rest, so the whole token is printed; otherwise
		 * the '.' is pushed back to be scanned on its own.
		 */
		yyleng--;
		j = ahead();
		for(i=0;i<yyleng;i++)
			if(isupper(my_yytext[i])){
				my_yytext[i] = tolower(my_yytext[i]);
				if(i == 0)cap = 1;
				else cap = 0;
			}
		if(j == 0)goto wd;
		OUT1(NOUN_ADJ);
	}
}
"."	{
	first=1;
	OUT1(END);
}
"!\""	{
	qflg=0;
	first=1;
	OUT1(END);
}
"!"	{
	first=1;
	OUT1(END);
}
"?\""	{
	qflg=0;
	first=1;
	OUT1(END);
}
"?"	{
	first=1;
	OUT1(END);
}
":"	{
	OUT1(',');
	first=1;
}
[-]+	{
	OUT1(',');
	first=1;
}
","	{
	OUT1(',');
}
(\[|\(|\{|\]|\)|\})	{
	OUT1(',');
}
.	{
	/* any other character is ignored */
	;
}
%%
/*
 * look - classify the current word with a suffix routine.
 *
 * f	routine called on the first n characters of my_yytext
 * n	length of the stem; my_yytext[n] is temporarily set to NUL
 * cc	tag printed when f returns 0
 *
 * Prints the tag returned by f, or cc if f returned 0, followed by the
 * complete (restored) word.
 */
look(f,n,cc)
char (*f)();
int n;
char cc;
{
	int nn;
	char save;

	save=my_yytext[n];
	my_yytext[n] = '\0';
	nn=(*f)(my_yytext,1,0);
	my_yytext[n] = save;
	if(nn != 0){
		OUT1(nn);
	}
	else {
		OUT1(cc);
	}
}

/*
 * pos - handle a word ending in 's.
 *
 * flg is 1 when the word began with a capital letter.  The part before
 * the apostrophe is passed to lookup(); if that returns a tag, the stem
 * is printed with it followed by the qs marker, otherwise the whole
 * token is printed tagged POS.
 */
pos(flg){
	int ii,j;

	if(flg == 1)my_yytext[0] = tolower(my_yytext[0]);
	/* the calling patterns guarantee an apostrophe; ii > 0 only guards the scan */
	for(ii=yyleng-1; ii > 0 && my_yytext[ii] != '\''; ii--)
		;
	my_yytext[ii] = '\0';
	if((j=lookup(my_yytext,1,0)) != 0){
		yyleng = ii;
		OUT1(j);
		OUTN(qs);
	}
	else{
		if(flg==1 && !NOCAPS)my_yytext[0] = toupper(my_yytext[0]);
		my_yytext[ii] = '\'';
		OUT1(POS);
	}
}

char *filename="-";

/*
 * main - print the leading ":" line, call the table set-up routines
 * getd(), getab() and ygetd(), then scan standard input or each file
 * named on the command line.  The fin marker is printed after each
 * input.  Returns the number of files that could not be opened.
 */
main(argc,argv)
int argc;
char *argv[];
{
	register int rc=0;

	putchar(':'); putchar('\n');
	getd();
	getab();
	ygetd();
	if(argc<=1) {
		yylex();
		OUTN(fin);
	}else{
		while(argc>1) {
			if(freopen(argv[1],"r",stdin)==NULL) {
				fprintf(stderr,"%s: cannot open\n", argv[1]);
				rc++;
			}else{
				filename=argv[1];
				yylex();
				OUTN(fin);
			}
			argc--; argv++;
		}
	}
	return(rc);
}

/*
 * addch - append c to my_yytext if there is still room for it and a
 * terminating NUL; otherwise the character is dropped.
 */
static void
addch(c)
int c;
{
	if((size_t)yyleng < sizeof(my_yytext) - 1)
		my_yytext[yyleng++] = c;
}

/*
 * ahead - decide whether a '.' continues the current token.
 *
 * Called when the caller has just seen a '.'.  If the next input
 * character is alphanumeric, ".", that character and everything up to
 * the next white space (or end of input) are appended to my_yytext and
 * 1 is returned.  Otherwise the character and a '.' are pushed back
 * onto the input and 0 is returned.
 *
 * input() reports end of input as 0 or EOF depending on the lex
 * implementation, so any value <= 0 is treated as end of input and is
 * never passed to unput().
 */
ahead(){
	register int c;

	c = input();
	if(c > 0 && isalnum(c)){
		addch('.');
		do {
			/* keep the character that was just tested as well */
			addch(c);
			c = input();
		} while(c > 0 && !isspace(c));
		my_yytext[yyleng] = '\0';
		if(c > 0)
			unput(c);
		return(1);
	}
	if(c > 0)
		unput(c);
	unput('.');
	return(0);
}