xref: /original-bsd/usr.bin/diction/style3/part.l (revision 9e892dcf)
1 %{
2 
3 /*-
4  * %sccs.include.proprietary.c%
5  */
6 
7 #ifndef lint
8 static char sccsid[] = "@(#)part.l	8.1 (Berkeley) 06/06/93";
9 #endif /* not lint */
10 
11 #include "style.h"
12 #include "names.h"
13 #include "conp.h"
14 
15 #undef yywrap
16 
17 FILE *deb;
18 int nosave = 1;
19 int part = 0;
20 int barebones = 0;
21 int topic = 0;
22 int style = 1;
23 int pastyle = 0;
24 int pstyle = 0;
25 int lstyle = 0;
26 int rstyle = 0;
27 int estyle = 0;
28 int nstyle = 0;
29 int Nstyle = 0;
30 int lthresh;
31 int rthresh;
32 int nomin;
33 char s[SCHAR];
34 char *sptr = s;
35 struct ss sent[SLENG];
36 struct ss *sentp = sent;
37 float wperc();
38 float sperc();
39 float typersent();
40 float vperc();
41 int numsent = 0;
42 int qcount = 0;
43 int icount = 0;
44 long vowel = 0;
45 long numwds = 0;
46 long twds = 0;
47 long numnonf = 0;
48 long letnonf = 0;
49 int maxsent = 0;
50 int maxindex = 0;
51 int minsent = 30;
52 int minindex = 0;
53 int simple = 0;
54 int compound = 0;
55 int compdx = 0;
56 int prepc = 0;
57 int conjc = 0;
58 int complex = 0;
59 int tobe = 0;
60 int adj = 0;
61 int infin = 0;
62 int pron = 0;
63 int passive = 0;
64 int aux = 0;
65 int adv = 0;
66 int verbc = 0;
67 int tverbc = 0;
68 int noun = 0;
69 long numlet = 0;
70 int beg[15]  = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
71 int sleng[50];
72 int nsleng = 0;
73 int j,jj,i;
74 int comma = 0;
75 int cflg;
76 int question;
77 int quote = 0;
78 char *st;
79 int initf = 0;
80 int over = 1;
81 int nroff = 0;
82 int nrofflg = 0;
83 int leng[MAXPAR];
84 int sentno= 0;
85 %}
86 C	[A-DF-Za-z]
87 %%
^{C}:.+	{
	/*
	 * Token line "<class>:<text>".  Store the class letter in both
	 * cc and ic of the next sent[] record; NOM is folded into NOUN
	 * for cc only, so ic keeps the original letter.
	 */
collect:
	sentp->cc = sentp->ic = yytext[0];
	if(sentp->cc == NOM)
		sentp->cc = NOUN;
collect1:
	/*
	 * Shared tail, also entered from the ";:" rule with cc/ic preset.
	 * The record points at the current fill position of s[].
	 */
	nsleng++;
	sentp->leng = yyleng-2;
	sentp++->sp = sptr;
	if(sentp >= &sent[SLENG-1]){
		/* table full: warn once per sentence, then reuse the last slot */
		if(over)fprintf(stderr,"sentence %d too many words\n",numsent+2);
		over=0;
		sentp--;
	}
	if(sptr+yyleng >= &s[SCHAR-1]){
		/* text would not fit: warn once and drop it */
		if(over)fprintf(stderr,"sentence %d too many characters\n",numsent+2);
		over=0;
	}
	else {
		/* copy the text after "<class>:" and terminate it */
		for(i=2;i<yyleng;i++)*sptr++=yytext[i];
		*sptr++ = '\0';
	}
	}
^";:".+	{
	/* a ";" token is classed as END but keeps ';' as its input class */
	sentp->cc=END;
	sentp->ic = ';';
	goto collect1;
	}
^",:".+	{
	comma++;
	goto collect;
	}
^",:"[:,-]+	{
	comma++;
	goto collect;
	}
[\n]	;
^"\":".+	{
	goto collect;
	}
^E:"/."	{
	cflg = 1;
	goto sdone;
	}
^E:.+	{
	cflg = 0;
sdone:
	/* sentence terminator: store it as the final END record */
	over=1;
	sentp->cc=sentp->ic=END;
	sentp++->sp = sptr;
	/*
	 * Bounded copy.  The check in collect only reserves a few bytes
	 * at the end of s[], so a long terminator line used to be
	 * written past the array; it is now truncated instead.
	 */
	for(i=2;i<yyleng && sptr < &s[SCHAR-1];i++)*sptr++=yytext[i];
	*sptr++='\0';
	if(yytext[2]=='?')question=1;
	else question=0;

fragment:
	/*
	 * Also entered from the ":" rule when a sentence was still open.
	 * Walk the collected records and reclassify the leading tokens
	 * before handing the sentence to outp().
	 */
	jj=0;
	if(quote == 1 && sent[jj].cc == ED){
		/* previous sentence ended in ?" or !" : treat a leading ED as VERB */
		sent[jj].cc = VERB;
		quote = 0;
	}
	if(sent[jj].cc=='"')jj++;
	if(sent[jj].cc==SUBCONJ){
		if(sent[jj+1].cc == ','){
			/* SUBCONJ directly followed by a comma becomes ADV */
			sent[jj].cc=ADV;
			jj += 2;
			comma--;
		}
		else {
			jj=scan(1,',',0);
			if(jj != -1)jj++;
			comma--;
		}
	}
	if(jj != -1){
		if(sent[jj].cc==CONJ || sent[jj].cc=='"')jj++;
		while((jj=scan(jj,END,cflg)) != -1){
			jj++;
			if(sent[jj].cc == SUBCONJ && sent[jj+1].cc == ','){
				sent[jj].cc=ADV;
				jj += 2;
				comma--;
			}
		}
	}
	/*
	 * NOTE(review): i is the file-scope index.  In this file it was
	 * last set by the copy loop above (or is stale when arriving from
	 * the ":" rule); scan() is defined elsewhere and may also set it.
	 * Confirm that sent[i] is the terminator record here.
	 */
	st = sent[i].sp;
	if(*(st+1) == '"')
		if(*st == '?' || *st == '!')quote = 1;
	outp();
	nsleng = 0;
	if(nroff){
		/* a ":" line ended this sentence: flush pending .SL, then echo it */
		if(sentno > 0){
			printf(".SL \"");
			for(i=0;i<sentno;i++)
				printf(" %d",leng[i]);
			printf("\"\n");
			sentno = 0;
		}
		printf("%s",&yytext[1]);
		nroff = 0;
	}
	/* start the next sentence with empty buffers */
	sptr=s;
	sentp=sent;
	comma=0;
	}
;.+	{
	/* in summary mode the number after ';' becomes nomin */
	if(style){
		nomin = atoi(&yytext[1]);
	}
	}
^:.+[\n]	{
	/* ":" line: echoed without the colon, after any open sentence is closed */
	nrofflg=1;
	if(sentp != sent){
		/* tokens are pending: close them with a synthetic "." terminator */
		sentp->cc = sentp->ic = END;
		sentp++->sp = sptr;
		*sptr++ = '.';
		*sptr++ = '\0';
		over = 1;
		nroff = 1;
		goto fragment;
	}
	if(sentno > 0){
		printf(".SL \"");
		for(i=0;i<sentno;i++)
			printf(" %d",leng[i]);
		printf("\"\n");
		sentno = 0;
	}
	printf("%s",&yytext[1]);
	}
218 %%
219 yywrap(){
220 	int ii;
221 	int ml,mg,lsum,gsum;
222 	float aindex, avl, avw;
223 	float cindex,kindex,findex,fgrad;
224 	float syl, avsy, adjs,snonf;
225 	FILE *io;
226 
227 	if(style){
228 	if(numwds == 0 || numsent == 0)exit(0);
229 	avw = (float)(numwds)/(float)(numsent);
230 	avl = (float)(numlet)/(float)(numwds);
231 	aindex = 4.71*avl + .5*avw -21.43;
232 	syl = .9981*vowel-.3432*twds;
233 	avsy = syl/twds;
234 	kindex = 11.8*avsy+.39*avw-15.59;
235 	findex = 206.835-84.6*avsy-1.015*avw;
236 	if(findex < 30.)fgrad = 17.;
237 	else if(findex > 100.) fgrad = 4.;
238 	else if(findex > 70.)fgrad=(100.-findex)/10 +5.;
239 	else if(findex > 60.)fgrad =(70.-findex)/10+8.;
240 	else if(findex >50.)fgrad=(60.-findex)/5+10;
241 	else fgrad=(50.-findex)/6.66 +13.;
242 	adjs = 100 * (float)numsent/numwds;
243 	cindex = 5.89*avl-.3*adjs-15.8;
244 	printf("readability grades:\n	(Kincaid) %4.1f  (auto) %4.1f  (Coleman-Liau) %4.1f  (Flesch) %4.1f (%4.1f)\n",kindex,aindex,cindex,fgrad,findex);
245 	printf("sentence info:\n");
246 	printf("	no. sent %d no. wds %ld\n",numsent,numwds);
247 	printf("	av sent leng %4.1f av word leng %4.2f\n",avw,avl);
248 	printf("	no. questions %d no. imperatives %d\n",qcount,icount);
249 	if(numnonf != 0){
250 		snonf = (float)(letnonf)/(float)(numnonf);
251 	}
252 	printf("	no. nonfunc wds %ld  %4.1f%%   av leng %4.2f\n",numnonf,(float)(numnonf)*100/numwds,snonf);
253 	mg = avw + 10.5;
254 	if(mg > 49)mg = 49;
255 	ml = avw - 4.5;
256 	if(ml <= 0)ml = 1;
257 	else if(ml > 49)ml=48;
258 	gsum = lsum = 0;
259 	for(ii=0;ii<50;ii++){
260 		if(ii < ml)lsum += sleng[ii];
261 		else if(ii > mg)gsum+= sleng[ii];
262 	}
263 	printf("	short sent (<%d)%3.0f%% (%d) long sent (>%d) %3.0f%% (%d)\n",ml,sperc(lsum),lsum,mg,sperc(gsum),gsum);
264 	printf("	longest sent %d wds at sent %d; shortest sent %d wds at sent %d\n",maxsent,maxindex,minsent,minindex);
265 	printf("sentence types:\n");
266 	printf("	simple %3.0f%% (%d) complex %3.0f%% (%d)\n",sperc(simple),simple,sperc(complex),complex);
267 	printf("	compound %3.0f%% (%d) compound-complex %3.0f%% (%d)\n",sperc(compound),compound,sperc(compdx),compdx);
268 	printf("word usage:\n");
269 	printf("	verb types as %% of total verbs\n");
270 	printf("	tobe %3.0f%% (%d) aux %3.0f%% (%d) inf %3.0f%% (%d)\n",vperc(tobe),tobe,vperc(aux),aux,vperc(infin),infin);
271 	if(verbc != 0)adjs = (float)(passive)*100/(float)(verbc);
272 	else adjs=0;
273 	printf("	passives as %% of non-inf verbs %3.0f%% (%d)\n",adjs,passive);
274 	printf("	types as %% of total\n");
275 	printf("	prep %3.1f%% (%d) conj %3.1f%% (%d) adv %3.1f%% (%d)\n",wperc(prepc),prepc,wperc(conjc),conjc,wperc(adv),adv);
276 	printf("	noun %3.1f%% (%d) adj %3.1f%% (%d) pron %3.1f%% (%d)\n",wperc(noun),noun,
277 		wperc(adj),adj,wperc(pron),pron);
278 	printf("	nominalizations %3.0f %% (%d)\n",wperc(nomin),nomin);
279 	printf("sentence beginnings:\n");
280 	ii=beg[0]+beg[7]+beg[6]+beg[3]+beg[8];
281 	printf("	subject opener: noun (%d) pron (%d) pos (%d) adj (%d) art (%d) tot %3.0f%%\n",
282 beg[0],beg[7],beg[6],beg[3],beg[8],sperc(ii));
283 	printf("	prep %3.0f%% (%d) adv %3.0f%% (%d) \n",sperc(beg[9]),beg[9],sperc(beg[4]),beg[4]);
284 	printf("	verb %3.0f%% (%d) ",sperc(beg[1]+beg[10]+beg[11]),beg[1]+beg[10]+beg[11]);
285 	printf(" sub_conj %3.0f%% (%d) conj %3.0f%% (%d)\n",sperc(beg[13]),beg[13],sperc(beg[5]),beg[5]);
286 	printf("	expletives %3.0f%% (%d)\n",sperc(beg[14]),beg[14]);
287 #ifdef SCATCH
288 	if(nosave && (fopen(SCATCH,"r")) != NULL){
289 	if(((io=fopen(SCATCH,"a")) != NULL)){
290 		fprintf(io," read %4.1f %4.1f %4.1f %4.1f %4.1f\n",kindex, aindex, cindex, findex, fgrad);
291 		fprintf(io," sentl %d %ld %4.2f %4.2f %d %d %ld %4.2f\n",numsent,numwds,avw,avl,qcount,icount,numnonf,snonf);
292 		fprintf(io," l var %d %d %d %d %d\n",ml,lsum,mg,gsum,maxsent);
293 		fprintf(io," t var %d %d %d %d\n",simple,complex,compound,compdx);
294 		fprintf(io," verbs %d %d %d %d %d %d\n",tverbc,verbc,tobe,aux,infin,passive);
295 		fprintf(io," ty %d %d %d %d %d %d %d\n",prepc,conjc,adv,noun,adj,pron,nomin);
296 		fprintf(io," beg %d %d %d %d %d %d\n",beg[0],beg[7],beg[6],beg[3],beg[8],ii);
297 		fprintf(io," sbeg %d %d %d %d %d %d\n",beg[9],beg[4],beg[1]+beg[10]+beg[11],beg[13],beg[5],beg[14]);
298 		}
299 	}
300 #endif
301 	}
302 	return(1);
303 }
304 float
305 wperc(a)
306 {
307 	return((float)(a)*100/numwds);
308 }
309 float
310 sperc(a)
311 {
312 	return((float)(a)*100/numsent);
313 }
314 float
315 typersent(a)
316 {
317 return((float)(a)/numsent);
318 }
319 float
320 vperc(a)
321 {
322 	if(tverbc == 0)return(0);
323 	return((float)(a)*100/tverbc);
324 }
325 main(argc,argv)
326 char **argv;
327 {
328 	while(--argc > 0 && (++argv)[0][0] == '-' ){
329 		switch(argv[0][1]){
330 		case 'd': nosave = 0;
331 			continue;
332 		case 's': style=1;
333 			continue;
334 		case 'p': pastyle=style=1;
335 			continue;
336 		case 'a': pstyle=style=1;
337 			continue;
338 		case 'e': estyle = style = 1;
339 			continue;
340 		case 'n': nstyle = style = 1;
341 			continue;
342 		case 'N': Nstyle = style = 1;
343 			continue;
344 		case 'l': style=lstyle=1;
345 			lthresh = atoi(*(++argv));
346 			argc--;
347 			continue;
348 		case 'r':
349 			style=rstyle=1;
350 			rthresh = atoi(*(++argv));
351 			argc--;
352 			continue;
353 		case 'P':
354 			part = 1;
355 			style = 0;
356 			continue;
357 		case 'b':		/* print bare bones info rje */
358 			barebones = 1;
359 			style = 0;
360 			continue;
361 		case 'T':		/*topic*/
362 			style = 0;
363 			topic = 1;
364 			continue;
365 		default:
366 			fprintf(stderr,"unknown flag to part %s\n",*argv);
367 			exit(1);
368 		}
369 		argv++;
370 	}
371 #ifdef SNOM
372 	if(fopen(SNOM,"r") != NULL){
373 		deb = fopen(SNOM,"a");	/* SAVE NOM*/
374 	}
375 #else
376 	deb = NULL;
377 #endif
378 	yylex();
379 	if(nrofflg && sentno > 0){
380 		printf(".SL \"");
381 		for(i=0;i<sentno;i++)
382 			printf(" %d",leng[i]);
383 		printf("\"\n");
384 	}
385 }
386