1 
2 /*
3 ** nbench1.c
4 */
5 
6 /********************************
7 **       BYTEmark (tm)         **
8 ** BYTE NATIVE MODE BENCHMARKS **
9 **       VERSION 2             **
10 **                             **
11 ** Included in this source     **
12 ** file:                       **
13 **  Numeric Heapsort           **
14 **  String Heapsort            **
15 **  Bitfield test              **
16 **  Floating point emulation   **
17 **  Fourier coefficients       **
18 **  Assignment algorithm       **
19 **  IDEA Encyption             **
20 **  Huffman compression        **
21 **  Back prop. neural net      **
22 **  LU Decomposition           **
23 **    (linear equations)       **
24 ** ----------                  **
25 ** Rick Grehan, BYTE Magazine  **
26 *********************************
27 **
28 ** BYTEmark (tm)
29 ** BYTE's Native Mode Benchmarks
30 ** Rick Grehan, BYTE Magazine
31 **
32 ** Creation:
33 ** Revision: 3/95;10/95
34 **  10/95 - Removed allocation that was taking place inside
35 **   the LU Decomposition benchmark. Though it didn't seem to
36 **   make a difference on systems we ran it on, it nonetheless
37 **   removes an operating system dependency that probably should
38 **   not have been there.
39 **
40 ** DISCLAIMER
41 ** The source, executable, and documentation files that comprise
42 ** the BYTEmark benchmarks are made available on an "as is" basis.
43 ** This means that we at BYTE Magazine have made every reasonable
44 ** effort to verify that the there are no errors in the source and
45 ** executable code.  We cannot, however, guarantee that the programs
46 ** are error-free.  Consequently, McGraw-HIll and BYTE Magazine make
47 ** no claims in regard to the fitness of the source code, executable
48 ** code, and documentation of the BYTEmark.
49 **  Furthermore, BYTE Magazine, McGraw-Hill, and all employees
50 ** of McGraw-Hill cannot be held responsible for any damages resulting
51 ** from the use of this code or the results obtained from using
52 ** this code.
53 */
54 
55 /*
56 ** INCLUDES
57 */
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <strings.h>
62 #include <math.h>
63 #include "nmglobal.h"
64 #include "nbench1.h"
65 #include "wordcat.h"
66 
67 #ifdef DEBUG
68 static int numsort_status=0;
69 static int stringsort_status=0;
70 #endif
71 
72 /*********************
73 ** NUMERIC HEAPSORT **
74 **********************
75 ** This test implements a heapsort algorithm, performed on an
76 ** array of longs.
77 */
78 
79 /**************
80 ** DoNumSort **
81 ***************
82 ** This routine performs the CPU numeric sort test.
83 ** NOTE: Last version incorrectly stated that the routine
84 **  returned result in # of longword sorted per second.
85 **  Not so; the routine returns # of iterations per sec.
86 */
87 
DoNumSort(void)88 void DoNumSort(void)
89 {
90 SortStruct *numsortstruct;      /* Local pointer to global struct */
91 farlong *arraybase;     /* Base pointers of array */
92 long accumtime;         /* Accumulated time */
93 double iterations;      /* Iteration counter */
94 char *errorcontext;     /* Error context string pointer */
95 int systemerror;        /* For holding error codes */
96 
97 /*
98 ** Link to global structure
99 */
100 numsortstruct=&global_numsortstruct;
101 
102 /*
103 ** Set the error context string.
104 */
105 errorcontext="CPU:Numeric Sort";
106 
107 /*
108 ** See if we need to do self adjustment code.
109 */
110 if(numsortstruct->adjust==0)
111 {
112 	/*
113 	** Self-adjustment code.  The system begins by sorting 1
114 	** array.  If it does that in no time, then two arrays
115 	** are built and sorted.  This process continues until
116 	** enough arrays are built to handle the tolerance.
117 	*/
118 	numsortstruct->numarrays=1;
119 	while(1)
120 	{
121 		/*
122 		** Allocate space for arrays
123 		*/
124 		arraybase=(farlong *)AllocateMemory(sizeof(long) *
125 			numsortstruct->numarrays * numsortstruct->arraysize,
126 			&systemerror);
127 		if(systemerror)
128 		{       ReportError(errorcontext,systemerror);
129 			FreeMemory((farvoid *)arraybase,
130 				  &systemerror);
131 			ErrorExit();
132 		}
133 
134 		/*
135 		** Do an iteration of the numeric sort.  If the
136 		** elapsed time is less than or equal to the permitted
137 		** minimum, then allocate for more arrays and
138 		** try again.
139 		*/
140 		if(DoNumSortIteration(arraybase,
141 			numsortstruct->arraysize,
142 			numsortstruct->numarrays)>global_min_ticks)
143 			break;          /* We're ok...exit */
144 
145 		FreeMemory((farvoid *)arraybase,&systemerror);
146 		if(numsortstruct->numarrays++>NUMNUMARRAYS)
147 		{       printf("CPU:NSORT -- NUMNUMARRAYS hit.\n");
148 			ErrorExit();
149 		}
150 	}
151 }
152 else
153 {       /*
154 	** Allocate space for arrays
155 	*/
156 	arraybase=(farlong *)AllocateMemory(sizeof(long) *
157 		numsortstruct->numarrays * numsortstruct->arraysize,
158 		&systemerror);
159 	if(systemerror)
160 	{       ReportError(errorcontext,systemerror);
161 		FreeMemory((farvoid *)arraybase,
162 			  &systemerror);
163 		ErrorExit();
164 	}
165 
166 }
167 /*
168 ** All's well if we get here.  Repeatedly perform sorts until the
169 ** accumulated elapsed time is greater than # of seconds requested.
170 */
171 accumtime=0L;
172 iterations=(double)0.0;
173 
174 do {
175 	accumtime+=DoNumSortIteration(arraybase,
176 		numsortstruct->arraysize,
177 		numsortstruct->numarrays);
178 	iterations+=(double)1.0;
179 } while(TicksToSecs(accumtime)<numsortstruct->request_secs);
180 
181 /*
182 ** Clean up, calculate results, and go home.  Be sure to
183 ** show that we don't have to rerun adjustment code.
184 */
185 FreeMemory((farvoid *)arraybase,&systemerror);
186 
187 numsortstruct->sortspersec=iterations *
188 	(double)numsortstruct->numarrays / TicksToFracSecs(accumtime);
189 
190 if(numsortstruct->adjust==0)
191 	numsortstruct->adjust=1;
192 
193 #ifdef DEBUG
194 if (numsort_status==0) printf("Numeric sort: OK\n");
195 numsort_status=0;
196 #endif
197 return;
198 }
199 
200 /***********************
201 ** DoNumSortIteration **
202 ************************
203 ** This routine executes one iteration of the numeric
204 ** sort benchmark.  It returns the number of ticks
205 ** elapsed for the iteration.
206 */
DoNumSortIteration(farlong * arraybase,ulong arraysize,uint numarrays)207 static ulong DoNumSortIteration(farlong *arraybase,
208 		ulong arraysize,
209 		uint numarrays)
210 {
211 ulong elapsed;          /* Elapsed ticks */
212 ulong i;
213 /*
214 ** Load up the array with random numbers
215 */
216 LoadNumArrayWithRand(arraybase,arraysize,numarrays);
217 
218 /*
219 ** Start the stopwatch
220 */
221 elapsed=StartStopwatch();
222 
223 /*
224 ** Execute a heap of heapsorts
225 */
226 for(i=0;i<numarrays;i++)
227 	NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L);
228 
229 /*
230 ** Get elapsed time
231 */
232 elapsed=StopStopwatch(elapsed);
233 #ifdef DEBUG
234 {
235 	for(i=0;i<arraysize-1;i++)
236 	{       /*
237 		** Compare to check for proper
238 		** sort.
239 		*/
240 		if(arraybase[i+1]<arraybase[i])
241 		{       printf("Sort Error\n");
242 			numsort_status=1;
243                         break;
244 		}
245 	}
246 }
247 #endif
248 
249 return(elapsed);
250 }
251 
252 /*************************
253 ** LoadNumArrayWithRand **
254 **************************
255 ** Load up an array with random longs.
256 */
LoadNumArrayWithRand(farlong * array,ulong arraysize,uint numarrays)257 static void LoadNumArrayWithRand(farlong *array,     /* Pointer to arrays */
258 		ulong arraysize,
259 		uint numarrays)         /* # of elements in array */
260 {
261 long i;                 /* Used for index */
262 farlong *darray;        /* Destination array pointer */
263 /*
264 ** Initialize the random number generator
265 */
266 /* randnum(13L); */
267 randnum((int32)13);
268 
269 /*
270 ** Load up first array with randoms
271 */
272 for(i=0L;i<arraysize;i++)
273         /* array[i]=randnum(0L); */
274 	array[i]=randnum((int32)0);
275 
276 /*
277 ** Now, if there's more than one array to load, copy the
278 ** first into each of the others.
279 */
280 darray=array;
281 while(--numarrays)
282 {       darray+=arraysize;
283 	for(i=0L;i<arraysize;i++)
284 		darray[i]=array[i];
285 }
286 
287 return;
288 }
289 
290 /****************
291 ** NumHeapSort **
292 *****************
293 ** Pass this routine a pointer to an array of long
294 ** integers.  Also pass in minimum and maximum offsets.
295 ** This routine performs a heap sort on that array.
296 */
NumHeapSort(farlong * array,ulong bottom,ulong top)297 static void NumHeapSort(farlong *array,
298 	ulong bottom,           /* Lower bound */
299 	ulong top)              /* Upper bound */
300 {
301 ulong temp;                     /* Used to exchange elements */
302 ulong i;                        /* Loop index */
303 
304 /*
305 ** First, build a heap in the array
306 */
307 for(i=(top/2L); i>0; --i)
308 	NumSift(array,i,top);
309 
310 /*
311 ** Repeatedly extract maximum from heap and place it at the
312 ** end of the array.  When we get done, we'll have a sorted
313 ** array.
314 */
315 for(i=top; i>0; --i)
316 {       NumSift(array,bottom,i);
317 	temp=*array;                    /* Perform exchange */
318 	*array=*(array+i);
319 	*(array+i)=temp;
320 }
321 return;
322 }
323 
324 /************
325 ** NumSift **
326 *************
327 ** Peforms the sift operation on a numeric array,
328 ** constructing a heap in the array.
329 */
NumSift(farlong * array,ulong i,ulong j)330 static void NumSift(farlong *array,     /* Array of numbers */
331 	ulong i,                /* Minimum of array */
332 	ulong j)                /* Maximum of array */
333 {
334 unsigned long k;
335 long temp;                              /* Used for exchange */
336 
337 while((i+i)<=j)
338 {
339 	k=i+i;
340 	if(k<j)
341 		if(array[k]<array[k+1L])
342 			++k;
343 	if(array[i]<array[k])
344 	{
345 		temp=array[k];
346 		array[k]=array[i];
347 		array[i]=temp;
348 		i=k;
349 	}
350 	else
351 		i=j+1;
352 }
353 return;
354 }
355 
356 /********************
357 ** STRING HEAPSORT **
358 ********************/
359 
360 /*****************
361 ** DoStringSort **
362 ******************
363 ** This routine performs the CPU string sort test.
364 ** Arguments:
365 **      requested_secs = # of seconds to execute test
366 **      stringspersec = # of strings per second sorted (RETURNED)
367 */
DoStringSort(void)368 void DoStringSort(void)
369 {
370 
371 SortStruct *strsortstruct;      /* Local for sort structure */
372 faruchar *arraybase;            /* Base pointer of char array */
373 long accumtime;                 /* Accumulated time */
374 double iterations;              /* # of iterations */
375 char *errorcontext;             /* Error context string pointer */
376 int systemerror;                /* For holding error code */
377 
378 /*
379 ** Link to global structure
380 */
381 strsortstruct=&global_strsortstruct;
382 
383 /*
384 ** Set the error context
385 */
386 errorcontext="CPU:String Sort";
387 
388 /*
389 ** See if we have to perform self-adjustment code
390 */
391 if(strsortstruct->adjust==0)
392 {
393 	/*
394 	** Initialize the number of arrays.
395 	*/
396 	strsortstruct->numarrays=1;
397 	while(1)
398 	{
399 		/*
400 		** Allocate space for array.  We'll add an extra 100
401 		** bytes to protect memory as strings move around
402 		** (this can happen during string adjustment)
403 		*/
404 		arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
405 			(long)strsortstruct->numarrays,&systemerror);
406 		if(systemerror)
407 		{       ReportError(errorcontext,systemerror);
408 			ErrorExit();
409 		}
410 
411 		/*
412 		** Do an iteration of the string sort.  If the
413 		** elapsed time is less than or equal to the permitted
414 		** minimum, then de-allocate the array, reallocate a
415 		** an additional array, and try again.
416 		*/
417 		if(DoStringSortIteration(arraybase,
418 			strsortstruct->numarrays,
419 			strsortstruct->arraysize)>global_min_ticks)
420 			break;          /* We're ok...exit */
421 
422 		FreeMemory((farvoid *)arraybase,&systemerror);
423 		strsortstruct->numarrays+=1;
424 	}
425 }
426 else
427 {
428 	/*
429 	** We don't have to perform self adjustment code.
430 	** Simply allocate the space for the array.
431 	*/
432 	arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
433 		(long)strsortstruct->numarrays,&systemerror);
434 	if(systemerror)
435 	{       ReportError(errorcontext,systemerror);
436 		ErrorExit();
437 	}
438 }
439 /*
440 ** All's well if we get here.  Repeatedly perform sorts until the
441 ** accumulated elapsed time is greater than # of seconds requested.
442 */
443 accumtime=0L;
444 iterations=(double)0.0;
445 
446 do {
447 	accumtime+=DoStringSortIteration(arraybase,
448 				strsortstruct->numarrays,
449 				strsortstruct->arraysize);
450 	iterations+=(double)strsortstruct->numarrays;
451 } while(TicksToSecs(accumtime)<strsortstruct->request_secs);
452 
453 /*
454 ** Clean up, calculate results, and go home.
455 ** Set flag to show we don't need to rerun adjustment code.
456 */
457 FreeMemory((farvoid *)arraybase,&systemerror);
458 strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime);
459 if(strsortstruct->adjust==0)
460 	strsortstruct->adjust=1;
461 #ifdef DEBUG
462 if (stringsort_status==0) printf("String sort: OK\n");
463 stringsort_status=0;
464 #endif
465 return;
466 }
467 
468 /**************************
469 ** DoStringSortIteration **
470 ***************************
471 ** This routine executes one iteration of the string
472 ** sort benchmark.  It returns the number of ticks
473 ** Note that this routine also builds the offset pointer
474 ** array.
475 */
DoStringSortIteration(faruchar * arraybase,uint numarrays,ulong arraysize)476 static ulong DoStringSortIteration(faruchar *arraybase,
477 		uint numarrays,ulong arraysize)
478 {
479 farulong *optrarray;            /* Offset pointer array */
480 unsigned long elapsed;          /* Elapsed ticks */
481 unsigned long nstrings;         /* # of strings in array */
482 int syserror;                   /* System error code */
483 unsigned int i;                 /* Index */
484 farulong *tempobase;            /* Temporary offset pointer base */
485 faruchar *tempsbase;            /* Temporary string base pointer */
486 
487 /*
488 ** Load up the array(s) with random numbers
489 */
490 optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize);
491 
492 /*
493 ** Set temp base pointers...they will be modified as the
494 ** benchmark proceeds.
495 */
496 tempobase=optrarray;
497 tempsbase=arraybase;
498 
499 /*
500 ** Start the stopwatch
501 */
502 elapsed=StartStopwatch();
503 
504 /*
505 ** Execute heapsorts
506 */
507 for(i=0;i<numarrays;i++)
508 {       StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1);
509 	tempobase+=nstrings;    /* Advance base pointers */
510 	tempsbase+=arraysize+100;
511 }
512 
513 /*
514 ** Record elapsed time
515 */
516 elapsed=StopStopwatch(elapsed);
517 
518 #ifdef DEBUG
519 {
520 	unsigned long i;
521 	for(i=0;i<nstrings-1;i++)
522 	{       /*
523 		** Compare strings to check for proper
524 		** sort.
525 		*/
526 		if(str_is_less(optrarray,arraybase,nstrings,i+1,i))
527 		{       printf("Sort Error\n");
528 			stringsort_status=1;
529                         break;
530 		}
531 	}
532 }
533 #endif
534 
535 /*
536 ** Release the offset pointer array built by
537 ** LoadStringArray()
538 */
539 FreeMemory((farvoid *)optrarray,&syserror);
540 
541 /*
542 ** Return elapsed ticks.
543 */
544 return(elapsed);
545 }
546 
547 /********************
548 ** LoadStringArray **
549 *********************
550 ** Initialize the string array with random strings of
551 ** varying sizes.
552 ** Returns the pointer to the offset pointer array.
553 ** Note that since we're creating a number of arrays, this
554 ** routine builds one array, then copies it into the others.
555 */
LoadStringArray(faruchar * strarray,uint numarrays,ulong * nstrings,ulong arraysize)556 static farulong *LoadStringArray(faruchar *strarray, /* String array */
557 	uint numarrays,                 /* # of arrays */
558 	ulong *nstrings,                /* # of strings */
559 	ulong arraysize)                /* Size of array */
560 {
561 faruchar *tempsbase;            /* Temporary string base pointer */
562 farulong *optrarray;            /* Local for pointer */
563 farulong *tempobase;            /* Temporary offset pointer base pointer */
564 unsigned long curroffset;       /* Current offset */
565 int fullflag;                   /* Indicates full array */
566 unsigned char stringlength;     /* Length of string */
567 unsigned char i;                /* Index */
568 unsigned long j;                /* Another index */
569 unsigned int k;                 /* Yet another index */
570 unsigned int l;                 /* Ans still one more index */
571 int systemerror;                /* For holding error code */
572 
573 /*
574 ** Initialize random number generator.
575 */
576 /* randnum(13L); */
577 randnum((int32)13);
578 
579 /*
580 ** Start with no strings.  Initialize our current offset pointer
581 ** to 0.
582 */
583 *nstrings=0L;
584 curroffset=0L;
585 fullflag=0;
586 
587 do
588 {
589 	/*
590 	** Allocate a string with a random length no
591 	** shorter than 4 bytes and no longer than
592 	** 80 bytes.  Note we have to also make sure
593 	** there's room in the array.
594 	*/
595         /* stringlength=(unsigned char)((1+abs_randwc(76L)) & 0xFFL);*/
596 	stringlength=(unsigned char)((1+abs_randwc((int32)76)) & 0xFFL);
597 	if((unsigned long)stringlength+curroffset+1L>=arraysize)
598 	{       stringlength=(unsigned char)((arraysize-curroffset-1L) &
599 				0xFF);
600 		fullflag=1;     /* Indicates a full */
601 	}
602 
603 	/*
604 	** Store length at curroffset and advance current offset.
605 	*/
606 	*(strarray+curroffset)=stringlength;
607 	curroffset++;
608 
609 	/*
610 	** Fill up the rest of the string with random bytes.
611 	*/
612 	for(i=0;i<stringlength;i++)
613 	{       *(strarray+curroffset)=
614 		        /* (unsigned char)(abs_randwc((long)0xFE)); */
615 			(unsigned char)(abs_randwc((int32)0xFE));
616 		curroffset++;
617 	}
618 
619 	/*
620 	** Increment the # of strings counter.
621 	*/
622 	*nstrings+=1L;
623 
624 } while(fullflag==0);
625 
626 /*
627 ** We now have initialized a single full array.  If there
628 ** is more than one array, copy the original into the
629 ** others.
630 */
631 k=1;
632 tempsbase=strarray;
633 while(k<numarrays)
634 {       tempsbase+=arraysize+100;         /* Set base */
635 	for(l=0;l<arraysize;l++)
636 		tempsbase[l]=strarray[l];
637 	k++;
638 }
639 
640 /*
641 ** Now the array is full, allocate enough space for an
642 ** offset pointer array.
643 */
644 optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) *
645 		numarrays,
646 		&systemerror);
647 if(systemerror)
648 {       ReportError("CPU:Stringsort",systemerror);
649 	FreeMemory((void *)strarray,&systemerror);
650 	ErrorExit();
651 }
652 
653 /*
654 ** Go through the newly-built string array, building
655 ** offsets and putting them into the offset pointer
656 ** array.
657 */
658 curroffset=0;
659 for(j=0;j<*nstrings;j++)
660 {       *(optrarray+j)=curroffset;
661 	curroffset+=(unsigned long)(*(strarray+curroffset))+1L;
662 }
663 
664 /*
665 ** As above, we've made one copy of the offset pointers,
666 ** so duplicate this array in the remaining ones.
667 */
668 k=1;
669 tempobase=optrarray;
670 while(k<numarrays)
671 {       tempobase+=*nstrings;
672 	for(l=0;l<*nstrings;l++)
673 		tempobase[l]=optrarray[l];
674 	k++;
675 }
676 
677 /*
678 ** All done...go home.  Pass local pointer back.
679 */
680 return(optrarray);
681 }
682 
683 /**************
684 ** stradjust **
685 ***************
686 ** Used by the string heap sort.  Call this routine to adjust the
687 ** string at offset i to length l.  The members of the string array
688 ** are moved accordingly and the length of the string at offset i
689 ** is set to l.
690 */
stradjust(farulong * optrarray,faruchar * strarray,ulong nstrings,ulong i,uchar l)691 static void stradjust(farulong *optrarray,      /* Offset pointer array */
692 	faruchar *strarray,                     /* String array */
693 	ulong nstrings,                         /* # of strings */
694 	ulong i,                                /* Offset to adjust */
695 	uchar l)                                /* New length */
696 {
697 unsigned long nbytes;           /* # of bytes to move */
698 unsigned long j;                /* Index */
699 int direction;                  /* Direction indicator */
700 unsigned char adjamount;        /* Adjustment amount */
701 
702 /*
703 ** If new length is less than old length, the direction is
704 ** down.  If new length is greater than old length, the
705 ** direction is up.
706 */
707 direction=(int)l - (int)*(strarray+*(optrarray+i));
708 adjamount=(unsigned char)abs(direction);
709 
710 /*
711 ** See if the adjustment is being made to the last
712 ** string in the string array.  If so, we don't have to
713 ** do anything more than adjust the length field.
714 */
715 if(i==(nstrings-1L))
716 {       *(strarray+*(optrarray+i))=l;
717 	return;
718 }
719 
720 /*
721 ** Calculate the total # of bytes in string array from
722 ** location i+1 to end of array.  Whether we're moving "up" or
723 ** down, this is how many bytes we'll have to move.
724 */
725 nbytes=*(optrarray+nstrings-1L) +
726 	(unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L -
727 	*(optrarray+i+1L);
728 
729 /*
730 ** Calculate the source and the destination.  Source is
731 ** string position i+1.  Destination is string position i+l
732 ** (i+"ell"...don't confuse 1 and l).
733 ** Hand this straight to memmove and let it handle the
734 ** "overlap" problem.
735 */
736 MoveMemory((farvoid *)(strarray+*(optrarray+i)+l+1),
737 	(farvoid *)(strarray+*(optrarray+i+1)),
738 	(unsigned long)nbytes);
739 
740 /*
741 ** We have to adjust the offset pointer array.
742 ** This covers string i+1 to numstrings-1.
743 */
744 for(j=i+1;j<nstrings;j++)
745 	if(direction<0)
746 		*(optrarray+j)=*(optrarray+j)-adjamount;
747 	else
748 		*(optrarray+j)=*(optrarray+j)+adjamount;
749 
750 /*
751 ** Store the new length and go home.
752 */
753 *(strarray+*(optrarray+i))=l;
754 return;
755 }
756 
757 /****************
758 ** strheapsort **
759 *****************
760 ** Pass this routine a pointer to an array of unsigned char.
761 ** The array is presumed to hold strings occupying at most
762 ** 80 bytes (counts a byte count).
763 ** This routine also needs a pointer to an array of offsets
764 ** which represent string locations in the array, and
765 ** an unsigned long indicating the number of strings
766 ** in the array.
767 */
StrHeapSort(farulong * optrarray,faruchar * strarray,ulong numstrings,ulong bottom,ulong top)768 static void StrHeapSort(farulong *optrarray, /* Offset pointers */
769 	faruchar *strarray,             /* Strings array */
770 	ulong numstrings,               /* # of strings in array */
771 	ulong bottom,                   /* Region to sort...bottom */
772 	ulong top)                      /* Region to sort...top */
773 {
774 unsigned char temp[80];                 /* Used to exchange elements */
775 unsigned char tlen;                     /* Temp to hold length */
776 unsigned long i;                        /* Loop index */
777 
778 
779 /*
780 ** Build a heap in the array
781 */
782 for(i=(top/2L); i>0; --i)
783 	strsift(optrarray,strarray,numstrings,i,top);
784 
785 /*
786 ** Repeatedly extract maximum from heap and place it at the
787 ** end of the array.  When we get done, we'll have a sorted
788 ** array.
789 */
790 for(i=top; i>0; --i)
791 {
792 	strsift(optrarray,strarray,numstrings,0,i);
793 
794 	/* temp = string[0] */
795 	tlen=*strarray;
796 	MoveMemory((farvoid *)&temp[0], /* Perform exchange */
797 		(farvoid *)strarray,
798 		(unsigned long)(tlen+1));
799 
800 
801 	/* string[0]=string[i] */
802 	tlen=*(strarray+*(optrarray+i));
803 	stradjust(optrarray,strarray,numstrings,0,tlen);
804 	MoveMemory((farvoid *)strarray,
805 		(farvoid *)(strarray+*(optrarray+i)),
806 		(unsigned long)(tlen+1));
807 
808 	/* string[i]=temp */
809 	tlen=temp[0];
810 	stradjust(optrarray,strarray,numstrings,i,tlen);
811 	MoveMemory((farvoid *)(strarray+*(optrarray+i)),
812 		(farvoid *)&temp[0],
813 		(unsigned long)(tlen+1));
814 
815 }
816 return;
817 }
818 
819 /****************
820 ** str_is_less **
821 *****************
822 ** Pass this function:
823 **      1) A pointer to an array of offset pointers
824 **      2) A pointer to a string array
825 **      3) The number of elements in the string array
826 **      4) Offsets to two strings (a & b)
827 ** This function returns TRUE if string a is < string b.
828 */
str_is_less(farulong * optrarray,faruchar * strarray,ulong numstrings,ulong a,ulong b)829 static int str_is_less(farulong *optrarray, /* Offset pointers */
830 	faruchar *strarray,                     /* String array */
831 	ulong numstrings,                       /* # of strings */
832 	ulong a, ulong b)                       /* Offsets */
833 {
834 int slen;               /* String length */
835 
836 /*
837 ** Determine which string has the minimum length.  Use that
838 ** to call strncmp().  If they match up to that point, the
839 ** string with the longer length wins.
840 */
841 slen=(int)*(strarray+*(optrarray+a));
842 if(slen > (int)*(strarray+*(optrarray+b)))
843 	slen=(int)*(strarray+*(optrarray+b));
844 
845 slen=strncmp((char *)(strarray+*(optrarray+a)),
846 		(char *)(strarray+*(optrarray+b)),slen);
847 
848 if(slen==0)
849 {
850 	/*
851 	** They match.  Return true if the length of a
852 	** is greater than the length of b.
853 	*/
854 	if(*(strarray+*(optrarray+a)) >
855 		*(strarray+*(optrarray+b)))
856 		return(TRUE);
857 	return(FALSE);
858 }
859 
860 if(slen<0) return(TRUE);        /* a is strictly less than b */
861 
862 return(FALSE);                  /* Only other possibility */
863 }
864 
865 /************
866 ** strsift **
867 *************
868 ** Pass this function:
869 **      1) A pointer to an array of offset pointers
870 **      2) A pointer to a string array
871 **      3) The number of elements in the string array
872 **      4) Offset within which to sort.
873 ** Sift the array within the bounds of those offsets (thus
874 ** building a heap).
875 */
strsift(farulong * optrarray,faruchar * strarray,ulong numstrings,ulong i,ulong j)876 static void strsift(farulong *optrarray,        /* Offset pointers */
877 	faruchar *strarray,                     /* String array */
878 	ulong numstrings,                       /* # of strings */
879 	ulong i, ulong j)                       /* Offsets */
880 {
881 unsigned long k;                /* Temporaries */
882 unsigned char temp[80];
883 unsigned char tlen;             /* For string lengths */
884 
885 
886 while((i+i)<=j)
887 {
888 	k=i+i;
889 	if(k<j)
890 		if(str_is_less(optrarray,strarray,numstrings,k,k+1L))
891 			++k;
892 	if(str_is_less(optrarray,strarray,numstrings,i,k))
893 	{
894 		/* temp=string[k] */
895 		tlen=*(strarray+*(optrarray+k));
896 		MoveMemory((farvoid *)&temp[0],
897 			(farvoid *)(strarray+*(optrarray+k)),
898 			(unsigned long)(tlen+1));
899 
900 		/* string[k]=string[i] */
901 		tlen=*(strarray+*(optrarray+i));
902 		stradjust(optrarray,strarray,numstrings,k,tlen);
903 		MoveMemory((farvoid *)(strarray+*(optrarray+k)),
904 			(farvoid *)(strarray+*(optrarray+i)),
905 			(unsigned long)(tlen+1));
906 
907 		/* string[i]=temp */
908 		tlen=temp[0];
909 		stradjust(optrarray,strarray,numstrings,i,tlen);
910 		MoveMemory((farvoid *)(strarray+*(optrarray+i)),
911 			(farvoid *)&temp[0],
912 			(unsigned long)(tlen+1));
913 		i=k;
914 	}
915 	else
916 		i=j+1;
917 }
918 return;
919 }
920 
921 /************************
922 ** BITFIELD OPERATIONS **
923 *************************/
924 
925 /*************
926 ** DoBitops **
927 **************
928 ** Perform the bit operations test portion of the CPU
929 ** benchmark.  Returns the iterations per second.
930 */
DoBitops(void)931 void DoBitops(void)
932 {
933 BitOpStruct *locbitopstruct;    /* Local bitop structure */
934 farulong *bitarraybase;         /* Base of bitmap array */
935 farulong *bitoparraybase;       /* Base of bitmap operations array */
936 ulong nbitops;                  /* # of bitfield operations */
937 ulong accumtime;                /* Accumulated time in ticks */
938 double iterations;              /* # of iterations */
939 char *errorcontext;             /* Error context string */
940 int systemerror;                /* For holding error codes */
941 int ticks;
942 
943 /*
944 ** Link to global structure.
945 */
946 locbitopstruct=&global_bitopstruct;
947 
948 /*
949 ** Set the error context.
950 */
951 errorcontext="CPU:Bitfields";
952 
953 /*
954 ** See if we need to run adjustment code.
955 */
956 if(locbitopstruct->adjust==0)
957 {
958 	bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
959 		sizeof(ulong),&systemerror);
960 	if(systemerror)
961 	{       ReportError(errorcontext,systemerror);
962 		ErrorExit();
963 	}
964 
965 	/*
966 	** Initialize bitfield operations array to [2,30] elements
967 	*/
968 	locbitopstruct->bitoparraysize=30L;
969 
970 	while(1)
971 	{
972 		/*
973 		** Allocate space for operations array
974 		*/
975 		bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
976 			sizeof(ulong),
977 			&systemerror);
978 		if(systemerror)
979 		{       ReportError(errorcontext,systemerror);
980 			FreeMemory((farvoid *)bitarraybase,&systemerror);
981 			ErrorExit();
982 		}
983 		/*
984 		** Do an iteration of the bitmap test.  If the
985 		** elapsed time is less than or equal to the permitted
986 		** minimum, then de-allocate the array, reallocate a
987 		** larger version, and try again.
988 		*/
989 		ticks=DoBitfieldIteration(bitarraybase,
990 					   bitoparraybase,
991 					   locbitopstruct->bitoparraysize,
992 					   &nbitops);
993 #ifdef DEBUG
994 #ifdef LINUX
995 	        if (locbitopstruct->bitoparraysize==30L){
996 		  /* this is the first loop, write a debug file */
997 		  FILE *file;
998 		  unsigned long *running_base; /* same as farulong */
999 		  long counter;
1000 		  file=fopen("debugbit.dat","w");
1001 		  running_base=bitarraybase;
1002 		  for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){
1003 #ifdef LONG64
1004 		    fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL));
1005 		    fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL));
1006 		    if ((counter+1)%4==0) fprintf(file,"\n");
1007 #else
1008 		    fprintf(file,"%08lX",*running_base);
1009 		    if ((counter+1)%8==0) fprintf(file,"\n");
1010 #endif
1011 		    running_base=running_base+1;
1012 		  }
1013 		  fclose(file);
1014 		  printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n");
1015 		}
1016 #endif
1017 #endif
1018 
1019 		if (ticks>global_min_ticks) break;      /* We're ok...exit */
1020 
1021 		FreeMemory((farvoid *)bitoparraybase,&systemerror);
1022 		locbitopstruct->bitoparraysize+=100L;
1023 	}
1024 }
1025 else
1026 {
1027 	/*
1028 	** Don't need to do self adjustment, just allocate
1029 	** the array space.
1030 	*/
1031 	bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
1032 		sizeof(ulong),&systemerror);
1033 	if(systemerror)
1034 	{       ReportError(errorcontext,systemerror);
1035 		ErrorExit();
1036 	}
1037 	bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
1038 		sizeof(ulong),
1039 		&systemerror);
1040 	if(systemerror)
1041 	{       ReportError(errorcontext,systemerror);
1042 		FreeMemory((farvoid *)bitarraybase,&systemerror);
1043 		ErrorExit();
1044 	}
1045 }
1046 
1047 /*
1048 ** All's well if we get here.  Repeatedly perform bitops until the
1049 ** accumulated elapsed time is greater than # of seconds requested.
1050 */
1051 accumtime=0L;
1052 iterations=(double)0.0;
1053 do {
1054 	accumtime+=DoBitfieldIteration(bitarraybase,
1055 			bitoparraybase,
1056 			locbitopstruct->bitoparraysize,&nbitops);
1057 	iterations+=(double)nbitops;
1058 } while(TicksToSecs(accumtime)<locbitopstruct->request_secs);
1059 
1060 /*
1061 ** Clean up, calculate results, and go home.
1062 ** Also, set adjustment flag to show that we don't have
1063 ** to do self adjusting in the future.
1064 */
1065 FreeMemory((farvoid *)bitarraybase,&systemerror);
1066 FreeMemory((farvoid *)bitoparraybase,&systemerror);
1067 locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime);
1068 if(locbitopstruct->adjust==0)
1069 	locbitopstruct->adjust=1;
1070 
1071 return;
1072 }
1073 
1074 /************************
1075 ** DoBitfieldIteration **
1076 *************************
1077 ** Perform a single iteration of the bitfield benchmark.
1078 ** Return the # of ticks accumulated by the operation.
1079 */
DoBitfieldIteration(farulong * bitarraybase,farulong * bitoparraybase,long bitoparraysize,ulong * nbitops)1080 static ulong DoBitfieldIteration(farulong *bitarraybase,
1081 		farulong *bitoparraybase,
1082 		long bitoparraysize,
1083 		ulong *nbitops)
1084 {
1085 long i;                         /* Index */
1086 ulong bitoffset;                /* Offset into bitmap */
1087 ulong elapsed;                  /* Time to execute */
1088 /*
1089 ** Clear # bitops counter
1090 */
1091 *nbitops=0L;
1092 
1093 /*
1094 ** Construct a set of bitmap offsets and run lengths.
1095 ** The offset can be any random number from 0 to the
1096 ** size of the bitmap (in bits).  The run length can
1097 ** be any random number from 1 to the number of bits
1098 ** between the offset and the end of the bitmap.
1099 ** Note that the bitmap has 8192 * 32 bits in it.
1100 ** (262,144 bits)
1101 */
1102 /*
1103 ** Reset random number generator so things repeat.
1104 ** Also reset the bit array we work on.
1105 ** added by Uwe F. Mayer
1106 */
1107 randnum((int32)13);
1108 for (i=0;i<global_bitopstruct.bitfieldarraysize;i++)
1109 {
1110 #ifdef LONG64
1111 	*(bitarraybase+i)=(ulong)0x5555555555555555;
1112 #else
1113 	*(bitarraybase+i)=(ulong)0x55555555;
1114 #endif
1115 }
1116 randnum((int32)13);
1117 /* end of addition of code */
1118 
1119 for (i=0;i<bitoparraysize;i++)
1120 {
1121 	/* First item is offset */
1122         /* *(bitoparraybase+i+i)=bitoffset=abs_randwc(262140L); */
1123 	*(bitoparraybase+i+i)=bitoffset=abs_randwc((int32)262140);
1124 
1125 	/* Next item is run length */
1126 	/* *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc(262140L-bitoffset);*/
1127 	*nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc((int32)262140-bitoffset);
1128 }
1129 
1130 /*
1131 ** Array of offset and lengths built...do an iteration of
1132 ** the test.
1133 ** Start the stopwatch.
1134 */
1135 elapsed=StartStopwatch();
1136 
1137 /*
1138 ** Loop through array off offset/run length pairs.
1139 ** Execute operation based on modulus of index.
1140 */
1141 for(i=0;i<bitoparraysize;i++)
1142 {
1143 	switch(i % 3)
1144 	{
1145 
1146 		case 0: /* Set run of bits */
1147 			ToggleBitRun(bitarraybase,
1148 				*(bitoparraybase+i+i),
1149 				*(bitoparraybase+i+i+1),
1150 				1);
1151 			break;
1152 
1153 		case 1: /* Clear run of bits */
1154 			ToggleBitRun(bitarraybase,
1155 				*(bitoparraybase+i+i),
1156 				*(bitoparraybase+i+i+1),
1157 				0);
1158 			break;
1159 
1160 		case 2: /* Complement run of bits */
1161 			FlipBitRun(bitarraybase,
1162 				*(bitoparraybase+i+i),
1163 				*(bitoparraybase+i+i+1));
1164 			break;
1165 	}
1166 }
1167 
1168 /*
1169 ** Return elapsed time
1170 */
1171 return(StopStopwatch(elapsed));
1172 }
1173 
1174 
1175 /*****************************
1176 **     ToggleBitRun          *
1177 ******************************
1178 ** Set or clear a run of nbits starting at
1179 ** bit_addr in bitmap.
1180 */
ToggleBitRun(farulong * bitmap,ulong bit_addr,ulong nbits,uint val)1181 static void ToggleBitRun(farulong *bitmap, /* Bitmap */
1182 		ulong bit_addr,         /* Address of bits to set */
1183 		ulong nbits,            /* # of bits to set/clr */
1184 		uint val)               /* 1 or 0 */
1185 {
1186 unsigned long bindex;   /* Index into array */
1187 unsigned long bitnumb;  /* Bit number */
1188 
1189 while(nbits--)
1190 {
1191 #ifdef LONG64
1192 	bindex=bit_addr>>6;     /* Index is number /64 */
1193 	bitnumb=bit_addr % 64;   /* Bit number in word */
1194 #else
1195 	bindex=bit_addr>>5;     /* Index is number /32 */
1196 	bitnumb=bit_addr % 32;  /* bit number in word */
1197 #endif
1198 	if(val)
1199 		bitmap[bindex]|=(1L<<bitnumb);
1200 	else
1201 		bitmap[bindex]&=~(1L<<bitnumb);
1202 	bit_addr++;
1203 }
1204 return;
1205 }
1206 
1207 /***************
1208 ** FlipBitRun **
1209 ****************
1210 ** Complements a run of bits.
1211 */
FlipBitRun(farulong * bitmap,ulong bit_addr,ulong nbits)1212 static void FlipBitRun(farulong *bitmap,        /* Bit map */
1213 		ulong bit_addr,                 /* Bit address */
1214 		ulong nbits)                    /* # of bits to flip */
1215 {
1216 unsigned long bindex;   /* Index into array */
1217 unsigned long bitnumb;  /* Bit number */
1218 
1219 while(nbits--)
1220 {
1221 #ifdef LONG64
1222 	bindex=bit_addr>>6;     /* Index is number /64 */
1223 	bitnumb=bit_addr % 64;  /* Bit number in longword */
1224 #else
1225 	bindex=bit_addr>>5;     /* Index is number /32 */
1226 	bitnumb=bit_addr % 32;  /* Bit number in longword */
1227 #endif
1228 	bitmap[bindex]^=(1L<<bitnumb);
1229 	bit_addr++;
1230 }
1231 
1232 return;
1233 }
1234 
1235 /*****************************
1236 ** FLOATING-POINT EMULATION **
1237 *****************************/
1238 
1239 /**************
1240 ** DoEmFloat **
1241 ***************
1242 ** Perform the floating-point emulation routines portion of the
1243 ** CPU benchmark.  Returns the operations per second.
1244 */
DoEmFloat(void)1245 void DoEmFloat(void)
1246 {
1247 EmFloatStruct *locemfloatstruct;        /* Local structure */
1248 InternalFPF *abase;             /* Base of A array */
1249 InternalFPF *bbase;             /* Base of B array */
1250 InternalFPF *cbase;             /* Base of C array */
1251 ulong accumtime;                /* Accumulated time in ticks */
1252 double iterations;              /* # of iterations */
1253 ulong tickcount;                /* # of ticks */
1254 char *errorcontext;             /* Error context string pointer */
1255 int systemerror;                /* For holding error code */
1256 ulong loops;                    /* # of loops */
1257 
1258 /*
1259 ** Link to global structure
1260 */
1261 locemfloatstruct=&global_emfloatstruct;
1262 
1263 /*
1264 ** Set the error context
1265 */
1266 errorcontext="CPU:Floating Emulation";
1267 
1268 
1269 /*
1270 ** Test the emulation routines.
1271 */
1272 #ifdef DEBUG
1273 #endif
1274 
1275 abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
1276 		&systemerror);
1277 if(systemerror)
1278 {       ReportError(errorcontext,systemerror);
1279 	ErrorExit();
1280 }
1281 
1282 bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
1283 		&systemerror);
1284 if(systemerror)
1285 {       ReportError(errorcontext,systemerror);
1286 	FreeMemory((farvoid *)abase,&systemerror);
1287 	ErrorExit();
1288 }
1289 
1290 cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
1291 		&systemerror);
1292 if(systemerror)
1293 {       ReportError(errorcontext,systemerror);
1294 	FreeMemory((farvoid *)abase,&systemerror);
1295 	FreeMemory((farvoid *)bbase,&systemerror);
1296 	ErrorExit();
1297 }
1298 
1299 /*
1300 ** Set up the arrays
1301 */
1302 SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize);
1303 
1304 /*
1305 ** See if we need to do self-adjusting code.
1306 */
1307 if(locemfloatstruct->adjust==0)
1308 {
1309 	locemfloatstruct->loops=0;
1310 
1311 	/*
1312 	** Do an iteration of the tests.  If the elapsed time is
1313 	** less than minimum, increase the loop count and try
1314 	** again.
1315 	*/
1316 	for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops)
1317 	{       tickcount=DoEmFloatIteration(abase,bbase,cbase,
1318 			locemfloatstruct->arraysize,
1319 			loops);
1320 		if(tickcount>global_min_ticks)
1321 		{       locemfloatstruct->loops=loops;
1322 			break;
1323 		}
1324 	}
1325 }
1326 
1327 /*
1328 ** Verify that selft adjustment code worked.
1329 */
1330 if(locemfloatstruct->loops==0)
1331 {       printf("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit\n");
1332 	FreeMemory((farvoid *)abase,&systemerror);
1333 	FreeMemory((farvoid *)bbase,&systemerror);
1334 	FreeMemory((farvoid *)cbase,&systemerror);
1335 	ErrorExit();
1336 }
1337 
1338 /*
1339 ** All's well if we get here.  Repeatedly perform floating
1340 ** tests until the accumulated time is greater than the
1341 ** # of seconds requested.
1342 ** Each iteration performs arraysize * 3 operations.
1343 */
1344 accumtime=0L;
1345 iterations=(double)0.0;
1346 do {
1347 	accumtime+=DoEmFloatIteration(abase,bbase,cbase,
1348 			locemfloatstruct->arraysize,
1349 			locemfloatstruct->loops);
1350 	iterations+=(double)1.0;
1351 } while(TicksToSecs(accumtime)<locemfloatstruct->request_secs);
1352 
1353 
1354 /*
1355 ** Clean up, calculate results, and go home.
1356 ** Also, indicate that adjustment is done.
1357 */
1358 FreeMemory((farvoid *)abase,&systemerror);
1359 FreeMemory((farvoid *)bbase,&systemerror);
1360 FreeMemory((farvoid *)cbase,&systemerror);
1361 
1362 locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/
1363 		(double)TicksToFracSecs(accumtime);
1364 if(locemfloatstruct->adjust==0)
1365 	locemfloatstruct->adjust=1;
1366 
1367 #ifdef DEBUG
1368 printf("----------------------------------------------------------------------------\n");
1369 #endif
1370 return;
1371 }
1372 
1373 /*************************
1374 ** FOURIER COEFFICIENTS **
1375 *************************/
1376 
1377 /**************
1378 ** DoFourier **
1379 ***************
1380 ** Perform the transcendental/trigonometric portion of the
1381 ** benchmark.  This benchmark calculates the first n
1382 ** fourier coefficients of the function (x+1)^x defined
1383 ** on the interval 0,2.
1384 */
DoFourier(void)1385 void DoFourier(void)
1386 {
1387 FourierStruct *locfourierstruct;        /* Local fourier struct */
1388 fardouble *abase;               /* Base of A[] coefficients array */
1389 fardouble *bbase;               /* Base of B[] coefficients array */
1390 unsigned long accumtime;        /* Accumulated time in ticks */
1391 double iterations;              /* # of iterations */
1392 char *errorcontext;             /* Error context string pointer */
1393 int systemerror;                /* For error code */
1394 
1395 /*
1396 ** Link to global structure
1397 */
1398 locfourierstruct=&global_fourierstruct;
1399 
1400 /*
1401 ** Set error context string
1402 */
1403 errorcontext="FPU:Transcendental";
1404 
1405 /*
1406 ** See if we need to do self-adjustment code.
1407 */
1408 if(locfourierstruct->adjust==0)
1409 {
1410 	locfourierstruct->arraysize=100L;       /* Start at 100 elements */
1411 	while(1)
1412 	{
1413 
1414 		abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
1415 				&systemerror);
1416 		if(systemerror)
1417 		{       ReportError(errorcontext,systemerror);
1418 			ErrorExit();
1419 		}
1420 
1421 		bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
1422 				&systemerror);
1423 		if(systemerror)
1424 		{       ReportError(errorcontext,systemerror);
1425 			FreeMemory((void *)abase,&systemerror);
1426 			ErrorExit();
1427 		}
1428 		/*
1429 		** Do an iteration of the tests.  If the elapsed time is
1430 		** less than or equal to the permitted minimum, re-allocate
1431 		** larger arrays and try again.
1432 		*/
1433 		if(DoFPUTransIteration(abase,bbase,
1434 			locfourierstruct->arraysize)>global_min_ticks)
1435 			break;          /* We're ok...exit */
1436 
1437 		/*
1438 		** Make bigger arrays and try again.
1439 		*/
1440 		FreeMemory((farvoid *)abase,&systemerror);
1441 		FreeMemory((farvoid *)bbase,&systemerror);
1442 		locfourierstruct->arraysize+=50L;
1443 	}
1444 }
1445 else
1446 {       /*
1447 	** Don't need self-adjustment.  Just allocate the
1448 	** arrays, and go.
1449 	*/
1450 	abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
1451 			&systemerror);
1452 	if(systemerror)
1453 	{       ReportError(errorcontext,systemerror);
1454 		ErrorExit();
1455 	}
1456 
1457 	bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
1458 			&systemerror);
1459 	if(systemerror)
1460 	{       ReportError(errorcontext,systemerror);
1461 		FreeMemory((void *)abase,&systemerror);
1462 		ErrorExit();
1463 	}
1464 }
1465 /*
1466 ** All's well if we get here.  Repeatedly perform integration
1467 ** tests until the accumulated time is greater than the
1468 ** # of seconds requested.
1469 */
1470 accumtime=0L;
1471 iterations=(double)0.0;
1472 do {
1473 	accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize);
1474 	iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0;
1475 } while(TicksToSecs(accumtime)<locfourierstruct->request_secs);
1476 
1477 
1478 /*
1479 ** Clean up, calculate results, and go home.
1480 ** Also set adjustment flag to indicate no adjust code needed.
1481 */
1482 FreeMemory((farvoid *)abase,&systemerror);
1483 FreeMemory((farvoid *)bbase,&systemerror);
1484 
1485 locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime);
1486 
1487 if(locfourierstruct->adjust==0)
1488 	locfourierstruct->adjust=1;
1489 
1490 return;
1491 }
1492 
1493 /************************
1494 ** DoFPUTransIteration **
1495 *************************
1496 ** Perform an iteration of the FPU Transcendental/trigonometric
1497 ** benchmark.  Here, an iteration consists of calculating the
1498 ** first n fourier coefficients of the function (x+1)^x on
1499 ** the interval 0,2.  n is given by arraysize.
1500 ** NOTE: The # of integration steps is fixed at
1501 ** 200.
1502 */
DoFPUTransIteration(fardouble * abase,fardouble * bbase,ulong arraysize)1503 static ulong DoFPUTransIteration(fardouble *abase,      /* A coeffs. */
1504 			fardouble *bbase,               /* B coeffs. */
1505 			ulong arraysize)                /* # of coeffs */
1506 {
1507 double omega;           /* Fundamental frequency */
1508 unsigned long i;        /* Index */
1509 unsigned long elapsed;  /* Elapsed time */
1510 
1511 /*
1512 ** Start the stopwatch
1513 */
1514 elapsed=StartStopwatch();
1515 
1516 /*
1517 ** Calculate the fourier series.  Begin by
1518 ** calculating A[0].
1519 */
1520 
1521 *abase=TrapezoidIntegrate((double)0.0,
1522 			(double)2.0,
1523 			200,
1524 			(double)0.0,    /* No omega * n needed */
1525 			0 )/(double)2.0;
1526 
1527 /*
1528 ** Calculate the fundamental frequency.
1529 ** ( 2 * pi ) / period...and since the period
1530 ** is 2, omega is simply pi.
1531 */
1532 omega=(double)3.1415926535897932;
1533 
1534 for(i=1;i<arraysize;i++)
1535 {
1536 
1537 	/*
1538 	** Calculate A[i] terms.  Note, once again, that we
1539 	** can ignore the 2/period term outside the integral
1540 	** since the period is 2 and the term cancels itself
1541 	** out.
1542 	*/
1543 	*(abase+i)=TrapezoidIntegrate((double)0.0,
1544 			(double)2.0,
1545 			200,
1546 			omega * (double)i,
1547 			1);
1548 
1549 	/*
1550 	** Calculate the B[i] terms.
1551 	*/
1552 	*(bbase+i)=TrapezoidIntegrate((double)0.0,
1553 			(double)2.0,
1554 			200,
1555 			omega * (double)i,
1556 			2);
1557 
1558 }
1559 #ifdef DEBUG
1560 {
1561   int i;
1562   printf("\nA[i]=\n");
1563   for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]);
1564   printf("\nB[i]=\n(undefined) ");
1565   for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]);
1566 }
1567 #endif
1568 /*
1569 ** All done, stop the stopwatch
1570 */
1571 return(StopStopwatch(elapsed));
1572 }
1573 
1574 /***********************
1575 ** TrapezoidIntegrate **
1576 ************************
1577 ** Perform a simple trapezoid integration on the
1578 ** function (x+1)**x.
1579 ** x0,x1 set the lower and upper bounds of the
1580 ** integration.
1581 ** nsteps indicates # of trapezoidal sections
1582 ** omegan is the fundamental frequency times
1583 **  the series member #
1584 ** select = 0 for the A[0] term, 1 for cosine terms, and
1585 **   2 for sine terms.
1586 ** Returns the value.
1587 */
TrapezoidIntegrate(double x0,double x1,int nsteps,double omegan,int select)1588 static double TrapezoidIntegrate( double x0,            /* Lower bound */
1589 			double x1,              /* Upper bound */
1590 			int nsteps,             /* # of steps */
1591 			double omegan,          /* omega * n */
1592 			int select)
1593 {
1594 double x;               /* Independent variable */
1595 double dx;              /* Stepsize */
1596 double rvalue;          /* Return value */
1597 
1598 
1599 /*
1600 ** Initialize independent variable
1601 */
1602 x=x0;
1603 
1604 /*
1605 ** Calculate stepsize
1606 */
1607 dx=(x1 - x0) / (double)nsteps;
1608 
1609 /*
1610 ** Initialize the return value.
1611 */
1612 rvalue=thefunction(x0,omegan,select)/(double)2.0;
1613 
1614 /*
1615 ** Compute the other terms of the integral.
1616 */
1617 if(nsteps!=1)
1618 {       --nsteps;               /* Already done 1 step */
1619 	while(--nsteps )
1620 	{
1621 		x+=dx;
1622 		rvalue+=thefunction(x,omegan,select);
1623 	}
1624 }
1625 /*
1626 ** Finish computation
1627 */
1628 rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx;
1629 
1630 return(rvalue);
1631 }
1632 
1633 /****************
1634 ** thefunction **
1635 *****************
1636 ** This routine selects the function to be used
1637 ** in the Trapezoid integration.
1638 ** x is the independent variable
1639 ** omegan is omega * n
1640 ** select chooses which of the sine/cosine functions
1641 **  are used.  note the special case for select=0.
1642 */
thefunction(double x,double omegan,int select)1643 static double thefunction(double x,             /* Independent variable */
1644 		double omegan,          /* Omega * term */
1645 		int select)             /* Choose term */
1646 {
1647 
1648 /*
1649 ** Use select to pick which function we call.
1650 */
1651 switch(select)
1652 {
1653 	case 0: return(pow(x+(double)1.0,x));
1654 
1655 	case 1: return(pow(x+(double)1.0,x) * cos(omegan * x));
1656 
1657 	case 2: return(pow(x+(double)1.0,x) * sin(omegan * x));
1658 }
1659 
1660 /*
1661 ** We should never reach this point, but the following
1662 ** keeps compilers from issuing a warning message.
1663 */
1664 return(0.0);
1665 }
1666 
1667 /*************************
1668 ** ASSIGNMENT ALGORITHM **
1669 *************************/
1670 
1671 /*************
1672 ** DoAssign **
1673 **************
1674 ** Perform an assignment algorithm.
1675 ** The algorithm was adapted from the step by step guide found
1676 ** in "Quantitative Decision Making for Business" (Gordon,
1677 **  Pressman, and Cohn; Prentice-Hall)
1678 **
1679 **
1680 ** NOTES:
1681 ** 1. Even though the algorithm distinguishes between
1682 **    ASSIGNROWS and ASSIGNCOLS, as though the two might
1683 **    be different, it does presume a square matrix.
1684 **    I.E., ASSIGNROWS and ASSIGNCOLS must be the same.
1685 **    This makes for some algorithmically-correct but
1686 **    probably non-optimal constructs.
1687 **
1688 */
DoAssign(void)1689 void DoAssign(void)
1690 {
1691 AssignStruct *locassignstruct;  /* Local structure ptr */
1692 farlong *arraybase;
1693 char *errorcontext;
1694 int systemerror;
1695 ulong accumtime;
1696 double iterations;
1697 
1698 /*
1699 ** Link to global structure
1700 */
1701 locassignstruct=&global_assignstruct;
1702 
1703 /*
1704 ** Set the error context string.
1705 */
1706 errorcontext="CPU:Assignment";
1707 
1708 /*
1709 ** See if we need to do self adjustment code.
1710 */
1711 if(locassignstruct->adjust==0)
1712 {
1713 	/*
1714 	** Self-adjustment code.  The system begins by working on 1
1715 	** array.  If it does that in no time, then two arrays
1716 	** are built.  This process continues until
1717 	** enough arrays are built to handle the tolerance.
1718 	*/
1719 	locassignstruct->numarrays=1;
1720 	while(1)
1721 	{
1722 		/*
1723 		** Allocate space for arrays
1724 		*/
1725 		arraybase=(farlong *) AllocateMemory(sizeof(long)*
1726 			ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
1727 			 &systemerror);
1728 		if(systemerror)
1729 		{       ReportError(errorcontext,systemerror);
1730 			FreeMemory((farvoid *)arraybase,
1731 			  &systemerror);
1732 			ErrorExit();
1733 		}
1734 
1735 		/*
1736 		** Do an iteration of the assignment alg.  If the
1737 		** elapsed time is less than or equal to the permitted
1738 		** minimum, then allocate for more arrays and
1739 		** try again.
1740 		*/
1741 		if(DoAssignIteration(arraybase,
1742 			locassignstruct->numarrays)>global_min_ticks)
1743 			break;          /* We're ok...exit */
1744 
1745 		FreeMemory((farvoid *)arraybase, &systemerror);
1746 		locassignstruct->numarrays++;
1747 	}
1748 }
1749 else
1750 {       /*
1751 	** Allocate space for arrays
1752 	*/
1753 	arraybase=(farlong *)AllocateMemory(sizeof(long)*
1754 		ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
1755 		 &systemerror);
1756 	if(systemerror)
1757 	{       ReportError(errorcontext,systemerror);
1758 		FreeMemory((farvoid *)arraybase,
1759 		  &systemerror);
1760 		ErrorExit();
1761 	}
1762 }
1763 
1764 /*
1765 ** All's well if we get here.  Do the tests.
1766 */
1767 accumtime=0L;
1768 iterations=(double)0.0;
1769 
1770 do {
1771 	accumtime+=DoAssignIteration(arraybase,
1772 		locassignstruct->numarrays);
1773 	iterations+=(double)1.0;
1774 } while(TicksToSecs(accumtime)<locassignstruct->request_secs);
1775 
1776 /*
1777 ** Clean up, calculate results, and go home.  Be sure to
1778 ** show that we don't have to rerun adjustment code.
1779 */
1780 FreeMemory((farvoid *)arraybase,&systemerror);
1781 
1782 locassignstruct->iterspersec=iterations *
1783 	(double)locassignstruct->numarrays / TicksToFracSecs(accumtime);
1784 
1785 if(locassignstruct->adjust==0)
1786 	locassignstruct->adjust=1;
1787 
1788 return;
1789 
1790 }
1791 
1792 /**********************
1793 ** DoAssignIteration **
1794 ***********************
1795 ** This routine executes one iteration of the assignment test.
1796 ** It returns the number of ticks elapsed in the iteration.
1797 */
DoAssignIteration(farlong * arraybase,ulong numarrays)1798 static ulong DoAssignIteration(farlong *arraybase,
1799 	ulong numarrays)
1800 {
1801 longptr abase;                  /* local pointer */
1802 ulong elapsed;          /* Elapsed ticks */
1803 ulong i;
1804 
1805 /*
1806 ** Set up local pointer
1807 */
1808 abase.ptrs.p=arraybase;
1809 
1810 /*
1811 ** Load up the arrays with a random table.
1812 */
1813 LoadAssignArrayWithRand(arraybase,numarrays);
1814 
1815 /*
1816 ** Start the stopwatch
1817 */
1818 elapsed=StartStopwatch();
1819 
1820 /*
1821 ** Execute assignment algorithms
1822 */
1823 for(i=0;i<numarrays;i++)
1824 {       /* abase.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
1825         /* Fixed  by Eike Dierks */
1826 	Assignment(*abase.ptrs.ap);
1827 	abase.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
1828 }
1829 
1830 /*
1831 ** Get elapsed time
1832 */
1833 return(StopStopwatch(elapsed));
1834 }
1835 
1836 /****************************
1837 ** LoadAssignArrayWithRand **
1838 *****************************
1839 ** Load the assignment arrays with random numbers.  All positive.
1840 ** These numbers represent costs.
1841 */
LoadAssignArrayWithRand(farlong * arraybase,ulong numarrays)1842 static void LoadAssignArrayWithRand(farlong *arraybase,
1843 	ulong numarrays)
1844 {
1845 longptr abase,abase1;   /* Local for array pointer */
1846 ulong i;
1847 
1848 /*
1849 ** Set local array pointer
1850 */
1851 abase.ptrs.p=arraybase;
1852 abase1.ptrs.p=arraybase;
1853 
1854 /*
1855 ** Set up the first array.  Then just copy it into the
1856 ** others.
1857 */
1858 LoadAssign(*(abase.ptrs.ap));
1859 if(numarrays>1)
1860 	for(i=1;i<numarrays;i++)
1861 	  {     /* abase1.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
1862 	        /* Fixed  by Eike Dierks */
1863 	        abase1.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
1864 		CopyToAssign(*(abase.ptrs.ap),*(abase1.ptrs.ap));
1865 	}
1866 
1867 return;
1868 }
1869 
1870 /***************
1871 ** LoadAssign **
1872 ****************
1873 ** The array given by arraybase is loaded with positive random
1874 ** numbers.  Elements in the array are capped at 5,000,000.
1875 */
LoadAssign(farlong arraybase[][ASSIGNCOLS])1876 static void LoadAssign(farlong arraybase[][ASSIGNCOLS])
1877 {
1878 ushort i,j;
1879 
1880 /*
1881 ** Reset random number generator so things repeat.
1882 */
1883 /* randnum(13L); */
1884 randnum((int32)13);
1885 
1886 for(i=0;i<ASSIGNROWS;i++)
1887   for(j=0;j<ASSIGNROWS;j++){
1888     /* arraybase[i][j]=abs_randwc(5000000L);*/
1889     arraybase[i][j]=abs_randwc((int32)5000000);
1890   }
1891 
1892 return;
1893 }
1894 
1895 /*****************
1896 ** CopyToAssign **
1897 ******************
1898 ** Copy the contents of one array to another.  This is called by
1899 ** the routine that builds the initial array, and is used to copy
1900 ** the contents of the intial array into all following arrays.
1901 */
CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],farlong arrayto[ASSIGNROWS][ASSIGNCOLS])1902 static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],
1903 		farlong arrayto[ASSIGNROWS][ASSIGNCOLS])
1904 {
1905 ushort i,j;
1906 
1907 for(i=0;i<ASSIGNROWS;i++)
1908 	for(j=0;j<ASSIGNCOLS;j++)
1909 		arrayto[i][j]=arrayfrom[i][j];
1910 
1911 return;
1912 }
1913 
1914 /***************
1915 ** Assignment **
1916 ***************/
Assignment(farlong arraybase[][ASSIGNCOLS])1917 static void Assignment(farlong arraybase[][ASSIGNCOLS])
1918 {
1919 short assignedtableau[ASSIGNROWS][ASSIGNCOLS];
1920 
1921 /*
1922 ** First, calculate minimum costs
1923 */
1924 calc_minimum_costs(arraybase);
1925 
1926 /*
1927 ** Repeat following until the number of rows selected
1928 ** equals the number of rows in the tableau.
1929 */
1930 while(first_assignments(arraybase,assignedtableau)!=ASSIGNROWS)
1931 {         second_assignments(arraybase,assignedtableau);
1932 }
1933 
1934 #ifdef DEBUG
1935 {
1936 	int i,j;
1937 	printf("\nColumn choices for each row\n");
1938 	for(i=0;i<ASSIGNROWS;i++)
1939 	{
1940 	        printf("R%03d: ",i);
1941 		for(j=0;j<ASSIGNCOLS;j++)
1942 			if(assignedtableau[i][j]==1)
1943 				printf("%03d ",j);
1944 	}
1945 }
1946 #endif
1947 
1948 return;
1949 }
1950 
1951 /***********************
1952 ** calc_minimum_costs **
1953 ************************
1954 ** Revise the tableau by calculating the minimum costs on a
1955 ** row and column basis.  These minima are subtracted from
1956 ** their rows and columns, creating a new tableau.
1957 */
calc_minimum_costs(long tableau[][ASSIGNCOLS])1958 static void calc_minimum_costs(long tableau[][ASSIGNCOLS])
1959 {
1960 ushort i,j;              /* Index variables */
1961 long currentmin;        /* Current minimum */
1962 /*
1963 ** Determine minimum costs on row basis.  This is done by
1964 ** subtracting -- on a row-per-row basis -- the minum value
1965 ** for that row.
1966 */
1967 for(i=0;i<ASSIGNROWS;i++)
1968 {
1969 	currentmin=MAXPOSLONG;  /* Initialize minimum */
1970 	for(j=0;j<ASSIGNCOLS;j++)
1971 		if(tableau[i][j]<currentmin)
1972 			currentmin=tableau[i][j];
1973 
1974 	for(j=0;j<ASSIGNCOLS;j++)
1975 		tableau[i][j]-=currentmin;
1976 }
1977 
1978 /*
1979 ** Determine minimum cost on a column basis.  This works
1980 ** just as above, only now we step through the array
1981 ** column-wise
1982 */
1983 for(j=0;j<ASSIGNCOLS;j++)
1984 {
1985 	currentmin=MAXPOSLONG;  /* Initialize minimum */
1986 	for(i=0;i<ASSIGNROWS;i++)
1987 		if(tableau[i][j]<currentmin)
1988 			currentmin=tableau[i][j];
1989 
1990 	/*
1991 	** Here, we'll take the trouble to see if the current
1992 	** minimum is zero.  This is likely worth it, since the
1993 	** preceding loop will have created at least one zero in
1994 	** each row.  We can save ourselves a few iterations.
1995 	*/
1996 	if(currentmin!=0)
1997 		for(i=0;i<ASSIGNROWS;i++)
1998 			tableau[i][j]-=currentmin;
1999 }
2000 
2001 return;
2002 }
2003 
2004 /**********************
2005 ** first_assignments **
2006 ***********************
2007 ** Do first assignments.
2008 ** The assignedtableau[] array holds a set of values that
2009 ** indicate the assignment of a value, or its elimination.
2010 ** The values are:
2011 **      0 = Item is neither assigned nor eliminated.
2012 **      1 = Item is assigned
2013 **      2 = Item is eliminated
2014 ** Returns the number of selections made.  If this equals
2015 ** the number of rows, then an optimum has been determined.
2016 */
first_assignments(long tableau[][ASSIGNCOLS],short assignedtableau[][ASSIGNCOLS])2017 static int first_assignments(long tableau[][ASSIGNCOLS],
2018 		short assignedtableau[][ASSIGNCOLS])
2019 {
2020 ushort i,j,k;                   /* Index variables */
2021 ushort numassigns;              /* # of assignments */
2022 ushort totnumassigns;           /* Total # of assignments */
2023 ushort numzeros;                /* # of zeros in row */
2024 int selected=0;                 /* Flag used to indicate selection */
2025 
2026 /*
2027 ** Clear the assignedtableau, setting all members to show that
2028 ** no one is yet assigned, eliminated, or anything.
2029 */
2030 for(i=0;i<ASSIGNROWS;i++)
2031 	for(j=0;j<ASSIGNCOLS;j++)
2032 		assignedtableau[i][j]=0;
2033 
2034 totnumassigns=0;
2035 do {
2036 	numassigns=0;
2037 	/*
2038 	** Step through rows.  For each one that is not currently
2039 	** assigned, see if the row has only one zero in it.  If so,
2040 	** mark that as an assigned row/col.  Eliminate other zeros
2041 	** in the same column.
2042 	*/
2043 	for(i=0;i<ASSIGNROWS;i++)
2044 	{       numzeros=0;
2045 		for(j=0;j<ASSIGNCOLS;j++)
2046 			if(tableau[i][j]==0L)
2047 				if(assignedtableau[i][j]==0)
2048 				{       numzeros++;
2049 					selected=j;
2050 				}
2051 		if(numzeros==1)
2052 		{       numassigns++;
2053 			totnumassigns++;
2054 			assignedtableau[i][selected]=1;
2055 			for(k=0;k<ASSIGNROWS;k++)
2056 				if((k!=i) &&
2057 				   (tableau[k][selected]==0))
2058 					assignedtableau[k][selected]=2;
2059 		}
2060 	}
2061 	/*
2062 	** Step through columns, doing same as above.  Now, be careful
2063 	** of items in the other rows of a selected column.
2064 	*/
2065 	for(j=0;j<ASSIGNCOLS;j++)
2066 	{       numzeros=0;
2067 		for(i=0;i<ASSIGNROWS;i++)
2068 			if(tableau[i][j]==0L)
2069 				if(assignedtableau[i][j]==0)
2070 				{       numzeros++;
2071 					selected=i;
2072 				}
2073 		if(numzeros==1)
2074 		{       numassigns++;
2075 			totnumassigns++;
2076 			assignedtableau[selected][j]=1;
2077 			for(k=0;k<ASSIGNCOLS;k++)
2078 				if((k!=j) &&
2079 				   (tableau[selected][k]==0))
2080 					assignedtableau[selected][k]=2;
2081 		}
2082 	}
2083 	/*
2084 	** Repeat until no more assignments to be made.
2085 	*/
2086 } while(numassigns!=0);
2087 
2088 /*
2089 ** See if we can leave at this point.
2090 */
2091 if(totnumassigns==ASSIGNROWS) return(totnumassigns);
2092 
2093 /*
2094 ** Now step through the array by row.  If you find any unassigned
2095 ** zeros, pick the first in the row.  Eliminate all zeros from
2096 ** that same row & column.  This occurs if there are multiple optima...
2097 ** possibly.
2098 */
2099 for(i=0;i<ASSIGNROWS;i++)
2100 {       selected=-1;
2101 	for(j=0;j<ASSIGNCOLS;j++)
2102 		if((tableau[i][j]==0L) &&
2103 		   (assignedtableau[i][j]==0))
2104 		{       selected=j;
2105 			break;
2106 		}
2107 	if(selected!=-1)
2108 	{       assignedtableau[i][selected]=1;
2109 		totnumassigns++;
2110 		for(k=0;k<ASSIGNCOLS;k++)
2111 			if((k!=selected) &&
2112 			   (tableau[i][k]==0L))
2113 				assignedtableau[i][k]=2;
2114 		for(k=0;k<ASSIGNROWS;k++)
2115 			if((k!=i) &&
2116 			   (tableau[k][selected]==0L))
2117 				assignedtableau[k][selected]=2;
2118 	}
2119 }
2120 
2121 return(totnumassigns);
2122 }
2123 
2124 /***********************
2125 ** second_assignments **
2126 ************************
2127 ** This section of the algorithm creates the revised
2128 ** tableau, and is difficult to explain.  I suggest you
2129 ** refer to the algorithm's source, mentioned in comments
2130 ** toward the beginning of the program.
2131 */
second_assignments(long tableau[][ASSIGNCOLS],short assignedtableau[][ASSIGNCOLS])2132 static void second_assignments(long tableau[][ASSIGNCOLS],
2133 		short assignedtableau[][ASSIGNCOLS])
2134 {
2135 int i,j;                                /* Indexes */
2136 short linesrow[ASSIGNROWS];
2137 short linescol[ASSIGNCOLS];
2138 long smallest;                          /* Holds smallest value */
2139 ushort numassigns;                      /* Number of assignments */
2140 ushort newrows;                         /* New rows to be considered */
2141 /*
2142 ** Clear the linesrow and linescol arrays.
2143 */
2144 for(i=0;i<ASSIGNROWS;i++)
2145 	linesrow[i]=0;
2146 for(i=0;i<ASSIGNCOLS;i++)
2147 	linescol[i]=0;
2148 
2149 /*
2150 ** Scan rows, flag each row that has no assignment in it.
2151 */
2152 for(i=0;i<ASSIGNROWS;i++)
2153 {       numassigns=0;
2154 	for(j=0;j<ASSIGNCOLS;j++)
2155 		if(assignedtableau[i][j]==1)
2156 		{       numassigns++;
2157 			break;
2158 		}
2159 	if(numassigns==0) linesrow[i]=1;
2160 }
2161 
2162 do {
2163 
2164 	newrows=0;
2165 	/*
2166 	** For each row checked above, scan for any zeros.  If found,
2167 	** check the associated column.
2168 	*/
2169 	for(i=0;i<ASSIGNROWS;i++)
2170 	{       if(linesrow[i]==1)
2171 			for(j=0;j<ASSIGNCOLS;j++)
2172 				if(tableau[i][j]==0)
2173 					linescol[j]=1;
2174 	}
2175 
2176 	/*
2177 	** Now scan checked columns.  If any contain assigned zeros, check
2178 	** the associated row.
2179 	*/
2180 	for(j=0;j<ASSIGNCOLS;j++)
2181 		if(linescol[j]==1)
2182 			for(i=0;i<ASSIGNROWS;i++)
2183 				if((assignedtableau[i][j]==1) &&
2184 					(linesrow[i]!=1))
2185 				{
2186 					linesrow[i]=1;
2187 					newrows++;
2188 				}
2189 } while(newrows!=0);
2190 
2191 /*
2192 ** linesrow[n]==0 indicate rows covered by imaginary line
2193 ** linescol[n]==1 indicate cols covered by imaginary line
2194 ** For all cells not covered by imaginary lines, determine smallest
2195 ** value.
2196 */
2197 smallest=MAXPOSLONG;
2198 for(i=0;i<ASSIGNROWS;i++)
2199 	if(linesrow[i]!=0)
2200 		for(j=0;j<ASSIGNCOLS;j++)
2201 			if(linescol[j]!=1)
2202 				if(tableau[i][j]<smallest)
2203 					smallest=tableau[i][j];
2204 
2205 /*
2206 ** Subtract smallest from all cells in the above set.
2207 */
2208 for(i=0;i<ASSIGNROWS;i++)
2209 	if(linesrow[i]!=0)
2210 		for(j=0;j<ASSIGNCOLS;j++)
2211 			if(linescol[j]!=1)
2212 				tableau[i][j]-=smallest;
2213 
2214 /*
2215 ** Add smallest to all cells covered by two lines.
2216 */
2217 for(i=0;i<ASSIGNROWS;i++)
2218 	if(linesrow[i]==0)
2219 		for(j=0;j<ASSIGNCOLS;j++)
2220 			if(linescol[j]==1)
2221 				tableau[i][j]+=smallest;
2222 
2223 return;
2224 }
2225 
2226 /********************
2227 ** IDEA Encryption **
2228 *********************
2229 ** IDEA - International Data Encryption Algorithm.
2230 ** Based on code presented in Applied Cryptography by Bruce Schneier.
2231 ** Which was based on code developed by Xuejia Lai and James L. Massey.
2232 ** Other modifications made by Colin Plumb.
2233 **
2234 */
2235 
2236 /***********
2237 ** DoIDEA **
2238 ************
2239 ** Perform IDEA encryption.  Note that we time encryption & decryption
2240 ** time as being a single loop.
2241 */
DoIDEA(void)2242 void DoIDEA(void)
2243 {
2244 IDEAStruct *locideastruct;      /* Loc pointer to global structure */
2245 int i;
2246 IDEAkey Z,DK;
2247 u16 userkey[8];
2248 ulong accumtime;
2249 double iterations;
2250 char *errorcontext;
2251 int systemerror;
2252 faruchar *plain1;               /* First plaintext buffer */
2253 faruchar *crypt1;               /* Encryption buffer */
2254 faruchar *plain2;               /* Second plaintext buffer */
2255 
2256 /*
2257 ** Link to global data
2258 */
2259 locideastruct=&global_ideastruct;
2260 
2261 /*
2262 ** Set error context
2263 */
2264 errorcontext="CPU:IDEA";
2265 
2266 /*
2267 ** Re-init random-number generator.
2268 */
2269 /* randnum(3L); */
2270 randnum((int32)3);
2271 
2272 /*
2273 ** Build an encryption/decryption key
2274 */
2275 for (i=0;i<8;i++)
2276         /* userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); */
2277 	userkey[i]=(u16)(abs_randwc((int32)60000) & 0xFFFF);
2278 for(i=0;i<KEYLEN;i++)
2279 	Z[i]=0;
2280 
2281 /*
2282 ** Compute encryption/decryption subkeys
2283 */
2284 en_key_idea(userkey,Z);
2285 de_key_idea(Z,DK);
2286 
2287 /*
2288 ** Allocate memory for buffers.  We'll make 3, called plain1,
2289 ** crypt1, and plain2.  It works like this:
2290 **   plain1 >>encrypt>> crypt1 >>decrypt>> plain2.
2291 ** So, plain1 and plain2 should match.
2292 ** Also, fill up plain1 with sample text.
2293 */
2294 plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
2295 if(systemerror)
2296 {
2297 	ReportError(errorcontext,systemerror);
2298 	ErrorExit();
2299 }
2300 
2301 crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
2302 if(systemerror)
2303 {
2304 	ReportError(errorcontext,systemerror);
2305 	FreeMemory((farvoid *)plain1,&systemerror);
2306 	ErrorExit();
2307 }
2308 
2309 plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
2310 if(systemerror)
2311 {
2312 	ReportError(errorcontext,systemerror);
2313 	FreeMemory((farvoid *)plain1,&systemerror);
2314 	FreeMemory((farvoid *)crypt1,&systemerror);
2315 	ErrorExit();
2316 }
2317 /*
2318 ** Note that we build the "plaintext" by simply loading
2319 ** the array up with random numbers.
2320 */
2321 for(i=0;i<locideastruct->arraysize;i++)
2322 	plain1[i]=(uchar)(abs_randwc(255) & 0xFF);
2323 
2324 /*
2325 ** See if we need to perform self adjustment loop.
2326 */
2327 if(locideastruct->adjust==0)
2328 {
2329 	/*
2330 	** Do self-adjustment.  This involves initializing the
2331 	** # of loops and increasing the loop count until we
2332 	** get a number of loops that we can use.
2333 	*/
2334 	for(locideastruct->loops=100L;
2335 	  locideastruct->loops<MAXIDEALOOPS;
2336 	  locideastruct->loops+=10L)
2337 		if(DoIDEAIteration(plain1,crypt1,plain2,
2338 		  locideastruct->arraysize,
2339 		  locideastruct->loops,
2340 		  Z,DK)>global_min_ticks) break;
2341 }
2342 
2343 /*
2344 ** All's well if we get here.  Do the test.
2345 */
2346 accumtime=0L;
2347 iterations=(double)0.0;
2348 
2349 do {
2350 	accumtime+=DoIDEAIteration(plain1,crypt1,plain2,
2351 		locideastruct->arraysize,
2352 		locideastruct->loops,Z,DK);
2353 	iterations+=(double)locideastruct->loops;
2354 } while(TicksToSecs(accumtime)<locideastruct->request_secs);
2355 
2356 /*
2357 ** Clean up, calculate results, and go home.  Be sure to
2358 ** show that we don't have to rerun adjustment code.
2359 */
2360 FreeMemory((farvoid *)plain1,&systemerror);
2361 FreeMemory((farvoid *)crypt1,&systemerror);
2362 FreeMemory((farvoid *)plain2,&systemerror);
2363 locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime);
2364 
2365 if(locideastruct->adjust==0)
2366 	locideastruct->adjust=1;
2367 
2368 return;
2369 
2370 }
2371 
2372 /********************
2373 ** DoIDEAIteration **
2374 *********************
2375 ** Execute a single iteration of the IDEA encryption algorithm.
2376 ** Actually, a single iteration is one encryption and one
2377 ** decryption.
2378 */
DoIDEAIteration(faruchar * plain1,faruchar * crypt1,faruchar * plain2,ulong arraysize,ulong nloops,IDEAkey Z,IDEAkey DK)2379 static ulong DoIDEAIteration(faruchar *plain1,
2380 			faruchar *crypt1,
2381 			faruchar *plain2,
2382 			ulong arraysize,
2383 			ulong nloops,
2384 			IDEAkey Z,
2385 			IDEAkey DK)
2386 {
2387 register ulong i;
2388 register ulong j;
2389 ulong elapsed;
2390 #ifdef DEBUG
2391 int status=0;
2392 #endif
2393 
2394 /*
2395 ** Start the stopwatch.
2396 */
2397 elapsed=StartStopwatch();
2398 
2399 /*
2400 ** Do everything for nloops.
2401 */
2402 for(i=0;i<nloops;i++)
2403 {
2404 	for(j=0;j<arraysize;j+=(sizeof(u16)*4))
2405 		cipher_idea((u16 *)(plain1+j),(u16 *)(crypt1+j),Z);       /* Encrypt */
2406 
2407 	for(j=0;j<arraysize;j+=(sizeof(u16)*4))
2408 		cipher_idea((u16 *)(crypt1+j),(u16 *)(plain2+j),DK);      /* Decrypt */
2409 }
2410 
2411 #ifdef DEBUG
2412 for(j=0;j<arraysize;j++)
2413 	if(*(plain1+j)!=*(plain2+j)){
2414 		printf("IDEA Error! \n");
2415                 status=1;
2416                 }
2417 if (status==0) printf("IDEA: OK\n");
2418 #endif
2419 
2420 /*
2421 ** Get elapsed time.
2422 */
2423 return(StopStopwatch(elapsed));
2424 }
2425 
2426 /********
2427 ** mul **
2428 *********
2429 ** Performs multiplication, modulo (2**16)+1.  This code is structured
2430 ** on the assumption that untaken branches are cheaper than taken
2431 ** branches, and that the compiler doesn't schedule branches.
2432 */
mul(register u16 a,register u16 b)2433 static u16 mul(register u16 a, register u16 b)
2434 {
2435 register u32 p;
2436 if(a)
2437 {       if(b)
2438 	{       p=(u32)(a*b);
2439 		b=low16(p);
2440 		a=(u16)(p>>16);
2441 		return(b-a+(b<a));
2442 	}
2443 	else
2444 		return(1-a);
2445 }
2446 else
2447 	return(1-b);
2448 }
2449 
2450 /********
2451 ** inv **
2452 *********
2453 ** Compute multiplicative inverse of x, modulo (2**16)+1
2454 ** using Euclid's GCD algorithm.  It is unrolled twice
2455 ** to avoid swapping the meaning of the registers.  And
2456 ** some subtracts are changed to adds.
2457 */
inv(u16 x)2458 static u16 inv(u16 x)
2459 {
2460 u16 t0, t1;
2461 u16 q, y;
2462 
2463 if(x<=1)
2464 	return(x);      /* 0 and 1 are self-inverse */
2465 t1=0x10001 / x;
2466 y=0x10001 % x;
2467 if(y==1)
2468 	return(low16(1-t1));
2469 t0=1;
2470 do {
2471 	q=x/y;
2472 	x=x%y;
2473 	t0+=q*t1;
2474 	if(x==1) return(t0);
2475 	q=y/x;
2476 	y=y%x;
2477 	t1+=q*t0;
2478 } while(y!=1);
2479 return(low16(1-t1));
2480 }
2481 
2482 /****************
2483 ** en_key_idea **
2484 *****************
2485 ** Compute IDEA encryption subkeys Z
2486 */
en_key_idea(u16 * userkey,u16 * Z)2487 static void en_key_idea(u16 *userkey, u16 *Z)
2488 {
2489 int i,j;
2490 
2491 /*
2492 ** shifts
2493 */
2494 for(j=0;j<8;j++)
2495 	Z[j]=*userkey++;
2496 for(i=0;j<KEYLEN;j++)
2497 {       i++;
2498 	Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7);
2499 	Z+=i&8;
2500 	i&=7;
2501 }
2502 return;
2503 }
2504 
2505 /****************
2506 ** de_key_idea **
2507 *****************
2508 ** Compute IDEA decryption subkeys DK from encryption
2509 ** subkeys Z.
2510 */
de_key_idea(IDEAkey Z,IDEAkey DK)2511 static void de_key_idea(IDEAkey Z, IDEAkey DK)
2512 {
2513 IDEAkey TT;
2514 int j;
2515 u16 t1, t2, t3;
2516 u16 *p;
2517 p=(u16 *)(TT+KEYLEN);
2518 
2519 t1=inv(*Z++);
2520 t2=-*Z++;
2521 t3=-*Z++;
2522 *--p=inv(*Z++);
2523 *--p=t3;
2524 *--p=t2;
2525 *--p=t1;
2526 
2527 for(j=1;j<ROUNDS;j++)
2528 {       t1=*Z++;
2529 	*--p=*Z++;
2530 	*--p=t1;
2531 	t1=inv(*Z++);
2532 	t2=-*Z++;
2533 	t3=-*Z++;
2534 	*--p=inv(*Z++);
2535 	*--p=t2;
2536 	*--p=t3;
2537 	*--p=t1;
2538 }
2539 t1=*Z++;
2540 *--p=*Z++;
2541 *--p=t1;
2542 t1=inv(*Z++);
2543 t2=-*Z++;
2544 t3=-*Z++;
2545 *--p=inv(*Z++);
2546 *--p=t3;
2547 *--p=t2;
2548 *--p=t1;
2549 /*
2550 ** Copy and destroy temp copy
2551 */
2552 for(j=0,p=TT;j<KEYLEN;j++)
2553 {       *DK++=*p;
2554 	*p++=0;
2555 }
2556 
2557 return;
2558 }
2559 
2560 /*
2561 ** MUL(x,y)
2562 ** This #define creates a macro that computes x=x*y modulo 0x10001.
2563 ** Requires temps t16 and t32.  Also requires y to be strictly 16
2564 ** bits.  Here, I am using the simplest form.  May not be the
2565 ** fastest. -- RG
2566 */
2567 /* #define MUL(x,y) (x=mul(low16(x),y)) */
2568 
2569 /****************
2570 ** cipher_idea **
2571 *****************
2572 ** IDEA encryption/decryption algorithm.
2573 */
cipher_idea(u16 in[4],u16 out[4],register IDEAkey Z)2574 static void cipher_idea(u16 in[4],
2575 		u16 out[4],
2576 		register IDEAkey Z)
2577 {
2578 register u16 x1, x2, x3, x4, t1, t2;
2579 /* register u16 t16;
2580 register u16 t32; */
2581 int r=ROUNDS;
2582 
2583 x1=*in++;
2584 x2=*in++;
2585 x3=*in++;
2586 x4=*in;
2587 
2588 do {
2589 	MUL(x1,*Z++);
2590 	x2+=*Z++;
2591 	x3+=*Z++;
2592 	MUL(x4,*Z++);
2593 
2594 	t2=x1^x3;
2595 	MUL(t2,*Z++);
2596 	t1=t2+(x2^x4);
2597 	MUL(t1,*Z++);
2598 	t2=t1+t2;
2599 
2600 	x1^=t1;
2601 	x4^=t2;
2602 
2603 	t2^=x2;
2604 	x2=x3^t1;
2605 	x3=t2;
2606 } while(--r);
2607 MUL(x1,*Z++);
2608 *out++=x1;
2609 *out++=x3+*Z++;
2610 *out++=x2+*Z++;
2611 MUL(x4,*Z);
2612 *out=x4;
2613 return;
2614 }
2615 
2616 /************************
2617 ** HUFFMAN COMPRESSION **
2618 ************************/
2619 
2620 /**************
2621 ** DoHuffman **
2622 ***************
2623 ** Execute a huffman compression on a block of plaintext.
2624 ** Note that (as with IDEA encryption) an iteration of the
2625 ** Huffman test includes a compression AND a decompression.
2626 ** Also, the compression cycle includes building the
2627 ** Huffman tree.
2628 */
DoHuffman(void)2629 void DoHuffman(void)
2630 {
2631 HuffStruct *lochuffstruct;      /* Loc pointer to global data */
2632 char *errorcontext;
2633 int systemerror;
2634 ulong accumtime;
2635 double iterations;
2636 farchar *comparray;
2637 farchar *decomparray;
2638 farchar *plaintext;
2639 
2640 /*
2641 ** Link to global data
2642 */
2643 lochuffstruct=&global_huffstruct;
2644 
2645 /*
2646 ** Set error context.
2647 */
2648 errorcontext="CPU:Huffman";
2649 
2650 /*
2651 ** Allocate memory for the plaintext and the compressed text.
2652 ** We'll be really pessimistic here, and allocate equal amounts
2653 ** for both (though we know...well, we PRESUME) the compressed
2654 ** stuff will take less than the plain stuff.
2655 ** Also note that we'll build a 3rd buffer to decompress
2656 ** into, and we preallocate space for the huffman tree.
2657 ** (We presume that the Huffman tree will grow no larger
2658 ** than 512 bytes.  This is actually a super-conservative
2659 ** estimate...but, who cares?)
2660 */
2661 plaintext=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
2662 if(systemerror)
2663 {       ReportError(errorcontext,systemerror);
2664 	ErrorExit();
2665 }
2666 comparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
2667 if(systemerror)
2668 {       ReportError(errorcontext,systemerror);
2669 	FreeMemory(plaintext,&systemerror);
2670 	ErrorExit();
2671 }
2672 decomparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
2673 if(systemerror)
2674 {       ReportError(errorcontext,systemerror);
2675 	FreeMemory(plaintext,&systemerror);
2676 	FreeMemory(comparray,&systemerror);
2677 	ErrorExit();
2678 }
2679 
2680 hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512,
2681 	&systemerror);
2682 if(systemerror)
2683 {       ReportError(errorcontext,systemerror);
2684 	FreeMemory(plaintext,&systemerror);
2685 	FreeMemory(comparray,&systemerror);
2686 	FreeMemory(decomparray,&systemerror);
2687 	ErrorExit();
2688 }
2689 
2690 /*
2691 ** Build the plaintext buffer.  Since we want this to
2692 ** actually be able to compress, we'll use the
2693 ** wordcatalog to build the plaintext stuff.
2694 */
2695 /*
2696 ** Reset random number generator so things repeat.
2697 ** added by Uwe F. Mayer
2698 */
2699 randnum((int32)13);
2700 create_text_block(plaintext,lochuffstruct->arraysize-1,(ushort)500);
2701 plaintext[lochuffstruct->arraysize-1L]='\0';
2702 plaintextlen=lochuffstruct->arraysize;
2703 
2704 /*
2705 ** See if we need to perform self adjustment loop.
2706 */
2707 if(lochuffstruct->adjust==0)
2708 {
2709 	/*
2710 	** Do self-adjustment.  This involves initializing the
2711 	** # of loops and increasing the loop count until we
2712 	** get a number of loops that we can use.
2713 	*/
2714 	for(lochuffstruct->loops=100L;
2715 	  lochuffstruct->loops<MAXHUFFLOOPS;
2716 	  lochuffstruct->loops+=10L)
2717 		if(DoHuffIteration(plaintext,
2718 			comparray,
2719 			decomparray,
2720 		  lochuffstruct->arraysize,
2721 		  lochuffstruct->loops,
2722 		  hufftree)>global_min_ticks) break;
2723 }
2724 
2725 /*
2726 ** All's well if we get here.  Do the test.
2727 */
2728 accumtime=0L;
2729 iterations=(double)0.0;
2730 
2731 do {
2732 	accumtime+=DoHuffIteration(plaintext,
2733 		comparray,
2734 		decomparray,
2735 		lochuffstruct->arraysize,
2736 		lochuffstruct->loops,
2737 		hufftree);
2738 	iterations+=(double)lochuffstruct->loops;
2739 } while(TicksToSecs(accumtime)<lochuffstruct->request_secs);
2740 
2741 /*
2742 ** Clean up, calculate results, and go home.  Be sure to
2743 ** show that we don't have to rerun adjustment code.
2744 */
2745 FreeMemory((farvoid *)plaintext,&systemerror);
2746 FreeMemory((farvoid *)comparray,&systemerror);
2747 FreeMemory((farvoid *)decomparray,&systemerror);
2748 FreeMemory((farvoid *)hufftree,&systemerror);
2749 lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
2750 
2751 if(lochuffstruct->adjust==0)
2752 	lochuffstruct->adjust=1;
2753 
2754 }
2755 
2756 /*********************
2757 ** create_text_line **
2758 **********************
2759 ** Create a random line of text, stored at *dt.  The line may be
2760 ** no more than nchars long.
2761 */
create_text_line(farchar * dt,long nchars)2762 static void create_text_line(farchar *dt,
2763 			long nchars)
2764 {
2765 long charssofar;        /* # of characters so far */
2766 long tomove;            /* # of characters to move */
2767 char myword[40];        /* Local buffer for words */
2768 farchar *wordptr;       /* Pointer to word from catalog */
2769 
2770 charssofar=0;
2771 
2772 do {
2773 /*
2774 ** Grab a random word from the wordcatalog
2775 */
2776 /* wordptr=wordcatarray[abs_randwc((long)WORDCATSIZE)];*/
2777 wordptr=wordcatarray[abs_randwc((int32)WORDCATSIZE)];
2778 MoveMemory((farvoid *)myword,
2779 	(farvoid *)wordptr,
2780 	(unsigned long)strlen(wordptr)+1);
2781 
2782 /*
2783 ** Append a blank.
2784 */
2785 tomove=strlen(myword)+1;
2786 myword[tomove-1]=' ';
2787 
2788 /*
2789 ** See how long it is.  If its length+charssofar > nchars, we have
2790 ** to trim it.
2791 */
2792 if((tomove+charssofar)>nchars)
2793 	tomove=nchars-charssofar;
2794 /*
2795 ** Attach the word to the current line.  Increment counter.
2796 */
2797 MoveMemory((farvoid *)dt,(farvoid *)myword,(unsigned long)tomove);
2798 charssofar+=tomove;
2799 dt+=tomove;
2800 
2801 /*
2802 ** If we're done, bail out.  Otherwise, go get another word.
2803 */
2804 } while(charssofar<nchars);
2805 
2806 return;
2807 }
2808 
2809 /**********************
2810 ** create_text_block **
2811 ***********************
2812 ** Build a block of text randomly loaded with words.  The words
2813 ** come from the wordcatalog (which must be loaded before you
2814 ** call this).
2815 ** *tb points to the memory where the text is to be built.
2816 ** tblen is the # of bytes to put into the text block
2817 ** maxlinlen is the maximum length of any line (line end indicated
2818 **  by a carriage return).
2819 */
create_text_block(farchar * tb,ulong tblen,ushort maxlinlen)2820 static void create_text_block(farchar *tb,
2821 			ulong tblen,
2822 			ushort maxlinlen)
2823 {
2824 ulong bytessofar;       /* # of bytes so far */
2825 ulong linelen;          /* Line length */
2826 
2827 bytessofar=0L;
2828 do {
2829 
2830 /*
2831 ** Pick a random length for a line and fill the line.
2832 ** Make sure the line can fit (haven't exceeded tablen) and also
2833 ** make sure you leave room to append a carriage return.
2834 */
2835 linelen=abs_randwc(maxlinlen-6)+6;
2836 if((linelen+bytessofar)>tblen)
2837 	linelen=tblen-bytessofar;
2838 
2839 if(linelen>1)
2840 {
2841 	create_text_line(tb,linelen);
2842 }
2843 tb+=linelen-1;          /* Add the carriage return */
2844 *tb++='\n';
2845 
2846 bytessofar+=linelen;
2847 
2848 } while(bytessofar<tblen);
2849 
2850 }
2851 
2852 /********************
2853 ** DoHuffIteration **
2854 *********************
2855 ** Perform the huffman benchmark.  This routine
2856 **  (a) Builds the huffman tree
2857 **  (b) Compresses the text
2858 **  (c) Decompresses the text and verifies correct decompression
2859 */
DoHuffIteration(farchar * plaintext,farchar * comparray,farchar * decomparray,ulong arraysize,ulong nloops,huff_node * hufftree)2860 static ulong DoHuffIteration(farchar *plaintext,
2861 	farchar *comparray,
2862 	farchar *decomparray,
2863 	ulong arraysize,
2864 	ulong nloops,
2865 	huff_node *hufftree)
2866 {
2867 int i;                          /* Index */
2868 long j;                         /* Bigger index */
2869 int root;                       /* Pointer to huffman tree root */
2870 float lowfreq1, lowfreq2;       /* Low frequency counters */
2871 int lowidx1, lowidx2;           /* Indexes of low freq. elements */
2872 long bitoffset;                 /* Bit offset into text */
2873 long textoffset;                /* Char offset into text */
2874 long maxbitoffset;              /* Holds limit of bit offset */
2875 long bitstringlen;              /* Length of bitstring */
2876 int c;                          /* Character from plaintext */
2877 char bitstring[30];             /* Holds bitstring */
2878 ulong elapsed;                  /* For stopwatch */
2879 #ifdef DEBUG
2880 int status=0;
2881 #endif
2882 
2883 /*
2884 ** Start the stopwatch
2885 */
2886 elapsed=StartStopwatch();
2887 
2888 /*
2889 ** Do everything for nloops
2890 */
2891 while(nloops--)
2892 {
2893 
2894 /*
2895 ** Calculate the frequency of each byte value. Store the
2896 ** results in what will become the "leaves" of the
2897 ** Huffman tree.  Interior nodes will be built in those
2898 ** nodes greater than node #255.
2899 */
2900 for(i=0;i<256;i++)
2901 {
2902 	hufftree[i].freq=(float)0.0;
2903 	hufftree[i].c=(unsigned char)i;
2904 }
2905 
2906 for(j=0;j<arraysize;j++)
2907 	hufftree[(int)plaintext[j]].freq+=(float)1.0;
2908 
2909 for(i=0;i<256;i++)
2910 	if(hufftree[i].freq != (float)0.0)
2911 		hufftree[i].freq/=(float)arraysize;
2912 
2913 /* Reset the second half of the tree. Otherwise the loop below that
2914 ** compares the frequencies up to index 512 makes no sense. Some
2915 ** systems automatically zero out memory upon allocation, others (like
2916 ** for example DEC Unix) do not. Depending on this the loop below gets
2917 ** different data and different run times. On our alpha the data that
2918 ** was arbitrarily assigned led to an underflow error at runtime. We
2919 ** use that zeroed-out bits are in fact 0 as a float.
2920 ** Uwe F. Mayer */
2921 bzero((char *)&(hufftree[256]),sizeof(huff_node)*256);
2922 /*
2923 ** Build the huffman tree.  First clear all the parent
2924 ** pointers and left/right pointers.  Also, discard all
2925 ** nodes that have a frequency of true 0.  */
2926 for(i=0;i<512;i++)
2927 {       if(hufftree[i].freq==(float)0.0)
2928 		hufftree[i].parent=EXCLUDED;
2929 	else
2930 		hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1;
2931 }
2932 
2933 /*
2934 ** Go through the tree. Finding nodes of really low
2935 ** frequency.
2936 */
2937 root=255;                       /* Starting root node-1 */
2938 while(1)
2939 {
2940 	lowfreq1=(float)2.0; lowfreq2=(float)2.0;
2941 	lowidx1=-1; lowidx2=-1;
2942 	/*
2943 	** Find first lowest frequency.
2944 	*/
2945 	for(i=0;i<=root;i++)
2946 		if(hufftree[i].parent<0)
2947 			if(hufftree[i].freq<lowfreq1)
2948 			{       lowfreq1=hufftree[i].freq;
2949 				lowidx1=i;
2950 			}
2951 
2952 	/*
2953 	** Did we find a lowest value?  If not, the
2954 	** tree is done.
2955 	*/
2956 	if(lowidx1==-1) break;
2957 
2958 	/*
2959 	** Find next lowest frequency
2960 	*/
2961 	for(i=0;i<=root;i++)
2962 		if((hufftree[i].parent<0) && (i!=lowidx1))
2963 			if(hufftree[i].freq<lowfreq2)
2964 			{       lowfreq2=hufftree[i].freq;
2965 				lowidx2=i;
2966 			}
2967 
2968 	/*
2969 	** If we could only find one item, then that
2970 	** item is surely the root, and (as above) the
2971 	** tree is done.
2972 	*/
2973 	if(lowidx2==-1) break;
2974 
2975 	/*
2976 	** Attach the two new nodes to the current root, and
2977 	** advance the current root.
2978 	*/
2979 	root++;                 /* New root */
2980 	hufftree[lowidx1].parent=root;
2981 	hufftree[lowidx2].parent=root;
2982 	hufftree[root].freq=lowfreq1+lowfreq2;
2983 	hufftree[root].left=lowidx1;
2984 	hufftree[root].right=lowidx2;
2985 	hufftree[root].parent=-2;       /* Show root */
2986 }
2987 
2988 /*
2989 ** Huffman tree built...compress the plaintext
2990 */
2991 bitoffset=0L;                           /* Initialize bit offset */
2992 for(i=0;i<arraysize;i++)
2993 {
2994 	c=(int)plaintext[i];                 /* Fetch character */
2995 	/*
2996 	** Build a bit string for byte c
2997 	*/
2998 	bitstringlen=0;
2999 	while(hufftree[c].parent!=-2)
3000 	{       if(hufftree[hufftree[c].parent].left==c)
3001 			bitstring[bitstringlen]='0';
3002 		else
3003 			bitstring[bitstringlen]='1';
3004 		c=hufftree[c].parent;
3005 		bitstringlen++;
3006 	}
3007 
3008 	/*
3009 	** Step backwards through the bit string, setting
3010 	** bits in the compressed array as you go.
3011 	*/
3012 	while(bitstringlen--)
3013 	{       SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]);
3014 		bitoffset++;
3015 	}
3016 }
3017 
3018 /*
3019 ** Compression done.  Perform de-compression.
3020 */
3021 maxbitoffset=bitoffset;
3022 bitoffset=0;
3023 textoffset=0;
3024 do {
3025 	i=root;
3026 	while(hufftree[i].left!=-1)
3027 	{       if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0)
3028 			i=hufftree[i].left;
3029 		else
3030 			i=hufftree[i].right;
3031 		bitoffset++;
3032 	}
3033 	decomparray[textoffset]=hufftree[i].c;
3034 
3035 #ifdef DEBUG
3036 	if(hufftree[i].c != plaintext[textoffset])
3037 	{
3038 		/* Show error */
3039 		printf("Error at textoffset %ld\n",textoffset);
3040 		status=1;
3041 	}
3042 #endif
3043 	textoffset++;
3044 } while(bitoffset<maxbitoffset);
3045 
3046 }       /* End the big while(nloops--) from above */
3047 
3048 /*
3049 ** All done
3050 */
3051 #ifdef DEBUG
3052   if (status==0) printf("Huffman: OK\n");
3053 #endif
3054 return(StopStopwatch(elapsed));
3055 }
3056 
3057 /***************
3058 ** SetCompBit **
3059 ****************
3060 ** Set a bit in the compression array.  The value of the
3061 ** bit is set according to char bitchar.
3062 */
SetCompBit(u8 * comparray,u32 bitoffset,char bitchar)3063 static void SetCompBit(u8 *comparray,
3064 		u32 bitoffset,
3065 		char bitchar)
3066 {
3067 u32 byteoffset;
3068 int bitnumb;
3069 
3070 /*
3071 ** First calculate which element in the comparray to
3072 ** alter. and the bitnumber.
3073 */
3074 byteoffset=bitoffset>>3;
3075 bitnumb=bitoffset % 8;
3076 
3077 /*
3078 ** Set or clear
3079 */
3080 if(bitchar=='1')
3081 	comparray[byteoffset]|=(1<<bitnumb);
3082 else
3083 	comparray[byteoffset]&=~(1<<bitnumb);
3084 
3085 return;
3086 }
3087 
3088 /***************
3089 ** GetCompBit **
3090 ****************
3091 ** Return the bit value of a bit in the comparession array.
3092 ** Returns 0 if the bit is clear, nonzero otherwise.
3093 */
GetCompBit(u8 * comparray,u32 bitoffset)3094 static int GetCompBit(u8 *comparray,
3095 		u32 bitoffset)
3096 {
3097 u32 byteoffset;
3098 int bitnumb;
3099 
3100 /*
3101 ** Calculate byte offset and bit number.
3102 */
3103 byteoffset=bitoffset>>3;
3104 bitnumb=bitoffset % 8;
3105 
3106 /*
3107 ** Fetch
3108 */
3109 return((1<<bitnumb) & comparray[byteoffset] );
3110 }
3111 
3112 /********************************
3113 ** BACK PROPAGATION NEURAL NET **
3114 *********************************
3115 ** This code is a modified version of the code
3116 ** that was submitted to BYTE Magazine by
3117 ** Maureen Caudill.  It accomanied an article
3118 ** that I CANNOT NOW RECALL.
3119 ** The author's original heading/comment was
3120 ** as follows:
3121 **
3122 **  Backpropagation Network
3123 **  Written by Maureen Caudill
3124 **  in Think C 4.0 on a Macintosh
3125 **
3126 **  (c) Maureen Caudill 1988-1991
3127 **  This network will accept 5x7 input patterns
3128 **  and produce 8 bit output patterns.
3129 **  The source code may be copied or modified without restriction,
3130 **  but no fee may be charged for its use.
3131 **
3132 ** ++++++++++++++
3133 ** I have modified the code so that it will work
3134 ** on systems other than a Macintosh -- RG
3135 */
3136 
3137 /***********
3138 ** DoNNet **
3139 ************
3140 ** Perform the neural net benchmark.
3141 ** Note that this benchmark is one of the few that
3142 ** requires an input file.  That file is "NNET.DAT" and
3143 ** should be on the local directory (from which the
3144 ** benchmark program in launched).
3145 */
DoNNET(void)3146 void DoNNET(void)
3147 {
3148 NNetStruct *locnnetstruct;      /* Local ptr to global data */
3149 char *errorcontext;
3150 ulong accumtime;
3151 double iterations;
3152 
3153 /*
3154 ** Link to global data
3155 */
3156 locnnetstruct=&global_nnetstruct;
3157 
3158 /*
3159 ** Set error context
3160 */
3161 errorcontext="CPU:NNET";
3162 
3163 /*
3164 ** Init random number generator.
3165 ** NOTE: It is important that the random number generator
3166 **  be re-initialized for every pass through this test.
3167 **  The NNET algorithm uses the random number generator
3168 **  to initialize the net.  Results are sensitive to
3169 **  the initial neural net state.
3170 */
3171 /* randnum(3L); */
3172 randnum((int32)3);
3173 
3174 /*
3175 ** Read in the input and output patterns.  We'll do this
3176 ** only once here at the beginning.  These values don't
3177 ** change once loaded.
3178 */
3179 if(read_data_file()!=0)
3180    ErrorExit();
3181 
3182 
3183 /*
3184 ** See if we need to perform self adjustment loop.
3185 */
3186 if(locnnetstruct->adjust==0)
3187 {
3188 	/*
3189 	** Do self-adjustment.  This involves initializing the
3190 	** # of loops and increasing the loop count until we
3191 	** get a number of loops that we can use.
3192 	*/
3193 	for(locnnetstruct->loops=1L;
3194 	  locnnetstruct->loops<MAXNNETLOOPS;
3195 	  locnnetstruct->loops++)
3196 	  {     /*randnum(3L); */
3197 		randnum((int32)3);
3198 		if(DoNNetIteration(locnnetstruct->loops)
3199 			>global_min_ticks) break;
3200 	  }
3201 }
3202 
3203 /*
3204 ** All's well if we get here.  Do the test.
3205 */
3206 accumtime=0L;
3207 iterations=(double)0.0;
3208 
3209 do {
3210 	/* randnum(3L); */    /* Gotta do this for Neural Net */
3211 	randnum((int32)3);    /* Gotta do this for Neural Net */
3212 	accumtime+=DoNNetIteration(locnnetstruct->loops);
3213 	iterations+=(double)locnnetstruct->loops;
3214 } while(TicksToSecs(accumtime)<locnnetstruct->request_secs);
3215 
3216 /*
3217 ** Clean up, calculate results, and go home.  Be sure to
3218 ** show that we don't have to rerun adjustment code.
3219 */
3220 locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
3221 
3222 if(locnnetstruct->adjust==0)
3223 	locnnetstruct->adjust=1;
3224 
3225 
3226 return;
3227 }
3228 
3229 /********************
3230 ** DoNNetIteration **
3231 *********************
3232 ** Do a single iteration of the neural net benchmark.
3233 ** By iteration, we mean a "learning" pass.
3234 */
DoNNetIteration(ulong nloops)3235 static ulong DoNNetIteration(ulong nloops)
3236 {
3237 ulong elapsed;          /* Elapsed time */
3238 int patt;
3239 
3240 /*
3241 ** Run nloops learning cycles.  Notice that, counted with
3242 ** the learning cycle is the weight randomization and
3243 ** zeroing of changes.  This should reduce clock jitter,
3244 ** since we don't have to stop and start the clock for
3245 ** each iteration.
3246 */
3247 elapsed=StartStopwatch();
3248 while(nloops--)
3249 {
3250 	randomize_wts();
3251 	zero_changes();
3252 	iteration_count=1;
3253 	learned = F;
3254 	numpasses = 0;
3255 	while (learned == F)
3256 	{
3257 		for (patt=0; patt<numpats; patt++)
3258 		{
3259 			worst_error = 0.0;      /* reset this every pass through data */
3260 			move_wt_changes();      /* move last pass's wt changes to momentum array */
3261 			do_forward_pass(patt);
3262 			do_back_pass(patt);
3263 			iteration_count++;
3264 		}
3265 		numpasses ++;
3266 		learned = check_out_error();
3267 	}
3268 #ifdef DEBUG
3269 printf("Learned in %d passes\n",numpasses);
3270 #endif
3271 }
3272 return(StopStopwatch(elapsed));
3273 }
3274 
3275 /*************************
3276 ** do_mid_forward(patt) **
3277 **************************
3278 ** Process the middle layer's forward pass
3279 ** The activation of middle layer's neurode is the weighted
3280 ** sum of the inputs from the input pattern, with sigmoid
3281 ** function applied to the inputs.
3282 **/
do_mid_forward(int patt)3283 static void  do_mid_forward(int patt)
3284 {
3285 double  sum;
3286 int     neurode, i;
3287 
3288 for (neurode=0;neurode<MID_SIZE; neurode++)
3289 {
3290 	sum = 0.0;
3291 	for (i=0; i<IN_SIZE; i++)
3292 	{       /* compute weighted sum of input signals */
3293 		sum += mid_wts[neurode][i]*in_pats[patt][i];
3294 	}
3295 	/*
3296 	** apply sigmoid function f(x) = 1/(1+exp(-x)) to weighted sum
3297 	*/
3298 	sum = 1.0/(1.0+exp(-sum));
3299 	mid_out[neurode] = sum;
3300 }
3301 return;
3302 }
3303 
3304 /*********************
3305 ** do_out_forward() **
3306 **********************
3307 ** process the forward pass through the output layer
3308 ** The activation of the output layer is the weighted sum of
3309 ** the inputs (outputs from middle layer), modified by the
3310 ** sigmoid function.
3311 **/
do_out_forward()3312 static void  do_out_forward()
3313 {
3314 double sum;
3315 int neurode, i;
3316 
3317 for (neurode=0; neurode<OUT_SIZE; neurode++)
3318 {
3319 	sum = 0.0;
3320 	for (i=0; i<MID_SIZE; i++)
3321 	{       /*
3322 		** compute weighted sum of input signals
3323 		** from middle layer
3324 		*/
3325 		sum += out_wts[neurode][i]*mid_out[i];
3326 	}
3327 	/*
3328 	** Apply f(x) = 1/(1+exp(-x)) to weighted input
3329 	*/
3330 	sum = 1.0/(1.0+exp(-sum));
3331 	out_out[neurode] = sum;
3332 }
3333 return;
3334 }
3335 
3336 /*************************
3337 ** display_output(patt) **
3338 **************************
3339 ** Display the actual output vs. the desired output of the
3340 ** network.
3341 ** Once the training is complete, and the "learned" flag set
3342 ** to TRUE, then display_output sends its output to both
3343 ** the screen and to a text output file.
3344 **
3345 ** NOTE: This routine has been disabled in the benchmark
3346 ** version. -- RG
3347 **/
3348 /*
3349 void  display_output(int patt)
3350 {
3351 int             i;
3352 
3353 	fprintf(outfile,"\n Iteration # %d",iteration_count);
3354 	fprintf(outfile,"\n Desired Output:  ");
3355 
3356 	for (i=0; i<OUT_SIZE; i++)
3357 	{
3358 		fprintf(outfile,"%6.3f  ",out_pats[patt][i]);
3359 	}
3360 	fprintf(outfile,"\n Actual Output:   ");
3361 
3362 	for (i=0; i<OUT_SIZE; i++)
3363 	{
3364 		fprintf(outfile,"%6.3f  ",out_out[i]);
3365 	}
3366 	fprintf(outfile,"\n");
3367 	return;
3368 }
3369 */
3370 
3371 /**********************
3372 ** do_forward_pass() **
3373 ***********************
3374 ** control function for the forward pass through the network
3375 ** NOTE: I have disabled the call to display_output() in
3376 **  the benchmark version -- RG.
3377 **/
do_forward_pass(int patt)3378 static void  do_forward_pass(int patt)
3379 {
3380 do_mid_forward(patt);   /* process forward pass, middle layer */
3381 do_out_forward();       /* process forward pass, output layer */
3382 /* display_output(patt);        ** display results of forward pass */
3383 return;
3384 }
3385 
3386 /***********************
3387 ** do_out_error(patt) **
3388 ************************
3389 ** Compute the error for the output layer neurodes.
3390 ** This is simply Desired - Actual.
3391 **/
do_out_error(int patt)3392 static void do_out_error(int patt)
3393 {
3394 int neurode;
3395 double error,tot_error, sum;
3396 
3397 tot_error = 0.0;
3398 sum = 0.0;
3399 for (neurode=0; neurode<OUT_SIZE; neurode++)
3400 {
3401 	out_error[neurode] = out_pats[patt][neurode] - out_out[neurode];
3402 	/*
3403 	** while we're here, also compute magnitude
3404 	** of total error and worst error in this pass.
3405 	** We use these to decide if we are done yet.
3406 	*/
3407 	error = out_error[neurode];
3408 	if (error <0.0)
3409 	{
3410 		sum += -error;
3411 		if (-error > tot_error)
3412 			tot_error = -error; /* worst error this pattern */
3413 	}
3414 	else
3415 	{
3416 		sum += error;
3417 		if (error > tot_error)
3418 			tot_error = error; /* worst error this pattern */
3419 	}
3420 }
3421 avg_out_error[patt] = sum/OUT_SIZE;
3422 tot_out_error[patt] = tot_error;
3423 return;
3424 }
3425 
3426 /***********************
3427 ** worst_pass_error() **
3428 ************************
3429 ** Find the worst and average error in the pass and save it
3430 **/
worst_pass_error()3431 static void  worst_pass_error()
3432 {
3433 double error,sum;
3434 
3435 int i;
3436 
3437 error = 0.0;
3438 sum = 0.0;
3439 for (i=0; i<numpats; i++)
3440 {
3441 	if (tot_out_error[i] > error) error = tot_out_error[i];
3442 	sum += avg_out_error[i];
3443 }
3444 worst_error = error;
3445 average_error = sum/numpats;
3446 return;
3447 }
3448 
3449 /*******************
3450 ** do_mid_error() **
3451 ********************
3452 ** Compute the error for the middle layer neurodes
3453 ** This is based on the output errors computed above.
3454 ** Note that the derivative of the sigmoid f(x) is
3455 **        f'(x) = f(x)(1 - f(x))
3456 ** Recall that f(x) is merely the output of the middle
3457 ** layer neurode on the forward pass.
3458 **/
do_mid_error()3459 static void do_mid_error()
3460 {
3461 double sum;
3462 int neurode, i;
3463 
3464 for (neurode=0; neurode<MID_SIZE; neurode++)
3465 {
3466 	sum = 0.0;
3467 	for (i=0; i<OUT_SIZE; i++)
3468 		sum += out_wts[i][neurode]*out_error[i];
3469 
3470 	/*
3471 	** apply the derivative of the sigmoid here
3472 	** Because of the choice of sigmoid f(I), the derivative
3473 	** of the sigmoid is f'(I) = f(I)(1 - f(I))
3474 	*/
3475 	mid_error[neurode] = mid_out[neurode]*(1-mid_out[neurode])*sum;
3476 }
3477 return;
3478 }
3479 
3480 /*********************
3481 ** adjust_out_wts() **
3482 **********************
3483 ** Adjust the weights of the output layer.  The error for
3484 ** the output layer has been previously propagated back to
3485 ** the middle layer.
3486 ** Use the Delta Rule with momentum term to adjust the weights.
3487 **/
adjust_out_wts()3488 static void adjust_out_wts()
3489 {
3490 int weight, neurode;
3491 double learn,delta,alph;
3492 
3493 learn = BETA;
3494 alph  = ALPHA;
3495 for (neurode=0; neurode<OUT_SIZE; neurode++)
3496 {
3497 	for (weight=0; weight<MID_SIZE; weight++)
3498 	{
3499 		/* standard delta rule */
3500 		delta = learn * out_error[neurode] * mid_out[weight];
3501 
3502 		/* now the momentum term */
3503 		delta += alph * out_wt_change[neurode][weight];
3504 		out_wts[neurode][weight] += delta;
3505 
3506 		/* keep track of this pass's cum wt changes for next pass's momentum */
3507 		out_wt_cum_change[neurode][weight] += delta;
3508 	}
3509 }
3510 return;
3511 }
3512 
3513 /*************************
3514 ** adjust_mid_wts(patt) **
3515 **************************
3516 ** Adjust the middle layer weights using the previously computed
3517 ** errors.
3518 ** We use the Generalized Delta Rule with momentum term
3519 **/
adjust_mid_wts(int patt)3520 static void adjust_mid_wts(int patt)
3521 {
3522 int weight, neurode;
3523 double learn,alph,delta;
3524 
3525 learn = BETA;
3526 alph  = ALPHA;
3527 for (neurode=0; neurode<MID_SIZE; neurode++)
3528 {
3529 	for (weight=0; weight<IN_SIZE; weight++)
3530 	{
3531 		/* first the basic delta rule */
3532 		delta = learn * mid_error[neurode] * in_pats[patt][weight];
3533 
3534 		/* with the momentum term */
3535 		delta += alph * mid_wt_change[neurode][weight];
3536 		mid_wts[neurode][weight] += delta;
3537 
3538 		/* keep track of this pass's cum wt changes for next pass's momentum */
3539 		mid_wt_cum_change[neurode][weight] += delta;
3540 	}
3541 }
3542 return;
3543 }
3544 
3545 /*******************
3546 ** do_back_pass() **
3547 ********************
3548 ** Process the backward propagation of error through network.
3549 **/
do_back_pass(int patt)3550 void  do_back_pass(int patt)
3551 {
3552 
3553 do_out_error(patt);
3554 do_mid_error();
3555 adjust_out_wts();
3556 adjust_mid_wts(patt);
3557 
3558 return;
3559 }
3560 
3561 
3562 /**********************
3563 ** move_wt_changes() **
3564 ***********************
3565 ** Move the weight changes accumulated last pass into the wt-change
3566 ** array for use by the momentum term in this pass. Also zero out
3567 ** the accumulating arrays after the move.
3568 **/
move_wt_changes()3569 static void move_wt_changes()
3570 {
3571 int i,j;
3572 
3573 for (i = 0; i<MID_SIZE; i++)
3574 	for (j = 0; j<IN_SIZE; j++)
3575 	{
3576 		mid_wt_change[i][j] = mid_wt_cum_change[i][j];
3577 		/*
3578 		** Zero it out for next pass accumulation.
3579 		*/
3580 		mid_wt_cum_change[i][j] = 0.0;
3581 	}
3582 
3583 for (i = 0; i<OUT_SIZE; i++)
3584 	for (j=0; j<MID_SIZE; j++)
3585 	{
3586 		out_wt_change[i][j] = out_wt_cum_change[i][j];
3587 		out_wt_cum_change[i][j] = 0.0;
3588 	}
3589 
3590 return;
3591 }
3592 
3593 /**********************
3594 ** check_out_error() **
3595 ***********************
3596 ** Check to see if the error in the output layer is below
3597 ** MARGIN*OUT_SIZE for all output patterns.  If so, then
3598 ** assume the network has learned acceptably well.  This
3599 ** is simply an arbitrary measure of how well the network
3600 ** has learned -- many other standards are possible.
3601 **/
check_out_error()3602 static int check_out_error()
3603 {
3604 int result,i,error;
3605 
3606 result  = T;
3607 error   = F;
3608 worst_pass_error();     /* identify the worst error in this pass */
3609 
3610 /*
3611 #ifdef DEBUG
3612 printf("\n Iteration # %d",iteration_count);
3613 #endif
3614 */
3615 for (i=0; i<numpats; i++)
3616 {
3617 /*      printf("\n Error pattern %d:   Worst: %8.3f; Average: %8.3f",
3618 	  i+1,tot_out_error[i], avg_out_error[i]);
3619 	fprintf(outfile,
3620 	 "\n Error pattern %d:   Worst: %8.3f; Average: %8.3f",
3621 	 i+1,tot_out_error[i]);
3622 */
3623 
3624 	if (worst_error >= STOP) result = F;
3625 	if (tot_out_error[i] >= 16.0) error = T;
3626 }
3627 
3628 if (error == T) result = ERR;
3629 
3630 
3631 #ifdef DEBUG
3632 /* printf("\n Error this pass thru data:   Worst: %8.3f; Average: %8.3f",
3633  worst_error,average_error);
3634 */
3635 /* fprintf(outfile,
3636  "\n Error this pass thru data:   Worst: %8.3f; Average: %8.3f",
3637   worst_error, average_error); */
3638 #endif
3639 
3640 return(result);
3641 }
3642 
3643 
3644 /*******************
3645 ** zero_changes() **
3646 ********************
3647 ** Zero out all the wt change arrays
3648 **/
zero_changes()3649 static void zero_changes()
3650 {
3651 int i,j;
3652 
3653 for (i = 0; i<MID_SIZE; i++)
3654 {
3655 	for (j=0; j<IN_SIZE; j++)
3656 	{
3657 		mid_wt_change[i][j] = 0.0;
3658 		mid_wt_cum_change[i][j] = 0.0;
3659 	}
3660 }
3661 
3662 for (i = 0; i< OUT_SIZE; i++)
3663 {
3664 	for (j=0; j<MID_SIZE; j++)
3665 	{
3666 		out_wt_change[i][j] = 0.0;
3667 		out_wt_cum_change[i][j] = 0.0;
3668 	}
3669 }
3670 return;
3671 }
3672 
3673 
3674 /********************
3675 ** randomize_wts() **
3676 *********************
3677 ** Intialize the weights in the middle and output layers to
3678 ** random values between -0.25..+0.25
3679 ** Function rand() returns a value between 0 and 32767.
3680 **
3681 ** NOTE: Had to make alterations to how the random numbers were
3682 ** created.  -- RG.
3683 **/
randomize_wts()3684 static void randomize_wts()
3685 {
3686 int neurode,i;
3687 double value;
3688 
3689 /*
3690 ** Following not used int benchmark version -- RG
3691 **
3692 **        printf("\n Please enter a random number seed (1..32767):  ");
3693 **        scanf("%d", &i);
3694 **        srand(i);
3695 */
3696 
3697 for (neurode = 0; neurode<MID_SIZE; neurode++)
3698 {
3699 	for(i=0; i<IN_SIZE; i++)
3700 	{
3701 	        /* value=(double)abs_randwc(100000L); */
3702 		value=(double)abs_randwc((int32)100000);
3703 		value=value/(double)100000.0 - (double) 0.5;
3704 		mid_wts[neurode][i] = value/2;
3705 	}
3706 }
3707 for (neurode=0; neurode<OUT_SIZE; neurode++)
3708 {
3709 	for(i=0; i<MID_SIZE; i++)
3710 	{
3711 	        /* value=(double)abs_randwc(100000L); */
3712 		value=(double)abs_randwc((int32)100000);
3713 		value=value/(double)10000.0 - (double) 0.5;
3714 		out_wts[neurode][i] = value/2;
3715 	}
3716 }
3717 
3718 return;
3719 }
3720 
3721 
3722 /*********************
3723 ** read_data_file() **
3724 **********************
3725 ** Read in the input data file and store the patterns in
3726 ** in_pats and out_pats.
3727 ** The format for the data file is as follows:
3728 **
3729 ** line#   data expected
3730 ** -----   ------------------------------
3731 ** 1               In-X-size,in-y-size,out-size
3732 ** 2               number of patterns in file
3733 ** 3               1st X row of 1st input pattern
3734 ** 4..             following rows of 1st input pattern pattern
3735 **                 in-x+2  y-out pattern
3736 **                                 1st X row of 2nd pattern
3737 **                 etc.
3738 **
3739 ** Each row of data is separated by commas or spaces.
3740 ** The data is expected to be ascii text corresponding to
3741 ** either a +1 or a 0.
3742 **
3743 ** Sample input for a 1-pattern file (The comments to the
3744 ** right may NOT be in the file unless more sophisticated
3745 ** parsing of the input is done.):
3746 **
3747 ** 5,7,8                      input is 5x7 grid, output is 8 bits
3748 ** 1                          one pattern in file
3749 ** 0,1,1,1,0                  beginning of pattern for "O"
3750 ** 1,0,0,0,1
3751 ** 1,0,0,0,1
3752 ** 1,0,0,0,1
3753 ** 1,0,0,0,1
3754 ** 1,0,0,0,0
3755 ** 0,1,1,1,0
3756 ** 0,1,0,0,1,1,1,1            ASCII code for "O" -- 0100 1111
3757 **
3758 ** Clearly, this simple scheme can be expanded or enhanced
3759 ** any way you like.
3760 **
3761 ** Returns -1 if any file error occurred, otherwise 0.
3762 **/
read_data_file()3763 static int read_data_file()
3764 {
3765 FILE *infile;
3766 
3767 int xinsize,yinsize,youtsize;
3768 int patt, element, i, row;
3769 int vals_read;
3770 int val1,val2,val3,val4,val5,val6,val7,val8;
3771 
3772 /* printf("\n Opening and retrieving data from file."); */
3773 
3774 infile = fopen(inpath, "r");
3775 if (infile == NULL)
3776 {
3777 	printf("\n CPU:NNET--error in opening file!");
3778 	return -1 ;
3779 }
3780 vals_read =fscanf(infile,"%d  %d  %d",&xinsize,&yinsize,&youtsize);
3781 if (vals_read != 3)
3782 {
3783 	printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read);
3784 	return -1;
3785 }
3786 vals_read=fscanf(infile,"%d",&numpats);
3787 if (vals_read !=1)
3788 {
3789 	printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read);
3790 	return -1;
3791 }
3792 if (numpats > MAXPATS)
3793 	numpats = MAXPATS;
3794 
3795 for (patt=0; patt<numpats; patt++)
3796 {
3797 	element = 0;
3798 	for (row = 0; row<yinsize; row++)
3799 	{
3800 		vals_read = fscanf(infile,"%d  %d  %d  %d  %d",
3801 			&val1, &val2, &val3, &val4, &val5);
3802 		if (vals_read != 5)
3803 		{
3804 			printf ("\n CPU:NNET -- failure in reading input!");
3805 			return -1;
3806 		}
3807 		element=row*xinsize;
3808 
3809 		in_pats[patt][element] = (double) val1; element++;
3810 		in_pats[patt][element] = (double) val2; element++;
3811 		in_pats[patt][element] = (double) val3; element++;
3812 		in_pats[patt][element] = (double) val4; element++;
3813 		in_pats[patt][element] = (double) val5; element++;
3814 	}
3815 	for (i=0;i<IN_SIZE; i++)
3816 	{
3817 		if (in_pats[patt][i] >= 0.9)
3818 			in_pats[patt][i] = 0.9;
3819 		if (in_pats[patt][i] <= 0.1)
3820 			in_pats[patt][i] = 0.1;
3821 	}
3822 	element = 0;
3823 	vals_read = fscanf(infile,"%d  %d  %d  %d  %d  %d  %d  %d",
3824 		&val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8);
3825 
3826 	out_pats[patt][element] = (double) val1; element++;
3827 	out_pats[patt][element] = (double) val2; element++;
3828 	out_pats[patt][element] = (double) val3; element++;
3829 	out_pats[patt][element] = (double) val4; element++;
3830 	out_pats[patt][element] = (double) val5; element++;
3831 	out_pats[patt][element] = (double) val6; element++;
3832 	out_pats[patt][element] = (double) val7; element++;
3833 	out_pats[patt][element] = (double) val8; element++;
3834 }
3835 
3836 /* printf("\n Closing the input file now. "); */
3837 
3838 fclose(infile);
3839 return(0);
3840 }
3841 
3842 /*********************
3843 ** initialize_net() **
3844 **********************
3845 ** Do all the initialization stuff before beginning
3846 */
3847 /*
3848 static int initialize_net()
3849 {
3850 int err_code;
3851 
3852 randomize_wts();
3853 zero_changes();
3854 err_code = read_data_file();
3855 iteration_count = 1;
3856 return(err_code);
3857 }
3858 */
3859 
3860 /**********************
3861 ** display_mid_wts() **
3862 ***********************
3863 ** Display the weights on the middle layer neurodes
3864 ** NOTE: This routine is not used in the benchmark
3865 **  test -- RG
3866 **/
3867 /* static void display_mid_wts()
3868 {
3869 int             neurode, weight, row, col;
3870 
3871 fprintf(outfile,"\n Weights of Middle Layer neurodes:");
3872 
3873 for (neurode=0; neurode<MID_SIZE; neurode++)
3874 {
3875 	fprintf(outfile,"\n  Mid Neurode # %d",neurode);
3876 	for (row=0; row<IN_Y_SIZE; row++)
3877 	{
3878 		fprintf(outfile,"\n ");
3879 		for (col=0; col<IN_X_SIZE; col++)
3880 		{
3881 			weight = IN_X_SIZE * row + col;
3882 			fprintf(outfile," %8.3f ", mid_wts[neurode][weight]);
3883 		}
3884 	}
3885 }
3886 return;
3887 }
3888 */
3889 /**********************
3890 ** display_out_wts() **
3891 ***********************
3892 ** Display the weights on the output layer neurodes
3893 ** NOTE: This code is not used in the benchmark
3894 **  test -- RG
3895 */
3896 /* void  display_out_wts()
3897 {
3898 int             neurode, weight;
3899 
3900 	fprintf(outfile,"\n Weights of Output Layer neurodes:");
3901 
3902 	for (neurode=0; neurode<OUT_SIZE; neurode++)
3903 	{
3904 		fprintf(outfile,"\n  Out Neurode # %d \n",neurode);
3905 		for (weight=0; weight<MID_SIZE; weight++)
3906 		{
3907 			fprintf(outfile," %8.3f ", out_wts[neurode][weight]);
3908 		}
3909 	}
3910 	return;
3911 }
3912 */
3913 
3914 /***********************
3915 **  LU DECOMPOSITION  **
3916 ** (Linear Equations) **
3917 ************************
3918 ** These routines come from "Numerical Recipes in Pascal".
3919 ** Note that, as in the assignment algorithm, though we
3920 ** separately define LUARRAYROWS and LUARRAYCOLS, the two
3921 ** must be the same value (this routine depends on a square
3922 ** matrix).
3923 */
3924 
3925 /*********
3926 ** DoLU **
3927 **********
3928 ** Perform the LU decomposition benchmark.
3929 */
DoLU(void)3930 void DoLU(void)
3931 {
3932 LUStruct *loclustruct;  /* Local pointer to global data */
3933 char *errorcontext;
3934 int systemerror;
3935 fardouble *a;
3936 fardouble *b;
3937 fardouble *abase;
3938 fardouble *bbase;
3939 LUdblptr ptra;
3940 int n;
3941 int i;
3942 ulong accumtime;
3943 double iterations;
3944 
3945 /*
3946 ** Link to global data
3947 */
3948 loclustruct=&global_lustruct;
3949 
3950 /*
3951 ** Set error context.
3952 */
3953 errorcontext="FPU:LU";
3954 
3955 /*
3956 ** Our first step is to build a "solvable" problem.  This
3957 ** will become the "seed" set that all others will be
3958 ** derived from. (I.E., we'll simply copy these arrays
3959 ** into the others.
3960 */
3961 a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS,
3962 		&systemerror);
3963 b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS,
3964 		&systemerror);
3965 n=LUARRAYROWS;
3966 
3967 /*
3968 ** We need to allocate a temp vector that is used by the LU
3969 ** algorithm.  This removes the allocation routine from the
3970 ** timing.
3971 */
3972 LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS,
3973 	&systemerror);
3974 
3975 /*
3976 ** Build a problem to be solved.
3977 */
3978 ptra.ptrs.p=a;                  /* Gotta coerce linear array to 2D array */
3979 build_problem(*ptra.ptrs.ap,n,b);
3980 
3981 /*
3982 ** Now that we have a problem built, see if we need to do
3983 ** auto-adjust.  If so, repeatedly call the DoLUIteration routine,
3984 ** increasing the number of solutions per iteration as you go.
3985 */
3986 if(loclustruct->adjust==0)
3987 {
3988 	loclustruct->numarrays=0;
3989 	for(i=1;i<=MAXLUARRAYS;i++)
3990 	{
3991 		abase=(fardouble *)AllocateMemory(sizeof(double) *
3992 			LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror);
3993 		if(systemerror)
3994 		{       ReportError(errorcontext,systemerror);
3995 			LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
3996 			ErrorExit();
3997 		}
3998 		bbase=(fardouble *)AllocateMemory(sizeof(double) *
3999 			LUARRAYROWS*(i+1),&systemerror);
4000 		if(systemerror)
4001 		{       ReportError(errorcontext,systemerror);
4002 			LUFreeMem(a,b,abase,(fardouble *)NULL);
4003 			ErrorExit();
4004 		}
4005 		if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks)
4006 		{       loclustruct->numarrays=i;
4007 			break;
4008 		}
4009 		/*
4010 		** Not enough arrays...free them all and try again
4011 		*/
4012 		FreeMemory((farvoid *)abase,&systemerror);
4013 		FreeMemory((farvoid *)bbase,&systemerror);
4014 	}
4015 	/*
4016 	** Were we able to do it?
4017 	*/
4018 	if(loclustruct->numarrays==0)
4019 	{       printf("FPU:LU -- Array limit reached\n");
4020 		LUFreeMem(a,b,abase,bbase);
4021 		ErrorExit();
4022 	}
4023 }
4024 else
4025 {       /*
4026 	** Don't need to adjust -- just allocate the proper
4027 	** number of arrays and proceed.
4028 	*/
4029 	abase=(fardouble *)AllocateMemory(sizeof(double) *
4030 		LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays,
4031 		&systemerror);
4032 	if(systemerror)
4033 	{       ReportError(errorcontext,systemerror);
4034 		LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
4035 		ErrorExit();
4036 	}
4037 	bbase=(fardouble *)AllocateMemory(sizeof(double) *
4038 		LUARRAYROWS*loclustruct->numarrays,&systemerror);
4039 	if(systemerror)
4040 	{
4041 		ReportError(errorcontext,systemerror);
4042 		LUFreeMem(a,b,abase,(fardouble *)NULL);
4043 		ErrorExit();
4044 	}
4045 }
4046 /*
4047 ** All's well if we get here.  Do the test.
4048 */
4049 accumtime=0L;
4050 iterations=(double)0.0;
4051 
4052 do {
4053 	accumtime+=DoLUIteration(a,b,abase,bbase,
4054 		loclustruct->numarrays);
4055 	iterations+=(double)loclustruct->numarrays;
4056 } while(TicksToSecs(accumtime)<loclustruct->request_secs);
4057 
4058 /*
4059 ** Clean up, calculate results, and go home.  Be sure to
4060 ** show that we don't have to rerun adjustment code.
4061 */
4062 loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime);
4063 
4064 if(loclustruct->adjust==0)
4065 	loclustruct->adjust=1;
4066 
4067 LUFreeMem(a,b,abase,bbase);
4068 return;
4069 }
4070 
4071 /**************
4072 ** LUFreeMem **
4073 ***************
4074 ** Release memory associated with LU benchmark.
4075 */
LUFreeMem(fardouble * a,fardouble * b,fardouble * abase,fardouble * bbase)4076 static void LUFreeMem(fardouble *a, fardouble *b,
4077 			fardouble *abase,fardouble *bbase)
4078 {
4079 int systemerror;
4080 
4081 FreeMemory((farvoid *)a,&systemerror);
4082 FreeMemory((farvoid *)b,&systemerror);
4083 FreeMemory((farvoid *)LUtempvv,&systemerror);
4084 
4085 if(abase!=(fardouble *)NULL) FreeMemory((farvoid *)abase,&systemerror);
4086 if(bbase!=(fardouble *)NULL) FreeMemory((farvoid *)bbase,&systemerror);
4087 return;
4088 }
4089 
4090 /******************
4091 ** DoLUIteration **
4092 *******************
4093 ** Perform an iteration of the LU decomposition benchmark.
4094 ** An iteration refers to the repeated solution of several
4095 ** identical matrices.
4096 */
DoLUIteration(fardouble * a,fardouble * b,fardouble * abase,fardouble * bbase,ulong numarrays)4097 static ulong DoLUIteration(fardouble *a,fardouble *b,
4098 		fardouble *abase, fardouble *bbase,
4099 		ulong numarrays)
4100 {
4101 fardouble *locabase;
4102 fardouble *locbbase;
4103 LUdblptr ptra;  /* For converting ptr to 2D array */
4104 ulong elapsed;
4105 ulong j,i;              /* Indexes */
4106 
4107 
4108 /*
4109 ** Move the seed arrays (a & b) into the destination
4110 ** arrays;
4111 */
4112 for(j=0;j<numarrays;j++)
4113 {       locabase=abase+j*LUARRAYROWS*LUARRAYCOLS;
4114 	locbbase=bbase+j*LUARRAYROWS;
4115 	for(i=0;i<LUARRAYROWS*LUARRAYCOLS;i++)
4116 		*(locabase+i)=*(a+i);
4117 	for(i=0;i<LUARRAYROWS;i++)
4118 		*(locbbase+i)=*(b+i);
4119 }
4120 
4121 /*
4122 ** Do test...begin timing.
4123 */
4124 elapsed=StartStopwatch();
4125 for(i=0;i<numarrays;i++)
4126 {       locabase=abase+i*LUARRAYROWS*LUARRAYCOLS;
4127 	locbbase=bbase+i*LUARRAYROWS;
4128 	ptra.ptrs.p=locabase;
4129 	lusolve(*ptra.ptrs.ap,LUARRAYROWS,locbbase);
4130 }
4131 
4132 return(StopStopwatch(elapsed));
4133 }
4134 
4135 /******************
4136 ** build_problem **
4137 *******************
4138 ** Constructs a solvable set of linear equations.  It does this by
4139 ** creating an identity matrix, then loading the solution vector
4140 ** with random numbers.  After that, the identity matrix and
4141 ** solution vector are randomly "scrambled".  Scrambling is
4142 ** done by (a) randomly selecting a row and multiplying that
4143 ** row by a random number and (b) adding one randomly-selected
4144 ** row to another.
4145 */
build_problem(double a[][LUARRAYCOLS],int n,double b[LUARRAYROWS])4146 static void build_problem(double a[][LUARRAYCOLS],
4147 		int n,
4148 		double b[LUARRAYROWS])
4149 {
4150 long i,j,k,k1;  /* Indexes */
4151 double rcon;     /* Random constant */
4152 
4153 /*
4154 ** Reset random number generator
4155 */
4156 /* randnum(13L); */
4157 randnum((int32)13);
4158 
4159 /*
4160 ** Build an identity matrix.
4161 ** We'll also use this as a chance to load the solution
4162 ** vector.
4163 */
4164 for(i=0;i<n;i++)
4165 {       /* b[i]=(double)(abs_randwc(100L)+1L); */
4166 	b[i]=(double)(abs_randwc((int32)100)+(int32)1);
4167 	for(j=0;j<n;j++)
4168 		if(i==j)
4169 		        /* a[i][j]=(double)(abs_randwc(1000L)+1L); */
4170 			a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1);
4171 		else
4172 			a[i][j]=(double)0.0;
4173 }
4174 
4175 #ifdef DEBUG
4176 printf("Problem:\n");
4177 for(i=0;i<n;i++)
4178 {
4179 /*
4180 	for(j=0;j<n;j++)
4181 		printf("%6.2f ",a[i][j]);
4182 */
4183 	printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]);
4184 /*
4185         printf("\n");
4186 */
4187 }
4188 #endif
4189 
4190 /*
4191 ** Scramble.  Do this 8n times.  See comment above for
4192 ** a description of the scrambling process.
4193 */
4194 
4195 for(i=0;i<8*n;i++)
4196 {
4197 	/*
4198 	** Pick a row and a random constant.  Multiply
4199 	** all elements in the row by the constant.
4200 	*/
4201  /*       k=abs_randwc((long)n);
4202 	rcon=(double)(abs_randwc(20L)+1L);
4203 	for(j=0;j<n;j++)
4204 		a[k][j]=a[k][j]*rcon;
4205 	b[k]=b[k]*rcon;
4206 */
4207 	/*
4208 	** Pick two random rows and add second to
4209 	** first.  Note that we also occasionally multiply
4210 	** by minus 1 so that we get a subtraction operation.
4211 	*/
4212         /* k=abs_randwc((long)n); */
4213         /* k1=abs_randwc((long)n); */
4214 	k=abs_randwc((int32)n);
4215 	k1=abs_randwc((int32)n);
4216 	if(k!=k1)
4217 	{
4218 		if(k<k1) rcon=(double)1.0;
4219 			else rcon=(double)-1.0;
4220 		for(j=0;j<n;j++)
4221 			a[k][j]+=a[k1][j]*rcon;;
4222 		b[k]+=b[k1]*rcon;
4223 	}
4224 }
4225 
4226 return;
4227 }
4228 
4229 
4230 /***********
4231 ** ludcmp **
4232 ************
4233 ** From the procedure of the same name in "Numerical Recipes in Pascal",
4234 ** by Press, Flannery, Tukolsky, and Vetterling.
4235 ** Given an nxn matrix a[], this routine replaces it by the LU
4236 ** decomposition of a rowwise permutation of itself.  a[] and n
4237 ** are input.  a[] is output, modified as follows:
4238 **   --                       --
4239 **  |  b(1,1) b(1,2) b(1,3)...  |
4240 **  |  a(2,1) b(2,2) b(2,3)...  |
4241 **  |  a(3,1) a(3,2) b(3,3)...  |
4242 **  |  a(4,1) a(4,2) a(4,3)...  |
4243 **  |  ...                      |
4244 **   --                        --
4245 **
4246 ** Where the b(i,j) elements form the upper triangular matrix of the
4247 ** LU decomposition, and the a(i,j) elements form the lower triangular
4248 ** elements.  The LU decomposition is calculated so that we don't
4249 ** need to store the a(i,i) elements (which would have laid along the
4250 ** diagonal and would have all been 1).
4251 **
4252 ** indx[] is an output vector that records the row permutation
4253 ** effected by the partial pivoting; d is output as +/-1 depending
4254 ** on whether the number of row interchanges was even or odd,
4255 ** respectively.
4256 ** Returns 0 if matrix singular, else returns 1.
4257 */
ludcmp(double a[][LUARRAYCOLS],int n,int indx[],int * d)4258 static int ludcmp(double a[][LUARRAYCOLS],
4259 		int n,
4260 		int indx[],
4261 		int *d)
4262 {
4263 
4264 double big;     /* Holds largest element value */
4265 double sum;
4266 double dum;     /* Holds dummy value */
4267 int i,j,k;      /* Indexes */
4268 int imax=0;     /* Holds max index value */
4269 double tiny;    /* A really small number */
4270 
4271 tiny=(double)1.0e-20;
4272 
4273 *d=1;           /* No interchanges yet */
4274 
4275 for(i=0;i<n;i++)
4276 {       big=(double)0.0;
4277 	for(j=0;j<n;j++)
4278 		if((double)fabs(a[i][j]) > big)
4279 			big=fabs(a[i][j]);
4280 	/* Bail out on singular matrix */
4281 	if(big==(double)0.0) return(0);
4282 	LUtempvv[i]=1.0/big;
4283 }
4284 
4285 /*
4286 ** Crout's algorithm...loop over columns.
4287 */
4288 for(j=0;j<n;j++)
4289 {       if(j!=0)
4290 		for(i=0;i<j;i++)
4291 		{       sum=a[i][j];
4292 			if(i!=0)
4293 				for(k=0;k<i;k++)
4294 					sum-=(a[i][k]*a[k][j]);
4295 			a[i][j]=sum;
4296 		}
4297 	big=(double)0.0;
4298 	for(i=j;i<n;i++)
4299 	{       sum=a[i][j];
4300 		if(j!=0)
4301 			for(k=0;k<j;k++)
4302 				sum-=a[i][k]*a[k][j];
4303 		a[i][j]=sum;
4304 		dum=LUtempvv[i]*fabs(sum);
4305 		if(dum>=big)
4306 		{       big=dum;
4307 			imax=i;
4308 		}
4309 	}
4310 	if(j!=imax)             /* Interchange rows if necessary */
4311 	{       for(k=0;k<n;k++)
4312 		{       dum=a[imax][k];
4313 			a[imax][k]=a[j][k];
4314 			a[j][k]=dum;
4315 		}
4316 		*d=-*d;         /* Change parity of d */
4317 		dum=LUtempvv[imax];
4318 		LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */
4319 		LUtempvv[j]=dum;
4320 	}
4321 	indx[j]=imax;
4322 	/*
4323 	** If the pivot element is zero, the matrix is singular
4324 	** (at least as far as the precision of the machine
4325 	** is concerned.)  We'll take the original author's
4326 	** recommendation and replace 0.0 with "tiny".
4327 	*/
4328 	if(a[j][j]==(double)0.0)
4329 		a[j][j]=tiny;
4330 
4331 	if(j!=(n-1))
4332 	{       dum=1.0/a[j][j];
4333 		for(i=j+1;i<n;i++)
4334 			a[i][j]=a[i][j]*dum;
4335 	}
4336 }
4337 
4338 return(1);
4339 }
4340 
4341 /***********
4342 ** lubksb **
4343 ************
4344 ** Also from "Numerical Recipes in Pascal".
4345 ** This routine solves the set of n linear equations A X = B.
4346 ** Here, a[][] is input, not as the matrix A, but as its
4347 ** LU decomposition, created by the routine ludcmp().
4348 ** Indx[] is input as the permutation vector returned by ludcmp().
4349 **  b[] is input as the right-hand side an returns the
4350 ** solution vector X.
4351 ** a[], n, and indx are not modified by this routine and
4352 ** can be left in place for different values of b[].
4353 ** This routine takes into account the possibility that b will
4354 ** begin with many zero elements, so it is efficient for use in
4355 ** matrix inversion.
4356 */
lubksb(double a[][LUARRAYCOLS],int n,int indx[LUARRAYROWS],double b[LUARRAYROWS])4357 static void lubksb( double a[][LUARRAYCOLS],
4358 		int n,
4359 		int indx[LUARRAYROWS],
4360 		double b[LUARRAYROWS])
4361 {
4362 
4363 int i,j;        /* Indexes */
4364 int ip;         /* "pointer" into indx */
4365 int ii;
4366 double sum;
4367 
4368 /*
4369 ** When ii is set to a positive value, it will become
4370 ** the index of the first nonvanishing element of b[].
4371 ** We now do the forward substitution. The only wrinkle
4372 ** is to unscramble the permutation as we go.
4373 */
4374 ii=-1;
4375 for(i=0;i<n;i++)
4376 {       ip=indx[i];
4377 	sum=b[ip];
4378 	b[ip]=b[i];
4379 	if(ii!=-1)
4380 		for(j=ii;j<i;j++)
4381 			sum=sum-a[i][j]*b[j];
4382 	else
4383 		/*
4384 		** If a nonzero element is encountered, we have
4385 		** to do the sums in the loop above.
4386 		*/
4387 		if(sum!=(double)0.0)
4388 			ii=i;
4389 	b[i]=sum;
4390 }
4391 /*
4392 ** Do backsubstitution
4393 */
4394 for(i=(n-1);i>=0;i--)
4395 {
4396 	sum=b[i];
4397 	if(i!=(n-1))
4398 		for(j=(i+1);j<n;j++)
4399 			sum=sum-a[i][j]*b[j];
4400 	b[i]=sum/a[i][i];
4401 }
4402 return;
4403 }
4404 
4405 /************
4406 ** lusolve **
4407 *************
4408 ** Solve a linear set of equations: A x = b
4409 ** Original matrix A will be destroyed by this operation.
4410 ** Returns 0 if matrix is singular, 1 otherwise.
4411 */
lusolve(double a[][LUARRAYCOLS],int n,double b[LUARRAYROWS])4412 static int lusolve(double a[][LUARRAYCOLS],
4413 		int n,
4414 		double b[LUARRAYROWS])
4415 {
4416 int indx[LUARRAYROWS];
4417 int d;
4418 #ifdef DEBUG
4419 int i,j;
4420 #endif
4421 
4422 if(ludcmp(a,n,indx,&d)==0) return(0);
4423 
4424 /* Matrix not singular -- proceed */
4425 lubksb(a,n,indx,b);
4426 
4427 #ifdef DEBUG
4428 printf("Solution:\n");
4429 for(i=0;i<n;i++)
4430 {
4431   for(j=0;j<n;j++){
4432   /*
4433     printf("%6.2f ",a[i][j]);
4434   */
4435   }
4436   printf("%6.2f\t",b[i]);
4437   /*
4438     printf("\n");
4439   */
4440 }
4441 printf("\n");
4442 #endif
4443 
4444 return(1);
4445 }
4446