// Larger / smaller of two values.
// Every argument and the whole expansion are parenthesized so that operands
// containing lower-precedence operators (&, |, ?:, ...) are grouped as the
// caller wrote them.
// Caution: each argument is evaluated twice, so do not pass expressions with
// side effects (e.g. max(i++, j)).
#define max(a,b) (((a) > (b)) ? (a) : (b))
#define min(a,b) (((a) < (b)) ? (a) : (b))
3 #include<stdio.h>
4 
// Estimate a transfer time in seconds.
//
// Computes (2 * volume * 8) / (bandwidth * 1e9).
// NOTE(review): the factors 2 and 8 and the 1e9 scale presumably encode
// "read + write", "bytes per element" (or bits per byte) and "giga-units per
// second" -- confirm against the callers; only the arithmetic is visible here.
//
// bandwidth: bandwidth figure, multiplied by 1e9 in the divisor. It is not
//            checked against 0; a zero bandwidth divides by zero.
// volume:    amount of data to move.
// returns:   the estimated time in seconds.
#ifdef __cplusplus
extern "C"
#endif
double getTime(double bandwidth, unsigned long int volume)
{
	// Do the scaling in double: 2*volume*8 evaluated in unsigned long wraps
	// around silently for large volumes (already above 2^28 where unsigned
	// long is 32 bits wide). Multiplying by 16 is exact in binary floating
	// point, so results for volumes that did not wrap are unchanged.
	const double traffic = 16.0 * (double)volume;

	return traffic / (bandwidth * 1000000000.0);
}
13 
// Linear bandwidth model for the "nooverlap" case.
//
// eff:     efficiency value fed into the model.
// returns: 0 when eff is exactly 0, otherwise 137.113 + 65.348 * eff.
// NOTE(review): the coefficients look like fitted regression constants and the
// result unit is not stated anywhere in this file -- confirm with the model's
// author.
#ifdef __cplusplus
extern "C"
#endif
double getBW_nooverlap(double eff)
{
	const double base = 137.113;  // constant term of the model
	const double slope = 65.348;  // weight of the efficiency term

	return (eff == 0) ? 0 : base + slope * eff;
}
25 
// Linear bandwidth model for the "overlap" case.
//
// eff:     efficiency value fed into the model.
// returns: 0 when eff is exactly 0, otherwise 139.053 + 55.536 * eff.
// NOTE(review): the coefficients look like fitted regression constants and the
// result unit is not stated anywhere in this file -- confirm with the model's
// author.
#ifdef __cplusplus
extern "C"
#endif
double getBW_overlap(double eff)
{
	const double base = 139.053;  // constant term of the model
	const double slope = 55.536;  // weight of the efficiency term

	return (eff == 0) ? 0 : base + slope * eff;
}
37 
// Linear bandwidth model for the "nomatchg32" case.
//
// eff:     efficiency value fed into the model.
// returns: 0 when eff is exactly 0, otherwise 66.560 + 164.83 * eff.
// NOTE(review): the coefficients look like fitted regression constants and the
// result unit is not stated anywhere in this file -- confirm with the model's
// author.
#ifdef __cplusplus
extern "C"
#endif
double getBW_nomatchg32(double eff)
{
	const double base = 66.560;   // constant term of the model
	const double slope = 164.83;  // weight of the efficiency term

	return (eff == 0) ? 0 : base + slope * eff;
}
48 
// Linear bandwidth model for the "matchl32" case, with an extra term that
// depends on tbsize.
//
// eff:     efficiency value fed into the model.
// tbsize:  second model input; its weight is negative, so larger values lower
//          the result. NOTE(review): presumably a thread-block size -- confirm.
// returns: 0 when eff is exactly 0 (tbsize is ignored in that case), otherwise
//          119.2547 + 74.8634 * eff - 1.4558 * tbsize.
#ifdef __cplusplus
extern "C"
#endif
double getBW_matchl32(double eff, unsigned tbsize)
{
	const double base = 119.2547;      // constant term of the model
	const double eff_weight = 74.8634; // weight of the efficiency term
	const double tb_weight = -1.4558;  // weight of the tbsize term

	if (eff == 0) {
		return 0;
	}
	return base + eff_weight * eff + tb_weight * tbsize;
}
60 
// Bandwidth model for the "matchg32" case: a constant, independent of any
// efficiency input.
//
// returns: always 196.82.
#ifdef __cplusplus
extern "C"
#endif
double getBW_matchg32()
{
	return 196.82;
}
69 
// Fill ratio of an ilimit x olimit region measured in chunks of 32 per
// dimension: the region's area in units of 32*32 (whole chunks plus the
// fractional contribution of the partial ones) divided by the number of chunks
// needed to cover it, rounding up in each dimension. The chunk count is clamped
// to at least 1 so an empty region yields 0 instead of dividing by zero.
static double nooverlap_tile_fill(int ilimit, int olimit)
{
	const int i_full = ilimit / 32;
	const int i_part = ilimit % 32;
	const int o_full = olimit / 32;
	const int o_part = olimit % 32;

	const double covered = (i_full * o_full)
	                     + (double)i_full * o_part / 32
	                     + (double)i_part * o_full / 32
	                     + (double)i_part * o_part / (32 * 32);
	const int chunks = ((ilimit + 31) / 32) * ((olimit + 31) / 32);

	return covered / (chunks > 1 ? chunks : 1);
}

// Average fill ratio over an asize x bsize problem cut into blockA x blockB
// blocks ("nooverlap" model).
//
// Full blocks use the limits ilimit/olimit as given. The trailing partial block
// in a dimension (present when asize % blockA or bsize % blockB is non-zero)
// uses a limit scaled by remainder / block size. Each of the four block kinds
// (full/full, full/partial, partial/full, partial/partial) contributes its fill
// ratio weighted by how many such blocks exist, and the sum is divided by the
// total block count.
//
// ilimit, olimit: extents of a full block in the two dimensions.
//                 NOTE(review): presumably "inner"/"outer" loop limits --
//                 confirm with the caller.
// asize, bsize:   problem extents; both must be positive (a zero extent gives
//                 a zero block count and therefore a division by zero).
// blockA, blockB: block extents; must be non-zero (used as divisors).
// returns:        the weighted average fill ratio.
#ifdef __cplusplus
extern "C"
#endif
double getEfficiency_nooverlap(int ilimit, int olimit, int asize, int bsize, int blockA, int blockB)
{
	// Limits that apply inside the trailing partial block of each dimension.
	const int ilimitr = ilimit * (asize % blockA) / blockA;
	const int olimitr = olimit * (bsize % blockB) / blockB;

	// All four ratios go through the same helper so that every divisor is
	// clamped; previously the full/full ratio alone was unguarded and
	// produced 0/0 when ilimit or olimit was 0.
	const double fill_full = nooverlap_tile_fill(ilimit, olimit);
	const double fill_arem = nooverlap_tile_fill(ilimitr, olimit);
	const double fill_brem = nooverlap_tile_fill(ilimit, olimitr);
	const double fill_both = nooverlap_tile_fill(ilimitr, olimitr);

	const int full_a = asize / blockA;
	const int full_b = bsize / blockB;
	const int part_a = (asize % blockA > 0);
	const int part_b = (bsize % blockB > 0);
	const int blocks = ((asize + blockA - 1) / blockA) * ((bsize + blockB - 1) / blockB);

	return (full_a * full_b * fill_full
	        + full_a * part_b * fill_brem
	        + part_a * full_b * fill_arem
	        + part_a * part_b * fill_both) / blocks;
}
91 
// Fill ratio of an ilimit x olimit region measured in chunks of 32 per
// dimension: the region's area in units of 32*32 (whole chunks plus the
// fractional contribution of the partial ones) divided by the number of chunks
// needed to cover it, rounding up in each dimension. The chunk count is clamped
// to at least 1 so an empty region yields 0 instead of dividing by zero.
static double overlap_tile_fill(int ilimit, int olimit)
{
	const int i_full = ilimit / 32;
	const int i_part = ilimit % 32;
	const int o_full = olimit / 32;
	const int o_part = olimit % 32;

	const double covered = (i_full * o_full)
	                     + (double)i_full * o_part / 32
	                     + (double)i_part * o_full / 32
	                     + (double)i_part * o_part / (32 * 32);
	const int chunks = ((ilimit + 31) / 32) * ((olimit + 31) / 32);

	return covered / (chunks > 1 ? chunks : 1);
}

// Average fill ratio over an asize x bsize problem cut into blockA x blockB
// blocks ("overlap" model).
//
// Full blocks use the limits ilimit/olimit as given. The trailing partial block
// in a dimension (present when asize % blockA or bsize % blockB is non-zero)
// uses a limit scaled by remainder / block size. Each of the four block kinds
// (full/full, full/partial, partial/full, partial/partial) contributes its fill
// ratio weighted by how many such blocks exist, and the sum is divided by the
// total block count.
//
// ilimit, olimit: extents of a full block in the two dimensions.
//                 NOTE(review): presumably "inner"/"outer" loop limits --
//                 confirm with the caller.
// asize, bsize:   problem extents; both must be positive (a zero extent gives
//                 a zero block count and therefore a division by zero).
// blockA, blockB: block extents; must be non-zero (used as divisors).
// returns:        the weighted average fill ratio.
#ifdef __cplusplus
extern "C"
#endif
double getEfficiency_overlap(int ilimit, int olimit, int asize, int bsize, int blockA, int blockB)
{
	// Limits that apply inside the trailing partial block of each dimension.
	const int ilimitr = ilimit * (asize % blockA) / blockA;
	const int olimitr = olimit * (bsize % blockB) / blockB;

	// Every ratio is computed by the same helper so the chunk count in the
	// divisor always belongs to the region in the numerator. The
	// partial/partial ratio used to count olimitr in both dimensions, which
	// let the result exceed 1 whenever ilimitr needed more chunks than
	// olimitr. The helper also clamps the full/full divisor, which used to be
	// the only unguarded one.
	const double fill_full = overlap_tile_fill(ilimit, olimit);
	const double fill_arem = overlap_tile_fill(ilimitr, olimit);
	const double fill_brem = overlap_tile_fill(ilimit, olimitr);
	const double fill_both = overlap_tile_fill(ilimitr, olimitr);

	const int full_a = asize / blockA;
	const int full_b = bsize / blockB;
	const int part_a = (asize % blockA > 0);
	const int part_b = (bsize % blockB > 0);
	const int blocks = ((asize + blockA - 1) / blockA) * ((bsize + blockB - 1) / blockB);

	return (full_a * full_b * fill_full
	        + full_a * part_b * fill_brem
	        + part_a * full_b * fill_arem
	        + part_a * part_b * fill_both) / blocks;
}
114 
// Efficiency for the "nomatchg32" case. The model is currently switched off:
// the function returns -1 for every input and ignores both parameters.
//
// The statements that used to follow the unconditional return could never run
// and have been removed. They computed the fill ratio of an ilimit x olimit
// region in chunks of 32 per dimension:
//   (ilimit/32.0 * olimit/32.0) / (ceil(ilimit/32) * ceil(olimit/32)).
//
// NOTE(review): -1 looks like a "not modelled" sentinel, but it is not 0, so
// getBW_nomatchg32 would turn it into a negative bandwidth -- confirm that
// callers check for it.
//
// ilimit, olimit: unused.
// returns:        always -1.
#ifdef __cplusplus
extern "C"
#endif
double getEfficiency_nomatchg32(int ilimit, int olimit)
{
	(void)ilimit;
	(void)olimit;
	return -1;
}
124 
// Efficiency for the "matchg32" case: a constant, independent of the inputs.
//
// ilimit, olimit: unused.
// returns:        always 1.
#ifdef __cplusplus
extern "C"
#endif
double getEfficiency_matchg32(int ilimit, int olimit)
{
	(void)ilimit;
	(void)olimit;
	return 1.0;
}
132 
// Ratio of `used` elements, expressed in chunks of 32 (whole chunks plus the
// fractional part of the last one), to `chunks` available chunks. The chunk
// count is clamped to at least 1 so a zero count yields 0 instead of dividing
// by zero.
static double matchl32_fill(int used, int chunks)
{
	return ((used / 32) + (double)(used % 32) / 32) / (chunks > 1 ? chunks : 1);
}

// Average fill ratio over an asize x bsize problem cut into blockA x blockA
// blocks ("matchl32" model).
//
// A full block holds blockA * size0 elements. The trailing partial block of a
// dimension holds (remainder * size0) elements, and the partial/partial corner
// uses the smaller of the two. Every ratio is taken against the chunk count of
// a full block. The four ratios are then averaged, weighted by how many blocks
// of each kind exist.
//
// size0:   multiplier applied to block extents and remainders.
//          NOTE(review): presumably the extent of a third, contiguous
//          dimension -- confirm with the caller.
// asize:   problem extent in the first dimension; must be positive.
// bsize:   problem extent in the second dimension; must be positive.
// blockA:  block extent used for both dimensions; must be non-zero (used as a
//          divisor).
// returns: the weighted average fill ratio. When the file is compiled with
//          MODEL defined, blockA and the result are also printed to stdout.
#ifdef __cplusplus
extern "C"
#endif
double getEfficiency_matchl32(int size0, int asize, int bsize, int blockA)
{
	const int ilimit = (asize % blockA) * size0;
	const int olimit = (bsize % blockA) * size0;
	const int plain = blockA * size0;
	const int minlimit = (ilimit < olimit) ? ilimit : olimit;

	// Chunks of 32 needed for one full block; shared divisor of all ratios.
	const int chunks = (plain + 31) / 32;

	// The full-block ratio used to be the only one with an unclamped divisor
	// and evaluated to 0/0 when blockA * size0 was 0.
	const double f1 = matchl32_fill(plain, chunks);
	const double f2 = matchl32_fill(ilimit, chunks);
	const double f3 = matchl32_fill(olimit, chunks);
	const double f4 = matchl32_fill(minlimit, chunks);

	// Number of blocks of each kind: full/full, partial/full, full/partial,
	// partial/partial.
	const int n1 = (asize / blockA) * (bsize / blockA);
	const int n2 = (asize % blockA > 0) * (bsize / blockA);
	const int n3 = (asize / blockA) * (bsize % blockA > 0);
	const int n4 = (asize % blockA > 0) * (bsize % blockA > 0);

	const double f = (n1 * f1 + n2 * f2 + n3 * f3 + n4 * f4) / (n1 + n2 + n3 + n4);
#ifdef MODEL
	printf("\t%d\t%lf\t",blockA, f);
#endif
	return f;
}
170