/*
 * Generic two-argument maximum / minimum.
 *
 * Every argument and the whole expansion are parenthesized so that
 * arguments containing lower-precedence operators (e.g. `x & 1`, `c ? a : b`)
 * are compared as complete expressions.
 * NOTE: each argument is still evaluated twice, so do not pass expressions
 * with side effects (e.g. `max(i++, j)`).
 */
#define max(a,b) ((a) > (b) ? (a) : (b))
#define min(a,b) ((a) < (b) ? (a) : (b))
3 #include<stdio.h>
4
/*
 * Convert a data volume and a bandwidth into a time in seconds.
 *
 * Computes (2 * volume * 8) / (bandwidth * 1e9).
 * NOTE(review): the factors suggest `volume` is in bytes, counted twice
 * (presumably one read plus one write), and `bandwidth` is in Gbit/s --
 * confirm against the callers.
 *
 * The arithmetic is carried out in double precision so that large volumes
 * cannot wrap around in `unsigned long` (which is only 32 bits wide on some
 * platforms) before the division happens.
 *
 * A bandwidth of 0 is not special-cased: the division then yields inf
 * (or NaN when volume is also 0) on IEEE-754 platforms.
 */
#ifdef __cplusplus
extern "C"
#endif
double getTime(double bandwidth, unsigned long int volume)//return time in s
{
    /* 2 * 8 = 16; scaling by a power of two is exact in floating point. */
    const double scaled_volume = 16.0 * (double)volume;

    return scaled_volume / (bandwidth * 1000000000);
}
13
/*
 * Bandwidth estimate for the "nooverlap" case.
 *
 * Linear model in the efficiency: 137.113 + 65.348 * eff.
 * An efficiency of exactly 0 short-circuits to a bandwidth of 0.
 */
#ifdef __cplusplus
extern "C"
#endif
double getBW_nooverlap(double eff)
{
    const double base = 137.113;   /* model intercept */
    const double slope = 65.348;   /* weight of the efficiency term */

    return (eff == 0) ? 0 : base + slope * eff;
}
25
/*
 * Bandwidth estimate for the "overlap" case.
 *
 * Returns 0 when eff is exactly 0, otherwise 139.053 + 55.536 * eff.
 */
#ifdef __cplusplus
extern "C"
#endif
double getBW_overlap(double eff)
{
    double bw = 0;

    if (eff != 0) {
        const double offset = 139.053;  /* model intercept */
        const double gain = 55.536;     /* weight of the efficiency term */
        bw = offset + gain * eff;
    }
    return bw;
}
37
/*
 * Bandwidth estimate for the "nomatchg32" case.
 *
 * Linear model in the efficiency: 66.560 + 164.83 * eff,
 * except that an efficiency of exactly 0 maps to a bandwidth of 0.
 */
#ifdef __cplusplus
extern "C"
#endif
double getBW_nomatchg32(double eff)
{
    const double c0 = 66.560;  /* model intercept */
    const double c1 = 164.83;  /* weight of the efficiency term */

    if (eff == 0) {
        return 0;
    }
    return c0 + c1 * eff;
}
48
/*
 * Bandwidth estimate for the "matchl32" case.
 *
 * Two-variable linear model:
 *     119.2547 + 74.8634 * eff - 1.4558 * tbsize
 * An efficiency of exactly 0 yields 0 regardless of tbsize.
 */
#ifdef __cplusplus
extern "C"
#endif
double getBW_matchl32(double eff, unsigned tbsize)
{
    const double base = 119.2547;      /* model intercept */
    const double eff_weight = 74.8634; /* weight of the efficiency term */
    const double tb_weight = -1.4558;  /* weight of the tbsize term */

    if (eff == 0) {
        return 0;
    }

    double bw = base + eff_weight * eff;
    bw = bw + tb_weight * tbsize;
    return bw;
}
60
/*
 * Bandwidth estimate for the "matchg32" case.
 *
 * The model has no efficiency term here: the result is the constant 196.82.
 */
#ifdef __cplusplus
extern "C"
#endif
double getBW_matchg32()
{
    return 196.82;
}
69
/*
 * Fill ratio of an ilim x olim extent covered by 32x32 tiles:
 *     (ilim * olim / 1024) / max(1, ceil(ilim/32) * ceil(olim/32))
 *
 * The numerator equals the expanded four-term form
 *     (i/32)(o/32) + (i/32)(o%32)/32 + (i%32)(o/32)/32 + (i%32)(o%32)/1024
 * because i == 32*(i/32) + i%32. The tile count is clamped to 1 so an empty
 * extent gives 0 instead of 0/0.
 */
static double nooverlap_tile_fill(int ilim, int olim)
{
    int tiles = ((ilim + 31) / 32) * ((olim + 31) / 32);

    if (tiles < 1) {
        tiles = 1;
    }
    return ((double)ilim * (double)olim / (32.0 * 32.0)) / tiles;
}

/*
 * Average tile fill ratio over all blockA x blockB blocks of an
 * asize x bsize problem ("nooverlap" variant).
 *
 * ilimit, olimit: extents used for a full block.
 * asize, bsize:   problem sizes along the two blocked dimensions.
 * blockA, blockB: block sizes; must be non-zero (they are used as divisors).
 *
 * Blocks fall into four classes: full blocks, partial blocks along one
 * dimension, partial blocks along the other, and the single corner block.
 * Partial blocks use extents scaled by remainder/blockSize. The result is
 * the count-weighted sum of the per-class fill ratios divided by the total
 * number of blocks, ceil(asize/blockA) * ceil(bsize/blockB).
 *
 * If asize or bsize is 0 the block count is 0 and the result is NaN.
 */
#ifdef __cplusplus
extern "C"
#endif
double getEfficiency_nooverlap(int ilimit, int olimit, int asize, int bsize, int blockA, int blockB)
{
    /* Extents of the partial (remainder) blocks. */
    const int ilimitr = ilimit * (asize % blockA) / blockA;
    const int olimitr = olimit * (bsize % blockB) / blockB;

    /* Number of full blocks, and 0/1 flags for a trailing partial block. */
    const int a_full = asize / blockA;
    const int b_full = bsize / blockB;
    const int a_part = (asize % blockA) > 0;
    const int b_part = (bsize % blockB) > 0;

    const double f_full   = nooverlap_tile_fill(ilimit,  olimit);
    const double f_a_part = nooverlap_tile_fill(ilimitr, olimit);
    const double f_b_part = nooverlap_tile_fill(ilimit,  olimitr);
    const double f_corner = nooverlap_tile_fill(ilimitr, olimitr);

    const int blocks = ((asize + blockA - 1) / blockA) * ((bsize + blockB - 1) / blockB);

    return (a_full * b_full * f_full
            + a_full * b_part * f_b_part
            + a_part * b_full * f_a_part
            + a_part * b_part * f_corner) / blocks;
}
91
/*
 * Fill ratio of an ilim x olim extent covered by 32x32 tiles:
 *     (ilim * olim / 1024) / max(1, ceil(ilim/32) * ceil(olim/32))
 *
 * The numerator equals the expanded four-term form
 *     (i/32)(o/32) + (i/32)(o%32)/32 + (i%32)(o/32)/32 + (i%32)(o%32)/1024
 * because i == 32*(i/32) + i%32. The tile count is clamped to 1 so an empty
 * extent gives 0 instead of 0/0.
 */
static double overlap_tile_fill(int ilim, int olim)
{
    int tiles = ((ilim + 31) / 32) * ((olim + 31) / 32);

    if (tiles < 1) {
        tiles = 1;
    }
    return ((double)ilim * (double)olim / (32.0 * 32.0)) / tiles;
}

/*
 * Average tile fill ratio over all blockA x blockB blocks of an
 * asize x bsize problem ("overlap" variant).
 *
 * ilimit, olimit: extents used for a full block.
 * asize, bsize:   problem sizes along the two blocked dimensions.
 * blockA, blockB: block sizes; must be non-zero (they are used as divisors).
 *
 * Blocks fall into four classes: full blocks, partial blocks along one
 * dimension, partial blocks along the other, and the single corner block.
 * Partial blocks use extents scaled by remainder/blockSize. The result is
 * the count-weighted sum of the per-class fill ratios divided by the total
 * number of blocks, ceil(asize/blockA) * ceil(bsize/blockB).
 *
 * The corner block is normalised by ceil(ilimitr/32) * ceil(olimitr/32),
 * i.e. by the tile count of its own ilimitr x olimitr extent, like every
 * other class.
 *
 * If asize or bsize is 0 the block count is 0 and the result is NaN.
 */
#ifdef __cplusplus
extern "C"
#endif
double getEfficiency_overlap(int ilimit, int olimit, int asize, int bsize, int blockA, int blockB)
{
    /* Extents of the partial (remainder) blocks. */
    const int ilimitr = ilimit * (asize % blockA) / blockA;
    const int olimitr = olimit * (bsize % blockB) / blockB;

    /* Number of full blocks, and 0/1 flags for a trailing partial block. */
    const int a_full = asize / blockA;
    const int b_full = bsize / blockB;
    const int a_part = (asize % blockA) > 0;
    const int b_part = (bsize % blockB) > 0;

    const double f_full   = overlap_tile_fill(ilimit,  olimit);
    const double f_a_part = overlap_tile_fill(ilimitr, olimit);
    const double f_b_part = overlap_tile_fill(ilimit,  olimitr);
    const double f_corner = overlap_tile_fill(ilimitr, olimitr);

    const int blocks = ((asize + blockA - 1) / blockA) * ((bsize + blockB - 1) / blockB);

    return (a_full * b_full * f_full
            + a_full * b_part * f_b_part
            + a_part * b_full * f_a_part
            + a_part * b_part * f_corner) / blocks;
}
114
/*
 * Efficiency for the "nomatchg32" case.
 *
 * Always returns -1; both arguments are ignored. A 32x32 tile fill-ratio
 * formula used to follow the return statement but could never execute,
 * so it has been removed.
 * NOTE(review): -1 looks like a "not modelled" sentinel -- confirm with the
 * callers before relying on that meaning.
 */
#ifdef __cplusplus
extern "C"
#endif
double getEfficiency_nomatchg32(int ilimit, int olimit)
{
    (void)ilimit;
    (void)olimit;
    return -1;
}
124
/*
 * Efficiency for the "matchg32" case.
 *
 * The result is the constant 1; neither argument influences it.
 */
#ifdef __cplusplus
extern "C"
#endif
double getEfficiency_matchg32(int ilimit, int olimit)
{
    (void)ilimit;
    (void)olimit;
    return 1.0;
}
132
/*
 * Ratio (used / 32) / max(1, chunks).
 *
 * used / 32.0 equals the expanded form (used/32) + (used%32)/32.
 * The chunk count is clamped to 1 so a zero count gives 0 instead of 0/0.
 */
static double matchl32_fill(int used, int chunks)
{
    if (chunks < 1) {
        chunks = 1;
    }
    return ((double)used / 32.0) / chunks;
}

/*
 * Average fill ratio over all blockA x blockA blocks of an asize x bsize
 * problem ("matchl32" variant).
 *
 * size0:  multiplier applied to the block size and to both remainders.
 * asize, bsize: problem sizes along the two blocked dimensions.
 * blockA: block size used for both dimensions; must be non-zero.
 *
 * A full block contributes blockA*size0 elements. Partial blocks contribute
 * (asize % blockA)*size0, (bsize % blockA)*size0, or the smaller of the two
 * for the corner block. Every class is normalised by the number of
 * 32-element chunks of a FULL block, ceil(blockA*size0 / 32), not by its own
 * chunk count. The per-class ratios are then averaged, weighted by how many
 * blocks of each class exist.
 *
 * If asize or bsize is 0 there are no blocks and the result is NaN.
 * When MODEL is defined, blockA and the result are printed to stdout.
 */
#ifdef __cplusplus
extern "C"
#endif
double getEfficiency_matchl32(int size0, int asize, int bsize, int blockA)
{
    const int plain = blockA * size0;
    const int ilimit = (asize % blockA) * size0;
    const int olimit = (bsize % blockA) * size0;
    const int minlimit = (ilimit < olimit) ? ilimit : olimit;

    /* Chunk count of a full block; shared denominator for all classes. */
    const int chunks = (plain + 31) / 32;

    const double f1 = matchl32_fill(plain, chunks);
    const double f2 = matchl32_fill(ilimit, chunks);
    const double f3 = matchl32_fill(olimit, chunks);
    const double f4 = matchl32_fill(minlimit, chunks);

    /* Block counts per class (full, a-partial, b-partial, corner). */
    const int n1 = (asize / blockA) * (bsize / blockA);
    const int n2 = (asize % blockA > 0) * (bsize / blockA);
    const int n3 = (asize / blockA) * (bsize % blockA > 0);
    const int n4 = (asize % blockA > 0) * (bsize % blockA > 0);

    const double f = (n1*f1 + n2*f2 + n3*f3 + n4*f4)/(n1+n2+n3+n4);
#ifdef MODEL
    printf("\t%d\t%lf\t",blockA, f);
#endif
    return f;
}
170