1
2 /*
3 Defines some vector operation functions that are shared by
4 sequential and parallel vectors.
5 */
6 #include <../src/vec/vec/impls/dvecimpl.h>
7 #include <petsc/private/kernels/petscaxpy.h>
8
9
10
11 #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
12 #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)13 PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
14 {
15 PetscErrorCode ierr;
16 PetscInt i,nv_rem,n = xin->map->n;
17 PetscScalar sum0,sum1,sum2,sum3;
18 const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
19 Vec *yy;
20
21 PetscFunctionBegin;
22 sum0 = 0.0;
23 sum1 = 0.0;
24 sum2 = 0.0;
25
26 i = nv;
27 nv_rem = nv&0x3;
28 yy = (Vec*)yin;
29 ierr = VecGetArrayRead(xin,&x);CHKERRQ(ierr);
30
31 switch (nv_rem) {
32 case 3:
33 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
34 ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
35 ierr = VecGetArrayRead(yy[2],&yy2);CHKERRQ(ierr);
36 fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
37 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
38 ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
39 ierr = VecRestoreArrayRead(yy[2],&yy2);CHKERRQ(ierr);
40 z[0] = sum0;
41 z[1] = sum1;
42 z[2] = sum2;
43 break;
44 case 2:
45 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
46 ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
47 fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
48 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
49 ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
50 z[0] = sum0;
51 z[1] = sum1;
52 break;
53 case 1:
54 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
55 fortranmdot1_(x,yy0,&n,&sum0);
56 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
57 z[0] = sum0;
58 break;
59 case 0:
60 break;
61 }
62 z += nv_rem;
63 i -= nv_rem;
64 yy += nv_rem;
65
66 while (i >0) {
67 sum0 = 0.;
68 sum1 = 0.;
69 sum2 = 0.;
70 sum3 = 0.;
71 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
72 ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
73 ierr = VecGetArrayRead(yy[2],&yy2);CHKERRQ(ierr);
74 ierr = VecGetArrayRead(yy[3],&yy3);CHKERRQ(ierr);
75 fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
76 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
77 ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
78 ierr = VecRestoreArrayRead(yy[2],&yy2);CHKERRQ(ierr);
79 ierr = VecRestoreArrayRead(yy[3],&yy3);CHKERRQ(ierr);
80 yy += 4;
81 z[0] = sum0;
82 z[1] = sum1;
83 z[2] = sum2;
84 z[3] = sum3;
85 z += 4;
86 i -= 4;
87 }
88 ierr = VecRestoreArrayRead(xin,&x);CHKERRQ(ierr);
89 ierr = PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));CHKERRQ(ierr);
90 PetscFunctionReturn(0);
91 }
92
93 #else
VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)94 PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
95 {
96 PetscErrorCode ierr;
97 PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
98 PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
99 const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
100 Vec *yy;
101
102 PetscFunctionBegin;
103 sum0 = 0.;
104 sum1 = 0.;
105 sum2 = 0.;
106
107 i = nv;
108 nv_rem = nv&0x3;
109 yy = (Vec*)yin;
110 j = n;
111 ierr = VecGetArrayRead(xin,&xbase);CHKERRQ(ierr);
112 x = xbase;
113
114 switch (nv_rem) {
115 case 3:
116 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
117 ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
118 ierr = VecGetArrayRead(yy[2],&yy2);CHKERRQ(ierr);
119 switch (j_rem=j&0x3) {
120 case 3:
121 x2 = x[2];
122 sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
123 sum2 += x2*PetscConj(yy2[2]);
124 case 2:
125 x1 = x[1];
126 sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
127 sum2 += x1*PetscConj(yy2[1]);
128 case 1:
129 x0 = x[0];
130 sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
131 sum2 += x0*PetscConj(yy2[0]);
132 case 0:
133 x += j_rem;
134 yy0 += j_rem;
135 yy1 += j_rem;
136 yy2 += j_rem;
137 j -= j_rem;
138 break;
139 }
140 while (j>0) {
141 x0 = x[0];
142 x1 = x[1];
143 x2 = x[2];
144 x3 = x[3];
145 x += 4;
146
147 sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
148 sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
149 sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
150 j -= 4;
151 }
152 z[0] = sum0;
153 z[1] = sum1;
154 z[2] = sum2;
155 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
156 ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
157 ierr = VecRestoreArrayRead(yy[2],&yy2);CHKERRQ(ierr);
158 break;
159 case 2:
160 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
161 ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
162 switch (j_rem=j&0x3) {
163 case 3:
164 x2 = x[2];
165 sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
166 case 2:
167 x1 = x[1];
168 sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
169 case 1:
170 x0 = x[0];
171 sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
172 case 0:
173 x += j_rem;
174 yy0 += j_rem;
175 yy1 += j_rem;
176 j -= j_rem;
177 break;
178 }
179 while (j>0) {
180 x0 = x[0];
181 x1 = x[1];
182 x2 = x[2];
183 x3 = x[3];
184 x += 4;
185
186 sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
187 sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
188 j -= 4;
189 }
190 z[0] = sum0;
191 z[1] = sum1;
192
193 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
194 ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
195 break;
196 case 1:
197 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
198 switch (j_rem=j&0x3) {
199 case 3:
200 x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
201 case 2:
202 x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
203 case 1:
204 x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
205 case 0:
206 x += j_rem;
207 yy0 += j_rem;
208 j -= j_rem;
209 break;
210 }
211 while (j>0) {
212 sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
213 + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
214 yy0 +=4;
215 j -= 4; x+=4;
216 }
217 z[0] = sum0;
218
219 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
220 break;
221 case 0:
222 break;
223 }
224 z += nv_rem;
225 i -= nv_rem;
226 yy += nv_rem;
227
228 while (i >0) {
229 sum0 = 0.;
230 sum1 = 0.;
231 sum2 = 0.;
232 sum3 = 0.;
233 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
234 ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
235 ierr = VecGetArrayRead(yy[2],&yy2);CHKERRQ(ierr);
236 ierr = VecGetArrayRead(yy[3],&yy3);CHKERRQ(ierr);
237
238 j = n;
239 x = xbase;
240 switch (j_rem=j&0x3) {
241 case 3:
242 x2 = x[2];
243 sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
244 sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
245 case 2:
246 x1 = x[1];
247 sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
248 sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
249 case 1:
250 x0 = x[0];
251 sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
252 sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
253 case 0:
254 x += j_rem;
255 yy0 += j_rem;
256 yy1 += j_rem;
257 yy2 += j_rem;
258 yy3 += j_rem;
259 j -= j_rem;
260 break;
261 }
262 while (j>0) {
263 x0 = x[0];
264 x1 = x[1];
265 x2 = x[2];
266 x3 = x[3];
267 x += 4;
268
269 sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
270 sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
271 sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
272 sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
273 j -= 4;
274 }
275 z[0] = sum0;
276 z[1] = sum1;
277 z[2] = sum2;
278 z[3] = sum3;
279 z += 4;
280 i -= 4;
281 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
282 ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
283 ierr = VecRestoreArrayRead(yy[2],&yy2);CHKERRQ(ierr);
284 ierr = VecRestoreArrayRead(yy[3],&yy3);CHKERRQ(ierr);
285 yy += 4;
286 }
287 ierr = VecRestoreArrayRead(xin,&xbase);CHKERRQ(ierr);
288 ierr = PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));CHKERRQ(ierr);
289 PetscFunctionReturn(0);
290 }
291 #endif
292
293 /* ----------------------------------------------------------------------------*/
VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)294 PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
295 {
296 PetscErrorCode ierr;
297 PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
298 PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
299 const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
300 Vec *yy;
301
302 PetscFunctionBegin;
303 sum0 = 0.;
304 sum1 = 0.;
305 sum2 = 0.;
306
307 i = nv;
308 nv_rem = nv&0x3;
309 yy = (Vec*)yin;
310 j = n;
311 ierr = VecGetArrayRead(xin,&xbase);CHKERRQ(ierr);
312 x = xbase;
313
314 switch (nv_rem) {
315 case 3:
316 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
317 ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
318 ierr = VecGetArrayRead(yy[2],&yy2);CHKERRQ(ierr);
319 switch (j_rem=j&0x3) {
320 case 3:
321 x2 = x[2];
322 sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
323 sum2 += x2*yy2[2];
324 case 2:
325 x1 = x[1];
326 sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
327 sum2 += x1*yy2[1];
328 case 1:
329 x0 = x[0];
330 sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
331 sum2 += x0*yy2[0];
332 case 0:
333 x += j_rem;
334 yy0 += j_rem;
335 yy1 += j_rem;
336 yy2 += j_rem;
337 j -= j_rem;
338 break;
339 }
340 while (j>0) {
341 x0 = x[0];
342 x1 = x[1];
343 x2 = x[2];
344 x3 = x[3];
345 x += 4;
346
347 sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
348 sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
349 sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
350 j -= 4;
351 }
352 z[0] = sum0;
353 z[1] = sum1;
354 z[2] = sum2;
355 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
356 ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
357 ierr = VecRestoreArrayRead(yy[2],&yy2);CHKERRQ(ierr);
358 break;
359 case 2:
360 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
361 ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
362 switch (j_rem=j&0x3) {
363 case 3:
364 x2 = x[2];
365 sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
366 case 2:
367 x1 = x[1];
368 sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
369 case 1:
370 x0 = x[0];
371 sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
372 case 0:
373 x += j_rem;
374 yy0 += j_rem;
375 yy1 += j_rem;
376 j -= j_rem;
377 break;
378 }
379 while (j>0) {
380 x0 = x[0];
381 x1 = x[1];
382 x2 = x[2];
383 x3 = x[3];
384 x += 4;
385
386 sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
387 sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
388 j -= 4;
389 }
390 z[0] = sum0;
391 z[1] = sum1;
392
393 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
394 ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
395 break;
396 case 1:
397 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
398 switch (j_rem=j&0x3) {
399 case 3:
400 x2 = x[2]; sum0 += x2*yy0[2];
401 case 2:
402 x1 = x[1]; sum0 += x1*yy0[1];
403 case 1:
404 x0 = x[0]; sum0 += x0*yy0[0];
405 case 0:
406 x += j_rem;
407 yy0 += j_rem;
408 j -= j_rem;
409 break;
410 }
411 while (j>0) {
412 sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
413 j -= 4; x+=4;
414 }
415 z[0] = sum0;
416
417 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
418 break;
419 case 0:
420 break;
421 }
422 z += nv_rem;
423 i -= nv_rem;
424 yy += nv_rem;
425
426 while (i >0) {
427 sum0 = 0.;
428 sum1 = 0.;
429 sum2 = 0.;
430 sum3 = 0.;
431 ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
432 ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
433 ierr = VecGetArrayRead(yy[2],&yy2);CHKERRQ(ierr);
434 ierr = VecGetArrayRead(yy[3],&yy3);CHKERRQ(ierr);
435 x = xbase;
436
437 j = n;
438 switch (j_rem=j&0x3) {
439 case 3:
440 x2 = x[2];
441 sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
442 sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
443 case 2:
444 x1 = x[1];
445 sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
446 sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
447 case 1:
448 x0 = x[0];
449 sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
450 sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
451 case 0:
452 x += j_rem;
453 yy0 += j_rem;
454 yy1 += j_rem;
455 yy2 += j_rem;
456 yy3 += j_rem;
457 j -= j_rem;
458 break;
459 }
460 while (j>0) {
461 x0 = x[0];
462 x1 = x[1];
463 x2 = x[2];
464 x3 = x[3];
465 x += 4;
466
467 sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
468 sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
469 sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
470 sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
471 j -= 4;
472 }
473 z[0] = sum0;
474 z[1] = sum1;
475 z[2] = sum2;
476 z[3] = sum3;
477 z += 4;
478 i -= 4;
479 ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
480 ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
481 ierr = VecRestoreArrayRead(yy[2],&yy2);CHKERRQ(ierr);
482 ierr = VecRestoreArrayRead(yy[3],&yy3);CHKERRQ(ierr);
483 yy += 4;
484 }
485 ierr = VecRestoreArrayRead(xin,&xbase);CHKERRQ(ierr);
486 ierr = PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));CHKERRQ(ierr);
487 PetscFunctionReturn(0);
488 }
489
490
VecMax_Seq(Vec xin,PetscInt * idx,PetscReal * z)491 PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
492 {
493 PetscInt i,j=0,n = xin->map->n;
494 PetscReal max,tmp;
495 const PetscScalar *xx;
496 PetscErrorCode ierr;
497
498 PetscFunctionBegin;
499 ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
500 if (!n) {
501 max = PETSC_MIN_REAL;
502 j = -1;
503 } else {
504 max = PetscRealPart(*xx++); j = 0;
505 for (i=1; i<n; i++) {
506 if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
507 }
508 }
509 *z = max;
510 if (idx) *idx = j;
511 ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
512 PetscFunctionReturn(0);
513 }
514
VecMin_Seq(Vec xin,PetscInt * idx,PetscReal * z)515 PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
516 {
517 PetscInt i,j=0,n = xin->map->n;
518 PetscReal min,tmp;
519 const PetscScalar *xx;
520 PetscErrorCode ierr;
521
522 PetscFunctionBegin;
523 ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
524 if (!n) {
525 min = PETSC_MAX_REAL;
526 j = -1;
527 } else {
528 min = PetscRealPart(*xx++); j = 0;
529 for (i=1; i<n; i++) {
530 if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
531 }
532 }
533 *z = min;
534 if (idx) *idx = j;
535 ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
536 PetscFunctionReturn(0);
537 }
538
VecSet_Seq(Vec xin,PetscScalar alpha)539 PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
540 {
541 PetscInt i,n = xin->map->n;
542 PetscScalar *xx;
543 PetscErrorCode ierr;
544
545 PetscFunctionBegin;
546 ierr = VecGetArrayWrite(xin,&xx);CHKERRQ(ierr);
547 if (alpha == (PetscScalar)0.0) {
548 ierr = PetscArrayzero(xx,n);CHKERRQ(ierr);
549 } else {
550 for (i=0; i<n; i++) xx[i] = alpha;
551 }
552 ierr = VecRestoreArrayWrite(xin,&xx);CHKERRQ(ierr);
553 PetscFunctionReturn(0);
554 }
555
VecMAXPY_Seq(Vec xin,PetscInt nv,const PetscScalar * alpha,Vec * y)556 PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
557 {
558 PetscErrorCode ierr;
559 PetscInt n = xin->map->n,j,j_rem;
560 const PetscScalar *yy0,*yy1,*yy2,*yy3;
561 PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
562
563 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
564 #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
565 #endif
566
567 PetscFunctionBegin;
568 ierr = PetscLogFlops(nv*2.0*n);CHKERRQ(ierr);
569 ierr = VecGetArray(xin,&xx);CHKERRQ(ierr);
570 switch (j_rem=nv&0x3) {
571 case 3:
572 ierr = VecGetArrayRead(y[0],&yy0);CHKERRQ(ierr);
573 ierr = VecGetArrayRead(y[1],&yy1);CHKERRQ(ierr);
574 ierr = VecGetArrayRead(y[2],&yy2);CHKERRQ(ierr);
575 alpha0 = alpha[0];
576 alpha1 = alpha[1];
577 alpha2 = alpha[2];
578 alpha += 3;
579 PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
580 ierr = VecRestoreArrayRead(y[0],&yy0);CHKERRQ(ierr);
581 ierr = VecRestoreArrayRead(y[1],&yy1);CHKERRQ(ierr);
582 ierr = VecRestoreArrayRead(y[2],&yy2);CHKERRQ(ierr);
583 y += 3;
584 break;
585 case 2:
586 ierr = VecGetArrayRead(y[0],&yy0);CHKERRQ(ierr);
587 ierr = VecGetArrayRead(y[1],&yy1);CHKERRQ(ierr);
588 alpha0 = alpha[0];
589 alpha1 = alpha[1];
590 alpha +=2;
591 PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
592 ierr = VecRestoreArrayRead(y[0],&yy0);CHKERRQ(ierr);
593 ierr = VecRestoreArrayRead(y[1],&yy1);CHKERRQ(ierr);
594 y +=2;
595 break;
596 case 1:
597 ierr = VecGetArrayRead(y[0],&yy0);CHKERRQ(ierr);
598 alpha0 = *alpha++;
599 PetscKernelAXPY(xx,alpha0,yy0,n);
600 ierr = VecRestoreArrayRead(y[0],&yy0);CHKERRQ(ierr);
601 y +=1;
602 break;
603 }
604 for (j=j_rem; j<nv; j+=4) {
605 ierr = VecGetArrayRead(y[0],&yy0);CHKERRQ(ierr);
606 ierr = VecGetArrayRead(y[1],&yy1);CHKERRQ(ierr);
607 ierr = VecGetArrayRead(y[2],&yy2);CHKERRQ(ierr);
608 ierr = VecGetArrayRead(y[3],&yy3);CHKERRQ(ierr);
609 alpha0 = alpha[0];
610 alpha1 = alpha[1];
611 alpha2 = alpha[2];
612 alpha3 = alpha[3];
613 alpha += 4;
614
615 PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
616 ierr = VecRestoreArrayRead(y[0],&yy0);CHKERRQ(ierr);
617 ierr = VecRestoreArrayRead(y[1],&yy1);CHKERRQ(ierr);
618 ierr = VecRestoreArrayRead(y[2],&yy2);CHKERRQ(ierr);
619 ierr = VecRestoreArrayRead(y[3],&yy3);CHKERRQ(ierr);
620 y += 4;
621 }
622 ierr = VecRestoreArray(xin,&xx);CHKERRQ(ierr);
623 PetscFunctionReturn(0);
624 }
625
626 #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
627
VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)628 PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
629 {
630 PetscErrorCode ierr;
631 PetscInt n = yin->map->n;
632 PetscScalar *yy;
633 const PetscScalar *xx;
634
635 PetscFunctionBegin;
636 if (alpha == (PetscScalar)0.0) {
637 ierr = VecCopy(xin,yin);CHKERRQ(ierr);
638 } else if (alpha == (PetscScalar)1.0) {
639 ierr = VecAXPY_Seq(yin,alpha,xin);CHKERRQ(ierr);
640 } else if (alpha == (PetscScalar)-1.0) {
641 PetscInt i;
642 ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
643 ierr = VecGetArray(yin,&yy);CHKERRQ(ierr);
644
645 for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];
646
647 ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
648 ierr = VecRestoreArray(yin,&yy);CHKERRQ(ierr);
649 ierr = PetscLogFlops(1.0*n);CHKERRQ(ierr);
650 } else {
651 ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
652 ierr = VecGetArray(yin,&yy);CHKERRQ(ierr);
653 #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
654 {
655 PetscScalar oalpha = alpha;
656 fortranaypx_(&n,&oalpha,xx,yy);
657 }
658 #else
659 {
660 PetscInt i;
661
662 for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
663 }
664 #endif
665 ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
666 ierr = VecRestoreArray(yin,&yy);CHKERRQ(ierr);
667 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
668 }
669 PetscFunctionReturn(0);
670 }
671
672 #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
673 /*
674 IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
675 to be slower than a regular C loop. Hence,we do not include it.
676 void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
677 */
678
VecWAXPY_Seq(Vec win,PetscScalar alpha,Vec xin,Vec yin)679 PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
680 {
681 PetscErrorCode ierr;
682 PetscInt i,n = win->map->n;
683 PetscScalar *ww;
684 const PetscScalar *yy,*xx;
685
686 PetscFunctionBegin;
687 ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
688 ierr = VecGetArrayRead(yin,&yy);CHKERRQ(ierr);
689 ierr = VecGetArray(win,&ww);CHKERRQ(ierr);
690 if (alpha == (PetscScalar)1.0) {
691 ierr = PetscLogFlops(n);CHKERRQ(ierr);
692 /* could call BLAS axpy after call to memcopy, but may be slower */
693 for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
694 } else if (alpha == (PetscScalar)-1.0) {
695 ierr = PetscLogFlops(n);CHKERRQ(ierr);
696 for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
697 } else if (alpha == (PetscScalar)0.0) {
698 ierr = PetscArraycpy(ww,yy,n);CHKERRQ(ierr);
699 } else {
700 PetscScalar oalpha = alpha;
701 #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
702 fortranwaxpy_(&n,&oalpha,xx,yy,ww);
703 #else
704 for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
705 #endif
706 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
707 }
708 ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
709 ierr = VecRestoreArrayRead(yin,&yy);CHKERRQ(ierr);
710 ierr = VecRestoreArray(win,&ww);CHKERRQ(ierr);
711 PetscFunctionReturn(0);
712 }
713
VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal * max)714 PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
715 {
716 PetscErrorCode ierr;
717 PetscInt n = xin->map->n,i;
718 const PetscScalar *xx,*yy;
719 PetscReal m = 0.0;
720
721 PetscFunctionBegin;
722 ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
723 ierr = VecGetArrayRead(yin,&yy);CHKERRQ(ierr);
724 for (i = 0; i < n; i++) {
725 if (yy[i] != (PetscScalar)0.0) {
726 m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
727 } else {
728 m = PetscMax(PetscAbsScalar(xx[i]), m);
729 }
730 }
731 ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
732 ierr = VecRestoreArrayRead(yin,&yy);CHKERRQ(ierr);
733 ierr = MPIU_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
734 ierr = PetscLogFlops(n);CHKERRQ(ierr);
735 PetscFunctionReturn(0);
736 }
737
VecPlaceArray_Seq(Vec vin,const PetscScalar * a)738 PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
739 {
740 Vec_Seq *v = (Vec_Seq*)vin->data;
741
742 PetscFunctionBegin;
743 if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
744 v->unplacedarray = v->array; /* save previous array so reset can bring it back */
745 v->array = (PetscScalar*)a;
746 PetscFunctionReturn(0);
747 }
748
VecReplaceArray_Seq(Vec vin,const PetscScalar * a)749 PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
750 {
751 Vec_Seq *v = (Vec_Seq*)vin->data;
752 PetscErrorCode ierr;
753
754 PetscFunctionBegin;
755 ierr = PetscFree(v->array_allocated);CHKERRQ(ierr);
756 v->array_allocated = v->array = (PetscScalar*)a;
757 PetscFunctionReturn(0);
758 }
759