1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #ifdef HAVE_MALLOC_H
38 #include <malloc.h>
39 #endif
40 #include <string.h>
41 #include <math.h>
42 #ifdef _OPENMP
43 #include <omp.h>
44 #endif
45 #ifdef USE_MPI
46 #include <mpi.h>
47 #endif
48 #include "lislib.h"
49
50 #ifndef USE_OVERLAP
lis_matvec_jad(LIS_MATRIX A,LIS_SCALAR x[],LIS_SCALAR y[])51 void lis_matvec_jad(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
52 {
53 LIS_INT i,j,k,is,ie,js,je;
54 LIS_INT n,maxnzr;
55 LIS_INT nprocs,my_rank;
56 LIS_SCALAR *w;
57
58 n = A->n;
59 w = A->work;
60 if( A->is_splited )
61 {
62 n = A->n;
63 #ifdef _OPENMP
64 nprocs = omp_get_max_threads();
65 #else
66 nprocs = 1;
67 #endif
68
69 #ifdef _OPENMP
70 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
71 #endif
72 {
73 #ifdef _OPENMP
74 my_rank = omp_get_thread_num();
75 #else
76 my_rank = 0;
77 #endif
78 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
79
80 #ifdef USE_VEC_COMP
81 #pragma cdir nodep
82 #pragma _NEC ivdep
83 #endif
84 for(i=is; i<ie; i++)
85 {
86 y[i] = A->D->value[i]*x[i];
87 w[i] = 0.0;
88 }
89 for(j=0;j<A->L->maxnzr;j++)
90 {
91 k = is;
92 js = A->L->ptr[my_rank*(A->L->maxnzr+1) + j];
93 je = A->L->ptr[my_rank*(A->L->maxnzr+1) + j+1];
94 #ifdef USE_VEC_COMP
95 #pragma cdir nodep
96 #pragma _NEC ivdep
97 #endif
98 for(i=js;i<je;i++)
99 {
100 w[k] += A->L->value[i] * x[A->L->index[i]];
101 k++;
102 }
103 }
104 #ifdef USE_VEC_COMP
105 #pragma cdir nodep
106 #pragma _NEC ivdep
107 #endif
108 for(i=is; i<ie; i++)
109 {
110 y[A->L->row[i]] += w[i];
111 }
112 #ifdef USE_VEC_COMP
113 #pragma cdir nodep
114 #pragma _NEC ivdep
115 #endif
116 for(i=is; i<ie; i++)
117 {
118 w[i] = 0.0;
119 }
120 for(j=0;j<A->U->maxnzr;j++)
121 {
122 k = is;
123 js = A->U->ptr[my_rank*(A->U->maxnzr+1) + j];
124 je = A->U->ptr[my_rank*(A->U->maxnzr+1) + j+1];
125 #ifdef USE_VEC_COMP
126 #pragma cdir nodep
127 #pragma _NEC ivdep
128 #endif
129 for(i=js;i<je;i++)
130 {
131 w[k] += A->U->value[i] * x[A->U->index[i]];
132 k++;
133 }
134 }
135 #ifdef USE_VEC_COMP
136 #pragma cdir nodep
137 #pragma _NEC ivdep
138 #endif
139 for(i=is; i<ie; i++)
140 {
141 y[A->U->row[i]] += w[i];
142 }
143 }
144 }
145 else
146 {
147 n = A->n;
148 maxnzr = A->maxnzr;
149 #ifdef _OPENMP
150 nprocs = omp_get_max_threads();
151 #else
152 nprocs = 1;
153 #endif
154
155 #ifdef _OPENMP
156 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
157 #endif
158 {
159 #ifdef _OPENMP
160 my_rank = omp_get_thread_num();
161 #else
162 my_rank = 0;
163 #endif
164 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
165
166 #ifdef USE_VEC_COMP
167 #pragma cdir nodep
168 #pragma _NEC ivdep
169 #endif
170 for(i=is; i<ie; i++)
171 {
172 w[i] = 0.0;
173 }
174 for(j=0;j<maxnzr;j++)
175 {
176 k = is;
177 js = A->ptr[my_rank*(maxnzr+1) + j];
178 je = A->ptr[my_rank*(maxnzr+1) + j+1];
179 #ifdef USE_VEC_COMP
180 #pragma cdir nodep
181 #pragma _NEC ivdep
182 #endif
183 for(i=js;i<je;i++)
184 {
185 w[k] += A->value[i] * x[A->index[i]];
186 k++;
187 }
188 }
189 #ifdef USE_VEC_COMP
190 #pragma cdir nodep
191 #pragma _NEC ivdep
192 #endif
193 for(i=is; i<ie; i++)
194 {
195 y[A->row[i]] = w[i];
196 }
197 }
198 }
199 }
200 #else
lis_matvec_jad(LIS_MATRIX A,LIS_SCALAR x[],LIS_SCALAR y[])201 void lis_matvec_jad(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
202 {
203 LIS_INT i,j,k,is,ie,js,je,jj,kk;
204 LIS_INT n,maxnzr,maxnzr2;
205 LIS_INT nprocs,my_rank;
206 LIS_SCALAR t;
207 LIS_SCALAR *w;
208 LIS_INT neib,inum,neibpetot,pad;
209 LIS_SCALAR *ws,*wr;
210 LIS_INT *iw,err;
211 LIS_COMMTABLE commtable;
212
213 n = A->n;
214 w = A->work;
215 commtable = A->commtable;
216 neibpetot = commtable->neibpetot;
217 ws = commtable->ws;
218 wr = commtable->wr;
219 pad = commtable->pad;
220
221 for(neib=0;neib<neibpetot;neib++)
222 {
223 is = commtable->export_ptr[neib];
224 inum = commtable->export_ptr[neib+1] - is;
225 for(i=is;i<is+inum;i++)
226 {
227 ws[i] = x[commtable->export_index[i]];
228 }
229 MPI_Isend(&ws[is],inum,LIS_MPI_SCALAR,commtable->neibpe[neib],0,commtable->comm,&commtable->req1[neib]);
230 }
231
232 if( A->is_splited )
233 {
234 n = A->n;
235 #ifdef _OPENMP
236 nprocs = omp_get_max_threads();
237 #else
238 nprocs = 1;
239 #endif
240
241 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
242 {
243 #ifdef _OPENMP
244 my_rank = omp_get_thread_num();
245 #else
246 my_rank = 0;
247 #endif
248 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
249
250 #pragma cdir nodep
251 #pragma _NEC ivdep
252 for(i=is; i<ie; i++)
253 {
254 y[i] = A->D->value[i]*x[i];
255 w[i] = 0.0;
256 }
257 for(j=0;j<A->L->maxnzr;j++)
258 {
259 k = is;
260 js = A->L->ptr[my_rank*(A->L->maxnzr+1) + j];
261 je = A->L->ptr[my_rank*(A->L->maxnzr+1) + j+1];
262 #pragma cdir nodep
263 #pragma _NEC ivdep
264 for(i=js;i<je;i++)
265 {
266 w[k] += A->L->value[i] * x[A->L->index[i]];
267 k++;
268 }
269 }
270 #pragma cdir nodep
271 #pragma _NEC ivdep
272 for(i=is; i<ie; i++)
273 {
274 y[A->L->row[i]] += w[i];
275 }
276 #pragma cdir nodep
277 #pragma _NEC ivdep
278 for(i=is; i<ie; i++)
279 {
280 w[i] = 0.0;
281 }
282 for(j=0;j<A->U->maxnzr;j++)
283 {
284 k = is;
285 js = A->U->ptr[my_rank*(A->U->maxnzr+1) + j];
286 je = A->U->ptr[my_rank*(A->U->maxnzr+1) + j+1];
287 #pragma cdir nodep
288 #pragma _NEC ivdep
289 for(i=js;i<je;i++)
290 {
291 w[k] += A->U->value[i] * x[A->U->index[i]];
292 k++;
293 }
294 }
295 #pragma cdir nodep
296 #pragma _NEC ivdep
297 for(i=is; i<ie; i++)
298 {
299 y[A->U->row[i]] += w[i];
300 }
301 }
302 }
303 else
304 {
305 n = A->n;
306 maxnzr = A->maxnzr;
307 #ifdef _OPENMP
308 nprocs = omp_get_max_threads();
309 #else
310 nprocs = 1;
311 #endif
312
313 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
314 {
315 #ifdef _OPENMP
316 my_rank = omp_get_thread_num();
317 #else
318 my_rank = 0;
319 #endif
320 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
321
322 #pragma cdir nodep
323 #pragma _NEC ivdep
324 for(i=is; i<ie; i++)
325 {
326 w[i] = 0.0;
327 }
328 for(j=0;j<maxnzr;j++)
329 {
330 k = is;
331 js = A->ptr[my_rank*(maxnzr+1) + j];
332 je = A->ptr[my_rank*(maxnzr+1) + j+1];
333 #pragma cdir nodep
334 #pragma _NEC ivdep
335 for(i=js;i<je;i++)
336 {
337 w[k] += A->value[i] * x[A->index[i]];
338 k++;
339 }
340 }
341 #pragma cdir nodep
342 #pragma _NEC ivdep
343 for(i=is; i<ie; i++)
344 {
345 y[A->row[i]] = w[i];
346 }
347 }
348 }
349 for(neib=0;neib<neibpetot;neib++)
350 {
351 is = commtable->import_ptr[neib];
352 inum = commtable->import_ptr[neib+1] - is;
353 MPI_Irecv(&wr[is],inum,LIS_MPI_SCALAR,commtable->neibpe[neib],0,commtable->comm,&commtable->req2[neib]);
354 }
355 MPI_Waitall(neibpetot, commtable->req2, commtable->sta2);
356
357 k = commtable->import_index[0] + pad;
358 for(i=commtable->import_ptr[0];i<commtable->import_ptr[neibpetot];i++)
359 {
360 x[k++] = wr[i];
361 }
362
363 MPI_Waitall(neibpetot, commtable->req1, commtable->sta1);
364
365 if( A->is_splited )
366 {
367 n = A->n;
368 #ifdef _OPENMP
369 nprocs = omp_get_max_threads();
370 #else
371 nprocs = 1;
372 #endif
373
374 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
375 {
376 #ifdef _OPENMP
377 my_rank = omp_get_thread_num();
378 #else
379 my_rank = 0;
380 #endif
381 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
382
383 #pragma cdir nodep
384 #pragma _NEC ivdep
385 for(i=is; i<ie; i++)
386 {
387 y[i] = A->D->value[i]*x[i];
388 w[i] = 0.0;
389 }
390 for(j=0;j<A->L->maxnzr;j++)
391 {
392 k = is;
393 js = A->L->ptr[my_rank*(A->L->maxnzr+1) + j];
394 je = A->L->ptr[my_rank*(A->L->maxnzr+1) + j+1];
395 #pragma cdir nodep
396 #pragma _NEC ivdep
397 for(i=js;i<je;i++)
398 {
399 w[k] += A->L->value[i] * x[A->L->index[i]];
400 k++;
401 }
402 }
403 #pragma cdir nodep
404 #pragma _NEC ivdep
405 for(i=is; i<ie; i++)
406 {
407 y[A->L->row[i]] += w[i];
408 }
409 #pragma cdir nodep
410 #pragma _NEC ivdep
411 for(i=is; i<ie; i++)
412 {
413 w[i] = 0.0;
414 }
415 for(j=0;j<A->U->maxnzr;j++)
416 {
417 k = is;
418 js = A->U->ptr[my_rank*(A->U->maxnzr+1) + j];
419 je = A->U->ptr[my_rank*(A->U->maxnzr+1) + j+1];
420 #pragma cdir nodep
421 #pragma _NEC ivdep
422 for(i=js;i<je;i++)
423 {
424 w[k] += A->U->value[i] * x[A->U->index[i]];
425 k++;
426 }
427 }
428 #pragma cdir nodep
429 #pragma _NEC ivdep
430 for(i=is; i<ie; i++)
431 {
432 y[A->U->row[i]] += w[i];
433 }
434 }
435 }
436 else
437 {
438 maxnzr2 = A->U->maxnzr;
439 #ifdef _OPENMP
440 nprocs = omp_get_max_threads();
441 #else
442 nprocs = 1;
443 #endif
444
445 #pragma omp parallel private(i,j,k,is,ie,js,je,my_rank)
446 {
447 #ifdef _OPENMP
448 my_rank = omp_get_thread_num();
449 #else
450 my_rank = 0;
451 #endif
452 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
453
454 #pragma cdir nodep
455 #pragma _NEC ivdep
456 for(i=is; i<ie; i++)
457 {
458 w[i] = 0.0;
459 }
460 for(j=0;j<maxnzr2;j++)
461 {
462 k = is;
463 js = A->U->ptr[my_rank*(maxnzr2+1) + j];
464 je = A->U->ptr[my_rank*(maxnzr2+1) + j+1];
465 #pragma cdir nodep
466 #pragma _NEC ivdep
467 for(i=js;i<je;i++)
468 {
469 w[k] += A->U->value[i] * x[A->U->index[i]];
470 k++;
471 }
472 }
473 #pragma cdir nodep
474 #pragma _NEC ivdep
475 for(i=is; i<ie; i++)
476 {
477 y[A->U->row[i]] += w[i];
478 }
479 }
480 }
481 }
482 #endif
483
lis_matvech_jad(LIS_MATRIX A,LIS_SCALAR x[],LIS_SCALAR y[])484 void lis_matvech_jad(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
485 {
486 LIS_INT i,j,k,js,je,jj;
487 LIS_INT n,np,maxnzr;
488 #ifdef _OPENMP
489 LIS_INT is,ie,my_rank,nprocs;
490 LIS_SCALAR t;
491 #endif
492 LIS_SCALAR *w;
493
494 n = A->n;
495 np = A->np;
496 maxnzr = A->maxnzr;
497 w = A->work;
498
499 if( A->is_splited )
500 {
501 #ifdef USE_VEC_COMP
502 #pragma cdir nodep
503 #pragma _NEC ivdep
504 #endif
505 for(i=0; i<n; i++)
506 {
507 y[i] = conj(A->D->value[i]) * x[i];
508 }
509 for(i=0;i<A->L->maxnzr;i++)
510 {
511 k = 0;
512 js = A->L->ptr[i];
513 je = A->L->ptr[i+1];
514 #ifdef USE_VEC_COMP
515 #pragma cdir nodep
516 #pragma _NEC ivdep
517 #endif
518 for(j=js;j<je;j++)
519 {
520 jj = A->L->index[j];
521 y[jj] += conj(A->L->value[j]) * x[A->L->row[k]];
522 k++;
523 }
524 }
525 for(i=0;i<A->U->maxnzr;i++)
526 {
527 k = 0;
528 js = A->U->ptr[i];
529 je = A->U->ptr[i+1];
530 #ifdef USE_VEC_COMP
531 #pragma cdir nodep
532 #pragma _NEC ivdep
533 #endif
534 for(j=js;j<je;j++)
535 {
536 jj = A->U->index[j];
537 y[jj] += conj(A->U->value[j]) * x[A->U->row[k]];
538 k++;
539 }
540 }
541 }
542 else
543 {
544 #ifdef _OPENMP
545 nprocs = omp_get_max_threads();
546 w = (LIS_SCALAR *)lis_malloc( nprocs*np*sizeof(LIS_SCALAR),"lis_matvech_jad::w" );
547 #pragma omp parallel private(i,j,t,is,ie,js,je,jj,my_rank)
548 {
549 my_rank = omp_get_thread_num();
550 LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
551 memset( &w[my_rank*np], 0, np*sizeof(LIS_SCALAR) );
552
553 for(j=0;j<maxnzr;j++)
554 {
555 k = is;
556 js = A->ptr[my_rank*(maxnzr+1) + j];
557 je = A->ptr[my_rank*(maxnzr+1) + j+1];
558 #ifdef USE_VEC_COMP
559 #pragma cdir nodep
560 #pragma _NEC ivdep
561 #endif
562 for(i=js;i<je;i++)
563 {
564 w[my_rank*np + A->index[i]] += conj(A->value[i]) * x[A->row[k]];
565 k++;
566 }
567 }
568 #pragma omp barrier
569 #pragma omp for
570 #ifdef USE_VEC_COMP
571 #pragma cdir nodep
572 #pragma _NEC ivdep
573 #endif
574 for(i=0;i<np;i++)
575 {
576 t = 0.0;
577 for(j=0;j<nprocs;j++)
578 {
579 t += w[j*np+i];
580 }
581 y[i] = t;
582 }
583 }
584 lis_free(w);
585 #else
586 #ifdef USE_VEC_COMP
587 #pragma cdir nodep
588 #pragma _NEC ivdep
589 #endif
590 for(i=0; i<np; i++)
591 {
592 y[i] = 0.0;
593 }
594 for(i=0;i<maxnzr;i++)
595 {
596 k = 0;
597 js = A->ptr[i];
598 je = A->ptr[i+1];
599 #ifdef USE_VEC_COMP
600 #pragma cdir nodep
601 #pragma _NEC ivdep
602 #endif
603 for(j=js;j<je;j++)
604 {
605 jj = A->index[j];
606 y[jj] += conj(A->value[j]) * x[A->row[k]];
607 k++;
608 }
609 }
610 #endif
611 }
612 }
613
lis_matvec_jad_u4_1(LIS_MATRIX A,LIS_SCALAR x[],LIS_SCALAR y[])614 void lis_matvec_jad_u4_1(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
615 {
616 LIS_INT i,j;
617 LIS_INT n,np,maxnzr;
618 LIS_SCALAR *yy;
619 LIS_INT is0,is1,is2,is3;
620 LIS_INT ie0,ie1,ie2,ie3;
621
622 n = A->n;
623 np = A->np;
624 maxnzr = A->maxnzr;
625 yy = A->work;
626 #ifdef USE_VEC_COMP
627 #pragma cdir nodep
628 #pragma _NEC ivdep
629 #endif
630 for(i=0; i<np; i++)
631 {
632 yy[i] = 0.0;
633 }
634 for(j=3;j<maxnzr;j+=4)
635 {
636 is0 = A->ptr[j-3];
637 is1 = A->ptr[j-2];
638 is2 = A->ptr[j-1];
639 is3 = A->ptr[j-0];
640 ie3 = A->ptr[j+1-0] - A->ptr[j-0];
641
642 #ifdef USE_VEC_COMP
643 #pragma cdir nodep
644 #pragma _NEC ivdep
645 #endif
646 for(i=0;i<ie3-0;i+=1)
647 {
648 yy[i] += A->value[is0+i]*x[A->index[is0+i]]
649 + A->value[is1+i]*x[A->index[is1+i]]
650 + A->value[is2+i]*x[A->index[is2+i]]
651 + A->value[is3+i]*x[A->index[is3+i]];
652 }
653 ie2 = A->ptr[j+1-1] - A->ptr[j-1];
654 #ifdef USE_VEC_COMP
655 #pragma cdir nodep
656 #pragma _NEC ivdep
657 #endif
658 for(;i<ie2-0;i+=1)
659 {
660 yy[i] += A->value[is0+i]*x[A->index[is0+i]]
661 + A->value[is1+i]*x[A->index[is1+i]]
662 + A->value[is2+i]*x[A->index[is2+i]];
663 }
664 ie1 = A->ptr[j+1-2] - A->ptr[j-2];
665 #ifdef USE_VEC_COMP
666 #pragma cdir nodep
667 #pragma _NEC ivdep
668 #endif
669 for(;i<ie1-0;i+=1)
670 {
671 yy[i] += A->value[is0+i]*x[A->index[is0+i]]
672 + A->value[is1+i]*x[A->index[is1+i]];
673 }
674 ie0 = A->ptr[j+1-3] - A->ptr[j-3];
675 #ifdef USE_VEC_COMP
676 #pragma cdir nodep
677 #pragma _NEC ivdep
678 #endif
679 for(;i<ie0-0;i+=1)
680 {
681 yy[i] += A->value[is0+i]*x[A->index[is0+i]];
682 }
683 }
684 for(j=j-1;j<maxnzr;j+=3)
685 {
686 is0 = A->ptr[j-2];
687 is1 = A->ptr[j-1];
688 is2 = A->ptr[j-0];
689 ie2 = A->ptr[j+1-0] - A->ptr[j-0];
690
691 #ifdef USE_VEC_COMP
692 #pragma cdir nodep
693 #pragma _NEC ivdep
694 #endif
695 for(i=0;i<ie2-0;i+=1)
696 {
697 yy[i] += A->value[is0+i]*x[A->index[is0+i]]
698 + A->value[is1+i]*x[A->index[is1+i]]
699 + A->value[is2+i]*x[A->index[is2+i]];
700 }
701 ie1 = A->ptr[j+1-1] - A->ptr[j-1];
702 #ifdef USE_VEC_COMP
703 #pragma cdir nodep
704 #pragma _NEC ivdep
705 #endif
706 for(;i<ie1-0;i+=1)
707 {
708 yy[i] += A->value[is0+i]*x[A->index[is0+i]]
709 + A->value[is1+i]*x[A->index[is1+i]];
710 }
711 ie0 = A->ptr[j+1-2] - A->ptr[j-2];
712 #ifdef USE_VEC_COMP
713 #pragma cdir nodep
714 #pragma _NEC ivdep
715 #endif
716 for(;i<ie0-0;i+=1)
717 {
718 yy[i] += A->value[is0+i]*x[A->index[is0+i]];
719 }
720 }
721 for(j=j-1;j<maxnzr;j+=2)
722 {
723 is0 = A->ptr[j-1];
724 is1 = A->ptr[j-0];
725 ie1 = A->ptr[j+1-0] - A->ptr[j-0];
726
727 #ifdef USE_VEC_COMP
728 #pragma cdir nodep
729 #pragma _NEC ivdep
730 #endif
731 for(i=0;i<ie1-0;i+=1)
732 {
733 yy[i] += A->value[is0+i]*x[A->index[is0+i]]
734 + A->value[is1+i]*x[A->index[is1+i]];
735 }
736 ie0 = A->ptr[j+1-1] - A->ptr[j-1];
737 #ifdef USE_VEC_COMP
738 #pragma cdir nodep
739 #pragma _NEC ivdep
740 #endif
741 for(;i<ie0-0;i+=1)
742 {
743 yy[i] += A->value[is0+i]*x[A->index[is0+i]];
744 }
745 }
746 for(j=j-1;j<maxnzr;j+=1)
747 {
748 is0 = A->ptr[j-0];
749 ie0 = A->ptr[j+1-0] - A->ptr[j-0];
750
751 #ifdef USE_VEC_COMP
752 #pragma cdir nodep
753 #pragma _NEC ivdep
754 #endif
755 for(i=0;i<ie0-0;i+=1)
756 {
757 yy[i] += A->value[is0+i]*x[A->index[is0+i]];
758 }
759 }
760 #ifdef USE_VEC_COMP
761 #pragma cdir nodep
762 #pragma _NEC ivdep
763 #endif
764 for(i=0;i<n;i++)
765 {
766 y[A->row[i]] = yy[i];
767 }
768 }
769
lis_matvec_jad_u5_1(LIS_MATRIX A,LIS_SCALAR x[],LIS_SCALAR y[])770 void lis_matvec_jad_u5_1(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
771 {
772 LIS_INT i,j;
773 LIS_INT n,np,maxnzr;
774 LIS_SCALAR *yy;
775 LIS_INT is0,is1,is2,is3,is4;
776 LIS_INT ie0,ie1,ie2,ie3,ie4;
777 LIS_INT *jj0,*jj1,*jj2,*jj3,*jj4;
778 LIS_SCALAR *vv0,*vv1,*vv2,*vv3,*vv4;
779 LIS_INT j00;
780 LIS_INT j10;
781 LIS_INT j20;
782 LIS_INT j30;
783 LIS_INT j40;
784
785 n = A->n;
786 np = A->np;
787 maxnzr = A->maxnzr;
788 yy = A->work;
789 #ifdef USE_VEC_COMP
790 #pragma cdir nodep
791 #pragma _NEC ivdep
792 #endif
793 for(i=0; i<np; i++)
794 {
795 yy[i] = 0.0;
796 }
797 for(j=4;j<maxnzr;j+=5)
798 {
799 yy = A->work;
800 is0 = A->ptr[j-4];
801 ie0 = A->ptr[j+1-4] - A->ptr[j-4];
802 jj0 = &A->index[is0];
803 vv0 = &A->value[is0];
804 is1 = A->ptr[j-3];
805 ie1 = A->ptr[j+1-3] - A->ptr[j-3];
806 jj1 = &A->index[is1];
807 vv1 = &A->value[is1];
808 is2 = A->ptr[j-2];
809 ie2 = A->ptr[j+1-2] - A->ptr[j-2];
810 jj2 = &A->index[is2];
811 vv2 = &A->value[is2];
812 is3 = A->ptr[j-1];
813 ie3 = A->ptr[j+1-1] - A->ptr[j-1];
814 jj3 = &A->index[is3];
815 vv3 = &A->value[is3];
816 is4 = A->ptr[j-0];
817 ie4 = A->ptr[j+1-0] - A->ptr[j-0];
818 jj4 = &A->index[is4];
819 vv4 = &A->value[is4];
820
821 #ifdef USE_VEC_COMP
822 #pragma cdir nodep
823 #pragma _NEC ivdep
824 #endif
825 for(i=0;i<ie4-0;i+=1)
826 {
827 j00 = jj0[i+0];
828 j10 = jj1[i+0];
829 j20 = jj2[i+0];
830 j30 = jj3[i+0];
831 j40 = jj4[i+0];
832 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40];
833 }
834 #ifdef USE_VEC_COMP
835 #pragma cdir nodep
836 #pragma _NEC ivdep
837 #endif
838 for(;i<ie3-0;i+=1)
839 {
840 j00 = jj0[i+0];
841 j10 = jj1[i+0];
842 j20 = jj2[i+0];
843 j30 = jj3[i+0];
844 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
845 }
846 #ifdef USE_VEC_COMP
847 #pragma cdir nodep
848 #pragma _NEC ivdep
849 #endif
850 for(;i<ie2-0;i+=1)
851 {
852 j00 = jj0[i+0];
853 j10 = jj1[i+0];
854 j20 = jj2[i+0];
855 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
856 }
857 #ifdef USE_VEC_COMP
858 #pragma cdir nodep
859 #pragma _NEC ivdep
860 #endif
861 for(;i<ie1-0;i+=1)
862 {
863 j00 = jj0[i+0];
864 j10 = jj1[i+0];
865 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
866 }
867 #ifdef USE_VEC_COMP
868 #pragma cdir nodep
869 #pragma _NEC ivdep
870 #endif
871 for(;i<ie0-0;i+=1)
872 {
873 j00 = jj0[i+0];
874 yy[i+0] += vv0[i+0]*x[j00];
875 }
876 }
877 for(j=j-1;j<maxnzr;j+=4)
878 {
879 yy = A->work;
880 is0 = A->ptr[j-3];
881 ie0 = A->ptr[j+1-3] - A->ptr[j-3];
882 jj0 = &A->index[is0];
883 vv0 = &A->value[is0];
884 is1 = A->ptr[j-2];
885 ie1 = A->ptr[j+1-2] - A->ptr[j-2];
886 jj1 = &A->index[is1];
887 vv1 = &A->value[is1];
888 is2 = A->ptr[j-1];
889 ie2 = A->ptr[j+1-1] - A->ptr[j-1];
890 jj2 = &A->index[is2];
891 vv2 = &A->value[is2];
892 is3 = A->ptr[j-0];
893 ie3 = A->ptr[j+1-0] - A->ptr[j-0];
894 jj3 = &A->index[is3];
895 vv3 = &A->value[is3];
896
897 #ifdef USE_VEC_COMP
898 #pragma cdir nodep
899 #pragma _NEC ivdep
900 #endif
901 for(i=0;i<ie3-0;i+=1)
902 {
903 j00 = jj0[i+0];
904 j10 = jj1[i+0];
905 j20 = jj2[i+0];
906 j30 = jj3[i+0];
907 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
908 }
909 #ifdef USE_VEC_COMP
910 #pragma cdir nodep
911 #pragma _NEC ivdep
912 #endif
913 for(;i<ie2-0;i+=1)
914 {
915 j00 = jj0[i+0];
916 j10 = jj1[i+0];
917 j20 = jj2[i+0];
918 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
919 }
920 #ifdef USE_VEC_COMP
921 #pragma cdir nodep
922 #pragma _NEC ivdep
923 #endif
924 for(;i<ie1-0;i+=1)
925 {
926 j00 = jj0[i+0];
927 j10 = jj1[i+0];
928 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
929 }
930 #ifdef USE_VEC_COMP
931 #pragma cdir nodep
932 #pragma _NEC ivdep
933 #endif
934 for(;i<ie0-0;i+=1)
935 {
936 j00 = jj0[i+0];
937 yy[i+0] += vv0[i+0]*x[j00];
938 }
939 }
940 for(j=j-1;j<maxnzr;j+=3)
941 {
942 yy = A->work;
943 is0 = A->ptr[j-2];
944 ie0 = A->ptr[j+1-2] - A->ptr[j-2];
945 jj0 = &A->index[is0];
946 vv0 = &A->value[is0];
947 is1 = A->ptr[j-1];
948 ie1 = A->ptr[j+1-1] - A->ptr[j-1];
949 jj1 = &A->index[is1];
950 vv1 = &A->value[is1];
951 is2 = A->ptr[j-0];
952 ie2 = A->ptr[j+1-0] - A->ptr[j-0];
953 jj2 = &A->index[is2];
954 vv2 = &A->value[is2];
955
956 #ifdef USE_VEC_COMP
957 #pragma cdir nodep
958 #pragma _NEC ivdep
959 #endif
960 for(i=0;i<ie2-0;i+=1)
961 {
962 j00 = jj0[i+0];
963 j10 = jj1[i+0];
964 j20 = jj2[i+0];
965 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
966 }
967 #ifdef USE_VEC_COMP
968 #pragma cdir nodep
969 #pragma _NEC ivdep
970 #endif
971 for(;i<ie1-0;i+=1)
972 {
973 j00 = jj0[i+0];
974 j10 = jj1[i+0];
975 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
976 }
977 #ifdef USE_VEC_COMP
978 #pragma cdir nodep
979 #pragma _NEC ivdep
980 #endif
981 for(;i<ie0-0;i+=1)
982 {
983 j00 = jj0[i+0];
984 yy[i+0] += vv0[i+0]*x[j00];
985 }
986 }
987 for(j=j-1;j<maxnzr;j+=2)
988 {
989 yy = A->work;
990 is0 = A->ptr[j-1];
991 ie0 = A->ptr[j+1-1] - A->ptr[j-1];
992 jj0 = &A->index[is0];
993 vv0 = &A->value[is0];
994 is1 = A->ptr[j-0];
995 ie1 = A->ptr[j+1-0] - A->ptr[j-0];
996 jj1 = &A->index[is1];
997 vv1 = &A->value[is1];
998
999 #ifdef USE_VEC_COMP
1000 #pragma cdir nodep
1001 #pragma _NEC ivdep
1002 #endif
1003 for(i=0;i<ie1-0;i+=1)
1004 {
1005 j00 = jj0[i+0];
1006 j10 = jj1[i+0];
1007 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1008 }
1009 #ifdef USE_VEC_COMP
1010 #pragma cdir nodep
1011 #pragma _NEC ivdep
1012 #endif
1013 for(;i<ie0-0;i+=1)
1014 {
1015 j00 = jj0[i+0];
1016 yy[i+0] += vv0[i+0]*x[j00];
1017 }
1018 }
1019 for(j=j-1;j<maxnzr;j+=1)
1020 {
1021 yy = A->work;
1022 is0 = A->ptr[j-0];
1023 ie0 = A->ptr[j+1-0] - A->ptr[j-0];
1024 jj0 = &A->index[is0];
1025 vv0 = &A->value[is0];
1026
1027 #ifdef USE_VEC_COMP
1028 #pragma cdir nodep
1029 #pragma _NEC ivdep
1030 #endif
1031 for(i=0;i<ie0-0;i+=1)
1032 {
1033 j00 = jj0[i+0];
1034 yy[i+0] += vv0[i+0]*x[j00];
1035 }
1036 }
1037 yy = A->work;
1038 #ifdef USE_VEC_COMP
1039 #pragma cdir nodep
1040 #pragma _NEC ivdep
1041 #endif
1042 for(i=0;i<n;i++)
1043 {
1044 y[A->row[i]] = yy[i];
1045 }
1046 }
1047
lis_matvec_jad_u6_1(LIS_MATRIX A,LIS_SCALAR x[],LIS_SCALAR y[])1048 void lis_matvec_jad_u6_1(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
1049 {
1050 LIS_INT i,j;
1051 LIS_INT n,np,maxnzr;
1052 LIS_SCALAR *yy;
1053 LIS_INT is0,is1,is2,is3,is4,is5;
1054 LIS_INT ie0,ie1,ie2,ie3,ie4,ie5;
1055 LIS_INT *jj0,*jj1,*jj2,*jj3,*jj4,*jj5;
1056 LIS_SCALAR *vv0,*vv1,*vv2,*vv3,*vv4,*vv5;
1057 LIS_INT j00;
1058 LIS_INT j10;
1059 LIS_INT j20;
1060 LIS_INT j30;
1061 LIS_INT j40;
1062 LIS_INT j50;
1063
1064 n = A->n;
1065 np = A->np;
1066 maxnzr = A->maxnzr;
1067 yy = A->work;
1068 #ifdef USE_VEC_COMP
1069 #pragma cdir nodep
1070 #pragma _NEC ivdep
1071 #endif
1072 for(i=0; i<np; i++)
1073 {
1074 yy[i] = 0.0;
1075 }
1076 for(j=5;j<maxnzr;j+=6)
1077 {
1078 yy = A->work;
1079 is0 = A->ptr[j-5];
1080 ie0 = A->ptr[j+1-5] - A->ptr[j-5];
1081 jj0 = &A->index[is0];
1082 vv0 = &A->value[is0];
1083 is1 = A->ptr[j-4];
1084 ie1 = A->ptr[j+1-4] - A->ptr[j-4];
1085 jj1 = &A->index[is1];
1086 vv1 = &A->value[is1];
1087 is2 = A->ptr[j-3];
1088 ie2 = A->ptr[j+1-3] - A->ptr[j-3];
1089 jj2 = &A->index[is2];
1090 vv2 = &A->value[is2];
1091 is3 = A->ptr[j-2];
1092 ie3 = A->ptr[j+1-2] - A->ptr[j-2];
1093 jj3 = &A->index[is3];
1094 vv3 = &A->value[is3];
1095 is4 = A->ptr[j-1];
1096 ie4 = A->ptr[j+1-1] - A->ptr[j-1];
1097 jj4 = &A->index[is4];
1098 vv4 = &A->value[is4];
1099 is5 = A->ptr[j-0];
1100 ie5 = A->ptr[j+1-0] - A->ptr[j-0];
1101 jj5 = &A->index[is5];
1102 vv5 = &A->value[is5];
1103
1104 #ifdef USE_VEC_COMP
1105 #pragma cdir nodep
1106 #pragma _NEC ivdep
1107 #endif
1108 for(i=0;i<ie5-0;i+=1)
1109 {
1110 j00 = jj0[i+0];
1111 j10 = jj1[i+0];
1112 j20 = jj2[i+0];
1113 j30 = jj3[i+0];
1114 j40 = jj4[i+0];
1115 j50 = jj5[i+0];
1116 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40] + vv5[i+0]*x[j50];
1117 }
1118 #ifdef USE_VEC_COMP
1119 #pragma cdir nodep
1120 #pragma _NEC ivdep
1121 #endif
1122 for(;i<ie4-0;i+=1)
1123 {
1124 j00 = jj0[i+0];
1125 j10 = jj1[i+0];
1126 j20 = jj2[i+0];
1127 j30 = jj3[i+0];
1128 j40 = jj4[i+0];
1129 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40];
1130 }
1131 #ifdef USE_VEC_COMP
1132 #pragma cdir nodep
1133 #pragma _NEC ivdep
1134 #endif
1135 for(;i<ie3-0;i+=1)
1136 {
1137 j00 = jj0[i+0];
1138 j10 = jj1[i+0];
1139 j20 = jj2[i+0];
1140 j30 = jj3[i+0];
1141 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
1142 }
1143 #ifdef USE_VEC_COMP
1144 #pragma cdir nodep
1145 #pragma _NEC ivdep
1146 #endif
1147 for(;i<ie2-0;i+=1)
1148 {
1149 j00 = jj0[i+0];
1150 j10 = jj1[i+0];
1151 j20 = jj2[i+0];
1152 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
1153 }
1154 #ifdef USE_VEC_COMP
1155 #pragma cdir nodep
1156 #pragma _NEC ivdep
1157 #endif
1158 for(;i<ie1-0;i+=1)
1159 {
1160 j00 = jj0[i+0];
1161 j10 = jj1[i+0];
1162 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1163 }
1164 #ifdef USE_VEC_COMP
1165 #pragma cdir nodep
1166 #pragma _NEC ivdep
1167 #endif
1168 for(;i<ie0-0;i+=1)
1169 {
1170 j00 = jj0[i+0];
1171 yy[i+0] += vv0[i+0]*x[j00];
1172 }
1173 }
1174 for(j=j-1;j<maxnzr;j+=5)
1175 {
1176 yy = A->work;
1177 is0 = A->ptr[j-4];
1178 ie0 = A->ptr[j+1-4] - A->ptr[j-4];
1179 jj0 = &A->index[is0];
1180 vv0 = &A->value[is0];
1181 is1 = A->ptr[j-3];
1182 ie1 = A->ptr[j+1-3] - A->ptr[j-3];
1183 jj1 = &A->index[is1];
1184 vv1 = &A->value[is1];
1185 is2 = A->ptr[j-2];
1186 ie2 = A->ptr[j+1-2] - A->ptr[j-2];
1187 jj2 = &A->index[is2];
1188 vv2 = &A->value[is2];
1189 is3 = A->ptr[j-1];
1190 ie3 = A->ptr[j+1-1] - A->ptr[j-1];
1191 jj3 = &A->index[is3];
1192 vv3 = &A->value[is3];
1193 is4 = A->ptr[j-0];
1194 ie4 = A->ptr[j+1-0] - A->ptr[j-0];
1195 jj4 = &A->index[is4];
1196 vv4 = &A->value[is4];
1197
1198 #ifdef USE_VEC_COMP
1199 #pragma cdir nodep
1200 #pragma _NEC ivdep
1201 #endif
1202 for(i=0;i<ie4-0;i+=1)
1203 {
1204 j00 = jj0[i+0];
1205 j10 = jj1[i+0];
1206 j20 = jj2[i+0];
1207 j30 = jj3[i+0];
1208 j40 = jj4[i+0];
1209 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40];
1210 }
1211 #ifdef USE_VEC_COMP
1212 #pragma cdir nodep
1213 #pragma _NEC ivdep
1214 #endif
1215 for(;i<ie3-0;i+=1)
1216 {
1217 j00 = jj0[i+0];
1218 j10 = jj1[i+0];
1219 j20 = jj2[i+0];
1220 j30 = jj3[i+0];
1221 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
1222 }
1223 #ifdef USE_VEC_COMP
1224 #pragma cdir nodep
1225 #pragma _NEC ivdep
1226 #endif
1227 for(;i<ie2-0;i+=1)
1228 {
1229 j00 = jj0[i+0];
1230 j10 = jj1[i+0];
1231 j20 = jj2[i+0];
1232 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
1233 }
1234 #ifdef USE_VEC_COMP
1235 #pragma cdir nodep
1236 #pragma _NEC ivdep
1237 #endif
1238 for(;i<ie1-0;i+=1)
1239 {
1240 j00 = jj0[i+0];
1241 j10 = jj1[i+0];
1242 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1243 }
1244 #ifdef USE_VEC_COMP
1245 #pragma cdir nodep
1246 #pragma _NEC ivdep
1247 #endif
1248 for(;i<ie0-0;i+=1)
1249 {
1250 j00 = jj0[i+0];
1251 yy[i+0] += vv0[i+0]*x[j00];
1252 }
1253 }
1254 for(j=j-1;j<maxnzr;j+=4)
1255 {
1256 yy = A->work;
1257 is0 = A->ptr[j-3];
1258 ie0 = A->ptr[j+1-3] - A->ptr[j-3];
1259 jj0 = &A->index[is0];
1260 vv0 = &A->value[is0];
1261 is1 = A->ptr[j-2];
1262 ie1 = A->ptr[j+1-2] - A->ptr[j-2];
1263 jj1 = &A->index[is1];
1264 vv1 = &A->value[is1];
1265 is2 = A->ptr[j-1];
1266 ie2 = A->ptr[j+1-1] - A->ptr[j-1];
1267 jj2 = &A->index[is2];
1268 vv2 = &A->value[is2];
1269 is3 = A->ptr[j-0];
1270 ie3 = A->ptr[j+1-0] - A->ptr[j-0];
1271 jj3 = &A->index[is3];
1272 vv3 = &A->value[is3];
1273
1274 #ifdef USE_VEC_COMP
1275 #pragma cdir nodep
1276 #pragma _NEC ivdep
1277 #endif
1278 for(i=0;i<ie3-0;i+=1)
1279 {
1280 j00 = jj0[i+0];
1281 j10 = jj1[i+0];
1282 j20 = jj2[i+0];
1283 j30 = jj3[i+0];
1284 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
1285 }
1286 #ifdef USE_VEC_COMP
1287 #pragma cdir nodep
1288 #pragma _NEC ivdep
1289 #endif
1290 for(;i<ie2-0;i+=1)
1291 {
1292 j00 = jj0[i+0];
1293 j10 = jj1[i+0];
1294 j20 = jj2[i+0];
1295 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
1296 }
1297 #ifdef USE_VEC_COMP
1298 #pragma cdir nodep
1299 #pragma _NEC ivdep
1300 #endif
1301 for(;i<ie1-0;i+=1)
1302 {
1303 j00 = jj0[i+0];
1304 j10 = jj1[i+0];
1305 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1306 }
1307 #ifdef USE_VEC_COMP
1308 #pragma cdir nodep
1309 #pragma _NEC ivdep
1310 #endif
1311 for(;i<ie0-0;i+=1)
1312 {
1313 j00 = jj0[i+0];
1314 yy[i+0] += vv0[i+0]*x[j00];
1315 }
1316 }
1317 for(j=j-1;j<maxnzr;j+=3)
1318 {
1319 yy = A->work;
1320 is0 = A->ptr[j-2];
1321 ie0 = A->ptr[j+1-2] - A->ptr[j-2];
1322 jj0 = &A->index[is0];
1323 vv0 = &A->value[is0];
1324 is1 = A->ptr[j-1];
1325 ie1 = A->ptr[j+1-1] - A->ptr[j-1];
1326 jj1 = &A->index[is1];
1327 vv1 = &A->value[is1];
1328 is2 = A->ptr[j-0];
1329 ie2 = A->ptr[j+1-0] - A->ptr[j-0];
1330 jj2 = &A->index[is2];
1331 vv2 = &A->value[is2];
1332
1333 #ifdef USE_VEC_COMP
1334 #pragma cdir nodep
1335 #pragma _NEC ivdep
1336 #endif
1337 for(i=0;i<ie2-0;i+=1)
1338 {
1339 j00 = jj0[i+0];
1340 j10 = jj1[i+0];
1341 j20 = jj2[i+0];
1342 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
1343 }
1344 #ifdef USE_VEC_COMP
1345 #pragma cdir nodep
1346 #pragma _NEC ivdep
1347 #endif
1348 for(;i<ie1-0;i+=1)
1349 {
1350 j00 = jj0[i+0];
1351 j10 = jj1[i+0];
1352 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1353 }
1354 #ifdef USE_VEC_COMP
1355 #pragma cdir nodep
1356 #pragma _NEC ivdep
1357 #endif
1358 for(;i<ie0-0;i+=1)
1359 {
1360 j00 = jj0[i+0];
1361 yy[i+0] += vv0[i+0]*x[j00];
1362 }
1363 }
1364 for(j=j-1;j<maxnzr;j+=2)
1365 {
1366 yy = A->work;
1367 is0 = A->ptr[j-1];
1368 ie0 = A->ptr[j+1-1] - A->ptr[j-1];
1369 jj0 = &A->index[is0];
1370 vv0 = &A->value[is0];
1371 is1 = A->ptr[j-0];
1372 ie1 = A->ptr[j+1-0] - A->ptr[j-0];
1373 jj1 = &A->index[is1];
1374 vv1 = &A->value[is1];
1375
1376 #ifdef USE_VEC_COMP
1377 #pragma cdir nodep
1378 #pragma _NEC ivdep
1379 #endif
1380 for(i=0;i<ie1-0;i+=1)
1381 {
1382 j00 = jj0[i+0];
1383 j10 = jj1[i+0];
1384 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1385 }
1386 #ifdef USE_VEC_COMP
1387 #pragma cdir nodep
1388 #pragma _NEC ivdep
1389 #endif
1390 for(;i<ie0-0;i+=1)
1391 {
1392 j00 = jj0[i+0];
1393 yy[i+0] += vv0[i+0]*x[j00];
1394 }
1395 }
1396 for(j=j-1;j<maxnzr;j+=1)
1397 {
1398 yy = A->work;
1399 is0 = A->ptr[j-0];
1400 ie0 = A->ptr[j+1-0] - A->ptr[j-0];
1401 jj0 = &A->index[is0];
1402 vv0 = &A->value[is0];
1403
1404 #ifdef USE_VEC_COMP
1405 #pragma cdir nodep
1406 #pragma _NEC ivdep
1407 #endif
1408 for(i=0;i<ie0-0;i+=1)
1409 {
1410 j00 = jj0[i+0];
1411 yy[i+0] += vv0[i+0]*x[j00];
1412 }
1413 }
1414 yy = A->work;
1415 #ifdef USE_VEC_COMP
1416 #pragma cdir nodep
1417 #pragma _NEC ivdep
1418 #endif
1419 for(i=0;i<n;i++)
1420 {
1421 y[A->row[i]] = yy[i];
1422 }
1423 }
1424
lis_matvec_jad_u7_1(LIS_MATRIX A,LIS_SCALAR x[],LIS_SCALAR y[])1425 void lis_matvec_jad_u7_1(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
1426 {
1427 LIS_INT i,j;
1428 LIS_INT n,np,maxnzr;
1429 LIS_SCALAR *yy;
1430 LIS_INT is0,is1,is2,is3,is4,is5,is6;
1431 LIS_INT ie0,ie1,ie2,ie3,ie4,ie5,ie6;
1432 LIS_INT *jj0,*jj1,*jj2,*jj3,*jj4,*jj5,*jj6;
1433 LIS_SCALAR *vv0,*vv1,*vv2,*vv3,*vv4,*vv5,*vv6;
1434 LIS_INT j00;
1435 LIS_INT j10;
1436 LIS_INT j20;
1437 LIS_INT j30;
1438 LIS_INT j40;
1439 LIS_INT j50;
1440 LIS_INT j60;
1441
1442 n = A->n;
1443 np = A->np;
1444 maxnzr = A->maxnzr;
1445 yy = A->work;
1446 #ifdef USE_VEC_COMP
1447 #pragma cdir nodep
1448 #pragma _NEC ivdep
1449 #endif
1450 for(i=0; i<np; i++)
1451 {
1452 yy[i] = 0.0;
1453 }
1454 for(j=6;j<maxnzr;j+=7)
1455 {
1456 yy = A->work;
1457 is0 = A->ptr[j-6];
1458 ie0 = A->ptr[j+1-6] - A->ptr[j-6];
1459 jj0 = &A->index[is0];
1460 vv0 = &A->value[is0];
1461 is1 = A->ptr[j-5];
1462 ie1 = A->ptr[j+1-5] - A->ptr[j-5];
1463 jj1 = &A->index[is1];
1464 vv1 = &A->value[is1];
1465 is2 = A->ptr[j-4];
1466 ie2 = A->ptr[j+1-4] - A->ptr[j-4];
1467 jj2 = &A->index[is2];
1468 vv2 = &A->value[is2];
1469 is3 = A->ptr[j-3];
1470 ie3 = A->ptr[j+1-3] - A->ptr[j-3];
1471 jj3 = &A->index[is3];
1472 vv3 = &A->value[is3];
1473 is4 = A->ptr[j-2];
1474 ie4 = A->ptr[j+1-2] - A->ptr[j-2];
1475 jj4 = &A->index[is4];
1476 vv4 = &A->value[is4];
1477 is5 = A->ptr[j-1];
1478 ie5 = A->ptr[j+1-1] - A->ptr[j-1];
1479 jj5 = &A->index[is5];
1480 vv5 = &A->value[is5];
1481 is6 = A->ptr[j-0];
1482 ie6 = A->ptr[j+1-0] - A->ptr[j-0];
1483 jj6 = &A->index[is6];
1484 vv6 = &A->value[is6];
1485
1486 #ifdef USE_VEC_COMP
1487 #pragma cdir nodep
1488 #pragma _NEC ivdep
1489 #endif
1490 for(i=0;i<ie6-0;i+=1)
1491 {
1492 j00 = jj0[i+0];
1493 j10 = jj1[i+0];
1494 j20 = jj2[i+0];
1495 j30 = jj3[i+0];
1496 j40 = jj4[i+0];
1497 j50 = jj5[i+0];
1498 j60 = jj6[i+0];
1499 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40] + vv5[i+0]*x[j50] + vv6[i+0]*x[j60];
1500 }
1501 #ifdef USE_VEC_COMP
1502 #pragma cdir nodep
1503 #pragma _NEC ivdep
1504 #endif
1505 for(;i<ie5-0;i+=1)
1506 {
1507 j00 = jj0[i+0];
1508 j10 = jj1[i+0];
1509 j20 = jj2[i+0];
1510 j30 = jj3[i+0];
1511 j40 = jj4[i+0];
1512 j50 = jj5[i+0];
1513 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40] + vv5[i+0]*x[j50];
1514 }
1515 #ifdef USE_VEC_COMP
1516 #pragma cdir nodep
1517 #pragma _NEC ivdep
1518 #endif
1519 for(;i<ie4-0;i+=1)
1520 {
1521 j00 = jj0[i+0];
1522 j10 = jj1[i+0];
1523 j20 = jj2[i+0];
1524 j30 = jj3[i+0];
1525 j40 = jj4[i+0];
1526 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40];
1527 }
1528 #ifdef USE_VEC_COMP
1529 #pragma cdir nodep
1530 #pragma _NEC ivdep
1531 #endif
1532 for(;i<ie3-0;i+=1)
1533 {
1534 j00 = jj0[i+0];
1535 j10 = jj1[i+0];
1536 j20 = jj2[i+0];
1537 j30 = jj3[i+0];
1538 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
1539 }
1540 #ifdef USE_VEC_COMP
1541 #pragma cdir nodep
1542 #pragma _NEC ivdep
1543 #endif
1544 for(;i<ie2-0;i+=1)
1545 {
1546 j00 = jj0[i+0];
1547 j10 = jj1[i+0];
1548 j20 = jj2[i+0];
1549 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
1550 }
1551 #ifdef USE_VEC_COMP
1552 #pragma cdir nodep
1553 #pragma _NEC ivdep
1554 #endif
1555 for(;i<ie1-0;i+=1)
1556 {
1557 j00 = jj0[i+0];
1558 j10 = jj1[i+0];
1559 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1560 }
1561 #ifdef USE_VEC_COMP
1562 #pragma cdir nodep
1563 #pragma _NEC ivdep
1564 #endif
1565 for(;i<ie0-0;i+=1)
1566 {
1567 j00 = jj0[i+0];
1568 yy[i+0] += vv0[i+0]*x[j00];
1569 }
1570 }
1571 for(j=j-1;j<maxnzr;j+=6)
1572 {
1573 yy = A->work;
1574 is0 = A->ptr[j-5];
1575 ie0 = A->ptr[j+1-5] - A->ptr[j-5];
1576 jj0 = &A->index[is0];
1577 vv0 = &A->value[is0];
1578 is1 = A->ptr[j-4];
1579 ie1 = A->ptr[j+1-4] - A->ptr[j-4];
1580 jj1 = &A->index[is1];
1581 vv1 = &A->value[is1];
1582 is2 = A->ptr[j-3];
1583 ie2 = A->ptr[j+1-3] - A->ptr[j-3];
1584 jj2 = &A->index[is2];
1585 vv2 = &A->value[is2];
1586 is3 = A->ptr[j-2];
1587 ie3 = A->ptr[j+1-2] - A->ptr[j-2];
1588 jj3 = &A->index[is3];
1589 vv3 = &A->value[is3];
1590 is4 = A->ptr[j-1];
1591 ie4 = A->ptr[j+1-1] - A->ptr[j-1];
1592 jj4 = &A->index[is4];
1593 vv4 = &A->value[is4];
1594 is5 = A->ptr[j-0];
1595 ie5 = A->ptr[j+1-0] - A->ptr[j-0];
1596 jj5 = &A->index[is5];
1597 vv5 = &A->value[is5];
1598
1599 #ifdef USE_VEC_COMP
1600 #pragma cdir nodep
1601 #pragma _NEC ivdep
1602 #endif
1603 for(i=0;i<ie5-0;i+=1)
1604 {
1605 j00 = jj0[i+0];
1606 j10 = jj1[i+0];
1607 j20 = jj2[i+0];
1608 j30 = jj3[i+0];
1609 j40 = jj4[i+0];
1610 j50 = jj5[i+0];
1611 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40] + vv5[i+0]*x[j50];
1612 }
1613 #ifdef USE_VEC_COMP
1614 #pragma cdir nodep
1615 #pragma _NEC ivdep
1616 #endif
1617 for(;i<ie4-0;i+=1)
1618 {
1619 j00 = jj0[i+0];
1620 j10 = jj1[i+0];
1621 j20 = jj2[i+0];
1622 j30 = jj3[i+0];
1623 j40 = jj4[i+0];
1624 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40];
1625 }
1626 #ifdef USE_VEC_COMP
1627 #pragma cdir nodep
1628 #pragma _NEC ivdep
1629 #endif
1630 for(;i<ie3-0;i+=1)
1631 {
1632 j00 = jj0[i+0];
1633 j10 = jj1[i+0];
1634 j20 = jj2[i+0];
1635 j30 = jj3[i+0];
1636 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
1637 }
1638 #ifdef USE_VEC_COMP
1639 #pragma cdir nodep
1640 #pragma _NEC ivdep
1641 #endif
1642 for(;i<ie2-0;i+=1)
1643 {
1644 j00 = jj0[i+0];
1645 j10 = jj1[i+0];
1646 j20 = jj2[i+0];
1647 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
1648 }
1649 #ifdef USE_VEC_COMP
1650 #pragma cdir nodep
1651 #pragma _NEC ivdep
1652 #endif
1653 for(;i<ie1-0;i+=1)
1654 {
1655 j00 = jj0[i+0];
1656 j10 = jj1[i+0];
1657 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1658 }
1659 #ifdef USE_VEC_COMP
1660 #pragma cdir nodep
1661 #pragma _NEC ivdep
1662 #endif
1663 for(;i<ie0-0;i+=1)
1664 {
1665 j00 = jj0[i+0];
1666 yy[i+0] += vv0[i+0]*x[j00];
1667 }
1668 }
1669 for(j=j-1;j<maxnzr;j+=5)
1670 {
1671 yy = A->work;
1672 is0 = A->ptr[j-4];
1673 ie0 = A->ptr[j+1-4] - A->ptr[j-4];
1674 jj0 = &A->index[is0];
1675 vv0 = &A->value[is0];
1676 is1 = A->ptr[j-3];
1677 ie1 = A->ptr[j+1-3] - A->ptr[j-3];
1678 jj1 = &A->index[is1];
1679 vv1 = &A->value[is1];
1680 is2 = A->ptr[j-2];
1681 ie2 = A->ptr[j+1-2] - A->ptr[j-2];
1682 jj2 = &A->index[is2];
1683 vv2 = &A->value[is2];
1684 is3 = A->ptr[j-1];
1685 ie3 = A->ptr[j+1-1] - A->ptr[j-1];
1686 jj3 = &A->index[is3];
1687 vv3 = &A->value[is3];
1688 is4 = A->ptr[j-0];
1689 ie4 = A->ptr[j+1-0] - A->ptr[j-0];
1690 jj4 = &A->index[is4];
1691 vv4 = &A->value[is4];
1692
1693 #ifdef USE_VEC_COMP
1694 #pragma cdir nodep
1695 #pragma _NEC ivdep
1696 #endif
1697 for(i=0;i<ie4-0;i+=1)
1698 {
1699 j00 = jj0[i+0];
1700 j10 = jj1[i+0];
1701 j20 = jj2[i+0];
1702 j30 = jj3[i+0];
1703 j40 = jj4[i+0];
1704 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40];
1705 }
1706 #ifdef USE_VEC_COMP
1707 #pragma cdir nodep
1708 #pragma _NEC ivdep
1709 #endif
1710 for(;i<ie3-0;i+=1)
1711 {
1712 j00 = jj0[i+0];
1713 j10 = jj1[i+0];
1714 j20 = jj2[i+0];
1715 j30 = jj3[i+0];
1716 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
1717 }
1718 #ifdef USE_VEC_COMP
1719 #pragma cdir nodep
1720 #pragma _NEC ivdep
1721 #endif
1722 for(;i<ie2-0;i+=1)
1723 {
1724 j00 = jj0[i+0];
1725 j10 = jj1[i+0];
1726 j20 = jj2[i+0];
1727 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
1728 }
1729 #ifdef USE_VEC_COMP
1730 #pragma cdir nodep
1731 #pragma _NEC ivdep
1732 #endif
1733 for(;i<ie1-0;i+=1)
1734 {
1735 j00 = jj0[i+0];
1736 j10 = jj1[i+0];
1737 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1738 }
1739 #ifdef USE_VEC_COMP
1740 #pragma cdir nodep
1741 #pragma _NEC ivdep
1742 #endif
1743 for(;i<ie0-0;i+=1)
1744 {
1745 j00 = jj0[i+0];
1746 yy[i+0] += vv0[i+0]*x[j00];
1747 }
1748 }
1749 for(j=j-1;j<maxnzr;j+=4)
1750 {
1751 yy = A->work;
1752 is0 = A->ptr[j-3];
1753 ie0 = A->ptr[j+1-3] - A->ptr[j-3];
1754 jj0 = &A->index[is0];
1755 vv0 = &A->value[is0];
1756 is1 = A->ptr[j-2];
1757 ie1 = A->ptr[j+1-2] - A->ptr[j-2];
1758 jj1 = &A->index[is1];
1759 vv1 = &A->value[is1];
1760 is2 = A->ptr[j-1];
1761 ie2 = A->ptr[j+1-1] - A->ptr[j-1];
1762 jj2 = &A->index[is2];
1763 vv2 = &A->value[is2];
1764 is3 = A->ptr[j-0];
1765 ie3 = A->ptr[j+1-0] - A->ptr[j-0];
1766 jj3 = &A->index[is3];
1767 vv3 = &A->value[is3];
1768
1769 #ifdef USE_VEC_COMP
1770 #pragma cdir nodep
1771 #pragma _NEC ivdep
1772 #endif
1773 for(i=0;i<ie3-0;i+=1)
1774 {
1775 j00 = jj0[i+0];
1776 j10 = jj1[i+0];
1777 j20 = jj2[i+0];
1778 j30 = jj3[i+0];
1779 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
1780 }
1781 #ifdef USE_VEC_COMP
1782 #pragma cdir nodep
1783 #pragma _NEC ivdep
1784 #endif
1785 for(;i<ie2-0;i+=1)
1786 {
1787 j00 = jj0[i+0];
1788 j10 = jj1[i+0];
1789 j20 = jj2[i+0];
1790 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
1791 }
1792 #ifdef USE_VEC_COMP
1793 #pragma cdir nodep
1794 #pragma _NEC ivdep
1795 #endif
1796 for(;i<ie1-0;i+=1)
1797 {
1798 j00 = jj0[i+0];
1799 j10 = jj1[i+0];
1800 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1801 }
1802 #ifdef USE_VEC_COMP
1803 #pragma cdir nodep
1804 #pragma _NEC ivdep
1805 #endif
1806 for(;i<ie0-0;i+=1)
1807 {
1808 j00 = jj0[i+0];
1809 yy[i+0] += vv0[i+0]*x[j00];
1810 }
1811 }
1812 for(j=j-1;j<maxnzr;j+=3)
1813 {
1814 yy = A->work;
1815 is0 = A->ptr[j-2];
1816 ie0 = A->ptr[j+1-2] - A->ptr[j-2];
1817 jj0 = &A->index[is0];
1818 vv0 = &A->value[is0];
1819 is1 = A->ptr[j-1];
1820 ie1 = A->ptr[j+1-1] - A->ptr[j-1];
1821 jj1 = &A->index[is1];
1822 vv1 = &A->value[is1];
1823 is2 = A->ptr[j-0];
1824 ie2 = A->ptr[j+1-0] - A->ptr[j-0];
1825 jj2 = &A->index[is2];
1826 vv2 = &A->value[is2];
1827
1828 #ifdef USE_VEC_COMP
1829 #pragma cdir nodep
1830 #pragma _NEC ivdep
1831 #endif
1832 for(i=0;i<ie2-0;i+=1)
1833 {
1834 j00 = jj0[i+0];
1835 j10 = jj1[i+0];
1836 j20 = jj2[i+0];
1837 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
1838 }
1839 #ifdef USE_VEC_COMP
1840 #pragma cdir nodep
1841 #pragma _NEC ivdep
1842 #endif
1843 for(;i<ie1-0;i+=1)
1844 {
1845 j00 = jj0[i+0];
1846 j10 = jj1[i+0];
1847 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1848 }
1849 #ifdef USE_VEC_COMP
1850 #pragma cdir nodep
1851 #pragma _NEC ivdep
1852 #endif
1853 for(;i<ie0-0;i+=1)
1854 {
1855 j00 = jj0[i+0];
1856 yy[i+0] += vv0[i+0]*x[j00];
1857 }
1858 }
1859 for(j=j-1;j<maxnzr;j+=2)
1860 {
1861 yy = A->work;
1862 is0 = A->ptr[j-1];
1863 ie0 = A->ptr[j+1-1] - A->ptr[j-1];
1864 jj0 = &A->index[is0];
1865 vv0 = &A->value[is0];
1866 is1 = A->ptr[j-0];
1867 ie1 = A->ptr[j+1-0] - A->ptr[j-0];
1868 jj1 = &A->index[is1];
1869 vv1 = &A->value[is1];
1870
1871 #ifdef USE_VEC_COMP
1872 #pragma cdir nodep
1873 #pragma _NEC ivdep
1874 #endif
1875 for(i=0;i<ie1-0;i+=1)
1876 {
1877 j00 = jj0[i+0];
1878 j10 = jj1[i+0];
1879 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
1880 }
1881 #ifdef USE_VEC_COMP
1882 #pragma cdir nodep
1883 #pragma _NEC ivdep
1884 #endif
1885 for(;i<ie0-0;i+=1)
1886 {
1887 j00 = jj0[i+0];
1888 yy[i+0] += vv0[i+0]*x[j00];
1889 }
1890 }
1891 for(j=j-1;j<maxnzr;j+=1)
1892 {
1893 yy = A->work;
1894 is0 = A->ptr[j-0];
1895 ie0 = A->ptr[j+1-0] - A->ptr[j-0];
1896 jj0 = &A->index[is0];
1897 vv0 = &A->value[is0];
1898
1899 #ifdef USE_VEC_COMP
1900 #pragma cdir nodep
1901 #pragma _NEC ivdep
1902 #endif
1903 for(i=0;i<ie0-0;i+=1)
1904 {
1905 j00 = jj0[i+0];
1906 yy[i+0] += vv0[i+0]*x[j00];
1907 }
1908 }
1909 yy = A->work;
1910 #ifdef USE_VEC_COMP
1911 #pragma cdir nodep
1912 #pragma _NEC ivdep
1913 #endif
1914 for(i=0;i<n;i++)
1915 {
1916 y[A->row[i]] = yy[i];
1917 }
1918 }
1919
lis_matvec_jad_u8_1(LIS_MATRIX A,LIS_SCALAR x[],LIS_SCALAR y[])1920 void lis_matvec_jad_u8_1(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
1921 {
1922 LIS_INT i,j;
1923 LIS_INT n,np,maxnzr;
1924 LIS_SCALAR *yy;
1925 LIS_INT is0,is1,is2,is3,is4,is5,is6,is7;
1926 LIS_INT ie0,ie1,ie2,ie3,ie4,ie5,ie6,ie7;
1927 LIS_INT *jj0,*jj1,*jj2,*jj3,*jj4,*jj5,*jj6,*jj7;
1928 LIS_SCALAR *vv0,*vv1,*vv2,*vv3,*vv4,*vv5,*vv6,*vv7;
1929 LIS_INT j00;
1930 LIS_INT j10;
1931 LIS_INT j20;
1932 LIS_INT j30;
1933 LIS_INT j40;
1934 LIS_INT j50;
1935 LIS_INT j60;
1936 LIS_INT j70;
1937
1938 n = A->n;
1939 np = A->np;
1940 maxnzr = A->maxnzr;
1941 yy = A->work;
1942 #ifdef USE_VEC_COMP
1943 #pragma cdir nodep
1944 #pragma _NEC ivdep
1945 #endif
1946 for(i=0; i<np; i++)
1947 {
1948 yy[i] = 0.0;
1949 }
1950 for(j=7;j<maxnzr;j+=8)
1951 {
1952 yy = A->work;
1953 is0 = A->ptr[j-7];
1954 ie0 = A->ptr[j+1-7] - A->ptr[j-7];
1955 jj0 = &A->index[is0];
1956 vv0 = &A->value[is0];
1957 is1 = A->ptr[j-6];
1958 ie1 = A->ptr[j+1-6] - A->ptr[j-6];
1959 jj1 = &A->index[is1];
1960 vv1 = &A->value[is1];
1961 is2 = A->ptr[j-5];
1962 ie2 = A->ptr[j+1-5] - A->ptr[j-5];
1963 jj2 = &A->index[is2];
1964 vv2 = &A->value[is2];
1965 is3 = A->ptr[j-4];
1966 ie3 = A->ptr[j+1-4] - A->ptr[j-4];
1967 jj3 = &A->index[is3];
1968 vv3 = &A->value[is3];
1969 is4 = A->ptr[j-3];
1970 ie4 = A->ptr[j+1-3] - A->ptr[j-3];
1971 jj4 = &A->index[is4];
1972 vv4 = &A->value[is4];
1973 is5 = A->ptr[j-2];
1974 ie5 = A->ptr[j+1-2] - A->ptr[j-2];
1975 jj5 = &A->index[is5];
1976 vv5 = &A->value[is5];
1977 is6 = A->ptr[j-1];
1978 ie6 = A->ptr[j+1-1] - A->ptr[j-1];
1979 jj6 = &A->index[is6];
1980 vv6 = &A->value[is6];
1981 is7 = A->ptr[j-0];
1982 ie7 = A->ptr[j+1-0] - A->ptr[j-0];
1983 jj7 = &A->index[is7];
1984 vv7 = &A->value[is7];
1985
1986 #ifdef USE_VEC_COMP
1987 #pragma cdir nodep
1988 #pragma _NEC ivdep
1989 #endif
1990 for(i=0;i<ie7-0;i+=1)
1991 {
1992 j00 = jj0[i+0];
1993 j10 = jj1[i+0];
1994 j20 = jj2[i+0];
1995 j30 = jj3[i+0];
1996 j40 = jj4[i+0];
1997 j50 = jj5[i+0];
1998 j60 = jj6[i+0];
1999 j70 = jj7[i+0];
2000 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40] + vv5[i+0]*x[j50] + vv6[i+0]*x[j60] + vv7[i+0]*x[j70];
2001 }
2002 #ifdef USE_VEC_COMP
2003 #pragma cdir nodep
2004 #pragma _NEC ivdep
2005 #endif
2006 for(;i<ie6-0;i+=1)
2007 {
2008 j00 = jj0[i+0];
2009 j10 = jj1[i+0];
2010 j20 = jj2[i+0];
2011 j30 = jj3[i+0];
2012 j40 = jj4[i+0];
2013 j50 = jj5[i+0];
2014 j60 = jj6[i+0];
2015 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40] + vv5[i+0]*x[j50] + vv6[i+0]*x[j60];
2016 }
2017 #ifdef USE_VEC_COMP
2018 #pragma cdir nodep
2019 #pragma _NEC ivdep
2020 #endif
2021 for(;i<ie5-0;i+=1)
2022 {
2023 j00 = jj0[i+0];
2024 j10 = jj1[i+0];
2025 j20 = jj2[i+0];
2026 j30 = jj3[i+0];
2027 j40 = jj4[i+0];
2028 j50 = jj5[i+0];
2029 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40] + vv5[i+0]*x[j50];
2030 }
2031 #ifdef USE_VEC_COMP
2032 #pragma cdir nodep
2033 #pragma _NEC ivdep
2034 #endif
2035 for(;i<ie4-0;i+=1)
2036 {
2037 j00 = jj0[i+0];
2038 j10 = jj1[i+0];
2039 j20 = jj2[i+0];
2040 j30 = jj3[i+0];
2041 j40 = jj4[i+0];
2042 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40];
2043 }
2044 #ifdef USE_VEC_COMP
2045 #pragma cdir nodep
2046 #pragma _NEC ivdep
2047 #endif
2048 for(;i<ie3-0;i+=1)
2049 {
2050 j00 = jj0[i+0];
2051 j10 = jj1[i+0];
2052 j20 = jj2[i+0];
2053 j30 = jj3[i+0];
2054 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
2055 }
2056 #ifdef USE_VEC_COMP
2057 #pragma cdir nodep
2058 #pragma _NEC ivdep
2059 #endif
2060 for(;i<ie2-0;i+=1)
2061 {
2062 j00 = jj0[i+0];
2063 j10 = jj1[i+0];
2064 j20 = jj2[i+0];
2065 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
2066 }
2067 #ifdef USE_VEC_COMP
2068 #pragma cdir nodep
2069 #pragma _NEC ivdep
2070 #endif
2071 for(;i<ie1-0;i+=1)
2072 {
2073 j00 = jj0[i+0];
2074 j10 = jj1[i+0];
2075 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
2076 }
2077 #ifdef USE_VEC_COMP
2078 #pragma cdir nodep
2079 #pragma _NEC ivdep
2080 #endif
2081 for(;i<ie0-0;i+=1)
2082 {
2083 j00 = jj0[i+0];
2084 yy[i+0] += vv0[i+0]*x[j00];
2085 }
2086 }
2087 for(j=j-1;j<maxnzr;j+=7)
2088 {
2089 yy = A->work;
2090 is0 = A->ptr[j-6];
2091 ie0 = A->ptr[j+1-6] - A->ptr[j-6];
2092 jj0 = &A->index[is0];
2093 vv0 = &A->value[is0];
2094 is1 = A->ptr[j-5];
2095 ie1 = A->ptr[j+1-5] - A->ptr[j-5];
2096 jj1 = &A->index[is1];
2097 vv1 = &A->value[is1];
2098 is2 = A->ptr[j-4];
2099 ie2 = A->ptr[j+1-4] - A->ptr[j-4];
2100 jj2 = &A->index[is2];
2101 vv2 = &A->value[is2];
2102 is3 = A->ptr[j-3];
2103 ie3 = A->ptr[j+1-3] - A->ptr[j-3];
2104 jj3 = &A->index[is3];
2105 vv3 = &A->value[is3];
2106 is4 = A->ptr[j-2];
2107 ie4 = A->ptr[j+1-2] - A->ptr[j-2];
2108 jj4 = &A->index[is4];
2109 vv4 = &A->value[is4];
2110 is5 = A->ptr[j-1];
2111 ie5 = A->ptr[j+1-1] - A->ptr[j-1];
2112 jj5 = &A->index[is5];
2113 vv5 = &A->value[is5];
2114 is6 = A->ptr[j-0];
2115 ie6 = A->ptr[j+1-0] - A->ptr[j-0];
2116 jj6 = &A->index[is6];
2117 vv6 = &A->value[is6];
2118
2119 #ifdef USE_VEC_COMP
2120 #pragma cdir nodep
2121 #pragma _NEC ivdep
2122 #endif
2123 for(i=0;i<ie6-0;i+=1)
2124 {
2125 j00 = jj0[i+0];
2126 j10 = jj1[i+0];
2127 j20 = jj2[i+0];
2128 j30 = jj3[i+0];
2129 j40 = jj4[i+0];
2130 j50 = jj5[i+0];
2131 j60 = jj6[i+0];
2132 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40] + vv5[i+0]*x[j50] + vv6[i+0]*x[j60];
2133 }
2134 #ifdef USE_VEC_COMP
2135 #pragma cdir nodep
2136 #pragma _NEC ivdep
2137 #endif
2138 for(;i<ie5-0;i+=1)
2139 {
2140 j00 = jj0[i+0];
2141 j10 = jj1[i+0];
2142 j20 = jj2[i+0];
2143 j30 = jj3[i+0];
2144 j40 = jj4[i+0];
2145 j50 = jj5[i+0];
2146 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40] + vv5[i+0]*x[j50];
2147 }
2148 #ifdef USE_VEC_COMP
2149 #pragma cdir nodep
2150 #pragma _NEC ivdep
2151 #endif
2152 for(;i<ie4-0;i+=1)
2153 {
2154 j00 = jj0[i+0];
2155 j10 = jj1[i+0];
2156 j20 = jj2[i+0];
2157 j30 = jj3[i+0];
2158 j40 = jj4[i+0];
2159 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40];
2160 }
2161 #ifdef USE_VEC_COMP
2162 #pragma cdir nodep
2163 #pragma _NEC ivdep
2164 #endif
2165 for(;i<ie3-0;i+=1)
2166 {
2167 j00 = jj0[i+0];
2168 j10 = jj1[i+0];
2169 j20 = jj2[i+0];
2170 j30 = jj3[i+0];
2171 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
2172 }
2173 #ifdef USE_VEC_COMP
2174 #pragma cdir nodep
2175 #pragma _NEC ivdep
2176 #endif
2177 for(;i<ie2-0;i+=1)
2178 {
2179 j00 = jj0[i+0];
2180 j10 = jj1[i+0];
2181 j20 = jj2[i+0];
2182 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
2183 }
2184 #ifdef USE_VEC_COMP
2185 #pragma cdir nodep
2186 #pragma _NEC ivdep
2187 #endif
2188 for(;i<ie1-0;i+=1)
2189 {
2190 j00 = jj0[i+0];
2191 j10 = jj1[i+0];
2192 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
2193 }
2194 #ifdef USE_VEC_COMP
2195 #pragma cdir nodep
2196 #pragma _NEC ivdep
2197 #endif
2198 for(;i<ie0-0;i+=1)
2199 {
2200 j00 = jj0[i+0];
2201 yy[i+0] += vv0[i+0]*x[j00];
2202 }
2203 }
2204 for(j=j-1;j<maxnzr;j+=6)
2205 {
2206 yy = A->work;
2207 is0 = A->ptr[j-5];
2208 ie0 = A->ptr[j+1-5] - A->ptr[j-5];
2209 jj0 = &A->index[is0];
2210 vv0 = &A->value[is0];
2211 is1 = A->ptr[j-4];
2212 ie1 = A->ptr[j+1-4] - A->ptr[j-4];
2213 jj1 = &A->index[is1];
2214 vv1 = &A->value[is1];
2215 is2 = A->ptr[j-3];
2216 ie2 = A->ptr[j+1-3] - A->ptr[j-3];
2217 jj2 = &A->index[is2];
2218 vv2 = &A->value[is2];
2219 is3 = A->ptr[j-2];
2220 ie3 = A->ptr[j+1-2] - A->ptr[j-2];
2221 jj3 = &A->index[is3];
2222 vv3 = &A->value[is3];
2223 is4 = A->ptr[j-1];
2224 ie4 = A->ptr[j+1-1] - A->ptr[j-1];
2225 jj4 = &A->index[is4];
2226 vv4 = &A->value[is4];
2227 is5 = A->ptr[j-0];
2228 ie5 = A->ptr[j+1-0] - A->ptr[j-0];
2229 jj5 = &A->index[is5];
2230 vv5 = &A->value[is5];
2231
2232 #ifdef USE_VEC_COMP
2233 #pragma cdir nodep
2234 #pragma _NEC ivdep
2235 #endif
2236 for(i=0;i<ie5-0;i+=1)
2237 {
2238 j00 = jj0[i+0];
2239 j10 = jj1[i+0];
2240 j20 = jj2[i+0];
2241 j30 = jj3[i+0];
2242 j40 = jj4[i+0];
2243 j50 = jj5[i+0];
2244 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40] + vv5[i+0]*x[j50];
2245 }
2246 #ifdef USE_VEC_COMP
2247 #pragma cdir nodep
2248 #pragma _NEC ivdep
2249 #endif
2250 for(;i<ie4-0;i+=1)
2251 {
2252 j00 = jj0[i+0];
2253 j10 = jj1[i+0];
2254 j20 = jj2[i+0];
2255 j30 = jj3[i+0];
2256 j40 = jj4[i+0];
2257 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40];
2258 }
2259 #ifdef USE_VEC_COMP
2260 #pragma cdir nodep
2261 #pragma _NEC ivdep
2262 #endif
2263 for(;i<ie3-0;i+=1)
2264 {
2265 j00 = jj0[i+0];
2266 j10 = jj1[i+0];
2267 j20 = jj2[i+0];
2268 j30 = jj3[i+0];
2269 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
2270 }
2271 #ifdef USE_VEC_COMP
2272 #pragma cdir nodep
2273 #pragma _NEC ivdep
2274 #endif
2275 for(;i<ie2-0;i+=1)
2276 {
2277 j00 = jj0[i+0];
2278 j10 = jj1[i+0];
2279 j20 = jj2[i+0];
2280 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
2281 }
2282 #ifdef USE_VEC_COMP
2283 #pragma cdir nodep
2284 #pragma _NEC ivdep
2285 #endif
2286 for(;i<ie1-0;i+=1)
2287 {
2288 j00 = jj0[i+0];
2289 j10 = jj1[i+0];
2290 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
2291 }
2292 #ifdef USE_VEC_COMP
2293 #pragma cdir nodep
2294 #pragma _NEC ivdep
2295 #endif
2296 for(;i<ie0-0;i+=1)
2297 {
2298 j00 = jj0[i+0];
2299 yy[i+0] += vv0[i+0]*x[j00];
2300 }
2301 }
2302 for(j=j-1;j<maxnzr;j+=5)
2303 {
2304 yy = A->work;
2305 is0 = A->ptr[j-4];
2306 ie0 = A->ptr[j+1-4] - A->ptr[j-4];
2307 jj0 = &A->index[is0];
2308 vv0 = &A->value[is0];
2309 is1 = A->ptr[j-3];
2310 ie1 = A->ptr[j+1-3] - A->ptr[j-3];
2311 jj1 = &A->index[is1];
2312 vv1 = &A->value[is1];
2313 is2 = A->ptr[j-2];
2314 ie2 = A->ptr[j+1-2] - A->ptr[j-2];
2315 jj2 = &A->index[is2];
2316 vv2 = &A->value[is2];
2317 is3 = A->ptr[j-1];
2318 ie3 = A->ptr[j+1-1] - A->ptr[j-1];
2319 jj3 = &A->index[is3];
2320 vv3 = &A->value[is3];
2321 is4 = A->ptr[j-0];
2322 ie4 = A->ptr[j+1-0] - A->ptr[j-0];
2323 jj4 = &A->index[is4];
2324 vv4 = &A->value[is4];
2325
2326 #ifdef USE_VEC_COMP
2327 #pragma cdir nodep
2328 #pragma _NEC ivdep
2329 #endif
2330 for(i=0;i<ie4-0;i+=1)
2331 {
2332 j00 = jj0[i+0];
2333 j10 = jj1[i+0];
2334 j20 = jj2[i+0];
2335 j30 = jj3[i+0];
2336 j40 = jj4[i+0];
2337 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30] + vv4[i+0]*x[j40];
2338 }
2339 #ifdef USE_VEC_COMP
2340 #pragma cdir nodep
2341 #pragma _NEC ivdep
2342 #endif
2343 for(;i<ie3-0;i+=1)
2344 {
2345 j00 = jj0[i+0];
2346 j10 = jj1[i+0];
2347 j20 = jj2[i+0];
2348 j30 = jj3[i+0];
2349 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
2350 }
2351 #ifdef USE_VEC_COMP
2352 #pragma cdir nodep
2353 #pragma _NEC ivdep
2354 #endif
2355 for(;i<ie2-0;i+=1)
2356 {
2357 j00 = jj0[i+0];
2358 j10 = jj1[i+0];
2359 j20 = jj2[i+0];
2360 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
2361 }
2362 #ifdef USE_VEC_COMP
2363 #pragma cdir nodep
2364 #pragma _NEC ivdep
2365 #endif
2366 for(;i<ie1-0;i+=1)
2367 {
2368 j00 = jj0[i+0];
2369 j10 = jj1[i+0];
2370 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
2371 }
2372 #ifdef USE_VEC_COMP
2373 #pragma cdir nodep
2374 #pragma _NEC ivdep
2375 #endif
2376 for(;i<ie0-0;i+=1)
2377 {
2378 j00 = jj0[i+0];
2379 yy[i+0] += vv0[i+0]*x[j00];
2380 }
2381 }
2382 for(j=j-1;j<maxnzr;j+=4)
2383 {
2384 yy = A->work;
2385 is0 = A->ptr[j-3];
2386 ie0 = A->ptr[j+1-3] - A->ptr[j-3];
2387 jj0 = &A->index[is0];
2388 vv0 = &A->value[is0];
2389 is1 = A->ptr[j-2];
2390 ie1 = A->ptr[j+1-2] - A->ptr[j-2];
2391 jj1 = &A->index[is1];
2392 vv1 = &A->value[is1];
2393 is2 = A->ptr[j-1];
2394 ie2 = A->ptr[j+1-1] - A->ptr[j-1];
2395 jj2 = &A->index[is2];
2396 vv2 = &A->value[is2];
2397 is3 = A->ptr[j-0];
2398 ie3 = A->ptr[j+1-0] - A->ptr[j-0];
2399 jj3 = &A->index[is3];
2400 vv3 = &A->value[is3];
2401
2402 #ifdef USE_VEC_COMP
2403 #pragma cdir nodep
2404 #pragma _NEC ivdep
2405 #endif
2406 for(i=0;i<ie3-0;i+=1)
2407 {
2408 j00 = jj0[i+0];
2409 j10 = jj1[i+0];
2410 j20 = jj2[i+0];
2411 j30 = jj3[i+0];
2412 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20] + vv3[i+0]*x[j30];
2413 }
2414 #ifdef USE_VEC_COMP
2415 #pragma cdir nodep
2416 #pragma _NEC ivdep
2417 #endif
2418 for(;i<ie2-0;i+=1)
2419 {
2420 j00 = jj0[i+0];
2421 j10 = jj1[i+0];
2422 j20 = jj2[i+0];
2423 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
2424 }
2425 #ifdef USE_VEC_COMP
2426 #pragma cdir nodep
2427 #pragma _NEC ivdep
2428 #endif
2429 for(;i<ie1-0;i+=1)
2430 {
2431 j00 = jj0[i+0];
2432 j10 = jj1[i+0];
2433 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
2434 }
2435 #ifdef USE_VEC_COMP
2436 #pragma cdir nodep
2437 #pragma _NEC ivdep
2438 #endif
2439 for(;i<ie0-0;i+=1)
2440 {
2441 j00 = jj0[i+0];
2442 yy[i+0] += vv0[i+0]*x[j00];
2443 }
2444 }
2445 for(j=j-1;j<maxnzr;j+=3)
2446 {
2447 yy = A->work;
2448 is0 = A->ptr[j-2];
2449 ie0 = A->ptr[j+1-2] - A->ptr[j-2];
2450 jj0 = &A->index[is0];
2451 vv0 = &A->value[is0];
2452 is1 = A->ptr[j-1];
2453 ie1 = A->ptr[j+1-1] - A->ptr[j-1];
2454 jj1 = &A->index[is1];
2455 vv1 = &A->value[is1];
2456 is2 = A->ptr[j-0];
2457 ie2 = A->ptr[j+1-0] - A->ptr[j-0];
2458 jj2 = &A->index[is2];
2459 vv2 = &A->value[is2];
2460
2461 #ifdef USE_VEC_COMP
2462 #pragma cdir nodep
2463 #pragma _NEC ivdep
2464 #endif
2465 for(i=0;i<ie2-0;i+=1)
2466 {
2467 j00 = jj0[i+0];
2468 j10 = jj1[i+0];
2469 j20 = jj2[i+0];
2470 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10] + vv2[i+0]*x[j20];
2471 }
2472 #ifdef USE_VEC_COMP
2473 #pragma cdir nodep
2474 #pragma _NEC ivdep
2475 #endif
2476 for(;i<ie1-0;i+=1)
2477 {
2478 j00 = jj0[i+0];
2479 j10 = jj1[i+0];
2480 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
2481 }
2482 #ifdef USE_VEC_COMP
2483 #pragma cdir nodep
2484 #pragma _NEC ivdep
2485 #endif
2486 for(;i<ie0-0;i+=1)
2487 {
2488 j00 = jj0[i+0];
2489 yy[i+0] += vv0[i+0]*x[j00];
2490 }
2491 }
2492 for(j=j-1;j<maxnzr;j+=2)
2493 {
2494 yy = A->work;
2495 is0 = A->ptr[j-1];
2496 ie0 = A->ptr[j+1-1] - A->ptr[j-1];
2497 jj0 = &A->index[is0];
2498 vv0 = &A->value[is0];
2499 is1 = A->ptr[j-0];
2500 ie1 = A->ptr[j+1-0] - A->ptr[j-0];
2501 jj1 = &A->index[is1];
2502 vv1 = &A->value[is1];
2503
2504 #ifdef USE_VEC_COMP
2505 #pragma cdir nodep
2506 #pragma _NEC ivdep
2507 #endif
2508 for(i=0;i<ie1-0;i+=1)
2509 {
2510 j00 = jj0[i+0];
2511 j10 = jj1[i+0];
2512 yy[i+0] += vv0[i+0]*x[j00] + vv1[i+0]*x[j10];
2513 }
2514 #ifdef USE_VEC_COMP
2515 #pragma cdir nodep
2516 #pragma _NEC ivdep
2517 #endif
2518 for(;i<ie0-0;i+=1)
2519 {
2520 j00 = jj0[i+0];
2521 yy[i+0] += vv0[i+0]*x[j00];
2522 }
2523 }
2524 for(j=j-1;j<maxnzr;j+=1)
2525 {
2526 yy = A->work;
2527 is0 = A->ptr[j-0];
2528 ie0 = A->ptr[j+1-0] - A->ptr[j-0];
2529 jj0 = &A->index[is0];
2530 vv0 = &A->value[is0];
2531
2532 #ifdef USE_VEC_COMP
2533 #pragma cdir nodep
2534 #pragma _NEC ivdep
2535 #endif
2536 for(i=0;i<ie0-0;i+=1)
2537 {
2538 j00 = jj0[i+0];
2539 yy[i+0] += vv0[i+0]*x[j00];
2540 }
2541 }
2542 yy = A->work;
2543 #ifdef USE_VEC_COMP
2544 #pragma cdir nodep
2545 #pragma _NEC ivdep
2546 #endif
2547 for(i=0;i<n;i++)
2548 {
2549 y[A->row[i]] = yy[i];
2550 }
2551 }
2552