1 //
2 // mops.h --- block matrix operations
3 //
4 // Copyright (C) 1997 Limit Point Systems, Inc.
5 //
6 // Author: Edward Seidl <seidl@janed.com>
7 // Maintainer: LPS
8 //
9 // This file is part of the SC Toolkit.
10 //
11 // The SC Toolkit is free software; you can redistribute it and/or modify
12 // it under the terms of the GNU Library General Public License as published by
13 // the Free Software Foundation; either version 2, or (at your option)
14 // any later version.
15 //
16 // The SC Toolkit is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU Library General Public License for more details.
20 //
21 // You should have received a copy of the GNU Library General Public License
22 // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
23 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24 //
25 // The U.S. Government is granted a limited license as per AL 91-7.
26 //
27 
28 #ifndef _math_scmat_mops_h
29 #define _math_scmat_mops_h
30 
// Edge length of the square work blocks: the routines below address each
// block argument as D1 rows of D1 doubles.
#define D1 32
32 
33 // copy a chunk of rectangular matrix source into dest.  dest is D1xD1, and is
34 // padded with zeros
35 
36 static inline void
copy_block(double ** dest,double ** source,int istart,int ni,int jstart,int nj)37 copy_block(double **dest, double **source,
38            int istart, int ni, int jstart, int nj)
39 {
40   int ii,jj;
41 
42   for (ii=0; ii < ni; ii++) {
43     double *di = dest[ii];
44     double *si = &source[istart+ii][jstart];
45     for (jj=0; jj < nj; jj++)
46       di[jj] = si[jj];
47     for (; jj < D1; jj++)
48       di[jj] = 0;
49   }
50 
51   int left=D1-ii;
52   if (left)
53     memset(dest[ii], 0, sizeof(double)*left*D1);
54 }
55 
56 static inline void
copy_trans_block(double ** dest,double ** source,int istart,int ni,int jstart,int nj)57 copy_trans_block(double **dest, double **source,
58                  int istart, int ni, int jstart, int nj)
59 {
60   int ii,jj;
61 
62   memset(dest[0], 0, sizeof(double)*D1*D1);
63 
64   for (jj=0; jj < nj; jj++) {
65     double *sj = &source[jstart+jj][istart];
66     for (ii=0; ii < ni; ii++)
67       dest[ii][jj] = sj[ii];
68   }
69 }
70 
71 // copy a chunk of symmetric matrix source into dest.  dest is D1xD1, and is
72 // padded with zeros
73 static inline void
copy_sym_block(double ** dest,double ** source,int istart,int ni,int jstart,int nj)74 copy_sym_block(double **dest, double **source,
75                int istart, int ni, int jstart, int nj)
76 {
77   int ii,jj;
78 
79   for (ii=0; ii < ni; ii++) {
80     double *di = dest[ii];
81     double *si = &source[istart+ii][jstart];
82 
83     if (jstart < istart)
84       for (jj=0; jj < nj; jj++)
85         di[jj] = si[jj];
86     else if (jstart==istart)
87       for (jj=0; jj <= ii; jj++)
88         di[jj] = dest[jj][ii] = si[jj];
89     else
90       for (jj=0; jj < nj; jj++)
91         di[jj] = source[jstart+jj][istart+ii];
92 
93     for (jj=nj; jj < D1; jj++)
94       di[jj] = 0;
95   }
96 
97   int left=D1-ii;
98   if (left)
99     memset(dest[ii], 0, sizeof(double)*left*D1);
100 }
101 
// Write the top-left ni x nj corner of the block source back into dest,
// placing source[0][0] at dest[istart][jstart].  Nothing outside that
// ni x nj window of dest is modified.
static inline void
return_block(double **dest, double **source,
             int istart, int ni, int jstart, int nj)
{
  // an empty window means there is nothing to store
  if (ni <= 0 || nj <= 0)
    return;

  for (int row = 0; row < ni; row++) {
    const double *from = source[row];
    double *to = dest[istart + row] + jstart;

    for (int col = 0; col < nj; col++)
      to[col] = from[col];
  }
}
112 
// Accumulate a times the transpose of b into c:
//
//   c[i][j] += sum over k of a[i][k] * b[j][k]
//
// a, b, and c are all D1xD1 blocks.  ni, nj and nk bound the i, j and k
// ranges, but the kernel is unrolled four rows of a/c at a time and two
// values of k at a time.  It therefore reads rows of a and rewrites rows of
// c up to the next multiple of four at or above ni, and reads columns of a
// and b up to the next even count at or above nk.  Those extra rows and the
// extra k term contribute nothing only when the unused entries of the blocks
// are zero, which is how the copy_*_block routines in this header fill them.
static inline void
mult_block(double **a, double **b, double **c, int ni, int nj, int nk)
{
  int ii,jj,kk;
  double t00,t10,t20,t30;
  double *a0, *a1, *a2, *a3;
  double *c0, *c1, *c2, *c3;

  for (ii=0; ii < ni; ii += 4) {
    // four consecutive rows of a and the matching rows of c
    a0=a[ii]; a1=a[ii+1]; a2=a[ii+2]; a3=a[ii+3];
    c0=c[ii]; c1=c[ii+1]; c2=c[ii+2]; c3=c[ii+3];

    for (jj=0; jj < nj; jj++) {
      // row jj of b acts as column jj of the transposed operand
      double *bt = b[jj];

      // start from the current contents of c so the product is accumulated
      t00=c0[jj]; t10=c1[jj]; t20=c2[jj]; t30=c3[jj];

      for (kk=0; kk < nk; kk += 2) {
        // each pair of b values is loaded once and shared by all four rows
        const double b0=bt[kk], b1=bt[kk+1];
        t00 += a0[kk]*b0 + a0[kk+1]*b1;
        t10 += a1[kk]*b0 + a1[kk+1]*b1;
        t20 += a2[kk]*b0 + a2[kk+1]*b1;
        t30 += a3[kk]*b0 + a3[kk+1]*b1;
      }

      c0[jj]=t00;
      c1[jj]=t10;
      c2[jj]=t20;
      c3[jj]=t30;
    }
  }
}
145 
146 #endif
147 
148 // Local Variables:
149 // mode: c++
150 // c-file-style: "ETS"
151 // End:
152