1 /*     CalculiX - A 3-dimensional finite element program                 */
2 /*              Copyright (C) 1998-2021 Guido Dhondt                          */
3 
4 /*     This program is free software; you can redistribute it and/or     */
5 /*     modify it under the terms of the GNU General Public License as    */
6 /*     published by the Free Software Foundation(version 2);    */
7 /*                    */
8 
9 /*     This program is distributed in the hope that it will be useful,   */
10 /*     but WITHOUT ANY WARRANTY; without even the implied warranty of    */
11 /*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the      */
12 /*     GNU General Public License for more details.                      */
13 
14 /*     You should have received a copy of the GNU General Public License */
15 /*     along with this program; if not, write to the Free Software       */
16 /*     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.         */
17 
18 #include <unistd.h>
19 #include <stdio.h>
20 #include <math.h>
21 #include <stdlib.h>
22 #include <pthread.h>
23 #include "CalculiX.h"
24 
25 static char *lakon1,*sideload1,*matname1,*tieset1,*set1;
26 
27 static ITG *nk1,*kon1,*ipkon1,*ne1,*nodeboun1,*ndirboun1,*nboun1,
28   *ipompc1,*nodempc1,*nmpc1,*nodeforc1,*ndirforc1,*nforc1,*nelemload1,
29   *nload1,*ipobody1,*nbody1,*nactdof1,*icol1,*jq1,*irow1,*neq1,
30   *nzl1,*nmethod1=NULL,*ikmpc1,*ilmpc1,*ikboun1,*ilboun1,*nelcon1,
31   *nrhcon1,*nalcon1,*ielmat1,*ielorien1,*norien1,*ntmat1_,*ithermal1,
32   *iprestr1,*iperturb1,*nzs1,*iexpl1,*nplicon1,*nplkcon1,*npmat1_,
33   *mi1,*ncmat1_,*mass1,*stiffness1,*buckling1,*rhsi1,*intscheme1,
34   *nshcon1,*ncocon1,*istep1,*iinc1,*coriolis1,*ibody1,*nstate1_,
35   *integerglob1,*istartset1,*iendset1,*ialset1,*ntie1,*nasym1,
36   *mortar1,*ielprop1,*ne01,num_cpus,*kscale1,*iponoel1,*inoel1,
37   *network1,*neapar=NULL,*nebpar=NULL,*mscalmethod1,*nset1,
38   *irowtloc1,*jqtloc1,*islavelinv1,*mortartrafoflag1;
39 
40 static double *co1,*xboun1,*coefmpc1,*xforc1,*xload1,*xbody1,*cgr1,
41   *ad1=NULL,*au1=NULL,*fext1=NULL,*elcon1,*rhcon1,*alcon1,*alzero1,
42   *orab1,*t01,*t11,*prestr1,*vold1,*sti1,*stx1,*adb1=NULL,*aub1=NULL,
43   *plicon1,*plkcon1,*xstiff1,*dtime1,*physcon1,*shcon1,*cocon1,
44   *ttime1,*time1,*xloadold1,*reltime1,*veold1,*springarea1,
45   *xstateini1,*xstate1,*thicke1,*doubleglob1,*pslavsurf1,*pmastsurf1,
46   *clearini1,*prop1,*fnext1=NULL,*smscale1,*autloc1;
47 
mafillsmmain(double * co,ITG * nk,ITG * kon,ITG * ipkon,char * lakon,ITG * ne,ITG * nodeboun,ITG * ndirboun,double * xboun,ITG * nboun,ITG * ipompc,ITG * nodempc,double * coefmpc,ITG * nmpc,ITG * nodeforc,ITG * ndirforc,double * xforc,ITG * nforc,ITG * nelemload,char * sideload,double * xload,ITG * nload,double * xbody,ITG * ipobody,ITG * nbody,double * cgr,double * ad,double * au,double * fext,ITG * nactdof,ITG * icol,ITG * jq,ITG * irow,ITG * neq,ITG * nzl,ITG * nmethod,ITG * ikmpc,ITG * ilmpc,ITG * ikboun,ITG * ilboun,double * elcon,ITG * nelcon,double * rhcon,ITG * nrhcon,double * alcon,ITG * nalcon,double * alzero,ITG * ielmat,ITG * ielorien,ITG * norien,double * orab,ITG * ntmat_,double * t0,double * t1,ITG * ithermal,double * prestr,ITG * iprestr,double * vold,ITG * iperturb,double * sti,ITG * nzs,double * stx,double * adb,double * aub,ITG * iexpl,double * plicon,ITG * nplicon,double * plkcon,ITG * nplkcon,double * xstiff,ITG * npmat_,double * dtime,char * matname,ITG * mi,ITG * ncmat_,ITG * mass,ITG * stiffness,ITG * buckling,ITG * rhsi,ITG * intscheme,double * physcon,double * shcon,ITG * nshcon,double * cocon,ITG * ncocon,double * ttime,double * time,ITG * istep,ITG * iinc,ITG * coriolis,ITG * ibody,double * xloadold,double * reltime,double * veold,double * springarea,ITG * nstate_,double * xstateini,double * xstate,double * thicke,ITG * integerglob,double * doubleglob,char * tieset,ITG * istartset,ITG * iendset,ITG * ialset,ITG * ntie,ITG * nasym,double * pslavsurf,double * pmastsurf,ITG * mortar,double * clearini,ITG * ielprop,double * prop,ITG * ne0,double * fnext,ITG * kscale,ITG * iponoel,ITG * inoel,ITG * network,ITG * ntrans,ITG * inotr,double * trab,double * smscale,ITG * mscalmethod,char * set,ITG * nset,ITG * islavelinv,double * autloc,ITG * irowtloc,ITG * jqtloc,ITG * mortartrafoflag)48 void mafillsmmain(double *co,ITG *nk,ITG *kon,ITG *ipkon,char *lakon,
49 		  ITG *ne,ITG *nodeboun,ITG *ndirboun,double *xboun,
50 		  ITG *nboun,ITG *ipompc,ITG *nodempc,double *coefmpc,
51 		  ITG *nmpc,ITG *nodeforc,ITG *ndirforc,
52 		  double *xforc,ITG *nforc,ITG *nelemload,char *sideload,
53 		  double *xload,ITG *nload,double *xbody,ITG *ipobody,
54 		  ITG *nbody,double *cgr,
55 		  double *ad,double *au,double *fext,ITG *nactdof,
56 		  ITG *icol,ITG *jq,ITG *irow,ITG *neq,ITG *nzl,
57 		  ITG *nmethod,ITG *ikmpc,ITG *ilmpc,ITG *ikboun,
58 		  ITG *ilboun,
59 		  double *elcon,ITG *nelcon,double *rhcon,ITG *nrhcon,
60 		  double *alcon,ITG *nalcon,double *alzero,ITG *ielmat,
61 		  ITG *ielorien,ITG *norien,double *orab,ITG *ntmat_,
62 		  double *t0,double *t1,ITG *ithermal,
63 		  double *prestr,ITG *iprestr,double *vold,
64 		  ITG *iperturb,double *sti,ITG *nzs,double *stx,
65 		  double *adb,double *aub,ITG *iexpl,
66 		  double *plicon,ITG *nplicon,double *plkcon,ITG *nplkcon,
67 		  double *xstiff,
68 		  ITG *npmat_,double *dtime,char *matname,ITG *mi,
69 		  ITG *ncmat_,ITG *mass,ITG *stiffness,ITG *buckling,ITG *rhsi,
70 		  ITG *intscheme,double *physcon,double *shcon,ITG *nshcon,
71 		  double *cocon,ITG *ncocon,double *ttime,double *time,
72 		  ITG *istep,ITG *iinc,ITG *coriolis,ITG *ibody,
73 		  double *xloadold,double *reltime,double *veold,
74 		  double *springarea,ITG *nstate_,double *xstateini,
75 		  double *xstate,double *thicke,
76 		  ITG *integerglob,double *doubleglob,char *tieset,
77 		  ITG *istartset,ITG *iendset,ITG *ialset,ITG *ntie,
78 		  ITG *nasym,double *pslavsurf,double *pmastsurf,ITG *mortar,
79 		  double *clearini,ITG *ielprop,double *prop,ITG *ne0,
80 		  double *fnext,ITG *kscale,ITG *iponoel,ITG *inoel,
81 		  ITG *network,ITG *ntrans,ITG *inotr,double *trab,
82 		  double *smscale,ITG *mscalmethod,char *set,ITG *nset,
83 		  ITG *islavelinv,double *autloc,ITG *irowtloc,ITG *jqtloc,
84 		  ITG *mortartrafoflag){
85 
86   /* mafillsmmain = main program for MAtrix FILLing of the Stiffnes
87      Matrix */
88 
89   ITG i,j,mt=mi[1]+1;
90 
91   /* variables for multithreading procedure */
92 
93   ITG sys_cpus,*ithread=NULL;
94   char *env,*envloc,*envsys;
95 
96   num_cpus = 0;
97   sys_cpus=0;
98 
99   /* explicit user declaration prevails */
100 
101   envsys=getenv("NUMBER_OF_CPUS");
102   if(envsys){
103     sys_cpus=atoi(envsys);
104     if(sys_cpus<0) sys_cpus=0;
105   }
106 
107   //    sys_cpus=1;
108 
109   /* automatic detection of available number of processors */
110 
111   if(sys_cpus==0){
112     sys_cpus = getSystemCPUs();
113     if(sys_cpus<1) sys_cpus=1;
114   }
115 
116   /* local declaration prevails, if strictly positive */
117 
118   envloc = getenv("CCX_NPROC_STIFFNESS");
119   if(envloc){
120     num_cpus=atoi(envloc);
121     if(num_cpus<0){
122       num_cpus=0;
123     }else if(num_cpus>sys_cpus){
124       num_cpus=sys_cpus;
125     }
126 
127   }
128 
129   /* else global declaration, if any, applies */
130 
131   env = getenv("OMP_NUM_THREADS");
132   if(num_cpus==0){
133     if (env)
134       num_cpus = atoi(env);
135     if (num_cpus < 1) {
136       num_cpus=1;
137     }else if(num_cpus>sys_cpus){
138       num_cpus=sys_cpus;
139     }
140   }
141 
142   // next line is to be inserted in a similar way for all other paralell parts
143 
144   if(*ne<num_cpus) num_cpus=*ne;
145 
146   pthread_t tid[num_cpus];
147 
148   /* determining the element bounds in each thread */
149 
150   NNEW(neapar,ITG,num_cpus);
151   NNEW(nebpar,ITG,num_cpus);
152   if((*nasym==0)||(*ithermal>1)){
153 
154     /* symmetric mechanical calculations or
155        thermal/thermomechanical calculations:
156        include contact elements (symmetric
157        thermal contributions are not covered by
158        mafillsmas.f) */
159 
160     elementcpuload(neapar,nebpar,ne,ipkon,&num_cpus);
161 
162   }else{
163 
164     /* asymmetric mechanical calculations:
165        do not include contact elements */
166 
167     elementcpuload(neapar,nebpar,ne0,ipkon,&num_cpus);
168   }
169 
170   /* determine nzl */
171 
172   *nzl=0;
173   for(i=neq[1];i>=1;i--){
174     if(icol[i-1]>0){
175       *nzl=i;
176       break;
177     }
178   }
179 
180   /* allocating fields for mass and stiffness matrix */
181 
182   NNEW(ad1,double,num_cpus*neq[1]);
183   NNEW(au1,double,(long long)num_cpus*nzs[2]);
184 
185   if(*rhsi==1){
186     NNEW(fext1,double,num_cpus*neq[1]);
187   }
188 
189   if((mass[1]==1)||((mass[0]==1)||(*buckling==1))){
190     NNEW(adb1,double,num_cpus*neq[1]);
191     NNEW(aub1,double,(long long)num_cpus*nzs[1]);
192   }
193 
194   if(*nmethod==4){
195     NNEW(fnext1,double,num_cpus*mt**nk);
196   }
197 
198   /* allocating memory for nmethod; if the Jacobian determinant
199      in any of the elements is nonpositive, nmethod is set to
200      zero */
201 
202   NNEW(nmethod1,ITG,num_cpus);
203   for(j=0;j<num_cpus;j++){
204     nmethod1[j]=*nmethod;
205   }
206 
207   /* calculating the stiffness and/or mass matrix
208      (symmetric part) */
209 
210   co1=co;nk1=nk;kon1=kon;ipkon1=ipkon;lakon1=lakon;ne1=ne;
211   nodeboun1=nodeboun;ndirboun1=ndirboun;xboun1=xboun;
212   nboun1=nboun;ipompc1=ipompc;nodempc1=nodempc;coefmpc1=coefmpc;
213   nmpc1=nmpc;nodeforc1=nodeforc;ndirforc1=ndirforc;xforc1=xforc;
214   nforc1=nforc;nelemload1=nelemload;sideload1=sideload;xload1=xload;
215   nload1=nload;xbody1=xbody;ipobody1=ipobody;nbody1=nbody;
216   cgr1=cgr;nactdof1=nactdof;icol1=icol;jq1=jq;irow1=irow;neq1=neq;
217   nzl1=nzl;ikmpc1=ikmpc;ilmpc1=ilmpc;ikboun1=ikboun;
218   ilboun1=ilboun;elcon1=elcon;nelcon1=nelcon;rhcon1=rhcon;
219   nrhcon1=nrhcon;alcon1=alcon;nalcon1=nalcon;alzero1=alzero;
220   ielmat1=ielmat;ielorien1=ielorien;norien1=norien;orab1=orab;
221   ntmat1_=ntmat_;t01=t0;t11=t1;ithermal1=ithermal;prestr1=prestr;
222   iprestr1=iprestr;vold1=vold;iperturb1=iperturb;sti1=sti;nzs1=nzs;
223   stx1=stx;iexpl1=iexpl;plicon1=plicon;nplicon1=nplicon;
224   plkcon1=plkcon;nplkcon1=nplkcon;xstiff1=xstiff;npmat1_=npmat_;
225   dtime1=dtime;matname1=matname;mi1=mi;ncmat1_=ncmat_;mass1=mass;
226   stiffness1=stiffness;buckling1=buckling;rhsi1=rhsi;intscheme1=intscheme;
227   physcon1=physcon;shcon1=shcon;nshcon1=nshcon;cocon1=cocon;
228   ncocon1=ncocon;ttime1=ttime;time1=time;istep1=istep;iinc1=iinc;
229   coriolis1=coriolis;ibody1=ibody;xloadold1=xloadold;reltime1=reltime;
230   veold1=veold;springarea1=springarea;nstate1_=nstate_;xstateini1=xstateini;
231   xstate1=xstate;thicke1=thicke;integerglob1=integerglob;
232   doubleglob1=doubleglob;tieset1=tieset;istartset1=istartset;
233   iendset1=iendset;ialset1=ialset;ntie1=ntie;nasym1=nasym;
234   pslavsurf1=pslavsurf;pmastsurf1=pmastsurf;mortar1=mortar;
235   clearini1=clearini;ielprop1=ielprop;prop1=prop;ne01=ne0;kscale1=kscale;
236   iponoel1=iponoel;inoel1=inoel;network1=network;
237   smscale1=smscale;mscalmethod1=mscalmethod;set1=set;nset1=nset;
238   islavelinv1=islavelinv;autloc1=autloc;irowtloc1=irowtloc;jqtloc1=jqtloc;
239   mortartrafoflag1=mortartrafoflag;
240 
241   /* calculating the stiffness/mass */
242 
243   printf(" Using up to %" ITGFORMAT " cpu(s) for the symmetric stiffness/mass contributions.\n\n", num_cpus);
244 
245   /* create threads and wait */
246 
247   NNEW(ithread,ITG,num_cpus);
248   for(i=0; i<num_cpus; i++)  {
249     ithread[i]=i;
250     pthread_create(&tid[i], NULL, (void *)mafillsmmt, (void *)&ithread[i]);
251   }
252   for(i=0; i<num_cpus; i++)
253     pthread_join(tid[i], NULL);
254 
255   SFREE(ithread);SFREE(neapar);SFREE(nebpar);
256 
257   /* copying and accumulating the stiffnes and/or mass matrix
258      for buckling the matrices have to be added*/
259 
260   if(*buckling!=1){
261 
262     /* no buckling */
263 
264     for(i=0;i<neq[1];i++){
265       ad[i]=ad1[i];
266     }
267   }else{
268 
269     /* buckling */
270 
271     for(i=0;i<neq[1];i++){
272       ad[i]+=ad1[i];
273     }
274   }
275 
276   for(i=0;i<neq[1];i++){
277     for(j=1;j<num_cpus;j++){
278       ad[i]+=ad1[i+j*neq[1]];
279     }
280   }
281   SFREE(ad1);
282 
283   if(*buckling!=1){
284 
285     /* no buckling */
286 
287     for(i=0;i<nzs[2];i++){
288       au[i]=au1[i];
289     }
290   }else{
291 
292     /* buckling */
293 
294     for(i=0;i<nzs[2];i++){
295       au[i]+=au1[i];
296     }
297   }
298 
299   for(i=0;i<nzs[2];i++){
300     for(j=1;j<num_cpus;j++){
301       au[i]+=au1[i+(long long)j*nzs[2]];
302     }
303   }
304   SFREE(au1);
305 
306   if(*rhsi==1){
307     for(i=0;i<neq[1];i++){
308       fext[i]=fext1[i];
309     }
310     for(i=0;i<neq[1];i++){
311       for(j=1;j<num_cpus;j++){
312 	fext[i]+=fext1[i+j*neq[1]];
313       }
314     }
315     SFREE(fext1);
316   }
317 
318   /* the heat capacity matrix and mass matrix must be treated
319      separately, since the mass matrix is no recalculated
320      in each iteration, whereas the capacity matrix is */
321 
322   /* heat capacity matrix */
323 
324   if(mass[1]==1){
325     for(i=neq[0];i<neq[1];i++){
326       adb[i]=adb1[i];
327     }
328     for(i=neq[0];i<neq[1];i++){
329       for(j=1;j<num_cpus;j++){
330 	adb[i]+=adb1[i+j*neq[1]];
331       }
332     }
333 
334     for(i=nzs[0];i<nzs[1];i++){
335       aub[i]=aub1[i];
336     }
337     for(i=nzs[0];i<nzs[1];i++){
338       for(j=1;j<num_cpus;j++){
339 	aub[i]+=aub1[i+(long long)j*nzs[1]];
340       }
341     }
342   }
343 
344   /* mass matrix or buckling matrix */
345 
346   if((mass[0]==1)||(*buckling==1)){
347     for(i=0;i<neq[0];i++){
348       adb[i]=adb1[i];
349     }
350     for(i=0;i<neq[0];i++){
351       for(j=1;j<num_cpus;j++){
352 	adb[i]+=adb1[i+j*neq[1]];
353       }
354     }
355 
356     for(i=0;i<nzs[0];i++){
357       aub[i]=aub1[i];
358     }
359     for(i=0;i<nzs[0];i++){
360       for(j=1;j<num_cpus;j++){
361 	aub[i]+=aub1[i+(long long)j*nzs[1]];
362       }
363     }
364   }
365   if((mass[0]==1)||(mass[1]==1)||(*buckling==1)){
366     SFREE(adb1);SFREE(aub1);
367   }
368 
369   if(*nmethod==4){
370     for(i=0;i<mt**nk;i++){
371       fnext[i]=fnext1[i];
372     }
373     for(i=0;i<mt**nk;i++){
374       for(j=1;j<num_cpus;j++){
375 	fnext[i]+=fnext1[i+j*mt**nk];
376       }
377     }
378     SFREE(fnext1);
379   }
380 
381   for(j=0;j<num_cpus;j++){
382     if(nmethod1[j]==0){
383       *nmethod=0;
384       break;
385     }
386   }
387   SFREE(nmethod1);
388 
389   /* taking point forces into account in fext */
390 
391   FORTRAN(mafillsmforc,(nforc,ndirforc,nodeforc,xforc,nactdof,
392 			fext,ipompc,nodempc,coefmpc,mi,rhsi,fnext,
393 			nmethod,ntrans,inotr,trab,co));
394 
395   return;
396 
397 }
398 
399 /* subroutine for multithreading of mafillsm */
400 
mafillsmmt(ITG * i)401 void *mafillsmmt(ITG *i){
402 
403   ITG indexad,indexfext,indexadb,nea,neb,indexfnext;
404   long long indexau,indexaub;
405 
406   indexad=0;
407   indexau=0;
408   indexfext=0;
409   indexadb=0;
410   indexaub=0;
411   indexfnext=0;
412 
413   indexad=*i*neq1[1];
414   indexau=(long long)*i*nzs1[2];
415 
416   if(*rhsi1==1){
417     indexfext=*i*neq1[1];
418   }
419   if(mass1[1]==1){
420     indexadb=*i*neq1[1];
421     indexaub=(long long)*i*nzs1[1];
422   }else if((mass1[0]==1)||(*buckling1==1)){
423     indexadb=*i*neq1[0];
424     indexaub=(long long)*i*nzs1[0];
425   }
426   if(nmethod1[0]==4){
427     indexfnext=*i*(mi1[1]+1)**nk1;
428   }
429 
430   nea=neapar[*i]+1;
431   neb=nebpar[*i]+1;
432 
433   FORTRAN(mafillsm,(co1,nk1,kon1,ipkon1,lakon1,ne1,nodeboun1,ndirboun1,
434 		    xboun1,nboun1,
435 		    ipompc1,nodempc1,coefmpc1,nmpc1,nodeforc1,ndirforc1,xforc1,
436 		    nforc1,nelemload1,sideload1,xload1,nload1,xbody1,ipobody1,
437 		    nbody1,cgr1,&ad1[indexad],&au1[indexau],&fext1[indexfext],
438 		    nactdof1,icol1,jq1,irow1,neq1,nzl1,&nmethod1[*i],
439 		    ikmpc1,ilmpc1,ikboun1,ilboun1,elcon1,nelcon1,rhcon1,
440 		    nrhcon1,alcon1,nalcon1,alzero1,ielmat1,
441 		    ielorien1,norien1,orab1,ntmat1_,
442 		    t01,t11,ithermal1,prestr1,iprestr1,vold1,iperturb1,sti1,
443 		    nzs1,stx1,&adb1[indexadb],&aub1[indexaub],iexpl1,plicon1,
444 		    nplicon1,plkcon1,nplkcon1,xstiff1,npmat1_,dtime1,matname1,
445 		    mi1,ncmat1_,mass1,stiffness1,buckling1,rhsi1,intscheme1,
446 		    physcon1,shcon1,nshcon1,cocon1,ncocon1,ttime1,time1,istep1,
447 		    iinc1,coriolis1,
448 		    ibody1,xloadold1,reltime1,veold1,springarea1,nstate1_,
449 		    xstateini1,xstate1,thicke1,integerglob1,doubleglob1,
450 		    tieset1,istartset1,iendset1,ialset1,ntie1,nasym1,pslavsurf1,
451 		    pmastsurf1,mortar1,clearini1,ielprop1,prop1,ne01,
452 		    &fnext1[indexfnext],&nea,&neb,kscale1,iponoel1,inoel1,
453 		    network1,smscale1,mscalmethod1,set1,nset1,islavelinv1,
454 		    autloc1,irowtloc1,jqtloc1,mortartrafoflag1));
455 
456   return NULL;
457 }
458