1 /* CalculiX - A 3-dimensional finite element program */
2 /* Copyright (C) 1998-2021 Guido Dhondt */
3
4 /* This program is free software; you can redistribute it and/or */
5 /* modify it under the terms of the GNU General Public License as */
6 /* published by the Free Software Foundation(version 2); */
7 /* */
8
9 /* This program is distributed in the hope that it will be useful, */
10 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
11 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
12 /* GNU General Public License for more details. */
13
14 /* You should have received a copy of the GNU General Public License */
15 /* along with this program; if not, write to the Free Software */
16 /* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
17
18 #include <unistd.h>
19 #include <stdio.h>
20 #include <math.h>
21 #include <stdlib.h>
22 #include <pthread.h>
23 #include "CalculiX.h"
24
25 static char *lakon1,*sideload1,*matname1,*tieset1,*set1;
26
27 static ITG *nk1,*kon1,*ipkon1,*ne1,*nodeboun1,*ndirboun1,*nboun1,
28 *ipompc1,*nodempc1,*nmpc1,*nodeforc1,*ndirforc1,*nforc1,*nelemload1,
29 *nload1,*ipobody1,*nbody1,*nactdof1,*icol1,*jq1,*irow1,*neq1,
30 *nzl1,*nmethod1=NULL,*ikmpc1,*ilmpc1,*ikboun1,*ilboun1,*nelcon1,
31 *nrhcon1,*nalcon1,*ielmat1,*ielorien1,*norien1,*ntmat1_,*ithermal1,
32 *iprestr1,*iperturb1,*nzs1,*iexpl1,*nplicon1,*nplkcon1,*npmat1_,
33 *mi1,*ncmat1_,*mass1,*stiffness1,*buckling1,*rhsi1,*intscheme1,
34 *nshcon1,*ncocon1,*istep1,*iinc1,*coriolis1,*ibody1,*nstate1_,
35 *integerglob1,*istartset1,*iendset1,*ialset1,*ntie1,*nasym1,
36 *mortar1,*ielprop1,*ne01,num_cpus,*kscale1,*iponoel1,*inoel1,
37 *network1,*neapar=NULL,*nebpar=NULL,*mscalmethod1,*nset1,
38 *irowtloc1,*jqtloc1,*islavelinv1,*mortartrafoflag1;
39
40 static double *co1,*xboun1,*coefmpc1,*xforc1,*xload1,*xbody1,*cgr1,
41 *ad1=NULL,*au1=NULL,*fext1=NULL,*elcon1,*rhcon1,*alcon1,*alzero1,
42 *orab1,*t01,*t11,*prestr1,*vold1,*sti1,*stx1,*adb1=NULL,*aub1=NULL,
43 *plicon1,*plkcon1,*xstiff1,*dtime1,*physcon1,*shcon1,*cocon1,
44 *ttime1,*time1,*xloadold1,*reltime1,*veold1,*springarea1,
45 *xstateini1,*xstate1,*thicke1,*doubleglob1,*pslavsurf1,*pmastsurf1,
46 *clearini1,*prop1,*fnext1=NULL,*smscale1,*autloc1;
47
mafillsmmain(double * co,ITG * nk,ITG * kon,ITG * ipkon,char * lakon,ITG * ne,ITG * nodeboun,ITG * ndirboun,double * xboun,ITG * nboun,ITG * ipompc,ITG * nodempc,double * coefmpc,ITG * nmpc,ITG * nodeforc,ITG * ndirforc,double * xforc,ITG * nforc,ITG * nelemload,char * sideload,double * xload,ITG * nload,double * xbody,ITG * ipobody,ITG * nbody,double * cgr,double * ad,double * au,double * fext,ITG * nactdof,ITG * icol,ITG * jq,ITG * irow,ITG * neq,ITG * nzl,ITG * nmethod,ITG * ikmpc,ITG * ilmpc,ITG * ikboun,ITG * ilboun,double * elcon,ITG * nelcon,double * rhcon,ITG * nrhcon,double * alcon,ITG * nalcon,double * alzero,ITG * ielmat,ITG * ielorien,ITG * norien,double * orab,ITG * ntmat_,double * t0,double * t1,ITG * ithermal,double * prestr,ITG * iprestr,double * vold,ITG * iperturb,double * sti,ITG * nzs,double * stx,double * adb,double * aub,ITG * iexpl,double * plicon,ITG * nplicon,double * plkcon,ITG * nplkcon,double * xstiff,ITG * npmat_,double * dtime,char * matname,ITG * mi,ITG * ncmat_,ITG * mass,ITG * stiffness,ITG * buckling,ITG * rhsi,ITG * intscheme,double * physcon,double * shcon,ITG * nshcon,double * cocon,ITG * ncocon,double * ttime,double * time,ITG * istep,ITG * iinc,ITG * coriolis,ITG * ibody,double * xloadold,double * reltime,double * veold,double * springarea,ITG * nstate_,double * xstateini,double * xstate,double * thicke,ITG * integerglob,double * doubleglob,char * tieset,ITG * istartset,ITG * iendset,ITG * ialset,ITG * ntie,ITG * nasym,double * pslavsurf,double * pmastsurf,ITG * mortar,double * clearini,ITG * ielprop,double * prop,ITG * ne0,double * fnext,ITG * kscale,ITG * iponoel,ITG * inoel,ITG * network,ITG * ntrans,ITG * inotr,double * trab,double * smscale,ITG * mscalmethod,char * set,ITG * nset,ITG * islavelinv,double * autloc,ITG * irowtloc,ITG * jqtloc,ITG * mortartrafoflag)48 void mafillsmmain(double *co,ITG *nk,ITG *kon,ITG *ipkon,char *lakon,
49 ITG *ne,ITG *nodeboun,ITG *ndirboun,double *xboun,
50 ITG *nboun,ITG *ipompc,ITG *nodempc,double *coefmpc,
51 ITG *nmpc,ITG *nodeforc,ITG *ndirforc,
52 double *xforc,ITG *nforc,ITG *nelemload,char *sideload,
53 double *xload,ITG *nload,double *xbody,ITG *ipobody,
54 ITG *nbody,double *cgr,
55 double *ad,double *au,double *fext,ITG *nactdof,
56 ITG *icol,ITG *jq,ITG *irow,ITG *neq,ITG *nzl,
57 ITG *nmethod,ITG *ikmpc,ITG *ilmpc,ITG *ikboun,
58 ITG *ilboun,
59 double *elcon,ITG *nelcon,double *rhcon,ITG *nrhcon,
60 double *alcon,ITG *nalcon,double *alzero,ITG *ielmat,
61 ITG *ielorien,ITG *norien,double *orab,ITG *ntmat_,
62 double *t0,double *t1,ITG *ithermal,
63 double *prestr,ITG *iprestr,double *vold,
64 ITG *iperturb,double *sti,ITG *nzs,double *stx,
65 double *adb,double *aub,ITG *iexpl,
66 double *plicon,ITG *nplicon,double *plkcon,ITG *nplkcon,
67 double *xstiff,
68 ITG *npmat_,double *dtime,char *matname,ITG *mi,
69 ITG *ncmat_,ITG *mass,ITG *stiffness,ITG *buckling,ITG *rhsi,
70 ITG *intscheme,double *physcon,double *shcon,ITG *nshcon,
71 double *cocon,ITG *ncocon,double *ttime,double *time,
72 ITG *istep,ITG *iinc,ITG *coriolis,ITG *ibody,
73 double *xloadold,double *reltime,double *veold,
74 double *springarea,ITG *nstate_,double *xstateini,
75 double *xstate,double *thicke,
76 ITG *integerglob,double *doubleglob,char *tieset,
77 ITG *istartset,ITG *iendset,ITG *ialset,ITG *ntie,
78 ITG *nasym,double *pslavsurf,double *pmastsurf,ITG *mortar,
79 double *clearini,ITG *ielprop,double *prop,ITG *ne0,
80 double *fnext,ITG *kscale,ITG *iponoel,ITG *inoel,
81 ITG *network,ITG *ntrans,ITG *inotr,double *trab,
82 double *smscale,ITG *mscalmethod,char *set,ITG *nset,
83 ITG *islavelinv,double *autloc,ITG *irowtloc,ITG *jqtloc,
84 ITG *mortartrafoflag){
85
86 /* mafillsmmain = main program for MAtrix FILLing of the Stiffnes
87 Matrix */
88
89 ITG i,j,mt=mi[1]+1;
90
91 /* variables for multithreading procedure */
92
93 ITG sys_cpus,*ithread=NULL;
94 char *env,*envloc,*envsys;
95
96 num_cpus = 0;
97 sys_cpus=0;
98
99 /* explicit user declaration prevails */
100
101 envsys=getenv("NUMBER_OF_CPUS");
102 if(envsys){
103 sys_cpus=atoi(envsys);
104 if(sys_cpus<0) sys_cpus=0;
105 }
106
107 // sys_cpus=1;
108
109 /* automatic detection of available number of processors */
110
111 if(sys_cpus==0){
112 sys_cpus = getSystemCPUs();
113 if(sys_cpus<1) sys_cpus=1;
114 }
115
116 /* local declaration prevails, if strictly positive */
117
118 envloc = getenv("CCX_NPROC_STIFFNESS");
119 if(envloc){
120 num_cpus=atoi(envloc);
121 if(num_cpus<0){
122 num_cpus=0;
123 }else if(num_cpus>sys_cpus){
124 num_cpus=sys_cpus;
125 }
126
127 }
128
129 /* else global declaration, if any, applies */
130
131 env = getenv("OMP_NUM_THREADS");
132 if(num_cpus==0){
133 if (env)
134 num_cpus = atoi(env);
135 if (num_cpus < 1) {
136 num_cpus=1;
137 }else if(num_cpus>sys_cpus){
138 num_cpus=sys_cpus;
139 }
140 }
141
142 // next line is to be inserted in a similar way for all other paralell parts
143
144 if(*ne<num_cpus) num_cpus=*ne;
145
146 pthread_t tid[num_cpus];
147
148 /* determining the element bounds in each thread */
149
150 NNEW(neapar,ITG,num_cpus);
151 NNEW(nebpar,ITG,num_cpus);
152 if((*nasym==0)||(*ithermal>1)){
153
154 /* symmetric mechanical calculations or
155 thermal/thermomechanical calculations:
156 include contact elements (symmetric
157 thermal contributions are not covered by
158 mafillsmas.f) */
159
160 elementcpuload(neapar,nebpar,ne,ipkon,&num_cpus);
161
162 }else{
163
164 /* asymmetric mechanical calculations:
165 do not include contact elements */
166
167 elementcpuload(neapar,nebpar,ne0,ipkon,&num_cpus);
168 }
169
170 /* determine nzl */
171
172 *nzl=0;
173 for(i=neq[1];i>=1;i--){
174 if(icol[i-1]>0){
175 *nzl=i;
176 break;
177 }
178 }
179
180 /* allocating fields for mass and stiffness matrix */
181
182 NNEW(ad1,double,num_cpus*neq[1]);
183 NNEW(au1,double,(long long)num_cpus*nzs[2]);
184
185 if(*rhsi==1){
186 NNEW(fext1,double,num_cpus*neq[1]);
187 }
188
189 if((mass[1]==1)||((mass[0]==1)||(*buckling==1))){
190 NNEW(adb1,double,num_cpus*neq[1]);
191 NNEW(aub1,double,(long long)num_cpus*nzs[1]);
192 }
193
194 if(*nmethod==4){
195 NNEW(fnext1,double,num_cpus*mt**nk);
196 }
197
198 /* allocating memory for nmethod; if the Jacobian determinant
199 in any of the elements is nonpositive, nmethod is set to
200 zero */
201
202 NNEW(nmethod1,ITG,num_cpus);
203 for(j=0;j<num_cpus;j++){
204 nmethod1[j]=*nmethod;
205 }
206
207 /* calculating the stiffness and/or mass matrix
208 (symmetric part) */
209
210 co1=co;nk1=nk;kon1=kon;ipkon1=ipkon;lakon1=lakon;ne1=ne;
211 nodeboun1=nodeboun;ndirboun1=ndirboun;xboun1=xboun;
212 nboun1=nboun;ipompc1=ipompc;nodempc1=nodempc;coefmpc1=coefmpc;
213 nmpc1=nmpc;nodeforc1=nodeforc;ndirforc1=ndirforc;xforc1=xforc;
214 nforc1=nforc;nelemload1=nelemload;sideload1=sideload;xload1=xload;
215 nload1=nload;xbody1=xbody;ipobody1=ipobody;nbody1=nbody;
216 cgr1=cgr;nactdof1=nactdof;icol1=icol;jq1=jq;irow1=irow;neq1=neq;
217 nzl1=nzl;ikmpc1=ikmpc;ilmpc1=ilmpc;ikboun1=ikboun;
218 ilboun1=ilboun;elcon1=elcon;nelcon1=nelcon;rhcon1=rhcon;
219 nrhcon1=nrhcon;alcon1=alcon;nalcon1=nalcon;alzero1=alzero;
220 ielmat1=ielmat;ielorien1=ielorien;norien1=norien;orab1=orab;
221 ntmat1_=ntmat_;t01=t0;t11=t1;ithermal1=ithermal;prestr1=prestr;
222 iprestr1=iprestr;vold1=vold;iperturb1=iperturb;sti1=sti;nzs1=nzs;
223 stx1=stx;iexpl1=iexpl;plicon1=plicon;nplicon1=nplicon;
224 plkcon1=plkcon;nplkcon1=nplkcon;xstiff1=xstiff;npmat1_=npmat_;
225 dtime1=dtime;matname1=matname;mi1=mi;ncmat1_=ncmat_;mass1=mass;
226 stiffness1=stiffness;buckling1=buckling;rhsi1=rhsi;intscheme1=intscheme;
227 physcon1=physcon;shcon1=shcon;nshcon1=nshcon;cocon1=cocon;
228 ncocon1=ncocon;ttime1=ttime;time1=time;istep1=istep;iinc1=iinc;
229 coriolis1=coriolis;ibody1=ibody;xloadold1=xloadold;reltime1=reltime;
230 veold1=veold;springarea1=springarea;nstate1_=nstate_;xstateini1=xstateini;
231 xstate1=xstate;thicke1=thicke;integerglob1=integerglob;
232 doubleglob1=doubleglob;tieset1=tieset;istartset1=istartset;
233 iendset1=iendset;ialset1=ialset;ntie1=ntie;nasym1=nasym;
234 pslavsurf1=pslavsurf;pmastsurf1=pmastsurf;mortar1=mortar;
235 clearini1=clearini;ielprop1=ielprop;prop1=prop;ne01=ne0;kscale1=kscale;
236 iponoel1=iponoel;inoel1=inoel;network1=network;
237 smscale1=smscale;mscalmethod1=mscalmethod;set1=set;nset1=nset;
238 islavelinv1=islavelinv;autloc1=autloc;irowtloc1=irowtloc;jqtloc1=jqtloc;
239 mortartrafoflag1=mortartrafoflag;
240
241 /* calculating the stiffness/mass */
242
243 printf(" Using up to %" ITGFORMAT " cpu(s) for the symmetric stiffness/mass contributions.\n\n", num_cpus);
244
245 /* create threads and wait */
246
247 NNEW(ithread,ITG,num_cpus);
248 for(i=0; i<num_cpus; i++) {
249 ithread[i]=i;
250 pthread_create(&tid[i], NULL, (void *)mafillsmmt, (void *)&ithread[i]);
251 }
252 for(i=0; i<num_cpus; i++)
253 pthread_join(tid[i], NULL);
254
255 SFREE(ithread);SFREE(neapar);SFREE(nebpar);
256
257 /* copying and accumulating the stiffnes and/or mass matrix
258 for buckling the matrices have to be added*/
259
260 if(*buckling!=1){
261
262 /* no buckling */
263
264 for(i=0;i<neq[1];i++){
265 ad[i]=ad1[i];
266 }
267 }else{
268
269 /* buckling */
270
271 for(i=0;i<neq[1];i++){
272 ad[i]+=ad1[i];
273 }
274 }
275
276 for(i=0;i<neq[1];i++){
277 for(j=1;j<num_cpus;j++){
278 ad[i]+=ad1[i+j*neq[1]];
279 }
280 }
281 SFREE(ad1);
282
283 if(*buckling!=1){
284
285 /* no buckling */
286
287 for(i=0;i<nzs[2];i++){
288 au[i]=au1[i];
289 }
290 }else{
291
292 /* buckling */
293
294 for(i=0;i<nzs[2];i++){
295 au[i]+=au1[i];
296 }
297 }
298
299 for(i=0;i<nzs[2];i++){
300 for(j=1;j<num_cpus;j++){
301 au[i]+=au1[i+(long long)j*nzs[2]];
302 }
303 }
304 SFREE(au1);
305
306 if(*rhsi==1){
307 for(i=0;i<neq[1];i++){
308 fext[i]=fext1[i];
309 }
310 for(i=0;i<neq[1];i++){
311 for(j=1;j<num_cpus;j++){
312 fext[i]+=fext1[i+j*neq[1]];
313 }
314 }
315 SFREE(fext1);
316 }
317
318 /* the heat capacity matrix and mass matrix must be treated
319 separately, since the mass matrix is no recalculated
320 in each iteration, whereas the capacity matrix is */
321
322 /* heat capacity matrix */
323
324 if(mass[1]==1){
325 for(i=neq[0];i<neq[1];i++){
326 adb[i]=adb1[i];
327 }
328 for(i=neq[0];i<neq[1];i++){
329 for(j=1;j<num_cpus;j++){
330 adb[i]+=adb1[i+j*neq[1]];
331 }
332 }
333
334 for(i=nzs[0];i<nzs[1];i++){
335 aub[i]=aub1[i];
336 }
337 for(i=nzs[0];i<nzs[1];i++){
338 for(j=1;j<num_cpus;j++){
339 aub[i]+=aub1[i+(long long)j*nzs[1]];
340 }
341 }
342 }
343
344 /* mass matrix or buckling matrix */
345
346 if((mass[0]==1)||(*buckling==1)){
347 for(i=0;i<neq[0];i++){
348 adb[i]=adb1[i];
349 }
350 for(i=0;i<neq[0];i++){
351 for(j=1;j<num_cpus;j++){
352 adb[i]+=adb1[i+j*neq[1]];
353 }
354 }
355
356 for(i=0;i<nzs[0];i++){
357 aub[i]=aub1[i];
358 }
359 for(i=0;i<nzs[0];i++){
360 for(j=1;j<num_cpus;j++){
361 aub[i]+=aub1[i+(long long)j*nzs[1]];
362 }
363 }
364 }
365 if((mass[0]==1)||(mass[1]==1)||(*buckling==1)){
366 SFREE(adb1);SFREE(aub1);
367 }
368
369 if(*nmethod==4){
370 for(i=0;i<mt**nk;i++){
371 fnext[i]=fnext1[i];
372 }
373 for(i=0;i<mt**nk;i++){
374 for(j=1;j<num_cpus;j++){
375 fnext[i]+=fnext1[i+j*mt**nk];
376 }
377 }
378 SFREE(fnext1);
379 }
380
381 for(j=0;j<num_cpus;j++){
382 if(nmethod1[j]==0){
383 *nmethod=0;
384 break;
385 }
386 }
387 SFREE(nmethod1);
388
389 /* taking point forces into account in fext */
390
391 FORTRAN(mafillsmforc,(nforc,ndirforc,nodeforc,xforc,nactdof,
392 fext,ipompc,nodempc,coefmpc,mi,rhsi,fnext,
393 nmethod,ntrans,inotr,trab,co));
394
395 return;
396
397 }
398
399 /* subroutine for multithreading of mafillsm */
400
mafillsmmt(ITG * i)401 void *mafillsmmt(ITG *i){
402
403 ITG indexad,indexfext,indexadb,nea,neb,indexfnext;
404 long long indexau,indexaub;
405
406 indexad=0;
407 indexau=0;
408 indexfext=0;
409 indexadb=0;
410 indexaub=0;
411 indexfnext=0;
412
413 indexad=*i*neq1[1];
414 indexau=(long long)*i*nzs1[2];
415
416 if(*rhsi1==1){
417 indexfext=*i*neq1[1];
418 }
419 if(mass1[1]==1){
420 indexadb=*i*neq1[1];
421 indexaub=(long long)*i*nzs1[1];
422 }else if((mass1[0]==1)||(*buckling1==1)){
423 indexadb=*i*neq1[0];
424 indexaub=(long long)*i*nzs1[0];
425 }
426 if(nmethod1[0]==4){
427 indexfnext=*i*(mi1[1]+1)**nk1;
428 }
429
430 nea=neapar[*i]+1;
431 neb=nebpar[*i]+1;
432
433 FORTRAN(mafillsm,(co1,nk1,kon1,ipkon1,lakon1,ne1,nodeboun1,ndirboun1,
434 xboun1,nboun1,
435 ipompc1,nodempc1,coefmpc1,nmpc1,nodeforc1,ndirforc1,xforc1,
436 nforc1,nelemload1,sideload1,xload1,nload1,xbody1,ipobody1,
437 nbody1,cgr1,&ad1[indexad],&au1[indexau],&fext1[indexfext],
438 nactdof1,icol1,jq1,irow1,neq1,nzl1,&nmethod1[*i],
439 ikmpc1,ilmpc1,ikboun1,ilboun1,elcon1,nelcon1,rhcon1,
440 nrhcon1,alcon1,nalcon1,alzero1,ielmat1,
441 ielorien1,norien1,orab1,ntmat1_,
442 t01,t11,ithermal1,prestr1,iprestr1,vold1,iperturb1,sti1,
443 nzs1,stx1,&adb1[indexadb],&aub1[indexaub],iexpl1,plicon1,
444 nplicon1,plkcon1,nplkcon1,xstiff1,npmat1_,dtime1,matname1,
445 mi1,ncmat1_,mass1,stiffness1,buckling1,rhsi1,intscheme1,
446 physcon1,shcon1,nshcon1,cocon1,ncocon1,ttime1,time1,istep1,
447 iinc1,coriolis1,
448 ibody1,xloadold1,reltime1,veold1,springarea1,nstate1_,
449 xstateini1,xstate1,thicke1,integerglob1,doubleglob1,
450 tieset1,istartset1,iendset1,ialset1,ntie1,nasym1,pslavsurf1,
451 pmastsurf1,mortar1,clearini1,ielprop1,prop1,ne01,
452 &fnext1[indexfnext],&nea,&neb,kscale1,iponoel1,inoel1,
453 network1,smscale1,mscalmethod1,set1,nset1,islavelinv1,
454 autloc1,irowtloc1,jqtloc1,mortartrafoflag1));
455
456 return NULL;
457 }
458