1#
2# Include user definition
3#
4
# To suppress recursive includes: INCLUDED is set as soon as this file is read.
INCLUDED = 1

# Default the top of the source tree to the current directory.
ifndef TOPDIR
TOPDIR = .
endif

# Default C compiler
CC = gcc

# Load the user rule file.  MAKEFILE_RULE, when set, names an alternative
# file (relative to TOPDIR) to use instead of Makefile.rule.
ifdef MAKEFILE_RULE
include $(TOPDIR)/$(MAKEFILE_RULE)
else
include $(TOPDIR)/Makefile.rule
endif
20
21#
22#  Beginning of system configuration
23#
24
# HOSTCC falls back to the regular C compiler when it is not set.
ifndef HOSTCC
HOSTCC	 = $(CC)
endif

# A user-selected TARGET is handed to the getarch step as -DFORCE_<TARGET>.
ifdef TARGET
GETARCH_FLAGS += -DFORCE_$(TARGET)
endif

# This step is expensive, so it must run only once.  GOTOBLAS_MAKEFILE is
# exported, so sub-makes see it and skip this whole block (including the
# configuration includes below).
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1

# Generating Makefile.conf and config.h
# CFLAGS is quoted so that a GETARCH_FLAGS value holding several flags is
# passed as one assignment instead of being split into words by the shell.
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.getarch CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all)

# TARGET_CORE selects the per-kernel configuration file instead of the
# general one.
ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf
else
include $(TOPDIR)/Makefile_kernel.conf
endif

endif
47
# Default the thread count to NUM_CORES.
ifndef NUM_THREADS
NUM_THREADS = $(NUM_CORES)
endif

# A single thread forces a non-threaded build; "override" makes this win
# even over a USE_THREAD given on the command line.
ifeq ($(NUM_THREADS), 1)
override USE_THREAD = 0
endif

# SMP is left empty for a single-threaded build and set to 1 otherwise.
# Later sections test it with ifdef/ifndef, which treat an empty value as
# "not defined".
ifdef USE_THREAD
ifeq ($(USE_THREAD), 0)
SMP =
else
SMP = 1
endif
else
# This test used to read $(NUM_THREAD), a variable that is never set.
# The correct name is NUM_THREADS.
ifeq ($(NUM_THREADS), 1)
SMP =
else
SMP = 1
endif
endif
69
# Request position-independent code unless the caller already set NEED_PIC.
# The OS-dependent section below may switch it off again.
ifndef NEED_PIC
NEED_PIC = 1
endif

# Archiver flags are cleared; CPP is derived from $(COMPILER), which is not
# set in this file.
ARFLAGS =
CPP = $(COMPILER) -E

# Binary utilities, referenced by absolute path.
AR = /usr/local/bin/ar
AS = /usr/local/bin/as
LD = /usr/local/bin/ld
RANLIB = /usr/local/bin/ranlib
NM = /usr/local/bin/nm
DLLWRAP = /usr/local/bin/dllwrap
82
83#
84#  OS dependent settings
85#
86
# Darwin: link the SystemStubs library and pin the deployment target.
ifeq ($(OSNAME), Darwin)
EXTRALIB += -lSystemStubs
export MACOSX_DEPLOYMENT_TARGET=10.2
endif

# Systems on which the math library is linked explicitly.
ifneq (,$(filter $(OSNAME), Linux DragonFly FreeBSD AIX))
EXTRALIB += -lm
endif

ifeq ($(OSNAME), FreeBSD)
FCOMMON_OPT += -Wl,-rpath=
endif

# Windows-like environments build without PIC and without extended
# precision; every other system links pthreads for threaded (SMP) builds.
ifneq (,$(filter $(OSNAME), WINNT Interix CYGWIN_NT))
NEED_PIC = 0
NO_EXPRECISION = 1
else
ifdef SMP
EXTRALIB += -lpthread
endif
endif

# Native Windows additionally uses its own object/library suffixes.
ifeq ($(OSNAME), WINNT)
EXTRALIB += -defaultlib:advapi32

SUFFIX = obj
PSUFFIX = pobj
LIBSUFFIX = lib
endif

ifeq ($(OSNAME), Interix)
INTERIX_TOOL_DIR = /opt/gcc.3.3/i586-pc-interix3/bin
endif
137
# Quad precision excludes the extended-precision code path.
ifdef QUAD_PRECISION
CCOMMON_OPT += -DQUAD_PRECISION
NO_EXPRECISION = 1
endif

# Extended precision is only considered on x86 and x86_64.
ifeq (,$(filter $(ARCH_), x86 x86_64))
NO_EXPRECISION = 1
endif

# SANITY_CHECK adds a per-object REFNAME built from the target's file stem.
ifdef SANITY_CHECK
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
endif
152
153#
154#  Architecture dependent settings
155#
156
# On 32-bit x86, no -m32/-m64 style flag is forced unless BINARY was given.
ifeq ($(ARCH_), x86)
ifndef BINARY
NO_BINARY_MODE = 1
endif
endif

# x86 and x86_64 share the same rule: extended precision is enabled only
# for the gfortran + gcc pair, and only when nothing above disabled it.
ifneq (,$(filter $(ARCH_), x86 x86_64))
ifndef NO_EXPRECISION
ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(C_COMPILER), GCC)
EXPRECISION = 1
CCOMMON_OPT += -DEXPRECISION -m128bit-long-double
FCOMMON_OPT += -m128bit-long-double
endif
endif
endif
endif
183
# Disable Intel compiler diagnostic 981.
ifeq ($(C_COMPILER), INTEL)
CCOMMON_OPT += -wd981
endif

# OpenMP switch for each supported C compiler.
ifdef USE_OPENMP

ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fopenmp
endif

ifeq ($(C_COMPILER), INTEL)
CCOMMON_OPT += -openmp
endif

# PGI, Open64 and PathScale all spell the option -mp.
ifneq (,$(filter $(C_COMPILER), PGI OPEN64 PATHSCALE))
CCOMMON_OPT += -mp
endif

# Open64 additionally links the C++ runtime.
ifeq ($(C_COMPILER), OPEN64)
CEXTRALIB += -lstdc++
endif

endif
210
211
# DYNAMIC_ARCH: list the cores to build kernels for.  Only x86 and x86_64
# provide a list; on any other architecture DYNAMIC_ARCH is cleared again.
ifdef DYNAMIC_ARCH

ifeq ($(ARCH_), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
	       CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA ATOM NANO
endif

ifeq ($(ARCH_), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA ATOM NANO
endif

ifndef DYNAMIC_CORE
DYNAMIC_ARCH =
endif

endif

# Architectures for which the generic -m32/-m64 handling is not used.
ifneq (,$(filter $(ARCH_), ia64 mips64 alpha))
NO_BINARY_MODE = 1
endif

# ia64 and alpha also mark the binary mode as already decided.
ifneq (,$(filter $(ARCH_), ia64 alpha))
BINARY_DEFINED = 1
endif

# Extended precision on ia64 (gfortran + gcc) is intentionally left disabled:
# EXPRECISION	= 1
# CCOMMON_OPT	+= -DEXPRECISION
247
248#
249#  C Compiler dependent settings
250#
251
# GCC: warnings, profiling flags and ABI selection.
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -Wall
COMMON_PROF += -fno-inline
NO_UNINITIALIZED_WARN = -Wno-uninitialized

# With NO_BINARY_MODE set, mips64 picks its ABI with -mabi and AIX takes no
# ABI flag at all; both mark the binary mode as decided.
ifdef NO_BINARY_MODE

ifeq ($(ARCH_), mips64)
ifndef BINARY64
CCOMMON_OPT += -mabi=n32
else
CCOMMON_OPT += -mabi=64
endif
BINARY_DEFINED = 1
endif

ifeq ($(OSNAME), AIX)
BINARY_DEFINED = 1
endif

endif

# Everything still undecided gets the plain -m32 / -m64 switch.
ifndef BINARY_DEFINED
ifndef BINARY64
CCOMMON_OPT += -m32
else
CCOMMON_OPT += -m64
endif
endif

endif

# PGI selects the target processor instead of a word size.
ifeq ($(C_COMPILER), PGI)
ifndef BINARY64
CCOMMON_OPT += -tp p7
else
CCOMMON_OPT += -tp p7-64
endif
endif

ifeq ($(C_COMPILER), PATHSCALE)
ifndef BINARY64
CCOMMON_OPT += -m32
else
CCOMMON_OPT += -m64
endif
endif
299
300#
301#  Fortran Compiler dependent settings
302#
303
# Tell the C sources which Fortran interface is in use.
ifeq ($(F_COMPILER), G77)
CCOMMON_OPT += -DF_INTERFACE_G77
endif

ifeq ($(F_COMPILER), G95)
CCOMMON_OPT += -DF_INTERFACE_G95
endif

# g77 and g95 take identical Fortran options.
ifneq (,$(filter $(F_COMPILER), G77 G95))
FCOMMON_OPT += -Wall
ifndef NO_BINARY_MODE
ifndef BINARY64
FCOMMON_OPT += -m32
else
FCOMMON_OPT += -m64
endif
endif
endif
327
# gfortran: interface define, warnings, ABI selection and OpenMP.
ifeq ($(F_COMPILER), GFORTRAN)
CCOMMON_OPT += -DF_INTERFACE_GFORT
FCOMMON_OPT += -Wall

ifndef NO_BINARY_MODE
# Generic word-size switch; 64-bit integers only apply to 64-bit builds.
ifndef BINARY64
FCOMMON_OPT += -m32
else
FCOMMON_OPT += -m64
ifdef INTERFACE64
FCOMMON_OPT += -fdefault-integer-8
endif
endif
else
# NO_BINARY_MODE: only mips64 gets an explicit ABI flag.
ifeq ($(ARCH_), mips64)
ifndef BINARY64
FCOMMON_OPT += -mabi=n32
else
FCOMMON_OPT += -mabi=64
endif
endif
endif

ifdef USE_OPENMP
FCOMMON_OPT += -fopenmp
endif
endif
353
# Intel Fortran: interface define and optional 64-bit integers.
ifeq ($(F_COMPILER), INTEL)
CCOMMON_OPT += -DF_INTERFACE_INTEL
ifdef INTERFACE64
FCOMMON_OPT += -i8
endif
endif

ifeq ($(F_COMPILER), FUJITSU)
CCOMMON_OPT += -DF_INTERFACE_FUJITSU
endif

# IBM Fortran: word size via -q32/-q64, 64-bit integers only in 64-bit mode.
ifeq ($(F_COMPILER), IBM)
CCOMMON_OPT += -DF_INTERFACE_IBM
# FCOMMON_OPT	+= -qarch=440
ifndef BINARY64
FCOMMON_OPT += -q32
else
FCOMMON_OPT += -q64
ifdef INTERFACE64
FCOMMON_OPT += -qintsize=8
endif
endif
endif

# All three compilers above receive the same OpenMP switch, appended after
# their other options.
ifdef USE_OPENMP
ifneq (,$(filter $(F_COMPILER), INTEL FUJITSU IBM))
FCOMMON_OPT += -openmp
endif
endif
386
# PGI Fortran: target processor selection; -i8 only for 64-bit builds.
ifeq ($(F_COMPILER), PGI)
CCOMMON_OPT += -DF_INTERFACE_PGI
COMMON_PROF += -DPGICOMPILER
ifndef BINARY64
FCOMMON_OPT += -tp p7
else
ifdef INTERFACE64
FCOMMON_OPT += -i8
endif
FCOMMON_OPT += -tp p7-64
endif
ifdef USE_OPENMP
FCOMMON_OPT += -mp
endif
endif

# PathScale Fortran: mips64 uses -mabi, everything else -m32/-m64.
ifeq ($(F_COMPILER), PATHSCALE)
CCOMMON_OPT += -DF_INTERFACE_PATHSCALE
ifdef BINARY64
ifdef INTERFACE64
FCOMMON_OPT += -i8
endif
endif

ifeq ($(ARCH_), mips64)
ifndef BINARY64
FCOMMON_OPT += -mabi=n32
else
FCOMMON_OPT += -mabi=64
endif
else
ifdef BINARY64
FCOMMON_OPT += -m64
else
FCOMMON_OPT += -m32
endif
endif

ifdef USE_OPENMP
FCOMMON_OPT += -mp
endif
endif

# Open64 Fortran: OpenMP builds also link the C++ runtime.
ifeq ($(F_COMPILER), OPEN64)
CCOMMON_OPT += -DF_INTERFACE_OPEN64
ifdef BINARY64
ifdef INTERFACE64
FCOMMON_OPT += -i8
endif
endif
ifdef BINARY64
FCOMMON_OPT += -m64
else
FCOMMON_OPT += -m32
endif

ifdef USE_OPENMP
FEXTRALIB += -lstdc++
FCOMMON_OPT += -mp
endif
endif
448
# Open64 C compiler: word-size switch.
ifeq ($(C_COMPILER), OPEN64)
ifdef BINARY64
CCOMMON_OPT += -m64
else
CCOMMON_OPT += -m32
endif
endif
456
# Sun C compiler: silence warnings and select the word size from the
# architecture (x86 is 32-bit, everything else 64-bit).
ifeq ($(C_COMPILER), SUN)
CCOMMON_OPT  += -w
ifeq ($(ARCH_), x86)
CCOMMON_OPT  += -m32
else
# This branch used to append -m64 to FCOMMON_OPT, so the C compiler never
# received it.  The Fortran flags are handled by the F_COMPILER = SUN section.
CCOMMON_OPT  += -m64
endif
endif
465
# Sun Fortran: word size follows the architecture (x86 is 32-bit).
ifeq ($(F_COMPILER), SUN)
CCOMMON_OPT += -DF_INTERFACE_SUN
ifneq ($(ARCH_), x86)
FCOMMON_OPT += -m64
else
FCOMMON_OPT += -m32
endif
ifdef USE_OPENMP
FCOMMON_OPT += -xopenmp=parallel
endif
endif

ifeq ($(F_COMPILER), COMPAQ)
CCOMMON_OPT += -DF_INTERFACE_COMPAQ
ifdef USE_OPENMP
FCOMMON_OPT += -openmp
endif
endif

# 64-bit integers on the C side require both a 64-bit build and INTERFACE64.
ifdef BINARY64
ifdef INTERFACE64
CCOMMON_OPT += -DUSE64BITINT
endif
endif
490
# Position-independent code: IBM C and Sun Fortran use their own spelling,
# every other compiler gets -fPIC.
ifeq ($(NEED_PIC), 1)
ifneq ($(C_COMPILER), IBM)
CCOMMON_OPT += -fPIC
else
CCOMMON_OPT += -qpic=large
endif
ifneq ($(F_COMPILER), SUN)
FCOMMON_OPT += -fPIC
else
FCOMMON_OPT += -pic
endif
endif

ifeq ($(DYNAMIC_ARCH), 1)
CCOMMON_OPT += -DDYNAMIC_ARCH
endif

# Threaded build defines.
ifdef SMP
CCOMMON_OPT += -DSMP_SERVER

ifeq ($(ARCH_), mips64)
USE_SIMPLE_THREADED_LEVEL3 = 1
endif

# Note: this test requires USE_OPENMP to be exactly 1, whereas the compiler
# flag sections only test whether it is defined.
ifeq ($(USE_OPENMP), 1)
# USE_SIMPLE_THREADED_LEVEL3 = 1
# NO_AFFINITY = 1
CCOMMON_OPT += -DUSE_OPENMP
endif

endif

ifeq ($(NO_WARMUP), 1)
CCOMMON_OPT += -DNO_WARMUP
endif

ifeq ($(CONSISTENT_FPCSR), 1)
CCOMMON_OPT += -DCONSISTENT_FPCSR
endif
530
531# Only for development
532# CCOMMON_OPT	 += -DPARAMTEST
533# CCOMMON_OPT	 += -DPREFETCHTEST
534# CCOMMON_OPT	 += -DNO_SWITCHING
535# USE_PAPI = 1
536
# Optional PAPI support: adds the define and its link libraries.
ifdef USE_PAPI
CCOMMON_OPT += -DUSE_PAPI
EXTRALIB += -lpapi -lperfctr
endif

ifdef DYNAMIC_THREADS
CCOMMON_OPT += -DDYNAMIC_THREADS
endif

# The configured thread count is passed to the C sources as MAX_CPU_NUMBER.
CCOMMON_OPT += -DMAX_CPU_NUMBER=$(NUM_THREADS)

ifdef USE_SIMPLE_THREADED_LEVEL3
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
endif

# Base name of the library and location of the architecture's kernels.
LIBPREFIX = libgoto2

KERNELDIR = $(TOPDIR)/kernel/$(ARCH_)

# Architecture-specific makefile fragment.
include $(TOPDIR)/Makefile.$(ARCH_)
557
# Per-object symbol name macros.  $(*F) is expanded when a recipe runs,
# so each object gets names derived from its own file stem.
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"

# Core-specific memory allocation choices.
ifeq ($(CORE), PPC440)
CCOMMON_OPT += -DALLOC_QALLOC
endif

ifeq ($(CORE), PPC440FP2)
STATIC_ALLOCATION = 1
endif

# Affinity is disabled everywhere except Linux on x86 / x86_64.
ifneq ($(OSNAME), Linux)
NO_AFFINITY = 1
endif

ifeq (,$(filter $(ARCH_), x86 x86_64))
NO_AFFINITY = 1
endif

ifdef NO_AFFINITY
CCOMMON_OPT += -DNO_AFFINITY
endif

ifdef FUNCTION_PROFILE
CCOMMON_OPT += -DFUNCTION_PROFILE
endif

# Memory allocation strategies, each enabled by its own variable.
ifdef HUGETLB_ALLOCATION
CCOMMON_OPT += -DALLOC_HUGETLB
endif

# The variable's value is passed through as the hugetlb file name.
ifdef HUGETLBFILE_ALLOCATION
CCOMMON_OPT += -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=$(HUGETLBFILE_ALLOCATION)
endif

ifdef STATIC_ALLOCATION
CCOMMON_OPT += -DALLOC_STATIC
endif

ifdef DEVICEDRIVER_ALLOCATION
CCOMMON_OPT += -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"
endif

ifdef MIXED_MEMORY_ALLOCATION
CCOMMON_OPT += -DMIXED_MEMORY_ALLOCATION
endif
605
# Utility programs: SunOS uses the g-prefixed variants.
ifneq ($(OSNAME), SunOS)
TAR = tar
PATCH = patch
GREP = grep
else
TAR = gtar
PATCH = gpatch
GREP = ggrep
endif

MD5SUM = md5
AWK = awk

# Version tag embedded in the library file name.
REVISION = -r$(VERSION)

# Final compiler flag sets; the P/FP variants are for profiled objects.
CFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
PFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)

FFLAGS = $(COMMON_OPT) $(FCOMMON_OPT)
FPFLAGS = $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)

# Default file suffixes, used unless the OS section already chose others.
ifndef SUFFIX
SUFFIX = o
endif

ifndef PSUFFIX
PSUFFIX = po
endif

ifndef LIBSUFFIX
LIBSUFFIX = a
endif

# Threaded (SMP) libraries carry an extra "p" after the prefix.
ifdef SMP
LIBNAME = $(LIBPREFIX)p$(REVISION).$(LIBSUFFIX)
LIBNAME_P = $(LIBPREFIX)p$(REVISION)_p.$(LIBSUFFIX)
else
LIBNAME = $(LIBPREFIX)$(REVISION).$(LIBSUFFIX)
LIBNAME_P = $(LIBPREFIX)$(REVISION)_p.$(LIBSUFFIX)
endif

# Names derived from LIBNAME by swapping the library suffix.
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
LIBDLLNAME = $(LIBNAME:.$(LIBSUFFIX)=.dll)
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
LIBZIPNAME = $(LIBNAME:.$(LIBSUFFIX)=.zip)

# Full paths of the regular and profiled libraries.
LIBS = $(TOPDIR)/$(LIBNAME)
LIBS_P = $(TOPDIR)/$(LIBNAME_P)
657
# Variables handed down to sub-makes through the environment.

# Platform, toolchain and build configuration.
export OSNAME ARCH_ CORE LIBCORE PGCPATH CONFIG
export CC FC BU FU
export USE_THREAD NUM_THREADS NUM_CORES SMP
export MAKEFILE_RULE NEED_PIC
export BINARY BINARY32 BINARY64
export F_COMPILER C_COMPILER USE_OPENMP
export CROSS CROSS_SUFFIX NOFORTRAN
export EXTRALIB CEXTRALIB FEXTRALIB

# SSE capability variables.
export HAVE_SSE HAVE_SSE2 HAVE_SSE3 HAVE_SSSE3
export HAVE_SSE4_1 HAVE_SSE4_2 HAVE_SSE4A HAVE_SSE5

export KERNELDIR FUNCTION_PROFILE TARGET_CORE

# GEMM unroll factors.
export SGEMM_UNROLL_M SGEMM_UNROLL_N
export DGEMM_UNROLL_M DGEMM_UNROLL_N
export QGEMM_UNROLL_M QGEMM_UNROLL_N
export CGEMM_UNROLL_M CGEMM_UNROLL_N
export ZGEMM_UNROLL_M ZGEMM_UNROLL_N
export XGEMM_UNROLL_M XGEMM_UNROLL_N

# CUDA settings are exported only when CUDA support is requested.
ifdef USE_CUDA
export CUDADIR CUCC CUFLAGS CULIB
endif
717
# Add the object, profiled-object and Fortran suffixes to the suffix list
# used by the suffix rules below.
.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f

# Fortran source -> object.  The output name is the file part of the
# target, so the object is written to the current directory.
.f.$(SUFFIX):
	$(FC) $(FFLAGS) -c $< -o $(@F)

# Fortran source -> profiled object (profiling flags plus -pg).
.f.$(PSUFFIX):
	$(FC) $(FPFLAGS) -pg -c $< -o $(@F)
725
726
# Install locations of third-party compilers and libraries.

# PathScale and PGI runtime directories depend on the word size.
ifndef BINARY64
PATHSCALEPATH = /opt/pathscale/lib/3.1/32
PGIPATH = /opt/pgi/linux86/7.1-5/lib
else
PATHSCALEPATH = /opt/pathscale/lib/3.1
PGIPATH = /opt/pgi/linux86-64/7.1-5/lib
endif

ACMLPATH = /opt/acml/4.3.0

# MKL is installed as a framework on Darwin.
ifeq ($(OSNAME), Darwin)
MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
else
MKLPATH = /opt/intel/mkl/10.2.2.025/lib
endif

ATLASPATH = /opt/atlas/3.9.17/opteron
FLAMEPATH = $(HOME)/flame/lib

# Sun Studio location differs between SunOS and other systems.
ifeq ($(OSNAME), SunOS)
SUNPATH = /opt/SUNWspro
else
SUNPATH = /opt/sunstudio12.1
endif
748
749