1#===============================================================================
2# SuiteSparseQR/Tcov/Makefile
3#===============================================================================
4
5# This test requires METIS, and it only works on Linux.
6
# Default goal: run the statement-coverage test without Valgrind.
default: go

# These three names are command aliases, not files.  Declaring them phony
# keeps a stray file called "default", "valgrind" or "ccode" in this directory
# from making the alias look up to date.
.PHONY: default valgrind ccode

# Run the statement-coverage test under Valgrind as well.
valgrind: vgo

# Compile everything listed under "all" without running any test.
ccode: all
14
15include ../../SuiteSparse_config/SuiteSparse_config.mk
16
# The coverage test is tied to the GNU compilers, so they are forced here
# regardless of what the included configuration selected.
CC  = gcc
CXX = g++
# Optional overrides for the BLAS/LAPACK link flags (disabled):
# BLAS = -lrefblas
# LAPACK = -llapack

# Compiler flags, grouped by purpose.  The coverage options are what gcov
# consumes (see the "go" target); the -W options enable picky warnings.
CF  = -pg
CF += -O0 -g -fprofile-arcs -ftest-coverage
CF += -Wall -W -Wshadow -Winline -Wno-unused-parameter
CF += -Wredundant-decls -Wdisabled-optimization
CF += -fexceptions -fopenmp

# Fortran-side link libraries.  An optimized BLAS can cause problems in
# Valgrind; the plain variants below are kept as disabled alternatives.
# FLIB = -llapack_plain -lblas_plain -lgfortran
# FLIB = -llapack_plain -lblas_plain -lgfortran -lg2c
FLIB = $(LAPACK) $(BLAS)
34
35#-------------------------------------------------------------------------------
36
# CUDA link libraries; CUDART_LIB and CUBLAS_LIB are not defined in this file
# (presumably they come from the included SuiteSparse_config.mk -- confirm).
CUDA_LIB  = $(CUDART_LIB)
CUDA_LIB += $(CUBLAS_LIB)

# CUDA compiler command line.  The -Xcompiler pairs forward the coverage
# options to the host compiler.
# NOTE(review): the nvcc path is hard-coded to /usr/local/cuda.
NVCC  = /usr/local/cuda/bin/nvcc -g --profile --generate-line-info $(NV20)
NVCC += -Xcompiler -fprofile-arcs -Xcompiler -ftest-coverage
41
42#-------------------------------------------------------------------------------
43
# SuiteSparse libraries the test programs are linked against.
CLIB = -L../../lib -lcholmod -lamd -lcolamd -lccolamd -lcamd -lmetis \
       -lsuitesparseconfig

# Build every test/demo program without running anything.
all: qrtest gpudemo qrdemo_gpu

# Build the test driver (and the GPU engine demo when the GPU is enabled).
library: qrtest gpudemo

# "gpudemo" is an alias used by "all" and "library".  It previously had no
# rule at all, so those targets failed with "No rule to make target".  With
# GPU_CONFIG set it builds the GPUQREngine demo; without it, it is a no-op,
# in the same way the "gpu" target is handled elsewhere in this file.
ifneq ($(GPU_CONFIG),)
gpudemo: gpuqrengine_demo
else
gpudemo:
endif

.PHONY: all library gpudemo
49
# purge is a synonym for distclean.
purge: distclean

# distclean: everything "clean" removes, plus the test programs, their
# output files, and whatever $(PURGE) names.  The leading "-" lets make
# continue even if a removal command reports an error.
distclean: clean
	- $(RM) qrtest gpuqrengine_demo qrdemo_gpu
	- $(RM) qrtest_out.txt qrtest_out1.txt gpu_results.txt
	- $(RM) pfile tfile cov.out troll.m X.mtx
	- $(RM) -r $(PURGE)

# clean: remove whatever $(CLEAN) names (CLEAN is not defined in this file).
clean:
	- $(RM) -r $(CLEAN)
59
# SuiteSparseQR headers that every object file depends on.  A dependency on
# the Makefile itself is deliberately left disabled.
INC = $(addprefix ../Include/, \
    spqr.hpp \
    SuiteSparseQR_C.h \
    SuiteSparseQR_definitions.h \
    SuiteSparseQR.hpp)
64
# Object files linked into each test program.  The list is given as base
# names (in the original order) and the .o suffix is appended in one place.
OBJ = $(addsuffix .o, \
    spqr_rmap SuiteSparseQR_C SuiteSparseQR_expert \
    spqr_parallel spqr_kernel spqr_analyze spqr_assemble \
    spqr_cpack spqr_csize spqr_fcsize spqr_debug \
    spqr_front spqr_factorize \
    spqr_freenum spqr_freesym spqr_freefac \
    spqr_fsize spqr_maxcolnorm \
    spqr_rconvert spqr_rcount spqr_rhpack spqr_rsolve \
    spqr_stranspose1 spqr_stranspose2 spqr_hpinv \
    spqr_1fixed spqr_1colamd SuiteSparseQR spqr_1factor \
    spqr_cumsum spqr_shift \
    spqr_happly spqr_panel spqr_happly_work \
    SuiteSparseQR_qmult spqr_trapezoidal spqr_larftb \
    spqr_append spqr_type spqr_tol \
    qrtestc)

# Extra objects that are only built and linked when GPU_CONFIG is non-empty.
ifneq ($(GPU_CONFIG),)
OBJ += $(addsuffix .o, \
    spqrgpu_kernel spqrgpu_buildAssemblyMaps spqrgpu_computeFrontStaging \
    SuiteSparseGPU_Workspace \
    SuiteSparseGPU_Workspace_cpuAllocators \
    SuiteSparseGPU_Workspace_gpuAllocators \
    SuiteSparseGPU_Workspace_transfer \
    GPUQREngine_GraphVizHelper GPUQREngine_UberKernel \
    GPUQREngine_ExpertSparse GPUQREngine_Internal GPUQREngine_ExpertDense \
    BucketList BucketList_AdvanceBundles BucketList_CreateBundles \
    BucketList_FillWorkQueue BucketList_GrowBundles BucketList_Manage \
    BucketList_PostProcessing \
    LLBundle LLBundle_AddTiles LLBundle_Advance LLBundle_GPUPack \
    LLBundle_PipelinedRearrange LLBundle_UpdateSecondMinIndex \
    Scheduler Scheduler_FillWorkQueue Scheduler_Front \
    Scheduler_LaunchKernel Scheduler_PostProcess Scheduler_Render \
    Scheduler_TransferData ssgpu_maxQueueSize \
    TaskDescriptor_flops)
endif
145
# Every object file is rebuilt when any header listed in INC changes.
$(OBJ): $(INC)

# Include search path.  The definition ends here: the previous trailing
# backslash continued the line into the blank line below, which only worked
# as long as nobody removed that blank line.
I = -I../../CHOLMOD/Include -I../../SuiteSparse_config -I../Include

# GPU builds also need the GPU runtime, GPUQREngine and CUDA headers.
ifneq ($(GPU_CONFIG),)
    I += -I../../SuiteSparse_GPURuntime/Include \
        -I../../GPUQREngine/Include $(CUDA_INC)
endif

# C++ compile command shared by the SPQR compile and link rules.
C = $(CXX) $(CF) $(I) $(SPQR_CONFIG)

# Everything passed to the linker after the object files.
LIBS = $(CLIB) $(FLIB) $(LDLIBS) $(CUDA_LIB)
158
# The C test driver is compiled with the C compiler, using the same flags
# and include path as the C++ sources (but without SPQR_CONFIG).
qrtestc.o: qrtestc.c $(INC)
	$(CC) $(CF) $(I) -c qrtestc.c

# Main test program: the "libraries" target is run first, then qrtest.cpp is
# compiled and linked against all objects in OBJ.
qrtest: libraries qrtest.cpp $(INC) $(OBJ)
	$(C) qrtest.cpp -o $@ $(OBJ) $(LIBS) -lm
164
# gpu: run the GPU demos.  Without GPU_CONFIG the target exists but does
# nothing.  Each run is prefixed with "-" so a failing demo does not stop
# the remaining ones.
ifeq ($(GPU_CONFIG),)
gpu:
else
gpu: gpuqrengine_demo qrdemo_gpu
	- ./gpuqrengine_demo
	- ./qrdemo_gpu ../Matrix/west0067.mtx 2
	- ./qrdemo_gpu ../Matrix/lp_e226_transposed.mtx 2
	- ./qrdemo_gpu ../Matrix/lp_e226_transposed.mtx 6
	- ./qrdemo_gpu ../Matrix/Groebner_id2003_aug.mtx 6
	- ./qrdemo_gpu ../Matrix/Franz6_id1959_aug.mtx 6
endif

# GPUQREngine demo program.  The prerequisite spells the source path out
# literally because GPUQRDEMO is only assigned further down in this file and
# prerequisites are expanded as soon as the rule is read; the recipe is
# expanded later, so it can use the variable.
gpuqrengine_demo: libraries \
        ../../GPUQREngine/Demo/gpuqrengine_demo.cpp $(INC) $(OBJ)
	$(C) $(GPUQRDEMO)/gpuqrengine_demo.cpp -o $@ \
                $(OBJ) $(LIBS) -lm

# SPQR GPU demo.  The link step exists only when GPU_CONFIG is set;
# otherwise the rule has no recipe and only its prerequisites are built.
qrdemo_gpu: ../Demo/qrdemo_gpu.cpp $(INC) $(OBJ)
ifneq ($(GPU_CONFIG),)
	$(C) $< -o $@ $(OBJ) $(LIBS)
endif
186
# Test runners.  They are commands rather than files, so they are declared
# phony.  Every recipe line is prefixed with "-" so that ./cov still runs
# when the test program exits with an error.
# NOTE(review): ./cov is a script outside this file; presumably it summarizes
# the gcov results -- confirm.
.PHONY: go go1 vgo1 vgo

# go: full run over matrixlist.txt (also builds and runs the GPU demos).
go: qrtest gpu qrdemo_gpu
	- ./qrtest matrixlist.txt > qrtest_out.txt
	- ./cov

# go1: quick run over matrix1.txt only.
go1: qrtest
	- ./qrtest matrix1.txt > qrtest_out1.txt
	- ./cov

# vgo1: quick run under Valgrind.  For a full leak report, use this command
# in place of the first recipe line:
#   valgrind --leak-check=full --show-reachable=yes ./qrtest matrix1.txt > qrtest_out1.txt
# (This note used to be a TAB-indented "#" line inside the recipe, which make
# hands to the shell and echoes on every run.)
vgo1: qrtest
	- valgrind ./qrtest matrix1.txt > qrtest_out1.txt
	- ./cov

# vgo: full run under Valgrind with leak checking.
vgo: qrtest
	- valgrind --leak-check=full --show-reachable=yes ./qrtest matrixlist.txt > qrtest_out.txt
	- ./cov
203
# Objects compiled from ../Source.  One static pattern rule replaces the
# per-file rules: each name.o listed here is built from ../Source/name.cpp
# with the shared C++ compile command $(C).
TCOV_SOURCE_OBJ = $(addsuffix .o, \
    spqr_1colamd spqr_1factor spqr_1fixed \
    spqr_analyze spqr_parallel spqr_kernel \
    spqr_append spqr_assemble \
    spqr_cpack spqr_csize spqr_cumsum spqr_debug \
    spqr_factorize spqr_fcsize \
    spqr_freefac spqr_freenum spqr_freesym spqr_fsize \
    spqr_happly spqr_panel spqr_happly_work spqr_hpinv \
    spqr_larftb \
    spqr_rconvert spqr_rcount spqr_rhpack spqr_rsolve \
    spqr_shift spqr_stranspose1 spqr_stranspose2 \
    spqr_trapezoidal spqr_type spqr_front \
    SuiteSparseQR_expert spqr_maxcolnorm SuiteSparseQR_qmult \
    SuiteSparseQR spqr_tol SuiteSparseQR_C spqr_rmap)

$(TCOV_SOURCE_OBJ): %.o: ../Source/%.cpp
	$(C) -c $<

# Objects compiled from ../SPQRGPU, built the same way.
TCOV_SPQRGPU_OBJ = $(addsuffix .o, \
    spqrgpu_kernel \
    spqrgpu_buildAssemblyMaps \
    spqrgpu_computeFrontStaging)

$(TCOV_SPQRGPU_OBJ): %.o: ../SPQRGPU/%.cpp
	$(C) -c $<
332
#-------------------------------------------------------------------------------
# libraries compiled without test coverage
#-------------------------------------------------------------------------------

# Build the packages the test programs link against by running make in each
# package directory.  TCOV=yes is passed to every package sub-make; what it
# selects is decided by those makefiles, not here.
#
# Each line joins "cd" and "$(MAKE)" with "&&".  The SuiteSparse_config line
# used ";", so a failed cd would have run "$(MAKE) library" in this directory,
# where "library" depends on qrtest and therefore on this target again.
#
# The target is a command, not a file, hence .PHONY.
.PHONY: libraries
libraries:
	( cd ../.. && $(MAKE) metis )
	( cd ../../SuiteSparse_config && $(MAKE) library TCOV=yes )
	( cd ../../AMD && $(MAKE) library TCOV=yes )
	( cd ../../COLAMD && $(MAKE) library TCOV=yes )
	( cd ../../CCOLAMD && $(MAKE) library TCOV=yes )
	( cd ../../CAMD && $(MAKE) library TCOV=yes )
	( cd ../../CHOLMOD && $(MAKE) library TCOV=yes )
345
#-------------------------------------------------------------------------------
# SuiteSparse_GPURuntime
#-------------------------------------------------------------------------------

# Locations of the GPU runtime package, its sources, and its include flags.
GPURUNTIME = ../../SuiteSparse_GPURuntime
GPURUNSRC = $(GPURUNTIME)/Source
GPURUNINC = -I$(GPURUNTIME)/Include -I../../SuiteSparse_config

# Headers every GPU runtime object depends on.  A dependency on the Makefile
# itself is deliberately left disabled.
RUNH = $(addprefix $(GPURUNTIME)/Include/, \
    SuiteSparseGPU_Workspace.hpp \
    SuiteSparseGPU_debug.hpp \
    SuiteSparseGPU_macros.hpp \
    SuiteSparseGPU_workspace_macros.hpp \
    SuiteSparseGPU_Runtime.hpp)

# GPU runtime objects, each built with nvcc from the .cpp file of the same
# name in $(GPURUNSRC).
GPURUN_OBJ = $(addsuffix .o, \
    SuiteSparseGPU_Workspace \
    SuiteSparseGPU_Workspace_cpuAllocators \
    SuiteSparseGPU_Workspace_gpuAllocators \
    SuiteSparseGPU_Workspace_memset \
    SuiteSparseGPU_Workspace_transfer)

$(GPURUN_OBJ): %.o: $(GPURUNSRC)/%.cpp $(RUNH)
	$(NVCC) -c $(GPURUNINC) $<
380
#-------------------------------------------------------------------------------
# GPUQREngine
#-------------------------------------------------------------------------------

# Locations of the GPUQREngine package, its sources and demo, and its
# include flags (the GPU runtime include flags plus its own directory).
GPUQR = ../../GPUQREngine
GPUQRSRC = $(GPUQR)/Source
GPUQRDEMO = $(GPUQR)/Demo
GPUQRINC = $(GPURUNINC) -I$(GPUQR)/Include

# Headers and included .cu kernel sources that every GPUQREngine object
# depends on.  A dependency on the Makefile itself is deliberately left
# disabled.
KERNELH = \
    $(addprefix $(GPUQR)/Include/, \
        GPUQREngine_Common.hpp \
        GPUQREngine_BucketList.hpp \
        GPUQREngine_Front.hpp \
        GPUQREngine_FrontState.hpp \
        GPUQREngine.hpp \
        GPUQREngine_Internal.hpp \
        GPUQREngine_GraphVizHelper.hpp) \
    $(addprefix $(GPUQR)/Include/Kernel/Apply/, \
        block_apply_1_by_1.cu \
        block_apply_1.cu \
        block_apply_2_by_1.cu \
        block_apply_2.cu \
        block_apply_3_by_1.cu \
        block_apply_3.cu \
        block_apply_chunk.cu \
        block_apply.cu \
        cevta_tile.cu \
        pipelined_rearrange.cu) \
    $(addprefix $(GPUQR)/Include/Kernel/Assemble/, \
        packAssemble.cu \
        sAssemble.cu) \
    $(addprefix $(GPUQR)/Include/Kernel/Factorize/, \
        factorize_3_by_1.cu \
        factorize_vt_1_by_1.cu \
        factorize_vt_1_by_1_edge.cu \
        factorize_vt_2_by_1.cu \
        factorize_vt_2_by_1_edge.cu \
        factorize_vt_3_by_1.cu \
        factorize_vt_3_by_1_edge.cu \
        factorize_vt.cu) \
    $(addprefix $(GPUQR)/Include/Kernel/, \
        qrKernel.cu \
        sharedMemory.hpp \
        uberKernel.cu) \
    $(addprefix $(GPUQR)/Include/, \
        GPUQREngine_LLBundle.hpp \
        GPUQREngine_Stats.hpp \
        GPUQREngine_Scheduler.hpp \
        GPUQREngine_SEntry.hpp \
        GPUQREngine_SparseMeta.hpp \
        GPUQREngine_TaskDescriptor.hpp \
        GPUQREngine_Timing.hpp)

# The rules below are grouped by the source subdirectory of each object.
# Every object is compiled by nvcc from the file of the same base name.

# Top-level engine sources (.cpp).
GPUQR_TOP_OBJ = $(addsuffix .o, \
    GPUQREngine_GraphVizHelper \
    GPUQREngine_ExpertDense \
    GPUQREngine_Internal \
    GPUQREngine_ExpertSparse)

$(GPUQR_TOP_OBJ): %.o: $(GPUQRSRC)/%.cpp $(KERNELH)
	$(NVCC) -c  $(GPUQRINC) $<

# The uber-kernel is the only object built from a .cu source.
GPUQREngine_UberKernel.o: %.o: $(GPUQRSRC)/%.cu $(KERNELH)
	$(NVCC) -c  $(GPUQRINC) $<

# Source/BucketList
GPUQR_BUCKETLIST_OBJ = $(addsuffix .o, \
    BucketList \
    BucketList_AdvanceBundles \
    BucketList_CreateBundles \
    BucketList_FillWorkQueue \
    BucketList_GrowBundles \
    BucketList_Manage \
    BucketList_PostProcessing)

$(GPUQR_BUCKETLIST_OBJ): %.o: $(GPUQRSRC)/BucketList/%.cpp $(KERNELH)
	$(NVCC) -c  $(GPUQRINC) $<

# Source/LLBundle
GPUQR_LLBUNDLE_OBJ = $(addsuffix .o, \
    LLBundle \
    LLBundle_AddTiles \
    LLBundle_Advance \
    LLBundle_GPUPack \
    LLBundle_PipelinedRearrange \
    LLBundle_UpdateSecondMinIndex)

$(GPUQR_LLBUNDLE_OBJ): %.o: $(GPUQRSRC)/LLBundle/%.cpp $(KERNELH)
	$(NVCC) -c  $(GPUQRINC) $<

# Source/Scheduler
GPUQR_SCHEDULER_OBJ = $(addsuffix .o, \
    Scheduler \
    Scheduler_FillWorkQueue \
    Scheduler_Front \
    Scheduler_LaunchKernel \
    Scheduler_PostProcess \
    Scheduler_Render \
    Scheduler_TransferData \
    ssgpu_maxQueueSize)

$(GPUQR_SCHEDULER_OBJ): %.o: $(GPUQRSRC)/Scheduler/%.cpp $(KERNELH)
	$(NVCC) -c  $(GPUQRINC) $<

# Source/TaskDescriptor
TaskDescriptor_flops.o: %.o: $(GPUQRSRC)/TaskDescriptor/%.cpp $(KERNELH)
	$(NVCC) -c  $(GPUQRINC) $<
511
512