#===============================================================================
# SuiteSparseQR/Tcov/Makefile
#===============================================================================

# Statement-coverage test harness for SuiteSparseQR.  Every SPQR source file
# (and, when GPU_CONFIG is non-empty, the GPU runtime and GPUQREngine sources)
# is recompiled here with gcov instrumentation and linked into ./qrtest.
#
# This test requires METIS, and it only works on Linux.

# run statement-coverage test without Valgrind
default: go

# to run with Valgrind as well
valgrind: vgo

ccode: all

# These targets are commands, not files.  Declaring them phony keeps a stray
# file with the same name (e.g. "clean" or "go") from disabling the target.
.PHONY: default valgrind ccode all library purge distclean clean \
        gpu go go1 vgo1 vgo

include ../../SuiteSparse_config/SuiteSparse_config.mk

# this test requires gcc
CC = gcc
CXX= g++
# BLAS = -lrefblas
# LAPACK = -llapack

# for statement coverage (with gcov; see go) and picky compiler warnings
CF = -pg \
    -O0 -g -fprofile-arcs -ftest-coverage \
    -Wall -W -Wshadow -Winline -Wno-unused-parameter \
    -Wredundant-decls -Wdisabled-optimization \
    -fexceptions -fopenmp

# Using an optimized BLAS can cause problems in Valgrind
# FLIB = -llapack_plain -lblas_plain -lgfortran
# FLIB = -llapack_plain -lblas_plain -lgfortran -lg2c
FLIB = $(LAPACK) $(BLAS)

#-------------------------------------------------------------------------------

CUDA_LIB = $(CUDART_LIB) $(CUBLAS_LIB)

# nvcc is given the same gcov instrumentation flags as the host compiler,
# forwarded to it through -Xcompiler.
NVCC = /usr/local/cuda/bin/nvcc -g --profile --generate-line-info $(NV20) \
    -Xcompiler -fprofile-arcs -Xcompiler -ftest-coverage

#-------------------------------------------------------------------------------

CLIB = -L../../lib -lcholmod -lamd -lcolamd -lccolamd -lcamd -lmetis -lsuitesparseconfig

# NOTE(review): no rule for "gpudemo" is visible in this file (the GPU demo
# built below is "gpuqrengine_demo").  Unless the included SuiteSparse_config.mk
# provides one, "make all" and "make library" stop with "No rule to make target
# 'gpudemo'" -- confirm the intended prerequisite.
all: qrtest gpudemo qrdemo_gpu

library: qrtest gpudemo

purge: distclean

# The leading '-' on each recipe line tells make to ignore a failing removal.
distclean: clean
	- $(RM) qrtest qrtest_out.txt pfile tfile cov.out qrtest_out1.txt
	- $(RM) gpuqrengine_demo troll.m qrdemo_gpu gpu_results.txt X.mtx
	- $(RM) -r $(PURGE)

clean:
	- $(RM) -r $(CLEAN)

# Headers that every object file depends on (see "$(OBJ): $(INC)" below).
INC = ../Include/spqr.hpp ../Include/SuiteSparseQR_C.h \
    ../Include/SuiteSparseQR_definitions.h \
    ../Include/SuiteSparseQR.hpp
# Makefile

# Objects linked into qrtest and the demo programs.
OBJ = \
    spqr_rmap.o \
    SuiteSparseQR_C.o \
    SuiteSparseQR_expert.o \
    spqr_parallel.o \
    spqr_kernel.o \
    spqr_analyze.o \
    spqr_assemble.o \
    spqr_cpack.o \
    spqr_csize.o \
    spqr_fcsize.o \
    spqr_debug.o \
    spqr_front.o \
    spqr_factorize.o \
    spqr_freenum.o \
    spqr_freesym.o \
    spqr_freefac.o \
    spqr_fsize.o \
    spqr_maxcolnorm.o \
    spqr_rconvert.o \
    spqr_rcount.o \
    spqr_rhpack.o \
    spqr_rsolve.o \
    spqr_stranspose1.o \
    spqr_stranspose2.o \
    spqr_hpinv.o \
    spqr_1fixed.o \
    spqr_1colamd.o \
    SuiteSparseQR.o \
    spqr_1factor.o \
    spqr_cumsum.o \
    spqr_shift.o \
    spqr_happly.o \
    spqr_panel.o \
    spqr_happly_work.o \
    SuiteSparseQR_qmult.o \
    spqr_trapezoidal.o \
    spqr_larftb.o \
    spqr_append.o \
    spqr_type.o \
    spqr_tol.o \
    qrtestc.o

# GPU-side objects are added only when GPU_CONFIG is non-empty.
ifneq ($(GPU_CONFIG),)
OBJ += \
    spqrgpu_kernel.o \
    spqrgpu_buildAssemblyMaps.o \
    spqrgpu_computeFrontStaging.o \
    SuiteSparseGPU_Workspace.o \
    SuiteSparseGPU_Workspace_cpuAllocators.o \
    SuiteSparseGPU_Workspace_gpuAllocators.o \
    SuiteSparseGPU_Workspace_transfer.o \
    GPUQREngine_GraphVizHelper.o \
    GPUQREngine_UberKernel.o \
    GPUQREngine_ExpertSparse.o \
    GPUQREngine_Internal.o \
    GPUQREngine_ExpertDense.o \
    BucketList.o \
    BucketList_AdvanceBundles.o \
    BucketList_CreateBundles.o \
    BucketList_FillWorkQueue.o \
    BucketList_GrowBundles.o \
    BucketList_Manage.o \
    BucketList_PostProcessing.o \
    LLBundle.o \
    LLBundle_AddTiles.o \
    LLBundle_Advance.o \
    LLBundle_GPUPack.o \
    LLBundle_PipelinedRearrange.o \
    LLBundle_UpdateSecondMinIndex.o \
    Scheduler.o \
    Scheduler_FillWorkQueue.o \
    Scheduler_Front.o \
    Scheduler_LaunchKernel.o \
    Scheduler_PostProcess.o \
    Scheduler_Render.o \
    Scheduler_TransferData.o \
    ssgpu_maxQueueSize.o \
    TaskDescriptor_flops.o
endif

# Prerequisite-only rule: every object is rebuilt when a public header changes.
# The recipe for each object comes from its own rule further down.
$(OBJ): $(INC)

# Include path.  (No trailing backslash here: a dangling continuation would
# absorb whatever line follows it into the value of I.)
I = -I../../CHOLMOD/Include -I../../SuiteSparse_config -I../Include

ifneq ($(GPU_CONFIG),)
  I += -I../../SuiteSparse_GPURuntime/Include \
       -I../../GPUQREngine/Include $(CUDA_INC)
endif

# C++ compile command used by the per-file rules.
C = $(CXX) $(CF) $(I) $(SPQR_CONFIG)

LIBS = $(CLIB) $(FLIB) $(LDLIBS) $(CUDA_LIB)

# qrtestc.c is the only C source; it is compiled with $(CC) rather than $(C).
qrtestc.o: qrtestc.c $(INC)
	$(CC) $(CF) $(I) -c $<

qrtest: libraries qrtest.cpp $(INC) $(OBJ)
	$(C) qrtest.cpp -o qrtest $(OBJ) $(LIBS) -lm

# "gpu" builds and runs the GPU demos when GPU_CONFIG is non-empty; otherwise
# it is an empty target so that "go" can always list it as a prerequisite.
ifneq ($(GPU_CONFIG),)
gpu: gpuqrengine_demo qrdemo_gpu
	- ./gpuqrengine_demo
	- ./qrdemo_gpu ../Matrix/west0067.mtx 2
	- ./qrdemo_gpu ../Matrix/lp_e226_transposed.mtx 2
	- ./qrdemo_gpu ../Matrix/lp_e226_transposed.mtx 6
	- ./qrdemo_gpu ../Matrix/Groebner_id2003_aug.mtx 6
	- ./qrdemo_gpu ../Matrix/Franz6_id1959_aug.mtx 6
else
gpu:
endif

# $(GPUQRDEMO) is assigned later in this file; that works because recipes are
# expanded only when they run.
gpuqrengine_demo: libraries \
    ../../GPUQREngine/Demo/gpuqrengine_demo.cpp $(INC) $(OBJ)
	$(C) $(GPUQRDEMO)/gpuqrengine_demo.cpp -o gpuqrengine_demo \
		$(OBJ) $(LIBS) -lm

# Without GPU_CONFIG this rule has no recipe: its prerequisites are still
# brought up to date, but no qrdemo_gpu executable is produced.
qrdemo_gpu: ../Demo/qrdemo_gpu.cpp $(INC) $(OBJ)
ifneq ($(GPU_CONFIG),)
	$(C) ../Demo/qrdemo_gpu.cpp -o qrdemo_gpu $(OBJ) $(LIBS)
endif

# Test drivers.  The leading '-' lets make continue to ./cov even when the
# test program (or valgrind) exits with a non-zero status.
go: qrtest gpu qrdemo_gpu
	- ./qrtest matrixlist.txt > qrtest_out.txt
	- ./cov

go1: qrtest
	- ./qrtest matrix1.txt > qrtest_out1.txt
	- ./cov

vgo1: qrtest
	- valgrind ./qrtest matrix1.txt > qrtest_out1.txt
# alternative with full leak checking:
# - valgrind --leak-check=full --show-reachable=yes ./qrtest matrix1.txt > qrtest_out1.txt
	- ./cov

vgo: qrtest
	- valgrind --leak-check=full --show-reachable=yes ./qrtest matrixlist.txt > qrtest_out.txt
	- ./cov

# Per-file compile rules: each object is built from its ../Source counterpart.
spqr_1colamd.o: ../Source/spqr_1colamd.cpp
	$(C) -c $<

spqr_1factor.o: ../Source/spqr_1factor.cpp
	$(C) -c $<

spqr_1fixed.o: ../Source/spqr_1fixed.cpp
	$(C) -c $<

spqr_analyze.o: ../Source/spqr_analyze.cpp
	$(C) -c $<

spqr_parallel.o: ../Source/spqr_parallel.cpp
	$(C) -c $<

spqr_kernel.o: ../Source/spqr_kernel.cpp
	$(C) -c $<

spqr_append.o: ../Source/spqr_append.cpp
	$(C) -c $<

# Per-file compile rules (continued): each object is built from the matching
# source in ../Source with the $(C) compile command.
spqr_assemble.o: ../Source/spqr_assemble.cpp
	$(C) -c $<

spqr_cpack.o: ../Source/spqr_cpack.cpp
	$(C) -c $<

spqr_csize.o: ../Source/spqr_csize.cpp
	$(C) -c $<

spqr_cumsum.o: ../Source/spqr_cumsum.cpp
	$(C) -c $<

spqr_debug.o: ../Source/spqr_debug.cpp
	$(C) -c $<

spqr_factorize.o: ../Source/spqr_factorize.cpp
	$(C) -c $<

spqr_fcsize.o: ../Source/spqr_fcsize.cpp
	$(C) -c $<

spqr_freefac.o: ../Source/spqr_freefac.cpp
	$(C) -c $<

spqr_freenum.o: ../Source/spqr_freenum.cpp
	$(C) -c $<

spqr_freesym.o: ../Source/spqr_freesym.cpp
	$(C) -c $<

spqr_fsize.o: ../Source/spqr_fsize.cpp
	$(C) -c $<

spqr_happly.o: ../Source/spqr_happly.cpp
	$(C) -c $<

spqr_panel.o: ../Source/spqr_panel.cpp
	$(C) -c $<

spqr_happly_work.o: ../Source/spqr_happly_work.cpp
	$(C) -c $<

spqr_hpinv.o: ../Source/spqr_hpinv.cpp
	$(C) -c $<

spqr_larftb.o: ../Source/spqr_larftb.cpp
	$(C) -c $<

spqr_rconvert.o: ../Source/spqr_rconvert.cpp
	$(C) -c $<

spqr_rcount.o: ../Source/spqr_rcount.cpp
	$(C) -c $<

spqr_rhpack.o: ../Source/spqr_rhpack.cpp
	$(C) -c $<

spqr_rsolve.o: ../Source/spqr_rsolve.cpp
	$(C) -c $<

spqr_shift.o: ../Source/spqr_shift.cpp
	$(C) -c $<

spqr_stranspose1.o: ../Source/spqr_stranspose1.cpp
	$(C) -c $<

spqr_stranspose2.o: ../Source/spqr_stranspose2.cpp
	$(C) -c $<

spqr_trapezoidal.o: ../Source/spqr_trapezoidal.cpp
	$(C) -c $<

spqr_type.o: ../Source/spqr_type.cpp
	$(C) -c $<

spqr_front.o: ../Source/spqr_front.cpp
	$(C) -c $<

SuiteSparseQR_expert.o: ../Source/SuiteSparseQR_expert.cpp
	$(C) -c $<

spqr_maxcolnorm.o: ../Source/spqr_maxcolnorm.cpp
	$(C) -c $<

SuiteSparseQR_qmult.o: ../Source/SuiteSparseQR_qmult.cpp
	$(C) -c $<

SuiteSparseQR.o: ../Source/SuiteSparseQR.cpp
	$(C) -c $<

spqr_tol.o: ../Source/spqr_tol.cpp
	$(C) -c $<

SuiteSparseQR_C.o: ../Source/SuiteSparseQR_C.cpp
	$(C) -c $<

spqr_rmap.o: ../Source/spqr_rmap.cpp
	$(C) -c $<

# SPQR's own GPU glue sources live in ../SPQRGPU.
spqrgpu_kernel.o: ../SPQRGPU/spqrgpu_kernel.cpp
	$(C) -c $<

spqrgpu_buildAssemblyMaps.o: ../SPQRGPU/spqrgpu_buildAssemblyMaps.cpp
	$(C) -c $<

spqrgpu_computeFrontStaging.o: ../SPQRGPU/spqrgpu_computeFrontStaging.cpp
	$(C) -c $<

#-------------------------------------------------------------------------------
# libraries compiled without test coverage
#-------------------------------------------------------------------------------

# Builds the prerequisite SuiteSparse packages by recursing into each package
# directory.  Every step uses "cd DIR && $(MAKE) ...": with ';' a failed cd
# would run "$(MAKE) library" in THIS directory and re-enter this Makefile.
# NOTE(review): the meaning of TCOV=yes is defined by the sub-makefiles, which
# are not visible here -- confirm it matches the section title above.
.PHONY: libraries
libraries:
	( cd ../.. && $(MAKE) metis )
	( cd ../../SuiteSparse_config && $(MAKE) library TCOV=yes )
	( cd ../../AMD && $(MAKE) library TCOV=yes )
	( cd ../../COLAMD && $(MAKE) library TCOV=yes )
	( cd ../../CCOLAMD && $(MAKE) library TCOV=yes )
	( cd ../../CAMD && $(MAKE) library TCOV=yes )
	( cd ../../CHOLMOD && $(MAKE) library TCOV=yes )

#-------------------------------------------------------------------------------
# SuiteSparse_GPURuntime
#-------------------------------------------------------------------------------

GPURUNTIME = ../../SuiteSparse_GPURuntime
GPURUNSRC = $(GPURUNTIME)/Source
GPURUNINC = -I$(GPURUNTIME)/Include -I../../SuiteSparse_config

# Headers that every GPU runtime object depends on.
RUNH = \
    $(GPURUNTIME)/Include/SuiteSparseGPU_Workspace.hpp \
    $(GPURUNTIME)/Include/SuiteSparseGPU_debug.hpp \
    $(GPURUNTIME)/Include/SuiteSparseGPU_macros.hpp \
    $(GPURUNTIME)/Include/SuiteSparseGPU_workspace_macros.hpp \
    $(GPURUNTIME)/Include/SuiteSparseGPU_Runtime.hpp
# Makefile

# The GPU runtime sources are compiled with $(NVCC), not with $(C).
SuiteSparseGPU_Workspace.o: $(GPURUNSRC)/SuiteSparseGPU_Workspace.cpp $(RUNH)
	$(NVCC) -c $(GPURUNINC) $<

SuiteSparseGPU_Workspace_cpuAllocators.o: \
    $(GPURUNSRC)/SuiteSparseGPU_Workspace_cpuAllocators.cpp $(RUNH)
	$(NVCC) -c $(GPURUNINC) $<

SuiteSparseGPU_Workspace_gpuAllocators.o: \
    $(GPURUNSRC)/SuiteSparseGPU_Workspace_gpuAllocators.cpp $(RUNH)
	$(NVCC) -c $(GPURUNINC) $<

SuiteSparseGPU_Workspace_memset.o: \
    $(GPURUNSRC)/SuiteSparseGPU_Workspace_memset.cpp $(RUNH)
	$(NVCC) -c $(GPURUNINC) $<

SuiteSparseGPU_Workspace_transfer.o: \
    $(GPURUNSRC)/SuiteSparseGPU_Workspace_transfer.cpp $(RUNH)
	$(NVCC) -c $(GPURUNINC) $<

#-------------------------------------------------------------------------------
# GPUQREngine
#-------------------------------------------------------------------------------

GPUQR = ../../GPUQREngine
GPUQRSRC = $(GPUQR)/Source
GPUQRDEMO = $(GPUQR)/Demo
GPUQRINC = $(GPURUNINC) -I$(GPUQR)/Include

# Headers and included .cu kernel sources that every GPUQREngine object
# depends on.
KERNELH = \
    $(GPUQR)/Include/GPUQREngine_Common.hpp \
    $(GPUQR)/Include/GPUQREngine_BucketList.hpp \
    $(GPUQR)/Include/GPUQREngine_Front.hpp \
    $(GPUQR)/Include/GPUQREngine_FrontState.hpp \
    $(GPUQR)/Include/GPUQREngine.hpp \
    $(GPUQR)/Include/GPUQREngine_Internal.hpp \
    $(GPUQR)/Include/GPUQREngine_GraphVizHelper.hpp \
    $(GPUQR)/Include/Kernel/Apply/block_apply_1_by_1.cu \
    $(GPUQR)/Include/Kernel/Apply/block_apply_1.cu \
    $(GPUQR)/Include/Kernel/Apply/block_apply_2_by_1.cu \
    $(GPUQR)/Include/Kernel/Apply/block_apply_2.cu \
    $(GPUQR)/Include/Kernel/Apply/block_apply_3_by_1.cu \
    $(GPUQR)/Include/Kernel/Apply/block_apply_3.cu \
    $(GPUQR)/Include/Kernel/Apply/block_apply_chunk.cu \
    $(GPUQR)/Include/Kernel/Apply/block_apply.cu \
    $(GPUQR)/Include/Kernel/Apply/cevta_tile.cu \
    $(GPUQR)/Include/Kernel/Apply/pipelined_rearrange.cu \
    $(GPUQR)/Include/Kernel/Assemble/packAssemble.cu \
    $(GPUQR)/Include/Kernel/Assemble/sAssemble.cu \
    $(GPUQR)/Include/Kernel/Factorize/factorize_3_by_1.cu \
    $(GPUQR)/Include/Kernel/Factorize/factorize_vt_1_by_1.cu \
    $(GPUQR)/Include/Kernel/Factorize/factorize_vt_1_by_1_edge.cu \
    $(GPUQR)/Include/Kernel/Factorize/factorize_vt_2_by_1.cu \
    $(GPUQR)/Include/Kernel/Factorize/factorize_vt_2_by_1_edge.cu \
    $(GPUQR)/Include/Kernel/Factorize/factorize_vt_3_by_1.cu \
    $(GPUQR)/Include/Kernel/Factorize/factorize_vt_3_by_1_edge.cu \
    $(GPUQR)/Include/Kernel/Factorize/factorize_vt.cu \
    $(GPUQR)/Include/Kernel/qrKernel.cu \
    $(GPUQR)/Include/Kernel/sharedMemory.hpp \
    $(GPUQR)/Include/Kernel/uberKernel.cu \
    $(GPUQR)/Include/GPUQREngine_LLBundle.hpp \
    $(GPUQR)/Include/GPUQREngine_Stats.hpp \
    $(GPUQR)/Include/GPUQREngine_Scheduler.hpp \
    $(GPUQR)/Include/GPUQREngine_SEntry.hpp \
    $(GPUQR)/Include/GPUQREngine_SparseMeta.hpp \
    $(GPUQR)/Include/GPUQREngine_TaskDescriptor.hpp \
    $(GPUQR)/Include/GPUQREngine_Timing.hpp
# Makefile

# Top-level GPUQREngine sources.
GPUQREngine_GraphVizHelper.o: \
    $(GPUQRSRC)/GPUQREngine_GraphVizHelper.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

# The only object in this file whose source is a .cu file.
GPUQREngine_UberKernel.o: $(GPUQRSRC)/GPUQREngine_UberKernel.cu $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

GPUQREngine_ExpertDense.o: $(GPUQRSRC)/GPUQREngine_ExpertDense.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

GPUQREngine_Internal.o: $(GPUQRSRC)/GPUQREngine_Internal.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

GPUQREngine_ExpertSparse.o: $(GPUQRSRC)/GPUQREngine_ExpertSparse.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

# Sources under $(GPUQRSRC)/BucketList.
BucketList.o: $(GPUQRSRC)/BucketList/BucketList.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

BucketList_AdvanceBundles.o: $(GPUQRSRC)/BucketList/BucketList_AdvanceBundles.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

BucketList_CreateBundles.o: $(GPUQRSRC)/BucketList/BucketList_CreateBundles.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

BucketList_FillWorkQueue.o: $(GPUQRSRC)/BucketList/BucketList_FillWorkQueue.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

BucketList_GrowBundles.o: $(GPUQRSRC)/BucketList/BucketList_GrowBundles.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

BucketList_Manage.o: $(GPUQRSRC)/BucketList/BucketList_Manage.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

BucketList_PostProcessing.o: $(GPUQRSRC)/BucketList/BucketList_PostProcessing.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

# Sources under $(GPUQRSRC)/LLBundle.
LLBundle.o: $(GPUQRSRC)/LLBundle/LLBundle.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

LLBundle_AddTiles.o: $(GPUQRSRC)/LLBundle/LLBundle_AddTiles.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

LLBundle_Advance.o: $(GPUQRSRC)/LLBundle/LLBundle_Advance.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

LLBundle_GPUPack.o: $(GPUQRSRC)/LLBundle/LLBundle_GPUPack.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

LLBundle_PipelinedRearrange.o: $(GPUQRSRC)/LLBundle/LLBundle_PipelinedRearrange.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

LLBundle_UpdateSecondMinIndex.o: $(GPUQRSRC)/LLBundle/LLBundle_UpdateSecondMinIndex.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

# Sources under $(GPUQRSRC)/Scheduler.
Scheduler.o: $(GPUQRSRC)/Scheduler/Scheduler.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

Scheduler_FillWorkQueue.o: $(GPUQRSRC)/Scheduler/Scheduler_FillWorkQueue.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

Scheduler_Front.o: $(GPUQRSRC)/Scheduler/Scheduler_Front.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

Scheduler_LaunchKernel.o: $(GPUQRSRC)/Scheduler/Scheduler_LaunchKernel.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

Scheduler_PostProcess.o: $(GPUQRSRC)/Scheduler/Scheduler_PostProcess.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

Scheduler_Render.o: $(GPUQRSRC)/Scheduler/Scheduler_Render.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

Scheduler_TransferData.o: $(GPUQRSRC)/Scheduler/Scheduler_TransferData.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

ssgpu_maxQueueSize.o: $(GPUQRSRC)/Scheduler/ssgpu_maxQueueSize.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<

# Source under $(GPUQRSRC)/TaskDescriptor.
TaskDescriptor_flops.o: $(GPUQRSRC)/TaskDescriptor/TaskDescriptor_flops.cpp $(KERNELH)
	$(NVCC) -c $(GPUQRINC) $<
