1 //------------------------------------------------------------------------------
2 // GB_matlab_helper.c: helper functions for MATLAB interface
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // These functions are only used by the MATLAB interface for
11 // SuiteSparse:GraphBLAS.
12
13 #include "GB_matlab_helper.h"
14
15 //------------------------------------------------------------------------------
16 // GB_NTHREADS: determine the number of threads to use
17 //------------------------------------------------------------------------------
18
19 #define GB_NTHREADS(work) \
20 int nthreads_max = GB_Global_nthreads_max_get ( ) ; \
21 double chunk = GB_Global_chunk_get ( ) ; \
22 int nthreads = GB_nthreads (work, chunk, nthreads_max) ;
23
24 //------------------------------------------------------------------------------
25 // GB_ALLOCATE_WORK: allocate per-thread workspace
26 //------------------------------------------------------------------------------
27
28 #define GB_ALLOCATE_WORK(work_type) \
29 size_t Work_size ; \
30 work_type *Work = GB_MALLOC (nthreads, work_type, &Work_size) ; \
31 if (Work == NULL) return (false) ;
32
33 //------------------------------------------------------------------------------
34 // GB_FREE_WORK: free per-thread workspace
35 //------------------------------------------------------------------------------
36
37 #define GB_FREE_WORK(work_type) \
38 GB_FREE (&Work, Work_size) ;
39
40 //------------------------------------------------------------------------------
41 // GB_matlab_helper1: convert 0-based indices to 1-based for gbextracttuples
42 //------------------------------------------------------------------------------
43
GB_matlab_helper1(double * restrict I_double,const GrB_Index * restrict I,int64_t nvals)44 void GB_matlab_helper1 // convert zero-based indices to one-based
45 (
46 double *restrict I_double, // output array
47 const GrB_Index *restrict I, // input array
48 int64_t nvals // size of input and output arrays
49 )
50 {
51
52 GB_NTHREADS (nvals) ;
53
54 int64_t k ;
55 #pragma omp parallel for num_threads(nthreads) schedule(static)
56 for (k = 0 ; k < nvals ; k++)
57 {
58 I_double [k] = (double) (I [k] + 1) ;
59 }
60 }
61
62 //------------------------------------------------------------------------------
63 // GB_matlab_helper1i: convert 0-based indices to 1-based for gbextracttuples
64 //------------------------------------------------------------------------------
65
GB_matlab_helper1i(int64_t * restrict I,int64_t nvals)66 void GB_matlab_helper1i // convert zero-based indices to one-based
67 (
68 int64_t *restrict I, // input/output array
69 int64_t nvals // size of input/output array
70 )
71 {
72
73 GB_NTHREADS (nvals) ;
74
75 int64_t k ;
76 #pragma omp parallel for num_threads(nthreads) schedule(static)
77 for (k = 0 ; k < nvals ; k++)
78 {
79 I [k] ++ ;
80 }
81 }
82
83 //------------------------------------------------------------------------------
84 // GB_matlab_helper3: convert 1-based indices to 0-based for gb_mxarray_to_list
85 //------------------------------------------------------------------------------
86
GB_matlab_helper3(int64_t * restrict List,const double * restrict List_double,int64_t len,int64_t * List_max)87 bool GB_matlab_helper3 // return true if OK, false on error
88 (
89 int64_t *restrict List, // size len, output array
90 const double *restrict List_double, // size len, input array
91 int64_t len,
92 int64_t *List_max // also compute the max entry in the list
93 )
94 {
95
96 GB_NTHREADS (len) ;
97
98 ASSERT (List != NULL) ;
99 ASSERT (List_double != NULL) ;
100 ASSERT (List_max != NULL) ;
101
102 bool ok = true ;
103 int64_t listmax = -1 ;
104
105 GB_ALLOCATE_WORK (int64_t) ;
106
107 int tid ;
108 #pragma omp parallel for num_threads(nthreads) schedule(static)
109 for (tid = 0 ; tid < nthreads ; tid++)
110 {
111 bool my_ok = true ;
112 int64_t k1, k2, my_listmax = -1 ;
113 GB_PARTITION (k1, k2, len, tid, nthreads) ;
114 for (int64_t k = k1 ; k < k2 ; k++)
115 {
116 double x = List_double [k] ;
117 int64_t i = (int64_t) x ;
118 my_ok = my_ok && (x == (double) i) ;
119 my_listmax = GB_IMAX (my_listmax, i) ;
120 List [k] = i - 1 ;
121 }
122 // rather than create a separate per-thread boolean workspace, just
123 // use a sentinal value of INT64_MIN if non-integer indices appear
124 // in List_double.
125 Work [tid] = my_ok ? my_listmax : INT64_MIN ;
126 }
127
128 // wrapup
129 for (tid = 0 ; tid < nthreads ; tid++)
130 {
131 listmax = GB_IMAX (listmax, Work [tid]) ;
132 ok = ok && (Work [tid] != INT64_MIN) ;
133 }
134
135 GB_FREE_WORK (int64_t) ;
136
137 (*List_max) = listmax ;
138 return (ok) ;
139 }
140
141 //------------------------------------------------------------------------------
142 // GB_matlab_helper3i: convert 1-based indices to 0-based for gb_mxarray_to_list
143 //------------------------------------------------------------------------------
144
GB_matlab_helper3i(int64_t * restrict List,const int64_t * restrict List_int64,int64_t len,int64_t * List_max)145 bool GB_matlab_helper3i // return true if OK, false on error
146 (
147 int64_t *restrict List, // size len, output array
148 const int64_t *restrict List_int64, // size len, input array
149 int64_t len,
150 int64_t *List_max // also compute the max entry in the list
151 )
152 {
153
154 GB_NTHREADS (len) ;
155
156 int64_t listmax = -1 ;
157
158 GB_ALLOCATE_WORK (int64_t) ;
159
160 int tid ;
161 #pragma omp parallel for num_threads(nthreads) schedule(static)
162 for (tid = 0 ; tid < nthreads ; tid++)
163 {
164 int64_t k1, k2, my_listmax = -1 ;
165 GB_PARTITION (k1, k2, len, tid, nthreads) ;
166 for (int64_t k = k1 ; k < k2 ; k++)
167 {
168 int64_t i = List_int64 [k] ;
169 my_listmax = GB_IMAX (my_listmax, i) ;
170 List [k] = i - 1 ;
171 }
172 Work [tid] = my_listmax ;
173 }
174
175 // wrapup
176 for (tid = 0 ; tid < nthreads ; tid++)
177 {
178 listmax = GB_IMAX (listmax, Work [tid]) ;
179 }
180
181 GB_FREE_WORK (int64_t) ;
182
183 (*List_max) = listmax ;
184 return (true) ;
185 }
186
187 //------------------------------------------------------------------------------
188 // GB_matlab_helper4: find the max entry in an index list for gbbuild
189 //------------------------------------------------------------------------------
190
GB_matlab_helper4(const GrB_Index * restrict I,const int64_t len,GrB_Index * List_max)191 bool GB_matlab_helper4 // return true if OK, false on error
192 (
193 const GrB_Index *restrict I, // array of size len
194 const int64_t len,
195 GrB_Index *List_max // find max (I) + 1
196 )
197 {
198
199 GB_NTHREADS (len) ;
200
201 GrB_Index listmax = 0 ;
202
203 GB_ALLOCATE_WORK (GrB_Index) ;
204
205 int tid ;
206 #pragma omp parallel for num_threads(nthreads) schedule(static)
207 for (tid = 0 ; tid < nthreads ; tid++)
208 {
209 int64_t k1, k2 ;
210 GrB_Index my_listmax = 0 ;
211 GB_PARTITION (k1, k2, len, tid, nthreads) ;
212 for (int64_t k = k1 ; k < k2 ; k++)
213 {
214 my_listmax = GB_IMAX (my_listmax, I [k]) ;
215 }
216 Work [tid] = my_listmax ;
217 }
218
219 // wrapup
220 for (tid = 0 ; tid < nthreads ; tid++)
221 {
222 listmax = GB_IMAX (listmax, Work [tid]) ;
223 }
224
225 GB_FREE_WORK (GrB_Index) ;
226
227 if (len > 0) listmax++ ;
228 (*List_max) = listmax ;
229 return (true) ;
230 }
231
232 //------------------------------------------------------------------------------
233 // GB_matlab_helper5: construct pattern of S for gblogassign
234 //------------------------------------------------------------------------------
235
GB_matlab_helper5(GrB_Index * restrict Si,GrB_Index * restrict Sj,const GrB_Index * restrict Mi,const GrB_Index * restrict Mj,const int64_t mvlen,GrB_Index * restrict Ai,const int64_t avlen,const GrB_Index anz)236 void GB_matlab_helper5 // construct pattern of S
237 (
238 GrB_Index *restrict Si, // array of size anz
239 GrB_Index *restrict Sj, // array of size anz
240 const GrB_Index *restrict Mi, // array of size mnz, M->i, may be NULL
241 const GrB_Index *restrict Mj, // array of size mnz,
242 const int64_t mvlen, // M->vlen
243 GrB_Index *restrict Ai, // array of size anz, A->i, may be NULL
244 const int64_t avlen, // M->vlen
245 const GrB_Index anz
246 )
247 {
248
249 GB_NTHREADS (anz) ;
250 ASSERT (Mj != NULL) ;
251 ASSERT (Si != NULL) ;
252 ASSERT (Sj != NULL) ;
253
254 int64_t k ;
255 #pragma omp parallel for num_threads(nthreads) schedule(static)
256 for (k = 0 ; k < anz ; k++)
257 {
258 int64_t i = GBI (Ai, k, avlen) ;
259 Si [k] = GBI (Mi, i, mvlen) ;
260 Sj [k] = Mj [i] ;
261 }
262 }
263
264 //------------------------------------------------------------------------------
265 // GB_matlab_helper6: set bool array to all true gblogextract
266 //------------------------------------------------------------------------------
267
GB_matlab_helper6(bool * restrict Gbool,const GrB_Index gnvals)268 void GB_matlab_helper6 // set Gbool to all true
269 (
270 bool *restrict Gbool, // array of size gnvals
271 const GrB_Index gnvals
272 )
273 {
274
275 GB_NTHREADS (gnvals) ;
276
277 int64_t k ;
278 #pragma omp parallel for num_threads(nthreads) schedule(static)
279 for (k = 0 ; k < gnvals ; k++)
280 {
281 Gbool [k] = true ;
282 }
283 }
284
285 //------------------------------------------------------------------------------
286 // GB_matlab_helper7: Kx = uint64 (0:mnz-1), for gblogextract
287 //------------------------------------------------------------------------------
288
GB_matlab_helper7(uint64_t * restrict Kx,const GrB_Index mnz)289 void GB_matlab_helper7 // Kx = uint64 (0:mnz-1)
290 (
291 uint64_t *restrict Kx, // array of size mnz
292 const GrB_Index mnz
293 )
294 {
295
296 GB_NTHREADS (mnz) ;
297
298 int64_t k ;
299 #pragma omp parallel for num_threads(nthreads) schedule(static)
300 for (k = 0 ; k < mnz ; k++)
301 {
302 Kx [k] = k ;
303 }
304 }
305
306 //------------------------------------------------------------------------------
307 // GB_matlab_helper8: expand a scalar into an array for gbbuild
308 //------------------------------------------------------------------------------
309
GB_matlab_helper8(GB_void * C,GB_void * A,GrB_Index nvals,size_t s)310 void GB_matlab_helper8
311 (
312 GB_void *C, // output array of size nvals * s
313 GB_void *A, // input scalar of size s
314 GrB_Index nvals, // size of C
315 size_t s // size of each scalar
316 )
317 {
318
319 GB_NTHREADS (nvals) ;
320
321 int64_t k ;
322 #pragma omp parallel for num_threads(nthreads) schedule(static)
323 for (k = 0 ; k < nvals ; k++)
324 {
325 // C [k] = A [0]
326 memcpy (C + k * s, A, s) ;
327 }
328 }
329
330 //------------------------------------------------------------------------------
331 // GB_matlab_helper9: compute the degree of each vector
332 //------------------------------------------------------------------------------
333
334 // TODO: use GrB_mxv or GrB_vxm when possible.
335
GB_matlab_helper9(GrB_Matrix A,int64_t ** degree,size_t * degree_size,GrB_Index ** list,size_t * list_size,GrB_Index * nvec)336 bool GB_matlab_helper9 // true if successful, false if out of memory
337 (
338 GrB_Matrix A, // input matrix
339 int64_t **degree, // degree of each vector, size nvec
340 size_t *degree_size, // size of degree, in bytes
341 GrB_Index **list, // list of non-empty vectors
342 size_t *list_size, // size of degree, in bytes
343 GrB_Index *nvec // # of non-empty vectors
344 )
345 {
346
347 ASSERT_MATRIX_OK (A, "A for matlab helper9", GB0) ;
348 ASSERT (!GB_IS_BITMAP (A)) ;
349 ASSERT (GB_IS_SPARSE (A) || GB_IS_HYPERSPARSE (A) || GB_IS_FULL (A)) ;
350
351 int64_t anvec = A->nvec ;
352 GB_NTHREADS (anvec) ;
353
354 uint64_t *List = NULL ; size_t List_size = 0 ;
355 int64_t *Degree = NULL ; size_t Degree_size = 0 ;
356
357 List = GB_MALLOC (anvec, uint64_t, &List_size) ;
358 Degree = GB_MALLOC (anvec, int64_t , &Degree_size) ;
359
360 if (List == NULL || Degree == NULL)
361 {
362 GB_FREE (&List, List_size) ;
363 GB_FREE (&Degree, Degree_size) ;
364 return (false) ;
365 }
366
367 #ifdef GB_DEBUG
368 // remove List and Degree from the debug memtable, since they will be
369 // imported as the degree GrB_Vector d
370 GB_Global_memtable_remove (List) ;
371 GB_Global_memtable_remove (Degree) ;
372 #endif
373
374 int64_t *Ah = A->h ;
375 int64_t *Ap = A->p ;
376 int64_t avlen = A->vlen ;
377
378 int64_t k ;
379 #pragma omp parallel for num_threads(nthreads) schedule(static)
380 for (k = 0 ; k < anvec ; k++)
381 {
382 List [k] = GBH (Ah, k) ;
383 Degree [k] = (Ap == NULL) ? avlen : (Ap [k+1] - Ap [k]) ;
384 }
385
386 // return result
387 (*degree) = Degree ; (*degree_size) = Degree_size ;
388 (*list) = List ; (*list_size) = List_size ;
389 (*nvec) = anvec ;
390 return (true) ;
391 }
392
393 //------------------------------------------------------------------------------
394 // GB_matlab_helper10: compute norm (x-y,p) of two dense FP32 or FP64 vectors
395 //------------------------------------------------------------------------------
396
397 // p can be:
398
399 // 0 or 2: 2-norm, sqrt (sum ((x-y).^2))
400 // 1: 1-norm, sum (abs (x-y))
401 // INT64_MAX inf-norm, max (abs (x-y))
402 // INT64_MIN (-inf)-norm, min (abs (x-y))
403 // other: p-norm not yet computed
404
GB_matlab_helper10(GB_void * x_arg,GB_void * y_arg,GrB_Type type,int64_t p,GrB_Index n)405 double GB_matlab_helper10 // norm (x-y,p), or -1 on error
406 (
407 GB_void *x_arg, // float or double, depending on type parameter
408 GB_void *y_arg, // same type as x, treat as zero if NULL
409 GrB_Type type, // GrB_FP32 or GrB_FP64
410 int64_t p, // 0, 1, 2, INT64_MIN, or INT64_MAX
411 GrB_Index n
412 )
413 {
414
415 //--------------------------------------------------------------------------
416 // check inputs
417 //--------------------------------------------------------------------------
418
419 if (!(type == GrB_FP32 || type == GrB_FP64))
420 {
421 // type of x and y must be GrB_FP32 or GrB_FP64
422 return ((double) -1) ;
423 }
424
425 if (n == 0)
426 {
427 return ((double) 0) ;
428 }
429
430 //--------------------------------------------------------------------------
431 // allocate workspace and determine # of threads to use
432 //--------------------------------------------------------------------------
433
434 GB_NTHREADS (n) ;
435 GB_ALLOCATE_WORK (double) ;
436
437 //--------------------------------------------------------------------------
438 // each thread computes its partial norm
439 //--------------------------------------------------------------------------
440
441 int tid ;
442 #pragma omp parallel for num_threads(nthreads) schedule(static)
443 for (tid = 0 ; tid < nthreads ; tid++)
444 {
445 int64_t k1, k2 ;
446 GB_PARTITION (k1, k2, n, tid, nthreads) ;
447
448 if (type == GrB_FP32)
449 {
450
451 //------------------------------------------------------------------
452 // FP32 case
453 //------------------------------------------------------------------
454
455 float my_s = 0 ;
456 const float *x = (float *) x_arg ;
457 const float *y = (float *) y_arg ;
458 switch (p)
459 {
460 case 0: // Frobenius norm
461 case 2: // 2-norm: sqrt of sum of (x-y).^2
462 {
463 if (y == NULL)
464 {
465 for (int64_t k = k1 ; k < k2 ; k++)
466 {
467 float t = x [k] ;
468 my_s += (t*t) ;
469 }
470 }
471 else
472 {
473 for (int64_t k = k1 ; k < k2 ; k++)
474 {
475 float t = (x [k] - y [k]) ;
476 my_s += (t*t) ;
477 }
478 }
479 }
480 break ;
481
482 case 1: // 1-norm: sum (abs (x-y))
483 {
484 if (y == NULL)
485 {
486 for (int64_t k = k1 ; k < k2 ; k++)
487 {
488 my_s += fabsf (x [k]) ;
489 }
490 }
491 else
492 {
493 for (int64_t k = k1 ; k < k2 ; k++)
494 {
495 my_s += fabsf (x [k] - y [k]) ;
496 }
497 }
498 }
499 break ;
500
501 case INT64_MAX: // inf-norm: max (abs (x-y))
502 {
503 if (y == NULL)
504 {
505 for (int64_t k = k1 ; k < k2 ; k++)
506 {
507 my_s = fmaxf (my_s, fabsf (x [k])) ;
508 }
509 }
510 else
511 {
512 for (int64_t k = k1 ; k < k2 ; k++)
513 {
514 my_s = fmaxf (my_s, fabsf (x [k] - y [k])) ;
515 }
516 }
517 }
518 break ;
519
520 case INT64_MIN: // (-inf)-norm: min (abs (x-y))
521 {
522 my_s = INFINITY ;
523 if (y == NULL)
524 {
525 for (int64_t k = k1 ; k < k2 ; k++)
526 {
527 my_s = fminf (my_s, fabsf (x [k])) ;
528 }
529 }
530 else
531 {
532 for (int64_t k = k1 ; k < k2 ; k++)
533 {
534 my_s = fminf (my_s, fabsf (x [k] - y [k])) ;
535 }
536 }
537 }
538 break ;
539
540 default: ; // p-norm not yet supported
541 }
542 Work [tid] = (double) my_s ;
543
544 }
545 else
546 {
547
548 //------------------------------------------------------------------
549 // FP64 case
550 //------------------------------------------------------------------
551
552 double my_s = 0 ;
553 const double *x = (double *) x_arg ;
554 const double *y = (double *) y_arg ;
555 switch (p)
556 {
557 case 0: // Frobenius norm
558 case 2: // 2-norm: sqrt of sum of (x-y).^2
559 {
560 if (y == NULL)
561 {
562 for (int64_t k = k1 ; k < k2 ; k++)
563 {
564 double t = x [k] ;
565 my_s += (t*t) ;
566 }
567 }
568 else
569 {
570 for (int64_t k = k1 ; k < k2 ; k++)
571 {
572 double t = (x [k] - y [k]) ;
573 my_s += (t*t) ;
574 }
575 }
576 }
577 break ;
578
579 case 1: // 1-norm: sum (abs (x-y))
580 {
581 if (y == NULL)
582 {
583 for (int64_t k = k1 ; k < k2 ; k++)
584 {
585 my_s += fabs (x [k]) ;
586 }
587 }
588 else
589 {
590 for (int64_t k = k1 ; k < k2 ; k++)
591 {
592 my_s += fabs (x [k] - y [k]) ;
593 }
594 }
595 }
596 break ;
597
598 case INT64_MAX: // inf-norm: max (abs (x-y))
599 {
600 if (y == NULL)
601 {
602 for (int64_t k = k1 ; k < k2 ; k++)
603 {
604 my_s = fmax (my_s, fabs (x [k])) ;
605 }
606 }
607 else
608 {
609 for (int64_t k = k1 ; k < k2 ; k++)
610 {
611 my_s = fmax (my_s, fabs (x [k] - y [k])) ;
612 }
613 }
614 }
615 break ;
616
617 case INT64_MIN: // (-inf)-norm: min (abs (x-y))
618 {
619 my_s = INFINITY ;
620 if (y == NULL)
621 {
622 for (int64_t k = k1 ; k < k2 ; k++)
623 {
624 my_s = fmin (my_s, fabs (x [k])) ;
625 }
626 }
627 else
628 {
629 for (int64_t k = k1 ; k < k2 ; k++)
630 {
631 my_s = fmin (my_s, fabs (x [k] - y [k])) ;
632 }
633 }
634 }
635 break ;
636
637 default: ; // p-norm not yet supported
638 }
639
640 Work [tid] = my_s ;
641 }
642 }
643
644 //--------------------------------------------------------------------------
645 // combine results of each thread
646 //--------------------------------------------------------------------------
647
648 double s = 0 ;
649 switch (p)
650 {
651 case 0: // Frobenius norm
652 case 2: // 2-norm: sqrt of sum of (x-y).^2
653 {
654 for (int64_t tid = 0 ; tid < nthreads ; tid++)
655 {
656 s += Work [tid] ;
657 }
658 s = sqrt (s) ;
659 }
660 break ;
661
662 case 1: // 1-norm: sum (abs (x-y))
663 {
664 for (int64_t tid = 0 ; tid < nthreads ; tid++)
665 {
666 s += Work [tid] ;
667 }
668 }
669 break ;
670
671 case INT64_MAX: // inf-norm: max (abs (x-y))
672 {
673 for (int64_t tid = 0 ; tid < nthreads ; tid++)
674 {
675 s = fmax (s, Work [tid]) ;
676 }
677 }
678 break ;
679
680 case INT64_MIN: // (-inf)-norm: min (abs (x-y))
681 {
682 s = Work [0] ;
683 for (int64_t tid = 1 ; tid < nthreads ; tid++)
684 {
685 s = fmin (s, Work [tid]) ;
686 }
687 }
688 break ;
689
690 default: // p-norm not yet supported
691 s = -1 ;
692 }
693
694 //--------------------------------------------------------------------------
695 // free workspace and return result
696 //--------------------------------------------------------------------------
697
698 GB_FREE_WORK (double) ;
699 return (s) ;
700 }
701
702