1 //------------------------------------------------------------------------------
2 // GB_matlab_helper.c: helper functions for MATLAB interface
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // These functions are only used by the MATLAB interface for
11 // SuiteSparse:GraphBLAS.
12 
13 #include "GB_matlab_helper.h"
14 
15 //------------------------------------------------------------------------------
16 // GB_NTHREADS: determine the number of threads to use
17 //------------------------------------------------------------------------------
18 
19 #define GB_NTHREADS(work)                                       \
20     int nthreads_max = GB_Global_nthreads_max_get ( ) ;         \
21     double chunk = GB_Global_chunk_get ( ) ;                    \
22     int nthreads = GB_nthreads (work, chunk, nthreads_max) ;
23 
24 //------------------------------------------------------------------------------
25 // GB_ALLOCATE_WORK: allocate per-thread workspace
26 //------------------------------------------------------------------------------
27 
28 #define GB_ALLOCATE_WORK(work_type)                                         \
29     size_t Work_size ;                                                      \
30     work_type *Work = GB_MALLOC (nthreads, work_type, &Work_size) ;         \
31     if (Work == NULL) return (false) ;
32 
33 //------------------------------------------------------------------------------
34 // GB_FREE_WORK: free per-thread workspace
35 //------------------------------------------------------------------------------
36 
37 #define GB_FREE_WORK(work_type)                                             \
38     GB_FREE (&Work, Work_size) ;
39 
40 //------------------------------------------------------------------------------
41 // GB_matlab_helper1: convert 0-based indices to 1-based for gbextracttuples
42 //------------------------------------------------------------------------------
43 
GB_matlab_helper1(double * restrict I_double,const GrB_Index * restrict I,int64_t nvals)44 void GB_matlab_helper1              // convert zero-based indices to one-based
45 (
46     double *restrict I_double,   // output array
47     const GrB_Index *restrict I, // input array
48     int64_t nvals                   // size of input and output arrays
49 )
50 {
51 
52     GB_NTHREADS (nvals) ;
53 
54     int64_t k ;
55     #pragma omp parallel for num_threads(nthreads) schedule(static)
56     for (k = 0 ; k < nvals ; k++)
57     {
58         I_double [k] = (double) (I [k] + 1) ;
59     }
60 }
61 
62 //------------------------------------------------------------------------------
63 // GB_matlab_helper1i: convert 0-based indices to 1-based for gbextracttuples
64 //------------------------------------------------------------------------------
65 
GB_matlab_helper1i(int64_t * restrict I,int64_t nvals)66 void GB_matlab_helper1i             // convert zero-based indices to one-based
67 (
68     int64_t *restrict I,         // input/output array
69     int64_t nvals                   // size of input/output array
70 )
71 {
72 
73     GB_NTHREADS (nvals) ;
74 
75     int64_t k ;
76     #pragma omp parallel for num_threads(nthreads) schedule(static)
77     for (k = 0 ; k < nvals ; k++)
78     {
79         I [k] ++ ;
80     }
81 }
82 
83 //------------------------------------------------------------------------------
84 // GB_matlab_helper3: convert 1-based indices to 0-based for gb_mxarray_to_list
85 //------------------------------------------------------------------------------
86 
GB_matlab_helper3(int64_t * restrict List,const double * restrict List_double,int64_t len,int64_t * List_max)87 bool GB_matlab_helper3              // return true if OK, false on error
88 (
89     int64_t *restrict List,      // size len, output array
90     const double *restrict List_double, // size len, input array
91     int64_t len,
92     int64_t *List_max               // also compute the max entry in the list
93 )
94 {
95 
96     GB_NTHREADS (len) ;
97 
98     ASSERT (List != NULL) ;
99     ASSERT (List_double != NULL) ;
100     ASSERT (List_max != NULL) ;
101 
102     bool ok = true ;
103     int64_t listmax = -1 ;
104 
105     GB_ALLOCATE_WORK (int64_t) ;
106 
107     int tid ;
108     #pragma omp parallel for num_threads(nthreads) schedule(static)
109     for (tid = 0 ; tid < nthreads ; tid++)
110     {
111         bool my_ok = true ;
112         int64_t k1, k2, my_listmax = -1 ;
113         GB_PARTITION (k1, k2, len, tid, nthreads) ;
114         for (int64_t k = k1 ; k < k2 ; k++)
115         {
116             double x = List_double [k] ;
117             int64_t i = (int64_t) x ;
118             my_ok = my_ok && (x == (double) i) ;
119             my_listmax = GB_IMAX (my_listmax, i) ;
120             List [k] = i - 1 ;
121         }
122         // rather than create a separate per-thread boolean workspace, just
123         // use a sentinal value of INT64_MIN if non-integer indices appear
124         // in List_double.
125         Work [tid] = my_ok ? my_listmax : INT64_MIN ;
126     }
127 
128     // wrapup
129     for (tid = 0 ; tid < nthreads ; tid++)
130     {
131         listmax = GB_IMAX (listmax, Work [tid]) ;
132         ok = ok && (Work [tid] != INT64_MIN) ;
133     }
134 
135     GB_FREE_WORK (int64_t) ;
136 
137     (*List_max) = listmax ;
138     return (ok) ;
139 }
140 
141 //------------------------------------------------------------------------------
142 // GB_matlab_helper3i: convert 1-based indices to 0-based for gb_mxarray_to_list
143 //------------------------------------------------------------------------------
144 
GB_matlab_helper3i(int64_t * restrict List,const int64_t * restrict List_int64,int64_t len,int64_t * List_max)145 bool GB_matlab_helper3i             // return true if OK, false on error
146 (
147     int64_t *restrict List,      // size len, output array
148     const int64_t *restrict List_int64, // size len, input array
149     int64_t len,
150     int64_t *List_max               // also compute the max entry in the list
151 )
152 {
153 
154     GB_NTHREADS (len) ;
155 
156     int64_t listmax = -1 ;
157 
158     GB_ALLOCATE_WORK (int64_t) ;
159 
160     int tid ;
161     #pragma omp parallel for num_threads(nthreads) schedule(static)
162     for (tid = 0 ; tid < nthreads ; tid++)
163     {
164         int64_t k1, k2, my_listmax = -1 ;
165         GB_PARTITION (k1, k2, len, tid, nthreads) ;
166         for (int64_t k = k1 ; k < k2 ; k++)
167         {
168             int64_t i = List_int64 [k] ;
169             my_listmax = GB_IMAX (my_listmax, i) ;
170             List [k] = i - 1 ;
171         }
172         Work [tid] = my_listmax ;
173     }
174 
175     // wrapup
176     for (tid = 0 ; tid < nthreads ; tid++)
177     {
178         listmax = GB_IMAX (listmax, Work [tid]) ;
179     }
180 
181     GB_FREE_WORK (int64_t) ;
182 
183     (*List_max) = listmax ;
184     return (true) ;
185 }
186 
187 //------------------------------------------------------------------------------
188 // GB_matlab_helper4: find the max entry in an index list for gbbuild
189 //------------------------------------------------------------------------------
190 
GB_matlab_helper4(const GrB_Index * restrict I,const int64_t len,GrB_Index * List_max)191 bool GB_matlab_helper4              // return true if OK, false on error
192 (
193     const GrB_Index *restrict I, // array of size len
194     const int64_t len,
195     GrB_Index *List_max             // find max (I) + 1
196 )
197 {
198 
199     GB_NTHREADS (len) ;
200 
201     GrB_Index listmax = 0 ;
202 
203     GB_ALLOCATE_WORK (GrB_Index) ;
204 
205     int tid ;
206     #pragma omp parallel for num_threads(nthreads) schedule(static)
207     for (tid = 0 ; tid < nthreads ; tid++)
208     {
209         int64_t k1, k2 ;
210         GrB_Index my_listmax = 0 ;
211         GB_PARTITION (k1, k2, len, tid, nthreads) ;
212         for (int64_t k = k1 ; k < k2 ; k++)
213         {
214             my_listmax = GB_IMAX (my_listmax, I [k]) ;
215         }
216         Work [tid] = my_listmax ;
217     }
218 
219     // wrapup
220     for (tid = 0 ; tid < nthreads ; tid++)
221     {
222         listmax = GB_IMAX (listmax, Work [tid]) ;
223     }
224 
225     GB_FREE_WORK (GrB_Index) ;
226 
227     if (len > 0) listmax++ ;
228     (*List_max) = listmax ;
229     return (true) ;
230 }
231 
232 //------------------------------------------------------------------------------
233 // GB_matlab_helper5: construct pattern of S for gblogassign
234 //------------------------------------------------------------------------------
235 
GB_matlab_helper5(GrB_Index * restrict Si,GrB_Index * restrict Sj,const GrB_Index * restrict Mi,const GrB_Index * restrict Mj,const int64_t mvlen,GrB_Index * restrict Ai,const int64_t avlen,const GrB_Index anz)236 void GB_matlab_helper5              // construct pattern of S
237 (
238     GrB_Index *restrict Si,         // array of size anz
239     GrB_Index *restrict Sj,         // array of size anz
240     const GrB_Index *restrict Mi,   // array of size mnz, M->i, may be NULL
241     const GrB_Index *restrict Mj,   // array of size mnz,
242     const int64_t mvlen,               // M->vlen
243     GrB_Index *restrict Ai,         // array of size anz, A->i, may be NULL
244     const int64_t avlen,               // M->vlen
245     const GrB_Index anz
246 )
247 {
248 
249     GB_NTHREADS (anz) ;
250     ASSERT (Mj != NULL) ;
251     ASSERT (Si != NULL) ;
252     ASSERT (Sj != NULL) ;
253 
254     int64_t k ;
255     #pragma omp parallel for num_threads(nthreads) schedule(static)
256     for (k = 0 ; k < anz ; k++)
257     {
258         int64_t i = GBI (Ai, k, avlen) ;
259         Si [k] = GBI (Mi, i, mvlen) ;
260         Sj [k] = Mj [i] ;
261     }
262 }
263 
264 //------------------------------------------------------------------------------
265 // GB_matlab_helper6: set bool array to all true gblogextract
266 //------------------------------------------------------------------------------
267 
GB_matlab_helper6(bool * restrict Gbool,const GrB_Index gnvals)268 void GB_matlab_helper6              // set Gbool to all true
269 (
270     bool *restrict Gbool,        // array of size gnvals
271     const GrB_Index gnvals
272 )
273 {
274 
275     GB_NTHREADS (gnvals) ;
276 
277     int64_t k ;
278     #pragma omp parallel for num_threads(nthreads) schedule(static)
279     for (k = 0 ; k < gnvals ; k++)
280     {
281         Gbool [k] = true ;
282     }
283 }
284 
285 //------------------------------------------------------------------------------
286 // GB_matlab_helper7: Kx = uint64 (0:mnz-1), for gblogextract
287 //------------------------------------------------------------------------------
288 
GB_matlab_helper7(uint64_t * restrict Kx,const GrB_Index mnz)289 void GB_matlab_helper7              // Kx = uint64 (0:mnz-1)
290 (
291     uint64_t *restrict Kx,       // array of size mnz
292     const GrB_Index mnz
293 )
294 {
295 
296     GB_NTHREADS (mnz) ;
297 
298     int64_t k ;
299     #pragma omp parallel for num_threads(nthreads) schedule(static)
300     for (k = 0 ; k < mnz ; k++)
301     {
302         Kx [k] = k ;
303     }
304 }
305 
306 //------------------------------------------------------------------------------
307 // GB_matlab_helper8: expand a scalar into an array for gbbuild
308 //------------------------------------------------------------------------------
309 
GB_matlab_helper8(GB_void * C,GB_void * A,GrB_Index nvals,size_t s)310 void GB_matlab_helper8
311 (
312     GB_void *C,         // output array of size nvals * s
313     GB_void *A,         // input scalar of size s
314     GrB_Index nvals,    // size of C
315     size_t s            // size of each scalar
316 )
317 {
318 
319     GB_NTHREADS (nvals) ;
320 
321     int64_t k ;
322     #pragma omp parallel for num_threads(nthreads) schedule(static)
323     for (k = 0 ; k < nvals ; k++)
324     {
325         // C [k] = A [0]
326         memcpy (C + k * s, A, s) ;
327     }
328 }
329 
330 //------------------------------------------------------------------------------
331 // GB_matlab_helper9: compute the degree of each vector
332 //------------------------------------------------------------------------------
333 
334 // TODO: use GrB_mxv or GrB_vxm when possible.
335 
GB_matlab_helper9(GrB_Matrix A,int64_t ** degree,size_t * degree_size,GrB_Index ** list,size_t * list_size,GrB_Index * nvec)336 bool GB_matlab_helper9  // true if successful, false if out of memory
337 (
338     GrB_Matrix A,           // input matrix
339     int64_t **degree,       // degree of each vector, size nvec
340     size_t *degree_size,    // size of degree, in bytes
341     GrB_Index **list,       // list of non-empty vectors
342     size_t *list_size,      // size of degree, in bytes
343     GrB_Index *nvec         // # of non-empty vectors
344 )
345 {
346 
347     ASSERT_MATRIX_OK (A, "A for matlab helper9", GB0) ;
348     ASSERT (!GB_IS_BITMAP (A)) ;
349     ASSERT (GB_IS_SPARSE (A) || GB_IS_HYPERSPARSE (A) || GB_IS_FULL (A)) ;
350 
351     int64_t anvec = A->nvec ;
352     GB_NTHREADS (anvec) ;
353 
354     uint64_t *List   = NULL ; size_t List_size = 0 ;
355     int64_t  *Degree = NULL ; size_t Degree_size = 0 ;
356 
357     List   = GB_MALLOC (anvec, uint64_t, &List_size) ;
358     Degree = GB_MALLOC (anvec, int64_t , &Degree_size) ;
359 
360     if (List == NULL || Degree == NULL)
361     {
362         GB_FREE (&List, List_size) ;
363         GB_FREE (&Degree, Degree_size) ;
364         return (false) ;
365     }
366 
367     #ifdef GB_DEBUG
368     // remove List and Degree from the debug memtable, since they will be
369     // imported as the degree GrB_Vector d
370     GB_Global_memtable_remove (List) ;
371     GB_Global_memtable_remove (Degree) ;
372     #endif
373 
374     int64_t *Ah = A->h ;
375     int64_t *Ap = A->p ;
376     int64_t avlen = A->vlen ;
377 
378     int64_t k ;
379     #pragma omp parallel for num_threads(nthreads) schedule(static)
380     for (k = 0 ; k < anvec ; k++)
381     {
382         List [k] = GBH (Ah, k) ;
383         Degree [k] = (Ap == NULL) ? avlen : (Ap [k+1] - Ap [k]) ;
384     }
385 
386     // return result
387     (*degree) = Degree ;    (*degree_size) = Degree_size ;
388     (*list) = List ;        (*list_size) = List_size ;
389     (*nvec) = anvec ;
390     return (true) ;
391 }
392 
393 //------------------------------------------------------------------------------
394 // GB_matlab_helper10: compute norm (x-y,p) of two dense FP32 or FP64 vectors
395 //------------------------------------------------------------------------------
396 
397 // p can be:
398 
399 //      0 or 2:     2-norm, sqrt (sum ((x-y).^2))
400 //      1:          1-norm, sum (abs (x-y))
401 //      INT64_MAX   inf-norm, max (abs (x-y))
402 //      INT64_MIN   (-inf)-norm, min (abs (x-y))
403 //      other:      p-norm not yet computed
404 
GB_matlab_helper10(GB_void * x_arg,GB_void * y_arg,GrB_Type type,int64_t p,GrB_Index n)405 double GB_matlab_helper10       // norm (x-y,p), or -1 on error
406 (
407     GB_void *x_arg,             // float or double, depending on type parameter
408     GB_void *y_arg,             // same type as x, treat as zero if NULL
409     GrB_Type type,              // GrB_FP32 or GrB_FP64
410     int64_t p,                  // 0, 1, 2, INT64_MIN, or INT64_MAX
411     GrB_Index n
412 )
413 {
414 
415     //--------------------------------------------------------------------------
416     // check inputs
417     //--------------------------------------------------------------------------
418 
419     if (!(type == GrB_FP32 || type == GrB_FP64))
420     {
421         // type of x and y must be GrB_FP32 or GrB_FP64
422         return ((double) -1) ;
423     }
424 
425     if (n == 0)
426     {
427         return ((double) 0) ;
428     }
429 
430     //--------------------------------------------------------------------------
431     // allocate workspace and determine # of threads to use
432     //--------------------------------------------------------------------------
433 
434     GB_NTHREADS (n) ;
435     GB_ALLOCATE_WORK (double) ;
436 
437     //--------------------------------------------------------------------------
438     // each thread computes its partial norm
439     //--------------------------------------------------------------------------
440 
441     int tid ;
442     #pragma omp parallel for num_threads(nthreads) schedule(static)
443     for (tid = 0 ; tid < nthreads ; tid++)
444     {
445         int64_t k1, k2 ;
446         GB_PARTITION (k1, k2, n, tid, nthreads) ;
447 
448         if (type == GrB_FP32)
449         {
450 
451             //------------------------------------------------------------------
452             // FP32 case
453             //------------------------------------------------------------------
454 
455             float my_s = 0 ;
456             const float *x = (float *) x_arg ;
457             const float *y = (float *) y_arg ;
458             switch (p)
459             {
460                 case 0:     // Frobenius norm
461                 case 2:     // 2-norm: sqrt of sum of (x-y).^2
462                 {
463                     if (y == NULL)
464                     {
465                         for (int64_t k = k1 ; k < k2 ; k++)
466                         {
467                             float t = x [k] ;
468                             my_s += (t*t) ;
469                         }
470                     }
471                     else
472                     {
473                         for (int64_t k = k1 ; k < k2 ; k++)
474                         {
475                             float t = (x [k] - y [k]) ;
476                             my_s += (t*t) ;
477                         }
478                     }
479                 }
480                 break ;
481 
482                 case 1:     // 1-norm: sum (abs (x-y))
483                 {
484                     if (y == NULL)
485                     {
486                         for (int64_t k = k1 ; k < k2 ; k++)
487                         {
488                             my_s += fabsf (x [k]) ;
489                         }
490                     }
491                     else
492                     {
493                         for (int64_t k = k1 ; k < k2 ; k++)
494                         {
495                             my_s += fabsf (x [k] - y [k]) ;
496                         }
497                     }
498                 }
499                 break ;
500 
501                 case INT64_MAX:     // inf-norm: max (abs (x-y))
502                 {
503                     if (y == NULL)
504                     {
505                         for (int64_t k = k1 ; k < k2 ; k++)
506                         {
507                             my_s = fmaxf (my_s, fabsf (x [k])) ;
508                         }
509                     }
510                     else
511                     {
512                         for (int64_t k = k1 ; k < k2 ; k++)
513                         {
514                             my_s = fmaxf (my_s, fabsf (x [k] - y [k])) ;
515                         }
516                     }
517                 }
518                 break ;
519 
520                 case INT64_MIN:     // (-inf)-norm: min (abs (x-y))
521                 {
522                     my_s = INFINITY ;
523                     if (y == NULL)
524                     {
525                         for (int64_t k = k1 ; k < k2 ; k++)
526                         {
527                             my_s = fminf (my_s, fabsf (x [k])) ;
528                         }
529                     }
530                     else
531                     {
532                         for (int64_t k = k1 ; k < k2 ; k++)
533                         {
534                             my_s = fminf (my_s, fabsf (x [k] - y [k])) ;
535                         }
536                     }
537                 }
538                 break ;
539 
540                 default: ;  // p-norm not yet supported
541             }
542             Work [tid] = (double) my_s ;
543 
544         }
545         else
546         {
547 
548             //------------------------------------------------------------------
549             // FP64 case
550             //------------------------------------------------------------------
551 
552             double my_s = 0 ;
553             const double *x = (double *) x_arg ;
554             const double *y = (double *) y_arg ;
555             switch (p)
556             {
557                 case 0:     // Frobenius norm
558                 case 2:     // 2-norm: sqrt of sum of (x-y).^2
559                 {
560                     if (y == NULL)
561                     {
562                         for (int64_t k = k1 ; k < k2 ; k++)
563                         {
564                             double t = x [k] ;
565                             my_s += (t*t) ;
566                         }
567                     }
568                     else
569                     {
570                         for (int64_t k = k1 ; k < k2 ; k++)
571                         {
572                             double t = (x [k] - y [k]) ;
573                             my_s += (t*t) ;
574                         }
575                     }
576                 }
577                 break ;
578 
579                 case 1:     // 1-norm: sum (abs (x-y))
580                 {
581                     if (y == NULL)
582                     {
583                         for (int64_t k = k1 ; k < k2 ; k++)
584                         {
585                             my_s += fabs (x [k]) ;
586                         }
587                     }
588                     else
589                     {
590                         for (int64_t k = k1 ; k < k2 ; k++)
591                         {
592                             my_s += fabs (x [k] - y [k]) ;
593                         }
594                     }
595                 }
596                 break ;
597 
598                 case INT64_MAX:     // inf-norm: max (abs (x-y))
599                 {
600                     if (y == NULL)
601                     {
602                         for (int64_t k = k1 ; k < k2 ; k++)
603                         {
604                             my_s = fmax (my_s, fabs (x [k])) ;
605                         }
606                     }
607                     else
608                     {
609                         for (int64_t k = k1 ; k < k2 ; k++)
610                         {
611                             my_s = fmax (my_s, fabs (x [k] - y [k])) ;
612                         }
613                     }
614                 }
615                 break ;
616 
617                 case INT64_MIN:     // (-inf)-norm: min (abs (x-y))
618                 {
619                     my_s = INFINITY ;
620                     if (y == NULL)
621                     {
622                         for (int64_t k = k1 ; k < k2 ; k++)
623                         {
624                             my_s = fmin (my_s, fabs (x [k])) ;
625                         }
626                     }
627                     else
628                     {
629                         for (int64_t k = k1 ; k < k2 ; k++)
630                         {
631                             my_s = fmin (my_s, fabs (x [k] - y [k])) ;
632                         }
633                     }
634                 }
635                 break ;
636 
637                 default: ;  // p-norm not yet supported
638             }
639 
640             Work [tid] = my_s ;
641         }
642     }
643 
644     //--------------------------------------------------------------------------
645     // combine results of each thread
646     //--------------------------------------------------------------------------
647 
648     double s = 0 ;
649     switch (p)
650     {
651         case 0:     // Frobenius norm
652         case 2:     // 2-norm: sqrt of sum of (x-y).^2
653         {
654             for (int64_t tid = 0 ; tid < nthreads ; tid++)
655             {
656                 s += Work [tid] ;
657             }
658             s = sqrt (s) ;
659         }
660         break ;
661 
662         case 1:     // 1-norm: sum (abs (x-y))
663         {
664             for (int64_t tid = 0 ; tid < nthreads ; tid++)
665             {
666                 s += Work [tid] ;
667             }
668         }
669         break ;
670 
671         case INT64_MAX:     // inf-norm: max (abs (x-y))
672         {
673             for (int64_t tid = 0 ; tid < nthreads ; tid++)
674             {
675                 s = fmax (s, Work [tid]) ;
676             }
677         }
678         break ;
679 
680         case INT64_MIN:     // (-inf)-norm: min (abs (x-y))
681         {
682             s = Work [0] ;
683             for (int64_t tid = 1 ; tid < nthreads ; tid++)
684             {
685                 s = fmin (s, Work [tid]) ;
686             }
687         }
688         break ;
689 
690         default:    // p-norm not yet supported
691             s = -1 ;
692     }
693 
694     //--------------------------------------------------------------------------
695     // free workspace and return result
696     //--------------------------------------------------------------------------
697 
698     GB_FREE_WORK (double) ;
699     return (s) ;
700 }
701 
702