1 //------------------------------------------------------------------------------
2 // GB_transpose:  C=A' or C=op(A'), with typecasting
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // CALLS:     GB_builder
11 
12 // TODO:: this is way too complicated.  Reduce it to two cases:
13 //  C = A'      both C and A are passed in, not Chandle
14 //  C = C'      C is transposed in-place, (C==A aliased)
15 // In both cases, require the header for C and A to be allocated already
16 // (either static or dynamic).   A is never modified, unless C==A.
17 // C and A cannot be NULL on input.  If C==A then C contains a valid
// matrix on input (the input matrix A), otherwise GB_phbix_free(C) is
19 // immediately done.  Either header may be static or dynamic.
20 
21 // Transpose a matrix, C=A', and optionally apply a unary operator and/or
22 // typecast the values.  The transpose may be done in-place, in which case C or
23 // A are modified in-place.
24 
25 // The input matrix may have shallow components (even if in-place), and the
26 // output may also have shallow components (even if the input matrix is not
27 // shallow).
28 
29 // This function is CSR/CSC agnostic; it sets the output matrix format from
30 // C_is_csc but otherwise ignores the CSR/CSC type of A and C.
31 
32 // There are 3 ways to use this function:
33 
// Method1 (in_place_C): If A_in is NULL, then C = (*Chandle) is transposed
// in-place.  If out of memory, (*Chandle) is always returned as NULL, which
// frees the input matrix C since the transpose is done in-place.

// Method2 (in_place_A): If A_in is not NULL and either Chandle is NULL or
// (*Chandle) == A_in, then A is modified in-place; A_in is not freed when done.
40 
41 // Method3 (C=A'): Otherwise, A_in and Chandle are non-NULL, and C=A' is
42 // computed.  If (*Chandle) is NULL on input, then a new header is allocated.
43 // If (*Chandle) is non-NULL on input, it is assumed to be an uninitialized
44 // static header, not obtained from malloc.  On output, C->static_header will
45 // be true.
46 
47 // The bucket sort is parallel, but not highly scalable.  If e=nnz(A) and A is
48 // m-by-n, then at most O(e/n) threads are used.  The GB_builder method is more
49 // scalable, but not as fast with a modest number of threads.
50 
51 #include "GB_transpose.h"
52 #include "GB_build.h"
53 #include "GB_apply.h"
54 
// nothing generic to free on error: each error path below explicitly frees
// C (GB_FREE_C) and/or A's prior content (GB_FREE_IN_PLACE_A) as needed
#define GB_FREE_ALL ;
56 
// free prior content of A, but only if the transpose is being done in-place
#define GB_FREE_IN_PLACE_A                                                  \
{                                                                           \
    if (in_place)                                                           \
    {                                                                       \
        /* A is being transposed in-place (in_place_A or in_place_C) */     \
        /* free the prior content of A, but not the header &A itself; */    \
        /* shallow components are not owned by A and are not freed */       \
        if (!Ap_shallow) GB_FREE (&Ap, Ap_size) ;                           \
        if (!Ah_shallow) GB_FREE (&Ah, Ah_size) ;                           \
        if (!Ab_shallow) GB_FREE (&Ab, Ab_size) ;                           \
        if (!Ai_shallow) GB_FREE (&Ai, Ai_size) ;                           \
        if (!Ax_shallow) GB_FREE (&Ax, Ax_size) ;                           \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* A is not modified; it is purely an input matrix */               \
        ;                                                                   \
    }                                                                       \
}
76 
// free the new C matrix, unless C=A' is being done in-place of A
#define GB_FREE_C                                                           \
{                                                                           \
    if (!in_place_A)                                                        \
    {                                                                       \
        /* free the header and all content of C.  In Method2 (in_place_A) */ \
        /* C aliases the caller's A_in, which must not be freed on error */ \
        GB_Matrix_free (Chandle) ;                                          \
    }                                                                       \
}
86 
87 //------------------------------------------------------------------------------
88 // GB_transpose
89 //------------------------------------------------------------------------------
90 
91 GB_PUBLIC   // accessed by the MATLAB tests in GraphBLAS/Test only
GB_transpose(GrB_Matrix * Chandle,GrB_Type ctype,const bool C_is_csc,const GrB_Matrix A_in,const GrB_UnaryOp op1_in,const GrB_BinaryOp op2_in,const GxB_Scalar scalar,bool binop_bind1st,GB_Context Context)92 GrB_Info GB_transpose           // C=A', C=(ctype)A or C=op(A')
93 (
94     GrB_Matrix *Chandle,        // output matrix C, possibly modified in-place
95     GrB_Type ctype,             // desired type of C; if NULL use A->type.
96                                 // ignored if op is present (cast to op->ztype)
97     const bool C_is_csc,        // desired CSR/CSC format of C
98     const GrB_Matrix A_in,      // input matrix
99         // no operator is applied if both op1 and op2 are NULL
100         const GrB_UnaryOp op1_in,       // unary operator to apply
101         const GrB_BinaryOp op2_in,      // binary operator to apply
102         const GxB_Scalar scalar,        // scalar to bind to binary operator
103         bool binop_bind1st,             // if true, binop(x,A) else binop(A,y)
104     GB_Context Context
105 )
106 {
107 
108     //--------------------------------------------------------------------------
109     // check inputs and determine if transpose is done in-place
110     //--------------------------------------------------------------------------
111 
112     GrB_Info info ;
113     GBURBLE ("(transpose) ") ;
114     GrB_Matrix A, C ;
115     bool in_place_C, in_place_A ;
116     bool C_static_header = false ;
117     struct GB_Matrix_opaque T_header ;
118     GrB_Matrix T = GB_clear_static_header (&T_header) ;
119 
120     if (A_in == NULL)
121     {
122 
123         //----------------------------------------------------------------------
124         // Method1 (in_place_C): C = C' ; &C is transposed in-place
125         //----------------------------------------------------------------------
126 
127         // GB_transpose (&C, ctype, csc, NULL, op) ;
128         // C=A' is transposed in-place, in the matrix C.
129         // The matrix C is freed if an error occurs and C is set to NULL.
130 
131         ASSERT (Chandle != NULL) ;  // at least &C or A must be non-NULL
132         ASSERT (*Chandle != NULL) ;
133         A = (*Chandle) ;
134         C = A ;                     // C must be freed if an error occurs
135         in_place_C = true ;         // C is modified in-place
136         in_place_A = false ;
137         ASSERT (A == C && C == (*Chandle)) ;
138         C_static_header = C->static_header ;
139 
140     }
141     else if (Chandle == NULL || (*Chandle) == A_in)
142     {
143 
144         //----------------------------------------------------------------------
145         // Method2 (in_place_A): A = A' ; A transposed in-place
146         //----------------------------------------------------------------------
147 
148         // GB_transpose (NULL, ctype, csc, A, op) ;
149         // GB_transpose (&A, ctype, csc, A, op) ;
150         // C=A' is transposed in-place, in the matrix A.
151         // The matrix A_in is not freed if an error occurs.
152 
153         A = A_in ;
154         Chandle = &A ;              // C must not be freed if an error occurs
155         C = A ;
156         in_place_C = false ;
157         in_place_A = true ;         // A is modified in-place
158         ASSERT (A == C && C == (*Chandle)) ;
159         C_static_header = A->static_header ;
160 
161     }
162     else
163     {
164 
165         //----------------------------------------------------------------------
166         // Method3 (C=A'): C and A are different; C may be a static header
167         //----------------------------------------------------------------------
168 
169         // GB_transpose (&C, ctype, csc, A, op) ;
170         // &C and A are both non-NULL, and not aliased.
171         // C=A' where C is a new matrix constructed here.
172         // The matrix C is freed if an error occurs, and C is set to NULL
173         // unless C has a static header.
174 
175         // If C is NULL, a new header is allocated and returned as (*Chandle).
176         // Otherwise, C = (*Chandle) is assumed to be an uninitialized static
177         // header, and (*Chandle) is not modified.
178 
179         A = A_in ;
180         C = (*Chandle) ;            // NULL, or pre-existing static header
181         in_place_C = false ;        // C and A are different matrices
182         in_place_A = false ;
183         ASSERT (A != C && C == (*Chandle)) ;
184         C_static_header = (C != NULL) ;
185         if (C_static_header)
186         {
187             GB_clear_static_header (C) ;
188         }
189     }
190 
191     bool in_place = (in_place_A || in_place_C) ;
192 
193     ASSERT_MATRIX_OK (A, "A input for GB_transpose", GB0) ;
194     ASSERT_TYPE_OK_OR_NULL (ctype, "ctype for GB_transpose", GB0) ;
195     ASSERT_UNARYOP_OK_OR_NULL (op1_in, "unop for GB_transpose", GB0) ;
196     ASSERT_BINARYOP_OK_OR_NULL (op2_in, "binop for GB_transpose", GB0) ;
197     ASSERT_SCALAR_OK_OR_NULL (scalar, "scalar for GB_transpose", GB0) ;
198     ASSERT (C == (*Chandle)) ;      // both may be NULL
199 
200     // get the current sparsity control of A
201     float A_hyper_switch = A->hyper_switch ;
202     float A_bitmap_switch = A->bitmap_switch ;
203     int A_sparsity = A->sparsity ;
204 
205     // wait if A has pending tuples or zombies, but leave it jumbled
206     GB_MATRIX_WAIT_IF_PENDING_OR_ZOMBIES (A) ;
207     ASSERT (!GB_PENDING (A)) ;
208     ASSERT (!GB_ZOMBIES (A)) ;
209     ASSERT (GB_JUMBLED_OK (A)) ;
210 
211     //--------------------------------------------------------------------------
212     // get A
213     //--------------------------------------------------------------------------
214 
215     GrB_Type atype = A->type ;
216     size_t asize = atype->size ;
217     GB_Type_code acode = atype->code ;
218 
219     int64_t avlen = A->vlen ;
220     int64_t avdim = A->vdim ;
221     int64_t anzmax = A->nzmax ;
222 
223     // if in-place, these must be freed when done, whether successful or not
224     int64_t *restrict Ap = A->p ; size_t Ap_size = A->p_size ;
225     int64_t *restrict Ah = A->h ; size_t Ah_size = A->h_size ;
226     int64_t *restrict Ai = A->i ; size_t Ab_size = A->b_size ;
227     int8_t  *restrict Ab = A->b ; size_t Ai_size = A->i_size ;
228     GB_void *restrict Ax = (GB_void *) A->x ; size_t Ax_size = A->x_size ;
229 
230     bool A_is_bitmap = GB_IS_BITMAP (A) ;
231     bool A_is_packed = GB_is_packed (A) ;
232     bool A_is_hyper  = GB_IS_HYPERSPARSE (A) ;
233 
234     bool Ap_shallow = A->p_shallow ;
235     bool Ah_shallow = A->h_shallow ;
236     bool Ai_shallow = A->i_shallow ;
237     bool Ax_shallow = A->x_shallow ;
238     bool Ab_shallow = A->b_shallow ;
239 
240     int64_t anz = GB_NNZ (A) ;
241     int64_t anvec = A->nvec ;
242     int64_t anvals = A->nvals ;
243 
244     //--------------------------------------------------------------------------
245     // determine the max number of threads to use
246     //--------------------------------------------------------------------------
247 
248     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
249 
250     //--------------------------------------------------------------------------
251     // determine the type of C and get the unary or binary operator
252     //--------------------------------------------------------------------------
253 
254     // If a unary or binary operator is present, C is always returned as
255     // the ztype of the operator.  The input ctype is ignored.
256 
257     GrB_UnaryOp  op1 = NULL ;
258     GrB_BinaryOp op2 = NULL ;
259     GB_Opcode opcode = GB_NOP_opcode ;
260 
261     if (op1_in != NULL)
262     {
263         // get the unary operator
264         opcode = op1_in->opcode ;
265         if (atype == op1_in->xtype && opcode == GB_IDENTITY_opcode)
266         {
267             // op1 is a built-in identity operator, with the same type as A, so
268             // do not apply the operator and do not typecast.  op1 is NULL.
269             ctype = atype ;
270         }
271         else
272         {
273             // apply the operator, z=op1(x)
274             op1 = op1_in ;
275             ctype = op1->ztype ;
276         }
277     }
278     else if (op2_in != NULL)
279     {
280         // get the binary operator
281         GrB_Type op2_intype = binop_bind1st ? op2_in->xtype : op2_in->ytype ;
282         opcode = op2_in->opcode ;
283         // only GB_apply calls GB_transpose with op2_in, and it ensures this
284         // condition holds: the first(A,y), second(x,A), and any(...) have
285         // been renamed to identity(A), so these cases do not occur here.
286         ASSERT (!
287            ((opcode == GB_ANY_opcode) ||
288             (opcode == GB_FIRST_opcode  && !binop_bind1st) ||
289             (opcode == GB_SECOND_opcode &&  binop_bind1st))) ;
290         // apply the operator, z=op2(A,y) or op2(x,A)
291         op2 = op2_in ;
292         ctype = op2->ztype ;
293     }
294     else
295     {
296         // no operator.  both op1 and op2 are NULL
297         if (ctype == NULL)
298         {
299             // no typecasting if ctype is NULL
300             ctype = atype ;
301         }
302     }
303 
304     GB_Type_code ccode = ctype->code ;
305     size_t csize = ctype->size ;
306 
307     //--------------------------------------------------------------------------
308     // check for positional operators
309     //--------------------------------------------------------------------------
310 
311     bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
312     GrB_UnaryOp  save_op1 = op1 ;
313     GrB_BinaryOp save_op2 = op2 ;
314     if (op_is_positional)
315     {
316         // do not apply the op until after the transpose
317         op1 = NULL ;
318         op2 = NULL ;
319         // replace op1 with the ONE operator, as a placeholder
320         ASSERT (ctype == GrB_INT64 || ctype == GrB_INT32) ;
321         op1 = (ctype == GrB_INT64) ? GxB_ONE_INT64 : GxB_ONE_INT32 ;
322     }
323 
324     //--------------------------------------------------------------------------
325     // C = A'
326     //--------------------------------------------------------------------------
327 
328     ASSERT (GB_IMPLIES (avlen == 0 || avdim == 0, anz == 0)) ;
329 
330     bool allocate_new_Cx = (ctype != atype) || (op1 != NULL) || (op2 != NULL) ;
331 
332     if (anz == 0)
333     {
334 
335         //======================================================================
336         // quick return if A is empty
337         //======================================================================
338 
339         // free prior space of A, if transpose is done in-place
340         GB_FREE_IN_PLACE_A ;
341 
342         // A is empty; create a new empty matrix C, with the new type and
343         // dimensions.  C is hypersparse for now but may convert when
344         // returned.
345         info = GB_new_bix (Chandle, C_static_header, // hyper, old or new header
346             ctype, avdim, avlen, GB_Ap_calloc, C_is_csc,
347             GxB_HYPERSPARSE, true, A_hyper_switch, 1, 1, true, Context) ;
348         if (info != GrB_SUCCESS)
349         {
350             // out of memory
351             GB_FREE_C ;
352             return (info) ;
353         }
354         ASSERT_MATRIX_OK (*Chandle, "C transpose empty", GB0) ;
355         ASSERT (!GB_JUMBLED (*Chandle)) ;
356 
357     }
358     else if (A_is_packed)
359     {
360 
361         //======================================================================
362         // transpose a packed or bitmap matrix or vector
363         //======================================================================
364 
365         // A is packed if it is either: (a) bitmap, (b) full, or (c) sparse or
366         // hypersparse with all entries present, no zombies, no pending tuples,
367         // and not jumbled.  For (c), the matrix A can be treated as if it was
368         // full, and the pattern (A->p, A->h, and A->i) can be ignored.
369 
370         int sparsity = (A_is_bitmap) ? GxB_BITMAP : GxB_FULL ;
371         bool T_cheap =                  // T can be done quickly if:
372             (avlen == 1 || avdim == 1)      // A is a row or column vector,
373             && op1 == NULL && op2 == NULL   // no operator to apply, and
374             && atype == ctype ;             // no typecasting
375 
376         // allocate T
377         if (T_cheap)
378         {
379             // just initialize the static header of T, not T->b or T->x
380             info = GB_new (&T, true,  // bitmap or full, static header
381                 ctype, avdim, avlen, GB_Ap_null, C_is_csc,
382                 sparsity, A_hyper_switch, 1, Context) ;
383             ASSERT (info == GrB_SUCCESS) ;
384         }
385         else
386         {
387             // allocate all of T, including T->b and T->x
388             info = GB_new_bix (&T, true,  // bitmap or full, static header
389                 ctype, avdim, avlen, GB_Ap_null, C_is_csc,
390                 sparsity, true, A_hyper_switch, 1, anzmax, true, Context) ;
391         }
392 
393         if (info != GrB_SUCCESS)
394         {
395             // out of memory
396             GB_FREE_C ;
397             return (info) ;
398         }
399 
400         T->magic = GB_MAGIC ;
401         if (sparsity == GxB_BITMAP)
402         {
403             T->nvals = anvals ;     // for bitmap case only
404         }
405 
406         //----------------------------------------------------------------------
407         // T = A'
408         //----------------------------------------------------------------------
409 
410         // Since A is full, # threads to use is nthreads, and the
411         // nworkspaces parameter is not used
412 
413         int64_t anz_held = GB_NNZ_HELD (A) ;
414         ASSERT (anz > 0 && anz_held > 0) ;
415         int nthreads = GB_nthreads (anz_held + anvec, chunk, nthreads_max) ;
416 
417         if (T_cheap)
418         {
419             // no work to do.  Transposing does not change A->b or A->x
420             T->b = Ab ; T->b_size = Ab_size ;
421             T->x = Ax ; T->x_size = Ax_size ;
422             T->nzmax = A->nzmax ;
423             if (in_place)
424             {
425                 // transplant A->b and A->x into T
426                 T->b_shallow = Ab_shallow ;
427                 T->x_shallow = Ax_shallow ;
428                 Ab = NULL ;     // do not free prior Ab
429                 Ax = NULL ;     // do not free prior Ax
430                 A->b = NULL ;
431                 A->x = NULL ;
432             }
433             else
434             {
435                 // T is a purely shallow copy of A
436                 T->b_shallow = (Ab != NULL) ;
437                 T->x_shallow = true ;
438             }
439         }
440         else if (op1 == NULL && op2 == NULL)
441         {
442             // do not apply an operator; optional typecast to C->type
443             GB_transpose_ix (T, A, NULL, NULL, 0, nthreads) ;
444         }
445         else
446         {
447             // apply an operator, C has type op->ztype
448             GB_transpose_op (T, op1, op2, scalar, binop_bind1st, A,
449                 NULL, NULL, 0, nthreads) ;
450         }
451 
452         ASSERT_MATRIX_OK (T, "T dense/bitmap", GB0) ;
453         ASSERT (!GB_JUMBLED (T)) ;
454 
455         // free prior space of A, if transpose is done in-place
456         GB_FREE_IN_PLACE_A ;
457 
458         //----------------------------------------------------------------------
459         // transplant T into C
460         //----------------------------------------------------------------------
461 
462         // allocate the output matrix C as a full or bitmap matrix
463         // if *Chandle == NULL, allocate a new header; otherwise reuse existing
464         info = GB_new (Chandle, C_static_header, // bitmap/full, old/new header
465             ctype, avdim, avlen, GB_Ap_null, C_is_csc,
466             sparsity, A_hyper_switch, 0, Context) ;
467         if (info != GrB_SUCCESS)
468         {
469             // out of memory
470             ASSERT (!in_place) ;            // cannot fail if in-place,
471             ASSERT (!C_static_header) ;     // or if C has a static header
472             GB_FREE_C ;
473             GB_phbix_free (T) ;
474             return (info) ;
475         }
476 
477         // Transplant T into the result C, making a copy if T is shallow
478         info = GB_transplant (*Chandle, ctype, &T, Context) ;
479         if (info != GrB_SUCCESS)
480         {
481             // out of memory
482             GB_FREE_C ;
483             return (info) ;
484         }
485         ASSERT_MATRIX_OK (*Chandle, "Chandle, GB_transpose, bitmap/full", GB0) ;
486 
487     }
488     else if (avdim == 1)
489     {
490 
491         //======================================================================
492         // transpose a "column" vector into a "row"
493         //======================================================================
494 
495         // transpose a vector (avlen-by-1) into a "row" matrix (1-by-avlen).
496         // A must be sorted first.
497         ASSERT_MATRIX_OK (A, "the vector A must already be sorted", GB0) ;
498         GB_MATRIX_WAIT (A) ;
499         ASSERT (!GB_JUMBLED (A)) ;
500 
501         //----------------------------------------------------------------------
502         // allocate space
503         //----------------------------------------------------------------------
504 
505         // Allocate the header of C, with no C->p, C->h, C->i, C->b, or C->x
506         // content, and initialize the type and dimension of C.  The new matrix
507         // is hypersparse.  This step does not allocate anything if in-place.
508 
509         // if *Chandle == NULL, allocate a new header; otherwise reuse existing
510         info = GB_new (Chandle, C_static_header, // hyper; old or new header
511             ctype, 1, avlen, GB_Ap_null, C_is_csc,
512             GxB_HYPERSPARSE, A_hyper_switch, 0, Context) ;
513         if (info != GrB_SUCCESS)
514         {
515             // out of memory
516             ASSERT (!in_place) ;            // cannot fail if in-place,
517             ASSERT (!C_static_header) ;     // or if C has a static header
518             GB_FREE_C ;
519             return (info) ;
520         }
521 
522         if (!in_place)
523         {
524             C = (*Chandle) ;
525         }
526         else
527         {
528             ASSERT (A == C && A == (*Chandle)) ;
529         }
530 
531         // allocate new space for the values and pattern
532         int64_t *restrict Cp = NULL ; size_t Cp_size = 0 ;
533         int64_t *restrict Ci = NULL ; size_t Ci_size = 0 ;
534         GB_void *restrict Cx = NULL ; size_t Cx_size = 0 ;
535         bool ok = true ;
536         Cp = GB_MALLOC (anz+1, int64_t, &Cp_size) ;
537         Ci = GB_MALLOC (anz  , int64_t, &Ci_size) ;
538         ok = (Cp != NULL && Ci != NULL) ;
539 
540         if (allocate_new_Cx)
541         {
542             // allocate new space for the new typecasted numerical values of C
543             ASSERT (anz > 0) ;
544             Cx = GB_MALLOC (anz * ctype->size, GB_void,  &Cx_size) ;
545             ok = ok && (Cx != NULL) ;
546         }
547 
548         if (!ok)
549         {
550             // out of memory
551             GB_FREE (&Cp, Cp_size) ;
552             GB_FREE (&Ci, Ci_size) ;
553             GB_FREE (&Cx, Cx_size) ;
554             GB_FREE_IN_PLACE_A ;
555             GB_FREE_C ;
556             return (GrB_OUT_OF_MEMORY) ;
557         }
558 
559         //----------------------------------------------------------------------
560         // fill the content of C
561         //----------------------------------------------------------------------
562 
563         // numerical values: apply the operator, typecast, or make shallow copy
564         if (op1 != NULL || op2 != NULL)
565         {
566             // Cx = op (A)
567             info = GB_apply_op ( // op1 != identity of same types
568                 (GB_void *) Cx, op1, op2, scalar, binop_bind1st, A, Context) ;
569             // GB_apply_op can only fail if op1/op2 are positional
570             ASSERT (!GB_OP_IS_POSITIONAL (op1)) ;
571             ASSERT (!GB_OP_IS_POSITIONAL (op2)) ;
572             ASSERT (info == GrB_SUCCESS) ;
573             C->x = Cx ; C->x_size = Cx_size ;
574             C->x_shallow = false ;
575             // prior Ax will be freed
576         }
577         else if (ctype != atype)
578         {
579             // copy the values from A into C and cast from atype to ctype
580             C->x = Cx ; C->x_size = Cx_size ;
581             C->x_shallow = false ;
582             GB_cast_array (Cx, ccode, Ax, acode, Ab, asize, anz, 1) ;
583             // prior Ax will be freed
584         }
585         else // ctype == atype
586         {
587             // no type change; numerical values of C are a shallow copy of A.
588             C->x = Ax ; C->x_size = Ax_size ;
589             C->x_shallow = (in_place) ? Ax_shallow : true ;
590             Ax = NULL ;  // do not free prior Ax
591         }
592 
593         // each entry in A becomes a non-empty vector in C
594         // C is a hypersparse 1-by-avlen matrix
595         C->h = Ai ; C->h_size = Ai_size ;
596         C->h_shallow = (in_place) ? Ai_shallow : true ;
597         Ai = NULL ;     // do not free prior Ai
598         // C->p = 0:anz and C->i = zeros (1,anz), newly allocated
599         C->plen = anz ;
600         C->nvec = anz ;
601         C->nvec_nonempty = anz ;
602         C->i = Ci ; C->i_size = Ci_size ;
603         C->p = Cp ; C->p_size = Cp_size ;
604         // fill the vector pointers C->p
605         int nthreads = GB_nthreads (anz, chunk, nthreads_max) ;
606         int64_t k ;
607         #pragma omp parallel for num_threads(nthreads) schedule(static)
608         for (k = 0 ; k < anz ; k++)
609         {
610             Ci [k] = 0 ;
611             Cp [k] = k ;
612         }
613         Cp [anz] = anz ;
614 
615         C->nzmax = anz ;
616         C->magic = GB_MAGIC ;
617 
618         // free prior space of A, if transpose is done in-place
619         GB_FREE_IN_PLACE_A ;
620 
621     }
622     else if (avlen == 1)
623     {
624 
625         //======================================================================
626         // transpose a "row" into a "column" vector
627         //======================================================================
628 
629         // transpose a "row" matrix (1-by-avdim) into a vector (avdim-by-1).
630         // if A->vlen is 1, all vectors of A are implicitly sorted
631         ASSERT_MATRIX_OK (A, "1-by-n input A already sorted", GB0) ;
632 
633         //----------------------------------------------------------------------
634         // allocate workspace, if needed
635         //----------------------------------------------------------------------
636 
637         int ntasks = 0 ;
638         int nth = GB_nthreads (avdim, chunk, nthreads_max) ;
639         GB_WERK_DECLARE (Count, int64_t) ;
640         if (nth > 1 && !A_is_hyper)
641         {
642             // ntasks and Count are not needed if nth == 1
643             ntasks = 8 * nth ;
644             ntasks = GB_IMIN (ntasks, avdim) ;
645             ntasks = GB_IMAX (ntasks, 1) ;
646             GB_WERK_PUSH (Count, ntasks+1, int64_t) ;
647             if (Count == NULL)
648             {
649                 // out of memory
650                 GB_FREE_C ;
651                 return (GrB_OUT_OF_MEMORY) ;
652             }
653         }
654 
655         // Allocate the header of C, with no C->p, C->h, C->i, or C->x content,
656         // and initialize the type and dimension of C.   If in-place, A->p,
657         // A->h, A->i, and A->x are all NULL.  The new matrix is sparse, but
658         // can be CSR or CSC.  This step does not allocate anything if in
659         // place.
660 
661         // if *Chandle == NULL, allocate a new header; otherwise reuse existing
662         info = GB_new (Chandle, C_static_header, // sparse; old or new header
663             ctype, avdim, 1, GB_Ap_null, C_is_csc,
664             GxB_SPARSE, A_hyper_switch, 0, Context) ;
665         if (info != GrB_SUCCESS)
666         {
667             // out of memory
668             ASSERT (!in_place) ;            // cannot fail if in-place,
669             ASSERT (!C_static_header) ;     // or if C has a static header
670             GB_FREE_C ;
671             GB_WERK_POP (Count, int64_t) ;
672             return (info) ;
673         }
674 
675         if (!in_place)
676         {
677             C = (*Chandle) ;
678         }
679         else
680         {
681             ASSERT (A == C && A == (*Chandle)) ;
682         }
683 
684         // allocate new space for the values and pattern
        int64_t *restrict Cp = NULL ; size_t Cp_size = 0 ;
        int64_t *restrict Ci = NULL ; size_t Ci_size = 0 ;
        GB_void *restrict Cx = NULL ; size_t Cx_size = 0 ;
        bool ok = true ;
        // C is built as a single-vector sparse matrix: Cp has length 2 and
        // will hold [0 anz] (filled in at the end of this branch).
        Cp = GB_MALLOC (2, int64_t, &Cp_size) ;
        ok = ok && (Cp != NULL) ;
        if (!A_is_hyper)
        {
            // A is sparse, so new space is needed for Ci.  (If A is
            // hypersparse, Ah is reused as C->i below and no Ci is needed.)
            Ci = GB_MALLOC (anz, int64_t, &Ci_size) ;
            ok = ok && (Ci != NULL) ;
        }

        if (allocate_new_Cx)
        {
            // allocate new space for the new typecasted numerical values of C
            Cx = GB_MALLOC (anz * ctype->size, GB_void, &Cx_size) ;
            ok = ok && (Cx != NULL) ;
        }

        if (!ok)
        {
            // out of memory: free everything allocated above, plus the
            // workspace and (if in-place) the prior content of A, and C.
            GB_FREE (&Cp, Cp_size) ;
            GB_FREE (&Ci, Ci_size) ;
            GB_FREE (&Cx, Cx_size) ;
            GB_WERK_POP (Count, int64_t) ;
            GB_FREE_IN_PLACE_A ;
            GB_FREE_C ;
            return (GrB_OUT_OF_MEMORY) ;
        }

        // placeholder values; Cp [1] is set to anz below, once the pattern
        // of C has been constructed
        Cp [0] = 0 ;
        Cp [1] = 0 ;

        //----------------------------------------------------------------------
        // numerical values of C: apply the op, typecast, or make shallow copy
        //----------------------------------------------------------------------

        // numerical values: apply the operator, typecast, or make shallow copy
        if (op1 != NULL || op2 != NULL)
        {
            // Cx = op (A)
            info = GB_apply_op ( // op1 != identity of same types
                (GB_void *) Cx, op1, op2, scalar, binop_bind1st, A, Context) ;
            // GB_apply_op can only fail if op1/op2 are positional, and
            // positional ops are deferred until after the transpose (see the
            // op_is_positional handling at the end of this function)
            ASSERT (!GB_OP_IS_POSITIONAL (op1)) ;
            ASSERT (!GB_OP_IS_POSITIONAL (op2)) ;
            ASSERT (info == GrB_SUCCESS) ;
            C->x = Cx ; C->x_size = Cx_size ;
            C->x_shallow = false ;
            // prior Ax will be freed
        }
        else if (ctype != atype)
        {
            // copy the values from A into C and cast from atype to ctype
            C->x = Cx ; C->x_size = Cx_size ;
            C->x_shallow = false ;
            GB_cast_array (Cx, ccode, Ax, acode, Ab, asize, anz, 1) ;
            // prior Ax will be freed
        }
        else // ctype == atype
        {
            // no type change; numerical values of C are a shallow copy of A.
            // If in-place, C->x keeps A's shallow status since the array is
            // simply transplanted; otherwise C->x aliases A->x and must be
            // marked shallow so it is not freed twice.
            C->x = Ax ; C->x_size = Ax_size ;
            C->x_shallow = (in_place) ? Ax_shallow : true ;
            Ax = NULL ;  // do not free prior Ax
        }

        //----------------------------------------------------------------------
        // pattern of C
        //----------------------------------------------------------------------

        if (A_is_hyper)
        {

            //------------------------------------------------------------------
            // each non-empty vector in A becomes an entry in C
            //------------------------------------------------------------------

            // Ah already lists exactly the non-empty vectors of A, so it can
            // serve directly as the row indices C->i, with no copy
            C->i = Ah ; C->i_size = Ah_size ;
            C->i_shallow = (in_place) ? Ah_shallow : true ;
            ASSERT (anvec == anz) ;
            Ah = NULL ;     // do not free prior Ah

        }
        else
        {

            //------------------------------------------------------------------
            // find the non-empty vectors of A, which become entries in C
            //------------------------------------------------------------------

            ASSERT (Ah == NULL) ;

            if (nth == 1)
            {

                //--------------------------------------------------------------
                // construct Ci with a single thread
                //--------------------------------------------------------------

                // j is a non-empty vector of A iff Ap [j] < Ap [j+1]
                int64_t k = 0 ;
                for (int64_t j = 0 ; j < avdim ; j++)
                {
                    if (Ap [j] < Ap [j+1])
                    {
                        Ci [k++] = j ;
                    }
                }
                ASSERT (k == anz) ;

            }
            else
            {

                //--------------------------------------------------------------
                // construct Ci in parallel
                //--------------------------------------------------------------

                // first pass: each task counts the non-empty vectors in its
                // partition of [0,avdim)
                int tid ;
                #pragma omp parallel for num_threads(nth) schedule(dynamic,1)
                for (tid = 0 ; tid < ntasks ; tid++)
                {
                    int64_t jstart, jend, k = 0 ;
                    GB_PARTITION (jstart, jend, avdim, tid, ntasks) ;
                    for (int64_t j = jstart ; j < jend ; j++)
                    {
                        if (Ap [j] < Ap [j+1])
                        {
                            k++ ;
                        }
                    }
                    Count [tid] = k ;
                }

                // cumulative sum turns the per-task counts into the starting
                // offset in Ci for each task
                GB_cumsum (Count, ntasks, NULL, 1, NULL) ;
                ASSERT (Count [ntasks] == anz) ;

                // second pass: each task fills its slice of Ci, starting at
                // its offset Count [tid]
                #pragma omp parallel for num_threads(nth) schedule(dynamic,1)
                for (tid = 0 ; tid < ntasks ; tid++)
                {
                    int64_t jstart, jend, k = Count [tid] ;
                    GB_PARTITION (jstart, jend, avdim, tid, ntasks) ;
                    for (int64_t j = jstart ; j < jend ; j++)
                    {
                        if (Ap [j] < Ap [j+1])
                        {
                            Ci [k++] = j ;
                        }
                    }
                }
            }

            #ifdef GB_DEBUG
            // verify Ci against a sequential scan of Ap
            int64_t k = 0 ;
            for (int64_t j = 0 ; j < avdim ; j++)
            {
                if (Ap [j] < Ap [j+1])
                {
                    ASSERT (Ci [k] == j) ;
                    k++ ;
                }
            }
            ASSERT (k == anz) ;
            #endif

            C->i = Ci ; C->i_size = Ci_size ;
            C->i_shallow = false ;
        }

        //---------------------------------------------------------------------
        // vector pointers of C
        //---------------------------------------------------------------------

        // C->p = [0 anz] and C->h = NULL
        ASSERT (C->plen == 1) ;
        ASSERT (C->nvec == 1) ;
        ASSERT (C->h == NULL) ;
        C->p = Cp ; C->p_size = Cp_size ;
        C->p_shallow = false ;
        C->nvec_nonempty = (anz == 0) ? 0 : 1 ;
        // fill the vector pointers C->p
        Cp [0] = 0 ;
        Cp [1] = anz ;
        C->nzmax = anz ;
        C->magic = GB_MAGIC ;
        ASSERT (!GB_JUMBLED (C)) ;

        // free prior space of A, if transpose done in-place, and free workspace
        GB_FREE_IN_PLACE_A ;
        GB_WERK_POP (Count, int64_t) ;

    }
    else
    {

        //======================================================================
        // transpose a general sparse or hypersparse matrix
        //======================================================================

        ASSERT_MATRIX_OK (A, "A for GB_transpose", GB0) ;

        // T=A' with optional typecasting, or T=op(A')

        //----------------------------------------------------------------------
        // select the method
        //----------------------------------------------------------------------

        // GB_transpose_method chooses between the GB_builder method (sort the
        // tuples) and the bucket method, and sizes the bucket workspaces
        int nworkspaces_bucket, nthreads_bucket ;
        bool use_builder = GB_transpose_method (A,
            &nworkspaces_bucket, &nthreads_bucket, Context) ;

        //----------------------------------------------------------------------
        // transpose the matrix with the selected method
        //----------------------------------------------------------------------

        if (use_builder)
        {

            //==================================================================
            // transpose via GB_builder
            //==================================================================

            //------------------------------------------------------------------
            // allocate and create iwork
            //------------------------------------------------------------------

            // allocate iwork of size anz
            int64_t *iwork = NULL ; size_t iwork_size = 0 ;
            iwork = GB_MALLOC (anz, int64_t, &iwork_size) ;
            if (iwork == NULL)
            {
                // out of memory
                GB_FREE_C ;
                return (GrB_OUT_OF_MEMORY) ;
            }

            // Construct the "row" indices of C, which are "column" indices of
            // A.  This array becomes the permanent T->i on output.  This phase
            // must be done before Chandle is created below, since that step
            // destroys A.

            info = GB_extract_vector_list (iwork, A, Context) ;
            if (info != GrB_SUCCESS)
            {
                // out of memory
                GB_FREE (&iwork, iwork_size) ;
                GB_FREE_C ;
                return (info) ;
            }

            //------------------------------------------------------------------
            // allocate the output matrix and additional space (jwork and S)
            //------------------------------------------------------------------

            // Allocate the header of C, with no C->p, C->h, C->i, or C->x
            // content, and initialize the type and dimension of C.   If in
            // place, A->p, A->h, A->i, and A->x are all NULL.  The new matrix
            // is hypersparse, but can be CSR or CSC.  This step does not
            // allocate anything if in-place.

            // if *Chandle == NULL, allocate a new header; otherwise reuse
            info = GB_new (Chandle, C_static_header, // hyper, old or new header
                ctype, avdim, avlen, GB_Ap_null, C_is_csc,
                GxB_HYPERSPARSE, A_hyper_switch, 0, Context) ;
            if (info != GrB_SUCCESS)
            {
                // out of memory
                ASSERT (!in_place) ;            // cannot fail if in-place,
                ASSERT (!C_static_header) ;     // or if C has a static header
                GB_FREE (&iwork, iwork_size) ;
                GB_FREE_C ;
                return (info) ;
            }

            if (!in_place)
            {
                // not in-place: C is the (possibly new) matrix in *Chandle
                C = (*Chandle) ;
            }
            else
            {
                // in-place: C, A, and *Chandle are all the same matrix
                ASSERT (A == C && A == (*Chandle)) ;
            }

            // if in_place, the prior Ap and Ah can now be freed
            if (in_place)
            {
                if (!Ap_shallow) GB_FREE (&Ap, Ap_size) ;
                if (!Ah_shallow) GB_FREE (&Ah, Ah_size) ;
            }

            int64_t *jwork = NULL ; size_t jwork_size = 0 ;
            GB_Type_code scode ;
            GB_void *S = NULL ;
            GB_void *Swork = NULL ; size_t Swork_size = 0 ;

            // for the GB_builder method, if the transpose is done in-place and
            // A->i is not shallow, A->i can be used and then freed.
            // Otherwise, A->i is not modified at all.
            bool ok = true ;
            bool recycle_Ai = (in_place && !Ai_shallow) ;
            if (!recycle_Ai)
            {
                // allocate jwork of size anz
                jwork = GB_MALLOC (anz, int64_t, &jwork_size) ;
                ok = ok && (jwork != NULL) ;
            }

            if (op1 != NULL || op2 != NULL)
            {
                // allocate Swork of size anz * csize
                Swork = GB_MALLOC (anz * csize, GB_void, &Swork_size) ;
                ok = ok && (Swork != NULL) ;
            }

            if (!ok)
            {
                // out of memory
                GB_FREE (&iwork, iwork_size) ;
                GB_FREE (&jwork, jwork_size) ;
                GB_FREE (&Swork, Swork_size) ;
                GB_FREE_IN_PLACE_A ;
                GB_FREE_C ;
                return (GrB_OUT_OF_MEMORY) ;
            }

            //------------------------------------------------------------------
            // construct jwork and Swork
            //------------------------------------------------------------------

            // "row" indices of A become "column" indices of C
            if (recycle_Ai)
            {
                // Ai is used as workspace for the "column" indices of C.
                // jwork is a shallow copy of Ai, and is freed by GB_builder.
                jwork = Ai ;
                jwork_size = Ai_size ;
                ASSERT (in_place) ;
                // set Ai to NULL so it is not freed by GB_FREE_IN_PLACE_A
                Ai = NULL ;
            }
            else
            {
                // jwork = Ai, making a deep copy.  jwork is freed by
                // GB_builder.  A->i is not modified, even if out of memory.
                GB_memcpy (jwork, Ai, anz * sizeof (int64_t), nthreads_max) ;
            }

            // numerical values: apply the op, typecast, or make shallow copy
            if (op1 != NULL || op2 != NULL)
            {
                // Swork = op (A)
                info = GB_apply_op ( // op1 != identity of same types
                    (GB_void *) Swork, op1, op2, scalar, binop_bind1st,
                    A, Context) ;
                // GB_apply_op can only fail if op1/op2 are positional, and
                // positional ops are deferred until after the transpose
                ASSERT (!GB_OP_IS_POSITIONAL (op1)) ;
                ASSERT (!GB_OP_IS_POSITIONAL (op2)) ;
                ASSERT (info == GrB_SUCCESS) ;
                // GB_builder will not need to typecast Swork to T->x, and it
                // may choose to transplant it into T->x
                scode = ccode ;
                #if 0
                if (in_place && !Ax_shallow)
                {
                    // A is being transposed in-place so A->x is no longer
                    // needed.  If A->x is shallow this can be skipped.  T->x
                    // will not be shallow if the op is present.  A->x should
                    // be freed early to free up space for GB_builder.
                    // However, in the current usage, when op is used, A is not
                    // transposed in-place, so this step is not needed.
                    ASSERT (GB_DEAD_CODE) ;
                    GB_FREE (&Ax, A->x_size) ;
                }
                #endif
            }
            else
            {
                // GB_builder will typecast S from atype to ctype if needed.
                // S is a shallow copy of Ax, and must not be modified.
                S = Ax ;
                scode = acode ;
            }

            //------------------------------------------------------------------
            // build the matrix: T = (ctype) A' or op ((xtype) A')
            //------------------------------------------------------------------

            // internally, jwork is freed and then T->x is allocated, so the
            // total high-water memory usage is anz * max (csize,
            // sizeof(int64_t)).  T is always hypersparse.

            // If op is not NULL, then Swork can be transplanted into T in
            // GB_builder, instead.  However, this requires the tuples to be
            // sorted on input, which is possible but rare for GB_transpose.

            info = GB_builder
            (
                T,          // create T using a static header
                ctype,      // T is of type ctype
                avdim,      // T->vlen = A->vdim, always > 1
                avlen,      // T->vdim = A->vlen, always > 1
                C_is_csc,   // T has the same CSR/CSC format as C
                &iwork,     // iwork_handle, becomes T->i on output
                &iwork_size,
                &jwork,     // jwork_handle, freed on output
                &jwork_size,
                &Swork,     // Swork_handle, freed on output
                &Swork_size,
                false,      // tuples are not sorted on input
                true,       // tuples have no duplicates
                anz,        // size of iwork, jwork, and Swork
                true,       // is_matrix: unused
                NULL, NULL, // original I,J indices: not used here
                S,          // array of values of type scode, not modified
                anz,        // number of tuples
                NULL,       // no dup operator needed (input has no duplicates)
                scode,      // type of S or Swork
                Context
            ) ;

            // GB_builder always frees jwork, and either frees iwork or
            // transplants it in to T->i and sets iwork to NULL.  So iwork and
            // jwork are always NULL on output.  GB_builder does not modify S.
            ASSERT (iwork == NULL && jwork == NULL && Swork == NULL) ;

            //------------------------------------------------------------------
            // free prior space and transplant T into C
            //------------------------------------------------------------------

            // Free the prior content of the input matrix, if done in-place.
            // Ap, Ah, and Ai have already been freed, but Ax has not.
            GB_FREE_IN_PLACE_A ;

            if (info != GrB_SUCCESS)
            {
                // out of memory in GB_builder
                GB_FREE_C ;
                return (info) ;
            }

            // Transplant T in to the result C.  The matrix T is not shallow
            // and no typecasting is done, so this will always succeed.
            ASSERT (!GB_JUMBLED (T)) ;
            info = GB_transplant (*Chandle, ctype, &T, Context) ;
            ASSERT (info == GrB_SUCCESS) ;

        }
        else
        {

            //==================================================================
            // transpose via bucket sort
            //==================================================================

            // This method does not operate on the matrix in-place, so it must
            // create a temporary matrix T.  Then the input matrix is freed and
            // replaced with the new matrix T.

            // T = A' and typecast to ctype
            info = GB_transpose_bucket (T, ctype, C_is_csc, A,
                op1, op2, scalar, binop_bind1st,
                nworkspaces_bucket, nthreads_bucket, Context) ;

            // free prior content, if C=A' is being done in-place
            if (in_place_A)
            {
                // Method2: A is transposed in-place; A == (*Chandle)
                ASSERT (A == (*Chandle)) ;
                GB_phbix_free (A) ;
            }
            else if (in_place_C)
            {
                // Method1: C is transposed in-place (C == A aliased)
                GB_phbix_free (C) ;
            }

            if (info != GrB_SUCCESS)
            {
                // out of memory in GB_transpose_bucket
                GB_FREE_C ;
                return (info) ;
            }

            ASSERT_MATRIX_OK (T, "T from bucket", GB0) ;
            ASSERT (GB_JUMBLED_OK (T)) ;

            // allocate the output matrix C (just the header)
            // if *Chandle == NULL, allocate a new header; otherwise reuse
            info = GB_new (Chandle, C_static_header, // old/new header
                ctype, avdim, avlen, GB_Ap_null, C_is_csc,
                GxB_SPARSE, A_hyper_switch, 0, Context) ;
            if (info != GrB_SUCCESS)
            {
                // out of memory
                ASSERT (!in_place) ;            // cannot fail if in-place,
                ASSERT (!C_static_header) ;     // or if C has a static header
                GB_FREE_C ;
                GB_phbix_free (T) ;
                return (info) ;
            }

            // Transplant T back into C and free T.  T is not shallow and
            // no typecast is done so this will always succeed.
            info = GB_transplant (*Chandle, ctype, &T, Context) ;
            ASSERT (info == GrB_SUCCESS) ;
        }
    }
1192 
1193     //--------------------------------------------------------------------------
1194     // get the output matrix
1195     //--------------------------------------------------------------------------
1196 
1197     C = (*Chandle) ;
1198     ASSERT (GB_JUMBLED_OK (C)) ;
1199 
1200     //--------------------------------------------------------------------------
1201     // apply a positional operator, after transposing the matrix
1202     //--------------------------------------------------------------------------
1203 
1204     if (op_is_positional)
1205     {
1206         // the positional operator is applied in-place to the values of C
1207         op1 = save_op1 ;
1208         op2 = save_op2 ;
1209         // Cx = op (C)
1210         info = GB_apply_op ((GB_void *) C->x, op1,  // positional op only
1211             op2, scalar, binop_bind1st, C, Context) ;
1212         if (info != GrB_SUCCESS)
1213         {
1214             // out of memory
1215             GB_FREE_C ;
1216             return (info) ;
1217         }
1218     }
1219 
1220     //--------------------------------------------------------------------------
1221     // conform the result to the desired sparisty structure of A
1222     //--------------------------------------------------------------------------
1223 
1224     // transplant the control settings from A to C
1225     C->hyper_switch = A_hyper_switch ;
1226     C->bitmap_switch = A_bitmap_switch ;
1227     C->sparsity = A_sparsity ;
1228 
1229     ASSERT_MATRIX_OK (C, "C to conform in GB_transpose", GB0) ;
1230 
1231     info = GB_conform (C, Context) ;
1232     if (info != GrB_SUCCESS)
1233     {
1234         // out of memory
1235         GB_FREE_C ;
1236         return (info) ;
1237     }
1238 
1239     ASSERT_MATRIX_OK (*Chandle, "Chandle conformed in GB_transpose", GB0) ;
1240     return (GrB_SUCCESS) ;
1241 }
1242 
1243