1 //------------------------------------------------------------------------------
2 // GB_transpose: C=A' or C=op(A'), with typecasting
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // CALLS: GB_builder
11
12 // TODO:: this is way too complicated. Reduce it to two cases:
13 // C = A' both C and A are passed in, not Chandle
14 // C = C' C is transposed in-place, (C==A aliased)
15 // In both cases, require the header for C and A to be allocated already
16 // (either static or dynamic). A is never modified, unless C==A.
17 // C and A cannot be NULL on input. If C==A then C contains a valid
18 // matrix on input (the input matrix A), otherwise GB_phbix_free(C) is
19 // immediately done. Either header may be static or dynamic.
20
21 // Transpose a matrix, C=A', and optionally apply a unary operator and/or
22 // typecast the values. The transpose may be done in-place, in which case C or
23 // A are modified in-place.
24
25 // The input matrix may have shallow components (even if in-place), and the
26 // output may also have shallow components (even if the input matrix is not
27 // shallow).
28
29 // This function is CSR/CSC agnostic; it sets the output matrix format from
30 // C_is_csc but otherwise ignores the CSR/CSC type of A and C.
31
32 // There are 3 ways to use this function:
33
34 // Method1 (in_place_C): If A_in is NULL, then C = (*Chandle) is transposed
35 // in-place.  If out of memory, (*Chandle) is always returned as NULL, which
36 // frees the input matrix C if the transpose is done in-place.
37 
38 // Method2 (in_place_A): If A_in is not NULL, and Chandle is NULL or
39 // (*Chandle) == A_in, then A is modified in-place, and A_in is not freed.
40
41 // Method3 (C=A'): Otherwise, A_in and Chandle are non-NULL, and C=A' is
42 // computed. If (*Chandle) is NULL on input, then a new header is allocated.
43 // If (*Chandle) is non-NULL on input, it is assumed to be an uninitialized
44 // static header, not obtained from malloc. On output, C->static_header will
45 // be true.
46
47 // The bucket sort is parallel, but not highly scalable. If e=nnz(A) and A is
48 // m-by-n, then at most O(e/n) threads are used. The GB_builder method is more
49 // scalable, but not as fast with a modest number of threads.
50
51 #include "GB_transpose.h"
52 #include "GB_build.h"
53 #include "GB_apply.h"
54
// GB_FREE_ALL: intentionally a no-op.  Error paths in this file release their
// resources explicitly (see GB_FREE_IN_PLACE_A and GB_FREE_C below) rather
// than through a catch-all macro; GB_FREE_ALL is defined only because the
// error-handling macros in the included headers presumably expect it to
// exist — TODO confirm against GB_transpose.h.
55 #define GB_FREE_ALL ;
56
// GB_FREE_IN_PLACE_A: release the prior content of A when the transpose is
// done in-place.  Expands in caller scope and relies on the locals in_place,
// Ap/Ah/Ab/Ai/Ax, their *_size counterparts, and the *_shallow flags.
#define GB_FREE_IN_PLACE_A                                          \
{                                                                   \
    /* if in-place, A's prior arrays are dead: free every        */ \
    /* component not marked shallow (shallow arrays are owned    */ \
    /* elsewhere).  If not in-place, A is a read-only input and  */ \
    /* nothing is freed.  &A itself is never freed here.         */ \
    if (in_place)                                                   \
    {                                                               \
        if (!Ap_shallow) GB_FREE (&Ap, Ap_size) ;                   \
        if (!Ah_shallow) GB_FREE (&Ah, Ah_size) ;                   \
        if (!Ab_shallow) GB_FREE (&Ab, Ab_size) ;                   \
        if (!Ai_shallow) GB_FREE (&Ai, Ai_size) ;                   \
        if (!Ax_shallow) GB_FREE (&Ax, Ax_size) ;                   \
    }                                                               \
}
76
// GB_FREE_C: error-path cleanup of the output matrix.  Expands in caller
// scope and relies on the locals in_place_A and Chandle.
#define GB_FREE_C                                                   \
{                                                                   \
    /* free C and all its contents via its handle — except when */ \
    /* transposing in-place of A, where C aliases the caller's   */ \
    /* input matrix A_in, which must survive an error.           */ \
    if (!in_place_A) GB_Matrix_free (Chandle) ;                     \
}
86
87 //------------------------------------------------------------------------------
88 // GB_transpose
89 //------------------------------------------------------------------------------
90
91 GB_PUBLIC // accessed by the MATLAB tests in GraphBLAS/Test only
GB_transpose(GrB_Matrix * Chandle,GrB_Type ctype,const bool C_is_csc,const GrB_Matrix A_in,const GrB_UnaryOp op1_in,const GrB_BinaryOp op2_in,const GxB_Scalar scalar,bool binop_bind1st,GB_Context Context)92 GrB_Info GB_transpose // C=A', C=(ctype)A or C=op(A')
93 (
94 GrB_Matrix *Chandle, // output matrix C, possibly modified in-place
95 GrB_Type ctype, // desired type of C; if NULL use A->type.
96 // ignored if op is present (cast to op->ztype)
97 const bool C_is_csc, // desired CSR/CSC format of C
98 const GrB_Matrix A_in, // input matrix
99 // no operator is applied if both op1 and op2 are NULL
100 const GrB_UnaryOp op1_in, // unary operator to apply
101 const GrB_BinaryOp op2_in, // binary operator to apply
102 const GxB_Scalar scalar, // scalar to bind to binary operator
103 bool binop_bind1st, // if true, binop(x,A) else binop(A,y)
104 GB_Context Context
105 )
106 {
107
108 //--------------------------------------------------------------------------
109 // check inputs and determine if transpose is done in-place
110 //--------------------------------------------------------------------------
111
112 GrB_Info info ;
113 GBURBLE ("(transpose) ") ;
114 GrB_Matrix A, C ;
115 bool in_place_C, in_place_A ;
116 bool C_static_header = false ;
117 struct GB_Matrix_opaque T_header ;
118 GrB_Matrix T = GB_clear_static_header (&T_header) ;
119
120 if (A_in == NULL)
121 {
122
123 //----------------------------------------------------------------------
124 // Method1 (in_place_C): C = C' ; &C is transposed in-place
125 //----------------------------------------------------------------------
126
127 // GB_transpose (&C, ctype, csc, NULL, op) ;
128 // C=A' is transposed in-place, in the matrix C.
129 // The matrix C is freed if an error occurs and C is set to NULL.
130
131 ASSERT (Chandle != NULL) ; // at least &C or A must be non-NULL
132 ASSERT (*Chandle != NULL) ;
133 A = (*Chandle) ;
134 C = A ; // C must be freed if an error occurs
135 in_place_C = true ; // C is modified in-place
136 in_place_A = false ;
137 ASSERT (A == C && C == (*Chandle)) ;
138 C_static_header = C->static_header ;
139
140 }
141 else if (Chandle == NULL || (*Chandle) == A_in)
142 {
143
144 //----------------------------------------------------------------------
145 // Method2 (in_place_A): A = A' ; A transposed in-place
146 //----------------------------------------------------------------------
147
148 // GB_transpose (NULL, ctype, csc, A, op) ;
149 // GB_transpose (&A, ctype, csc, A, op) ;
150 // C=A' is transposed in-place, in the matrix A.
151 // The matrix A_in is not freed if an error occurs.
152
153 A = A_in ;
154 Chandle = &A ; // C must not be freed if an error occurs
155 C = A ;
156 in_place_C = false ;
157 in_place_A = true ; // A is modified in-place
158 ASSERT (A == C && C == (*Chandle)) ;
159 C_static_header = A->static_header ;
160
161 }
162 else
163 {
164
165 //----------------------------------------------------------------------
166 // Method3 (C=A'): C and A are different; C may be a static header
167 //----------------------------------------------------------------------
168
169 // GB_transpose (&C, ctype, csc, A, op) ;
170 // &C and A are both non-NULL, and not aliased.
171 // C=A' where C is a new matrix constructed here.
172 // The matrix C is freed if an error occurs, and C is set to NULL
173 // unless C has a static header.
174
175 // If C is NULL, a new header is allocated and returned as (*Chandle).
176 // Otherwise, C = (*Chandle) is assumed to be an uninitialized static
177 // header, and (*Chandle) is not modified.
178
179 A = A_in ;
180 C = (*Chandle) ; // NULL, or pre-existing static header
181 in_place_C = false ; // C and A are different matrices
182 in_place_A = false ;
183 ASSERT (A != C && C == (*Chandle)) ;
184 C_static_header = (C != NULL) ;
185 if (C_static_header)
186 {
187 GB_clear_static_header (C) ;
188 }
189 }
190
191 bool in_place = (in_place_A || in_place_C) ;
192
193 ASSERT_MATRIX_OK (A, "A input for GB_transpose", GB0) ;
194 ASSERT_TYPE_OK_OR_NULL (ctype, "ctype for GB_transpose", GB0) ;
195 ASSERT_UNARYOP_OK_OR_NULL (op1_in, "unop for GB_transpose", GB0) ;
196 ASSERT_BINARYOP_OK_OR_NULL (op2_in, "binop for GB_transpose", GB0) ;
197 ASSERT_SCALAR_OK_OR_NULL (scalar, "scalar for GB_transpose", GB0) ;
198 ASSERT (C == (*Chandle)) ; // both may be NULL
199
200 // get the current sparsity control of A
201 float A_hyper_switch = A->hyper_switch ;
202 float A_bitmap_switch = A->bitmap_switch ;
203 int A_sparsity = A->sparsity ;
204
205 // wait if A has pending tuples or zombies, but leave it jumbled
206 GB_MATRIX_WAIT_IF_PENDING_OR_ZOMBIES (A) ;
207 ASSERT (!GB_PENDING (A)) ;
208 ASSERT (!GB_ZOMBIES (A)) ;
209 ASSERT (GB_JUMBLED_OK (A)) ;
210
211 //--------------------------------------------------------------------------
212 // get A
213 //--------------------------------------------------------------------------
214
215 GrB_Type atype = A->type ;
216 size_t asize = atype->size ;
217 GB_Type_code acode = atype->code ;
218
219 int64_t avlen = A->vlen ;
220 int64_t avdim = A->vdim ;
221 int64_t anzmax = A->nzmax ;
222
223 // if in-place, these must be freed when done, whether successful or not
224 int64_t *restrict Ap = A->p ; size_t Ap_size = A->p_size ;
225 int64_t *restrict Ah = A->h ; size_t Ah_size = A->h_size ;
226 int64_t *restrict Ai = A->i ; size_t Ab_size = A->b_size ;
227 int8_t *restrict Ab = A->b ; size_t Ai_size = A->i_size ;
228 GB_void *restrict Ax = (GB_void *) A->x ; size_t Ax_size = A->x_size ;
229
230 bool A_is_bitmap = GB_IS_BITMAP (A) ;
231 bool A_is_packed = GB_is_packed (A) ;
232 bool A_is_hyper = GB_IS_HYPERSPARSE (A) ;
233
234 bool Ap_shallow = A->p_shallow ;
235 bool Ah_shallow = A->h_shallow ;
236 bool Ai_shallow = A->i_shallow ;
237 bool Ax_shallow = A->x_shallow ;
238 bool Ab_shallow = A->b_shallow ;
239
240 int64_t anz = GB_NNZ (A) ;
241 int64_t anvec = A->nvec ;
242 int64_t anvals = A->nvals ;
243
244 //--------------------------------------------------------------------------
245 // determine the max number of threads to use
246 //--------------------------------------------------------------------------
247
248 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
249
250 //--------------------------------------------------------------------------
251 // determine the type of C and get the unary or binary operator
252 //--------------------------------------------------------------------------
253
254 // If a unary or binary operator is present, C is always returned as
255 // the ztype of the operator. The input ctype is ignored.
256
257 GrB_UnaryOp op1 = NULL ;
258 GrB_BinaryOp op2 = NULL ;
259 GB_Opcode opcode = GB_NOP_opcode ;
260
261 if (op1_in != NULL)
262 {
263 // get the unary operator
264 opcode = op1_in->opcode ;
265 if (atype == op1_in->xtype && opcode == GB_IDENTITY_opcode)
266 {
267 // op1 is a built-in identity operator, with the same type as A, so
268 // do not apply the operator and do not typecast. op1 is NULL.
269 ctype = atype ;
270 }
271 else
272 {
273 // apply the operator, z=op1(x)
274 op1 = op1_in ;
275 ctype = op1->ztype ;
276 }
277 }
278 else if (op2_in != NULL)
279 {
280 // get the binary operator
281 GrB_Type op2_intype = binop_bind1st ? op2_in->xtype : op2_in->ytype ;
282 opcode = op2_in->opcode ;
283 // only GB_apply calls GB_transpose with op2_in, and it ensures this
284 // condition holds: the first(A,y), second(x,A), and any(...) have
285 // been renamed to identity(A), so these cases do not occur here.
286 ASSERT (!
287 ((opcode == GB_ANY_opcode) ||
288 (opcode == GB_FIRST_opcode && !binop_bind1st) ||
289 (opcode == GB_SECOND_opcode && binop_bind1st))) ;
290 // apply the operator, z=op2(A,y) or op2(x,A)
291 op2 = op2_in ;
292 ctype = op2->ztype ;
293 }
294 else
295 {
296 // no operator. both op1 and op2 are NULL
297 if (ctype == NULL)
298 {
299 // no typecasting if ctype is NULL
300 ctype = atype ;
301 }
302 }
303
304 GB_Type_code ccode = ctype->code ;
305 size_t csize = ctype->size ;
306
307 //--------------------------------------------------------------------------
308 // check for positional operators
309 //--------------------------------------------------------------------------
310
311 bool op_is_positional = GB_OPCODE_IS_POSITIONAL (opcode) ;
312 GrB_UnaryOp save_op1 = op1 ;
313 GrB_BinaryOp save_op2 = op2 ;
314 if (op_is_positional)
315 {
316 // do not apply the op until after the transpose
317 op1 = NULL ;
318 op2 = NULL ;
319 // replace op1 with the ONE operator, as a placeholder
320 ASSERT (ctype == GrB_INT64 || ctype == GrB_INT32) ;
321 op1 = (ctype == GrB_INT64) ? GxB_ONE_INT64 : GxB_ONE_INT32 ;
322 }
323
324 //--------------------------------------------------------------------------
325 // C = A'
326 //--------------------------------------------------------------------------
327
328 ASSERT (GB_IMPLIES (avlen == 0 || avdim == 0, anz == 0)) ;
329
330 bool allocate_new_Cx = (ctype != atype) || (op1 != NULL) || (op2 != NULL) ;
331
332 if (anz == 0)
333 {
334
335 //======================================================================
336 // quick return if A is empty
337 //======================================================================
338
339 // free prior space of A, if transpose is done in-place
340 GB_FREE_IN_PLACE_A ;
341
342 // A is empty; create a new empty matrix C, with the new type and
343 // dimensions. C is hypersparse for now but may convert when
344 // returned.
345 info = GB_new_bix (Chandle, C_static_header, // hyper, old or new header
346 ctype, avdim, avlen, GB_Ap_calloc, C_is_csc,
347 GxB_HYPERSPARSE, true, A_hyper_switch, 1, 1, true, Context) ;
348 if (info != GrB_SUCCESS)
349 {
350 // out of memory
351 GB_FREE_C ;
352 return (info) ;
353 }
354 ASSERT_MATRIX_OK (*Chandle, "C transpose empty", GB0) ;
355 ASSERT (!GB_JUMBLED (*Chandle)) ;
356
357 }
358 else if (A_is_packed)
359 {
360
361 //======================================================================
362 // transpose a packed or bitmap matrix or vector
363 //======================================================================
364
365 // A is packed if it is either: (a) bitmap, (b) full, or (c) sparse or
366 // hypersparse with all entries present, no zombies, no pending tuples,
367 // and not jumbled. For (c), the matrix A can be treated as if it was
368 // full, and the pattern (A->p, A->h, and A->i) can be ignored.
369
370 int sparsity = (A_is_bitmap) ? GxB_BITMAP : GxB_FULL ;
371 bool T_cheap = // T can be done quickly if:
372 (avlen == 1 || avdim == 1) // A is a row or column vector,
373 && op1 == NULL && op2 == NULL // no operator to apply, and
374 && atype == ctype ; // no typecasting
375
376 // allocate T
377 if (T_cheap)
378 {
379 // just initialize the static header of T, not T->b or T->x
380 info = GB_new (&T, true, // bitmap or full, static header
381 ctype, avdim, avlen, GB_Ap_null, C_is_csc,
382 sparsity, A_hyper_switch, 1, Context) ;
383 ASSERT (info == GrB_SUCCESS) ;
384 }
385 else
386 {
387 // allocate all of T, including T->b and T->x
388 info = GB_new_bix (&T, true, // bitmap or full, static header
389 ctype, avdim, avlen, GB_Ap_null, C_is_csc,
390 sparsity, true, A_hyper_switch, 1, anzmax, true, Context) ;
391 }
392
393 if (info != GrB_SUCCESS)
394 {
395 // out of memory
396 GB_FREE_C ;
397 return (info) ;
398 }
399
400 T->magic = GB_MAGIC ;
401 if (sparsity == GxB_BITMAP)
402 {
403 T->nvals = anvals ; // for bitmap case only
404 }
405
406 //----------------------------------------------------------------------
407 // T = A'
408 //----------------------------------------------------------------------
409
410 // Since A is full, # threads to use is nthreads, and the
411 // nworkspaces parameter is not used
412
413 int64_t anz_held = GB_NNZ_HELD (A) ;
414 ASSERT (anz > 0 && anz_held > 0) ;
415 int nthreads = GB_nthreads (anz_held + anvec, chunk, nthreads_max) ;
416
417 if (T_cheap)
418 {
419 // no work to do. Transposing does not change A->b or A->x
420 T->b = Ab ; T->b_size = Ab_size ;
421 T->x = Ax ; T->x_size = Ax_size ;
422 T->nzmax = A->nzmax ;
423 if (in_place)
424 {
425 // transplant A->b and A->x into T
426 T->b_shallow = Ab_shallow ;
427 T->x_shallow = Ax_shallow ;
428 Ab = NULL ; // do not free prior Ab
429 Ax = NULL ; // do not free prior Ax
430 A->b = NULL ;
431 A->x = NULL ;
432 }
433 else
434 {
435 // T is a purely shallow copy of A
436 T->b_shallow = (Ab != NULL) ;
437 T->x_shallow = true ;
438 }
439 }
440 else if (op1 == NULL && op2 == NULL)
441 {
442 // do not apply an operator; optional typecast to C->type
443 GB_transpose_ix (T, A, NULL, NULL, 0, nthreads) ;
444 }
445 else
446 {
447 // apply an operator, C has type op->ztype
448 GB_transpose_op (T, op1, op2, scalar, binop_bind1st, A,
449 NULL, NULL, 0, nthreads) ;
450 }
451
452 ASSERT_MATRIX_OK (T, "T dense/bitmap", GB0) ;
453 ASSERT (!GB_JUMBLED (T)) ;
454
455 // free prior space of A, if transpose is done in-place
456 GB_FREE_IN_PLACE_A ;
457
458 //----------------------------------------------------------------------
459 // transplant T into C
460 //----------------------------------------------------------------------
461
462 // allocate the output matrix C as a full or bitmap matrix
463 // if *Chandle == NULL, allocate a new header; otherwise reuse existing
464 info = GB_new (Chandle, C_static_header, // bitmap/full, old/new header
465 ctype, avdim, avlen, GB_Ap_null, C_is_csc,
466 sparsity, A_hyper_switch, 0, Context) ;
467 if (info != GrB_SUCCESS)
468 {
469 // out of memory
470 ASSERT (!in_place) ; // cannot fail if in-place,
471 ASSERT (!C_static_header) ; // or if C has a static header
472 GB_FREE_C ;
473 GB_phbix_free (T) ;
474 return (info) ;
475 }
476
477 // Transplant T into the result C, making a copy if T is shallow
478 info = GB_transplant (*Chandle, ctype, &T, Context) ;
479 if (info != GrB_SUCCESS)
480 {
481 // out of memory
482 GB_FREE_C ;
483 return (info) ;
484 }
485 ASSERT_MATRIX_OK (*Chandle, "Chandle, GB_transpose, bitmap/full", GB0) ;
486
487 }
488 else if (avdim == 1)
489 {
490
491 //======================================================================
492 // transpose a "column" vector into a "row"
493 //======================================================================
494
495 // transpose a vector (avlen-by-1) into a "row" matrix (1-by-avlen).
496 // A must be sorted first.
497 ASSERT_MATRIX_OK (A, "the vector A must already be sorted", GB0) ;
498 GB_MATRIX_WAIT (A) ;
499 ASSERT (!GB_JUMBLED (A)) ;
500
501 //----------------------------------------------------------------------
502 // allocate space
503 //----------------------------------------------------------------------
504
505 // Allocate the header of C, with no C->p, C->h, C->i, C->b, or C->x
506 // content, and initialize the type and dimension of C. The new matrix
507 // is hypersparse. This step does not allocate anything if in-place.
508
509 // if *Chandle == NULL, allocate a new header; otherwise reuse existing
510 info = GB_new (Chandle, C_static_header, // hyper; old or new header
511 ctype, 1, avlen, GB_Ap_null, C_is_csc,
512 GxB_HYPERSPARSE, A_hyper_switch, 0, Context) ;
513 if (info != GrB_SUCCESS)
514 {
515 // out of memory
516 ASSERT (!in_place) ; // cannot fail if in-place,
517 ASSERT (!C_static_header) ; // or if C has a static header
518 GB_FREE_C ;
519 return (info) ;
520 }
521
522 if (!in_place)
523 {
524 C = (*Chandle) ;
525 }
526 else
527 {
528 ASSERT (A == C && A == (*Chandle)) ;
529 }
530
531 // allocate new space for the values and pattern
532 int64_t *restrict Cp = NULL ; size_t Cp_size = 0 ;
533 int64_t *restrict Ci = NULL ; size_t Ci_size = 0 ;
534 GB_void *restrict Cx = NULL ; size_t Cx_size = 0 ;
535 bool ok = true ;
536 Cp = GB_MALLOC (anz+1, int64_t, &Cp_size) ;
537 Ci = GB_MALLOC (anz , int64_t, &Ci_size) ;
538 ok = (Cp != NULL && Ci != NULL) ;
539
540 if (allocate_new_Cx)
541 {
542 // allocate new space for the new typecasted numerical values of C
543 ASSERT (anz > 0) ;
544 Cx = GB_MALLOC (anz * ctype->size, GB_void, &Cx_size) ;
545 ok = ok && (Cx != NULL) ;
546 }
547
548 if (!ok)
549 {
550 // out of memory
551 GB_FREE (&Cp, Cp_size) ;
552 GB_FREE (&Ci, Ci_size) ;
553 GB_FREE (&Cx, Cx_size) ;
554 GB_FREE_IN_PLACE_A ;
555 GB_FREE_C ;
556 return (GrB_OUT_OF_MEMORY) ;
557 }
558
559 //----------------------------------------------------------------------
560 // fill the content of C
561 //----------------------------------------------------------------------
562
563 // numerical values: apply the operator, typecast, or make shallow copy
564 if (op1 != NULL || op2 != NULL)
565 {
566 // Cx = op (A)
567 info = GB_apply_op ( // op1 != identity of same types
568 (GB_void *) Cx, op1, op2, scalar, binop_bind1st, A, Context) ;
569 // GB_apply_op can only fail if op1/op2 are positional
570 ASSERT (!GB_OP_IS_POSITIONAL (op1)) ;
571 ASSERT (!GB_OP_IS_POSITIONAL (op2)) ;
572 ASSERT (info == GrB_SUCCESS) ;
573 C->x = Cx ; C->x_size = Cx_size ;
574 C->x_shallow = false ;
575 // prior Ax will be freed
576 }
577 else if (ctype != atype)
578 {
579 // copy the values from A into C and cast from atype to ctype
580 C->x = Cx ; C->x_size = Cx_size ;
581 C->x_shallow = false ;
582 GB_cast_array (Cx, ccode, Ax, acode, Ab, asize, anz, 1) ;
583 // prior Ax will be freed
584 }
585 else // ctype == atype
586 {
587 // no type change; numerical values of C are a shallow copy of A.
588 C->x = Ax ; C->x_size = Ax_size ;
589 C->x_shallow = (in_place) ? Ax_shallow : true ;
590 Ax = NULL ; // do not free prior Ax
591 }
592
593 // each entry in A becomes a non-empty vector in C
594 // C is a hypersparse 1-by-avlen matrix
595 C->h = Ai ; C->h_size = Ai_size ;
596 C->h_shallow = (in_place) ? Ai_shallow : true ;
597 Ai = NULL ; // do not free prior Ai
598 // C->p = 0:anz and C->i = zeros (1,anz), newly allocated
599 C->plen = anz ;
600 C->nvec = anz ;
601 C->nvec_nonempty = anz ;
602 C->i = Ci ; C->i_size = Ci_size ;
603 C->p = Cp ; C->p_size = Cp_size ;
604 // fill the vector pointers C->p
605 int nthreads = GB_nthreads (anz, chunk, nthreads_max) ;
606 int64_t k ;
607 #pragma omp parallel for num_threads(nthreads) schedule(static)
608 for (k = 0 ; k < anz ; k++)
609 {
610 Ci [k] = 0 ;
611 Cp [k] = k ;
612 }
613 Cp [anz] = anz ;
614
615 C->nzmax = anz ;
616 C->magic = GB_MAGIC ;
617
618 // free prior space of A, if transpose is done in-place
619 GB_FREE_IN_PLACE_A ;
620
621 }
622 else if (avlen == 1)
623 {
624
625 //======================================================================
626 // transpose a "row" into a "column" vector
627 //======================================================================
628
629 // transpose a "row" matrix (1-by-avdim) into a vector (avdim-by-1).
630 // if A->vlen is 1, all vectors of A are implicitly sorted
631 ASSERT_MATRIX_OK (A, "1-by-n input A already sorted", GB0) ;
632
633 //----------------------------------------------------------------------
634 // allocate workspace, if needed
635 //----------------------------------------------------------------------
636
637 int ntasks = 0 ;
638 int nth = GB_nthreads (avdim, chunk, nthreads_max) ;
639 GB_WERK_DECLARE (Count, int64_t) ;
640 if (nth > 1 && !A_is_hyper)
641 {
642 // ntasks and Count are not needed if nth == 1
643 ntasks = 8 * nth ;
644 ntasks = GB_IMIN (ntasks, avdim) ;
645 ntasks = GB_IMAX (ntasks, 1) ;
646 GB_WERK_PUSH (Count, ntasks+1, int64_t) ;
647 if (Count == NULL)
648 {
649 // out of memory
650 GB_FREE_C ;
651 return (GrB_OUT_OF_MEMORY) ;
652 }
653 }
654
655 // Allocate the header of C, with no C->p, C->h, C->i, or C->x content,
656 // and initialize the type and dimension of C. If in-place, A->p,
657 // A->h, A->i, and A->x are all NULL. The new matrix is sparse, but
658 // can be CSR or CSC. This step does not allocate anything if in
659 // place.
660
661 // if *Chandle == NULL, allocate a new header; otherwise reuse existing
662 info = GB_new (Chandle, C_static_header, // sparse; old or new header
663 ctype, avdim, 1, GB_Ap_null, C_is_csc,
664 GxB_SPARSE, A_hyper_switch, 0, Context) ;
665 if (info != GrB_SUCCESS)
666 {
667 // out of memory
668 ASSERT (!in_place) ; // cannot fail if in-place,
669 ASSERT (!C_static_header) ; // or if C has a static header
670 GB_FREE_C ;
671 GB_WERK_POP (Count, int64_t) ;
672 return (info) ;
673 }
674
675 if (!in_place)
676 {
677 C = (*Chandle) ;
678 }
679 else
680 {
681 ASSERT (A == C && A == (*Chandle)) ;
682 }
683
684 // allocate new space for the values and pattern
685 int64_t *restrict Cp = NULL ; size_t Cp_size = 0 ;
686 int64_t *restrict Ci = NULL ; size_t Ci_size = 0 ;
687 GB_void *restrict Cx = NULL ; size_t Cx_size = 0 ;
688 bool ok = true ;
689 Cp = GB_MALLOC (2, int64_t, &Cp_size) ;
690 ok = ok && (Cp != NULL) ;
691 if (!A_is_hyper)
692 {
693 // A is sparse, so new space is needed for Ci
694 Ci = GB_MALLOC (anz, int64_t, &Ci_size) ;
695 ok = ok && (Ci != NULL) ;
696 }
697
698 if (allocate_new_Cx)
699 {
700 // allocate new space for the new typecasted numerical values of C
701 Cx = GB_MALLOC (anz * ctype->size, GB_void, &Cx_size) ;
702 ok = ok && (Cx != NULL) ;
703 }
704
705 if (!ok)
706 {
707 // out of memory
708 GB_FREE (&Cp, Cp_size) ;
709 GB_FREE (&Ci, Ci_size) ;
710 GB_FREE (&Cx, Cx_size) ;
711 GB_WERK_POP (Count, int64_t) ;
712 GB_FREE_IN_PLACE_A ;
713 GB_FREE_C ;
714 return (GrB_OUT_OF_MEMORY) ;
715 }
716
717 Cp [0] = 0 ;
718 Cp [1] = 0 ;
719
720 //----------------------------------------------------------------------
721 // numerical values of C: apply the op, typecast, or make shallow copy
722 //----------------------------------------------------------------------
723
724 // numerical values: apply the operator, typecast, or make shallow copy
725 if (op1 != NULL || op2 != NULL)
726 {
727 // Cx = op (A)
728 info = GB_apply_op ( // op1 != identity of same types
729 (GB_void *) Cx, op1, op2, scalar, binop_bind1st, A, Context) ;
730 // GB_apply_op can only fail if op1/op2 are positional
731 ASSERT (!GB_OP_IS_POSITIONAL (op1)) ;
732 ASSERT (!GB_OP_IS_POSITIONAL (op2)) ;
733 ASSERT (info == GrB_SUCCESS) ;
734 C->x = Cx ; C->x_size = Cx_size ;
735 C->x_shallow = false ;
736 // prior Ax will be freed
737 }
738 else if (ctype != atype)
739 {
740 // copy the values from A into C and cast from atype to ctype
741 C->x = Cx ; C->x_size = Cx_size ;
742 C->x_shallow = false ;
743 GB_cast_array (Cx, ccode, Ax, acode, Ab, asize, anz, 1) ;
744 // prior Ax will be freed
745 }
746 else // ctype == atype
747 {
748 // no type change; numerical values of C are a shallow copy of A
749 C->x = Ax ; C->x_size = Ax_size ;
750 C->x_shallow = (in_place) ? Ax_shallow : true ;
751 Ax = NULL ; // do not free prior Ax
752 }
753
754 //----------------------------------------------------------------------
755 // pattern of C
756 //----------------------------------------------------------------------
757
758 if (A_is_hyper)
759 {
760
761 //------------------------------------------------------------------
762 // each non-empty vector in A becomes an entry in C
763 //------------------------------------------------------------------
764
765 C->i = Ah ; C->i_size = Ah_size ;
766 C->i_shallow = (in_place) ? Ah_shallow : true ;
767 ASSERT (anvec == anz) ;
768 Ah = NULL ; // do not free prior Ah
769
770 }
771 else
772 {
773
774 //------------------------------------------------------------------
775 // find the non-empty vectors of A, which become entries in C
776 //------------------------------------------------------------------
777
778 ASSERT (Ah == NULL) ;
779
780 if (nth == 1)
781 {
782
783 //--------------------------------------------------------------
784 // construct Ci with a single thread
785 //--------------------------------------------------------------
786
787 int64_t k = 0 ;
788 for (int64_t j = 0 ; j < avdim ; j++)
789 {
790 if (Ap [j] < Ap [j+1])
791 {
792 Ci [k++] = j ;
793 }
794 }
795 ASSERT (k == anz) ;
796
797 }
798 else
799 {
800
801 //--------------------------------------------------------------
802 // construct Ci in parallel
803 //--------------------------------------------------------------
804
805 int tid ;
806 #pragma omp parallel for num_threads(nth) schedule(dynamic,1)
807 for (tid = 0 ; tid < ntasks ; tid++)
808 {
809 int64_t jstart, jend, k = 0 ;
810 GB_PARTITION (jstart, jend, avdim, tid, ntasks) ;
811 for (int64_t j = jstart ; j < jend ; j++)
812 {
813 if (Ap [j] < Ap [j+1])
814 {
815 k++ ;
816 }
817 }
818 Count [tid] = k ;
819 }
820
821 GB_cumsum (Count, ntasks, NULL, 1, NULL) ;
822 ASSERT (Count [ntasks] == anz) ;
823
824 #pragma omp parallel for num_threads(nth) schedule(dynamic,1)
825 for (tid = 0 ; tid < ntasks ; tid++)
826 {
827 int64_t jstart, jend, k = Count [tid] ;
828 GB_PARTITION (jstart, jend, avdim, tid, ntasks) ;
829 for (int64_t j = jstart ; j < jend ; j++)
830 {
831 if (Ap [j] < Ap [j+1])
832 {
833 Ci [k++] = j ;
834 }
835 }
836 }
837 }
838
839 #ifdef GB_DEBUG
840 int64_t k = 0 ;
841 for (int64_t j = 0 ; j < avdim ; j++)
842 {
843 if (Ap [j] < Ap [j+1])
844 {
845 ASSERT (Ci [k] == j) ;
846 k++ ;
847 }
848 }
849 ASSERT (k == anz) ;
850 #endif
851
852 C->i = Ci ; C->i_size = Ci_size ;
853 C->i_shallow = false ;
854 }
855
856 //---------------------------------------------------------------------
857 // vector pointers of C
858 //---------------------------------------------------------------------
859
860 // C->p = [0 anz] and C->h = NULL
861 ASSERT (C->plen == 1) ;
862 ASSERT (C->nvec == 1) ;
863 ASSERT (C->h == NULL) ;
864 C->p = Cp ; C->p_size = Cp_size ;
865 C->p_shallow = false ;
866 C->nvec_nonempty = (anz == 0) ? 0 : 1 ;
867 // fill the vector pointers C->p
868 Cp [0] = 0 ;
869 Cp [1] = anz ;
870 C->nzmax = anz ;
871 C->magic = GB_MAGIC ;
872 ASSERT (!GB_JUMBLED (C)) ;
873
874 // free prior space of A, if transpose done in-place, and free workspace
875 GB_FREE_IN_PLACE_A ;
876 GB_WERK_POP (Count, int64_t) ;
877
878 }
879 else
880 {
881
882 //======================================================================
883 // transpose a general sparse or hypersparse matrix
884 //======================================================================
885
886 ASSERT_MATRIX_OK (A, "A for GB_transpose", GB0) ;
887
888 // T=A' with optional typecasting, or T=op(A')
889
890 //----------------------------------------------------------------------
891 // select the method
892 //----------------------------------------------------------------------
893
894 int nworkspaces_bucket, nthreads_bucket ;
895 bool use_builder = GB_transpose_method (A,
896 &nworkspaces_bucket, &nthreads_bucket, Context) ;
897
898 //----------------------------------------------------------------------
899 // transpose the matrix with the selected method
900 //----------------------------------------------------------------------
901
902 if (use_builder)
903 {
904
905 //==================================================================
906 // transpose via GB_builder
907 //==================================================================
908
909 //------------------------------------------------------------------
910 // allocate and create iwork
911 //------------------------------------------------------------------
912
913 // allocate iwork of size anz
914 int64_t *iwork = NULL ; size_t iwork_size = 0 ;
915 iwork = GB_MALLOC (anz, int64_t, &iwork_size) ;
916 if (iwork == NULL)
917 {
918 // out of memory
919 GB_FREE_C ;
920 return (GrB_OUT_OF_MEMORY) ;
921 }
922
923 // Construct the "row" indices of C, which are "column" indices of
924 // A. This array becomes the permanent T->i on output. This phase
925 // must be done before Chandle is created below, since that step
926 // destroys A.
927
928 info = GB_extract_vector_list (iwork, A, Context) ;
929 if (info != GrB_SUCCESS)
930 {
931 // out of memory
932 GB_FREE (&iwork, iwork_size) ;
933 GB_FREE_C ;
934 return (info) ;
935 }
936
937 //------------------------------------------------------------------
938 // allocate the output matrix and additional space (jwork and S)
939 //------------------------------------------------------------------
940
941 // Allocate the header of C, with no C->p, C->h, C->i, or C->x
942 // content, and initialize the type and dimension of C. If in
943 // place, A->p, A->h, A->i, and A->x are all NULL. The new matrix
944 // is hypersparse, but can be CSR or CSC. This step does not
945 // allocate anything if in-place.
946
947 // if *Chandle == NULL, allocate a new header; otherwise reuse
948 info = GB_new (Chandle, C_static_header, // hyper, old or new header
949 ctype, avdim, avlen, GB_Ap_null, C_is_csc,
950 GxB_HYPERSPARSE, A_hyper_switch, 0, Context) ;
951 if (info != GrB_SUCCESS)
952 {
953 // out of memory
954 ASSERT (!in_place) ; // cannot fail if in-place,
955 ASSERT (!C_static_header) ; // or if C has a static header
956 GB_FREE (&iwork, iwork_size) ;
957 GB_FREE_C ;
958 return (info) ;
959 }
960
961 if (!in_place)
962 {
963 C = (*Chandle) ;
964 }
965 else
966 {
967 ASSERT (A == C && A == (*Chandle)) ;
968 }
969
970 // if in_place, the prior Ap and Ah can now be freed
971 if (in_place)
972 {
973 if (!Ap_shallow) GB_FREE (&Ap, Ap_size) ;
974 if (!Ah_shallow) GB_FREE (&Ah, Ah_size) ;
975 }
976
977 int64_t *jwork = NULL ; size_t jwork_size = 0 ;
978 GB_Type_code scode ;
979 GB_void *S = NULL ;
980 GB_void *Swork = NULL ; size_t Swork_size = 0 ;
981
982 // for the GB_builder method, if the transpose is done in-place and
983 // A->i is not shallow, A->i can be used and then freed.
984 // Otherwise, A->i is not modified at all.
985 bool ok = true ;
986 bool recycle_Ai = (in_place && !Ai_shallow) ;
987 if (!recycle_Ai)
988 {
989 // allocate jwork of size anz
990 jwork = GB_MALLOC (anz, int64_t, &jwork_size) ;
991 ok = ok && (jwork != NULL) ;
992 }
993
994 if (op1 != NULL || op2 != NULL)
995 {
996 // allocate Swork of size anz * csize
997 Swork = GB_MALLOC (anz * csize, GB_void, &Swork_size) ;
998 ok = ok && (Swork != NULL) ;
999 }
1000
1001 if (!ok)
1002 {
1003 // out of memory
1004 GB_FREE (&iwork, iwork_size) ;
1005 GB_FREE (&jwork, jwork_size) ;
1006 GB_FREE (&Swork, Swork_size) ;
1007 GB_FREE_IN_PLACE_A ;
1008 GB_FREE_C ;
1009 return (GrB_OUT_OF_MEMORY) ;
1010 }
1011
1012 //------------------------------------------------------------------
1013 // construct jwork and Swork
1014 //------------------------------------------------------------------
1015
1016 // "row" indices of A become "column" indices of C
1017 if (recycle_Ai)
1018 {
1019 // Ai is used as workspace for the "column" indices of C.
1020 // jwork is a shallow copy of Ai, and is freed by GB_builder.
1021 jwork = Ai ;
1022 jwork_size = Ai_size ;
1023 ASSERT (in_place) ;
1024 // set Ai to NULL so it is not freed by GB_FREE_IN_PLACE_A
1025 Ai = NULL ;
1026 }
1027 else
1028 {
1029 // jwork = Ai, making a deep copy. jwork is freed by
1030 // GB_builder. A->i is not modified, even if out of memory.
1031 GB_memcpy (jwork, Ai, anz * sizeof (int64_t), nthreads_max) ;
1032 }
1033
1034 // numerical values: apply the op, typecast, or make shallow copy
1035 if (op1 != NULL || op2 != NULL)
1036 {
1037 // Swork = op (A)
1038 info = GB_apply_op ( // op1 != identity of same types
1039 (GB_void *) Swork, op1, op2, scalar, binop_bind1st,
1040 A, Context) ;
1041 // GB_apply_op can only fail if op1/op2 are positional
1042 ASSERT (!GB_OP_IS_POSITIONAL (op1)) ;
1043 ASSERT (!GB_OP_IS_POSITIONAL (op2)) ;
1044 ASSERT (info == GrB_SUCCESS) ;
1045 // GB_builder will not need to typecast Swork to T->x, and it
1046 // may choose to transplant it into T->x
1047 scode = ccode ;
1048 #if 0
1049 if (in_place && !Ax_shallow)
1050 {
1051 // A is being transposed in-place so A->x is no longer
1052 // needed. If A->x is shallow this can be skipped. T->x
1053 // will not be shallow if the op is present. A->x should
1054 // be freed early to free up space for GB_builder.
1055 // However, in the current usage, when op is used, A is not
1056 // transposed in-place, so this step is not needed.
1057 ASSERT (GB_DEAD_CODE) ;
1058 GB_FREE (&Ax, A->x_size) ;
1059 }
1060 #endif
1061 }
1062 else
1063 {
1064 // GB_builder will typecast S from atype to ctype if needed.
1065 // S is a shallow copy of Ax, and must not be modified.
1066 S = Ax ;
1067 scode = acode ;
1068 }
1069
1070 //------------------------------------------------------------------
1071 // build the matrix: T = (ctype) A' or op ((xtype) A')
1072 //------------------------------------------------------------------
1073
1074 // internally, jwork is freed and then T->x is allocated, so the
1075 // total high-water memory usage is anz * max (csize,
1076 // sizeof(int64_t)). T is always hypersparse.
1077
1078 // If op is not NULL, then Swork can be transplanted into T in
1079 // GB_builder, instead. However, this requires the tuples to be
1080 // sorted on input, which is possible but rare for GB_transpose.
1081
1082 info = GB_builder
1083 (
1084 T, // create T using a static header
1085 ctype, // T is of type ctype
1086 avdim, // T->vlen = A->vdim, always > 1
1087 avlen, // T->vdim = A->vlen, always > 1
1088 C_is_csc, // T has the same CSR/CSC format as C
1089 &iwork, // iwork_handle, becomes T->i on output
1090 &iwork_size,
1091 &jwork, // jwork_handle, freed on output
1092 &jwork_size,
1093 &Swork, // Swork_handle, freed on output
1094 &Swork_size,
1095 false, // tuples are not sorted on input
1096 true, // tuples have no duplicates
1097 anz, // size of iwork, jwork, and Swork
1098 true, // is_matrix: unused
1099 NULL, NULL, // original I,J indices: not used here
1100 S, // array of values of type scode, not modified
1101 anz, // number of tuples
1102 NULL, // no dup operator needed (input has no duplicates)
1103 scode, // type of S or Swork
1104 Context
1105 ) ;
1106
1107 // GB_builder always frees jwork, and either frees iwork or
        // transplants it into T->i and sets iwork to NULL.  So iwork and
1109 // jwork are always NULL on output. GB_builder does not modify S.
1110 ASSERT (iwork == NULL && jwork == NULL && Swork == NULL) ;
1111
1112 //------------------------------------------------------------------
1113 // free prior space and transplant T into C
1114 //------------------------------------------------------------------
1115
1116 // Free the prior content of the input matrix, if done in-place.
1117 // Ap, Ah, and Ai have already been freed, but Ax has not.
1118 GB_FREE_IN_PLACE_A ;
1119
1120 if (info != GrB_SUCCESS)
1121 {
1122 // out of memory in GB_builder
1123 GB_FREE_C ;
1124 return (info) ;
1125 }
1126
        // Transplant T into the result C.  The matrix T is not shallow
1128 // and no typecasting is done, so this will always succeed.
1129 ASSERT (!GB_JUMBLED (T)) ;
1130 info = GB_transplant (*Chandle, ctype, &T, Context) ;
1131 ASSERT (info == GrB_SUCCESS) ;
1132
1133 }
1134 else
1135 {
1136
1137 //==================================================================
1138 // transpose via bucket sort
1139 //==================================================================
1140
1141 // This method does not operate on the matrix in-place, so it must
1142 // create a temporary matrix T. Then the input matrix is freed and
1143 // replaced with the new matrix T.
1144
1145 // T = A' and typecast to ctype
1146 info = GB_transpose_bucket (T, ctype, C_is_csc, A,
1147 op1, op2, scalar, binop_bind1st,
1148 nworkspaces_bucket, nthreads_bucket, Context) ;
1149
1150 // free prior content, if C=A' is being done in-place
1151 if (in_place_A)
1152 {
1153 ASSERT (A == (*Chandle)) ;
1154 GB_phbix_free (A) ;
1155 }
1156 else if (in_place_C)
1157 {
1158 GB_phbix_free (C) ;
1159 }
1160
1161 if (info != GrB_SUCCESS)
1162 {
1163 // out of memory in GB_transpose_bucket
1164 GB_FREE_C ;
1165 return (info) ;
1166 }
1167
1168 ASSERT_MATRIX_OK (T, "T from bucket", GB0) ;
1169 ASSERT (GB_JUMBLED_OK (T)) ;
1170
1171 // allocate the output matrix C (just the header)
1172 // if *Chandle == NULL, allocate a new header; otherwise reuse
1173 info = GB_new (Chandle, C_static_header, // old/new header
1174 ctype, avdim, avlen, GB_Ap_null, C_is_csc,
1175 GxB_SPARSE, A_hyper_switch, 0, Context) ;
1176 if (info != GrB_SUCCESS)
1177 {
1178 // out of memory
1179 ASSERT (!in_place) ; // cannot fail if in-place,
1180 ASSERT (!C_static_header) ; // or if C has a static header
1181 GB_FREE_C ;
1182 GB_phbix_free (T) ;
1183 return (info) ;
1184 }
1185
1186 // Transplant T back into C and free T. T is not shallow and
1187 // no typecast is done so this will always succeed.
1188 info = GB_transplant (*Chandle, ctype, &T, Context) ;
1189 ASSERT (info == GrB_SUCCESS) ;
1190 }
1191 }
1192
1193 //--------------------------------------------------------------------------
1194 // get the output matrix
1195 //--------------------------------------------------------------------------
1196
1197 C = (*Chandle) ;
1198 ASSERT (GB_JUMBLED_OK (C)) ;
1199
1200 //--------------------------------------------------------------------------
1201 // apply a positional operator, after transposing the matrix
1202 //--------------------------------------------------------------------------
1203
1204 if (op_is_positional)
1205 {
1206 // the positional operator is applied in-place to the values of C
1207 op1 = save_op1 ;
1208 op2 = save_op2 ;
1209 // Cx = op (C)
1210 info = GB_apply_op ((GB_void *) C->x, op1, // positional op only
1211 op2, scalar, binop_bind1st, C, Context) ;
1212 if (info != GrB_SUCCESS)
1213 {
1214 // out of memory
1215 GB_FREE_C ;
1216 return (info) ;
1217 }
1218 }
1219
1220 //--------------------------------------------------------------------------
    // conform the result to the desired sparsity structure of A
1222 //--------------------------------------------------------------------------
1223
1224 // transplant the control settings from A to C
1225 C->hyper_switch = A_hyper_switch ;
1226 C->bitmap_switch = A_bitmap_switch ;
1227 C->sparsity = A_sparsity ;
1228
1229 ASSERT_MATRIX_OK (C, "C to conform in GB_transpose", GB0) ;
1230
1231 info = GB_conform (C, Context) ;
1232 if (info != GrB_SUCCESS)
1233 {
1234 // out of memory
1235 GB_FREE_C ;
1236 return (info) ;
1237 }
1238
1239 ASSERT_MATRIX_OK (*Chandle, "Chandle conformed in GB_transpose", GB0) ;
1240 return (GrB_SUCCESS) ;
1241 }
1242
1243