1 /*
2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3  *
4  * This code is free software; you can redistribute it and/or modify it
5  * under the terms of the GNU General Public License version 2 only, as
6  * published by the Free Software Foundation.  Oracle designates this
7  * particular file as subject to the "Classpath" exception as provided
8  * by Oracle in the LICENSE file that accompanied this code.
9  *
10  * This code is distributed in the hope that it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13  * version 2 for more details (a copy is included in the LICENSE file that
14  * accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License version
17  * 2 along with this work; if not, write to the Free Software Foundation,
18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19  *
20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21  * or visit www.oracle.com if you need additional information or have any
22  * questions.
23  */
24 
25 /*
26  * This file is available under and governed by the GNU General Public
27  * License version 2 only, as published by the Free Software Foundation.
28  * However, the following notice accompanied the original version of this
29  * file:
30  *
31  * Written by Doug Lea with assistance from members of JCP JSR-166
32  * Expert Group and released to the public domain, as explained at
33  * http://creativecommons.org/publicdomain/zero/1.0/
34  */
35 
36 package java.util;
37 
38 import java.util.concurrent.CountedCompleter;
39 import java.util.concurrent.ForkJoinPool;
40 import java.util.function.BinaryOperator;
41 import java.util.function.DoubleBinaryOperator;
42 import java.util.function.IntBinaryOperator;
43 import java.util.function.LongBinaryOperator;
44 
45 /**
46  * ForkJoin tasks to perform Arrays.parallelPrefix operations.
47  *
48  * @author Doug Lea
49  * @since 1.8
50  */
51 class ArrayPrefixHelpers {
ArrayPrefixHelpers()52     private ArrayPrefixHelpers() {} // non-instantiable
53 
54     /*
55      * Parallel prefix (aka cumulate, scan) task classes
56      * are based loosely on Guy Blelloch's original
57      * algorithm (http://www.cs.cmu.edu/~scandal/alg/scan.html):
58      *  Keep dividing by two to threshold segment size, and then:
59      *   Pass 1: Create tree of partial sums for each segment
60      *   Pass 2: For each segment, cumulate with offset of left sibling
61      *
62      * This version improves performance within FJ framework mainly by
63      * allowing the second pass of ready left-hand sides to proceed
64      * even if some right-hand side first passes are still executing.
65      * It also combines first and second pass for leftmost segment,
66      * and skips the first pass for rightmost segment (whose result is
67      * not needed for second pass).  It similarly manages to avoid
68      * requiring that users supply an identity basis for accumulations
69      * by tracking those segments/subtasks for which the first
70      * existing element is used as base.
71      *
72      * Managing this relies on ORing some bits in the pendingCount for
73      * phases/states: CUMULATE, SUMMED, and FINISHED. CUMULATE is the
74      * main phase bit. When false, segments compute only their sum.
75      * When true, they cumulate array elements. CUMULATE is set at
76      * root at beginning of second pass and then propagated down. But
77      * it may also be set earlier for subtrees with lo==0 (the left
78      * spine of tree). SUMMED is a one bit join count. For leafs, it
79      * is set when summed. For internal nodes, it becomes true when
80      * one child is summed.  When the second child finishes summing,
81      * we then moves up tree to trigger the cumulate phase. FINISHED
82      * is also a one bit join count. For leafs, it is set when
83      * cumulated. For internal nodes, it becomes true when one child
84      * is cumulated.  When the second child finishes cumulating, it
85      * then moves up tree, completing at the root.
86      *
87      * To better exploit locality and reduce overhead, the compute
88      * method loops starting with the current task, moving if possible
89      * to one of its subtasks rather than forking.
90      *
91      * As usual for this sort of utility, there are 4 versions, that
92      * are simple copy/paste/adapt variants of each other.  (The
93      * double and int versions differ from long version solely by
94      * replacing "long" (with case-matching)).
95      */
96 
97     // see above
98     static final int CUMULATE = 1;
99     static final int SUMMED   = 2;
100     static final int FINISHED = 4;
101 
102     /** The smallest subtask array partition size to use as threshold */
103     static final int MIN_PARTITION = 16;
104 
105     static final class CumulateTask<T> extends CountedCompleter<Void> {
106         @SuppressWarnings("serial") // Not statically typed as Serializable
107         final T[] array;
108         @SuppressWarnings("serial") // Not statically typed as Serializable
109         final BinaryOperator<T> function;
110         CumulateTask<T> left, right;
111         @SuppressWarnings("serial") // Not statically typed as Serializable
112         T in;
113         @SuppressWarnings("serial") // Not statically typed as Serializable
114         T out;
115         final int lo, hi, origin, fence, threshold;
116 
117         /** Root task constructor */
CumulateTask(CumulateTask<T> parent, BinaryOperator<T> function, T[] array, int lo, int hi)118         public CumulateTask(CumulateTask<T> parent,
119                             BinaryOperator<T> function,
120                             T[] array, int lo, int hi) {
121             super(parent);
122             this.function = function; this.array = array;
123             this.lo = this.origin = lo; this.hi = this.fence = hi;
124             int p;
125             this.threshold =
126                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
127                 <= MIN_PARTITION ? MIN_PARTITION : p;
128         }
129 
130         /** Subtask constructor */
CumulateTask(CumulateTask<T> parent, BinaryOperator<T> function, T[] array, int origin, int fence, int threshold, int lo, int hi)131         CumulateTask(CumulateTask<T> parent, BinaryOperator<T> function,
132                      T[] array, int origin, int fence, int threshold,
133                      int lo, int hi) {
134             super(parent);
135             this.function = function; this.array = array;
136             this.origin = origin; this.fence = fence;
137             this.threshold = threshold;
138             this.lo = lo; this.hi = hi;
139         }
140 
compute()141         public final void compute() {
142             final BinaryOperator<T> fn;
143             final T[] a;
144             if ((fn = this.function) == null || (a = this.array) == null)
145                 throw new NullPointerException();    // hoist checks
146             int th = threshold, org = origin, fnc = fence, l, h;
147             CumulateTask<T> t = this;
148             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
149                 if (h - l > th) {
150                     CumulateTask<T> lt = t.left, rt = t.right, f;
151                     if (lt == null) {                // first pass
152                         int mid = (l + h) >>> 1;
153                         f = rt = t.right =
154                             new CumulateTask<T>(t, fn, a, org, fnc, th, mid, h);
155                         t = lt = t.left =
156                             new CumulateTask<T>(t, fn, a, org, fnc, th, l, mid);
157                     }
158                     else {                           // possibly refork
159                         T pin = t.in;
160                         lt.in = pin;
161                         f = t = null;
162                         if (rt != null) {
163                             T lout = lt.out;
164                             rt.in = (l == org ? lout :
165                                      fn.apply(pin, lout));
166                             for (int c;;) {
167                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
168                                     break;
169                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
170                                     t = rt;
171                                     break;
172                                 }
173                             }
174                         }
175                         for (int c;;) {
176                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
177                                 break;
178                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
179                                 if (t != null)
180                                     f = t;
181                                 t = lt;
182                                 break;
183                             }
184                         }
185                         if (t == null)
186                             break;
187                     }
188                     if (f != null)
189                         f.fork();
190                 }
191                 else {
192                     int state; // Transition to sum, cumulate, or both
193                     for (int b;;) {
194                         if (((b = t.getPendingCount()) & FINISHED) != 0)
195                             break outer;                      // already done
196                         state = ((b & CUMULATE) != 0 ? FINISHED :
197                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
198                         if (t.compareAndSetPendingCount(b, b|state))
199                             break;
200                     }
201 
202                     T sum;
203                     if (state != SUMMED) {
204                         int first;
205                         if (l == org) {                       // leftmost; no in
206                             sum = a[org];
207                             first = org + 1;
208                         }
209                         else {
210                             sum = t.in;
211                             first = l;
212                         }
213                         for (int i = first; i < h; ++i)       // cumulate
214                             a[i] = sum = fn.apply(sum, a[i]);
215                     }
216                     else if (h < fnc) {                       // skip rightmost
217                         sum = a[l];
218                         for (int i = l + 1; i < h; ++i)       // sum only
219                             sum = fn.apply(sum, a[i]);
220                     }
221                     else
222                         sum = t.in;
223                     t.out = sum;
224                     for (CumulateTask<T> par;;) {             // propagate
225                         @SuppressWarnings("unchecked") CumulateTask<T> partmp
226                             = (CumulateTask<T>)t.getCompleter();
227                         if ((par = partmp) == null) {
228                             if ((state & FINISHED) != 0)      // enable join
229                                 t.quietlyComplete();
230                             break outer;
231                         }
232                         int b = par.getPendingCount();
233                         if ((b & state & FINISHED) != 0)
234                             t = par;                          // both done
235                         else if ((b & state & SUMMED) != 0) { // both summed
236                             int nextState; CumulateTask<T> lt, rt;
237                             if ((lt = par.left) != null &&
238                                 (rt = par.right) != null) {
239                                 T lout = lt.out;
240                                 par.out = (rt.hi == fnc ? lout :
241                                            fn.apply(lout, rt.out));
242                             }
243                             int refork = (((b & CUMULATE) == 0 &&
244                                            par.lo == org) ? CUMULATE : 0);
245                             if ((nextState = b|state|refork) == b ||
246                                 par.compareAndSetPendingCount(b, nextState)) {
247                                 state = SUMMED;               // drop finished
248                                 t = par;
249                                 if (refork != 0)
250                                     par.fork();
251                             }
252                         }
253                         else if (par.compareAndSetPendingCount(b, b|state))
254                             break outer;                      // sib not ready
255                     }
256                 }
257             }
258         }
259         @java.io.Serial
260         private static final long serialVersionUID = 5293554502939613543L;
261     }
262 
263     static final class LongCumulateTask extends CountedCompleter<Void> {
264         final long[] array;
265         @SuppressWarnings("serial") // Not statically typed as Serializable
266         final LongBinaryOperator function;
267         LongCumulateTask left, right;
268         long in, out;
269         final int lo, hi, origin, fence, threshold;
270 
271         /** Root task constructor */
LongCumulateTask(LongCumulateTask parent, LongBinaryOperator function, long[] array, int lo, int hi)272         public LongCumulateTask(LongCumulateTask parent,
273                                 LongBinaryOperator function,
274                                 long[] array, int lo, int hi) {
275             super(parent);
276             this.function = function; this.array = array;
277             this.lo = this.origin = lo; this.hi = this.fence = hi;
278             int p;
279             this.threshold =
280                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
281                 <= MIN_PARTITION ? MIN_PARTITION : p;
282         }
283 
284         /** Subtask constructor */
LongCumulateTask(LongCumulateTask parent, LongBinaryOperator function, long[] array, int origin, int fence, int threshold, int lo, int hi)285         LongCumulateTask(LongCumulateTask parent, LongBinaryOperator function,
286                          long[] array, int origin, int fence, int threshold,
287                          int lo, int hi) {
288             super(parent);
289             this.function = function; this.array = array;
290             this.origin = origin; this.fence = fence;
291             this.threshold = threshold;
292             this.lo = lo; this.hi = hi;
293         }
294 
compute()295         public final void compute() {
296             final LongBinaryOperator fn;
297             final long[] a;
298             if ((fn = this.function) == null || (a = this.array) == null)
299                 throw new NullPointerException();    // hoist checks
300             int th = threshold, org = origin, fnc = fence, l, h;
301             LongCumulateTask t = this;
302             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
303                 if (h - l > th) {
304                     LongCumulateTask lt = t.left, rt = t.right, f;
305                     if (lt == null) {                // first pass
306                         int mid = (l + h) >>> 1;
307                         f = rt = t.right =
308                             new LongCumulateTask(t, fn, a, org, fnc, th, mid, h);
309                         t = lt = t.left =
310                             new LongCumulateTask(t, fn, a, org, fnc, th, l, mid);
311                     }
312                     else {                           // possibly refork
313                         long pin = t.in;
314                         lt.in = pin;
315                         f = t = null;
316                         if (rt != null) {
317                             long lout = lt.out;
318                             rt.in = (l == org ? lout :
319                                      fn.applyAsLong(pin, lout));
320                             for (int c;;) {
321                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
322                                     break;
323                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
324                                     t = rt;
325                                     break;
326                                 }
327                             }
328                         }
329                         for (int c;;) {
330                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
331                                 break;
332                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
333                                 if (t != null)
334                                     f = t;
335                                 t = lt;
336                                 break;
337                             }
338                         }
339                         if (t == null)
340                             break;
341                     }
342                     if (f != null)
343                         f.fork();
344                 }
345                 else {
346                     int state; // Transition to sum, cumulate, or both
347                     for (int b;;) {
348                         if (((b = t.getPendingCount()) & FINISHED) != 0)
349                             break outer;                      // already done
350                         state = ((b & CUMULATE) != 0 ? FINISHED :
351                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
352                         if (t.compareAndSetPendingCount(b, b|state))
353                             break;
354                     }
355 
356                     long sum;
357                     if (state != SUMMED) {
358                         int first;
359                         if (l == org) {                       // leftmost; no in
360                             sum = a[org];
361                             first = org + 1;
362                         }
363                         else {
364                             sum = t.in;
365                             first = l;
366                         }
367                         for (int i = first; i < h; ++i)       // cumulate
368                             a[i] = sum = fn.applyAsLong(sum, a[i]);
369                     }
370                     else if (h < fnc) {                       // skip rightmost
371                         sum = a[l];
372                         for (int i = l + 1; i < h; ++i)       // sum only
373                             sum = fn.applyAsLong(sum, a[i]);
374                     }
375                     else
376                         sum = t.in;
377                     t.out = sum;
378                     for (LongCumulateTask par;;) {            // propagate
379                         if ((par = (LongCumulateTask)t.getCompleter()) == null) {
380                             if ((state & FINISHED) != 0)      // enable join
381                                 t.quietlyComplete();
382                             break outer;
383                         }
384                         int b = par.getPendingCount();
385                         if ((b & state & FINISHED) != 0)
386                             t = par;                          // both done
387                         else if ((b & state & SUMMED) != 0) { // both summed
388                             int nextState; LongCumulateTask lt, rt;
389                             if ((lt = par.left) != null &&
390                                 (rt = par.right) != null) {
391                                 long lout = lt.out;
392                                 par.out = (rt.hi == fnc ? lout :
393                                            fn.applyAsLong(lout, rt.out));
394                             }
395                             int refork = (((b & CUMULATE) == 0 &&
396                                            par.lo == org) ? CUMULATE : 0);
397                             if ((nextState = b|state|refork) == b ||
398                                 par.compareAndSetPendingCount(b, nextState)) {
399                                 state = SUMMED;               // drop finished
400                                 t = par;
401                                 if (refork != 0)
402                                     par.fork();
403                             }
404                         }
405                         else if (par.compareAndSetPendingCount(b, b|state))
406                             break outer;                      // sib not ready
407                     }
408                 }
409             }
410         }
411         @java.io.Serial
412         private static final long serialVersionUID = -5074099945909284273L;
413     }
414 
415     static final class DoubleCumulateTask extends CountedCompleter<Void> {
416         final double[] array;
417         @SuppressWarnings("serial") // Not statically typed as Serializable
418         final DoubleBinaryOperator function;
419         DoubleCumulateTask left, right;
420         double in, out;
421         final int lo, hi, origin, fence, threshold;
422 
423         /** Root task constructor */
DoubleCumulateTask(DoubleCumulateTask parent, DoubleBinaryOperator function, double[] array, int lo, int hi)424         public DoubleCumulateTask(DoubleCumulateTask parent,
425                                   DoubleBinaryOperator function,
426                                   double[] array, int lo, int hi) {
427             super(parent);
428             this.function = function; this.array = array;
429             this.lo = this.origin = lo; this.hi = this.fence = hi;
430             int p;
431             this.threshold =
432                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
433                 <= MIN_PARTITION ? MIN_PARTITION : p;
434         }
435 
436         /** Subtask constructor */
DoubleCumulateTask(DoubleCumulateTask parent, DoubleBinaryOperator function, double[] array, int origin, int fence, int threshold, int lo, int hi)437         DoubleCumulateTask(DoubleCumulateTask parent, DoubleBinaryOperator function,
438                            double[] array, int origin, int fence, int threshold,
439                            int lo, int hi) {
440             super(parent);
441             this.function = function; this.array = array;
442             this.origin = origin; this.fence = fence;
443             this.threshold = threshold;
444             this.lo = lo; this.hi = hi;
445         }
446 
compute()447         public final void compute() {
448             final DoubleBinaryOperator fn;
449             final double[] a;
450             if ((fn = this.function) == null || (a = this.array) == null)
451                 throw new NullPointerException();    // hoist checks
452             int th = threshold, org = origin, fnc = fence, l, h;
453             DoubleCumulateTask t = this;
454             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
455                 if (h - l > th) {
456                     DoubleCumulateTask lt = t.left, rt = t.right, f;
457                     if (lt == null) {                // first pass
458                         int mid = (l + h) >>> 1;
459                         f = rt = t.right =
460                             new DoubleCumulateTask(t, fn, a, org, fnc, th, mid, h);
461                         t = lt = t.left =
462                             new DoubleCumulateTask(t, fn, a, org, fnc, th, l, mid);
463                     }
464                     else {                           // possibly refork
465                         double pin = t.in;
466                         lt.in = pin;
467                         f = t = null;
468                         if (rt != null) {
469                             double lout = lt.out;
470                             rt.in = (l == org ? lout :
471                                      fn.applyAsDouble(pin, lout));
472                             for (int c;;) {
473                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
474                                     break;
475                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
476                                     t = rt;
477                                     break;
478                                 }
479                             }
480                         }
481                         for (int c;;) {
482                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
483                                 break;
484                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
485                                 if (t != null)
486                                     f = t;
487                                 t = lt;
488                                 break;
489                             }
490                         }
491                         if (t == null)
492                             break;
493                     }
494                     if (f != null)
495                         f.fork();
496                 }
497                 else {
498                     int state; // Transition to sum, cumulate, or both
499                     for (int b;;) {
500                         if (((b = t.getPendingCount()) & FINISHED) != 0)
501                             break outer;                      // already done
502                         state = ((b & CUMULATE) != 0 ? FINISHED :
503                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
504                         if (t.compareAndSetPendingCount(b, b|state))
505                             break;
506                     }
507 
508                     double sum;
509                     if (state != SUMMED) {
510                         int first;
511                         if (l == org) {                       // leftmost; no in
512                             sum = a[org];
513                             first = org + 1;
514                         }
515                         else {
516                             sum = t.in;
517                             first = l;
518                         }
519                         for (int i = first; i < h; ++i)       // cumulate
520                             a[i] = sum = fn.applyAsDouble(sum, a[i]);
521                     }
522                     else if (h < fnc) {                       // skip rightmost
523                         sum = a[l];
524                         for (int i = l + 1; i < h; ++i)       // sum only
525                             sum = fn.applyAsDouble(sum, a[i]);
526                     }
527                     else
528                         sum = t.in;
529                     t.out = sum;
530                     for (DoubleCumulateTask par;;) {            // propagate
531                         if ((par = (DoubleCumulateTask)t.getCompleter()) == null) {
532                             if ((state & FINISHED) != 0)      // enable join
533                                 t.quietlyComplete();
534                             break outer;
535                         }
536                         int b = par.getPendingCount();
537                         if ((b & state & FINISHED) != 0)
538                             t = par;                          // both done
539                         else if ((b & state & SUMMED) != 0) { // both summed
540                             int nextState; DoubleCumulateTask lt, rt;
541                             if ((lt = par.left) != null &&
542                                 (rt = par.right) != null) {
543                                 double lout = lt.out;
544                                 par.out = (rt.hi == fnc ? lout :
545                                            fn.applyAsDouble(lout, rt.out));
546                             }
547                             int refork = (((b & CUMULATE) == 0 &&
548                                            par.lo == org) ? CUMULATE : 0);
549                             if ((nextState = b|state|refork) == b ||
550                                 par.compareAndSetPendingCount(b, nextState)) {
551                                 state = SUMMED;               // drop finished
552                                 t = par;
553                                 if (refork != 0)
554                                     par.fork();
555                             }
556                         }
557                         else if (par.compareAndSetPendingCount(b, b|state))
558                             break outer;                      // sib not ready
559                     }
560                 }
561             }
562         }
563         @java.io.Serial
564         private static final long serialVersionUID = -586947823794232033L;
565     }
566 
567     static final class IntCumulateTask extends CountedCompleter<Void> {
568         final int[] array;
569         @SuppressWarnings("serial") // Not statically typed as Serializable
570         final IntBinaryOperator function;
571         IntCumulateTask left, right;
572         int in, out;
573         final int lo, hi, origin, fence, threshold;
574 
575         /** Root task constructor */
IntCumulateTask(IntCumulateTask parent, IntBinaryOperator function, int[] array, int lo, int hi)576         public IntCumulateTask(IntCumulateTask parent,
577                                IntBinaryOperator function,
578                                int[] array, int lo, int hi) {
579             super(parent);
580             this.function = function; this.array = array;
581             this.lo = this.origin = lo; this.hi = this.fence = hi;
582             int p;
583             this.threshold =
584                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
585                 <= MIN_PARTITION ? MIN_PARTITION : p;
586         }
587 
588         /** Subtask constructor */
IntCumulateTask(IntCumulateTask parent, IntBinaryOperator function, int[] array, int origin, int fence, int threshold, int lo, int hi)589         IntCumulateTask(IntCumulateTask parent, IntBinaryOperator function,
590                         int[] array, int origin, int fence, int threshold,
591                         int lo, int hi) {
592             super(parent);
593             this.function = function; this.array = array;
594             this.origin = origin; this.fence = fence;
595             this.threshold = threshold;
596             this.lo = lo; this.hi = hi;
597         }
598 
compute()599         public final void compute() {
600             final IntBinaryOperator fn;
601             final int[] a;
602             if ((fn = this.function) == null || (a = this.array) == null)
603                 throw new NullPointerException();    // hoist checks
604             int th = threshold, org = origin, fnc = fence, l, h;
605             IntCumulateTask t = this;
606             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
607                 if (h - l > th) {
608                     IntCumulateTask lt = t.left, rt = t.right, f;
609                     if (lt == null) {                // first pass
610                         int mid = (l + h) >>> 1;
611                         f = rt = t.right =
612                             new IntCumulateTask(t, fn, a, org, fnc, th, mid, h);
613                         t = lt = t.left =
614                             new IntCumulateTask(t, fn, a, org, fnc, th, l, mid);
615                     }
616                     else {                           // possibly refork
617                         int pin = t.in;
618                         lt.in = pin;
619                         f = t = null;
620                         if (rt != null) {
621                             int lout = lt.out;
622                             rt.in = (l == org ? lout :
623                                      fn.applyAsInt(pin, lout));
624                             for (int c;;) {
625                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
626                                     break;
627                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
628                                     t = rt;
629                                     break;
630                                 }
631                             }
632                         }
633                         for (int c;;) {
634                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
635                                 break;
636                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
637                                 if (t != null)
638                                     f = t;
639                                 t = lt;
640                                 break;
641                             }
642                         }
643                         if (t == null)
644                             break;
645                     }
646                     if (f != null)
647                         f.fork();
648                 }
649                 else {
650                     int state; // Transition to sum, cumulate, or both
651                     for (int b;;) {
652                         if (((b = t.getPendingCount()) & FINISHED) != 0)
653                             break outer;                      // already done
654                         state = ((b & CUMULATE) != 0 ? FINISHED :
655                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
656                         if (t.compareAndSetPendingCount(b, b|state))
657                             break;
658                     }
659 
660                     int sum;
661                     if (state != SUMMED) {
662                         int first;
663                         if (l == org) {                       // leftmost; no in
664                             sum = a[org];
665                             first = org + 1;
666                         }
667                         else {
668                             sum = t.in;
669                             first = l;
670                         }
671                         for (int i = first; i < h; ++i)       // cumulate
672                             a[i] = sum = fn.applyAsInt(sum, a[i]);
673                     }
674                     else if (h < fnc) {                       // skip rightmost
675                         sum = a[l];
676                         for (int i = l + 1; i < h; ++i)       // sum only
677                             sum = fn.applyAsInt(sum, a[i]);
678                     }
679                     else
680                         sum = t.in;
681                     t.out = sum;
682                     for (IntCumulateTask par;;) {            // propagate
683                         if ((par = (IntCumulateTask)t.getCompleter()) == null) {
684                             if ((state & FINISHED) != 0)      // enable join
685                                 t.quietlyComplete();
686                             break outer;
687                         }
688                         int b = par.getPendingCount();
689                         if ((b & state & FINISHED) != 0)
690                             t = par;                          // both done
691                         else if ((b & state & SUMMED) != 0) { // both summed
692                             int nextState; IntCumulateTask lt, rt;
693                             if ((lt = par.left) != null &&
694                                 (rt = par.right) != null) {
695                                 int lout = lt.out;
696                                 par.out = (rt.hi == fnc ? lout :
697                                            fn.applyAsInt(lout, rt.out));
698                             }
699                             int refork = (((b & CUMULATE) == 0 &&
700                                            par.lo == org) ? CUMULATE : 0);
701                             if ((nextState = b|state|refork) == b ||
702                                 par.compareAndSetPendingCount(b, nextState)) {
703                                 state = SUMMED;               // drop finished
704                                 t = par;
705                                 if (refork != 0)
706                                     par.fork();
707                             }
708                         }
709                         else if (par.compareAndSetPendingCount(b, b|state))
710                             break outer;                      // sib not ready
711                     }
712                 }
713             }
714         }
715         @java.io.Serial
716         private static final long serialVersionUID = 3731755594596840961L;
717     }
718 }
719