1 /*
2  * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 package java.util;
26 
27 /*
28  * Written by Doug Lea with assistance from members of JCP JSR-166
29  * Expert Group and released to the public domain, as explained at
30  * http://creativecommons.org/publicdomain/zero/1.0/
31  */
32 
33 import java.util.concurrent.ForkJoinPool;
34 import java.util.concurrent.CountedCompleter;
35 import java.util.function.BinaryOperator;
36 import java.util.function.IntBinaryOperator;
37 import java.util.function.LongBinaryOperator;
38 import java.util.function.DoubleBinaryOperator;
39 
40 /**
41  * ForkJoin tasks to perform Arrays.parallelPrefix operations.
42  *
43  * @author Doug Lea
44  * @since 1.8
45  */
46 class ArrayPrefixHelpers {
ArrayPrefixHelpers()47     private ArrayPrefixHelpers() {}; // non-instantiable
48 
49     /*
50      * Parallel prefix (aka cumulate, scan) task classes
51      * are based loosely on Guy Blelloch's original
52      * algorithm (http://www.cs.cmu.edu/~scandal/alg/scan.html):
53      *  Keep dividing by two to threshold segment size, and then:
54      *   Pass 1: Create tree of partial sums for each segment
55      *   Pass 2: For each segment, cumulate with offset of left sibling
56      *
57      * This version improves performance within FJ framework mainly by
58      * allowing the second pass of ready left-hand sides to proceed
59      * even if some right-hand side first passes are still executing.
60      * It also combines first and second pass for leftmost segment,
61      * and skips the first pass for rightmost segment (whose result is
62      * not needed for second pass).  It similarly manages to avoid
63      * requiring that users supply an identity basis for accumulations
64      * by tracking those segments/subtasks for which the first
65      * existing element is used as base.
66      *
67      * Managing this relies on ORing some bits in the pendingCount for
68      * phases/states: CUMULATE, SUMMED, and FINISHED. CUMULATE is the
69      * main phase bit. When false, segments compute only their sum.
70      * When true, they cumulate array elements. CUMULATE is set at
71      * root at beginning of second pass and then propagated down. But
72      * it may also be set earlier for subtrees with lo==0 (the left
73      * spine of tree). SUMMED is a one bit join count. For leafs, it
74      * is set when summed. For internal nodes, it becomes true when
75      * one child is summed.  When the second child finishes summing,
76      * we then moves up tree to trigger the cumulate phase. FINISHED
77      * is also a one bit join count. For leafs, it is set when
78      * cumulated. For internal nodes, it becomes true when one child
79      * is cumulated.  When the second child finishes cumulating, it
80      * then moves up tree, completing at the root.
81      *
82      * To better exploit locality and reduce overhead, the compute
83      * method loops starting with the current task, moving if possible
84      * to one of its subtasks rather than forking.
85      *
86      * As usual for this sort of utility, there are 4 versions, that
87      * are simple copy/paste/adapt variants of each other.  (The
88      * double and int versions differ from long version soley by
89      * replacing "long" (with case-matching)).
90      */
91 
92     // see above
93     static final int CUMULATE = 1;
94     static final int SUMMED   = 2;
95     static final int FINISHED = 4;
96 
97     /** The smallest subtask array partition size to use as threshold */
98     static final int MIN_PARTITION = 16;
99 
100     static final class CumulateTask<T> extends CountedCompleter<Void> {
101         final T[] array;
102         final BinaryOperator<T> function;
103         CumulateTask<T> left, right;
104         T in, out;
105         final int lo, hi, origin, fence, threshold;
106 
107         /** Root task constructor */
CumulateTask(CumulateTask<T> parent, BinaryOperator<T> function, T[] array, int lo, int hi)108         public CumulateTask(CumulateTask<T> parent,
109                             BinaryOperator<T> function,
110                             T[] array, int lo, int hi) {
111             super(parent);
112             this.function = function; this.array = array;
113             this.lo = this.origin = lo; this.hi = this.fence = hi;
114             int p;
115             this.threshold =
116                     (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
117                     <= MIN_PARTITION ? MIN_PARTITION : p;
118         }
119 
120         /** Subtask constructor */
CumulateTask(CumulateTask<T> parent, BinaryOperator<T> function, T[] array, int origin, int fence, int threshold, int lo, int hi)121         CumulateTask(CumulateTask<T> parent, BinaryOperator<T> function,
122                      T[] array, int origin, int fence, int threshold,
123                      int lo, int hi) {
124             super(parent);
125             this.function = function; this.array = array;
126             this.origin = origin; this.fence = fence;
127             this.threshold = threshold;
128             this.lo = lo; this.hi = hi;
129         }
130 
131         @SuppressWarnings("unchecked")
compute()132         public final void compute() {
133             final BinaryOperator<T> fn;
134             final T[] a;
135             if ((fn = this.function) == null || (a = this.array) == null)
136                 throw new NullPointerException();    // hoist checks
137             int th = threshold, org = origin, fnc = fence, l, h;
138             CumulateTask<T> t = this;
139             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
140                 if (h - l > th) {
141                     CumulateTask<T> lt = t.left, rt = t.right, f;
142                     if (lt == null) {                // first pass
143                         int mid = (l + h) >>> 1;
144                         f = rt = t.right =
145                                 new CumulateTask<T>(t, fn, a, org, fnc, th, mid, h);
146                         t = lt = t.left  =
147                                 new CumulateTask<T>(t, fn, a, org, fnc, th, l, mid);
148                     }
149                     else {                           // possibly refork
150                         T pin = t.in;
151                         lt.in = pin;
152                         f = t = null;
153                         if (rt != null) {
154                             T lout = lt.out;
155                             rt.in = (l == org ? lout :
156                                      fn.apply(pin, lout));
157                             for (int c;;) {
158                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
159                                     break;
160                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
161                                     t = rt;
162                                     break;
163                                 }
164                             }
165                         }
166                         for (int c;;) {
167                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
168                                 break;
169                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
170                                 if (t != null)
171                                     f = t;
172                                 t = lt;
173                                 break;
174                             }
175                         }
176                         if (t == null)
177                             break;
178                     }
179                     if (f != null)
180                         f.fork();
181                 }
182                 else {
183                     int state; // Transition to sum, cumulate, or both
184                     for (int b;;) {
185                         if (((b = t.getPendingCount()) & FINISHED) != 0)
186                             break outer;                      // already done
187                         state = ((b & CUMULATE) != 0? FINISHED :
188                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
189                         if (t.compareAndSetPendingCount(b, b|state))
190                             break;
191                     }
192 
193                     T sum;
194                     if (state != SUMMED) {
195                         int first;
196                         if (l == org) {                       // leftmost; no in
197                             sum = a[org];
198                             first = org + 1;
199                         }
200                         else {
201                             sum = t.in;
202                             first = l;
203                         }
204                         for (int i = first; i < h; ++i)       // cumulate
205                             a[i] = sum = fn.apply(sum, a[i]);
206                     }
207                     else if (h < fnc) {                       // skip rightmost
208                         sum = a[l];
209                         for (int i = l + 1; i < h; ++i)       // sum only
210                             sum = fn.apply(sum, a[i]);
211                     }
212                     else
213                         sum = t.in;
214                     t.out = sum;
215                     for (CumulateTask<T> par;;) {             // propagate
216                         if ((par = (CumulateTask<T>)t.getCompleter()) == null) {
217                             if ((state & FINISHED) != 0)      // enable join
218                                 t.quietlyComplete();
219                             break outer;
220                         }
221                         int b = par.getPendingCount();
222                         if ((b & state & FINISHED) != 0)
223                             t = par;                          // both done
224                         else if ((b & state & SUMMED) != 0) { // both summed
225                             int nextState; CumulateTask<T> lt, rt;
226                             if ((lt = par.left) != null &&
227                                 (rt = par.right) != null) {
228                                 T lout = lt.out;
229                                 par.out = (rt.hi == fnc ? lout :
230                                            fn.apply(lout, rt.out));
231                             }
232                             int refork = (((b & CUMULATE) == 0 &&
233                                            par.lo == org) ? CUMULATE : 0);
234                             if ((nextState = b|state|refork) == b ||
235                                 par.compareAndSetPendingCount(b, nextState)) {
236                                 state = SUMMED;               // drop finished
237                                 t = par;
238                                 if (refork != 0)
239                                     par.fork();
240                             }
241                         }
242                         else if (par.compareAndSetPendingCount(b, b|state))
243                             break outer;                      // sib not ready
244                     }
245                 }
246             }
247         }
248     }
249 
250     static final class LongCumulateTask extends CountedCompleter<Void> {
251         final long[] array;
252         final LongBinaryOperator function;
253         LongCumulateTask left, right;
254         long in, out;
255         final int lo, hi, origin, fence, threshold;
256 
257         /** Root task constructor */
LongCumulateTask(LongCumulateTask parent, LongBinaryOperator function, long[] array, int lo, int hi)258         public LongCumulateTask(LongCumulateTask parent,
259                                 LongBinaryOperator function,
260                                 long[] array, int lo, int hi) {
261             super(parent);
262             this.function = function; this.array = array;
263             this.lo = this.origin = lo; this.hi = this.fence = hi;
264             int p;
265             this.threshold =
266                     (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
267                     <= MIN_PARTITION ? MIN_PARTITION : p;
268         }
269 
270         /** Subtask constructor */
LongCumulateTask(LongCumulateTask parent, LongBinaryOperator function, long[] array, int origin, int fence, int threshold, int lo, int hi)271         LongCumulateTask(LongCumulateTask parent, LongBinaryOperator function,
272                          long[] array, int origin, int fence, int threshold,
273                          int lo, int hi) {
274             super(parent);
275             this.function = function; this.array = array;
276             this.origin = origin; this.fence = fence;
277             this.threshold = threshold;
278             this.lo = lo; this.hi = hi;
279         }
280 
compute()281         public final void compute() {
282             final LongBinaryOperator fn;
283             final long[] a;
284             if ((fn = this.function) == null || (a = this.array) == null)
285                 throw new NullPointerException();    // hoist checks
286             int th = threshold, org = origin, fnc = fence, l, h;
287             LongCumulateTask t = this;
288             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
289                 if (h - l > th) {
290                     LongCumulateTask lt = t.left, rt = t.right, f;
291                     if (lt == null) {                // first pass
292                         int mid = (l + h) >>> 1;
293                         f = rt = t.right =
294                                 new LongCumulateTask(t, fn, a, org, fnc, th, mid, h);
295                         t = lt = t.left  =
296                                 new LongCumulateTask(t, fn, a, org, fnc, th, l, mid);
297                     }
298                     else {                           // possibly refork
299                         long pin = t.in;
300                         lt.in = pin;
301                         f = t = null;
302                         if (rt != null) {
303                             long lout = lt.out;
304                             rt.in = (l == org ? lout :
305                                      fn.applyAsLong(pin, lout));
306                             for (int c;;) {
307                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
308                                     break;
309                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
310                                     t = rt;
311                                     break;
312                                 }
313                             }
314                         }
315                         for (int c;;) {
316                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
317                                 break;
318                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
319                                 if (t != null)
320                                     f = t;
321                                 t = lt;
322                                 break;
323                             }
324                         }
325                         if (t == null)
326                             break;
327                     }
328                     if (f != null)
329                         f.fork();
330                 }
331                 else {
332                     int state; // Transition to sum, cumulate, or both
333                     for (int b;;) {
334                         if (((b = t.getPendingCount()) & FINISHED) != 0)
335                             break outer;                      // already done
336                         state = ((b & CUMULATE) != 0? FINISHED :
337                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
338                         if (t.compareAndSetPendingCount(b, b|state))
339                             break;
340                     }
341 
342                     long sum;
343                     if (state != SUMMED) {
344                         int first;
345                         if (l == org) {                       // leftmost; no in
346                             sum = a[org];
347                             first = org + 1;
348                         }
349                         else {
350                             sum = t.in;
351                             first = l;
352                         }
353                         for (int i = first; i < h; ++i)       // cumulate
354                             a[i] = sum = fn.applyAsLong(sum, a[i]);
355                     }
356                     else if (h < fnc) {                       // skip rightmost
357                         sum = a[l];
358                         for (int i = l + 1; i < h; ++i)       // sum only
359                             sum = fn.applyAsLong(sum, a[i]);
360                     }
361                     else
362                         sum = t.in;
363                     t.out = sum;
364                     for (LongCumulateTask par;;) {            // propagate
365                         if ((par = (LongCumulateTask)t.getCompleter()) == null) {
366                             if ((state & FINISHED) != 0)      // enable join
367                                 t.quietlyComplete();
368                             break outer;
369                         }
370                         int b = par.getPendingCount();
371                         if ((b & state & FINISHED) != 0)
372                             t = par;                          // both done
373                         else if ((b & state & SUMMED) != 0) { // both summed
374                             int nextState; LongCumulateTask lt, rt;
375                             if ((lt = par.left) != null &&
376                                 (rt = par.right) != null) {
377                                 long lout = lt.out;
378                                 par.out = (rt.hi == fnc ? lout :
379                                            fn.applyAsLong(lout, rt.out));
380                             }
381                             int refork = (((b & CUMULATE) == 0 &&
382                                            par.lo == org) ? CUMULATE : 0);
383                             if ((nextState = b|state|refork) == b ||
384                                 par.compareAndSetPendingCount(b, nextState)) {
385                                 state = SUMMED;               // drop finished
386                                 t = par;
387                                 if (refork != 0)
388                                     par.fork();
389                             }
390                         }
391                         else if (par.compareAndSetPendingCount(b, b|state))
392                             break outer;                      // sib not ready
393                     }
394                 }
395             }
396         }
397     }
398 
399     static final class DoubleCumulateTask extends CountedCompleter<Void> {
400         final double[] array;
401         final DoubleBinaryOperator function;
402         DoubleCumulateTask left, right;
403         double in, out;
404         final int lo, hi, origin, fence, threshold;
405 
406         /** Root task constructor */
DoubleCumulateTask(DoubleCumulateTask parent, DoubleBinaryOperator function, double[] array, int lo, int hi)407         public DoubleCumulateTask(DoubleCumulateTask parent,
408                                   DoubleBinaryOperator function,
409                                   double[] array, int lo, int hi) {
410             super(parent);
411             this.function = function; this.array = array;
412             this.lo = this.origin = lo; this.hi = this.fence = hi;
413             int p;
414             this.threshold =
415                     (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
416                     <= MIN_PARTITION ? MIN_PARTITION : p;
417         }
418 
419         /** Subtask constructor */
DoubleCumulateTask(DoubleCumulateTask parent, DoubleBinaryOperator function, double[] array, int origin, int fence, int threshold, int lo, int hi)420         DoubleCumulateTask(DoubleCumulateTask parent, DoubleBinaryOperator function,
421                            double[] array, int origin, int fence, int threshold,
422                            int lo, int hi) {
423             super(parent);
424             this.function = function; this.array = array;
425             this.origin = origin; this.fence = fence;
426             this.threshold = threshold;
427             this.lo = lo; this.hi = hi;
428         }
429 
compute()430         public final void compute() {
431             final DoubleBinaryOperator fn;
432             final double[] a;
433             if ((fn = this.function) == null || (a = this.array) == null)
434                 throw new NullPointerException();    // hoist checks
435             int th = threshold, org = origin, fnc = fence, l, h;
436             DoubleCumulateTask t = this;
437             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
438                 if (h - l > th) {
439                     DoubleCumulateTask lt = t.left, rt = t.right, f;
440                     if (lt == null) {                // first pass
441                         int mid = (l + h) >>> 1;
442                         f = rt = t.right =
443                                 new DoubleCumulateTask(t, fn, a, org, fnc, th, mid, h);
444                         t = lt = t.left  =
445                                 new DoubleCumulateTask(t, fn, a, org, fnc, th, l, mid);
446                     }
447                     else {                           // possibly refork
448                         double pin = t.in;
449                         lt.in = pin;
450                         f = t = null;
451                         if (rt != null) {
452                             double lout = lt.out;
453                             rt.in = (l == org ? lout :
454                                      fn.applyAsDouble(pin, lout));
455                             for (int c;;) {
456                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
457                                     break;
458                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
459                                     t = rt;
460                                     break;
461                                 }
462                             }
463                         }
464                         for (int c;;) {
465                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
466                                 break;
467                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
468                                 if (t != null)
469                                     f = t;
470                                 t = lt;
471                                 break;
472                             }
473                         }
474                         if (t == null)
475                             break;
476                     }
477                     if (f != null)
478                         f.fork();
479                 }
480                 else {
481                     int state; // Transition to sum, cumulate, or both
482                     for (int b;;) {
483                         if (((b = t.getPendingCount()) & FINISHED) != 0)
484                             break outer;                      // already done
485                         state = ((b & CUMULATE) != 0? FINISHED :
486                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
487                         if (t.compareAndSetPendingCount(b, b|state))
488                             break;
489                     }
490 
491                     double sum;
492                     if (state != SUMMED) {
493                         int first;
494                         if (l == org) {                       // leftmost; no in
495                             sum = a[org];
496                             first = org + 1;
497                         }
498                         else {
499                             sum = t.in;
500                             first = l;
501                         }
502                         for (int i = first; i < h; ++i)       // cumulate
503                             a[i] = sum = fn.applyAsDouble(sum, a[i]);
504                     }
505                     else if (h < fnc) {                       // skip rightmost
506                         sum = a[l];
507                         for (int i = l + 1; i < h; ++i)       // sum only
508                             sum = fn.applyAsDouble(sum, a[i]);
509                     }
510                     else
511                         sum = t.in;
512                     t.out = sum;
513                     for (DoubleCumulateTask par;;) {            // propagate
514                         if ((par = (DoubleCumulateTask)t.getCompleter()) == null) {
515                             if ((state & FINISHED) != 0)      // enable join
516                                 t.quietlyComplete();
517                             break outer;
518                         }
519                         int b = par.getPendingCount();
520                         if ((b & state & FINISHED) != 0)
521                             t = par;                          // both done
522                         else if ((b & state & SUMMED) != 0) { // both summed
523                             int nextState; DoubleCumulateTask lt, rt;
524                             if ((lt = par.left) != null &&
525                                 (rt = par.right) != null) {
526                                 double lout = lt.out;
527                                 par.out = (rt.hi == fnc ? lout :
528                                            fn.applyAsDouble(lout, rt.out));
529                             }
530                             int refork = (((b & CUMULATE) == 0 &&
531                                            par.lo == org) ? CUMULATE : 0);
532                             if ((nextState = b|state|refork) == b ||
533                                 par.compareAndSetPendingCount(b, nextState)) {
534                                 state = SUMMED;               // drop finished
535                                 t = par;
536                                 if (refork != 0)
537                                     par.fork();
538                             }
539                         }
540                         else if (par.compareAndSetPendingCount(b, b|state))
541                             break outer;                      // sib not ready
542                     }
543                 }
544             }
545         }
546     }
547 
548     static final class IntCumulateTask extends CountedCompleter<Void> {
549         final int[] array;
550         final IntBinaryOperator function;
551         IntCumulateTask left, right;
552         int in, out;
553         final int lo, hi, origin, fence, threshold;
554 
555         /** Root task constructor */
IntCumulateTask(IntCumulateTask parent, IntBinaryOperator function, int[] array, int lo, int hi)556         public IntCumulateTask(IntCumulateTask parent,
557                                IntBinaryOperator function,
558                                int[] array, int lo, int hi) {
559             super(parent);
560             this.function = function; this.array = array;
561             this.lo = this.origin = lo; this.hi = this.fence = hi;
562             int p;
563             this.threshold =
564                     (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
565                     <= MIN_PARTITION ? MIN_PARTITION : p;
566         }
567 
568         /** Subtask constructor */
IntCumulateTask(IntCumulateTask parent, IntBinaryOperator function, int[] array, int origin, int fence, int threshold, int lo, int hi)569         IntCumulateTask(IntCumulateTask parent, IntBinaryOperator function,
570                         int[] array, int origin, int fence, int threshold,
571                         int lo, int hi) {
572             super(parent);
573             this.function = function; this.array = array;
574             this.origin = origin; this.fence = fence;
575             this.threshold = threshold;
576             this.lo = lo; this.hi = hi;
577         }
578 
compute()579         public final void compute() {
580             final IntBinaryOperator fn;
581             final int[] a;
582             if ((fn = this.function) == null || (a = this.array) == null)
583                 throw new NullPointerException();    // hoist checks
584             int th = threshold, org = origin, fnc = fence, l, h;
585             IntCumulateTask t = this;
586             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
587                 if (h - l > th) {
588                     IntCumulateTask lt = t.left, rt = t.right, f;
589                     if (lt == null) {                // first pass
590                         int mid = (l + h) >>> 1;
591                         f = rt = t.right =
592                                 new IntCumulateTask(t, fn, a, org, fnc, th, mid, h);
593                         t = lt = t.left  =
594                                 new IntCumulateTask(t, fn, a, org, fnc, th, l, mid);
595                     }
596                     else {                           // possibly refork
597                         int pin = t.in;
598                         lt.in = pin;
599                         f = t = null;
600                         if (rt != null) {
601                             int lout = lt.out;
602                             rt.in = (l == org ? lout :
603                                      fn.applyAsInt(pin, lout));
604                             for (int c;;) {
605                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
606                                     break;
607                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
608                                     t = rt;
609                                     break;
610                                 }
611                             }
612                         }
613                         for (int c;;) {
614                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
615                                 break;
616                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
617                                 if (t != null)
618                                     f = t;
619                                 t = lt;
620                                 break;
621                             }
622                         }
623                         if (t == null)
624                             break;
625                     }
626                     if (f != null)
627                         f.fork();
628                 }
629                 else {
630                     int state; // Transition to sum, cumulate, or both
631                     for (int b;;) {
632                         if (((b = t.getPendingCount()) & FINISHED) != 0)
633                             break outer;                      // already done
634                         state = ((b & CUMULATE) != 0? FINISHED :
635                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
636                         if (t.compareAndSetPendingCount(b, b|state))
637                             break;
638                     }
639 
640                     int sum;
641                     if (state != SUMMED) {
642                         int first;
643                         if (l == org) {                       // leftmost; no in
644                             sum = a[org];
645                             first = org + 1;
646                         }
647                         else {
648                             sum = t.in;
649                             first = l;
650                         }
651                         for (int i = first; i < h; ++i)       // cumulate
652                             a[i] = sum = fn.applyAsInt(sum, a[i]);
653                     }
654                     else if (h < fnc) {                       // skip rightmost
655                         sum = a[l];
656                         for (int i = l + 1; i < h; ++i)       // sum only
657                             sum = fn.applyAsInt(sum, a[i]);
658                     }
659                     else
660                         sum = t.in;
661                     t.out = sum;
662                     for (IntCumulateTask par;;) {            // propagate
663                         if ((par = (IntCumulateTask)t.getCompleter()) == null) {
664                             if ((state & FINISHED) != 0)      // enable join
665                                 t.quietlyComplete();
666                             break outer;
667                         }
668                         int b = par.getPendingCount();
669                         if ((b & state & FINISHED) != 0)
670                             t = par;                          // both done
671                         else if ((b & state & SUMMED) != 0) { // both summed
672                             int nextState; IntCumulateTask lt, rt;
673                             if ((lt = par.left) != null &&
674                                 (rt = par.right) != null) {
675                                 int lout = lt.out;
676                                 par.out = (rt.hi == fnc ? lout :
677                                            fn.applyAsInt(lout, rt.out));
678                             }
679                             int refork = (((b & CUMULATE) == 0 &&
680                                            par.lo == org) ? CUMULATE : 0);
681                             if ((nextState = b|state|refork) == b ||
682                                 par.compareAndSetPendingCount(b, nextState)) {
683                                 state = SUMMED;               // drop finished
684                                 t = par;
685                                 if (refork != 0)
686                                     par.fork();
687                             }
688                         }
689                         else if (par.compareAndSetPendingCount(b, b|state))
690                             break outer;                      // sib not ready
691                     }
692                 }
693             }
694         }
695     }
696 }
697