1 /*
2     This file is part of GNU APL, a free implementation of the
3     ISO/IEC Standard 13751, "Programming Language APL, Extended"
4 
5     Copyright (C) 2008-2015  Dr. Jürgen Sauermann
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "Bif_OPER1_REDUCE.hh"
22 #include "Bif_OPER2_OUTER.hh"
23 #include "Bif_F12_TAKE_DROP.hh"
24 #include "Macro.hh"
25 #include "Workspace.hh"
26 
27 Bif_JOT          Bif_JOT        ::_fun;
28 Bif_OPER2_OUTER  Bif_OPER2_OUTER::_fun;
29 
30 Bif_JOT         * Bif_JOT        ::fun = &Bif_JOT        ::_fun;
31 Bif_OPER2_OUTER * Bif_OPER2_OUTER::fun = &Bif_OPER2_OUTER::_fun;
32 
33 Bif_OPER2_OUTER::PJob_product Bif_OPER2_OUTER::job;
34 
35 //-----------------------------------------------------------------------------
36 Token
eval_ALRB(Value_P A,Token & LO,Token & _RO,Value_P B)37 Bif_OPER2_OUTER::eval_ALRB(Value_P A, Token & LO, Token & _RO, Value_P B)
38 {
39    if (!_RO.is_function())    SYNTAX_ERROR;
40 
41 Function * RO = _RO.get_function();
42    Assert(RO);
43 
44    if (!RO->has_result())   DOMAIN_ERROR;
45 
46 Value_P Z(A->get_shape() + B->get_shape(), LOC);
47 
48    // an important (and the most likely) special case is RO being a scalar
49    // function. This case can be implemented in a far simpler fashion than
50    // the general case.
51    //
52    if (RO->get_scalar_f2() && A->is_simple() && B->is_simple())
53       {
54         job.cZ     = &Z->get_ravel(0);
55         job.cA     = &A->get_ravel(0);
56         job.ZAh    = A->element_count();
57         job.RO     = RO->get_scalar_f2();
58         job.cB     = &B->get_ravel(0);
59         job.ZBl    = B->element_count();
60         job.ec     = E_NO_ERROR;
61 
62         scalar_outer_product();
63         if (job.ec != E_NO_ERROR)   throw_apl_error(job.ec, LOC);
64 
65         Z->set_default(*B.get(), LOC);
66 
67         Z->check_value(LOC);
68         return Token(TOK_APL_VALUE1, Z);
69       }
70 
71    if (Z->is_empty())
72       {
73         Value_P Fill_A = Bif_F12_TAKE::first(A);
74         Value_P Fill_B = Bif_F12_TAKE::first(B);
75 
76         Value_P Z1 = RO->eval_fill_AB(Fill_A, Fill_B).get_apl_val();
77         Z->get_ravel(0).init(Z1->get_ravel(0), Z.getref(), LOC);
78         Z->check_value(LOC);
79         return Token(TOK_APL_VALUE1, Z);
80       }
81 
82    if (RO->may_push_SI())   // user defined LO
83       {
84         return Macro::get_macro(Macro::MAC_Z__A_LO_OUTER_B)
85                     ->eval_ALB(A, _RO, B);
86       }
87 
88 const ShapeItem len_B = B->element_count();
89 const ShapeItem len_Z = A->element_count() * len_B;
90 
91 Value_P RO_A;
92 Value_P RO_B;
93 
94    loop(z, len_Z)
95       {
96         const Cell * cA = &A->get_ravel(z / len_B);
97         const Cell * cB = &B->get_ravel(z % len_B);
98 
99         if (cA->is_pointer_cell())
100            {
101              RO_A = cA->get_pointer_value();
102            }
103         else
104            {
105              RO_A = Value_P(LOC);
106              RO_A->get_ravel(0).init(*cA, RO_A.getref(), LOC);
107            }
108 
109         if (cB->is_pointer_cell())
110            {
111              RO_B = cB->get_pointer_value();
112            }
113         else
114            {
115              RO_B = Value_P(LOC);
116              RO_B->get_ravel(0).init(*cB, RO_B.getref(), LOC);
117            }
118 
119         Token result = RO->eval_AB(RO_A, RO_B);
120 
121       // if RO was a primitive function, then result may be a value.
122       // if RO was a user defined function then result may be
123       // TOK_SI_PUSHED. In both cases result could be TOK_ERROR.
124       //
125       if (result.get_Class() == TC_VALUE)
126          {
127            Value_P ZZ = result.get_apl_val();
128            Z->next_ravel()->init_from_value(ZZ.get(), Z.getref(), LOC);
129            continue;
130          }
131 
132       if (result.get_tag() == TOK_ERROR)   return result;
133 
134         Q1(result);   FIXME;
135       }
136 
137    Z->set_default(*B.get(), LOC);
138 
139    Z->check_value(LOC);
140    return Token(TOK_APL_VALUE1, Z);
141 }
142 //-----------------------------------------------------------------------------
143 void
scalar_outer_product() const144 Bif_OPER2_OUTER::scalar_outer_product() const
145 {
146 #ifdef PERFORMANCE_COUNTERS_WANTED
147 const uint64_t start_1 = cycle_counter();
148 #endif
149 
150   // the empty cases have been handled already in eval_ALRB()
151 
152    job.ec = E_NO_ERROR;
153 
154 #if PARALLEL_ENABLED
155    if (  Parallel::run_parallel
156       && Thread_context::get_active_core_count() > 1
157       && job.ZAh * job.ZBl > get_dyadic_threshold())
158       {
159         job.cores = Thread_context::get_active_core_count();
160         Thread_context::do_work = PF_scalar_outer_product;
161         Thread_context::M_fork("scalar_outer_product");   // start pool
162         PF_scalar_outer_product(Thread_context::get_master());
163         Thread_context::M_join();
164       }
165    else
166 #endif // PARALLEL_ENABLED
167       {
168         job.cores = CCNT_1;
169         PF_scalar_outer_product(Thread_context::get_master());
170       }
171 
172 #ifdef PERFORMANCE_COUNTERS_WANTED
173 const uint64_t end_1 = cycle_counter();
174    Performance::fs_OPER2_OUTER_AB.add_sample(end_1 - start_1,
175                                              job.ZAh * job.ZBl);
176 #endif
177 }
178 //-----------------------------------------------------------------------------
179 void
PF_scalar_outer_product(Thread_context & tctx)180 Bif_OPER2_OUTER::PF_scalar_outer_product(Thread_context & tctx)
181 {
182 const ShapeItem Z_len = job.ZAh * job.ZBl;
183 
184 const ShapeItem slice_len = (Z_len + job.cores - 1)/job.cores;
185 ShapeItem z = tctx.get_N() * slice_len;
186 ShapeItem end_z = z + slice_len;
187    if (end_z > Z_len)   end_z = Z_len;
188 
189    for (; z < end_z; ++z)
190        {
191         const ShapeItem zah = z/job.ZBl;
192         const ShapeItem zbl = z - zah*job.ZBl;
193         job.ec = ((job.cB + zbl)->*job.RO)(job.cZ + z, job.cA + zah);
194         if (job.ec != E_NO_ERROR)   return;
195        }
196 }
197 //-----------------------------------------------------------------------------
198