1 /*
2 This file is part of GNU APL, a free implementation of the
3 ISO/IEC Standard 13751, "Programming Language APL, Extended"
4
5 Copyright (C) 2008-2015 Dr. Jürgen Sauermann
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "Bif_OPER1_REDUCE.hh"
22 #include "Bif_OPER2_OUTER.hh"
23 #include "Bif_F12_TAKE_DROP.hh"
24 #include "Macro.hh"
25 #include "Workspace.hh"
26
27 Bif_JOT Bif_JOT ::_fun;
28 Bif_OPER2_OUTER Bif_OPER2_OUTER::_fun;
29
30 Bif_JOT * Bif_JOT ::fun = &Bif_JOT ::_fun;
31 Bif_OPER2_OUTER * Bif_OPER2_OUTER::fun = &Bif_OPER2_OUTER::_fun;
32
33 Bif_OPER2_OUTER::PJob_product Bif_OPER2_OUTER::job;
34
35 //-----------------------------------------------------------------------------
36 Token
eval_ALRB(Value_P A,Token & LO,Token & _RO,Value_P B)37 Bif_OPER2_OUTER::eval_ALRB(Value_P A, Token & LO, Token & _RO, Value_P B)
38 {
39 if (!_RO.is_function()) SYNTAX_ERROR;
40
41 Function * RO = _RO.get_function();
42 Assert(RO);
43
44 if (!RO->has_result()) DOMAIN_ERROR;
45
46 Value_P Z(A->get_shape() + B->get_shape(), LOC);
47
48 // an important (and the most likely) special case is RO being a scalar
49 // function. This case can be implemented in a far simpler fashion than
50 // the general case.
51 //
52 if (RO->get_scalar_f2() && A->is_simple() && B->is_simple())
53 {
54 job.cZ = &Z->get_ravel(0);
55 job.cA = &A->get_ravel(0);
56 job.ZAh = A->element_count();
57 job.RO = RO->get_scalar_f2();
58 job.cB = &B->get_ravel(0);
59 job.ZBl = B->element_count();
60 job.ec = E_NO_ERROR;
61
62 scalar_outer_product();
63 if (job.ec != E_NO_ERROR) throw_apl_error(job.ec, LOC);
64
65 Z->set_default(*B.get(), LOC);
66
67 Z->check_value(LOC);
68 return Token(TOK_APL_VALUE1, Z);
69 }
70
71 if (Z->is_empty())
72 {
73 Value_P Fill_A = Bif_F12_TAKE::first(A);
74 Value_P Fill_B = Bif_F12_TAKE::first(B);
75
76 Value_P Z1 = RO->eval_fill_AB(Fill_A, Fill_B).get_apl_val();
77 Z->get_ravel(0).init(Z1->get_ravel(0), Z.getref(), LOC);
78 Z->check_value(LOC);
79 return Token(TOK_APL_VALUE1, Z);
80 }
81
82 if (RO->may_push_SI()) // user defined LO
83 {
84 return Macro::get_macro(Macro::MAC_Z__A_LO_OUTER_B)
85 ->eval_ALB(A, _RO, B);
86 }
87
88 const ShapeItem len_B = B->element_count();
89 const ShapeItem len_Z = A->element_count() * len_B;
90
91 Value_P RO_A;
92 Value_P RO_B;
93
94 loop(z, len_Z)
95 {
96 const Cell * cA = &A->get_ravel(z / len_B);
97 const Cell * cB = &B->get_ravel(z % len_B);
98
99 if (cA->is_pointer_cell())
100 {
101 RO_A = cA->get_pointer_value();
102 }
103 else
104 {
105 RO_A = Value_P(LOC);
106 RO_A->get_ravel(0).init(*cA, RO_A.getref(), LOC);
107 }
108
109 if (cB->is_pointer_cell())
110 {
111 RO_B = cB->get_pointer_value();
112 }
113 else
114 {
115 RO_B = Value_P(LOC);
116 RO_B->get_ravel(0).init(*cB, RO_B.getref(), LOC);
117 }
118
119 Token result = RO->eval_AB(RO_A, RO_B);
120
121 // if RO was a primitive function, then result may be a value.
122 // if RO was a user defined function then result may be
123 // TOK_SI_PUSHED. In both cases result could be TOK_ERROR.
124 //
125 if (result.get_Class() == TC_VALUE)
126 {
127 Value_P ZZ = result.get_apl_val();
128 Z->next_ravel()->init_from_value(ZZ.get(), Z.getref(), LOC);
129 continue;
130 }
131
132 if (result.get_tag() == TOK_ERROR) return result;
133
134 Q1(result); FIXME;
135 }
136
137 Z->set_default(*B.get(), LOC);
138
139 Z->check_value(LOC);
140 return Token(TOK_APL_VALUE1, Z);
141 }
142 //-----------------------------------------------------------------------------
143 void
scalar_outer_product() const144 Bif_OPER2_OUTER::scalar_outer_product() const
145 {
146 #ifdef PERFORMANCE_COUNTERS_WANTED
147 const uint64_t start_1 = cycle_counter();
148 #endif
149
150 // the empty cases have been handled already in eval_ALRB()
151
152 job.ec = E_NO_ERROR;
153
154 #if PARALLEL_ENABLED
155 if ( Parallel::run_parallel
156 && Thread_context::get_active_core_count() > 1
157 && job.ZAh * job.ZBl > get_dyadic_threshold())
158 {
159 job.cores = Thread_context::get_active_core_count();
160 Thread_context::do_work = PF_scalar_outer_product;
161 Thread_context::M_fork("scalar_outer_product"); // start pool
162 PF_scalar_outer_product(Thread_context::get_master());
163 Thread_context::M_join();
164 }
165 else
166 #endif // PARALLEL_ENABLED
167 {
168 job.cores = CCNT_1;
169 PF_scalar_outer_product(Thread_context::get_master());
170 }
171
172 #ifdef PERFORMANCE_COUNTERS_WANTED
173 const uint64_t end_1 = cycle_counter();
174 Performance::fs_OPER2_OUTER_AB.add_sample(end_1 - start_1,
175 job.ZAh * job.ZBl);
176 #endif
177 }
178 //-----------------------------------------------------------------------------
179 void
PF_scalar_outer_product(Thread_context & tctx)180 Bif_OPER2_OUTER::PF_scalar_outer_product(Thread_context & tctx)
181 {
182 const ShapeItem Z_len = job.ZAh * job.ZBl;
183
184 const ShapeItem slice_len = (Z_len + job.cores - 1)/job.cores;
185 ShapeItem z = tctx.get_N() * slice_len;
186 ShapeItem end_z = z + slice_len;
187 if (end_z > Z_len) end_z = Z_len;
188
189 for (; z < end_z; ++z)
190 {
191 const ShapeItem zah = z/job.ZBl;
192 const ShapeItem zbl = z - zah*job.ZBl;
193 job.ec = ((job.cB + zbl)->*job.RO)(job.cZ + z, job.cA + zah);
194 if (job.ec != E_NO_ERROR) return;
195 }
196 }
197 //-----------------------------------------------------------------------------
198