1 /*
2 This file is part of GNU APL, a free implementation of the
3 ISO/IEC Standard 13751, "Programming Language APL, Extended"
4
5 Copyright (C) 2008-2016 Dr. Jürgen Sauermann
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "Assert.hh"
22 #include "Bif_F12_SORT.hh"
23 #include "Cell.hh"
24 #include "Heapsort.hh"
25 #include "Macro.hh"
26 #include "Value.hh"
27 #include "Workspace.hh"
28
29 Bif_F12_SORT_ASC Bif_F12_SORT_ASC::_fun; // ⍋
30 Bif_F12_SORT_DES Bif_F12_SORT_DES::_fun; // ⍒
31
32 Bif_F12_SORT_ASC * Bif_F12_SORT_ASC::fun = &Bif_F12_SORT_ASC::_fun;
33 Bif_F12_SORT_DES * Bif_F12_SORT_DES::fun = &Bif_F12_SORT_DES::_fun;
34
35 //-----------------------------------------------------------------------------
CollatingCache(const Value & A,const Cell * base,ShapeItem clen)36 CollatingCache::CollatingCache(const Value & A, const Cell * base,
37 ShapeItem clen)
38 : rank(A.get_rank()),
39 base_B1(base),
40 comp_len(clen)
41 {
42 const ShapeItem ec_A = A.element_count();
43 UCS_string UA;
44 UA.reserve(ec_A);
45 loop(a, ec_A) UA.append(A.get_ravel(a).get_char_value());
46
47 UCS_string UA1 = UA.unique();
48
49 reserve(UA1.size());
50
51 // create CollatingCacheEntry for every char in UA1. At this point, all
52 // entries are located at the end of A.
53 //
54 loop(a, UA1.size())
55 {
56 const Unicode uni = UA1[a];
57 const CollatingCacheEntry entry(uni, A.get_shape());
58 push_back(entry);
59 }
60
61 // move entries back
62 //
63 loop(a, ec_A)
64 {
65 const Unicode uni = A.get_ravel(a).get_char_value();
66 CollatingCacheEntry & entry = at(find_entry(uni));
67
68 ShapeItem aq = a;
69 loop(r, A.get_rank())
70 {
71 const Rank axis = entry.ce_shape.get_rank() - r - 1;
72 const ShapeItem ar = aq % A.get_shape_item(axis);
73 Assert(ar <= A.get_shape_item(axis));
74 if (entry.ce_shape.get_shape_item(axis) > ar)
75 entry.ce_shape.set_shape_item(axis, ar);
76 aq /= A.get_shape_item(axis);
77 }
78 }
79
80 // add one entry for all characters in B that are not in A
81 //
82 CollatingCacheEntry others(Invalid_Unicode, A.get_shape());
83 push_back(others);
84 }
85 //-----------------------------------------------------------------------------
86 bool
greater_vec(const IntCell & Za,const IntCell & Zb,const void * comp_arg)87 CollatingCache::greater_vec(const IntCell & Za, const IntCell & Zb,
88 const void * comp_arg)
89 {
90 const CollatingCache & cache =
91 *reinterpret_cast<const CollatingCache *>(comp_arg);
92 const Cell * ca = cache.base_B1 + cache.comp_len * Za.get_int_value();
93 const Cell * cb = cache.base_B1 + cache.comp_len * Zb.get_int_value();
94
95 const Rank rank = cache.get_rank();
96
97 loop(r, rank)
98 {
99 loop(c, cache.get_comp_len())
100 {
101 const APL_Integer a = ca[c].get_int_value();
102 const APL_Integer b = cb[c].get_int_value();
103 const int diff = cache[a].compare_axis(cache[b], rank - r - 1);
104
105 if (diff) return diff > 0;
106 }
107 }
108
109 return ca > cb;
110 }
111 //-----------------------------------------------------------------------------
112 bool
smaller_vec(const IntCell & Za,const IntCell & Zb,const void * comp_arg)113 CollatingCache::smaller_vec(const IntCell & Za, const IntCell & Zb,
114 const void * comp_arg)
115 {
116 const CollatingCache & cache =
117 *reinterpret_cast<const CollatingCache *>(comp_arg);
118 const Cell * ca = cache.base_B1 + cache.comp_len * Za.get_int_value();
119 const Cell * cb = cache.base_B1 + cache.comp_len * Zb.get_int_value();
120 const Rank rank = cache.get_rank();
121
122 loop(r, rank)
123 {
124 loop(c, cache.get_comp_len())
125 {
126 const APL_Integer a = ca[c].get_int_value();
127 const APL_Integer b = cb[c].get_int_value();
128 const int diff = cache[a].compare_axis(cache[b], rank - r - 1);
129
130 if (diff) return diff < 0;
131 }
132 }
133
134 return ca > cb;
135 }
136 //-----------------------------------------------------------------------------
137 ShapeItem
find_entry(Unicode uni) const138 CollatingCache::find_entry(Unicode uni) const
139 {
140 const CollatingCacheEntry * entries = &at(0);
141 const CollatingCacheEntry * entry =
142
143 Heapsort<CollatingCacheEntry>:: search<Unicode>(uni, entries, size(),
144 CollatingCacheEntry::compare_chars);
145
146 if (entry) return entry - entries;
147 return size() - 1; // the entry for characters not in A
148 }
149 //=============================================================================
150 Token
sort(Value_P B,Sort_order order)151 Bif_F12_SORT::sort(Value_P B, Sort_order order)
152 {
153 if (B->is_scalar()) return Token(TOK_ERROR, E_RANK_ERROR);
154 if (!B->can_be_compared()) return Token(TOK_ERROR, E_DOMAIN_ERROR);
155
156 const ShapeItem len_BZ = B->get_shape_item(0);
157 if (len_BZ == 0) return Token(TOK_APL_VALUE1, Idx0(LOC));
158
159 const ShapeItem comp_len = B->element_count()/len_BZ;
160
161 // first set Z←⍳len_BZ
162 //
163 const int qio = Workspace::get_IO();
164 Value_P Z(len_BZ, LOC);
165 loop(l, len_BZ) new (Z->next_ravel()) IntCell(l + qio);
166 Z->check_value(LOC);
167 if (len_BZ == 1) return Token(TOK_APL_VALUE1, Z);
168
169 // then sort Z (actually re-arrange Z so that B[Z] is sorted)
170 //
171 const Cell * base = &B->get_ravel(0) - qio*comp_len;
172 const struct { const Cell * base; ShapeItem comp_len; } ctx = { base, comp_len};
173
174 if (order == SORT_ASCENDING)
175 Heapsort<IntCell>::sort(&Z->get_ravel(0).vIntCell(),
176 len_BZ, &ctx, &Cell::greater_vec);
177 else
178 Heapsort<IntCell>::sort(&Z->get_ravel(0).vIntCell(),
179 len_BZ, &ctx, &Cell::smaller_vec);
180
181 return Token(TOK_APL_VALUE1, Z);
182 }
183 //-----------------------------------------------------------------------------
184 Token
sort_collating(Value_P A,Value_P B,Sort_order order)185 Bif_F12_SORT::sort_collating(Value_P A, Value_P B, Sort_order order)
186 {
187 if (A->is_scalar()) RANK_ERROR;
188 if (A->NOTCHAR()) DOMAIN_ERROR;
189
190 const APL_Integer qio = Workspace::get_IO();
191 if (B->NOTCHAR()) DOMAIN_ERROR;
192 if (B->is_scalar()) return Token(TOK_APL_VALUE1, IntScalar(qio, LOC));
193
194 const ShapeItem len_BZ = B->get_shape_item(0);
195 if (len_BZ == 0) return Token(TOK_APL_VALUE1, Idx0(LOC));
196
197 // first set Z←⍳len_BZ
198 //
199 Value_P Z(len_BZ, LOC);
200 loop(l, len_BZ) new (Z->next_ravel()) IntCell(l + qio);
201 Z->check_value(LOC);
202 if (len_BZ == 1) return Token(TOK_APL_VALUE1, Z);
203
204 const ShapeItem ec_B = B->element_count();
205 const ShapeItem comp_len = ec_B/len_BZ;
206
207 // create a vector B1 which has the same shape as B, but instead of
208 // B's characters, it has the index of the corresponding CollatingCache
209 // index for each character in B.
210 //
211 Value_P B1(B->get_shape(), LOC);
212 const Cell * base_B1 = &B1->get_ravel(0) - qio*comp_len;
213 CollatingCache cache(A.getref(), base_B1, comp_len);
214 loop(b, ec_B)
215 {
216 const Unicode uni = B->get_ravel(b).get_char_value();
217 const APL_Integer b1 = cache.find_entry(uni);
218 new (B1->next_ravel()) IntCell(b1);
219 }
220 B1->check_value(LOC);
221
222 // then sort Z (actually re-arrange Z so that B[Z] is sorted)
223 //
224 IntCell * z0 = &Z->get_ravel(0).vIntCell();
225 if (order == SORT_ASCENDING)
226 Heapsort<IntCell>::sort(z0, len_BZ, &cache, &CollatingCache::greater_vec);
227 else
228 Heapsort<IntCell>::sort(z0, len_BZ, &cache, &CollatingCache::smaller_vec);
229
230 Z->check_value(LOC);
231 return Token(TOK_APL_VALUE1, Z);
232 }
233 //-----------------------------------------------------------------------------
234
235