1 /*
2     This file is part of GNU APL, a free implementation of the
3     ISO/IEC Standard 13751, "Programming Language APL, Extended"
4 
5     Copyright (C) 2008-2016  Dr. Jürgen Sauermann
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "Assert.hh"
22 #include "Bif_F12_SORT.hh"
23 #include "Cell.hh"
24 #include "Heapsort.hh"
25 #include "Macro.hh"
26 #include "Value.hh"
27 #include "Workspace.hh"
28 
29 Bif_F12_SORT_ASC  Bif_F12_SORT_ASC::_fun;     // ⍋
30 Bif_F12_SORT_DES  Bif_F12_SORT_DES::_fun;     // ⍒
31 
32 Bif_F12_SORT_ASC * Bif_F12_SORT_ASC::fun = &Bif_F12_SORT_ASC::_fun;
33 Bif_F12_SORT_DES * Bif_F12_SORT_DES::fun = &Bif_F12_SORT_DES::_fun;
34 
35 //-----------------------------------------------------------------------------
CollatingCache(const Value & A,const Cell * base,ShapeItem clen)36 CollatingCache::CollatingCache(const Value & A, const Cell * base,
37                                ShapeItem clen)
38    : rank(A.get_rank()),
39      base_B1(base),
40      comp_len(clen)
41 {
42 const ShapeItem ec_A = A.element_count();
43 UCS_string UA;
44    UA.reserve(ec_A);
45    loop(a, ec_A)   UA.append(A.get_ravel(a).get_char_value());
46 
47 UCS_string UA1 = UA.unique();
48 
49    reserve(UA1.size());
50 
51    // create CollatingCacheEntry for every char in UA1. At this point, all
52    // entries are located at the end of A.
53    //
54    loop(a, UA1.size())
55        {
56          const Unicode uni = UA1[a];
57          const CollatingCacheEntry entry(uni, A.get_shape());
58          push_back(entry);
59        }
60 
61    // move entries back
62    //
63    loop(a, ec_A)
64       {
65         const Unicode uni = A.get_ravel(a).get_char_value();
66         CollatingCacheEntry & entry = at(find_entry(uni));
67 
68         ShapeItem aq = a;
69         loop(r, A.get_rank())
70            {
71              const Rank axis = entry.ce_shape.get_rank() - r - 1;
72              const ShapeItem ar = aq % A.get_shape_item(axis);
73              Assert(ar <= A.get_shape_item(axis));
74              if (entry.ce_shape.get_shape_item(axis) > ar)
75                 entry.ce_shape.set_shape_item(axis, ar);
76              aq /= A.get_shape_item(axis);
77            }
78       }
79 
80    // add one entry for all characters in B that are not in A
81    //
82 CollatingCacheEntry others(Invalid_Unicode, A.get_shape());
83    push_back(others);
84 }
85 //-----------------------------------------------------------------------------
86 bool
greater_vec(const IntCell & Za,const IntCell & Zb,const void * comp_arg)87 CollatingCache::greater_vec(const IntCell & Za, const IntCell & Zb,
88                             const void * comp_arg)
89 {
90 const CollatingCache & cache =
91                       *reinterpret_cast<const CollatingCache *>(comp_arg);
92 const Cell * ca = cache.base_B1 + cache.comp_len * Za.get_int_value();
93 const Cell * cb = cache.base_B1 + cache.comp_len * Zb.get_int_value();
94 
95 const Rank rank = cache.get_rank();
96 
97    loop(r, rank)
98       {
99         loop(c, cache.get_comp_len())
100            {
101              const APL_Integer a = ca[c].get_int_value();
102              const APL_Integer b = cb[c].get_int_value();
103              const int diff = cache[a].compare_axis(cache[b], rank - r - 1);
104 
105               if (diff)   return diff > 0;
106            }
107       }
108 
109    return ca > cb;
110 }
111 //-----------------------------------------------------------------------------
112 bool
smaller_vec(const IntCell & Za,const IntCell & Zb,const void * comp_arg)113 CollatingCache::smaller_vec(const IntCell & Za, const IntCell & Zb,
114                             const void * comp_arg)
115 {
116 const CollatingCache & cache =
117                       *reinterpret_cast<const CollatingCache *>(comp_arg);
118 const Cell * ca = cache.base_B1 + cache.comp_len * Za.get_int_value();
119 const Cell * cb = cache.base_B1 + cache.comp_len * Zb.get_int_value();
120 const Rank rank = cache.get_rank();
121 
122    loop(r, rank)
123       {
124         loop(c, cache.get_comp_len())
125            {
126              const APL_Integer a = ca[c].get_int_value();
127              const APL_Integer b = cb[c].get_int_value();
128              const int diff = cache[a].compare_axis(cache[b], rank - r - 1);
129 
130               if (diff)   return diff < 0;
131            }
132       }
133 
134    return ca > cb;
135 }
136 //-----------------------------------------------------------------------------
137 ShapeItem
find_entry(Unicode uni) const138 CollatingCache::find_entry(Unicode uni) const
139 {
140 const CollatingCacheEntry * entries = &at(0);
141 const CollatingCacheEntry * entry =
142 
143    Heapsort<CollatingCacheEntry>:: search<Unicode>(uni, entries, size(),
144                                            CollatingCacheEntry::compare_chars);
145 
146    if (entry)   return entry - entries;
147    return size() - 1;   // the entry for characters not in A
148 }
149 //=============================================================================
150 Token
sort(Value_P B,Sort_order order)151 Bif_F12_SORT::sort(Value_P B, Sort_order order)
152 {
153    if (B->is_scalar())          return Token(TOK_ERROR, E_RANK_ERROR);
154    if (!B->can_be_compared())   return Token(TOK_ERROR, E_DOMAIN_ERROR);
155 
156 const ShapeItem len_BZ = B->get_shape_item(0);
157    if (len_BZ == 0)   return Token(TOK_APL_VALUE1, Idx0(LOC));
158 
159 const ShapeItem comp_len = B->element_count()/len_BZ;
160 
161    // first set Z←⍳len_BZ
162    //
163 const int qio = Workspace::get_IO();
164 Value_P Z(len_BZ, LOC);
165    loop(l, len_BZ)   new (Z->next_ravel())   IntCell(l + qio);
166    Z->check_value(LOC);
167    if (len_BZ == 1)   return Token(TOK_APL_VALUE1, Z);
168 
169    // then sort Z (actually re-arrange Z so that B[Z] is sorted)
170    //
171 const Cell * base = &B->get_ravel(0) - qio*comp_len;
172 const struct { const Cell * base; ShapeItem comp_len; } ctx = { base, comp_len};
173 
174    if (order == SORT_ASCENDING)
175       Heapsort<IntCell>::sort(&Z->get_ravel(0).vIntCell(),
176                               len_BZ, &ctx, &Cell::greater_vec);
177    else
178       Heapsort<IntCell>::sort(&Z->get_ravel(0).vIntCell(),
179                                len_BZ, &ctx, &Cell::smaller_vec);
180 
181    return Token(TOK_APL_VALUE1, Z);
182 }
183 //-----------------------------------------------------------------------------
184 Token
sort_collating(Value_P A,Value_P B,Sort_order order)185 Bif_F12_SORT::sort_collating(Value_P A, Value_P B, Sort_order order)
186 {
187    if (A->is_scalar())   RANK_ERROR;
188    if (A->NOTCHAR())     DOMAIN_ERROR;
189 
190 const APL_Integer qio = Workspace::get_IO();
191    if (B->NOTCHAR())     DOMAIN_ERROR;
192    if (B->is_scalar())   return Token(TOK_APL_VALUE1, IntScalar(qio, LOC));
193 
194 const ShapeItem len_BZ = B->get_shape_item(0);
195    if (len_BZ == 0)   return Token(TOK_APL_VALUE1, Idx0(LOC));
196 
197    // first set Z←⍳len_BZ
198    //
199 Value_P Z(len_BZ, LOC);
200    loop(l, len_BZ)   new (Z->next_ravel())   IntCell(l + qio);
201    Z->check_value(LOC);
202    if (len_BZ == 1)   return Token(TOK_APL_VALUE1, Z);
203 
204 const ShapeItem ec_B = B->element_count();
205 const ShapeItem comp_len = ec_B/len_BZ;
206 
207    // create a vector B1 which has the same shape as B, but instead of
208    // B's characters, it has the index of the corresponding CollatingCache
209    // index for each character in B.
210    //
211 Value_P B1(B->get_shape(), LOC);
212 const Cell * base_B1 = &B1->get_ravel(0) - qio*comp_len;
213 CollatingCache cache(A.getref(), base_B1, comp_len);
214    loop(b, ec_B)
215       {
216         const Unicode uni = B->get_ravel(b).get_char_value();
217         const APL_Integer b1 = cache.find_entry(uni);
218         new (B1->next_ravel()) IntCell(b1);
219       }
220    B1->check_value(LOC);
221 
222    // then sort Z (actually re-arrange Z so that B[Z] is sorted)
223    //
224 IntCell * z0 = &Z->get_ravel(0).vIntCell();
225    if (order == SORT_ASCENDING)
226       Heapsort<IntCell>::sort(z0, len_BZ, &cache, &CollatingCache::greater_vec);
227    else
228       Heapsort<IntCell>::sort(z0, len_BZ, &cache, &CollatingCache::smaller_vec);
229 
230    Z->check_value(LOC);
231    return Token(TOK_APL_VALUE1, Z);
232 }
233 //-----------------------------------------------------------------------------
234 
235