1 /*
2  *  nextpnr -- Next Generation Place and Route
3  *
4  *  Copyright (C) 2019  David Shah <david@symbioticeda.com>
5  *
6  *  Permission to use, copy, modify, and/or distribute this software for any
7  *  purpose with or without fee is hereby granted, provided that the above
8  *  copyright notice and this permission notice appear in all copies.
9  *
10  *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  *  [[cite]] HeAP
19  *  Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson
20  *  https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf
21  *
22  *  [[cite]] SimPL
23  *  SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov
24  *  http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf
25  *
26  *  Notable changes from the original algorithm
27  *   - Following the other nextpnr placer, Bels are placed rather than CLBs. This means a strict legalisation pass is
28  *     added in addition to coarse legalisation (referred to as "spreading" to avoid confusion with strict legalisation)
29  *     as described in HeAP to ensure validity. This searches random bels in the vicinity of the position chosen by
30  *     spreading, with diameter increasing over iterations, with a heuristic to prefer lower wirelength choices.
31  *   - To make the placer timing-driven, the bound2bound weights are multiplied by (1 + 10 * crit^2)
32  */
33 
34 #ifdef WITH_HEAP
35 
36 #include "placer_heap.h"
37 #include <Eigen/Core>
38 #include <Eigen/IterativeLinearSolvers>
39 #include <boost/optional.hpp>
40 #include <chrono>
41 #include <deque>
42 #include <fstream>
43 #include <numeric>
44 #include <queue>
45 #include <tuple>
46 #include <unordered_map>
47 #include "log.h"
48 #include "nextpnr.h"
49 #include "place_common.h"
50 #include "placer1.h"
51 #include "timing.h"
52 #include "util.h"
53 NEXTPNR_NAMESPACE_BEGIN
54 
55 namespace {
56 // A simple internal representation for a sparse system of equations Ax = rhs
57 // This is designed to decouple the functions that build the matrix to the engine that
58 // solves it, and the representation that requires
59 template <typename T> struct EquationSystem
60 {
61 
EquationSystem__anond8f201400111::EquationSystem62     EquationSystem(size_t rows, size_t cols)
63     {
64         A.resize(cols);
65         rhs.resize(rows);
66     }
67 
68     // Simple sparse format, easy to convert to CCS for solver
69     std::vector<std::vector<std::pair<int, T>>> A; // col -> (row, x[row, col]) sorted by row
70     std::vector<T> rhs;                            // RHS vector
reset__anond8f201400111::EquationSystem71     void reset()
72     {
73         for (auto &col : A)
74             col.clear();
75         std::fill(rhs.begin(), rhs.end(), T());
76     }
77 
add_coeff__anond8f201400111::EquationSystem78     void add_coeff(int row, int col, T val)
79     {
80         auto &Ac = A.at(col);
81         // Binary search
82         int b = 0, e = int(Ac.size()) - 1;
83         while (b <= e) {
84             int i = (b + e) / 2;
85             if (Ac.at(i).first == row) {
86                 Ac.at(i).second += val;
87                 return;
88             }
89             if (Ac.at(i).first > row)
90                 e = i - 1;
91             else
92                 b = i + 1;
93         }
94         Ac.insert(Ac.begin() + b, std::make_pair(row, val));
95     }
96 
add_rhs__anond8f201400111::EquationSystem97     void add_rhs(int row, T val) { rhs[row] += val; }
98 
solve__anond8f201400111::EquationSystem99     void solve(std::vector<T> &x, float tolerance)
100     {
101         using namespace Eigen;
102         if (x.empty())
103             return;
104         NPNR_ASSERT(x.size() == A.size());
105 
106         VectorXd vx(x.size()), vb(rhs.size());
107         SparseMatrix<T> mat(A.size(), A.size());
108 
109         std::vector<int> colnnz;
110         for (auto &Ac : A)
111             colnnz.push_back(int(Ac.size()));
112         mat.reserve(colnnz);
113         for (int col = 0; col < int(A.size()); col++) {
114             auto &Ac = A.at(col);
115             for (auto &el : Ac)
116                 mat.insert(el.first, col) = el.second;
117         }
118 
119         for (int i = 0; i < int(x.size()); i++)
120             vx[i] = x.at(i);
121         for (int i = 0; i < int(rhs.size()); i++)
122             vb[i] = rhs.at(i);
123 
124         ConjugateGradient<SparseMatrix<T>, Lower | Upper> solver;
125         solver.setTolerance(tolerance);
126         VectorXd xr = solver.compute(mat).solveWithGuess(vb, vx);
127         for (int i = 0; i < int(x.size()); i++)
128             x.at(i) = xr[i];
129         // for (int i = 0; i < int(x.size()); i++)
130         //    log_info("x[%d] = %f\n", i, x.at(i));
131     }
132 };
133 
134 } // namespace
135 
136 class HeAPPlacer
137 {
138   public:
HeAPPlacer(Context * ctx,PlacerHeapCfg cfg)139     HeAPPlacer(Context *ctx, PlacerHeapCfg cfg) : ctx(ctx), cfg(cfg) { Eigen::initParallel(); }
140 
place()141     bool place()
142     {
143         auto startt = std::chrono::high_resolution_clock::now();
144 
145         ctx->lock();
146         place_constraints();
147         build_fast_bels();
148         seed_placement();
149         update_all_chains();
150         wirelen_t hpwl = total_hpwl();
151         log_info("Creating initial analytic placement for %d cells, random placement wirelen = %d.\n",
152                  int(place_cells.size()), int(hpwl));
153         for (int i = 0; i < 4; i++) {
154             setup_solve_cells();
155             auto solve_startt = std::chrono::high_resolution_clock::now();
156 #ifdef NPNR_DISABLE_THREADS
157             build_solve_direction(false, -1);
158             build_solve_direction(true, -1);
159 #else
160             boost::thread xaxis([&]() { build_solve_direction(false, -1); });
161             build_solve_direction(true, -1);
162             xaxis.join();
163 #endif
164             auto solve_endt = std::chrono::high_resolution_clock::now();
165             solve_time += std::chrono::duration<double>(solve_endt - solve_startt).count();
166 
167             update_all_chains();
168 
169             hpwl = total_hpwl();
170             log_info("    at initial placer iter %d, wirelen = %d\n", i, int(hpwl));
171         }
172 
173         wirelen_t solved_hpwl = 0, spread_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits<wirelen_t>::max();
174         int iter = 0, stalled = 0;
175 
176         std::vector<std::tuple<CellInfo *, BelId, PlaceStrength>> solution;
177 
178         std::vector<std::unordered_set<IdString>> heap_runs;
179         std::unordered_set<IdString> all_celltypes;
180         std::unordered_map<IdString, int> ct_count;
181 
182         for (auto cell : place_cells) {
183             if (!all_celltypes.count(cell->type)) {
184                 heap_runs.push_back(std::unordered_set<IdString>{cell->type});
185                 all_celltypes.insert(cell->type);
186             }
187             ct_count[cell->type]++;
188         }
189         // If more than 98% of cells are one cell type, always solve all at once
190         // Otherwise, follow full HeAP strategy of rotate&all
191         for (auto &c : ct_count)
192             if (c.second >= 0.98 * int(place_cells.size())) {
193                 heap_runs.clear();
194                 break;
195             }
196 
197         if (cfg.placeAllAtOnce) {
198             // Never want to deal with LUTs, FFs, MUXFxs seperately,
199             // for now disable all single-cell-type runs and only have heteregenous
200             // runs
201             heap_runs.clear();
202         }
203 
204         heap_runs.push_back(all_celltypes);
205         // The main HeAP placer loop
206         log_info("Running main analytical placer.\n");
207         while (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8)) {
208             // Alternate between particular Bel types and all bels
209             for (auto &run : heap_runs) {
210                 auto run_startt = std::chrono::high_resolution_clock::now();
211 
212                 setup_solve_cells(&run);
213                 if (solve_cells.empty())
214                     continue;
215                 // Heuristic: don't bother with threading below a certain size
216                 auto solve_startt = std::chrono::high_resolution_clock::now();
217 
218 #ifndef NPNR_DISABLE_THREADS
219                 if (solve_cells.size() >= 500) {
220                     boost::thread xaxis([&]() { build_solve_direction(false, (iter == 0) ? -1 : iter); });
221                     build_solve_direction(true, (iter == 0) ? -1 : iter);
222                     xaxis.join();
223                 } else
224 #endif
225                 {
226                     build_solve_direction(false, (iter == 0) ? -1 : iter);
227                     build_solve_direction(true, (iter == 0) ? -1 : iter);
228                 }
229                 auto solve_endt = std::chrono::high_resolution_clock::now();
230                 solve_time += std::chrono::duration<double>(solve_endt - solve_startt).count();
231                 update_all_chains();
232                 solved_hpwl = total_hpwl();
233 
234                 update_all_chains();
235 
236                 for (const auto &group : cfg.cellGroups)
237                     CutSpreader(this, group).run();
238 
239                 for (auto type : sorted(run))
240                     if (std::all_of(cfg.cellGroups.begin(), cfg.cellGroups.end(),
241                                     [type](const std::unordered_set<IdString> &grp) { return !grp.count(type); }))
242                         CutSpreader(this, {type}).run();
243 
244                 update_all_chains();
245                 spread_hpwl = total_hpwl();
246                 legalise_placement_strict(true);
247                 update_all_chains();
248 
249                 legal_hpwl = total_hpwl();
250                 auto run_stopt = std::chrono::high_resolution_clock::now();
251                 log_info("    at iteration #%d, type %s: wirelen solved = %d, spread = %d, legal = %d; time = %.02fs\n",
252                          iter + 1, (run.size() > 1 ? "ALL" : run.begin()->c_str(ctx)), int(solved_hpwl),
253                          int(spread_hpwl), int(legal_hpwl),
254                          std::chrono::duration<double>(run_stopt - run_startt).count());
255             }
256 
257             if (cfg.timing_driven)
258                 get_criticalities(ctx, &net_crit);
259 
260             if (legal_hpwl < best_hpwl) {
261                 best_hpwl = legal_hpwl;
262                 stalled = 0;
263                 // Save solution
264                 solution.clear();
265                 for (auto cell : sorted(ctx->cells)) {
266                     solution.emplace_back(cell.second, cell.second->bel, cell.second->belStrength);
267                 }
268             } else {
269                 ++stalled;
270             }
271             for (auto &cl : cell_locs) {
272                 cl.second.legal_x = cl.second.x;
273                 cl.second.legal_y = cl.second.y;
274             }
275             ctx->yield();
276             ++iter;
277         }
278 
279         // Apply saved solution
280         for (auto &sc : solution) {
281             CellInfo *cell = std::get<0>(sc);
282             if (cell->bel != BelId())
283                 ctx->unbindBel(cell->bel);
284         }
285         for (auto &sc : solution) {
286             CellInfo *cell;
287             BelId bel;
288             PlaceStrength strength;
289             std::tie(cell, bel, strength) = sc;
290             ctx->bindBel(bel, cell, strength);
291         }
292 
293         for (auto cell : sorted(ctx->cells)) {
294             if (cell.second->bel == BelId())
295                 log_error("Found unbound cell %s\n", cell.first.c_str(ctx));
296             if (ctx->getBoundBelCell(cell.second->bel) != cell.second)
297                 log_error("Found cell %s with mismatched binding\n", cell.first.c_str(ctx));
298             if (ctx->debug)
299                 log_info("AP soln: %s -> %s\n", cell.first.c_str(ctx), ctx->getBelName(cell.second->bel).c_str(ctx));
300         }
301 
302         ctx->unlock();
303         auto endtt = std::chrono::high_resolution_clock::now();
304         log_info("HeAP Placer Time: %.02fs\n", std::chrono::duration<double>(endtt - startt).count());
305         log_info("  of which solving equations: %.02fs\n", solve_time);
306         log_info("  of which spreading cells: %.02fs\n", cl_time);
307         log_info("  of which strict legalisation: %.02fs\n", sl_time);
308 
309         ctx->check();
310 
311         placer1_refine(ctx, Placer1Cfg(ctx));
312 
313         return true;
314     }
315 
316   private:
317     Context *ctx;
318     PlacerHeapCfg cfg;
319 
320     int max_x = 0, max_y = 0;
321     std::vector<std::vector<std::vector<std::vector<BelId>>>> fast_bels;
322     std::unordered_map<IdString, std::tuple<int, int>> bel_types;
323 
324     // For fast handling of heterogeneosity during initial placement without full legalisation,
325     // for each Bel type this goes from x or y to the nearest x or y where a Bel of a given type exists
326     // This is particularly important for the iCE40 architecture, where multipliers and BRAM only exist at the
327     // edges and corners respectively
328     std::vector<std::vector<int>> nearest_row_with_bel;
329     std::vector<std::vector<int>> nearest_col_with_bel;
330 
331     struct BoundingBox
332     {
333         // Actual bounding box
334         int x0 = 0, x1 = 0, y0 = 0, y1 = 0;
335     };
336 
337     std::unordered_map<IdString, BoundingBox> constraint_region_bounds;
338 
339     // In some cases, we can't use bindBel because we allow overlap in the earlier stages. So we use this custom
340     // structure instead
341     struct CellLocation
342     {
343         int x, y;
344         int legal_x, legal_y;
345         double rawx, rawy;
346         bool locked, global;
347     };
348     std::unordered_map<IdString, CellLocation> cell_locs;
349     // The set of cells that we will actually place. This excludes locked cells and children cells of macros/chains
350     // (only the root of each macro is placed.)
351     std::vector<CellInfo *> place_cells;
352 
353     // The cells in the current equation being solved (a subset of place_cells in some cases, where we only place
354     // cells of a certain type)
355     std::vector<CellInfo *> solve_cells;
356 
357     // For cells in a chain, this is the ultimate root cell of the chain (sometimes this is not constr_parent
358     // where chains are within chains
359     std::unordered_map<IdString, CellInfo *> chain_root;
360     std::unordered_map<IdString, int> chain_size;
361 
362     // The offset from chain_root to a cell in the chain
363     std::unordered_map<IdString, std::pair<int, int>> cell_offsets;
364 
365     // Performance counting
366     double solve_time = 0, cl_time = 0, sl_time = 0;
367 
368     NetCriticalityMap net_crit;
369 
370     // Place cells with the BEL attribute set to constrain them
place_constraints()371     void place_constraints()
372     {
373         size_t placed_cells = 0;
374         // Initial constraints placer
375         for (auto &cell_entry : ctx->cells) {
376             CellInfo *cell = cell_entry.second.get();
377             auto loc = cell->attrs.find(ctx->id("BEL"));
378             if (loc != cell->attrs.end()) {
379                 std::string loc_name = loc->second.as_string();
380                 BelId bel = ctx->getBelByName(ctx->id(loc_name));
381                 if (bel == BelId()) {
382                     log_error("No Bel named \'%s\' located for "
383                               "this chip (processing BEL attribute on \'%s\')\n",
384                               loc_name.c_str(), cell->name.c_str(ctx));
385                 }
386 
387                 IdString bel_type = ctx->getBelType(bel);
388                 if (bel_type != cell->type) {
389                     log_error("Bel \'%s\' of type \'%s\' does not match cell "
390                               "\'%s\' of type \'%s\'\n",
391                               loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx));
392                 }
393                 if (!ctx->isValidBelForCell(cell, bel)) {
394                     log_error("Bel \'%s\' of type \'%s\' is not valid for cell "
395                               "\'%s\' of type \'%s\'\n",
396                               loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx));
397                 }
398 
399                 auto bound_cell = ctx->getBoundBelCell(bel);
400                 if (bound_cell) {
401                     log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n",
402                               cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx));
403                 }
404 
405                 ctx->bindBel(bel, cell, STRENGTH_USER);
406                 placed_cells++;
407             }
408         }
409         log_info("Placed %d cells based on constraints.\n", int(placed_cells));
410         ctx->yield();
411     }
412 
413     // Construct the fast_bels, nearest_row_with_bel and nearest_col_with_bel
build_fast_bels()414     void build_fast_bels()
415     {
416 
417         int num_bel_types = 0;
418         for (auto bel : ctx->getBels()) {
419             IdString type = ctx->getBelType(bel);
420             if (bel_types.find(type) == bel_types.end()) {
421                 bel_types[type] = std::tuple<int, int>(num_bel_types++, 1);
422             } else {
423                 std::get<1>(bel_types.at(type))++;
424             }
425         }
426         for (auto bel : ctx->getBels()) {
427             if (!ctx->checkBelAvail(bel))
428                 continue;
429             Loc loc = ctx->getBelLocation(bel);
430             IdString type = ctx->getBelType(bel);
431             int type_idx = std::get<0>(bel_types.at(type));
432             if (int(fast_bels.size()) < type_idx + 1)
433                 fast_bels.resize(type_idx + 1);
434             if (int(fast_bels.at(type_idx).size()) < (loc.x + 1))
435                 fast_bels.at(type_idx).resize(loc.x + 1);
436             if (int(fast_bels.at(type_idx).at(loc.x).size()) < (loc.y + 1))
437                 fast_bels.at(type_idx).at(loc.x).resize(loc.y + 1);
438             max_x = std::max(max_x, loc.x);
439             max_y = std::max(max_y, loc.y);
440             fast_bels.at(type_idx).at(loc.x).at(loc.y).push_back(bel);
441         }
442 
443         nearest_row_with_bel.resize(num_bel_types, std::vector<int>(max_y + 1, -1));
444         nearest_col_with_bel.resize(num_bel_types, std::vector<int>(max_x + 1, -1));
445         for (auto bel : ctx->getBels()) {
446             if (!ctx->checkBelAvail(bel))
447                 continue;
448             Loc loc = ctx->getBelLocation(bel);
449             int type_idx = std::get<0>(bel_types.at(ctx->getBelType(bel)));
450             auto &nr = nearest_row_with_bel.at(type_idx), &nc = nearest_col_with_bel.at(type_idx);
451             // Traverse outwards through nearest_row_with_bel and nearest_col_with_bel, stopping once
452             // another row/col is already recorded as being nearer
453             for (int x = loc.x; x <= max_x; x++) {
454                 if (nc.at(x) != -1 && std::abs(loc.x - nc.at(x)) <= (x - loc.x))
455                     break;
456                 nc.at(x) = loc.x;
457             }
458             for (int x = loc.x - 1; x >= 0; x--) {
459                 if (nc.at(x) != -1 && std::abs(loc.x - nc.at(x)) <= (loc.x - x))
460                     break;
461                 nc.at(x) = loc.x;
462             }
463             for (int y = loc.y; y <= max_y; y++) {
464                 if (nr.at(y) != -1 && std::abs(loc.y - nr.at(y)) <= (y - loc.y))
465                     break;
466                 nr.at(y) = loc.y;
467             }
468             for (int y = loc.y - 1; y >= 0; y--) {
469                 if (nr.at(y) != -1 && std::abs(loc.y - nr.at(y)) <= (loc.y - y))
470                     break;
471                 nr.at(y) = loc.y;
472             }
473         }
474 
475         // Determine bounding boxes of region constraints
476         for (auto &region : sorted(ctx->region)) {
477             Region *r = region.second;
478             BoundingBox bb;
479             if (r->constr_bels) {
480                 bb.x0 = std::numeric_limits<int>::max();
481                 bb.x1 = std::numeric_limits<int>::min();
482                 bb.y0 = std::numeric_limits<int>::max();
483                 bb.y1 = std::numeric_limits<int>::min();
484                 for (auto bel : r->bels) {
485                     Loc loc = ctx->getBelLocation(bel);
486                     bb.x0 = std::min(bb.x0, loc.x);
487                     bb.x1 = std::max(bb.x1, loc.x);
488                     bb.y0 = std::min(bb.y0, loc.y);
489                     bb.y1 = std::max(bb.y1, loc.y);
490                 }
491             } else {
492                 bb.x0 = 0;
493                 bb.y0 = 0;
494                 bb.x1 = max_x;
495                 bb.y1 = max_y;
496             }
497             constraint_region_bounds[r->name] = bb;
498         }
499     }
500 
501     // Build and solve in one direction
build_solve_direction(bool yaxis,int iter)502     void build_solve_direction(bool yaxis, int iter)
503     {
504         for (int i = 0; i < 5; i++) {
505             EquationSystem<double> esx(solve_cells.size(), solve_cells.size());
506             build_equations(esx, yaxis, iter);
507             solve_equations(esx, yaxis);
508         }
509     }
510 
511     // Check if a cell has any meaningful connectivity
has_connectivity(CellInfo * cell)512     bool has_connectivity(CellInfo *cell)
513     {
514         for (auto port : cell->ports) {
515             if (port.second.net != nullptr && port.second.net->driver.cell != nullptr &&
516                 !port.second.net->users.empty())
517                 return true;
518         }
519         return false;
520     }
521 
522     // Build up a random initial placement, without regard to legality
523     // FIXME: Are there better approaches to the initial placement (e.g. greedy?)
seed_placement()524     void seed_placement()
525     {
526         std::unordered_map<IdString, std::deque<BelId>> available_bels;
527         for (auto bel : ctx->getBels()) {
528             if (!ctx->checkBelAvail(bel))
529                 continue;
530             available_bels[ctx->getBelType(bel)].push_back(bel);
531         }
532         for (auto &t : available_bels) {
533             std::random_shuffle(t.second.begin(), t.second.end(), [&](size_t n) { return ctx->rng(int(n)); });
534         }
535         for (auto cell : sorted(ctx->cells)) {
536             CellInfo *ci = cell.second;
537             if (ci->bel != BelId()) {
538                 Loc loc = ctx->getBelLocation(ci->bel);
539                 cell_locs[cell.first].x = loc.x;
540                 cell_locs[cell.first].y = loc.y;
541                 cell_locs[cell.first].locked = true;
542                 cell_locs[cell.first].global = ctx->getBelGlobalBuf(ci->bel);
543             } else if (ci->constr_parent == nullptr) {
544                 bool placed = false;
545                 int attempt_count = 0;
546                 while (!placed) {
547                     if (!available_bels.count(ci->type) || available_bels.at(ci->type).empty())
548                         log_error("Unable to place cell '%s', no Bels remaining of type '%s'\n", ci->name.c_str(ctx),
549                                   ci->type.c_str(ctx));
550                     ++attempt_count;
551                     if (attempt_count > 25000)
552                         log_error("Unable to find a placement location for cell '%s'\n", ci->name.c_str(ctx));
553                     BelId bel = available_bels.at(ci->type).back();
554                     available_bels.at(ci->type).pop_back();
555                     Loc loc = ctx->getBelLocation(bel);
556                     cell_locs[cell.first].x = loc.x;
557                     cell_locs[cell.first].y = loc.y;
558                     cell_locs[cell.first].locked = false;
559                     cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel);
560                     // FIXME
561                     if (has_connectivity(cell.second) && !cfg.ioBufTypes.count(ci->type)) {
562                         place_cells.push_back(ci);
563                         placed = true;
564                     } else {
565                         if (ctx->isValidBelForCell(ci, bel)) {
566                             ctx->bindBel(bel, ci, STRENGTH_STRONG);
567                             cell_locs[cell.first].locked = true;
568                             placed = true;
569                         } else {
570                             available_bels.at(ci->type).push_front(bel);
571                         }
572                     }
573                 }
574             }
575         }
576     }
577 
578     // Setup the cells to be solved, returns the number of rows
setup_solve_cells(std::unordered_set<IdString> * celltypes=nullptr)579     int setup_solve_cells(std::unordered_set<IdString> *celltypes = nullptr)
580     {
581         int row = 0;
582         solve_cells.clear();
583         // First clear the udata of all cells
584         for (auto cell : sorted(ctx->cells))
585             cell.second->udata = dont_solve;
586         // Then update cells to be placed, which excludes cell children
587         for (auto cell : place_cells) {
588             if (celltypes && !celltypes->count(cell->type))
589                 continue;
590             cell->udata = row++;
591             solve_cells.push_back(cell);
592         }
593         // Finally, update the udata of children
594         for (auto chained : chain_root)
595             ctx->cells.at(chained.first)->udata = chained.second->udata;
596         return row;
597     }
598 
599     // Update the location of all children of a chain
update_chain(CellInfo * cell,CellInfo * root)600     void update_chain(CellInfo *cell, CellInfo *root)
601     {
602         const auto &base = cell_locs[cell->name];
603         for (auto child : cell->constr_children) {
604             // FIXME: Improve handling of heterogeneous chains
605             if (child->type == root->type)
606                 chain_size[root->name]++;
607             if (child->constr_x != child->UNCONSTR)
608                 cell_locs[child->name].x = std::max(0, std::min(max_x, base.x + child->constr_x));
609             else
610                 cell_locs[child->name].x = base.x; // better handling of UNCONSTR?
611             if (child->constr_y != child->UNCONSTR)
612                 cell_locs[child->name].y = std::max(0, std::min(max_y, base.y + child->constr_y));
613             else
614                 cell_locs[child->name].y = base.y; // better handling of UNCONSTR?
615             chain_root[child->name] = root;
616             if (!child->constr_children.empty())
617                 update_chain(child, root);
618         }
619     }
620 
621     // Update all chains
update_all_chains()622     void update_all_chains()
623     {
624         for (auto cell : place_cells) {
625             chain_size[cell->name] = 1;
626             if (!cell->constr_children.empty())
627                 update_chain(cell, cell);
628         }
629     }
630 
631     // Run a function on all ports of a net - including the driver and all users
foreach_port(NetInfo * net,Tf func)632     template <typename Tf> void foreach_port(NetInfo *net, Tf func)
633     {
634         if (net->driver.cell != nullptr)
635             func(net->driver, -1);
636         for (size_t i = 0; i < net->users.size(); i++)
637             func(net->users.at(i), i);
638     }
639 
640     // Build the system of equations for either X or Y
build_equations(EquationSystem<double> & es,bool yaxis,int iter=-1)641     void build_equations(EquationSystem<double> &es, bool yaxis, int iter = -1)
642     {
643         // Return the x or y position of a cell, depending on ydir
644         auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; };
645         auto legal_pos = [&](CellInfo *cell) {
646             return yaxis ? cell_locs.at(cell->name).legal_y : cell_locs.at(cell->name).legal_x;
647         };
648 
649         es.reset();
650 
651         for (auto net : sorted(ctx->nets)) {
652             NetInfo *ni = net.second;
653             if (ni->driver.cell == nullptr)
654                 continue;
655             if (ni->users.empty())
656                 continue;
657             if (cell_locs.at(ni->driver.cell->name).global)
658                 continue;
659             // Find the bounds of the net in this axis, and the ports that correspond to these bounds
660             PortRef *lbport = nullptr, *ubport = nullptr;
661             int lbpos = std::numeric_limits<int>::max(), ubpos = std::numeric_limits<int>::min();
662             foreach_port(ni, [&](PortRef &port, int user_idx) {
663                 int pos = cell_pos(port.cell);
664                 if (pos < lbpos) {
665                     lbpos = pos;
666                     lbport = &port;
667                 }
668                 if (pos > ubpos) {
669                     ubpos = pos;
670                     ubport = &port;
671                 }
672             });
673             NPNR_ASSERT(lbport != nullptr);
674             NPNR_ASSERT(ubport != nullptr);
675 
676             auto stamp_equation = [&](PortRef &var, PortRef &eqn, double weight) {
677                 if (eqn.cell->udata == dont_solve)
678                     return;
679                 int row = eqn.cell->udata;
680                 int v_pos = cell_pos(var.cell);
681                 if (var.cell->udata != dont_solve) {
682                     es.add_coeff(row, var.cell->udata, weight);
683                 } else {
684                     es.add_rhs(row, -v_pos * weight);
685                 }
686                 if (cell_offsets.count(var.cell->name)) {
687                     es.add_rhs(row, -(yaxis ? cell_offsets.at(var.cell->name).second
688                                             : cell_offsets.at(var.cell->name).first) *
689                                             weight);
690                 }
691             };
692 
693             // Add all relevant connections to the matrix
694             foreach_port(ni, [&](PortRef &port, int user_idx) {
695                 int this_pos = cell_pos(port.cell);
696                 auto process_arc = [&](PortRef *other) {
697                     if (other == &port)
698                         return;
699                     int o_pos = cell_pos(other->cell);
700                     double weight = 1.0 / (ni->users.size() *
701                                            std::max<double>(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) *
702                                                                        std::abs(o_pos - this_pos)));
703 
704                     if (user_idx != -1 && net_crit.count(ni->name)) {
705                         auto &nc = net_crit.at(ni->name);
706                         if (user_idx < int(nc.criticality.size()))
707                             weight *= (1.0 + cfg.timingWeight *
708                                                      std::pow(nc.criticality.at(user_idx), cfg.criticalityExponent));
709                     }
710 
711                     // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation,
712                     // if the other end isn't fixed
713                     stamp_equation(port, port, weight);
714                     stamp_equation(port, *other, -weight);
715                     stamp_equation(*other, *other, weight);
716                     stamp_equation(*other, port, -weight);
717                 };
718                 process_arc(lbport);
719                 process_arc(ubport);
720             });
721         }
722         if (iter != -1) {
723             float alpha = cfg.alpha;
724             for (size_t row = 0; row < solve_cells.size(); row++) {
725                 int l_pos = legal_pos(solve_cells.at(row));
726                 int c_pos = cell_pos(solve_cells.at(row));
727 
728                 double weight =
729                         alpha * iter /
730                         std::max<double>(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * std::abs(l_pos - c_pos));
731                 // Add an arc from legalised to current position
732                 es.add_coeff(row, row, weight);
733                 es.add_rhs(row, weight * l_pos);
734             }
735         }
736     }
737 
738     // Build the system of equations for either X or Y
solve_equations(EquationSystem<double> & es,bool yaxis)739     void solve_equations(EquationSystem<double> &es, bool yaxis)
740     {
741         // Return the x or y position of a cell, depending on ydir
742         auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; };
743         std::vector<double> vals;
744         std::transform(solve_cells.begin(), solve_cells.end(), std::back_inserter(vals), cell_pos);
745         es.solve(vals, cfg.solverTolerance);
746         for (size_t i = 0; i < vals.size(); i++)
747             if (yaxis) {
748                 cell_locs.at(solve_cells.at(i)->name).rawy = vals.at(i);
749                 cell_locs.at(solve_cells.at(i)->name).y = std::min(max_y, std::max(0, int(vals.at(i))));
750                 if (solve_cells.at(i)->region != nullptr)
751                     cell_locs.at(solve_cells.at(i)->name).y =
752                             limit_to_reg(solve_cells.at(i)->region, cell_locs.at(solve_cells.at(i)->name).y, true);
753             } else {
754                 cell_locs.at(solve_cells.at(i)->name).rawx = vals.at(i);
755                 cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i))));
756                 if (solve_cells.at(i)->region != nullptr)
757                     cell_locs.at(solve_cells.at(i)->name).x =
758                             limit_to_reg(solve_cells.at(i)->region, cell_locs.at(solve_cells.at(i)->name).x, false);
759             }
760     }
761 
762     // Compute HPWL
total_hpwl()763     wirelen_t total_hpwl()
764     {
765         wirelen_t hpwl = 0;
766         for (auto net : sorted(ctx->nets)) {
767             NetInfo *ni = net.second;
768             if (ni->driver.cell == nullptr)
769                 continue;
770             CellLocation &drvloc = cell_locs.at(ni->driver.cell->name);
771             if (drvloc.global)
772                 continue;
773             int xmin = drvloc.x, xmax = drvloc.x, ymin = drvloc.y, ymax = drvloc.y;
774             for (auto &user : ni->users) {
775                 CellLocation &usrloc = cell_locs.at(user.cell->name);
776                 xmin = std::min(xmin, usrloc.x);
777                 xmax = std::max(xmax, usrloc.x);
778                 ymin = std::min(ymin, usrloc.y);
779                 ymax = std::max(ymax, usrloc.y);
780             }
781             hpwl += cfg.hpwl_scale_x * (xmax - xmin) + cfg.hpwl_scale_y * (ymax - ymin);
782         }
783         return hpwl;
784     }
785 
786     // Strict placement legalisation, performed after the initial HeAP spreading
legalise_placement_strict(bool require_validity=false)787     void legalise_placement_strict(bool require_validity = false)
788     {
789         auto startt = std::chrono::high_resolution_clock::now();
790 
791         // Unbind all cells placed in this solution
792         for (auto cell : sorted(ctx->cells)) {
793             CellInfo *ci = cell.second;
794             if (ci->bel != BelId() && (ci->udata != dont_solve ||
795                                        (chain_root.count(ci->name) && chain_root.at(ci->name)->udata != dont_solve)))
796                 ctx->unbindBel(ci->bel);
797         }
798 
799         // At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using
800         // the simple greedy largest-macro-first approach.
801         std::priority_queue<std::pair<int, IdString>> remaining;
802         for (auto cell : solve_cells) {
803             remaining.emplace(chain_size[cell->name], cell->name);
804         }
805         int ripup_radius = 2;
806         int total_iters = 0;
807         int total_iters_noreset = 0;
808         while (!remaining.empty()) {
809             auto top = remaining.top();
810             remaining.pop();
811 
812             CellInfo *ci = ctx->cells.at(top.second).get();
813             // Was now placed, ignore
814             if (ci->bel != BelId())
815                 continue;
816             // log_info("   Legalising %s (%s)\n", top.second.c_str(ctx), ci->type.c_str(ctx));
817             int bt = std::get<0>(bel_types.at(ci->type));
818             auto &fb = fast_bels.at(bt);
819             int radius = 0;
820             int iter = 0;
821             int iter_at_radius = 0;
822             bool placed = false;
823             BelId bestBel;
824             int best_inp_len = std::numeric_limits<int>::max();
825 
826             total_iters++;
827             total_iters_noreset++;
828             if (total_iters > int(solve_cells.size())) {
829                 total_iters = 0;
830                 ripup_radius = std::max(std::max(max_x, max_y), ripup_radius * 2);
831             }
832 
833             if (total_iters_noreset > std::max(5000, 8 * int(ctx->cells.size()))) {
834                 log_error("Unable to find legal placement for all cells, design is probably at utilisation limit.\n");
835             }
836 
837             while (!placed) {
838 
839                 // Set a conservative timeout
840                 if (iter > std::max(10000, 3 * int(ctx->cells.size())))
841                     log_error("Unable to find legal placement for cell '%s', check constraints and utilisation.\n",
842                               ctx->nameOf(ci));
843 
844                 int rx = radius, ry = radius;
845 
846                 if (ci->region != nullptr) {
847                     rx = std::min(radius, (constraint_region_bounds[ci->region->name].x1 -
848                                            constraint_region_bounds[ci->region->name].x0) /
849                                                           2 +
850                                                   1);
851                     ry = std::min(radius, (constraint_region_bounds[ci->region->name].y1 -
852                                            constraint_region_bounds[ci->region->name].y0) /
853                                                           2 +
854                                                   1);
855                 }
856 
857                 int nx = ctx->rng(2 * rx + 1) + std::max(cell_locs.at(ci->name).x - rx, 0);
858                 int ny = ctx->rng(2 * ry + 1) + std::max(cell_locs.at(ci->name).y - ry, 0);
859 
860                 iter++;
861                 iter_at_radius++;
862                 if (iter >= (10 * (radius + 1))) {
863                     radius = std::min(std::max(max_x, max_y), radius + 1);
864                     while (radius < std::max(max_x, max_y)) {
865                         for (int x = std::max(0, cell_locs.at(ci->name).x - radius);
866                              x <= std::min(max_x, cell_locs.at(ci->name).x + radius); x++) {
867                             if (x >= int(fb.size()))
868                                 break;
869                             for (int y = std::max(0, cell_locs.at(ci->name).y - radius);
870                                  y <= std::min(max_y, cell_locs.at(ci->name).y + radius); y++) {
871                                 if (y >= int(fb.at(x).size()))
872                                     break;
873                                 if (fb.at(x).at(y).size() > 0)
874                                     goto notempty;
875                             }
876                         }
877                         radius = std::min(std::max(max_x, max_y), radius + 1);
878                     }
879                 notempty:
880                     iter_at_radius = 0;
881                     iter = 0;
882                 }
883                 if (nx < 0 || nx > max_x)
884                     continue;
885                 if (ny < 0 || ny > max_y)
886                     continue;
887 
888                 // ny = nearest_row_with_bel.at(bt).at(ny);
889                 // nx = nearest_col_with_bel.at(bt).at(nx);
890 
891                 if (nx >= int(fb.size()))
892                     continue;
893                 if (ny >= int(fb.at(nx).size()))
894                     continue;
895                 if (fb.at(nx).at(ny).empty())
896                     continue;
897 
898                 int need_to_explore = 2 * radius;
899 
900                 if (iter_at_radius >= need_to_explore && bestBel != BelId()) {
901                     CellInfo *bound = ctx->getBoundBelCell(bestBel);
902                     if (bound != nullptr) {
903                         ctx->unbindBel(bound->bel);
904                         remaining.emplace(chain_size[bound->name], bound->name);
905                     }
906                     ctx->bindBel(bestBel, ci, STRENGTH_WEAK);
907                     placed = true;
908                     Loc loc = ctx->getBelLocation(bestBel);
909                     cell_locs[ci->name].x = loc.x;
910                     cell_locs[ci->name].y = loc.y;
911                     break;
912                 }
913 
914                 if (ci->constr_children.empty() && !ci->constr_abs_z) {
915                     for (auto sz : fb.at(nx).at(ny)) {
916                         if (ci->region != nullptr && ci->region->constr_bels && !ci->region->bels.count(sz))
917                             continue;
918                         if (ctx->checkBelAvail(sz) || (radius > ripup_radius || ctx->rng(20000) < 10)) {
919                             CellInfo *bound = ctx->getBoundBelCell(sz);
920                             if (bound != nullptr) {
921                                 if (bound->constr_parent != nullptr || !bound->constr_children.empty() ||
922                                     bound->constr_abs_z)
923                                     continue;
924                                 ctx->unbindBel(bound->bel);
925                             }
926                             ctx->bindBel(sz, ci, STRENGTH_WEAK);
927                             if (require_validity && !ctx->isBelLocationValid(sz)) {
928                                 ctx->unbindBel(sz);
929                                 if (bound != nullptr)
930                                     ctx->bindBel(sz, bound, STRENGTH_WEAK);
931                             } else if (iter_at_radius < need_to_explore) {
932                                 ctx->unbindBel(sz);
933                                 if (bound != nullptr)
934                                     ctx->bindBel(sz, bound, STRENGTH_WEAK);
935                                 int input_len = 0;
936                                 for (auto &port : ci->ports) {
937                                     auto &p = port.second;
938                                     if (p.type != PORT_IN || p.net == nullptr || p.net->driver.cell == nullptr)
939                                         continue;
940                                     CellInfo *drv = p.net->driver.cell;
941                                     auto drv_loc = cell_locs.find(drv->name);
942                                     if (drv_loc == cell_locs.end())
943                                         continue;
944                                     if (drv_loc->second.global)
945                                         continue;
946                                     input_len += std::abs(drv_loc->second.x - nx) + std::abs(drv_loc->second.y - ny);
947                                 }
948                                 if (input_len < best_inp_len) {
949                                     best_inp_len = input_len;
950                                     bestBel = sz;
951                                 }
952                                 break;
953                             } else {
954                                 if (bound != nullptr)
955                                     remaining.emplace(chain_size[bound->name], bound->name);
956                                 Loc loc = ctx->getBelLocation(sz);
957                                 cell_locs[ci->name].x = loc.x;
958                                 cell_locs[ci->name].y = loc.y;
959                                 placed = true;
960                                 break;
961                             }
962                         }
963                     }
964                 } else {
965                     for (auto sz : fb.at(nx).at(ny)) {
966                         Loc loc = ctx->getBelLocation(sz);
967                         if (ci->constr_abs_z && loc.z != ci->constr_z)
968                             continue;
969                         std::vector<std::pair<CellInfo *, BelId>> targets;
970                         std::vector<std::pair<BelId, CellInfo *>> swaps_made;
971                         std::queue<std::pair<CellInfo *, Loc>> visit;
972                         visit.emplace(ci, loc);
973                         while (!visit.empty()) {
974                             CellInfo *vc = visit.front().first;
975                             NPNR_ASSERT(vc->bel == BelId());
976                             Loc ploc = visit.front().second;
977                             visit.pop();
978                             BelId target = ctx->getBelByLocation(ploc);
979                             if (vc->region != nullptr && vc->region->constr_bels && !vc->region->bels.count(target))
980                                 goto fail;
981                             CellInfo *bound;
982                             if (target == BelId() || ctx->getBelType(target) != vc->type)
983                                 goto fail;
984                             bound = ctx->getBoundBelCell(target);
985                             // Chains cannot overlap
986                             if (bound != nullptr)
987                                 if (bound->constr_z != bound->UNCONSTR || bound->constr_parent != nullptr ||
988                                     !bound->constr_children.empty() || bound->belStrength > STRENGTH_WEAK)
989                                     goto fail;
990                             targets.emplace_back(vc, target);
991                             for (auto child : vc->constr_children) {
992                                 Loc cloc = ploc;
993                                 if (child->constr_x != child->UNCONSTR)
994                                     cloc.x += child->constr_x;
995                                 if (child->constr_y != child->UNCONSTR)
996                                     cloc.y += child->constr_y;
997                                 if (child->constr_z != child->UNCONSTR)
998                                     cloc.z = child->constr_abs_z ? child->constr_z : (ploc.z + child->constr_z);
999                                 visit.emplace(child, cloc);
1000                             }
1001                         }
1002 
1003                         for (auto &target : targets) {
1004                             CellInfo *bound = ctx->getBoundBelCell(target.second);
1005                             if (bound != nullptr)
1006                                 ctx->unbindBel(target.second);
1007                             ctx->bindBel(target.second, target.first, STRENGTH_STRONG);
1008                             swaps_made.emplace_back(target.second, bound);
1009                         }
1010 
1011                         for (auto &sm : swaps_made) {
1012                             if (!ctx->isBelLocationValid(sm.first))
1013                                 goto fail;
1014                         }
1015 
1016                         if (false) {
1017                         fail:
1018                             for (auto &swap : swaps_made) {
1019                                 ctx->unbindBel(swap.first);
1020                                 if (swap.second != nullptr)
1021                                     ctx->bindBel(swap.first, swap.second, STRENGTH_WEAK);
1022                             }
1023                             continue;
1024                         }
1025                         for (auto &target : targets) {
1026                             Loc loc = ctx->getBelLocation(target.second);
1027                             cell_locs[target.first->name].x = loc.x;
1028                             cell_locs[target.first->name].y = loc.y;
1029                             // log_info("%s %d %d %d\n", target.first->name.c_str(ctx), loc.x, loc.y, loc.z);
1030                         }
1031                         for (auto &swap : swaps_made) {
1032                             if (swap.second != nullptr)
1033                                 remaining.emplace(chain_size[swap.second->name], swap.second->name);
1034                         }
1035 
1036                         placed = true;
1037                         break;
1038                     }
1039                 }
1040             }
1041         }
1042         auto endt = std::chrono::high_resolution_clock::now();
1043         sl_time += std::chrono::duration<float>(endt - startt).count();
1044     }
1045     // Implementation of the cut-based spreading as described in the HeAP/SimPL papers
1046 
limit_to_reg(Region * reg,T val,bool dir)1047     template <typename T> T limit_to_reg(Region *reg, T val, bool dir)
1048     {
1049         if (reg == nullptr)
1050             return val;
1051         int limit_low = dir ? constraint_region_bounds[reg->name].y0 : constraint_region_bounds[reg->name].x0;
1052         int limit_high = dir ? constraint_region_bounds[reg->name].y1 : constraint_region_bounds[reg->name].x1;
1053         return std::max<T>(std::min<T>(val, limit_high), limit_low);
1054     }
1055 
1056     struct ChainExtent
1057     {
1058         int x0, y0, x1, y1;
1059     };
1060 
1061     struct SpreaderRegion
1062     {
1063         int id;
1064         int x0, y0, x1, y1;
1065         std::vector<int> cells, bels;
overusedHeAPPlacer::SpreaderRegion1066         bool overused(float beta) const
1067         {
1068             for (size_t t = 0; t < cells.size(); t++) {
1069                 if (bels.at(t) < 4) {
1070                     if (cells.at(t) > bels.at(t))
1071                         return true;
1072                 } else {
1073                     if (cells.at(t) > beta * bels.at(t))
1074                         return true;
1075                 }
1076             }
1077             return false;
1078         }
1079     };
1080 
1081     class CutSpreader
1082     {
1083       public:
CutSpreader(HeAPPlacer * p,const std::unordered_set<IdString> & beltype)1084         CutSpreader(HeAPPlacer *p, const std::unordered_set<IdString> &beltype) : p(p), ctx(p->ctx), beltype(beltype)
1085         {
1086             int idx = 0;
1087             for (IdString type : sorted(beltype)) {
1088                 type_index[type] = idx;
1089                 fb.emplace_back(&(p->fast_bels.at(std::get<0>(p->bel_types.at(type)))));
1090                 ++idx;
1091             }
1092         }
1093         static int seq;
run()1094         void run()
1095         {
1096             auto startt = std::chrono::high_resolution_clock::now();
1097             init();
1098             find_overused_regions();
1099             for (auto &r : regions) {
1100                 if (merged_regions.count(r.id))
1101                     continue;
1102 #if 0
1103                 log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells,
1104                          r.bels);
1105 #endif
1106             }
1107             expand_regions();
1108             std::queue<std::pair<int, bool>> workqueue;
1109 #if 0
1110             std::vector<std::pair<double, double>> orig;
1111             if (ctx->debug)
1112                 for (auto c : p->solve_cells)
1113                     orig.emplace_back(p->cell_locs[c->name].rawx, p->cell_locs[c->name].rawy);
1114 #endif
1115             for (auto &r : regions) {
1116                 if (merged_regions.count(r.id))
1117                     continue;
1118 #if 0
1119                 for (auto t : sorted(beltype)) {
1120                     log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", t.c_str(ctx), r.x0, r.y0, r.x1, r.y1,
1121                              r.cells.at(type_index.at(t)), r.bels.at(type_index.at(t)));
1122                 }
1123 
1124 #endif
1125                 workqueue.emplace(r.id, false);
1126                 // cut_region(r, false);
1127             }
1128             while (!workqueue.empty()) {
1129                 auto front = workqueue.front();
1130                 workqueue.pop();
1131                 auto &r = regions.at(front.first);
1132                 if (std::all_of(r.cells.begin(), r.cells.end(), [](int x) { return x == 0; }))
1133                     continue;
1134                 auto res = cut_region(r, front.second);
1135                 if (res) {
1136                     workqueue.emplace(res->first, !front.second);
1137                     workqueue.emplace(res->second, !front.second);
1138                 } else {
1139                     // Try the other dir, in case stuck in one direction only
1140                     auto res2 = cut_region(r, !front.second);
1141                     if (res2) {
1142                         // log_info("RETRY SUCCESS\n");
1143                         workqueue.emplace(res2->first, front.second);
1144                         workqueue.emplace(res2->second, front.second);
1145                     }
1146                 }
1147             }
1148 #if 0
1149             if (ctx->debug) {
1150                 std::ofstream sp("spread" + std::to_string(seq) + ".csv");
1151                 for (size_t i = 0; i < p->solve_cells.size(); i++) {
1152                     auto &c = p->solve_cells.at(i);
1153                     if (c->type != beltype)
1154                         continue;
1155                     sp << orig.at(i).first << "," << orig.at(i).second << "," << p->cell_locs[c->name].rawx << "," << p->cell_locs[c->name].rawy << std::endl;
1156                 }
1157                 std::ofstream oc("cells" + std::to_string(seq) + ".csv");
1158                 for (size_t y = 0; y <= p->max_y; y++) {
1159                     for (size_t x = 0; x <= p->max_x; x++) {
1160                         oc << cells_at_location.at(x).at(y).size() << ", ";
1161                     }
1162                     oc << std::endl;
1163                 }
1164                 ++seq;
1165             }
1166 #endif
1167             auto endt = std::chrono::high_resolution_clock::now();
1168             p->cl_time += std::chrono::duration<float>(endt - startt).count();
1169         }
1170 
1171       private:
1172         HeAPPlacer *p;
1173         Context *ctx;
1174         std::unordered_set<IdString> beltype;
1175         std::unordered_map<IdString, int> type_index;
1176         std::vector<std::vector<std::vector<int>>> occupancy;
1177         std::vector<std::vector<int>> groups;
1178         std::vector<std::vector<ChainExtent>> chaines;
1179         std::map<IdString, ChainExtent> cell_extents;
1180 
1181         std::vector<std::vector<std::vector<std::vector<BelId>>> *> fb;
1182 
1183         std::vector<SpreaderRegion> regions;
1184         std::unordered_set<int> merged_regions;
1185         // Cells at a location, sorted by real (not integer) x and y
1186         std::vector<std::vector<std::vector<CellInfo *>>> cells_at_location;
1187 
occ_at(int x,int y,int type)1188         int occ_at(int x, int y, int type) { return occupancy.at(x).at(y).at(type); }
1189 
bels_at(int x,int y,int type)1190         int bels_at(int x, int y, int type)
1191         {
1192             if (x >= int(fb.at(type)->size()) || y >= int(fb.at(type)->at(x).size()))
1193                 return 0;
1194             return int(fb.at(type)->at(x).at(y).size());
1195         }
1196 
init()1197         void init()
1198         {
1199             occupancy.resize(p->max_x + 1,
1200                              std::vector<std::vector<int>>(p->max_y + 1, std::vector<int>(beltype.size(), 0)));
1201             groups.resize(p->max_x + 1, std::vector<int>(p->max_y + 1, -1));
1202             chaines.resize(p->max_x + 1, std::vector<ChainExtent>(p->max_y + 1));
1203             cells_at_location.resize(p->max_x + 1, std::vector<std::vector<CellInfo *>>(p->max_y + 1));
1204             for (int x = 0; x <= p->max_x; x++)
1205                 for (int y = 0; y <= p->max_y; y++) {
1206                     for (int t = 0; t < int(beltype.size()); t++) {
1207                         occupancy.at(x).at(y).at(t) = 0;
1208                     }
1209                     groups.at(x).at(y) = -1;
1210                     chaines.at(x).at(y) = {x, y, x, y};
1211                 }
1212 
1213             auto set_chain_ext = [&](IdString cell, int x, int y) {
1214                 if (!cell_extents.count(cell))
1215                     cell_extents[cell] = {x, y, x, y};
1216                 else {
1217                     cell_extents[cell].x0 = std::min(cell_extents[cell].x0, x);
1218                     cell_extents[cell].y0 = std::min(cell_extents[cell].y0, y);
1219                     cell_extents[cell].x1 = std::max(cell_extents[cell].x1, x);
1220                     cell_extents[cell].y1 = std::max(cell_extents[cell].y1, y);
1221                 }
1222             };
1223 
1224             for (auto &cell : p->cell_locs) {
1225                 if (!beltype.count(ctx->cells.at(cell.first)->type))
1226                     continue;
1227                 if (ctx->cells.at(cell.first)->belStrength > STRENGTH_STRONG)
1228                     continue;
1229                 occupancy.at(cell.second.x).at(cell.second.y).at(type_index.at(ctx->cells.at(cell.first)->type))++;
1230                 // Compute ultimate extent of each chain root
1231                 if (p->chain_root.count(cell.first)) {
1232                     set_chain_ext(p->chain_root.at(cell.first)->name, cell.second.x, cell.second.y);
1233                 } else if (!ctx->cells.at(cell.first)->constr_children.empty()) {
1234                     set_chain_ext(cell.first, cell.second.x, cell.second.y);
1235                 }
1236             }
1237             for (auto &cell : p->cell_locs) {
1238                 if (!beltype.count(ctx->cells.at(cell.first)->type))
1239                     continue;
1240                 // Transfer chain extents to the actual chaines structure
1241                 ChainExtent *ce = nullptr;
1242                 if (p->chain_root.count(cell.first))
1243                     ce = &(cell_extents.at(p->chain_root.at(cell.first)->name));
1244                 else if (!ctx->cells.at(cell.first)->constr_children.empty())
1245                     ce = &(cell_extents.at(cell.first));
1246                 if (ce) {
1247                     auto &lce = chaines.at(cell.second.x).at(cell.second.y);
1248                     lce.x0 = std::min(lce.x0, ce->x0);
1249                     lce.y0 = std::min(lce.y0, ce->y0);
1250                     lce.x1 = std::max(lce.x1, ce->x1);
1251                     lce.y1 = std::max(lce.y1, ce->y1);
1252                 }
1253             }
1254             for (auto cell : p->solve_cells) {
1255                 if (!beltype.count(cell->type))
1256                     continue;
1257                 cells_at_location.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell);
1258             }
1259         }
merge_regions(SpreaderRegion & merged,SpreaderRegion & mergee)1260         void merge_regions(SpreaderRegion &merged, SpreaderRegion &mergee)
1261         {
1262             // Prevent grow_region from recursing while doing this
1263             for (int x = mergee.x0; x <= mergee.x1; x++)
1264                 for (int y = mergee.y0; y <= mergee.y1; y++) {
1265                     // log_info("%d %d\n", groups.at(x).at(y), mergee.id);
1266                     NPNR_ASSERT(groups.at(x).at(y) == mergee.id);
1267                     groups.at(x).at(y) = merged.id;
1268                     for (size_t t = 0; t < beltype.size(); t++) {
1269                         merged.cells.at(t) += occ_at(x, y, t);
1270                         merged.bels.at(t) += bels_at(x, y, t);
1271                     }
1272                 }
1273             merged_regions.insert(mergee.id);
1274             grow_region(merged, mergee.x0, mergee.y0, mergee.x1, mergee.y1);
1275         }
1276 
grow_region(SpreaderRegion & r,int x0,int y0,int x1,int y1,bool init=false)1277         void grow_region(SpreaderRegion &r, int x0, int y0, int x1, int y1, bool init = false)
1278         {
1279             // log_info("growing to (%d, %d) |_> (%d, %d)\n", x0, y0, x1, y1);
1280             if ((x0 >= r.x0 && y0 >= r.y0 && x1 <= r.x1 && y1 <= r.y1) || init)
1281                 return;
1282             int old_x0 = r.x0 + (init ? 1 : 0), old_y0 = r.y0, old_x1 = r.x1, old_y1 = r.y1;
1283             r.x0 = std::min(r.x0, x0);
1284             r.y0 = std::min(r.y0, y0);
1285             r.x1 = std::max(r.x1, x1);
1286             r.y1 = std::max(r.y1, y1);
1287 
1288             auto process_location = [&](int x, int y) {
1289                 // Merge with any overlapping regions
1290                 if (groups.at(x).at(y) == -1) {
1291                     for (int t = 0; t < int(beltype.size()); t++) {
1292                         r.bels.at(t) += bels_at(x, y, t);
1293                         r.cells.at(t) += occ_at(x, y, t);
1294                     }
1295                 }
1296                 if (groups.at(x).at(y) != -1 && groups.at(x).at(y) != r.id)
1297                     merge_regions(r, regions.at(groups.at(x).at(y)));
1298                 groups.at(x).at(y) = r.id;
1299                 // Grow to cover any chains
1300                 auto &chaine = chaines.at(x).at(y);
1301                 grow_region(r, chaine.x0, chaine.y0, chaine.x1, chaine.y1);
1302             };
1303             for (int x = r.x0; x < old_x0; x++)
1304                 for (int y = r.y0; y <= r.y1; y++)
1305                     process_location(x, y);
1306             for (int x = old_x1 + 1; x <= x1; x++)
1307                 for (int y = r.y0; y <= r.y1; y++)
1308                     process_location(x, y);
1309             for (int y = r.y0; y < old_y0; y++)
1310                 for (int x = r.x0; x <= r.x1; x++)
1311                     process_location(x, y);
1312             for (int y = old_y1 + 1; y <= r.y1; y++)
1313                 for (int x = r.x0; x <= r.x1; x++)
1314                     process_location(x, y);
1315         }
1316 
find_overused_regions()1317         void find_overused_regions()
1318         {
1319             for (int x = 0; x <= p->max_x; x++)
1320                 for (int y = 0; y <= p->max_y; y++) {
1321                     // Either already in a group, or not overutilised. Ignore
1322                     if (groups.at(x).at(y) != -1)
1323                         continue;
1324                     bool overutilised = false;
1325                     for (size_t t = 0; t < beltype.size(); t++) {
1326                         if (occ_at(x, y, t) > bels_at(x, y, t)) {
1327                             overutilised = true;
1328                             break;
1329                         }
1330                     }
1331                     if (!overutilised)
1332                         continue;
1333                     // log_info("%d %d %d\n", x, y, occ_at(x, y));
1334                     int id = int(regions.size());
1335                     groups.at(x).at(y) = id;
1336                     SpreaderRegion reg;
1337                     reg.id = id;
1338                     reg.x0 = reg.x1 = x;
1339                     reg.y0 = reg.y1 = y;
1340                     for (size_t t = 0; t < beltype.size(); t++) {
1341                         reg.bels.push_back(bels_at(x, y, t));
1342                         reg.cells.push_back(occ_at(x, y, t));
1343                     }
1344                     // Make sure we cover carries, etc
1345                     grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1, true);
1346 
1347                     bool expanded = true;
1348                     while (expanded) {
1349                         expanded = false;
1350                         // Keep trying expansion in x and y, until we find no over-occupancy cells
1351                         // or hit grouped cells
1352 
1353                         // First try expanding in x
1354                         if (reg.x1 < p->max_x) {
1355                             bool over_occ_x = false;
1356                             for (int y1 = reg.y0; y1 <= reg.y1; y1++) {
1357                                 for (size_t t = 0; t < beltype.size(); t++) {
1358                                     if (occ_at(reg.x1 + 1, y1, t) > bels_at(reg.x1 + 1, y1, t)) {
1359                                         // log_info("(%d, %d) occ %d bels %d\n", reg.x1+ 1, y1, occ_at(reg.x1 + 1, y1),
1360                                         // bels_at(reg.x1 + 1, y1));
1361                                         over_occ_x = true;
1362                                         break;
1363                                     }
1364                                 }
1365                             }
1366                             if (over_occ_x) {
1367                                 expanded = true;
1368                                 grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1);
1369                             }
1370                         }
1371 
1372                         if (reg.y1 < p->max_y) {
1373                             bool over_occ_y = false;
1374                             for (int x1 = reg.x0; x1 <= reg.x1; x1++) {
1375                                 for (size_t t = 0; t < beltype.size(); t++) {
1376                                     if (occ_at(x1, reg.y1 + 1, t) > bels_at(x1, reg.y1 + 1, t)) {
1377                                         // log_info("(%d, %d) occ %d bels %d\n", x1, reg.y1 + 1, occ_at(x1, reg.y1 + 1),
1378                                         // bels_at(x1, reg.y1 + 1));
1379                                         over_occ_y = true;
1380                                         break;
1381                                     }
1382                                 }
1383                             }
1384                             if (over_occ_y) {
1385                                 expanded = true;
1386                                 grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1);
1387                             }
1388                         }
1389                     }
1390                     regions.push_back(reg);
1391                 }
1392         }
1393 
expand_regions()1394         void expand_regions()
1395         {
1396             std::queue<int> overu_regions;
1397             float beta = p->cfg.beta;
1398             for (auto &r : regions) {
1399                 if (!merged_regions.count(r.id) && r.overused(beta))
1400                     overu_regions.push(r.id);
1401             }
1402             while (!overu_regions.empty()) {
1403                 int rid = overu_regions.front();
1404                 overu_regions.pop();
1405                 if (merged_regions.count(rid))
1406                     continue;
1407                 auto &reg = regions.at(rid);
1408                 while (reg.overused(beta)) {
1409                     bool changed = false;
1410                     for (int j = 0; j < p->cfg.spread_scale_x; j++) {
1411                         if (reg.x0 > 0) {
1412                             grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1);
1413                             changed = true;
1414                             if (!reg.overused(beta))
1415                                 break;
1416                         }
1417                         if (reg.x1 < p->max_x) {
1418                             grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1);
1419                             changed = true;
1420                             if (!reg.overused(beta))
1421                                 break;
1422                         }
1423                     }
1424                     for (int j = 0; j < p->cfg.spread_scale_y; j++) {
1425                         if (reg.y0 > 0) {
1426                             grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1);
1427                             changed = true;
1428                             if (!reg.overused(beta))
1429                                 break;
1430                         }
1431                         if (reg.y1 < p->max_y) {
1432                             grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1);
1433                             changed = true;
1434                             if (!reg.overused(beta))
1435                                 break;
1436                         }
1437                     }
1438                     if (!changed) {
1439                         for (auto bt : sorted(beltype)) {
1440                             if (reg.cells > reg.bels)
1441                                 log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0,
1442                                           reg.x1, reg.y1, reg.cells.at(type_index.at(bt)), bt.c_str(ctx));
1443                         }
1444                         break;
1445                     }
1446                 }
1447             }
1448         }
1449 
1450         // Implementation of the recursive cut-based spreading as described in the HeAP paper
1451         // Note we use "left" to mean "-x/-y" depending on dir and "right" to mean "+x/+y" depending on dir
1452 
1453         std::vector<CellInfo *> cut_cells;
1454 
cut_region(SpreaderRegion & r,bool dir)1455         boost::optional<std::pair<int, int>> cut_region(SpreaderRegion &r, bool dir)
1456         {
1457             cut_cells.clear();
1458             auto &cal = cells_at_location;
1459             int total_cells = 0, total_bels = 0;
1460             for (int x = r.x0; x <= r.x1; x++) {
1461                 for (int y = r.y0; y <= r.y1; y++) {
1462                     std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells));
1463                     for (size_t t = 0; t < beltype.size(); t++)
1464                         total_bels += bels_at(x, y, t);
1465                 }
1466             }
1467             for (auto &cell : cut_cells) {
1468                 total_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1;
1469             }
1470             std::sort(cut_cells.begin(), cut_cells.end(), [&](const CellInfo *a, const CellInfo *b) {
1471                 return dir ? (p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy)
1472                            : (p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx);
1473             });
1474 
1475             if (cut_cells.size() < 2)
1476                 return {};
1477             // Find the cells midpoint, counting chains in terms of their total size - making the initial source cut
1478             int pivot_cells = 0;
1479             int pivot = 0;
1480             for (auto &cell : cut_cells) {
1481                 pivot_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1;
1482                 if (pivot_cells >= total_cells / 2)
1483                     break;
1484                 pivot++;
1485             }
1486             if (pivot >= int(cut_cells.size())) {
1487                 pivot = int(cut_cells.size()) - 1;
1488             }
1489             // log_info("orig pivot %d/%d lc %d rc %d\n", pivot, int(cut_cells.size()), pivot_cells, total_cells -
1490             // pivot_cells);
1491 
1492             // Find the clearance required either side of the pivot
1493             int clearance_l = 0, clearance_r = 0;
1494             for (size_t i = 0; i < cut_cells.size(); i++) {
1495                 int size;
1496                 if (cell_extents.count(cut_cells.at(i)->name)) {
1497                     auto &ce = cell_extents.at(cut_cells.at(i)->name);
1498                     size = dir ? (ce.y1 - ce.y0 + 1) : (ce.x1 - ce.x0 + 1);
1499                 } else {
1500                     size = 1;
1501                 }
1502                 if (int(i) < pivot)
1503                     clearance_l = std::max(clearance_l, size);
1504                 else
1505                     clearance_r = std::max(clearance_r, size);
1506             }
1507             // Find the target cut that minimises difference in utilisation, whilst trying to ensure that all chains
1508             // still fit
1509 
1510             // First trim the boundaries of the region in the axis-of-interest, skipping any rows/cols without any
1511             // bels of the appropriate type
1512             int trimmed_l = dir ? r.y0 : r.x0, trimmed_r = dir ? r.y1 : r.x1;
1513             while (trimmed_l < (dir ? r.y1 : r.x1)) {
1514                 bool have_bels = false;
1515                 for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++)
1516                     for (size_t t = 0; t < beltype.size(); t++)
1517                         if (bels_at(dir ? i : trimmed_l, dir ? trimmed_l : i, t) > 0) {
1518                             have_bels = true;
1519                             break;
1520                         }
1521                 if (have_bels)
1522                     break;
1523                 trimmed_l++;
1524             }
1525             while (trimmed_r > (dir ? r.y0 : r.x0)) {
1526                 bool have_bels = false;
1527                 for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++)
1528                     for (size_t t = 0; t < beltype.size(); t++)
1529                         if (bels_at(dir ? i : trimmed_r, dir ? trimmed_r : i, t) > 0) {
1530                             have_bels = true;
1531                             break;
1532                         }
1533                 if (have_bels)
1534                     break;
1535                 trimmed_r--;
1536             }
1537             // log_info("tl %d tr %d cl %d cr %d\n", trimmed_l, trimmed_r, clearance_l, clearance_r);
1538             if ((trimmed_r - trimmed_l + 1) <= std::max(clearance_l, clearance_r))
1539                 return {};
1540             // Now find the initial target cut that minimises utilisation imbalance, whilst
1541             // meeting the clearance requirements for any large macros
1542             std::vector<int> left_cells_v(beltype.size(), 0), right_cells_v(beltype.size(), 0);
1543             std::vector<int> left_bels_v(beltype.size(), 0), right_bels_v(r.bels);
1544             for (int i = 0; i <= pivot; i++)
1545                 left_cells_v.at(type_index.at(cut_cells.at(i)->type)) +=
1546                         p->chain_size.count(cut_cells.at(i)->name) ? p->chain_size.at(cut_cells.at(i)->name) : 1;
1547             for (int i = pivot + 1; i < int(cut_cells.size()); i++)
1548                 right_cells_v.at(type_index.at(cut_cells.at(i)->type)) +=
1549                         p->chain_size.count(cut_cells.at(i)->name) ? p->chain_size.at(cut_cells.at(i)->name) : 1;
1550 
1551             int best_tgt_cut = -1;
1552             double best_deltaU = std::numeric_limits<double>::max();
1553             // std::pair<int, int> target_cut_bels;
1554             std::vector<int> slither_bels(beltype.size(), 0);
1555             for (int i = trimmed_l; i <= trimmed_r; i++) {
1556                 for (size_t t = 0; t < beltype.size(); t++)
1557                     slither_bels.at(t) = 0;
1558                 for (int j = dir ? r.x0 : r.y0; j <= (dir ? r.x1 : r.y1); j++) {
1559                     for (size_t t = 0; t < beltype.size(); t++)
1560                         slither_bels.at(t) += dir ? bels_at(j, i, t) : bels_at(i, j, t);
1561                 }
1562                 for (size_t t = 0; t < beltype.size(); t++) {
1563                     left_bels_v.at(t) += slither_bels.at(t);
1564                     right_bels_v.at(t) -= slither_bels.at(t);
1565                 }
1566 
1567                 if (((i - trimmed_l) + 1) >= clearance_l && ((trimmed_r - i) + 1) >= clearance_r) {
1568                     // Solution is potentially valid
1569                     double aU = 0;
1570                     for (size_t t = 0; t < beltype.size(); t++)
1571                         aU += (left_cells_v.at(t) + right_cells_v.at(t)) *
1572                               std::abs(double(left_cells_v.at(t)) / double(std::max(left_bels_v.at(t), 1)) -
1573                                        double(right_cells_v.at(t)) / double(std::max(right_bels_v.at(t), 1)));
1574                     if (aU < best_deltaU) {
1575                         best_deltaU = aU;
1576                         best_tgt_cut = i;
1577                     }
1578                 }
1579             }
1580             if (best_tgt_cut == -1)
1581                 return {};
1582             // left_bels = target_cut_bels.first;
1583             // right_bels = target_cut_bels.second;
1584             for (size_t t = 0; t < beltype.size(); t++) {
1585                 left_bels_v.at(t) = 0;
1586                 right_bels_v.at(t) = 0;
1587             }
1588             for (int x = r.x0; x <= (dir ? r.x1 : best_tgt_cut); x++)
1589                 for (int y = r.y0; y <= (dir ? best_tgt_cut : r.y1); y++) {
1590                     for (size_t t = 0; t < beltype.size(); t++) {
1591                         left_bels_v.at(t) += bels_at(x, y, t);
1592                     }
1593                 }
1594             for (int x = dir ? r.x0 : (best_tgt_cut + 1); x <= r.x1; x++)
1595                 for (int y = dir ? (best_tgt_cut + 1) : r.y0; y <= r.y1; y++) {
1596                     for (size_t t = 0; t < beltype.size(); t++) {
1597                         right_bels_v.at(t) += bels_at(x, y, t);
1598                     }
1599                 }
1600             if (std::accumulate(left_bels_v.begin(), left_bels_v.end(), 0) == 0 ||
1601                 std::accumulate(right_bels_v.begin(), right_bels_v.end(), 0) == 0)
1602                 return {};
1603             // log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut,
1604             // std::accumulate(left_cells_v.begin(), left_cells_v.end(), 0), std::accumulate(left_bels_v.begin(),
1605             // left_bels_v.end(), 0),
1606             //          std::accumulate(right_cells_v.begin(), right_cells_v.end(), 0),
1607             //          std::accumulate(right_bels_v.begin(), right_bels_v.end(), 0));
1608 
1609             // Peturb the source cut to eliminate overutilisation
1610             auto is_part_overutil = [&](bool r) {
1611                 double delta = 0;
1612                 for (size_t t = 0; t < left_cells_v.size(); t++) {
1613                     delta += double(left_cells_v.at(t)) / double(std::max(left_bels_v.at(t), 1)) -
1614                              double(right_cells_v.at(t)) / double(std::max(right_bels_v.at(t), 1));
1615                 }
1616                 return r ? delta < 0 : delta > 0;
1617             };
1618             while (pivot > 0 && is_part_overutil(false)) {
1619                 auto &move_cell = cut_cells.at(pivot);
1620                 int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1;
1621                 left_cells_v.at(type_index.at(cut_cells.at(pivot)->type)) -= size;
1622                 right_cells_v.at(type_index.at(cut_cells.at(pivot)->type)) += size;
1623                 pivot--;
1624             }
1625             while (pivot < int(cut_cells.size()) - 1 && is_part_overutil(true)) {
1626                 auto &move_cell = cut_cells.at(pivot + 1);
1627                 int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1;
1628                 left_cells_v.at(type_index.at(cut_cells.at(pivot)->type)) += size;
1629                 right_cells_v.at(type_index.at(cut_cells.at(pivot)->type)) -= size;
1630                 pivot++;
1631             }
1632             // log_info("peturbed pivot %d lc %d lb %d rc %d rb %d\n", pivot, left_cells, left_bels, right_cells,
1633             // right_bels);
1634             // Split regions into bins, and then spread cells by linear interpolation within those bins
1635             auto spread_binlerp = [&](int cells_start, int cells_end, double area_l, double area_r) {
1636                 int N = cells_end - cells_start;
1637                 if (N <= 2) {
1638                     for (int i = cells_start; i < cells_end; i++) {
1639                         auto &pos = dir ? p->cell_locs.at(cut_cells.at(i)->name).rawy
1640                                         : p->cell_locs.at(cut_cells.at(i)->name).rawx;
1641                         pos = area_l + i * ((area_r - area_l) / N);
1642                     }
1643                     return;
1644                 }
1645                 // Split region into up to 10 (K) bins
1646                 int K = std::min<int>(N, 10);
1647                 std::vector<std::pair<int, double>> bin_bounds; // [(cell start, area start)]
1648                 bin_bounds.emplace_back(cells_start, area_l);
1649                 for (int i = 1; i < K; i++)
1650                     bin_bounds.emplace_back(cells_start + (N * i) / K, area_l + ((area_r - area_l + 0.99) * i) / K);
1651                 bin_bounds.emplace_back(cells_end, area_r + 0.99);
1652                 for (int i = 0; i < K; i++) {
1653                     auto &bl = bin_bounds.at(i), br = bin_bounds.at(i + 1);
1654                     double orig_left = dir ? p->cell_locs.at(cut_cells.at(bl.first)->name).rawy
1655                                            : p->cell_locs.at(cut_cells.at(bl.first)->name).rawx;
1656                     double orig_right = dir ? p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawy
1657                                             : p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawx;
1658                     double m = (br.second - bl.second) / std::max(0.00001, orig_right - orig_left);
1659                     for (int j = bl.first; j < br.first; j++) {
1660                         Region *cr = cut_cells.at(j)->region;
1661                         if (cr != nullptr) {
1662                             // Limit spreading bounds to constraint region; if applicable
1663                             double brsc = p->limit_to_reg(cr, br.second, dir);
1664                             double blsc = p->limit_to_reg(cr, bl.second, dir);
1665                             double mr = (brsc - blsc) / std::max(0.00001, orig_right - orig_left);
1666                             auto &pos = dir ? p->cell_locs.at(cut_cells.at(j)->name).rawy
1667                                             : p->cell_locs.at(cut_cells.at(j)->name).rawx;
1668                             NPNR_ASSERT(pos >= orig_left && pos <= orig_right);
1669                             pos = blsc + mr * (pos - orig_left);
1670                         } else {
1671                             auto &pos = dir ? p->cell_locs.at(cut_cells.at(j)->name).rawy
1672                                             : p->cell_locs.at(cut_cells.at(j)->name).rawx;
1673                             NPNR_ASSERT(pos >= orig_left && pos <= orig_right);
1674                             pos = bl.second + m * (pos - orig_left);
1675                         }
1676                         // log("[%f, %f] -> [%f, %f]: %f -> %f\n", orig_left, orig_right, bl.second, br.second,
1677                         // orig_pos, pos);
1678                     }
1679                 }
1680             };
1681             spread_binlerp(0, pivot + 1, trimmed_l, best_tgt_cut);
1682             spread_binlerp(pivot + 1, int(cut_cells.size()), best_tgt_cut + 1, trimmed_r);
1683             // Update various data structures
1684             for (int x = r.x0; x <= r.x1; x++)
1685                 for (int y = r.y0; y <= r.y1; y++) {
1686                     cells_at_location.at(x).at(y).clear();
1687                 }
1688             for (auto cell : cut_cells) {
1689                 auto &cl = p->cell_locs.at(cell->name);
1690                 cl.x = std::min(r.x1, std::max(r.x0, int(cl.rawx)));
1691                 cl.y = std::min(r.y1, std::max(r.y0, int(cl.rawy)));
1692                 cells_at_location.at(cl.x).at(cl.y).push_back(cell);
1693                 // log_info("spread pos %d %d\n", cl.x, cl.y);
1694             }
1695             SpreaderRegion rl, rr;
1696             rl.id = int(regions.size());
1697             rl.x0 = r.x0;
1698             rl.y0 = r.y0;
1699             rl.x1 = dir ? r.x1 : best_tgt_cut;
1700             rl.y1 = dir ? best_tgt_cut : r.y1;
1701             rl.cells = left_cells_v;
1702             rl.bels = left_bels_v;
1703             rr.id = int(regions.size()) + 1;
1704             rr.x0 = dir ? r.x0 : (best_tgt_cut + 1);
1705             rr.y0 = dir ? (best_tgt_cut + 1) : r.y0;
1706             rr.x1 = r.x1;
1707             rr.y1 = r.y1;
1708             rr.cells = right_cells_v;
1709             rr.bels = right_bels_v;
1710             regions.push_back(rl);
1711             regions.push_back(rr);
1712             for (int x = rl.x0; x <= rl.x1; x++)
1713                 for (int y = rl.y0; y <= rl.y1; y++)
1714                     groups.at(x).at(y) = rl.id;
1715             for (int x = rr.x0; x <= rr.x1; x++)
1716                 for (int y = rr.y0; y <= rr.y1; y++)
1717                     groups.at(x).at(y) = rr.id;
1718             return std::make_pair(rl.id, rr.id);
1719         };
1720     };
1721     typedef decltype(CellInfo::udata) cell_udata_t;
1722     cell_udata_t dont_solve = std::numeric_limits<cell_udata_t>::max();
1723 };
1724 int HeAPPlacer::CutSpreader::seq = 0;
1725 
placer_heap(Context * ctx,PlacerHeapCfg cfg)1726 bool placer_heap(Context *ctx, PlacerHeapCfg cfg) { return HeAPPlacer(ctx, cfg).place(); }
1727 
PlacerHeapCfg(Context * ctx)1728 PlacerHeapCfg::PlacerHeapCfg(Context *ctx)
1729 {
1730     alpha = ctx->setting<float>("placerHeap/alpha", 0.1);
1731     beta = ctx->setting<float>("placerHeap/beta", 0.9);
1732     criticalityExponent = ctx->setting<int>("placerHeap/criticalityExponent", 2);
1733     timingWeight = ctx->setting<int>("placerHeap/timingWeight", 10);
1734     timing_driven = ctx->setting<bool>("timing_driven");
1735     solverTolerance = 1e-5;
1736     placeAllAtOnce = false;
1737 
1738     hpwl_scale_x = 1;
1739     hpwl_scale_y = 1;
1740     spread_scale_x = 1;
1741     spread_scale_y = 1;
1742 }
1743 
1744 NEXTPNR_NAMESPACE_END
1745 
1746 #else
1747 
1748 #include "log.h"
1749 #include "nextpnr.h"
1750 #include "placer_heap.h"
1751 
1752 NEXTPNR_NAMESPACE_BEGIN
1753 bool placer_heap(Context *ctx, PlacerHeapCfg cfg)
1754 {
1755     log_error("nextpnr was built without the HeAP placer\n");
1756     return false;
1757 }
1758 
1759 PlacerHeapCfg::PlacerHeapCfg(Context *ctx) {}
1760 
1761 NEXTPNR_NAMESPACE_END
1762 
1763 #endif
1764