1 /*
2  *  nextpnr -- Next Generation Place and Route
3  *
4  *  Copyright (C) 2018  David Shah <david@symbioticeda.com>
5  *  Copyright (C) 2018  Eddie Hung <eddieh@ece.ubc.ca>
6  *
7  *  Permission to use, copy, modify, and/or distribute this software for any
8  *  purpose with or without fee is hereby granted, provided that the above
9  *  copyright notice and this permission notice appear in all copies.
10  *
11  *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  */
20 
21 #include "timing.h"
22 #include <algorithm>
23 #include <boost/range/adaptor/reversed.hpp>
24 #include <deque>
25 #include <map>
26 #include <unordered_map>
27 #include <utility>
28 #include "log.h"
29 #include "util.h"
30 
31 NEXTPNR_NAMESPACE_BEGIN
32 
33 namespace {
34 struct ClockEvent
35 {
36     IdString clock;
37     ClockEdge edge;
38 
operator ==__anond42635b40111::ClockEvent39     bool operator==(const ClockEvent &other) const { return clock == other.clock && edge == other.edge; }
40 };
41 
42 struct ClockPair
43 {
44     ClockEvent start, end;
45 
operator ==__anond42635b40111::ClockPair46     bool operator==(const ClockPair &other) const { return start == other.start && end == other.end; }
47 };
48 } // namespace
49 
50 NEXTPNR_NAMESPACE_END
51 namespace std {
52 
53 template <> struct hash<NEXTPNR_NAMESPACE_PREFIX ClockEvent>
54 {
operator ()std::hash55     std::size_t operator()(const NEXTPNR_NAMESPACE_PREFIX ClockEvent &obj) const noexcept
56     {
57         std::size_t seed = 0;
58         boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX IdString>()(obj.clock));
59         boost::hash_combine(seed, hash<int>()(int(obj.edge)));
60         return seed;
61     }
62 };
63 
64 template <> struct hash<NEXTPNR_NAMESPACE_PREFIX ClockPair>
65 {
operator ()std::hash66     std::size_t operator()(const NEXTPNR_NAMESPACE_PREFIX ClockPair &obj) const noexcept
67     {
68         std::size_t seed = 0;
69         boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX ClockEvent>()(obj.start));
70         boost::hash_combine(seed, hash<NEXTPNR_NAMESPACE_PREFIX ClockEvent>()(obj.start));
71         return seed;
72     }
73 };
74 
75 } // namespace std
76 NEXTPNR_NAMESPACE_BEGIN
77 
78 typedef std::vector<const PortRef *> PortRefVector;
79 typedef std::map<int, unsigned> DelayFrequency;
80 
81 struct CriticalPath
82 {
83     PortRefVector ports;
84     delay_t path_delay;
85     delay_t path_period;
86 };
87 
88 typedef std::unordered_map<ClockPair, CriticalPath> CriticalPathMap;
89 typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap;
90 
91 struct Timing
92 {
93     Context *ctx;
94     bool net_delays;
95     bool update;
96     delay_t min_slack;
97     CriticalPathMap *crit_path;
98     DelayFrequency *slack_histogram;
99     NetCriticalityMap *net_crit;
100     IdString async_clock;
101 
102     struct TimingData
103     {
TimingDataTiming::TimingData104         TimingData() : max_arrival(), max_path_length(), min_remaining_budget() {}
TimingDataTiming::TimingData105         TimingData(delay_t max_arrival) : max_arrival(max_arrival), max_path_length(), min_remaining_budget() {}
106         delay_t max_arrival;
107         unsigned max_path_length = 0;
108         delay_t min_remaining_budget;
109         bool false_startpoint = false;
110         std::vector<delay_t> min_required;
111         std::unordered_map<ClockEvent, delay_t> arrival_time;
112     };
113 
    // Set up a timing analysis pass.
    //
    // ctx             - context; read here for the "target_freq" setting and to
    //                   create the "$async$" identifier.
    // net_delays      - stored as-is in the member of the same name.
    // update          - stored as-is in the member of the same name.
    // crit_path       - optional output map (may be nullptr).
    // slack_histogram - optional output histogram (may be nullptr).
    // net_crit        - optional output map (may be nullptr).
    //
    // min_slack starts at 1.0e12 / target_freq (presumably the target clock
    // period expressed in picoseconds -- TODO confirm the unit of delay_t).
    // async_clock is the identifier "$async$".
    Timing(Context *ctx, bool net_delays, bool update, CriticalPathMap *crit_path = nullptr,
           DelayFrequency *slack_histogram = nullptr, NetCriticalityMap *net_crit = nullptr)
            : ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->setting<float>("target_freq")),
              crit_path(crit_path), slack_histogram(slack_histogram), net_crit(net_crit),
              async_clock(ctx->id("$async$"))
    {
    }
121 
walk_pathsTiming122     delay_t walk_paths()
123     {
124         const auto clk_period = ctx->getDelayFromNS(1.0e9 / ctx->setting<float>("target_freq")).maxDelay();
125 
126         // First, compute the topological order of nets to walk through the circuit, assuming it is a _acyclic_ graph
127         // TODO(eddieh): Handle the case where it is cyclic, e.g. combinatorial loops
128         std::vector<NetInfo *> topological_order;
129         std::unordered_map<const NetInfo *, std::unordered_map<ClockEvent, TimingData>> net_data;
130         // In lieu of deleting edges from the graph, simply count the number of fanins to each output port
131         std::unordered_map<const PortInfo *, unsigned> port_fanin;
132 
133         std::vector<IdString> input_ports;
134         std::vector<const PortInfo *> output_ports;
135 
136         std::unordered_set<IdString> ooc_port_nets;
137 
138         // In out-of-context mode, top-level inputs look floating but aren't
139         if (bool_or_default(ctx->settings, ctx->id("arch.ooc"))) {
140             for (auto &p : ctx->ports) {
141                 if (p.second.type != PORT_IN || p.second.net == nullptr)
142                     continue;
143                 ooc_port_nets.insert(p.second.net->name);
144             }
145         }
146 
147         for (auto &cell : ctx->cells) {
148             input_ports.clear();
149             output_ports.clear();
150             for (auto &port : cell.second->ports) {
151                 if (!port.second.net)
152                     continue;
153                 if (port.second.type == PORT_OUT)
154                     output_ports.push_back(&port.second);
155                 else
156                     input_ports.push_back(port.first);
157             }
158 
159             for (auto o : output_ports) {
160                 int clocks = 0;
161                 TimingPortClass portClass = ctx->getPortTimingClass(cell.second.get(), o->name, clocks);
162                 // If output port is influenced by a clock (e.g. FF output) then add it to the ordering as a timing
163                 // start-point
164                 if (portClass == TMG_REGISTER_OUTPUT) {
165                     topological_order.emplace_back(o->net);
166                     for (int i = 0; i < clocks; i++) {
167                         TimingClockingInfo clkInfo = ctx->getPortClockingInfo(cell.second.get(), o->name, i);
168                         const NetInfo *clknet = get_net_or_empty(cell.second.get(), clkInfo.clock_port);
169                         IdString clksig = clknet ? clknet->name : async_clock;
170                         net_data[o->net][ClockEvent{clksig, clknet ? clkInfo.edge : RISING_EDGE}] =
171                                 TimingData{clkInfo.clockToQ.maxDelay()};
172                     }
173 
174                 } else {
175                     if (portClass == TMG_STARTPOINT || portClass == TMG_GEN_CLOCK || portClass == TMG_IGNORE) {
176                         topological_order.emplace_back(o->net);
177                         TimingData td;
178                         td.false_startpoint = (portClass == TMG_GEN_CLOCK || portClass == TMG_IGNORE);
179                         td.max_arrival = 0;
180                         net_data[o->net][ClockEvent{async_clock, RISING_EDGE}] = td;
181                     }
182 
183                     // Don't analyse paths from a clock input to other pins - they will be considered by the
184                     // special-case handling register input/output class ports
185                     if (portClass == TMG_CLOCK_INPUT)
186                         continue;
187 
188                     // Otherwise, for all driven input ports on this cell, if a timing arc exists between the input and
189                     // the current output port, increment fanin counter
190                     for (auto i : input_ports) {
191                         DelayInfo comb_delay;
192                         NetInfo *i_net = cell.second->ports[i].net;
193                         if (i_net->driver.cell == nullptr && !ooc_port_nets.count(i_net->name))
194                             continue;
195                         bool is_path = ctx->getCellDelay(cell.second.get(), i, o->name, comb_delay);
196                         if (is_path)
197                             port_fanin[o]++;
198                     }
199                     // If there is no fanin, add the port as a false startpoint
200                     if (!port_fanin.count(o) && !net_data.count(o->net)) {
201                         topological_order.emplace_back(o->net);
202                         TimingData td;
203                         td.false_startpoint = true;
204                         td.max_arrival = 0;
205                         net_data[o->net][ClockEvent{async_clock, RISING_EDGE}] = td;
206                     }
207                 }
208             }
209         }
210 
211         // In out-of-context mode, handle top-level ports correctly
212         if (bool_or_default(ctx->settings, ctx->id("arch.ooc"))) {
213             for (auto &p : ctx->ports) {
214                 if (p.second.type != PORT_IN || p.second.net == nullptr)
215                     continue;
216                 topological_order.emplace_back(p.second.net);
217             }
218         }
219 
220         std::deque<NetInfo *> queue(topological_order.begin(), topological_order.end());
221         // Now walk the design, from the start points identified previously, building up a topological order
222         while (!queue.empty()) {
223             const auto net = queue.front();
224             queue.pop_front();
225 
226             for (auto &usr : net->users) {
227                 int user_clocks;
228                 TimingPortClass usrClass = ctx->getPortTimingClass(usr.cell, usr.port, user_clocks);
229                 if (usrClass == TMG_IGNORE || usrClass == TMG_CLOCK_INPUT)
230                     continue;
231                 for (auto &port : usr.cell->ports) {
232                     if (port.second.type != PORT_OUT || !port.second.net)
233                         continue;
234                     int port_clocks;
235                     TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, port.first, port_clocks);
236 
237                     // Skip if this is a clocked output (but allow non-clocked ones)
238                     if (portClass == TMG_REGISTER_OUTPUT || portClass == TMG_STARTPOINT || portClass == TMG_IGNORE ||
239                         portClass == TMG_GEN_CLOCK)
240                         continue;
241                     DelayInfo comb_delay;
242                     bool is_path = ctx->getCellDelay(usr.cell, usr.port, port.first, comb_delay);
243                     if (!is_path)
244                         continue;
245                     // Decrement the fanin count, and only add to topological order if all its fanins have already
246                     // been visited
247                     auto it = port_fanin.find(&port.second);
248                     if (it == port_fanin.end())
249                         log_error("Timing counted negative fanin count for port %s.%s (net %s), please report this "
250                                   "error.\n",
251                                   ctx->nameOf(usr.cell), ctx->nameOf(port.first), ctx->nameOf(port.second.net));
252                     if (--it->second == 0) {
253                         topological_order.emplace_back(port.second.net);
254                         queue.emplace_back(port.second.net);
255                         port_fanin.erase(it);
256                     }
257                 }
258             }
259         }
260 
261         // Sanity check to ensure that all ports where fanins were recorded were indeed visited
262         if (!port_fanin.empty() && !bool_or_default(ctx->settings, ctx->id("timing/ignoreLoops"), false)) {
263             for (auto fanin : port_fanin) {
264                 NetInfo *net = fanin.first->net;
265                 if (net != nullptr) {
266                     log_info("   remaining fanin includes %s (net %s)\n", fanin.first->name.c_str(ctx),
267                              net->name.c_str(ctx));
268                     if (net->driver.cell != nullptr)
269                         log_info("        driver = %s.%s\n", net->driver.cell->name.c_str(ctx),
270                                  net->driver.port.c_str(ctx));
271                     for (auto net_user : net->users)
272                         log_info("        user: %s.%s\n", net_user.cell->name.c_str(ctx), net_user.port.c_str(ctx));
273                 } else {
274                     log_info("   remaining fanin includes %s (no net)\n", fanin.first->name.c_str(ctx));
275                 }
276             }
277             if (ctx->force)
278                 log_warning("timing analysis failed due to presence of combinatorial loops, incomplete specification "
279                             "of timing ports, etc.\n");
280             else
281                 log_error("timing analysis failed due to presence of combinatorial loops, incomplete specification of "
282                           "timing ports, etc.\n");
283         }
284 
285         // Go forwards topologically to find the maximum arrival time and max path length for each net
286         for (auto net : topological_order) {
287             if (!net_data.count(net))
288                 continue;
289             auto &nd_map = net_data.at(net);
290             for (auto &startdomain : nd_map) {
291                 ClockEvent start_clk = startdomain.first;
292                 auto &nd = startdomain.second;
293                 if (nd.false_startpoint)
294                     continue;
295                 const auto net_arrival = nd.max_arrival;
296                 const auto net_length_plus_one = nd.max_path_length + 1;
297                 nd.min_remaining_budget = clk_period;
298                 for (auto &usr : net->users) {
299                     int port_clocks;
300                     TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, port_clocks);
301                     auto net_delay = net_delays ? ctx->getNetinfoRouteDelay(net, usr) : delay_t();
302                     auto usr_arrival = net_arrival + net_delay;
303 
304                     if (portClass == TMG_ENDPOINT || portClass == TMG_IGNORE || portClass == TMG_CLOCK_INPUT) {
305                         // Skip
306                     } else {
307                         auto budget_override = ctx->getBudgetOverride(net, usr, net_delay);
308                         // Iterate over all output ports on the same cell as the sink
309                         for (auto port : usr.cell->ports) {
310                             if (port.second.type != PORT_OUT || !port.second.net)
311                                 continue;
312                             DelayInfo comb_delay;
313                             // Look up delay through this path
314                             bool is_path = ctx->getCellDelay(usr.cell, usr.port, port.first, comb_delay);
315                             if (!is_path)
316                                 continue;
317                             auto &data = net_data[port.second.net][start_clk];
318                             auto &arrival = data.max_arrival;
319                             arrival = std::max(arrival, usr_arrival + comb_delay.maxDelay());
320                             if (!budget_override) { // Do not increment path length if budget overriden since it doesn't
321                                 // require a share of the slack
322                                 auto &path_length = data.max_path_length;
323                                 path_length = std::max(path_length, net_length_plus_one);
324                             }
325                         }
326                     }
327                 }
328             }
329         }
330 
331         std::unordered_map<ClockPair, std::pair<delay_t, NetInfo *>> crit_nets;
332 
333         // Now go backwards topologically to determine the minimum path slack, and to distribute all path slack evenly
334         // between all nets on the path
335         for (auto net : boost::adaptors::reverse(topological_order)) {
336             if (!net_data.count(net))
337                 continue;
338             auto &nd_map = net_data.at(net);
339             for (auto &startdomain : nd_map) {
340                 auto &nd = startdomain.second;
341                 // Ignore false startpoints
342                 if (nd.false_startpoint)
343                     continue;
344                 const delay_t net_length_plus_one = nd.max_path_length + 1;
345                 auto &net_min_remaining_budget = nd.min_remaining_budget;
346                 for (auto &usr : net->users) {
347                     auto net_delay = net_delays ? ctx->getNetinfoRouteDelay(net, usr) : delay_t();
348                     auto budget_override = ctx->getBudgetOverride(net, usr, net_delay);
349                     int port_clocks;
350                     TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, port_clocks);
351                     if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT) {
352                         auto process_endpoint = [&](IdString clksig, ClockEdge edge, delay_t setup) {
353                             const auto net_arrival = nd.max_arrival;
354                             const auto endpoint_arrival = net_arrival + net_delay + setup;
355                             delay_t period;
356                             // Set default period
357                             if (edge == startdomain.first.edge) {
358                                 period = clk_period;
359                             } else {
360                                 period = clk_period / 2;
361                             }
362                             if (clksig != async_clock) {
363                                 if (ctx->nets.at(clksig)->clkconstr) {
364                                     if (edge == startdomain.first.edge) {
365                                         // same edge
366                                         period = ctx->nets.at(clksig)->clkconstr->period.minDelay();
367                                     } else if (edge == RISING_EDGE) {
368                                         // falling -> rising
369                                         period = ctx->nets.at(clksig)->clkconstr->low.minDelay();
370                                     } else if (edge == FALLING_EDGE) {
371                                         // rising -> falling
372                                         period = ctx->nets.at(clksig)->clkconstr->high.minDelay();
373                                     }
374                                 }
375                             }
376                             auto path_budget = period - endpoint_arrival;
377 
378                             if (update) {
379                                 auto budget_share = budget_override ? 0 : path_budget / net_length_plus_one;
380                                 usr.budget = std::min(usr.budget, net_delay + budget_share);
381                                 net_min_remaining_budget =
382                                         std::min(net_min_remaining_budget, path_budget - budget_share);
383                             }
384 
385                             if (path_budget < min_slack)
386                                 min_slack = path_budget;
387 
388                             if (slack_histogram) {
389                                 int slack_ps = ctx->getDelayNS(path_budget) * 1000;
390                                 (*slack_histogram)[slack_ps]++;
391                             }
392                             ClockEvent dest_ev{clksig, edge};
393                             ClockPair clockPair{startdomain.first, dest_ev};
394                             nd.arrival_time[dest_ev] = std::max(nd.arrival_time[dest_ev], endpoint_arrival);
395 
396                             if (crit_path) {
397                                 if (!crit_nets.count(clockPair) || crit_nets.at(clockPair).first < endpoint_arrival) {
398                                     crit_nets[clockPair] = std::make_pair(endpoint_arrival, net);
399                                     (*crit_path)[clockPair].path_delay = endpoint_arrival;
400                                     (*crit_path)[clockPair].path_period = period;
401                                     (*crit_path)[clockPair].ports.clear();
402                                     (*crit_path)[clockPair].ports.push_back(&usr);
403                                 }
404                             }
405                         };
406                         if (portClass == TMG_REGISTER_INPUT) {
407                             for (int i = 0; i < port_clocks; i++) {
408                                 TimingClockingInfo clkInfo = ctx->getPortClockingInfo(usr.cell, usr.port, i);
409                                 const NetInfo *clknet = get_net_or_empty(usr.cell, clkInfo.clock_port);
410                                 IdString clksig = clknet ? clknet->name : async_clock;
411                                 process_endpoint(clksig, clknet ? clkInfo.edge : RISING_EDGE, clkInfo.setup.maxDelay());
412                             }
413                         } else {
414                             process_endpoint(async_clock, RISING_EDGE, 0);
415                         }
416 
417                     } else if (update) {
418 
419                         // Iterate over all output ports on the same cell as the sink
420                         for (const auto &port : usr.cell->ports) {
421                             if (port.second.type != PORT_OUT || !port.second.net)
422                                 continue;
423                             DelayInfo comb_delay;
424                             bool is_path = ctx->getCellDelay(usr.cell, usr.port, port.first, comb_delay);
425                             if (!is_path)
426                                 continue;
427                             if (net_data.count(port.second.net) &&
428                                 net_data.at(port.second.net).count(startdomain.first)) {
429                                 auto path_budget =
430                                         net_data.at(port.second.net).at(startdomain.first).min_remaining_budget;
431                                 auto budget_share = budget_override ? 0 : path_budget / net_length_plus_one;
432                                 usr.budget = std::min(usr.budget, net_delay + budget_share);
433                                 net_min_remaining_budget =
434                                         std::min(net_min_remaining_budget, path_budget - budget_share);
435                             }
436                         }
437                     }
438                 }
439             }
440         }
441 
442         if (crit_path) {
443             // Walk backwards from the most critical net
444             for (auto crit_pair : crit_nets) {
445                 NetInfo *crit_net = crit_pair.second.second;
446                 auto &cp_ports = (*crit_path)[crit_pair.first].ports;
447                 while (crit_net) {
448                     const PortInfo *crit_ipin = nullptr;
449                     delay_t max_arrival = std::numeric_limits<delay_t>::min();
450                     // Look at all input ports on its driving cell
451                     for (const auto &port : crit_net->driver.cell->ports) {
452                         if (port.second.type != PORT_IN || !port.second.net)
453                             continue;
454                         DelayInfo comb_delay;
455                         bool is_path =
456                                 ctx->getCellDelay(crit_net->driver.cell, port.first, crit_net->driver.port, comb_delay);
457                         if (!is_path)
458                             continue;
459                         // If input port is influenced by a clock, skip
460                         int port_clocks;
461                         TimingPortClass portClass =
462                                 ctx->getPortTimingClass(crit_net->driver.cell, port.first, port_clocks);
463                         if (portClass == TMG_CLOCK_INPUT || portClass == TMG_ENDPOINT || portClass == TMG_IGNORE)
464                             continue;
465                         // And find the fanin net with the latest arrival time
466                         if (net_data.count(port.second.net) &&
467                             net_data.at(port.second.net).count(crit_pair.first.start)) {
468                             auto net_arrival = net_data.at(port.second.net).at(crit_pair.first.start).max_arrival;
469                             if (net_delays) {
470                                 for (auto &user : port.second.net->users)
471                                     if (user.port == port.first && user.cell == crit_net->driver.cell) {
472                                         net_arrival += ctx->getNetinfoRouteDelay(port.second.net, user);
473                                         break;
474                                     }
475                             }
476                             net_arrival += comb_delay.maxDelay();
477                             if (net_arrival > max_arrival) {
478                                 max_arrival = net_arrival;
479                                 crit_ipin = &port.second;
480                             }
481                         }
482                     }
483 
484                     if (!crit_ipin)
485                         break;
486                     // Now convert PortInfo* into a PortRef*
487                     for (auto &usr : crit_ipin->net->users) {
488                         if (usr.cell->name == crit_net->driver.cell->name && usr.port == crit_ipin->name) {
489                             cp_ports.push_back(&usr);
490                             break;
491                         }
492                     }
493                     crit_net = crit_ipin->net;
494                 }
495                 std::reverse(cp_ports.begin(), cp_ports.end());
496             }
497         }
498 
499         if (net_crit) {
500             NPNR_ASSERT(crit_path);
501             // Go through in reverse topological order to set required times
502             for (auto net : boost::adaptors::reverse(topological_order)) {
503                 if (!net_data.count(net))
504                     continue;
505                 auto &nd_map = net_data.at(net);
506                 for (auto &startdomain : nd_map) {
507                     auto &nd = startdomain.second;
508                     if (nd.false_startpoint)
509                         continue;
510                     if (startdomain.first.clock == async_clock)
511                         continue;
512                     if (nd.min_required.empty())
513                         nd.min_required.resize(net->users.size(), std::numeric_limits<delay_t>::max());
514                     delay_t net_min_required = std::numeric_limits<delay_t>::max();
515                     for (size_t i = 0; i < net->users.size(); i++) {
516                         auto &usr = net->users.at(i);
517                         auto net_delay = ctx->getNetinfoRouteDelay(net, usr);
518                         int port_clocks;
519                         TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, port_clocks);
520                         if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT) {
521                             auto process_endpoint = [&](IdString clksig, ClockEdge edge, delay_t setup) {
522                                 delay_t period;
523                                 // Set default period
524                                 if (edge == startdomain.first.edge) {
525                                     period = clk_period;
526                                 } else {
527                                     period = clk_period / 2;
528                                 }
529                                 if (clksig != async_clock) {
530                                     if (ctx->nets.at(clksig)->clkconstr) {
531                                         if (edge == startdomain.first.edge) {
532                                             // same edge
533                                             period = ctx->nets.at(clksig)->clkconstr->period.minDelay();
534                                         } else if (edge == RISING_EDGE) {
535                                             // falling -> rising
536                                             period = ctx->nets.at(clksig)->clkconstr->low.minDelay();
537                                         } else if (edge == FALLING_EDGE) {
538                                             // rising -> falling
539                                             period = ctx->nets.at(clksig)->clkconstr->high.minDelay();
540                                         }
541                                     }
542                                 }
543                                 nd.min_required.at(i) = std::min(period - setup, nd.min_required.at(i));
544                             };
545                             if (portClass == TMG_REGISTER_INPUT) {
546                                 for (int j = 0; j < port_clocks; j++) {
547                                     TimingClockingInfo clkInfo = ctx->getPortClockingInfo(usr.cell, usr.port, j);
548                                     const NetInfo *clknet = get_net_or_empty(usr.cell, clkInfo.clock_port);
549                                     IdString clksig = clknet ? clknet->name : async_clock;
550                                     process_endpoint(clksig, clknet ? clkInfo.edge : RISING_EDGE,
551                                                      clkInfo.setup.maxDelay());
552                                 }
553                             } else {
554                                 process_endpoint(async_clock, RISING_EDGE, 0);
555                             }
556                         }
557                         net_min_required = std::min(net_min_required, nd.min_required.at(i) - net_delay);
558                     }
559                     PortRef &drv = net->driver;
560                     if (drv.cell == nullptr)
561                         continue;
562                     for (const auto &port : drv.cell->ports) {
563                         if (port.second.type != PORT_IN || !port.second.net)
564                             continue;
565                         DelayInfo comb_delay;
566                         bool is_path = ctx->getCellDelay(drv.cell, port.first, drv.port, comb_delay);
567                         if (!is_path)
568                             continue;
569                         int cc;
570                         auto pclass = ctx->getPortTimingClass(drv.cell, port.first, cc);
571                         if (pclass != TMG_COMB_INPUT)
572                             continue;
573                         NetInfo *sink_net = port.second.net;
574                         if (net_data.count(sink_net) && net_data.at(sink_net).count(startdomain.first)) {
575                             auto &sink_nd = net_data.at(sink_net).at(startdomain.first);
576                             if (sink_nd.min_required.empty())
577                                 sink_nd.min_required.resize(sink_net->users.size(),
578                                                             std::numeric_limits<delay_t>::max());
579                             for (size_t i = 0; i < sink_net->users.size(); i++) {
580                                 auto &user = sink_net->users.at(i);
581                                 if (user.cell == drv.cell && user.port == port.first) {
582                                     sink_nd.min_required.at(i) = std::min(sink_nd.min_required.at(i),
583                                                                           net_min_required - comb_delay.maxDelay());
584                                     break;
585                                 }
586                             }
587                         }
588                     }
589                 }
590             }
591             std::unordered_map<ClockEvent, delay_t> worst_slack;
592 
593             // Assign slack values
594             for (auto &net_entry : net_data) {
595                 const NetInfo *net = net_entry.first;
596                 for (auto &startdomain : net_entry.second) {
597                     auto &nd = startdomain.second;
598                     if (startdomain.first.clock == async_clock)
599                         continue;
600                     if (nd.min_required.empty())
601                         continue;
602                     auto &nc = (*net_crit)[net->name];
603                     if (nc.slack.empty())
604                         nc.slack.resize(net->users.size(), std::numeric_limits<delay_t>::max());
605 #if 0
606                     if (ctx->debug)
607                         log_info("Net %s cd %s\n", net->name.c_str(ctx), startdomain.first.clock.c_str(ctx));
608 #endif
609                     for (size_t i = 0; i < net->users.size(); i++) {
610                         delay_t slack = nd.min_required.at(i) -
611                                         (nd.max_arrival + ctx->getNetinfoRouteDelay(net, net->users.at(i)));
612 #if 0
613                         if (ctx->debug)
614                             log_info("    user %s.%s required %.02fns arrival %.02f route %.02f slack %.02f\n",
615                                     net->users.at(i).cell->name.c_str(ctx), net->users.at(i).port.c_str(ctx),
616                                     ctx->getDelayNS(nd.min_required.at(i)), ctx->getDelayNS(nd.max_arrival),
617                                     ctx->getDelayNS(ctx->getNetinfoRouteDelay(net, net->users.at(i))), ctx->getDelayNS(slack));
618 #endif
619                         if (worst_slack.count(startdomain.first))
620                             worst_slack.at(startdomain.first) = std::min(worst_slack.at(startdomain.first), slack);
621                         else
622                             worst_slack[startdomain.first] = slack;
623                         nc.slack.at(i) = slack;
624                     }
625                     if (ctx->debug)
626                         log_break();
627                 }
628             }
629             // Assign criticality values
630             for (auto &net_entry : net_data) {
631                 const NetInfo *net = net_entry.first;
632                 for (auto &startdomain : net_entry.second) {
633                     if (startdomain.first.clock == async_clock)
634                         continue;
635                     auto &nd = startdomain.second;
636                     if (nd.min_required.empty())
637                         continue;
638                     auto &nc = (*net_crit)[net->name];
639                     if (nc.slack.empty())
640                         continue;
641                     if (nc.criticality.empty())
642                         nc.criticality.resize(net->users.size(), 0);
643                     // Only consider intra-clock paths for criticality
644                     if (!crit_path->count(ClockPair{startdomain.first, startdomain.first}))
645                         continue;
646                     delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay;
647                     for (size_t i = 0; i < net->users.size(); i++) {
648                         float criticality =
649                                 1.0f - ((float(nc.slack.at(i)) - float(worst_slack.at(startdomain.first))) / dmax);
650                         nc.criticality.at(i) = std::min<double>(1.0, std::max<double>(0.0, criticality));
651                     }
652                     nc.max_path_length = nd.max_path_length;
653                     nc.cd_worst_slack = worst_slack.at(startdomain.first);
654                 }
655             }
656 #if 0
657             if (ctx->debug) {
658                 for (auto &nc : *net_crit) {
659                     NetInfo *net = ctx->nets.at(nc.first).get();
660                     log_info("Net %s maxlen %d worst_slack %.02fns: \n", nc.first.c_str(ctx), nc.second.max_path_length,
661                              ctx->getDelayNS(nc.second.cd_worst_slack));
662                     if (!nc.second.criticality.empty() && !nc.second.slack.empty()) {
663                         for (size_t i = 0; i < net->users.size(); i++) {
664                             log_info("   user %s.%s slack %.02fns crit %.03f\n", net->users.at(i).cell->name.c_str(ctx),
665                                      net->users.at(i).port.c_str(ctx), ctx->getDelayNS(nc.second.slack.at(i)),
666                                      nc.second.criticality.at(i));
667                         }
668                     }
669                     log_break();
670                 }
671             }
672 #endif
673         }
674         return min_slack;
675     }
676 
assign_budgetTiming677     void assign_budget()
678     {
679         // Clear delays to a very high value first
680         for (auto &net : ctx->nets) {
681             for (auto &usr : net.second->users) {
682                 usr.budget = std::numeric_limits<delay_t>::max();
683             }
684         }
685 
686         walk_paths();
687     }
688 };
689 
assign_budget(Context * ctx,bool quiet)690 void assign_budget(Context *ctx, bool quiet)
691 {
692     if (!quiet) {
693         log_break();
694         log_info("Annotating ports with timing budgets for target frequency %.2f MHz\n",
695                  ctx->setting<float>("target_freq") / 1e6);
696     }
697 
698     Timing timing(ctx, ctx->setting<int>("slack_redist_iter") > 0 /* net_delays */, true /* update */);
699     timing.assign_budget();
700 
701     if (!quiet || ctx->verbose) {
702         for (auto &net : ctx->nets) {
703             for (auto &user : net.second->users) {
704                 // Post-update check
705                 if (!ctx->setting<bool>("auto_freq") && user.budget < 0)
706                     log_info("port %s.%s, connected to net '%s', has negative "
707                              "timing budget of %fns\n",
708                              user.cell->name.c_str(ctx), user.port.c_str(ctx), net.first.c_str(ctx),
709                              ctx->getDelayNS(user.budget));
710                 else if (ctx->debug)
711                     log_info("port %s.%s, connected to net '%s', has "
712                              "timing budget of %fns\n",
713                              user.cell->name.c_str(ctx), user.port.c_str(ctx), net.first.c_str(ctx),
714                              ctx->getDelayNS(user.budget));
715             }
716         }
717     }
718 
719     // For slack redistribution, if user has not specified a frequency dynamically adjust the target frequency to be the
720     // currently achieved maximum
721     if (ctx->setting<bool>("auto_freq") && ctx->setting<int>("slack_redist_iter") > 0) {
722         delay_t default_slack = delay_t((1.0e9 / ctx->getDelayNS(1)) / ctx->setting<float>("target_freq"));
723         ctx->settings[ctx->id("target_freq")] =
724                 std::to_string(1.0e9 / ctx->getDelayNS(default_slack - timing.min_slack));
725         if (ctx->verbose)
726             log_info("minimum slack for this assign = %.2f ns, target Fmax for next "
727                      "update = %.2f MHz\n",
728                      ctx->getDelayNS(timing.min_slack), ctx->setting<float>("target_freq") / 1e6);
729     }
730 
731     if (!quiet)
732         log_info("Checksum: 0x%08x\n", ctx->checksum());
733 }
734 
timing_analysis(Context * ctx,bool print_histogram,bool print_fmax,bool print_path,bool warn_on_failure)735 void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool print_path, bool warn_on_failure)
736 {
737     auto format_event = [ctx](const ClockEvent &e, int field_width = 0) {
738         std::string value;
739         if (e.clock == ctx->id("$async$"))
740             value = std::string("<async>");
741         else
742             value = (e.edge == FALLING_EDGE ? std::string("negedge ") : std::string("posedge ")) + e.clock.str(ctx);
743         if (int(value.length()) < field_width)
744             value.insert(value.length(), field_width - int(value.length()), ' ');
745         return value;
746     };
747 
748     CriticalPathMap crit_paths;
749     DelayFrequency slack_histogram;
750 
751     Timing timing(ctx, true /* net_delays */, false /* update */, (print_path || print_fmax) ? &crit_paths : nullptr,
752                   print_histogram ? &slack_histogram : nullptr);
753     timing.walk_paths();
754     std::map<IdString, std::pair<ClockPair, CriticalPath>> clock_reports;
755     std::map<IdString, double> clock_fmax;
756     std::vector<ClockPair> xclock_paths;
757     std::set<IdString> empty_clocks; // set of clocks with no interior paths
758     if (print_path || print_fmax) {
759         for (auto path : crit_paths) {
760             const ClockEvent &a = path.first.start;
761             const ClockEvent &b = path.first.end;
762             empty_clocks.insert(a.clock);
763             empty_clocks.insert(b.clock);
764         }
765         for (auto path : crit_paths) {
766             const ClockEvent &a = path.first.start;
767             const ClockEvent &b = path.first.end;
768             if (a.clock != b.clock || a.clock == ctx->id("$async$"))
769                 continue;
770             double Fmax;
771             empty_clocks.erase(a.clock);
772             if (a.edge == b.edge)
773                 Fmax = 1000 / ctx->getDelayNS(path.second.path_delay);
774             else
775                 Fmax = 500 / ctx->getDelayNS(path.second.path_delay);
776             if (!clock_fmax.count(a.clock) || Fmax < clock_fmax.at(a.clock)) {
777                 clock_reports[a.clock] = path;
778                 clock_fmax[a.clock] = Fmax;
779             }
780         }
781 
782         for (auto &path : crit_paths) {
783             const ClockEvent &a = path.first.start;
784             const ClockEvent &b = path.first.end;
785             if (a.clock == b.clock && a.clock != ctx->id("$async$"))
786                 continue;
787             xclock_paths.push_back(path.first);
788         }
789 
790         if (clock_reports.empty()) {
791             log_warning("No clocks found in design\n");
792         }
793 
794         std::sort(xclock_paths.begin(), xclock_paths.end(), [ctx](const ClockPair &a, const ClockPair &b) {
795             if (a.start.clock.str(ctx) < b.start.clock.str(ctx))
796                 return true;
797             if (a.start.clock.str(ctx) > b.start.clock.str(ctx))
798                 return false;
799             if (a.start.edge < b.start.edge)
800                 return true;
801             if (a.start.edge > b.start.edge)
802                 return false;
803             if (a.end.clock.str(ctx) < b.end.clock.str(ctx))
804                 return true;
805             if (a.end.clock.str(ctx) > b.end.clock.str(ctx))
806                 return false;
807             if (a.end.edge < b.end.edge)
808                 return true;
809             return false;
810         });
811     }
812 
813     if (print_path) {
814         static auto print_net_source = [](Context *ctx, NetInfo *net) {
815             // Check if this net is annotated with a source list
816             auto sources = net->attrs.find(ctx->id("src"));
817             if (sources == net->attrs.end()) {
818                 // No sources for this net, can't print anything
819                 return;
820             }
821 
822             // Sources are separated by pipe characters.
823             // There is no guaranteed ordering on sources, so we just print all
824             auto sourcelist = sources->second.as_string();
825             std::vector<std::string> source_entries;
826             size_t current = 0, prev = 0;
827             while ((current = sourcelist.find("|", prev)) != std::string::npos) {
828                 source_entries.emplace_back(sourcelist.substr(prev, current - prev));
829                 prev = current + 1;
830             }
831             // Ensure we emplace the final entry
832             source_entries.emplace_back(sourcelist.substr(prev, current - prev));
833 
834             // Iterate and print our source list at the correct indentation level
835             log_info("               Defined in:\n");
836             for (auto entry : source_entries) {
837                 log_info("                 %s\n", entry.c_str());
838             }
839         };
840 
841         auto print_path_report = [ctx](ClockPair &clocks, PortRefVector &crit_path) {
842             delay_t total = 0, logic_total = 0, route_total = 0;
843             auto &front = crit_path.front();
844             auto &front_port = front->cell->ports.at(front->port);
845             auto &front_driver = front_port.net->driver;
846 
847             int port_clocks;
848             auto portClass = ctx->getPortTimingClass(front_driver.cell, front_driver.port, port_clocks);
849             IdString last_port = front_driver.port;
850             int clock_start = -1;
851             if (portClass == TMG_REGISTER_OUTPUT) {
852                 for (int i = 0; i < port_clocks; i++) {
853                     TimingClockingInfo clockInfo = ctx->getPortClockingInfo(front_driver.cell, front_driver.port, i);
854                     const NetInfo *clknet = get_net_or_empty(front_driver.cell, clockInfo.clock_port);
855                     if (clknet != nullptr && clknet->name == clocks.start.clock &&
856                         clockInfo.edge == clocks.start.edge) {
857                         last_port = clockInfo.clock_port;
858                         clock_start = i;
859                         break;
860                     }
861                 }
862             }
863 
864             log_info("curr total\n");
865             for (auto sink : crit_path) {
866                 auto sink_cell = sink->cell;
867                 auto &port = sink_cell->ports.at(sink->port);
868                 auto net = port.net;
869                 auto &driver = net->driver;
870                 auto driver_cell = driver.cell;
871                 DelayInfo comb_delay;
872                 if (clock_start != -1) {
873                     auto clockInfo = ctx->getPortClockingInfo(driver_cell, driver.port, clock_start);
874                     comb_delay = clockInfo.clockToQ;
875                     clock_start = -1;
876                 } else if (last_port == driver.port) {
877                     // Case where we start with a STARTPOINT etc
878                     comb_delay = ctx->getDelayFromNS(0);
879                 } else {
880                     ctx->getCellDelay(driver_cell, last_port, driver.port, comb_delay);
881                 }
882                 total += comb_delay.maxDelay();
883                 logic_total += comb_delay.maxDelay();
884                 log_info("%4.1f %4.1f  Source %s.%s\n", ctx->getDelayNS(comb_delay.maxDelay()), ctx->getDelayNS(total),
885                          driver_cell->name.c_str(ctx), driver.port.c_str(ctx));
886                 auto net_delay = ctx->getNetinfoRouteDelay(net, *sink);
887                 total += net_delay;
888                 route_total += net_delay;
889                 auto driver_loc = ctx->getBelLocation(driver_cell->bel);
890                 auto sink_loc = ctx->getBelLocation(sink_cell->bel);
891                 log_info("%4.1f %4.1f    Net %s budget %f ns (%d,%d) -> (%d,%d)\n", ctx->getDelayNS(net_delay),
892                          ctx->getDelayNS(total), net->name.c_str(ctx), ctx->getDelayNS(sink->budget), driver_loc.x,
893                          driver_loc.y, sink_loc.x, sink_loc.y);
894                 log_info("               Sink %s.%s\n", sink_cell->name.c_str(ctx), sink->port.c_str(ctx));
895                 if (ctx->verbose) {
896                     auto driver_wire = ctx->getNetinfoSourceWire(net);
897                     auto sink_wire = ctx->getNetinfoSinkWire(net, *sink);
898                     log_info("                 prediction: %f ns estimate: %f ns\n",
899                              ctx->getDelayNS(ctx->predictDelay(net, *sink)),
900                              ctx->getDelayNS(ctx->estimateDelay(driver_wire, sink_wire)));
901                     auto cursor = sink_wire;
902                     delay_t delay;
903                     while (driver_wire != cursor) {
904 #ifdef ARCH_ECP5
905                         if (net->is_global)
906                             break;
907 #endif
908                         auto it = net->wires.find(cursor);
909                         assert(it != net->wires.end());
910                         auto pip = it->second.pip;
911                         NPNR_ASSERT(pip != PipId());
912                         delay = ctx->getPipDelay(pip).maxDelay();
913                         log_info("                 %1.3f %s\n", ctx->getDelayNS(delay),
914                                  ctx->getPipName(pip).c_str(ctx));
915                         cursor = ctx->getPipSrcWire(pip);
916                     }
917                 }
918                 if (!ctx->disable_critical_path_source_print) {
919                     print_net_source(ctx, net);
920                 }
921                 last_port = sink->port;
922             }
923             int clockCount = 0;
924             auto sinkClass = ctx->getPortTimingClass(crit_path.back()->cell, crit_path.back()->port, clockCount);
925             if (sinkClass == TMG_REGISTER_INPUT && clockCount > 0) {
926                 auto sinkClockInfo = ctx->getPortClockingInfo(crit_path.back()->cell, crit_path.back()->port, 0);
927                 delay_t setup = sinkClockInfo.setup.maxDelay();
928                 total += setup;
929                 logic_total += setup;
930                 log_info("%4.1f %4.1f  Setup %s.%s\n", ctx->getDelayNS(setup), ctx->getDelayNS(total),
931                          crit_path.back()->cell->name.c_str(ctx), crit_path.back()->port.c_str(ctx));
932             }
933             log_info("%.1f ns logic, %.1f ns routing\n", ctx->getDelayNS(logic_total), ctx->getDelayNS(route_total));
934         };
935 
936         for (auto &clock : clock_reports) {
937             log_break();
938             std::string start =
939                     clock.second.first.start.edge == FALLING_EDGE ? std::string("negedge") : std::string("posedge");
940             std::string end =
941                     clock.second.first.end.edge == FALLING_EDGE ? std::string("negedge") : std::string("posedge");
942             log_info("Critical path report for clock '%s' (%s -> %s):\n", clock.first.c_str(ctx), start.c_str(),
943                      end.c_str());
944             auto &crit_path = clock.second.second.ports;
945             print_path_report(clock.second.first, crit_path);
946         }
947 
948         for (auto &xclock : xclock_paths) {
949             log_break();
950             std::string start = format_event(xclock.start);
951             std::string end = format_event(xclock.end);
952             log_info("Critical path report for cross-domain path '%s' -> '%s':\n", start.c_str(), end.c_str());
953             auto &crit_path = crit_paths.at(xclock).ports;
954             print_path_report(xclock, crit_path);
955         }
956     }
957     if (print_fmax) {
958         log_break();
959         unsigned max_width = 0;
960         for (auto &clock : clock_reports)
961             max_width = std::max<unsigned>(max_width, clock.first.str(ctx).size());
962         for (auto &clock : clock_reports) {
963             const auto &clock_name = clock.first.str(ctx);
964             const int width = max_width - clock_name.size();
965             float target = ctx->setting<float>("target_freq") / 1e6;
966             if (ctx->nets.at(clock.first)->clkconstr)
967                 target = 1000 / ctx->getDelayNS(ctx->nets.at(clock.first)->clkconstr->period.minDelay());
968 
969             bool passed = target < clock_fmax[clock.first];
970             if (!warn_on_failure || passed)
971                 log_info("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "",
972                          clock_name.c_str(), clock_fmax[clock.first], passed ? "PASS" : "FAIL", target);
973             else if (bool_or_default(ctx->settings, ctx->id("timing/allowFail"), false))
974                 log_warning("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "",
975                             clock_name.c_str(), clock_fmax[clock.first], passed ? "PASS" : "FAIL", target);
976             else
977                 log_nonfatal_error("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "",
978                                    clock_name.c_str(), clock_fmax[clock.first], passed ? "PASS" : "FAIL", target);
979         }
980         for (auto &eclock : empty_clocks) {
981             if (eclock != ctx->id("$async$"))
982                 log_info("Clock '%s' has no interior paths\n", eclock.c_str(ctx));
983         }
984         log_break();
985 
986         int start_field_width = 0, end_field_width = 0;
987         for (auto &xclock : xclock_paths) {
988             start_field_width = std::max((int)format_event(xclock.start).length(), start_field_width);
989             end_field_width = std::max((int)format_event(xclock.end).length(), end_field_width);
990         }
991 
992         for (auto &xclock : xclock_paths) {
993             const ClockEvent &a = xclock.start;
994             const ClockEvent &b = xclock.end;
995             auto &path = crit_paths.at(xclock);
996             auto ev_a = format_event(a, start_field_width), ev_b = format_event(b, end_field_width);
997             log_info("Max delay %s -> %s: %0.02f ns\n", ev_a.c_str(), ev_b.c_str(), ctx->getDelayNS(path.path_delay));
998         }
999         log_break();
1000     }
1001 
1002     if (print_histogram && slack_histogram.size() > 0) {
1003         unsigned num_bins = 20;
1004         unsigned bar_width = 60;
1005         auto min_slack = slack_histogram.begin()->first;
1006         auto max_slack = slack_histogram.rbegin()->first;
1007         auto bin_size = std::max<unsigned>(1, ceil((max_slack - min_slack + 1) / float(num_bins)));
1008         std::vector<unsigned> bins(num_bins);
1009         unsigned max_freq = 0;
1010         for (const auto &i : slack_histogram) {
1011             auto &bin = bins[(i.first - min_slack) / bin_size];
1012             bin += i.second;
1013             max_freq = std::max(max_freq, bin);
1014         }
1015         bar_width = std::min(bar_width, max_freq);
1016 
1017         log_break();
1018         log_info("Slack histogram:\n");
1019         log_info(" legend: * represents %d endpoint(s)\n", max_freq / bar_width);
1020         log_info("         + represents [1,%d) endpoint(s)\n", max_freq / bar_width);
1021         for (unsigned i = 0; i < num_bins; ++i)
1022             log_info("[%6d, %6d) |%s%c\n", min_slack + bin_size * i, min_slack + bin_size * (i + 1),
1023                      std::string(bins[i] * bar_width / max_freq, '*').c_str(),
1024                      (bins[i] * bar_width) % max_freq > 0 ? '+' : ' ');
1025     }
1026 }
1027 
get_criticalities(Context * ctx,NetCriticalityMap * net_crit)1028 void get_criticalities(Context *ctx, NetCriticalityMap *net_crit)
1029 {
1030     CriticalPathMap crit_paths;
1031     net_crit->clear();
1032     Timing timing(ctx, true, true, &crit_paths, nullptr, net_crit);
1033     timing.walk_paths();
1034 }
1035 
1036 NEXTPNR_NAMESPACE_END
1037