1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2020 Marvell International Ltd.
4  */
5 
6 #include <command.h>
7 #include <dm.h>
8 #include <hang.h>
9 #include <i2c.h>
10 #include <ram.h>
11 #include <time.h>
12 
13 #include <linux/bitops.h>
14 #include <linux/io.h>
15 
16 #include <mach/octeon_ddr.h>
17 
18 /* Random number generator stuff */
19 
20 #define CVMX_OCT_DID_RNG	8ULL
21 
22 static u64 cvmx_rng_get_random64(void)
23 {
24 	return csr_rd(cvmx_build_io_address(CVMX_OCT_DID_RNG, 0));
25 }
26 
27 static void cvmx_rng_enable(void)
28 {
29 	u64 val;
30 
31 	val = csr_rd(CVMX_RNM_CTL_STATUS);
32 	val |= BIT(0) | BIT(1);
33 	csr_wr(CVMX_RNM_CTL_STATUS, val);
34 }
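
/*
 * Usage sketch (an assumption about the init order, not shown in this
 * file): the RNG block is presumably enabled once before the memory test
 * code below pulls 64-bit patterns from it, roughly:
 *
 *	cvmx_rng_enable();
 *	u64 pattern = cvmx_rng_get_random64();
 */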
35 
36 #define RLEVEL_PRINTALL_DEFAULT		1
37 #define WLEVEL_PRINTALL_DEFAULT		1
38 
39 /*
40  * Define how many HW WL samples to take for majority voting.
41  * MUST BE odd!!
42  * Assume there should only be 2 possible values that will show up,
43  * so treat ties as a problem!!!
44  * NOTE: Do not change this without checking the code!!!
45  */
46 #define WLEVEL_LOOPS_DEFAULT		5
47 
48 #define ENABLE_COMPUTED_VREF_ADJUSTMENT	1
49 #define SW_WLEVEL_HW_DEFAULT		1
50 #define DEFAULT_BEST_RANK_SCORE		9999999
51 #define MAX_RANK_SCORE_LIMIT		99
52 
53 /*
54  * Define how many HW RL samples per rank to take. Multiple samples
55  * allow looking for the best sample score.
56  */
57 #define RLEVEL_SAMPLES_DEFAULT		3
58 
59 #define ddr_seq_print(format, ...) do {} while (0)
60 
61 struct wlevel_bitcnt {
62 	int bitcnt[4];
63 };
64 
65 static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
66 				     int ecc_ena, int *settings, char *title);
67 
68 static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
69 					int dac_value, int byte);
70 
71 /* "mode" arg */
72 #define DBTRAIN_TEST 0
73 #define DBTRAIN_DBI  1
74 #define DBTRAIN_LFSR 2
75 
76 static int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
77 				int mode, u64 *xor_data);
78 
79 #define LMC_DDR3_RESET_ASSERT   0
80 #define LMC_DDR3_RESET_DEASSERT 1
81 
82 static void cn7xxx_lmc_ddr3_reset(struct ddr_priv *priv, int if_num, int reset)
83 {
84 	union cvmx_lmcx_reset_ctl reset_ctl;
85 
86 	/*
87 	 * 4. Deassert DDRn_RESET_L pin by writing
88 	 *    LMC(0..3)_RESET_CTL[DDR3RST] = 1
89 	 *    without modifying any other LMC(0..3)_RESET_CTL fields.
90 	 * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
91 	 * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
92 	 *    delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE*
93 	 *    assertion.
94 	 */
95 	debug("LMC%d %s DDR_RESET_L\n", if_num,
96 	      (reset ==
97 	       LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting");
98 
99 	reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
100 	reset_ctl.cn78xx.ddr3rst = reset;
101 	lmc_wr(priv, CVMX_LMCX_RESET_CTL(if_num), reset_ctl.u64);
102 
103 	lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
104 
105 	udelay(500);
106 }
107 
108 static void perform_lmc_reset(struct ddr_priv *priv, int node, int if_num)
109 {
110 	/*
111 	 * 5.9.6 LMC RESET Initialization
112 	 *
113 	 * The purpose of this step is to assert/deassert the RESET# pin at the
114 	 * DDR3/DDR4 parts.
115 	 *
116 	 * This LMC RESET step is done for all enabled LMCs.
117 	 *
118 	 * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts
119 	 * are in self refresh and are currently preserving their
120 	 * contents. (Software can determine this via
121 	 * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of
122 	 * this section assumes that the DRAM contents need not be preserved.
123 	 *
124 	 * The remainder of this section assumes that the CN78XX DDRn_RESET_L
125 	 * pin is attached to the RESET# pin of the attached DDR3/DDR4 parts,
126 	 * as will be appropriate in many systems.
127 	 *
128 	 * (In other systems, such as ones that can preserve DDR3/DDR4 part
129 	 * contents while CN78XX is powered down, it will not be appropriate to
130 	 * directly attach the CN78XX DDRn_RESET_L pin to DRESET# of the
131 	 * DDR3/DDR4 parts, and this section may not apply.)
132 	 *
133 	 * The remainder of this section describes the sequence for LMCn.
134 	 *
135 	 * Perform the following six substeps for LMC reset initialization:
136 	 *
137 	 * 1. If not done already, assert DDRn_RESET_L pin by writing
138 	 * LMC(0..3)_RESET_ CTL[DDR3RST] = 0 without modifying any other
139 	 * LMC(0..3)_RESET_CTL fields.
140 	 */
141 
142 	if (!ddr_memory_preserved(priv)) {
143 		/*
144 		 * 2. Read LMC(0..3)_RESET_CTL and wait for the result.
145 		 */
146 
147 		lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
148 
149 		/*
150 		 * 3. Wait until RESET# assertion-time requirement from JEDEC
151 		 * DDR3/DDR4 specification is satisfied (200 us during a
152 		 * power-on ramp, 100ns when power is already stable).
153 		 */
154 
155 		udelay(200);
156 
157 		/*
158 		 * 4. Deassert DDRn_RESET_L pin by writing
159 		 *    LMC(0..3)_RESET_CTL[DDR3RST] = 1
160 		 *    without modifying any other LMC(0..3)_RESET_CTL fields.
161 		 * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
162 		 * 6. Wait a minimum of 500us. This guarantees the necessary
163 		 *    T = 500us delay between DDRn_RESET_L deassertion and
164 		 *    DDRn_DIMM*_CKE* assertion.
165 		 */
166 		cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
167 
168 		/* Toggle Reset Again */
169 		/* That is, assert, then de-assert, one more time */
170 		cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_ASSERT);
171 		cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
172 	}
173 }
174 
175 void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num,
176 		   int sequence)
177 {
178 	/*
179 	 * 3. Without changing any other fields in LMC(0)_CONFIG, write
180 	 *    LMC(0)_CONFIG[RANKMASK] then write both
181 	 *    LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write
182 	 *    operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate
183 	 *    the ranks that will participate in the sequence.
184 	 *
185 	 *    The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or
186 	 *    self-refresh exit, depending on whether the DRAM parts are in
187 	 *    self-refresh and whether their contents should be preserved. While
188 	 *    LMC performs these sequences, it will not perform any other DDR3
189 	 *    transactions. When the sequence is complete, hardware sets the
190 	 *    LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been
191 	 *    initialized.
192 	 *
193 	 *    If power-up/init is selected immediately following a DRESET
194 	 *    assertion, LMC executes the sequence described in the "Reset and
195 	 *    Initialization Procedure" section of the JEDEC DDR3
196 	 *    specification. This includes activating CKE, writing all four DDR3
197 	 *    mode registers on all selected ranks, and issuing the required
198 	 *    ZQCL
199 	 *    command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks
200 	 *    with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1,
201 	 *    LMC writes the JEDEC standard SSTE32882 control words selected by
202 	 *    LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and
203 	 *    the first DDR3 mode register write operation.
204 	 *    LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the
205 	 *    corresponding DIMM is not present.
206 	 *
207 	 *    If self-refresh exit is selected, LMC executes the required SRX
208 	 *    command followed by a refresh and ZQ calibration. Section 4.5
209 	 *    describes behavior of a REF + ZQCS.  LMC does not write the DDR3
210 	 *    mode registers as part of this sequence, and the mode register
211 	 *    parameters must match at self-refresh entry and exit times.
212 	 *
213 	 * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE]
214 	 *    to be set.
215 	 *
216 	 * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have
217 	 *    been initialized.
218 	 */
219 
220 	union cvmx_lmcx_seq_ctl seq_ctl;
221 	union cvmx_lmcx_config lmc_config;
222 	int timeout;
223 
224 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
225 	lmc_config.s.rankmask = rank_mask;
226 	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
227 
228 	seq_ctl.u64 = 0;
229 
230 	seq_ctl.s.init_start = 1;
231 	seq_ctl.s.seq_sel = sequence;
232 
233 	ddr_seq_print
234 	    ("Performing LMC sequence: rank_mask=0x%02x, sequence=0x%x, %s\n",
235 	     rank_mask, sequence, sequence_str[sequence]);
236 
237 	if (seq_ctl.s.seq_sel == 3)
238 		debug("LMC%d: Exiting Self-refresh Rank_mask:%x\n", if_num,
239 		      rank_mask);
240 
241 	lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64);
242 	lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
243 
244 	timeout = 100;
245 	do {
246 		udelay(100);	/* Wait a while */
247 		seq_ctl.u64 = lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
248 		if (--timeout == 0) {
249 			printf("Sequence %d timed out\n", sequence);
250 			break;
251 		}
252 	} while (seq_ctl.s.seq_complete != 1);
253 
254 	ddr_seq_print("           LMC sequence=%x: Completed.\n", sequence);
255 }
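
/*
 * For reference, the SEQ_SEL encodings exercised elsewhere in this file
 * (collected here as a reading aid; see the individual call sites):
 *
 *	0x3 - self-refresh exit (see the debug print above)
 *	0x7 - RCW initialization (RDIMM control words)
 *	0x8 - mode register write (MRW)
 *	0x9 - MPR register access (read/write)
 *	0xA - LMC internal VREF training
 *	0xB - LMC offset training
 *
 * Typical call, e.g. a mode register write to rank 0 of LMC0:
 *
 *	oct3_ddr3_seq(priv, 1 << 0, 0, 0x8);
 */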
256 
257 #define bdk_numa_get_address(n, p)	((p) | ((u64)n) << CVMX_NODE_MEM_SHIFT)
258 #define AREA_BASE_OFFSET		BIT_ULL(26)
259 
260 static int test_dram_byte64(struct ddr_priv *priv, int lmc, u64 p,
261 			    u64 bitmask, u64 *xor_data)
262 {
263 	u64 p1, p2, d1, d2;
264 	u64 v, v1;
265 	u64 p2offset = (1ULL << 26);	// offset to area 2
266 	u64 datamask;
267 	u64 xor;
268 	u64 i, j, k;
269 	u64 ii;
270 	int errors = 0;
271 	//u64 index;
272 	u64 pattern1 = cvmx_rng_get_random64();
273 	u64 pattern2 = 0;
274 	u64 bad_bits[2] = { 0, 0 };
275 	int kbitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
276 	union cvmx_l2c_ctl l2c_ctl;
277 	int burst;
278 	int saved_dissblkdty;
279 	int node = 0;
280 
281 	// Force full cacheline write-backs to boost traffic
282 	l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL);
283 	saved_dissblkdty = l2c_ctl.cn78xx.dissblkdty;
284 	l2c_ctl.cn78xx.dissblkdty = 1;
285 	l2c_wr(priv, CVMX_L2C_CTL_REL, l2c_ctl.u64);
286 
287 	if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
288 		kbitno = 18;
289 
290 	// Byte lanes may be clear in the mask to indicate no testing on that
291 	// lane.
292 	datamask = bitmask;
293 
294 	/*
295 	 * Add offset to both test regions to not clobber boot stuff
296 	 * when running from L2 for NAND boot.
297 	 */
298 	p += AREA_BASE_OFFSET;	// make sure base is out of the way of boot
299 
300 	// final address must include LMC and node
301 	p |= (lmc << 7);	/* Map address into proper interface */
302 	p = bdk_numa_get_address(node, p);	/* Map to node */
303 	p |= 1ull << 63;
304 
305 #define II_INC BIT_ULL(22)
306 #define II_MAX BIT_ULL(22)
307 #define K_INC  BIT_ULL(14)
308 #define K_MAX  BIT_ULL(kbitno)
309 #define J_INC  BIT_ULL(9)
310 #define J_MAX  BIT_ULL(12)
311 #define I_INC  BIT_ULL(3)
312 #define I_MAX  BIT_ULL(7)
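
	/*
	 * A note on coverage with the increments above (arithmetic only, no
	 * change in behavior): II_MAX == II_INC, so the outer ii loop runs
	 * exactly once (ii = 0). The i loop selects one of the sixteen
	 * 64-bit slots within a 128-byte cacheline, j samples eight
	 * 512-byte strides inside each 4 KB block, and k steps in 16 KB
	 * increments up to 2^kbitno (1 MB, or 256 KB on CN73XX/CNF75XX).
	 * Each test area is thus sampled at cacheline granularity rather
	 * than written end to end.
	 */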
313 
314 	debug("N%d.LMC%d: %s: phys_addr=0x%llx/0x%llx (0x%llx)\n",
315 	      node, lmc, __func__, p, p + p2offset, 1ULL << kbitno);
316 
317 	// loops are ordered so that only a single 64-bit slot is written to
318 	// each cacheline at one time, then the cachelines are forced out;
319 	// this should maximize read/write traffic
320 
321 	// FIXME? extend the range of memory tested!!
322 	for (ii = 0; ii < II_MAX; ii += II_INC) {
323 		for (i = 0; i < I_MAX; i += I_INC) {
324 			for (k = 0; k < K_MAX; k += K_INC) {
325 				for (j = 0; j < J_MAX; j += J_INC) {
326 					p1 = p + ii + k + j;
327 					p2 = p1 + p2offset;
328 
329 					v = pattern1 * (p1 + i);
330 					// write the same thing to both areas
331 					v1 = v;
332 
333 					cvmx_write64_uint64(p1 + i, v);
334 					cvmx_write64_uint64(p2 + i, v1);
335 
336 					CVMX_CACHE_WBIL2(p1, 0);
337 					CVMX_CACHE_WBIL2(p2, 0);
338 				}
339 			}
340 		}
341 	}
342 
343 	CVMX_DCACHE_INVALIDATE;
344 
345 	debug("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n", node, lmc);
346 
347 	/* Make a series of passes over the memory areas. */
348 
349 	for (burst = 0; burst < 1 /* was: dram_tune_use_bursts */ ; burst++) {
350 		u64 this_pattern = cvmx_rng_get_random64();
351 
352 		pattern2 ^= this_pattern;
353 
354 		/*
355 		 * XOR the data with a random value, applying the change to both
356 		 * memory areas.
357 		 */
358 
359 		// FIXME? extend the range of memory tested!!
360 		for (ii = 0; ii < II_MAX; ii += II_INC) {
361 			// FIXME: rearranged, did not make much difference?
362 			for (i = 0; i < I_MAX; i += I_INC) {
363 				for (k = 0; k < K_MAX; k += K_INC) {
364 					for (j = 0; j < J_MAX; j += J_INC) {
365 						p1 = p + ii + k + j;
366 						p2 = p1 + p2offset;
367 
368 						v = cvmx_read64_uint64(p1 +
369 								      i) ^
370 						    this_pattern;
371 						v1 = cvmx_read64_uint64(p2 +
372 								       i) ^
373 						    this_pattern;
374 
375 						cvmx_write64_uint64(p1 + i, v);
376 						cvmx_write64_uint64(p2 + i, v1);
377 
378 						CVMX_CACHE_WBIL2(p1, 0);
379 						CVMX_CACHE_WBIL2(p2, 0);
380 					}
381 				}
382 			}
383 		}
384 
385 		CVMX_DCACHE_INVALIDATE;
386 
387 		debug("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n",
388 		      node, lmc);
389 
390 		/*
391 		 * Look for differences in the areas. If there is a mismatch,
392 		 * reset both memory locations with the same pattern. Failing
393 		 * to do so means that on all subsequent passes the pair of
394 		 * locations remain out of sync giving spurious errors.
395 		 */
396 
397 		// FIXME: Change the loop order so that an entire cache line
398 		//        is compared at one time. This is so that a read
399 		//        error that occurs *anywhere* on the cacheline will
400 		//        be caught, rather than comparing only 1 cacheline
401 		//        slot at a time, where an error on a different
402 		//        slot will be missed that time around
403 		// Does the above make sense?
404 
405 		// FIXME? extend the range of memory tested!!
406 		for (ii = 0; ii < II_MAX; ii += II_INC) {
407 			for (k = 0; k < K_MAX; k += K_INC) {
408 				for (j = 0; j < J_MAX; j += J_INC) {
409 					p1 = p + ii + k + j;
410 					p2 = p1 + p2offset;
411 
412 					// process entire cachelines in the
413 					//innermost loop
414 					for (i = 0; i < I_MAX; i += I_INC) {
415 						int bybit = 1;
416 						// start in byte lane 0
417 						u64 bymsk = 0xffULL;
418 
419 						// FIXME: this should predict
420 						// what we find...???
421 						v = ((p1 + i) * pattern1) ^
422 							pattern2;
423 						d1 = cvmx_read64_uint64(p1 + i);
424 						d2 = cvmx_read64_uint64(p2 + i);
425 
426 						// union of error bits only in
427 						// active byte lanes
428 						xor = ((d1 ^ v) | (d2 ^ v)) &
429 							datamask;
430 
431 						if (!xor)
432 							continue;
433 
434 						// accumulate bad bits
435 						bad_bits[0] |= xor;
436 
437 						while (xor != 0) {
438 							debug("ERROR(%03d): [0x%016llX] [0x%016llX]  expected 0x%016llX d1 %016llX d2 %016llX\n",
439 							      burst, p1, p2, v,
440 							      d1, d2);
441 							// error(s) in this lane
442 							if (xor & bymsk) {
443 								// set the byte
444 								// error bit
445 								errors |= bybit;
446 								// clear byte
447 								// lane in
448 								// error bits
449 								xor &= ~bymsk;
450 								// clear the
451 								// byte lane in
452 								// the mask
453 								datamask &= ~bymsk;
454 #if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
455 								// nothing
456 								// left to do
457 								if (datamask == 0) {
458 									return errors;
459 								}
460 #endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
461 							}
462 							// move mask into
463 							// next byte lane
464 							bymsk <<= 8;
465 							// move bit into next
466 							// byte position
467 							bybit <<= 1;
468 						}
469 					}
470 					CVMX_CACHE_WBIL2(p1, 0);
471 					CVMX_CACHE_WBIL2(p2, 0);
472 				}
473 			}
474 		}
475 
476 		debug("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n",
477 		      node, lmc);
478 	}
479 
480 	if (xor_data) {		// send the bad bits back...
481 		xor_data[0] = bad_bits[0];
482 		xor_data[1] = bad_bits[1];	// let it be zeroed
483 	}
484 
485 	// Restore original setting that could enable partial cacheline writes
486 	l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL);
487 	l2c_ctl.cn78xx.dissblkdty = saved_dissblkdty;
488 	l2c_wr(priv, CVMX_L2C_CTL_REL, l2c_ctl.u64);
489 
490 	return errors;
491 }
492 
493 static void ddr4_mrw(struct ddr_priv *priv, int if_num, int rank,
494 		     int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1)
495 {
496 	union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
497 
498 	lmc_mr_mpr_ctl.u64 = 0;
499 	lmc_mr_mpr_ctl.cn78xx.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr;
500 	lmc_mr_mpr_ctl.cn78xx.mr_wr_sel = mr_wr_sel;
501 	lmc_mr_mpr_ctl.cn78xx.mr_wr_rank = rank;
502 	lmc_mr_mpr_ctl.cn78xx.mr_wr_use_default_value =
503 		(mr_wr_addr == -1) ? 1 : 0;
504 	lmc_mr_mpr_ctl.cn78xx.mr_wr_bg1 = mr_wr_bg1;
505 	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
506 
507 	/* Mode Register Write */
508 	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
509 }
510 
511 #define INV_A0_17(x)	((x) ^ 0x22bf8)
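
/*
 * A reading aid for the mask above (hedged, derived from the constant, not
 * from this file): 0x22bf8 flips address bits A3-A9, A11, A13 and A17,
 * which appear to be the address lines a DDR4 RDIMM register inverts on
 * its B-side outputs; pre-inverting the MR payload therefore makes both
 * sides of the register receive the same value. BG1 is handled separately
 * via the mr_wr_bg1 argument. Example, as used in set_mpr_mode() below:
 *
 *	INV_A0_17(1 << 2)	// 0x4 ^ 0x22bf8 == 0x22bfc
 */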
512 
513 static void set_mpr_mode(struct ddr_priv *priv, int rank_mask,
514 			 int if_num, int dimm_count, int mpr, int bg1)
515 {
516 	int rankx;
517 
518 	debug("All Ranks: Set mpr mode = %x %c-side\n",
519 	      mpr, (bg1 == 0) ? 'A' : 'B');
520 
521 	for (rankx = 0; rankx < dimm_count * 4; rankx++) {
522 		if (!(rank_mask & (1 << rankx)))
523 			continue;
524 		if (bg1 == 0) {
525 			/* MR3 A-side */
526 			ddr4_mrw(priv, if_num, rankx, mpr << 2, 3, bg1);
527 		} else {
528 			/* MR3 B-side */
529 			ddr4_mrw(priv, if_num, rankx, INV_A0_17(mpr << 2), ~3,
530 				 bg1);
531 		}
532 	}
533 }
534 
535 static void do_ddr4_mpr_read(struct ddr_priv *priv, int if_num,
536 			     int rank, int page, int location)
537 {
538 	union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
539 
540 	lmc_mr_mpr_ctl.u64 = lmc_rd(priv, CVMX_LMCX_MR_MPR_CTL(if_num));
541 	lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = 0;
542 	lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page;	/* Page */
543 	lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
544 	lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
545 	lmc_mr_mpr_ctl.cn70xx.mpr_wr = 0;	/* Read=0, Write=1 */
546 	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
547 
548 	/* MPR register access sequence */
549 	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);
550 
551 	debug("LMC_MR_MPR_CTL                  : 0x%016llx\n",
552 	      lmc_mr_mpr_ctl.u64);
553 	debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
554 	      lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
555 	debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
556 	      lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
557 	debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc   : 0x%02x\n",
558 	      lmc_mr_mpr_ctl.cn70xx.mpr_loc);
559 	debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr    : 0x%02x\n",
560 	      lmc_mr_mpr_ctl.cn70xx.mpr_wr);
561 }
562 
563 static int set_rdimm_mode(struct ddr_priv *priv, int if_num, int enable)
564 {
565 	union cvmx_lmcx_control lmc_control;
566 	int save_rdimm_mode;
567 
568 	lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
569 	save_rdimm_mode = lmc_control.s.rdimm_ena;
570 	lmc_control.s.rdimm_ena = enable;
571 	debug("Setting RDIMM_ENA = %x\n", enable);
572 	lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64);
573 
574 	return save_rdimm_mode;
575 }
576 
577 static void ddr4_mpr_read(struct ddr_priv *priv, int if_num, int rank,
578 			  int page, int location, u64 *mpr_data)
579 {
580 	do_ddr4_mpr_read(priv, if_num, rank, page, location);
581 
582 	mpr_data[0] = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
583 }
584 
585 /* Display MPR values for Page */
586 static void display_mpr_page(struct ddr_priv *priv, int rank_mask,
587 			     int if_num, int page)
588 {
589 	int rankx, location;
590 	u64 mpr_data[3];
591 
592 	for (rankx = 0; rankx < 4; rankx++) {
593 		if (!(rank_mask & (1 << rankx)))
594 			continue;
595 
596 		debug("N0.LMC%d.R%d: MPR Page %d loc [0:3]: ",
597 		      if_num, rankx, page);
598 		for (location = 0; location < 4; location++) {
599 			ddr4_mpr_read(priv, if_num, rankx, page, location,
600 				      mpr_data);
601 			debug("0x%02llx ", mpr_data[0] & 0xFF);
602 		}
603 		debug("\n");
604 
605 	}			/* for (rankx = 0; rankx < 4; rankx++) */
606 }
607 
608 static void ddr4_mpr_write(struct ddr_priv *priv, int if_num, int rank,
609 			   int page, int location, u8 mpr_data)
610 {
611 	union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
612 
613 	lmc_mr_mpr_ctl.u64 = 0;
614 	lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mpr_data;
615 	lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page;	/* Page */
616 	lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
617 	lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
618 	lmc_mr_mpr_ctl.cn70xx.mpr_wr = 1;	/* Read=0, Write=1 */
619 	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
620 
621 	/* MPR register access sequence */
622 	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);
623 
624 	debug("LMC_MR_MPR_CTL                  : 0x%016llx\n",
625 	      lmc_mr_mpr_ctl.u64);
626 	debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
627 	      lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
628 	debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
629 	      lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
630 	debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc   : 0x%02x\n",
631 	      lmc_mr_mpr_ctl.cn70xx.mpr_loc);
632 	debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr    : 0x%02x\n",
633 	      lmc_mr_mpr_ctl.cn70xx.mpr_wr);
634 }
635 
636 static void set_vref(struct ddr_priv *priv, int if_num, int rank,
637 		     int range, int value)
638 {
639 	union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
640 	union cvmx_lmcx_modereg_params3 lmc_modereg_params3;
641 	int mr_wr_addr = 0;
642 
643 	lmc_mr_mpr_ctl.u64 = 0;
644 	lmc_modereg_params3.u64 = lmc_rd(priv,
645 					 CVMX_LMCX_MODEREG_PARAMS3(if_num));
646 
647 	/* A12:A10 tCCD_L */
648 	mr_wr_addr |= lmc_modereg_params3.s.tccd_l << 10;
649 	mr_wr_addr |= 1 << 7;	/* A7 1 = Enable(Training Mode) */
650 	mr_wr_addr |= range << 6;	/* A6 vrefDQ Training Range */
651 	mr_wr_addr |= value << 0;	/* A5:A0 vrefDQ Training Value */
652 
653 	lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
654 	lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = 6;	/* Write MR6 */
655 	lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
656 	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
657 
658 	/* 0x8 = Mode Register Write */
659 	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
660 
661 	/*
662 	 * It is vendor specific whether vref_value is captured with A7=1.
663 	 * A subsequent MRS might be necessary.
664 	 */
665 	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
666 
667 	mr_wr_addr &= ~(1 << 7);	/* A7 0 = Disable(Training Mode) */
668 	lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
669 	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
670 }
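
/*
 * For reference (from the JEDEC DDR4 MR6 definition, not from this file):
 * the A6/A5:A0 fields programmed above select the DRAM's VrefDQ as a
 * fraction of VDDQ, approximately
 *
 *	range = 0 (A6 = 0): VrefDQ = 60.0% + 0.65% * value
 *	range = 1 (A6 = 1): VrefDQ = 45.0% + 0.65% * value
 *
 * e.g. range 0 with value 0x20 gives roughly 60.0 + 0.65 * 32 = 80.8% of
 * VDDQ.
 */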
671 
672 static void set_dram_output_inversion(struct ddr_priv *priv, int if_num,
673 				      int dimm_count, int rank_mask,
674 				      int inversion)
675 {
676 	union cvmx_lmcx_ddr4_dimm_ctl lmc_ddr4_dimm_ctl;
677 	union cvmx_lmcx_dimmx_params lmc_dimmx_params;
678 	union cvmx_lmcx_dimm_ctl lmc_dimm_ctl;
679 	int dimm_no;
680 
681 	/* Don't touch extended register control words */
682 	lmc_ddr4_dimm_ctl.u64 = 0;
683 	lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), lmc_ddr4_dimm_ctl.u64);
684 
685 	debug("All DIMMs: Register Control Word          RC0 : %x\n",
686 	      (inversion & 1));
687 
688 	for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) {
689 		lmc_dimmx_params.u64 =
690 		    lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num));
691 		lmc_dimmx_params.s.rc0 =
692 		    (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1);
693 
694 		lmc_wr(priv,
695 		       CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num),
696 		       lmc_dimmx_params.u64);
697 	}
698 
699 	/* LMC0_DIMM_CTL */
700 	lmc_dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
701 	lmc_dimm_ctl.s.dimm0_wmask = 0x1;
702 	lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000;
703 
704 	debug("LMC DIMM_CTL                                  : 0x%016llx\n",
705 	      lmc_dimm_ctl.u64);
706 	lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), lmc_dimm_ctl.u64);
707 
708 	oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);	/* Init RCW */
709 }
710 
711 static void write_mpr_page0_pattern(struct ddr_priv *priv, int rank_mask,
712 				    int if_num, int dimm_count, int pattern,
713 				    int location_mask)
714 {
715 	int rankx;
716 	int location;
717 
718 	for (rankx = 0; rankx < dimm_count * 4; rankx++) {
719 		if (!(rank_mask & (1 << rankx)))
720 			continue;
721 		for (location = 0; location < 4; ++location) {
722 			if (!(location_mask & (1 << location)))
723 				continue;
724 
725 			ddr4_mpr_write(priv, if_num, rankx,
726 				       /* page */ 0, /* location */ location,
727 				       pattern);
728 		}
729 	}
730 }
731 
732 static void change_rdimm_mpr_pattern(struct ddr_priv *priv, int rank_mask,
733 				     int if_num, int dimm_count)
734 {
735 	int save_ref_zqcs_int;
736 	union cvmx_lmcx_config lmc_config;
737 
738 	/*
739 	 * Okay, here is the latest sequence.  This should work for all
740 	 * chips and passes (78,88,73,etc).  This sequence should be run
741 	 * immediately after DRAM INIT.  The basic idea is to write the
742 	 * same pattern into each of the 4 MPR locations in the DRAM, so
743 	 * that the same value is returned when doing MPR reads regardless
744 	 * of the inversion state.  My advice is to put this into a
745 	 * function, change_rdimm_mpr_pattern or something like that, so
746 	 * that it can be called multiple times, as I think David wants a
747 	 * clock-like pattern for OFFSET training, but does not want a
748 	 * clock pattern for Bit-Deskew.  You should then be able to call
749 	 * this at any point in the init sequence (after DRAM init) to
750 	 * change the pattern to a new value.
751 	 * Mike
752 	 *
753 	 * A correction: PHY doesn't need any pattern during offset
754 	 * training, but needs clock like pattern for internal vref and
755 	 * bit-dskew training.  So for that reason, these steps below have
756 	 * to be conducted before those trainings to pre-condition
757 	 * the pattern.  David
758 	 *
759 	 * Note: Step 3, 4, 8 and 9 have to be done through RDIMM
760 	 * sequence. If you issue MRW sequence to do RCW write (in o78 pass
761 	 * 1 at least), LMC will still do two commands because
762 	 * CONTROL[RDIMM_ENA] is still set high. We don't want it to have
763 	 * any unintentional mode register write so it's best to do what
764 	 * Mike is doing here.
765 	 * Andrew
766 	 */
767 
768 	/* 1) Disable refresh (REF_ZQCS_INT = 0) */
769 
770 	debug("1) Disable refresh (REF_ZQCS_INT = 0)\n");
771 
772 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
773 	save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
774 	lmc_config.cn78xx.ref_zqcs_int = 0;
775 	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
776 
777 	/*
778 	 * 2) Put all devices in MPR mode (Run MRW sequence (sequence=8)
779 	 * with MODEREG_PARAMS0[MPRLOC]=0,
780 	 * MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and
781 	 * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
782 	 */
783 
784 	debug("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n");
785 
786 	/* A-side */
787 	set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 0);
788 	/* B-side */
789 	set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 1);
790 
791 	/*
792 	 * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set
793 	 * the value you would like directly into
794 	 * MR_MPR_CTL[MR_WR_ADDR]
795 	 */
796 
797 	/*
798 	 * 3) Disable RCD Parity (if previously enabled) - parity does not
799 	 * work if inversion disabled
800 	 */
801 
802 	debug("3) Disable RCD Parity\n");
803 
804 	/*
805 	 * 4) Disable Inversion in the RCD.
806 	 * a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it
807 	 * may be easier to use the MRW sequence (seq_sel=8).  Just set
808 	 * MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data,
809 	 * MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg
810 	 */
811 
812 	debug("4) Disable Inversion in the RCD.\n");
813 
814 	set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 1);
815 
816 	/*
817 	 * 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out
818 	 * non-inverted.
819 	 */
820 
821 	debug("5) Disable CONTROL[RDIMM_ENA]\n");
822 
823 	set_rdimm_mode(priv, if_num, 0);
824 
825 	/*
826 	 * 6) Write all 4 MPR registers with the desired pattern (have to
827 	 * do this for all enabled ranks)
828 	 * a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3,
829 	 * MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern
830 	 */
831 
832 	debug("6) Write all 4 MPR page 0 Training Patterns\n");
833 
834 	write_mpr_page0_pattern(priv, rank_mask, if_num, dimm_count, 0x55, 0x8);
835 
836 	/* 7) Re-enable RDIMM_ENA */
837 
838 	debug("7) Re-enable RDIMM_ENA\n");
839 
840 	set_rdimm_mode(priv, if_num, 1);
841 
842 	/* 8) Re-enable RDIMM inversion */
843 
844 	debug("8) Re-enable RDIMM inversion\n");
845 
846 	set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 0);
847 
848 	/* 9) Re-enable RDIMM parity (if desired) */
849 
850 	debug("9) Re-enable RDIMM parity (if desired)\n");
851 
852 	/*
853 	 * 10)Take B-side devices out of MPR mode (Run MRW sequence
854 	 * (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0,
855 	 * MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and
856 	 * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
857 	 */
858 
859 	debug("10)Take B-side devices out of MPR mode\n");
860 
861 	set_mpr_mode(priv, rank_mask, if_num, dimm_count,
862 		     /* mpr */ 0, /* bg1 */ 1);
863 
864 	/*
865 	 * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and
866 	 * set the value you would like directly into MR_MPR_CTL[MR_WR_ADDR]
867 	 */
868 
869 	/* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */
870 
871 	debug("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n");
872 
873 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
874 	lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
875 	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
876 }
877 
878 static int validate_hwl_seq(int *wl, int *seq)
879 {
880 	// sequence index, step through the sequence array
881 	int seqx;
882 	int bitnum;
883 
884 	seqx = 0;
885 
886 	while (seq[seqx + 1] >= 0) {	// stop on next seq entry == -1
887 		// but now, check current versus next
888 		bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx + 1]];
889 		// magic validity number (see matrix above)
890 		if (!((1 << bitnum) & 0xBDE7))
891 			return 1;
892 		seqx++;
893 	}
894 
895 	return 0;
896 }
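
/*
 * Expanding the 0xBDE7 constant above (reading aid only): with
 * bitnum = (cur << 2) | next, the mask marks these (cur, next) pairs of
 * 2-bit HW write-leveling settings as valid:
 *
 *	cur = 0: next 0, 1, 2		cur = 1: next 1, 2, 3
 *	cur = 2: next 0, 2, 3		cur = 3: next 0, 1, 3
 *
 * i.e. the only rejected transitions are those where the next byte lane's
 * setting is exactly one step below the current one (modulo 4).
 */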
897 
898 static int validate_hw_wl_settings(int if_num,
899 				   union cvmx_lmcx_wlevel_rankx
900 				   *lmc_wlevel_rank, int is_rdimm, int ecc_ena)
901 {
902 	int wl[9], byte, errors;
903 
904 	// arrange the sequences so
905 	// index 0 has byte 0, etc, ECC in middle
906 	int useq[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, -1 };
907 	// index 0 is ECC, then go down
908 	int rseq1[] = { 8, 3, 2, 1, 0, -1 };
909 	// index 0 has byte 4, then go up
910 	int rseq2[] = { 4, 5, 6, 7, -1 };
911 	// index 0 has byte 0, etc, no ECC
912 	int useqno[] = { 0, 1, 2, 3, 4, 5, 6, 7, -1 };
913 	// index 0 is byte 3, then go down, no ECC
914 	int rseq1no[] = { 3, 2, 1, 0, -1 };
915 
916 	// in the CSR, bytes 0-7 are always data, byte 8 is ECC
917 	for (byte = 0; byte < (8 + ecc_ena); byte++) {
918 		// preprocess :-)
919 		wl[byte] = (get_wl_rank(lmc_wlevel_rank, byte) >>
920 			    1) & 3;
921 	}
922 
923 	errors = 0;
924 	if (is_rdimm) {		// RDIMM order
925 		errors = validate_hwl_seq(wl, (ecc_ena) ? rseq1 : rseq1no);
926 		errors += validate_hwl_seq(wl, rseq2);
927 	} else {		// UDIMM order
928 		errors = validate_hwl_seq(wl, (ecc_ena) ? useq : useqno);
929 	}
930 
931 	return errors;
932 }
933 
934 static unsigned int extr_wr(u64 u, int x)
935 {
936 	return (unsigned int)(((u >> (x * 12 + 5)) & 0x3ULL) |
937 			      ((u >> (51 + x - 2)) & 0x4ULL));
938 }
939 
940 static void insrt_wr(u64 *up, int x, int v)
941 {
942 	u64 u = *up;
943 
944 	u &= ~(((0x3ULL) << (x * 12 + 5)) | ((0x1ULL) << (51 + x)));
945 	*up = (u | ((v & 0x3ULL) << (x * 12 + 5)) |
946 	       ((v & 0x4ULL) << (51 + x - 2)));
947 }
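
/*
 * Worked example of the packing above (the field is assumed to be the
 * per-rank 3-bit WR encoding of a mode-register-parameters CSR): for
 * x = 2 the two LSBs of the value sit at CSR bits 30:29 (2 * 12 + 5) and
 * the MSB at CSR bit 53 (51 + 2), so
 *
 *	insrt_wr(&u, 2, 5);	// sets bits 30:29 to 0b01 and bit 53 to 1
 *	extr_wr(u, 2);		// returns 5 again
 */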
948 
949 /* Read out Deskew Settings for DDR */
950 
951 struct deskew_bytes {
952 	u16 bits[8];
953 };
954 
955 struct deskew_data {
956 	struct deskew_bytes bytes[9];
957 };
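
/*
 * Each bits[] entry holds the raw 10-bit dsk_dbg_rd_data value for one DQ
 * bit: the deskew setting in bits [9:3] and status flags in bits [2:0],
 * where (as the validation and averaging code below uses them) bit 0 means
 * "locked" and bits 1-2 flag saturation. Decoding sketch:
 *
 *	u16 raw    = dskdat.bytes[lane].bits[bit];
 *	u16 flags  = raw & 7;	// bit 0 locked, bits 2:1 saturated
 *	u16 deskew = raw >> 3;	// 7-bit deskew setting
 */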
958 
959 struct dac_data {
960 	int bytes[9];
961 };
962 
963 // T88 pass 1, skip 4=DAC
964 static const u8 dsk_bit_seq_p1[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
965 // T88 Pass 2, skip 4=DAC and 5=DBI
966 static const u8 dsk_bit_seq_p2[8] = { 0, 1, 2, 3, 6, 7, 8, 9 };
967 
968 static void get_deskew_settings(struct ddr_priv *priv, int if_num,
969 				struct deskew_data *dskdat)
970 {
971 	union cvmx_lmcx_phy_ctl phy_ctl;
972 	union cvmx_lmcx_config lmc_config;
973 	int bit_index;
974 	int byte_lane, byte_limit;
975 	// NOTE: these are for pass 2.x
976 	int is_o78p2 = !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X);
977 	const u8 *bit_seq = (is_o78p2) ? dsk_bit_seq_p2 : dsk_bit_seq_p1;
978 
979 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
980 	byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
981 
982 	memset(dskdat, 0, sizeof(*dskdat));
983 
984 	phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
985 	phy_ctl.s.dsk_dbg_clk_scaler = 3;
986 
987 	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
988 		phy_ctl.s.dsk_dbg_byte_sel = byte_lane;	// set byte lane
989 
990 		for (bit_index = 0; bit_index < 8; ++bit_index) {
991 			// set bit number and start read sequence
992 			phy_ctl.s.dsk_dbg_bit_sel = bit_seq[bit_index];
993 			phy_ctl.s.dsk_dbg_rd_start = 1;
994 			lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
995 
996 			// poll for read sequence to complete
997 			do {
998 				phy_ctl.u64 =
999 					lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1000 			} while (phy_ctl.s.dsk_dbg_rd_complete != 1);
1001 
1002 			// record the data
1003 			dskdat->bytes[byte_lane].bits[bit_index] =
1004 				phy_ctl.s.dsk_dbg_rd_data & 0x3ff;
1005 		}
1006 	}
1007 }
1008 
1009 static void display_deskew_settings(struct ddr_priv *priv, int if_num,
1010 				    struct deskew_data *dskdat,
1011 				    int print_enable)
1012 {
1013 	int byte_lane;
1014 	int bit_num;
1015 	u16 flags, deskew;
1016 	union cvmx_lmcx_config lmc_config;
1017 	int byte_limit;
1018 	const char *fc = " ?-=+*#&";
1019 
1020 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1021 	byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1022 
1023 	if (print_enable) {
1024 		debug("N0.LMC%d: Deskew Data:              Bit =>      :",
1025 		      if_num);
1026 		for (bit_num = 7; bit_num >= 0; --bit_num)
1027 			debug(" %3d  ", bit_num);
1028 		debug("\n");
1029 	}
1030 
1031 	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1032 		if (print_enable)
1033 			debug("N0.LMC%d: Bit Deskew Byte %d %s               :",
1034 			      if_num, byte_lane,
1035 			      (print_enable >= 3) ? "FINAL" : "     ");
1036 
1037 		for (bit_num = 7; bit_num >= 0; --bit_num) {
1038 			flags = dskdat->bytes[byte_lane].bits[bit_num] & 7;
1039 			deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3;
1040 
1041 			if (print_enable)
1042 				debug(" %3d %c", deskew, fc[flags ^ 1]);
1043 
1044 		}		/* for (bit_num = 7; bit_num >= 0; --bit_num) */
1045 
1046 		if (print_enable)
1047 			debug("\n");
1048 	}
1049 }
1050 
1051 static void override_deskew_settings(struct ddr_priv *priv, int if_num,
1052 				     struct deskew_data *dskdat)
1053 {
1054 	union cvmx_lmcx_phy_ctl phy_ctl;
1055 	union cvmx_lmcx_config lmc_config;
1056 
1057 	int bit, byte_lane, byte_limit;
1058 	u64 csr_data;
1059 
1060 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1061 	byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1062 
1063 	phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1064 
1065 	phy_ctl.s.phy_reset = 0;
1066 	phy_ctl.s.dsk_dbg_num_bits_sel = 1;
1067 	phy_ctl.s.dsk_dbg_offset = 0;
1068 	phy_ctl.s.dsk_dbg_clk_scaler = 3;
1069 
1070 	phy_ctl.s.dsk_dbg_wr_mode = 1;
1071 	phy_ctl.s.dsk_dbg_load_dis = 0;
1072 	phy_ctl.s.dsk_dbg_overwrt_ena = 0;
1073 
1074 	phy_ctl.s.phy_dsk_reset = 0;
1075 
1076 	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1077 	lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1078 
1079 	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1080 		csr_data = 0;
1081 		// FIXME: can we ignore DBI?
1082 		for (bit = 0; bit < 8; ++bit) {
1083 			// fetch input and adjust
1084 			u64 bits = (dskdat->bytes[byte_lane].bits[bit] >> 3) &
1085 				0x7F;
1086 
1087 			/*
1088 			 * lmc_general_purpose0.data[6:0]    // DQ0
1089 			 * lmc_general_purpose0.data[13:7]   // DQ1
1090 			 * lmc_general_purpose0.data[20:14]  // DQ2
1091 			 * lmc_general_purpose0.data[27:21]  // DQ3
1092 			 * lmc_general_purpose0.data[34:28]  // DQ4
1093 			 * lmc_general_purpose0.data[41:35]  // DQ5
1094 			 * lmc_general_purpose0.data[48:42]  // DQ6
1095 			 * lmc_general_purpose0.data[55:49]  // DQ7
1096 			 * lmc_general_purpose0.data[62:56]  // DBI
1097 			 */
1098 			csr_data |= (bits << (7 * bit));
1099 
1100 		} /* for (bit = 0; bit < 8; ++bit) */
1101 
1102 		// update GP0 with the bit data for this byte lane
1103 		lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num), csr_data);
1104 		lmc_rd(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num));
1105 
1106 		// start the deskew load sequence
1107 		phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
1108 		phy_ctl.s.dsk_dbg_rd_start = 1;
1109 		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1110 
1111 		// poll for read sequence to complete
1112 		do {
1113 			udelay(100);
1114 			phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1115 		} while (phy_ctl.s.dsk_dbg_rd_complete != 1);
1116 	}
1117 
1118 	// tell phy to use the new settings
1119 	phy_ctl.s.dsk_dbg_overwrt_ena = 1;
1120 	phy_ctl.s.dsk_dbg_rd_start = 0;
1121 	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1122 
1123 	phy_ctl.s.dsk_dbg_wr_mode = 0;
1124 	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1125 }
1126 
1127 static void process_by_rank_dac(struct ddr_priv *priv, int if_num,
1128 				int rank_mask, struct dac_data *dacdat)
1129 {
1130 	union cvmx_lmcx_config lmc_config;
1131 	int rankx, byte_lane;
1132 	int byte_limit;
1133 	int rank_count;
1134 	struct dac_data dacsum;
1135 	int lane_probs;
1136 
1137 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1138 	byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1139 
1140 	memset((void *)&dacsum, 0, sizeof(dacsum));
1141 	rank_count = 0;
1142 	lane_probs = 0;
1143 
1144 	for (rankx = 0; rankx < 4; rankx++) {
1145 		if (!(rank_mask & (1 << rankx)))
1146 			continue;
1147 		rank_count++;
1148 
1149 		display_dac_dbi_settings(if_num, /*dac */ 1,
1150 					 lmc_config.s.ecc_ena,
1151 					 &dacdat[rankx].bytes[0],
1152 					 "By-Ranks VREF");
1153 		// sum
1154 		for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1155 			if (rank_count == 2) {
1156 				int ranks_diff =
1157 				    abs((dacsum.bytes[byte_lane] -
1158 					 dacdat[rankx].bytes[byte_lane]));
1159 
1160 				// FIXME: is 19 a good number?
1161 				if (ranks_diff > 19)
1162 					lane_probs |= (1 << byte_lane);
1163 			}
1164 			dacsum.bytes[byte_lane] +=
1165 			    dacdat[rankx].bytes[byte_lane];
1166 		}
1167 	}
1168 
1169 	// average
1170 	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++)
1171 		dacsum.bytes[byte_lane] /= rank_count;	// FIXME: nint?
1172 
1173 	display_dac_dbi_settings(if_num, /*dac */ 1, lmc_config.s.ecc_ena,
1174 				 &dacsum.bytes[0], "All-Rank VREF");
1175 
1176 	if (lane_probs) {
1177 		debug("N0.LMC%d: All-Rank VREF DAC Problem Bytelane(s): 0x%03x\n",
1178 		      if_num, lane_probs);
1179 	}
1180 
1181 	// finally, write the averaged DAC values
1182 	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1183 		load_dac_override(priv, if_num, dacsum.bytes[byte_lane],
1184 				  byte_lane);
1185 	}
1186 }
1187 
1188 static void process_by_rank_dsk(struct ddr_priv *priv, int if_num,
1189 				int rank_mask, struct deskew_data *dskdat)
1190 {
1191 	union cvmx_lmcx_config lmc_config;
1192 	int rankx, lane, bit;
1193 	int byte_limit;
1194 	struct deskew_data dsksum, dskcnt;
1195 	u16 deskew;
1196 
1197 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1198 	byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1199 
1200 	memset((void *)&dsksum, 0, sizeof(dsksum));
1201 	memset((void *)&dskcnt, 0, sizeof(dskcnt));
1202 
1203 	for (rankx = 0; rankx < 4; rankx++) {
1204 		if (!(rank_mask & (1 << rankx)))
1205 			continue;
1206 
1207 		// sum ranks
1208 		for (lane = 0; lane < byte_limit; lane++) {
1209 			for (bit = 0; bit < 8; ++bit) {
1210 				deskew = dskdat[rankx].bytes[lane].bits[bit];
1211 				// if flags indicate sat hi or lo, skip it
1212 				if (deskew & 6)
1213 					continue;
1214 
1215 				// clear flags
1216 				dsksum.bytes[lane].bits[bit] +=
1217 					deskew & ~7;
1218 				// count entries
1219 				dskcnt.bytes[lane].bits[bit] += 1;
1220 			}
1221 		}
1222 	}
1223 
1224 	// average ranks
1225 	for (lane = 0; lane < byte_limit; lane++) {
1226 		for (bit = 0; bit < 8; ++bit) {
1227 			int div = dskcnt.bytes[lane].bits[bit];
1228 
1229 			if (div > 0) {
1230 				dsksum.bytes[lane].bits[bit] /= div;
1231 				// clear flags
1232 				dsksum.bytes[lane].bits[bit] &= ~7;
1233 				// set LOCK
1234 				dsksum.bytes[lane].bits[bit] |= 1;
1235 			} else {
1236 				// FIXME? use reset value?
1237 				dsksum.bytes[lane].bits[bit] =
1238 					(64 << 3) | 1;
1239 			}
1240 		}
1241 	}
1242 
1243 	// TME for FINAL version
1244 	display_deskew_settings(priv, if_num, &dsksum, /*VBL_TME */ 3);
1245 
1246 	// finally, write the averaged DESKEW values
1247 	override_deskew_settings(priv, if_num, &dsksum);
1248 }
1249 
1250 struct deskew_counts {
1251 	int saturated;		// number saturated
1252 	int unlocked;		// number unlocked
1253 	int nibrng_errs;	// nibble range errors
1254 	int nibunl_errs;	// nibble unlocked errors
1255 	int bitval_errs;	// bit value errors
1256 };
1257 
1258 #define MIN_BITVAL  17
1259 #define MAX_BITVAL 110
1260 
1261 static void validate_deskew_training(struct ddr_priv *priv, int rank_mask,
1262 				     int if_num, struct deskew_counts *counts,
1263 				     int print_flags)
1264 {
1265 	int byte_lane, bit_index, nib_num;
1266 	int nibrng_errs, nibunl_errs, bitval_errs;
1267 	union cvmx_lmcx_config lmc_config;
1268 	s16 nib_min[2], nib_max[2], nib_unl[2];
1269 	int byte_limit;
1270 	int print_enable = print_flags & 1;
1271 	struct deskew_data dskdat;
1272 	s16 flags, deskew;
1273 	const char *fc = " ?-=+*#&";
1274 	int bit_last;
1275 
1276 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1277 	byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
1278 
1279 	memset(counts, 0, sizeof(struct deskew_counts));
1280 
1281 	get_deskew_settings(priv, if_num, &dskdat);
1282 
1283 	if (print_enable) {
1284 		debug("N0.LMC%d: Deskew Settings:          Bit =>      :",
1285 		      if_num);
1286 		for (bit_index = 7; bit_index >= 0; --bit_index)
1287 			debug(" %3d  ", bit_index);
1288 		debug("\n");
1289 	}
1290 
1291 	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1292 		if (print_enable)
1293 			debug("N0.LMC%d: Bit Deskew Byte %d %s               :",
1294 			      if_num, byte_lane,
1295 			      (print_flags & 2) ? "FINAL" : "     ");
1296 
1297 		nib_min[0] = 127;
1298 		nib_min[1] = 127;
1299 		nib_max[0] = 0;
1300 		nib_max[1] = 0;
1301 		nib_unl[0] = 0;
1302 		nib_unl[1] = 0;
1303 
1304 		if (lmc_config.s.mode32b == 1 && byte_lane == 4) {
1305 			bit_last = 3;
1306 			if (print_enable)
1307 				debug("                        ");
1308 		} else {
1309 			bit_last = 7;
1310 		}
1311 
1312 		for (bit_index = bit_last; bit_index >= 0; --bit_index) {
1313 			nib_num = (bit_index > 3) ? 1 : 0;
1314 
1315 			flags = dskdat.bytes[byte_lane].bits[bit_index] & 7;
1316 			deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3;
1317 
1318 			counts->saturated += !!(flags & 6);
1319 
1320 			// Do range calc even when locked; it could happen
1321 			// that a bit is still unlocked after final retry,
1322 			// and we want to have an external retry if a RANGE
1323 			// error is present at exit...
1324 			nib_min[nib_num] = min(nib_min[nib_num], deskew);
1325 			nib_max[nib_num] = max(nib_max[nib_num], deskew);
1326 
1327 			if (!(flags & 1)) {	// only when not locked
1328 				counts->unlocked += 1;
1329 				nib_unl[nib_num] += 1;
1330 			}
1331 
1332 			if (print_enable)
1333 				debug(" %3d %c", deskew, fc[flags ^ 1]);
1334 		}
1335 
1336 		/*
1337 		 * Now look for nibble errors
1338 		 *
1339 		 * For bit 55, it looks like a bit deskew problem. When the
1340 		 * upper nibble of byte 6 needs to go to saturation, bit 7
1341 		 * of byte 6 locks prematurely at 64. For DIMMs with raw
1342 		 * card A and B, can we reset the deskew training when we
1343 		 * encounter this case? The reset criteria should be looking
1344 		 * at one nibble at a time for raw card A and B; if the
1345 		 * bit-deskew setting within a nibble is different by > 33,
1346 		 * we'll issue a reset to the bit deskew training.
1347 		 *
1348 		 * LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64
1349 		 */
1350 		// upper nibble range, then lower nibble range
1351 		nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0;
1352 		nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0;
1353 
1354 		// check for nibble all unlocked
1355 		nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0;
1356 
1357 		// check for bit value errors, ie < 17 or > 110
1358 		// FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL
1359 		bitval_errs = ((nib_max[1] > MAX_BITVAL) ||
1360 			       (nib_max[0] > MAX_BITVAL)) ? 1 : 0;
1361 		bitval_errs |= ((nib_min[1] < MIN_BITVAL) ||
1362 				(nib_min[0] < MIN_BITVAL)) ? 1 : 0;
1363 
1364 		if ((nibrng_errs != 0 || nibunl_errs != 0 ||
1365 		     bitval_errs != 0) && print_enable) {
1366 			debug(" %c%c%c",
1367 			      (nibrng_errs) ? 'R' : ' ',
1368 			      (nibunl_errs) ? 'U' : ' ',
1369 			      (bitval_errs) ? 'V' : ' ');
1370 		}
1371 
1372 		if (print_enable)
1373 			debug("\n");
1374 
1375 		counts->nibrng_errs |= (nibrng_errs << byte_lane);
1376 		counts->nibunl_errs |= (nibunl_errs << byte_lane);
1377 		counts->bitval_errs |= (bitval_errs << byte_lane);
1378 	}
1379 }
1380 
1381 static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
1382 					int dac_value, int byte)
1383 {
1384 	union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
1385 	// individual byte lanes map to byte + 1 in byte_sel; 0x0A selects ALL
1386 	int bytex = (byte == 0x0A) ? byte : byte + 1;
1387 
1388 	ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
1389 
1390 	SET_DDR_DLL_CTL3(byte_sel, bytex);
1391 	SET_DDR_DLL_CTL3(offset, dac_value >> 1);
1392 
1393 	ddr_dll_ctl3.cn73xx.bit_select = 0x9;	/* No-op */
1394 	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1395 
1396 	ddr_dll_ctl3.cn73xx.bit_select = 0xC;	/* vref bypass setting load */
1397 	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1398 
1399 	ddr_dll_ctl3.cn73xx.bit_select = 0xD;	/* vref bypass on. */
1400 	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1401 
1402 	ddr_dll_ctl3.cn73xx.bit_select = 0x9;	/* No-op */
1403 	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1404 
1405 	lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));	// flush writes
1406 
1407 	return (unsigned short)GET_DDR_DLL_CTL3(offset);
1408 }
1409 
1410 // arg dac_or_dbi is 1 for DAC, 0 for DBI
1411 // returns 9 entries (bytelanes 0 through 8) in settings[]
1412 // returns 0 if OK, -1 if a problem
1413 static int read_dac_dbi_settings(struct ddr_priv *priv, int if_num,
1414 				 int dac_or_dbi, int *settings)
1415 {
1416 	union cvmx_lmcx_phy_ctl phy_ctl;
1417 	int byte_lane, bit_num;
1418 	int deskew;
1419 	int dac_value;
1420 	int new_deskew_layout = 0;
1421 
1422 	new_deskew_layout = octeon_is_cpuid(OCTEON_CN73XX) ||
1423 		octeon_is_cpuid(OCTEON_CNF75XX);
1424 	new_deskew_layout |= (octeon_is_cpuid(OCTEON_CN78XX) &&
1425 			      !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X));
1426 
1427 	phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1428 	phy_ctl.s.dsk_dbg_clk_scaler = 3;
1429 	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1430 
1431 	bit_num = (dac_or_dbi) ? 4 : 5;
1432 	// DBI not available
1433 	if (bit_num == 5 && !new_deskew_layout)
1434 		return -1;
1435 
1436 	// FIXME: always assume ECC is available
1437 	for (byte_lane = 8; byte_lane >= 0; --byte_lane) {
1438 		//set byte lane and bit to read
1439 		phy_ctl.s.dsk_dbg_bit_sel = bit_num;
1440 		phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
1441 		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1442 
1443 		//start read sequence
1444 		phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1445 		phy_ctl.s.dsk_dbg_rd_start = 1;
1446 		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1447 
1448 		//poll for read sequence to complete
1449 		do {
1450 			phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1451 		} while (phy_ctl.s.dsk_dbg_rd_complete != 1);
1452 
1453 		// keep the flag bits where they are for DBI
1454 		deskew = phy_ctl.s.dsk_dbg_rd_data; /* >> 3 */
1455 		dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff;
1456 
1457 		settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew;
1458 	}
1459 
1460 	return 0;
1461 }
1462 
1463 // print out the DBI settings array
1464 // arg dac_or_dbi is 1 for DAC, 0 for DBI
1465 static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
1466 				     int ecc_ena, int *settings, char *title)
1467 {
1468 	int byte;
1469 	int flags;
1470 	int deskew;
1471 	const char *fc = " ?-=+*#&";
1472 
1473 	debug("N0.LMC%d: %s %s Settings %d:0 :",
1474 	      lmc, title, (dac_or_dbi) ? "DAC" : "DBI", 7 + ecc_ena);
1475 	// FIXME: what about 32-bit mode?
1476 	for (byte = (7 + ecc_ena); byte >= 0; --byte) {
1477 		if (dac_or_dbi) {	// DAC
1478 			flags = 1;	// say it's locked to get a blank flag char
1479 			deskew = settings[byte] & 0xff;
1480 		} else {	// DBI
1481 			flags = settings[byte] & 7;
1482 			deskew = (settings[byte] >> 3) & 0x7f;
1483 		}
1484 		debug(" %3d %c", deskew, fc[flags ^ 1]);
1485 	}
1486 	debug("\n");
1487 }
1488 
1489 // Find a HWL majority
1490 static int find_wl_majority(struct wlevel_bitcnt *bc, int *mx, int *mc,
1491 			    int *xc, int *cc)
1492 {
1493 	int ix, ic;
1494 
1495 	*mx = -1;
1496 	*mc = 0;
1497 	*xc = 0;
1498 	*cc = 0;
1499 
1500 	for (ix = 0; ix < 4; ix++) {
1501 		ic = bc->bitcnt[ix];
1502 
1503 		// make a bitmask of the ones with a count
1504 		if (ic > 0) {
1505 			*mc |= (1 << ix);
1506 			*cc += 1;	// count how many had non-zero counts
1507 		}
1508 
1509 		// find the majority
1510 		if (ic > *xc) {	// new max?
1511 			*xc = ic;	// yes
1512 			*mx = ix;	// set its index
1513 		}
1514 	}
1515 
1516 	return (*mx << 1);
1517 }
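
/*
 * Example (illustrative numbers only): after WLEVEL_LOOPS_DEFAULT = 5
 * samples a byte lane might have bc->bitcnt[] = { 1, 3, 1, 0 }. Then
 *
 *	setting = find_wl_majority(&bc, &mx, &mc, &xc, &cc);
 *
 * leaves mx = 1 (majority index), xc = 3 (its count), mc = 0x7 (bitmask of
 * indices seen), cc = 3 (distinct values seen) and returns mx << 1 = 2.
 */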
1518 
1519 // Evaluate the DAC settings array
1520 static int evaluate_dac_settings(int if_64b, int ecc_ena, int *settings)
1521 {
1522 	int byte, lane, dac, comp;
1523 	int last = (if_64b) ? 7 : 3;
1524 
1525 	// FIXME: change the check...???
1526 	// this looks only for sets of DAC values whose max/min differ by a lot
1527 	// let any EVEN go so long as it is within range...
1528 	for (byte = (last + ecc_ena); byte >= 0; --byte) {
1529 		dac = settings[byte] & 0xff;
1530 
1531 		for (lane = (last + ecc_ena); lane >= 0; --lane) {
1532 			comp = settings[lane] & 0xff;
1533 			if (abs((dac - comp)) > 25)
1534 				return 1;
1535 		}
1536 	}
1537 
1538 	return 0;
1539 }
1540 
1541 static void perform_offset_training(struct ddr_priv *priv, int rank_mask,
1542 				    int if_num)
1543 {
1544 	union cvmx_lmcx_phy_ctl lmc_phy_ctl;
1545 	u64 orig_phy_ctl;
1546 	const char *s;
1547 
1548 	/*
1549 	 * 4.8.6 LMC Offset Training
1550 	 *
1551 	 * LMC requires input-receiver offset training.
1552 	 *
1553 	 * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1
1554 	 */
1555 	lmc_phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1556 	orig_phy_ctl = lmc_phy_ctl.u64;
1557 	lmc_phy_ctl.s.dac_on = 1;
1558 
1559 	// allow full CSR override
1560 	s = lookup_env_ull(priv, "ddr_phy_ctl");
1561 	if (s)
1562 		lmc_phy_ctl.u64 = strtoull(s, NULL, 0);
1563 
1564 	// do not print or write if CSR does not change...
1565 	if (lmc_phy_ctl.u64 != orig_phy_ctl) {
1566 		debug("PHY_CTL                                       : 0x%016llx\n",
1567 		      lmc_phy_ctl.u64);
1568 		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), lmc_phy_ctl.u64);
1569 	}
1570 
1571 	/*
1572 	 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and
1573 	 *    LMC(0)_SEQ_CTL[INIT_START] = 1.
1574 	 *
1575 	 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
1576 	 */
1577 	/* Start Offset training sequence */
1578 	oct3_ddr3_seq(priv, rank_mask, if_num, 0x0B);
1579 }
1580 
1581 static void perform_internal_vref_training(struct ddr_priv *priv,
1582 					   int rank_mask, int if_num)
1583 {
1584 	union cvmx_lmcx_ext_config ext_config;
1585 	union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
1586 
1587 	// First, make sure all byte-lanes are out of VREF bypass mode
1588 	ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
1589 
1590 	ddr_dll_ctl3.cn78xx.byte_sel = 0x0A;	/* all byte-lanes */
1591 	ddr_dll_ctl3.cn78xx.bit_select = 0x09;	/* No-op */
1592 	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1593 
1594 	ddr_dll_ctl3.cn78xx.bit_select = 0x0E;	/* vref bypass off. */
1595 	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1596 
1597 	ddr_dll_ctl3.cn78xx.bit_select = 0x09;	/* No-op */
1598 	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1599 
1600 	/*
1601 	 * 4.8.7 LMC Internal vref Training
1602 	 *
1603 	 * LMC requires input-reference-voltage training.
1604 	 *
1605 	 * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0.
1606 	 */
1607 	ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
1608 	ext_config.s.vrefint_seq_deskew = 0;
1609 
1610 	ddr_seq_print("Performing LMC sequence: vrefint_seq_deskew = %d\n",
1611 		      ext_config.s.vrefint_seq_deskew);
1612 
1613 	lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);
1614 
1615 	/*
1616 	 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and
1617 	 *    LMC(0)_SEQ_CTL[INIT_START] = 1.
1618 	 *
1619 	 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
1620 	 */
1621 	/* Start LMC Internal vref Training */
1622 	oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
1623 }
1624 
1625 #define dbg_avg(format, ...)	// debug(format, ##__VA_ARGS__)
1626 
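/*
 * Average the samples after dropping the single lowest and single highest
 * value, then bias the result toward an odd setting: use the truncated
 * average if it is odd, else the nearest-int average if that is odd, else
 * the truncated average plus one.
 *
 * Worked example with hypothetical samples {62, 63, 64, 64, 64, 65, 66}
 * (num_samples = 7): sum = 448, asum = 448 - 62 - 66 = 320,
 * trunc = 320 / 5 = 64 (even), sadj rounds to 64 (even), so the
 * returned value is trunc + 1 = 65.
 */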
1627 static int process_samples_average(s16 *bytes, int num_samples,
1628 				   int lmc, int lane_no)
1629 {
1630 	int i, sadj, sum = 0, ret, asum, trunc;
1631 	s16 smin = 32767, smax = -32768;
1632 	int nmin, nmax;
1633 	//int rng;
1634 
1635 	dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no);
1636 
1637 	for (i = 0; i < num_samples; i++) {
1638 		sum += bytes[i];
1639 		if (bytes[i] < smin)
1640 			smin = bytes[i];
1641 		if (bytes[i] > smax)
1642 			smax = bytes[i];
1643 		dbg_avg(" %3d", bytes[i]);
1644 	}
1645 
1646 	nmin = 0;
1647 	nmax = 0;
1648 	for (i = 0; i < num_samples; i++) {
1649 		if (bytes[i] == smin)
1650 			nmin += 1;
1651 		if (bytes[i] == smax)
1652 			nmax += 1;
1653 	}
1654 	dbg_avg(" (min=%3d/%d, max=%3d/%d, range=%2d, samples=%2d)",
1655 		smin, nmin, smax, nmax, smax - smin, num_samples);
1656 
1657 	asum = sum - smin - smax;
1658 
1659 	sadj = divide_nint(asum * 10, (num_samples - 2));
1660 
1661 	trunc = asum / (num_samples - 2);
1662 
1663 	dbg_avg(" [%3d.%d, %3d]", sadj / 10, sadj % 10, trunc);
1664 
1665 	sadj = divide_nint(sadj, 10);
1666 	if (trunc & 1)
1667 		ret = trunc;
1668 	else if (sadj & 1)
1669 		ret = sadj;
1670 	else
1671 		ret = trunc + 1;
1672 
1673 	dbg_avg(" -> %3d\n", ret);
1674 
1675 	return ret;
1676 }
1677 
1678 #define DEFAULT_SAT_RETRY_LIMIT    11	// 1 + 10 retries
1679 
1680 #define default_lock_retry_limit   20	// 20 retries
1681 #define deskew_validation_delay    10000	// 10 millisecs
1682 
1683 static int perform_deskew_training(struct ddr_priv *priv, int rank_mask,
1684 				   int if_num, int spd_rawcard_aorb)
1685 {
1686 	int unsaturated, locked;
1687 	int sat_retries, sat_retries_limit;
1688 	int lock_retries, lock_retries_total, lock_retries_limit;
1689 	int print_first;
1690 	int print_them_all;
1691 	struct deskew_counts dsk_counts;
1692 	union cvmx_lmcx_phy_ctl phy_ctl;
1693 	char *s;
1694 	int has_no_sat = octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
1695 		octeon_is_cpuid(OCTEON_CNF75XX);
1696 	int disable_bitval_retries = 1;	// default to disabled
1697 
1698 	debug("N0.LMC%d: Performing Deskew Training.\n", if_num);
1699 
1700 	sat_retries = 0;
1701 	sat_retries_limit = (has_no_sat) ? 5 : DEFAULT_SAT_RETRY_LIMIT;
1702 
1703 	lock_retries_total = 0;
1704 	unsaturated = 0;
1705 	print_first = 1;	// print the first one
1706 	// set to true for printing all normal deskew attempts
1707 	print_them_all = 0;
1708 
1709 	// provide override for bitval_errs causing internal VREF retries
1710 	s = env_get("ddr_disable_bitval_retries");
1711 	if (s)
1712 		disable_bitval_retries = !!simple_strtoul(s, NULL, 0);
1713 
1714 	lock_retries_limit = default_lock_retry_limit;
1715 	if ((octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) ||
1716 	    (octeon_is_cpuid(OCTEON_CN73XX)) ||
1717 	    (octeon_is_cpuid(OCTEON_CNF75XX)))
1718 		lock_retries_limit *= 2;	// give new chips twice as many
1719 
1720 	do {			/* while (sat_retries < sat_retry_limit) */
1721 		/*
1722 		 * 4.8.8 LMC Deskew Training
1723 		 *
1724 		 * LMC requires input-read-data deskew training.
1725 		 *
1726 		 * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1.
1727 		 */
1728 
1729 		union cvmx_lmcx_ext_config ext_config;
1730 
1731 		ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
1732 		ext_config.s.vrefint_seq_deskew = 1;
1733 
1734 		ddr_seq_print
1735 		    ("Performing LMC sequence: vrefint_seq_deskew = %d\n",
1736 		     ext_config.s.vrefint_seq_deskew);
1737 
1738 		lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);
1739 
1740 		/*
1741 		 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and
1742 		 *    LMC(0)_SEQ_CTL[INIT_START] = 1.
1743 		 *
1744 		 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
1745 		 */
1746 
1747 		phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1748 		phy_ctl.s.phy_dsk_reset = 1;	/* RESET Deskew sequence */
1749 		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1750 
1751 		/* LMC Deskew Training */
1752 		oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
1753 
1754 		lock_retries = 0;
1755 
1756 perform_deskew_training:
1757 
1758 		phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1759 		phy_ctl.s.phy_dsk_reset = 0;	/* Normal Deskew sequence */
1760 		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1761 
1762 		/* LMC Deskew Training */
1763 		oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
1764 
1765 		// Moved this from validate_deskew_training
1766 		/* Allow deskew results to stabilize before evaluating them. */
1767 		udelay(deskew_validation_delay);
1768 
1769 		// Now go look at lock and saturation status...
1770 		validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
1771 					 print_first);
1772 		// after printing the first and not doing them all, no more
1773 		if (print_first && !print_them_all)
1774 			print_first = 0;
1775 
1776 		unsaturated = (dsk_counts.saturated == 0);
1777 		locked = (dsk_counts.unlocked == 0);
1778 
1779 		// only do locking retries if unsaturated or rawcard A or B,
1780 		// otherwise full SAT retry
1781 		if (unsaturated || (spd_rawcard_aorb && !has_no_sat)) {
1782 			if (!locked) {	// and not locked
1783 				lock_retries++;
1784 				lock_retries_total++;
1785 				if (lock_retries <= lock_retries_limit) {
1786 					goto perform_deskew_training;
1787 				} else {
1788 					debug("N0.LMC%d: LOCK RETRIES failed after %d retries\n",
1789 					      if_num, lock_retries_limit);
1790 				}
1791 			} else {
1792 				// only print if we did try
1793 				if (lock_retries_total > 0)
1794 					debug("N0.LMC%d: LOCK RETRIES successful after %d retries\n",
1795 					      if_num, lock_retries);
1796 			}
1797 		}		/* if (unsaturated || spd_rawcard_aorb) */
1798 
1799 		++sat_retries;
1800 
1801 		/*
1802 		 * At this point, check for a DDR4 RDIMM that will not
1803 		 * benefit from SAT retries; if so, exit
1804 		 */
1805 		if (spd_rawcard_aorb && !has_no_sat) {
1806 			debug("N0.LMC%d: Deskew Training Loop: Exiting for RAWCARD == A or B.\n",
1807 			      if_num);
1808 			break;	// no sat or lock retries
1809 		}
1810 
1811 	} while (!unsaturated && (sat_retries < sat_retries_limit));
1812 
1813 	debug("N0.LMC%d: Deskew Training %s. %d sat-retries, %d lock-retries\n",
1814 	      if_num, (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ?
1815 	      "Timed Out" : "Completed", sat_retries - 1, lock_retries_total);
1816 
1817 	// FIXME? add saturation to reasons for fault return - give it a
1818 	// chance via Internal VREF
1819 	// FIXME? add OPTIONAL bit value to reasons for fault return -
1820 	// give it a chance via Internal VREF
1821 	if (dsk_counts.nibrng_errs != 0 || dsk_counts.nibunl_errs != 0 ||
1822 	    (dsk_counts.bitval_errs != 0 && !disable_bitval_retries) ||
1823 	    !unsaturated) {
1824 		debug("N0.LMC%d: Nibble or Saturation Error(s) found, returning FAULT\n",
1825 		      if_num);
1826 		// FIXME: do we want this output always for errors?
1827 		validate_deskew_training(priv, rank_mask, if_num,
1828 					 &dsk_counts, 1);
1829 		return -1;	// we did retry locally, they did not help
1830 	}
1831 
1832 	// NOTE: we (currently) always print one last training validation
1833 	// before starting Read Leveling...
1834 
1835 	return 0;
1836 }
1837 
1838 #define SCALING_FACTOR (1000)
1839 
1840 // NOTE: this gets called for 1-rank and 2-rank DIMMs in single-slot config
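// Worked example with hypothetical values and dram_connection = 0
// (rser_s = 15): rtt_wr = 0 (off), rtt_park = 120, dqx_ctl = 34,
// rank_count = 2 gives reff_s ~= 120, vref = 49000 / 169 + 1000 = 1289,
// then vref = 1289 * 1200 / 2 / 1000 = 773 mV, vrefpc = 6441 (64.41%).
// That is above the 60% range1 base, so vref_value = nint(441 / 65) = 7
// in range 1.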
1841 static int compute_vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl,
1842 				    int rank_count, int dram_connection)
1843 {
1844 	u64 reff_s;
1845 	u64 rser_s = (dram_connection) ? 0 : 15;
1846 	u64 vdd = 1200;
1847 	u64 vref;
1848 	// 99 == HiZ
1849 	u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
1850 			1 * 1024 * 1024 : rtt_wr);
1851 	u64 rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) &&
1852 					       (rtt_wr != 0))) ?
1853 			  1 * 1024 * 1024 : rtt_park);
1854 	u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
1855 	int vref_value;
1856 	u64 rangepc = 6000;	// range1 base
1857 	u64 vrefpc;
1858 	int vref_range = 0;
1859 
1860 	reff_s = divide_nint((rtt_wr_s * rtt_park_s), (rtt_wr_s + rtt_park_s));
1861 
1862 	vref = (((rser_s + dqx_ctl_s) * SCALING_FACTOR) /
1863 		(rser_s + dqx_ctl_s + reff_s)) + SCALING_FACTOR;
1864 
1865 	vref = (vref * vdd) / 2 / SCALING_FACTOR;
1866 
1867 	vrefpc = (vref * 100 * 100) / vdd;
1868 
1869 	if (vrefpc < rangepc) {	// < range1 base, use range2
1870 		vref_range = 1 << 6;	// set bit A6 for range2
1871 		rangepc = 4500;	// range2 base is 45%
1872 	}
1873 
1874 	vref_value = divide_nint(vrefpc - rangepc, 65);
1875 	if (vref_value < 0)
1876 		vref_value = vref_range;	// set to base of range
1877 	else
1878 		vref_value |= vref_range;
1879 
1880 	debug("rtt_wr: %d, rtt_park: %d, dqx_ctl: %d, rank_count: %d\n",
1881 	      rtt_wr, rtt_park, dqx_ctl, rank_count);
1882 	debug("rtt_wr_s: %lld, rtt_park_s: %lld, dqx_ctl_s: %lld, vref_value: 0x%x, range: %d\n",
1883 	      rtt_wr_s, rtt_park_s, dqx_ctl_s, vref_value ^ vref_range,
1884 	      vref_range ? 2 : 1);
1885 
1886 	return vref_value;
1887 }
1888 
1889 // NOTE: this gets called for 1-rank and 2-rank DIMMs in two-slot configs
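// Worked example with hypothetical values and dram_connection = 0 (rser = 15):
// rtt_wr = 240, rtt_park_00 = rtt_park_01 = 120, dqx_ctl = 34, rtt_nom = 40:
// rd0 = nint(40 * 240 / 280) + 15 = 49, rd1 = 60 + 15 = 75,
// rpullup = nint(49 * 75 / 124) = 30, vl = nint(34 * 1200 / 64) = 638,
// vlp = nint(15 * 562 / 49) + 638 = 810, vcm = 1005, vrefpc = 8375 (83.75%),
// so vref_value = nint(2375 / 65) = 37 in range 1.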
1890 static int compute_vref_2slot_2rank(int rtt_wr, int rtt_park_00,
1891 				    int rtt_park_01,
1892 				    int dqx_ctl, int rtt_nom,
1893 				    int dram_connection)
1894 {
1895 	u64 rser = (dram_connection) ? 0 : 15;
1896 	u64 vdd = 1200;
1897 	u64 vl, vlp, vcm;
1898 	u64 rd0, rd1, rpullup;
1899 	// 99 == HiZ
1900 	u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
1901 			1 * 1024 * 1024 : rtt_wr);
1902 	u64 rtt_park_00_s = (rtt_park_00 == 0 ? 1 * 1024 * 1024 : rtt_park_00);
1903 	u64 rtt_park_01_s = (rtt_park_01 == 0 ? 1 * 1024 * 1024 : rtt_park_01);
1904 	u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
1905 	u64 rtt_nom_s = (rtt_nom == 0 ? 1 * 1024 * 1024 : rtt_nom);
1906 	int vref_value;
1907 	u64 rangepc = 6000;	// range1 base
1908 	u64 vrefpc;
1909 	int vref_range = 0;
1910 
1911 	// rd0 = (RTT_NOM (parallel) RTT_WR) + RSER =
1912 	// ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + RSER
1913 	rd0 = divide_nint((rtt_nom_s * rtt_wr_s),
1914 			  (rtt_nom_s + rtt_wr_s)) + rser;
1915 
1916 	// rd1 = (RTT_PARK_00 (parallel) RTT_PARK_01) + RSER =
1917 	// ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + RSER
1918 	rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s),
1919 			  (rtt_park_00_s + rtt_park_01_s)) + rser;
1920 
1921 	// rpullup = rd0 (parallel) rd1 = (rd0 * rd1) / (rd0 + rd1)
1922 	rpullup = divide_nint((rd0 * rd1), (rd0 + rd1));
1923 
1924 	// vl = (DQX_CTL / (DQX_CTL + rpullup)) * 1.2
1925 	vl = divide_nint((dqx_ctl_s * vdd), (dqx_ctl_s + rpullup));
1926 
1927 	// vlp = ((RSER / rd0) * (1.2 - vl)) + vl
1928 	vlp = divide_nint((rser * (vdd - vl)), rd0) + vl;
1929 
1930 	// vcm = (vlp + 1.2) / 2
1931 	vcm = divide_nint((vlp + vdd), 2);
1932 
1933 	// vrefpc = (vcm / 1.2) * 100
1934 	vrefpc = divide_nint((vcm * 100 * 100), vdd);
1935 
1936 	if (vrefpc < rangepc) {	// < range1 base, use range2
1937 		vref_range = 1 << 6;	// set bit A6 for range2
1938 		rangepc = 4500;	// range2 base is 45%
1939 	}
1940 
1941 	vref_value = divide_nint(vrefpc - rangepc, 65);
1942 	if (vref_value < 0)
1943 		vref_value = vref_range;	// set to base of range
1944 	else
1945 		vref_value |= vref_range;
1946 
1947 	debug("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, vref_value:%d (0x%x)\n",
1948 	      rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, vref_value,
1949 	      vref_value);
1950 
1951 	return vref_value;
1952 }
1953 
1954 // NOTE: only call this for DIMMs with 1 or 2 ranks, not 4.
1955 static int compute_vref_val(struct ddr_priv *priv, int if_num, int rankx,
1956 			    int dimm_count, int rank_count,
1957 			    struct impedence_values *imp_values,
1958 			    int is_stacked_die, int dram_connection)
1959 {
1960 	int computed_final_vref_value = 0;
1961 	int enable_adjust = ENABLE_COMPUTED_VREF_ADJUSTMENT;
1962 	const char *s;
1963 	int rtt_wr, dqx_ctl, rtt_nom, index;
1964 	union cvmx_lmcx_modereg_params1 lmc_modereg_params1;
1965 	union cvmx_lmcx_modereg_params2 lmc_modereg_params2;
1966 	union cvmx_lmcx_comp_ctl2 comp_ctl2;
1967 	int rtt_park;
1968 	int rtt_park_00;
1969 	int rtt_park_01;
1970 
1971 	debug("N0.LMC%d.R%d: %s(...dram_connection = %d)\n",
1972 	      if_num, rankx, __func__, dram_connection);
1973 
1974 	// allow some overrides...
1975 	s = env_get("ddr_adjust_computed_vref");
1976 	if (s) {
1977 		enable_adjust = !!simple_strtoul(s, NULL, 0);
1978 		if (!enable_adjust) {
1979 			debug("N0.LMC%d.R%d: DISABLE adjustment of computed VREF\n",
1980 			      if_num, rankx);
1981 		}
1982 	}
1983 
1984 	s = env_get("ddr_set_computed_vref");
1985 	if (s) {
1986 		int new_vref = simple_strtoul(s, NULL, 0);
1987 
1988 		debug("N0.LMC%d.R%d: OVERRIDE computed VREF to 0x%x (%d)\n",
1989 		      if_num, rankx, new_vref, new_vref);
1990 		return new_vref;
1991 	}
1992 
1993 	/*
1994 	 * Calculate an alternative to the measured vref value
1995 	 * but only for configurations we know how to...
1996 	 */
1997 	// We have code for 2-rank DIMMs in both 1-slot or 2-slot configs,
1998 	// and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot
1999 	// configs, and can use the 2-rank 2-slot code for 1-rank DIMMs
2000 	// in 2-slot configs.
2001 
2002 	lmc_modereg_params1.u64 =
2003 	    lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
2004 	lmc_modereg_params2.u64 =
2005 	    lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num));
2006 	comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
2007 	dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];
2008 
2009 	// WR always comes from the current rank
2010 	index = (lmc_modereg_params1.u64 >> (rankx * 12 + 5)) & 0x03;
2011 	if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
2012 		index |= lmc_modereg_params1.u64 >> (51 + rankx - 2) & 0x04;
2013 	rtt_wr = imp_values->rtt_wr_ohms[index];
2014 
2015 	// separate calculations for 1 vs 2 DIMMs per LMC
2016 	if (dimm_count == 1) {
2017 		// PARK comes from this rank if 1-rank, otherwise other rank
2018 		index =
2019 		    (lmc_modereg_params2.u64 >>
2020 		     ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
2021 		rtt_park = imp_values->rtt_nom_ohms[index];
2022 		computed_final_vref_value =
2023 		    compute_vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl,
2024 					     rank_count, dram_connection);
2025 	} else {
2026 		// get both PARK values from the other DIMM
2027 		index =
2028 		    (lmc_modereg_params2.u64 >> ((rankx ^ 0x02) * 10 + 0)) &
2029 		    0x07;
2030 		rtt_park_00 = imp_values->rtt_nom_ohms[index];
2031 		index =
2032 		    (lmc_modereg_params2.u64 >> ((rankx ^ 0x03) * 10 + 0)) &
2033 		    0x07;
2034 		rtt_park_01 = imp_values->rtt_nom_ohms[index];
2035 		// NOM comes from this rank if 1-rank, otherwise other rank
2036 		index =
2037 		    (lmc_modereg_params1.u64 >>
2038 		     ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
2039 		rtt_nom = imp_values->rtt_nom_ohms[index];
2040 		computed_final_vref_value =
2041 		    compute_vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01,
2042 					     dqx_ctl, rtt_nom, dram_connection);
2043 	}
2044 
2045 	if (enable_adjust) {
2046 		union cvmx_lmcx_config lmc_config;
2047 		union cvmx_lmcx_control lmc_control;
2048 
2049 		lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
2050 		lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
2051 
2052 		/*
2053 		 * New computed vref = existing computed vref - X
2054 		 *
2055 		 * The value of X depends on the configuration.
2056 		 * Both #122 and #139 are 2Rx4 RDIMMs, while #124 is a
2057 		 * stacked-die 2Rx4, so the results fall into two cases:
2058 		 *
2059 		 * 1. Stacked die: 2Rx4
2060 		 * 1-slot: offset = 7, i.e. new computed vref = existing
2061 		 * computed vref - 7
2062 		 * 2-slot: offset = 6
2063 		 *
2064 		 * 2. Regular: 2Rx4
2065 		 * 1-slot: offset = 3
2066 		 * 2-slot: offset = 2
2067 		 */
2068 		// we know we never get called unless DDR4, so test just
2069 		// the other conditions
2070 		if (lmc_control.s.rdimm_ena == 1 &&
2071 		    rank_count == 2 && lmc_config.s.mode_x4dev) {
2072 			// it must first be RDIMM and 2-rank and x4
2073 			int adj;
2074 
2075 			// now do according to stacked die or not...
2076 			if (is_stacked_die)
2077 				adj = (dimm_count == 1) ? -7 : -6;
2078 			else
2079 				adj = (dimm_count == 1) ? -3 : -2;
2080 
2081 			// we must have adjusted it, so print it out if
2082 			// verbosity is right
2083 			debug("N0.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n",
2084 			      if_num, rankx, computed_final_vref_value,
2085 			      computed_final_vref_value,
2086 			      computed_final_vref_value + adj,
2087 			      computed_final_vref_value + adj);
2088 			computed_final_vref_value += adj;
2089 		}
2090 	}
2091 
2092 	return computed_final_vref_value;
2093 }
2094 
2095 static void unpack_rlevel_settings(int if_bytemask, int ecc_ena,
2096 				   struct rlevel_byte_data *rlevel_byte,
2097 				   union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank)
2098 {
2099 	if ((if_bytemask & 0xff) == 0xff) {
2100 		if (ecc_ena) {
2101 			rlevel_byte[8].delay = lmc_rlevel_rank.s.byte7;
2102 			rlevel_byte[7].delay = lmc_rlevel_rank.s.byte6;
2103 			rlevel_byte[6].delay = lmc_rlevel_rank.s.byte5;
2104 			rlevel_byte[5].delay = lmc_rlevel_rank.s.byte4;
2105 			/* ECC */
2106 			rlevel_byte[4].delay = lmc_rlevel_rank.s.byte8;
2107 		} else {
2108 			rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7;
2109 			rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6;
2110 			rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5;
2111 			rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4;
2112 		}
2113 	} else {
2114 		rlevel_byte[8].delay = lmc_rlevel_rank.s.byte8;	/* unused */
2115 		rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7;	/* unused */
2116 		rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6;	/* unused */
2117 		rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5;	/* unused */
2118 		rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4;	/* ECC */
2119 	}
2120 
2121 	rlevel_byte[3].delay = lmc_rlevel_rank.s.byte3;
2122 	rlevel_byte[2].delay = lmc_rlevel_rank.s.byte2;
2123 	rlevel_byte[1].delay = lmc_rlevel_rank.s.byte1;
2124 	rlevel_byte[0].delay = lmc_rlevel_rank.s.byte0;
2125 }
2126 
2127 static void pack_rlevel_settings(int if_bytemask, int ecc_ena,
2128 				 struct rlevel_byte_data *rlevel_byte,
2129 				 union cvmx_lmcx_rlevel_rankx
2130 				 *final_rlevel_rank)
2131 {
2132 	union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank = *final_rlevel_rank;
2133 
2134 	if ((if_bytemask & 0xff) == 0xff) {
2135 		if (ecc_ena) {
2136 			lmc_rlevel_rank.s.byte7 = rlevel_byte[8].delay;
2137 			lmc_rlevel_rank.s.byte6 = rlevel_byte[7].delay;
2138 			lmc_rlevel_rank.s.byte5 = rlevel_byte[6].delay;
2139 			lmc_rlevel_rank.s.byte4 = rlevel_byte[5].delay;
2140 			/* ECC */
2141 			lmc_rlevel_rank.s.byte8 = rlevel_byte[4].delay;
2142 		} else {
2143 			lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
2144 			lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
2145 			lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
2146 			lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
2147 		}
2148 	} else {
2149 		lmc_rlevel_rank.s.byte8 = rlevel_byte[8].delay;
2150 		lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
2151 		lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
2152 		lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
2153 		lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
2154 	}
2155 
2156 	lmc_rlevel_rank.s.byte3 = rlevel_byte[3].delay;
2157 	lmc_rlevel_rank.s.byte2 = rlevel_byte[2].delay;
2158 	lmc_rlevel_rank.s.byte1 = rlevel_byte[1].delay;
2159 	lmc_rlevel_rank.s.byte0 = rlevel_byte[0].delay;
2160 
2161 	*final_rlevel_rank = lmc_rlevel_rank;
2162 }
2163 
2164 /////////////////// These are the RLEVEL settings display routines
2165 
2166 // flags
2167 #define WITH_NOTHING 0
2168 #define WITH_SCORE   1
2169 #define WITH_AVERAGE 2
2170 #define WITH_FINAL   4
2171 #define WITH_COMPUTE 8
2172 
2173 static void do_display_rl(int if_num,
2174 			  union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2175 			  int rank, int flags, int score)
2176 {
2177 	char score_buf[16];
2178 	char *msg_buf;
2179 	char hex_buf[20];
2180 
2181 	if (flags & WITH_SCORE) {
2182 		snprintf(score_buf, sizeof(score_buf), "(%d)", score);
2183 	} else {
2184 		score_buf[0] = ' ';
2185 		score_buf[1] = 0;
2186 	}
2187 
2188 	if (flags & WITH_AVERAGE) {
2189 		msg_buf = "  DELAY AVERAGES  ";
2190 	} else if (flags & WITH_FINAL) {
2191 		msg_buf = "  FINAL SETTINGS  ";
2192 	} else if (flags & WITH_COMPUTE) {
2193 		msg_buf = "  COMPUTED DELAYS ";
2194 	} else {
2195 		snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
2196 			 (unsigned long long)lmc_rlevel_rank.u64);
2197 		msg_buf = hex_buf;
2198 	}
2199 
2200 	debug("N0.LMC%d.R%d: Rlevel Rank %#4x, %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n",
2201 	      if_num, rank, lmc_rlevel_rank.s.status, msg_buf,
2202 	      lmc_rlevel_rank.s.byte8, lmc_rlevel_rank.s.byte7,
2203 	      lmc_rlevel_rank.s.byte6, lmc_rlevel_rank.s.byte5,
2204 	      lmc_rlevel_rank.s.byte4, lmc_rlevel_rank.s.byte3,
2205 	      lmc_rlevel_rank.s.byte2, lmc_rlevel_rank.s.byte1,
2206 	      lmc_rlevel_rank.s.byte0, score_buf);
2207 }
2208 
2209 static void display_rl(int if_num,
2210 		       union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, int rank)
2211 {
2212 	do_display_rl(if_num, lmc_rlevel_rank, rank, 0, 0);
2213 }
2214 
2215 static void display_rl_with_score(int if_num,
2216 				  union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2217 				  int rank, int score)
2218 {
2219 	do_display_rl(if_num, lmc_rlevel_rank, rank, 1, score);
2220 }
2221 
2222 static void display_rl_with_final(int if_num,
2223 				  union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2224 				  int rank)
2225 {
2226 	do_display_rl(if_num, lmc_rlevel_rank, rank, 4, 0);
2227 }
2228 
2229 static void display_rl_with_computed(int if_num,
2230 				     union cvmx_lmcx_rlevel_rankx
2231 				     lmc_rlevel_rank, int rank, int score)
2232 {
2233 	do_display_rl(if_num, lmc_rlevel_rank, rank, 9, score);
2234 }
2235 
2236 // flag values
2237 #define WITH_RODT_BLANK      0
2238 #define WITH_RODT_SKIPPING   1
2239 #define WITH_RODT_BESTROW    2
2240 #define WITH_RODT_BESTSCORE  3
2241 // control
2242 #define SKIP_SKIPPING 1
2243 
2244 static const char *with_rodt_canned_msgs[4] = {
2245 	"          ", "SKIPPING  ", "BEST ROW  ", "BEST SCORE"
2246 };
2247 
2248 static void display_rl_with_rodt(int if_num,
2249 				 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2250 				 int rank, int score,
2251 				 int nom_ohms, int rodt_ohms, int flag)
2252 {
2253 	const char *msg_buf;
2254 	char set_buf[20];
2255 
2256 #if SKIP_SKIPPING
2257 	if (flag == WITH_RODT_SKIPPING)
2258 		return;
2259 #endif
2260 
2261 	msg_buf = with_rodt_canned_msgs[flag];
2262 	if (nom_ohms < 0) {
2263 		snprintf(set_buf, sizeof(set_buf), "    RODT %3d    ",
2264 			 rodt_ohms);
2265 	} else {
2266 		snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms,
2267 			 rodt_ohms);
2268 	}
2269 
2270 	debug("N0.LMC%d.R%d: Rlevel %s   %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n",
2271 	      if_num, rank, set_buf, msg_buf, lmc_rlevel_rank.s.byte8,
2272 	      lmc_rlevel_rank.s.byte7, lmc_rlevel_rank.s.byte6,
2273 	      lmc_rlevel_rank.s.byte5, lmc_rlevel_rank.s.byte4,
2274 	      lmc_rlevel_rank.s.byte3, lmc_rlevel_rank.s.byte2,
2275 	      lmc_rlevel_rank.s.byte1, lmc_rlevel_rank.s.byte0, score);
2276 }
2277 
2278 static void do_display_wl(int if_num,
2279 			  union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
2280 			  int rank, int flags)
2281 {
2282 	char *msg_buf;
2283 	char hex_buf[20];
2284 
2285 	if (flags & WITH_FINAL) {
2286 		msg_buf = "  FINAL SETTINGS  ";
2287 	} else {
2288 		snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
2289 			 (unsigned long long)lmc_wlevel_rank.u64);
2290 		msg_buf = hex_buf;
2291 	}
2292 
2293 	debug("N0.LMC%d.R%d: Wlevel Rank %#4x, %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2294 	      if_num, rank, lmc_wlevel_rank.s.status, msg_buf,
2295 	      lmc_wlevel_rank.s.byte8, lmc_wlevel_rank.s.byte7,
2296 	      lmc_wlevel_rank.s.byte6, lmc_wlevel_rank.s.byte5,
2297 	      lmc_wlevel_rank.s.byte4, lmc_wlevel_rank.s.byte3,
2298 	      lmc_wlevel_rank.s.byte2, lmc_wlevel_rank.s.byte1,
2299 	      lmc_wlevel_rank.s.byte0);
2300 }
2301 
2302 static void display_wl(int if_num,
2303 		       union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, int rank)
2304 {
2305 	do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_NOTHING);
2306 }
2307 
2308 static void display_wl_with_final(int if_num,
2309 				  union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
2310 				  int rank)
2311 {
2312 	do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_FINAL);
2313 }
2314 
2315 // pretty-print bitmask adjuster
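// For example, ppbm(0x3f00000) shifts out the low zero nibbles and returns
// 0x3f0, which fits the %05llx fields printed by do_display_bm() below.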
2316 static u64 ppbm(u64 bm)
2317 {
2318 	if (bm != 0ul) {
2319 		while ((bm & 0x0fful) == 0ul)
2320 			bm >>= 4;
2321 	}
2322 
2323 	return bm;
2324 }
2325 
2326 // xlate PACKED index to UNPACKED index to use with rlevel_byte
2327 #define XPU(i, e) (((i) < 4) ? (i) : (((i) < 8) ? (i) + (e) : 4))
2328 // xlate UNPACKED index to PACKED index to use with rlevel_bitmask
2329 #define XUP(i, e) (((i) < 4) ? (i) : (e) ? (((i) > 4) ? (i) - 1 : 8) : (i))
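// For example, with ECC enabled (e = 1) the ECC lane sits at PACKED index 8
// but UNPACKED index 4: XPU(8, 1) == 4 and XUP(4, 1) == 8, while data lanes
// 4..7 shift up by one in the UNPACKED view (XPU(4, 1) == 5, XUP(5, 1) == 4).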
2330 
2331 // flag values
2332 #define WITH_WL_BITMASKS      0
2333 #define WITH_RL_BITMASKS      1
2334 #define WITH_RL_MASK_SCORES   2
2335 #define WITH_RL_SEQ_SCORES    3
2336 
2337 static void do_display_bm(int if_num, int rank, void *bm,
2338 			  int flags, int ecc)
2339 {
2340 	if (flags == WITH_WL_BITMASKS) {
2341 		// wlevel_bitmask array in PACKED index order, so just
2342 		// print them
2343 		int *bitmasks = (int *)bm;
2344 
2345 		debug("N0.LMC%d.R%d: Wlevel Debug Bitmasks                 : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n",
2346 		      if_num, rank, bitmasks[8], bitmasks[7], bitmasks[6],
2347 		      bitmasks[5], bitmasks[4], bitmasks[3], bitmasks[2],
2348 		      bitmasks[1], bitmasks[0]
2349 			);
2350 	} else if (flags == WITH_RL_BITMASKS) {
2351 		// rlevel_bitmask array in PACKED index order, so just
2352 		// print them
2353 		struct rlevel_bitmask *rlevel_bitmask =
2354 			(struct rlevel_bitmask *)bm;
2355 
2356 		debug("N0.LMC%d.R%d: Rlevel Debug Bitmasks        8:0      : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n",
2357 		      if_num, rank, ppbm(rlevel_bitmask[8].bm),
2358 		      ppbm(rlevel_bitmask[7].bm), ppbm(rlevel_bitmask[6].bm),
2359 		      ppbm(rlevel_bitmask[5].bm), ppbm(rlevel_bitmask[4].bm),
2360 		      ppbm(rlevel_bitmask[3].bm), ppbm(rlevel_bitmask[2].bm),
2361 		      ppbm(rlevel_bitmask[1].bm), ppbm(rlevel_bitmask[0].bm)
2362 			);
2363 	} else if (flags == WITH_RL_MASK_SCORES) {
2364 		// rlevel_bitmask array in PACKED index order, so just
2365 		// print them
2366 		struct rlevel_bitmask *rlevel_bitmask =
2367 			(struct rlevel_bitmask *)bm;
2368 
2369 		debug("N0.LMC%d.R%d: Rlevel Debug Bitmask Scores  8:0      : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2370 		      if_num, rank, rlevel_bitmask[8].errs,
2371 		      rlevel_bitmask[7].errs, rlevel_bitmask[6].errs,
2372 		      rlevel_bitmask[5].errs, rlevel_bitmask[4].errs,
2373 		      rlevel_bitmask[3].errs, rlevel_bitmask[2].errs,
2374 		      rlevel_bitmask[1].errs, rlevel_bitmask[0].errs);
2375 	} else if (flags == WITH_RL_SEQ_SCORES) {
2376 		// rlevel_byte array in UNPACKED index order, so xlate
2377 		// and print them
2378 		struct rlevel_byte_data *rlevel_byte =
2379 			(struct rlevel_byte_data *)bm;
2380 
2381 		debug("N0.LMC%d.R%d: Rlevel Debug Non-seq Scores  8:0      : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2382 		      if_num, rank, rlevel_byte[XPU(8, ecc)].sqerrs,
2383 		      rlevel_byte[XPU(7, ecc)].sqerrs,
2384 		      rlevel_byte[XPU(6, ecc)].sqerrs,
2385 		      rlevel_byte[XPU(5, ecc)].sqerrs,
2386 		      rlevel_byte[XPU(4, ecc)].sqerrs,
2387 		      rlevel_byte[XPU(3, ecc)].sqerrs,
2388 		      rlevel_byte[XPU(2, ecc)].sqerrs,
2389 		      rlevel_byte[XPU(1, ecc)].sqerrs,
2390 		      rlevel_byte[XPU(0, ecc)].sqerrs);
2391 	}
2392 }
2393 
2394 static void display_wl_bm(int if_num, int rank, int *bitmasks)
2395 {
2396 	do_display_bm(if_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0);
2397 }
2398 
2399 static void display_rl_bm(int if_num, int rank,
2400 			  struct rlevel_bitmask *bitmasks, int ecc_ena)
2401 {
2402 	do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_BITMASKS,
2403 		      ecc_ena);
2404 }
2405 
2406 static void display_rl_bm_scores(int if_num, int rank,
2407 				 struct rlevel_bitmask *bitmasks, int ecc_ena)
2408 {
2409 	do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES,
2410 		      ecc_ena);
2411 }
2412 
2413 static void display_rl_seq_scores(int if_num, int rank,
2414 				  struct rlevel_byte_data *bytes, int ecc_ena)
2415 {
2416 	do_display_bm(if_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena);
2417 }
2418 
2419 #define RODT_OHMS_COUNT        8
2420 #define RTT_NOM_OHMS_COUNT     8
2421 #define RTT_NOM_TABLE_COUNT    8
2422 #define RTT_WR_OHMS_COUNT      8
2423 #define DIC_OHMS_COUNT         3
2424 #define DRIVE_STRENGTH_COUNT  15
2425 
2426 static unsigned char ddr4_rodt_ohms[RODT_OHMS_COUNT] = {
2427 	0, 40, 60, 80, 120, 240, 34, 48 };
2428 static unsigned char ddr4_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
2429 	0, 60, 120, 40, 240, 48, 80, 34 };
2430 static unsigned char ddr4_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
2431 	0, 4, 2, 6, 1, 5, 3, 7 };
2432 // setting HiZ ohms to 99 for computed vref
2433 static unsigned char ddr4_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = {
2434 	0, 120, 240, 99, 80 };
2435 static unsigned char ddr4_dic_ohms[DIC_OHMS_COUNT] = { 34, 48 };
2436 static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = {
2437 	0, 0, 26, 30, 34, 40, 48, 68, 0, 0, 0, 0, 0, 0, 0 };
2438 static short ddr4_dqx_strength[DRIVE_STRENGTH_COUNT] = {
2439 	0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
2440 struct impedence_values ddr4_impedence_val = {
2441 	.rodt_ohms = ddr4_rodt_ohms,
2442 	.rtt_nom_ohms = ddr4_rtt_nom_ohms,
2443 	.rtt_nom_table = ddr4_rtt_nom_table,
2444 	.rtt_wr_ohms = ddr4_rtt_wr_ohms,
2445 	.dic_ohms = ddr4_dic_ohms,
2446 	.drive_strength = ddr4_drive_strength,
2447 	.dqx_strength = ddr4_dqx_strength,
2448 };
2449 
2450 static unsigned char ddr3_rodt_ohms[RODT_OHMS_COUNT] = {
2451 	0, 20, 30, 40, 60, 120, 0, 0 };
2452 static unsigned char ddr3_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
2453 	0, 60, 120, 40, 20, 30, 0, 0 };
2454 static unsigned char ddr3_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
2455 	0, 2, 1, 3, 5, 4, 0, 0 };
2456 static unsigned char ddr3_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { 0, 60, 120 };
2457 static unsigned char ddr3_dic_ohms[DIC_OHMS_COUNT] = { 40, 34 };
2458 static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = {
2459 	0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
2460 static struct impedence_values ddr3_impedence_val = {
2461 	.rodt_ohms = ddr3_rodt_ohms,
2462 	.rtt_nom_ohms = ddr3_rtt_nom_ohms,
2463 	.rtt_nom_table = ddr3_rtt_nom_table,
2464 	.rtt_wr_ohms = ddr3_rtt_wr_ohms,
2465 	.dic_ohms = ddr3_dic_ohms,
2466 	.drive_strength = ddr3_drive_strength,
2467 	.dqx_strength = ddr3_drive_strength,
2468 };
2469 
2470 static u64 hertz_to_psecs(u64 hertz)
2471 {
2472 	/* Clock in psecs */
2473 	return divide_nint((u64)1000 * 1000 * 1000 * 1000, hertz);
2474 }
2475 
2476 #define DIVIDEND_SCALE 1000	/* Scale to avoid rounding error. */
2477 
2478 static u64 psecs_to_mts(u64 psecs)
2479 {
2480 	return divide_nint(divide_nint((u64)(2 * 1000000 * DIVIDEND_SCALE),
2481 				       psecs), DIVIDEND_SCALE);
2482 }
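/*
 * For example, a hypothetical 1200 MHz DDR clock gives
 * hertz_to_psecs(1200000000) = 833 psecs, and psecs_to_mts(833) = 2401
 * because of integer rounding; compute_rc3x() below special-cases this
 * back to 2400 MT/s.
 */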
2483 
2484 #define WITHIN(v, b, m) (((v) >= ((b) - (m))) && ((v) <= ((b) + (m))))
2485 
2486 static unsigned long pretty_psecs_to_mts(u64 psecs)
2487 {
2488 	u64 ret = 0;		// default to error
2489 
2490 	if (WITHIN(psecs, 2500, 1))
2491 		ret = 800;
2492 	else if (WITHIN(psecs, 1875, 1))
2493 		ret = 1066;
2494 	else if (WITHIN(psecs, 1500, 1))
2495 		ret = 1333;
2496 	else if (WITHIN(psecs, 1250, 1))
2497 		ret = 1600;
2498 	else if (WITHIN(psecs, 1071, 1))
2499 		ret = 1866;
2500 	else if (WITHIN(psecs, 937, 1))
2501 		ret = 2133;
2502 	else if (WITHIN(psecs, 833, 1))
2503 		ret = 2400;
2504 	else if (WITHIN(psecs, 750, 1))
2505 		ret = 2666;
2506 	return ret;
2507 }
2508 
2509 static u64 mts_to_hertz(u64 mts)
2510 {
2511 	return ((mts * 1000 * 1000) / 2);
2512 }
2513 
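/*
 * Derive the RC3x (fine-granularity operating speed) control-word value
 * from the DDR clock period. With ENCODING_BASE 1240 and 20 MT/s buckets,
 * a 2400 MT/s part (tclk_psecs = 833) breaks out of the loop at
 * speed = 2380 and returns rc3x = (2380 - 1240) / 20 = 57.
 */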
2514 static int compute_rc3x(int64_t tclk_psecs)
2515 {
2516 	long speed;
2517 	long tclk_psecs_min, tclk_psecs_max;
2518 	long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
2519 	int rc3x;
2520 
2521 #define ENCODING_BASE 1240
2522 
2523 	data_rate_mhz = psecs_to_mts(tclk_psecs);
2524 
2525 	/*
2526 	 * 2400 MT/s is a special case. Using integer arithmetic it rounds
2527 	 * from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
2528 	 * proper setting from the table.
2529 	 */
2530 	if (tclk_psecs == 833)
2531 		data_rate_mhz = 2400;
2532 
2533 	for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
2534 		int error = 0;
2535 
2536 		/* Clock in psecs */
2537 		tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00));
2538 		/* Clock in psecs */
2539 		tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18));
2540 
2541 		data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
2542 		data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);
2543 
2544 		/* Force alignment to a multiple of 20 to avoid rounding errors. */
2545 		data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20;
2546 		data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;
2547 
2548 		error += (speed + 00 != data_rate_mhz_min);
2549 		error += (speed + 20 != data_rate_mhz_max);
2550 
2551 		rc3x = (speed - ENCODING_BASE) / 20;
2552 
2553 		if (data_rate_mhz <= (speed + 20))
2554 			break;
2555 	}
2556 
2557 	return rc3x;
2558 }
2559 
2560 /*
2561  * Static global variables are needed so that the loops can be restructured
2562  * out of the main huge function. It's not elegant, but it is the only way
2563  * to break the original functions like init_octeon3_ddr3_interface() into
2564  * separate, smaller logical functions with fewer indentation levels.
2565  */
2566 static int if_num __section(".data");
2567 static u32 if_mask __section(".data");
2568 static int ddr_hertz __section(".data");
2569 
2570 static struct ddr_conf *ddr_conf __section(".data");
2571 static const struct dimm_odt_config *odt_1rank_config __section(".data");
2572 static const struct dimm_odt_config *odt_2rank_config __section(".data");
2573 static const struct dimm_odt_config *odt_4rank_config __section(".data");
2574 static struct dimm_config *dimm_config_table __section(".data");
2575 static const struct dimm_odt_config *odt_config __section(".data");
2576 static const struct ddr3_custom_config *c_cfg __section(".data");
2577 
2578 static int odt_idx __section(".data");
2579 
2580 static ulong tclk_psecs __section(".data");
2581 static ulong eclk_psecs __section(".data");
2582 
2583 static int row_bits __section(".data");
2584 static int col_bits __section(".data");
2585 static int num_banks __section(".data");
2586 static int num_ranks __section(".data");
2587 static int dram_width __section(".data");
2588 static int dimm_count __section(".data");
2589 /* Accumulate and report all the errors before giving up */
2590 static int fatal_error __section(".data");
2591 /* Flag that indicates safe DDR settings should be used */
2592 static int safe_ddr_flag __section(".data");
2593 /* Octeon II Default: 64bit interface width */
2594 static int if_64b __section(".data");
2595 static int if_bytemask __section(".data");
2596 static u32 mem_size_mbytes __section(".data");
2597 static unsigned int didx __section(".data");
2598 static int bank_bits __section(".data");
2599 static int bunk_enable __section(".data");
2600 static int rank_mask __section(".data");
2601 static int column_bits_start __section(".data");
2602 static int row_lsb __section(".data");
2603 static int pbank_lsb __section(".data");
2604 static int use_ecc __section(".data");
2605 static int mtb_psec __section(".data");
2606 static short ftb_dividend __section(".data");
2607 static short ftb_divisor __section(".data");
2608 static int taamin __section(".data");
2609 static int tckmin __section(".data");
2610 static int cl __section(".data");
2611 static int min_cas_latency __section(".data");
2612 static int max_cas_latency __section(".data");
2613 static int override_cas_latency __section(".data");
2614 static int ddr_rtt_nom_auto __section(".data");
2615 static int ddr_rodt_ctl_auto __section(".data");
2616 
2617 static int spd_addr __section(".data");
2618 static int spd_org __section(".data");
2619 static int spd_banks __section(".data");
2620 static int spd_rdimm __section(".data");
2621 static int spd_dimm_type __section(".data");
2622 static int spd_ecc __section(".data");
2623 static u32 spd_cas_latency __section(".data");
2624 static int spd_mtb_dividend __section(".data");
2625 static int spd_mtb_divisor __section(".data");
2626 static int spd_tck_min __section(".data");
2627 static int spd_taa_min __section(".data");
2628 static int spd_twr __section(".data");
2629 static int spd_trcd __section(".data");
2630 static int spd_trrd __section(".data");
2631 static int spd_trp __section(".data");
2632 static int spd_tras __section(".data");
2633 static int spd_trc __section(".data");
2634 static int spd_trfc __section(".data");
2635 static int spd_twtr __section(".data");
2636 static int spd_trtp __section(".data");
2637 static int spd_tfaw __section(".data");
2638 static int spd_addr_mirror __section(".data");
2639 static int spd_package __section(".data");
2640 static int spd_rawcard __section(".data");
2641 static int spd_rawcard_aorb __section(".data");
2642 static int spd_rdimm_registers __section(".data");
2643 static int spd_thermal_sensor __section(".data");
2644 
2645 static int is_stacked_die __section(".data");
2646 static int is_3ds_dimm __section(".data");
2647 // 3DS: logical ranks per package rank
2648 static int lranks_per_prank __section(".data");
2649 // 3DS: logical ranks bits
2650 static int lranks_bits __section(".data");
2651 // in Mbits; only used for 3DS
2652 static int die_capacity __section(".data");
2653 
2654 static enum ddr_type ddr_type __section(".data");
2655 
2656 static int twr __section(".data");
2657 static int trcd __section(".data");
2658 static int trrd __section(".data");
2659 static int trp __section(".data");
2660 static int tras __section(".data");
2661 static int trc __section(".data");
2662 static int trfc __section(".data");
2663 static int twtr __section(".data");
2664 static int trtp __section(".data");
2665 static int tfaw __section(".data");
2666 
2667 static int ddr4_tckavgmin __section(".data");
2668 static int ddr4_tckavgmax __section(".data");
2669 static int ddr4_trdcmin __section(".data");
2670 static int ddr4_trpmin __section(".data");
2671 static int ddr4_trasmin __section(".data");
2672 static int ddr4_trcmin __section(".data");
2673 static int ddr4_trfc1min __section(".data");
2674 static int ddr4_trfc2min __section(".data");
2675 static int ddr4_trfc4min __section(".data");
2676 static int ddr4_tfawmin __section(".data");
2677 static int ddr4_trrd_smin __section(".data");
2678 static int ddr4_trrd_lmin __section(".data");
2679 static int ddr4_tccd_lmin __section(".data");
2680 
2681 static int wl_mask_err __section(".data");
2682 static int wl_loops __section(".data");
2683 static int default_rtt_nom[4] __section(".data");
2684 static int dyn_rtt_nom_mask __section(".data");
2685 static struct impedence_values *imp_val __section(".data");
2686 static char default_rodt_ctl __section(".data");
2687 // default to disabled (ie, try LMC restart, not chip reset)
2688 static int ddr_disable_chip_reset __section(".data");
2689 static const char *dimm_type_name __section(".data");
2690 static int match_wl_rtt_nom __section(".data");
2691 
2692 struct hwl_alt_by_rank {
2693 	u16 hwl_alt_mask;	// mask of bytelanes with alternate
2694 	u16 hwl_alt_delay[9];	// bytelane alternate avail if mask=1
2695 };
2696 
2697 static struct hwl_alt_by_rank hwl_alts[4] __section(".data");
2698 
2699 #define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 3	// was: 5
2700 static int internal_retries __section(".data");
2701 
2702 static int deskew_training_errors __section(".data");
2703 static struct deskew_counts deskew_training_results __section(".data");
2704 static int disable_deskew_training __section(".data");
2705 static int restart_if_dsk_incomplete __section(".data");
2706 static int dac_eval_retries __section(".data");
2707 static int dac_settings[9] __section(".data");
2708 static int num_samples __section(".data");
2709 static int sample __section(".data");
2710 static int lane __section(".data");
2711 static int last_lane __section(".data");
2712 static int total_dac_eval_retries __section(".data");
2713 static int dac_eval_exhausted __section(".data");
2714 
2715 #define DEFAULT_DAC_SAMPLES 7	// originally was 5
2716 #define DAC_RETRIES_LIMIT   2
2717 
2718 struct bytelane_sample {
2719 	s16 bytes[DEFAULT_DAC_SAMPLES];
2720 };
2721 
2722 static struct bytelane_sample lanes[9] __section(".data");
2723 
2724 static char disable_sequential_delay_check __section(".data");
2725 static int wl_print __section(".data");
2726 
2727 static int enable_by_rank_init __section(".data");
2728 static int saved_rank_mask __section(".data");
2729 static int by_rank __section(".data");
2730 static struct deskew_data rank_dsk[4] __section(".data");
2731 static struct dac_data rank_dac[4] __section(".data");
2732 
2733 // todo: perhaps remove node at some time completely?
2734 static int node __section(".data");
2735 static int base_cl __section(".data");
2736 
2737 /* Parameters from DDR3 Specifications */
2738 #define DDR3_TREFI         7800000	/* 7.8 us */
2739 #define DDR3_ZQCS          80000ull	/* 80 ns */
2740 #define DDR3_ZQCS_INTERNAL 1280000000ull	/* 128ms/100 */
2741 #define DDR3_TCKE          5000	/* 5 ns */
2742 #define DDR3_TMRD          4	/* 4 nCK */
2743 #define DDR3_TDLLK         512	/* 512 nCK */
2744 #define DDR3_TMPRR         1	/* 1 nCK */
2745 #define DDR3_TWLMRD        40	/* 40 nCK */
2746 #define DDR3_TWLDQSEN      25	/* 25 nCK */
2747 
2748 /* Parameters from DDR4 Specifications */
2749 #define DDR4_TMRD          8	/* 8 nCK */
2750 #define DDR4_TDLLK         768	/* 768 nCK */
2751 
2752 static void lmc_config(struct ddr_priv *priv)
2753 {
2754 	union cvmx_lmcx_config cfg;
2755 	char *s;
2756 
2757 	cfg.u64 = 0;
2758 
2759 	cfg.cn78xx.ecc_ena = use_ecc;
2760 	cfg.cn78xx.row_lsb = encode_row_lsb_ddr3(row_lsb);
2761 	cfg.cn78xx.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb);
2762 
2763 	cfg.cn78xx.idlepower = 0;	/* Disabled */
2764 
2765 	s = lookup_env(priv, "ddr_idlepower");
2766 	if (s)
2767 		cfg.cn78xx.idlepower = simple_strtoul(s, NULL, 0);
2768 
2769 	cfg.cn78xx.forcewrite = 0;	/* Disabled */
2770 	/* Include memory reference address in the ECC */
2771 	cfg.cn78xx.ecc_adr = 1;
2772 
2773 	s = lookup_env(priv, "ddr_ecc_adr");
2774 	if (s)
2775 		cfg.cn78xx.ecc_adr = simple_strtoul(s, NULL, 0);
2776 
2777 	cfg.cn78xx.reset = 0;
2778 
2779 	/*
2780 	 * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to
2781 	 * RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25],
2782 	 * ref_zqcs_int(18:7) to
2783 	 * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this
2784 	 * value should always be greater than 32, to account for
2785 	 * resistor calibration delays.
2786 	 */
2787 
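	/*
	 * For example, assuming tclk_psecs = 833 (DDR4-2400):
	 * 7800000 / 833 / 512 = 18 for the refresh-interval count, and
	 * 1280000000 / (833 / 100) / (512 * 128) = 2441 for the ZQCS
	 * interval, so ref_zqcs_int = (2441 << 7) | 18.
	 */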
2788 	cfg.cn78xx.ref_zqcs_int = ((DDR3_TREFI / tclk_psecs / 512) & 0x7f);
2789 	cfg.cn78xx.ref_zqcs_int |=
2790 		((max(33ull, (DDR3_ZQCS_INTERNAL / (tclk_psecs / 100) /
2791 			      (512 * 128))) & 0xfff) << 7);
2792 
2793 	cfg.cn78xx.early_dqx = 1;	/* Default to enabled */
2794 
2795 	s = lookup_env(priv, "ddr_early_dqx");
2796 	if (!s)
2797 		s = lookup_env(priv, "ddr%d_early_dqx", if_num);
2798 
2799 	if (s)
2800 		cfg.cn78xx.early_dqx = simple_strtoul(s, NULL, 0);
2801 
2802 	cfg.cn78xx.sref_with_dll = 0;
2803 
2804 	cfg.cn78xx.rank_ena = bunk_enable;
2805 	cfg.cn78xx.rankmask = rank_mask;	/* Set later */
2806 	cfg.cn78xx.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) &
2807 		rank_mask;
2808 	/* Set once and don't change it. */
2809 	cfg.cn78xx.init_status = rank_mask;
2810 	cfg.cn78xx.early_unload_d0_r0 = 0;
2811 	cfg.cn78xx.early_unload_d0_r1 = 0;
2812 	cfg.cn78xx.early_unload_d1_r0 = 0;
2813 	cfg.cn78xx.early_unload_d1_r1 = 0;
2814 	cfg.cn78xx.scrz = 0;
2815 	if (octeon_is_cpuid(OCTEON_CN70XX))
2816 		cfg.cn78xx.mode32b = 1;	/* Read-only. Always 1. */
2817 	cfg.cn78xx.mode_x4dev = (dram_width == 4) ? 1 : 0;
2818 	cfg.cn78xx.bg2_enable = ((ddr_type == DDR4_DRAM) &&
2819 				 (dram_width == 16)) ? 0 : 1;
2820 
2821 	s = lookup_env_ull(priv, "ddr_config");
2822 	if (s)
2823 		cfg.u64 = simple_strtoull(s, NULL, 0);
2824 	debug("LMC_CONFIG                                    : 0x%016llx\n",
2825 	      cfg.u64);
2826 	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
2827 }
2828 
2829 static void lmc_control(struct ddr_priv *priv)
2830 {
2831 	union cvmx_lmcx_control ctrl;
2832 	char *s;
2833 
2834 	ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
2835 	ctrl.s.rdimm_ena = spd_rdimm;
2836 	ctrl.s.bwcnt = 0;	/* Clear counter later */
2837 	if (spd_rdimm)
2838 		ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_rdimm);
2839 	else
2840 		ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_udimm);
2841 	ctrl.s.pocas = 0;
2842 	ctrl.s.fprch2 = (safe_ddr_flag ? 2 : c_cfg->fprch2);
2843 	ctrl.s.throttle_rd = safe_ddr_flag ? 1 : 0;
2844 	ctrl.s.throttle_wr = safe_ddr_flag ? 1 : 0;
2845 	ctrl.s.inorder_rd = safe_ddr_flag ? 1 : 0;
2846 	ctrl.s.inorder_wr = safe_ddr_flag ? 1 : 0;
2847 	ctrl.s.elev_prio_dis = safe_ddr_flag ? 1 : 0;
2848 	/* discards writes to addresses that don't exist in the DRAM */
2849 	ctrl.s.nxm_write_en = 0;
2850 	ctrl.s.max_write_batch = 8;
2851 	ctrl.s.xor_bank = 1;
2852 	ctrl.s.auto_dclkdis = 1;
2853 	ctrl.s.int_zqcs_dis = 0;
2854 	ctrl.s.ext_zqcs_dis = 0;
2855 	ctrl.s.bprch = 1;
2856 	ctrl.s.wodt_bprch = 1;
2857 	ctrl.s.rodt_bprch = 1;
2858 
2859 	s = lookup_env(priv, "ddr_xor_bank");
2860 	if (s)
2861 		ctrl.s.xor_bank = simple_strtoul(s, NULL, 0);
2862 
2863 	s = lookup_env(priv, "ddr_2t");
2864 	if (s)
2865 		ctrl.s.ddr2t = simple_strtoul(s, NULL, 0);
2866 
2867 	s = lookup_env(priv, "ddr_fprch2");
2868 	if (s)
2869 		ctrl.s.fprch2 = simple_strtoul(s, NULL, 0);
2870 
2871 	s = lookup_env(priv, "ddr_bprch");
2872 	if (s)
2873 		ctrl.s.bprch = simple_strtoul(s, NULL, 0);
2874 
2875 	s = lookup_env(priv, "ddr_wodt_bprch");
2876 	if (s)
2877 		ctrl.s.wodt_bprch = simple_strtoul(s, NULL, 0);
2878 
2879 	s = lookup_env(priv, "ddr_rodt_bprch");
2880 	if (s)
2881 		ctrl.s.rodt_bprch = simple_strtoul(s, NULL, 0);
2882 
2883 	s = lookup_env(priv, "ddr_int_zqcs_dis");
2884 	if (s)
2885 		ctrl.s.int_zqcs_dis = simple_strtoul(s, NULL, 0);
2886 
2887 	s = lookup_env(priv, "ddr_ext_zqcs_dis");
2888 	if (s)
2889 		ctrl.s.ext_zqcs_dis = simple_strtoul(s, NULL, 0);
2890 
2891 	s = lookup_env_ull(priv, "ddr_control");
2892 	if (s)
2893 		ctrl.u64 = simple_strtoull(s, NULL, 0);
2894 
2895 	debug("LMC_CONTROL                                   : 0x%016llx\n",
2896 	      ctrl.u64);
2897 	lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
2898 }
2899 
2900 static void lmc_timing_params0(struct ddr_priv *priv)
2901 {
2902 	union cvmx_lmcx_timing_params0 tp0;
2903 	unsigned int trp_value;
2904 	char *s;
2905 
2906 	tp0.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS0(if_num));
2907 
2908 	trp_value = divide_roundup(trp, tclk_psecs) - 1;
2909 	debug("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value,
2910 	      trp_value +
2911 	      (unsigned int)(divide_roundup(max(4ull * tclk_psecs, 7500ull),
2912 					    tclk_psecs)) - 4);
2913 	s = lookup_env_ull(priv, "ddr_use_old_trp");
2914 	if (s) {
2915 		if (!!simple_strtoull(s, NULL, 0)) {
2916 			trp_value +=
2917 			    divide_roundup(max(4ull * tclk_psecs, 7500ull),
2918 					   tclk_psecs) - 4;
2919 			debug("TIMING_PARAMS0[trp]: USING OLD 0x%x\n",
2920 			      trp_value);
2921 		}
2922 	}
2923 
2924 	tp0.cn78xx.txpr =
2925 	    divide_roundup(max(5ull * tclk_psecs, trfc + 10000ull),
2926 			   16 * tclk_psecs);
2927 	tp0.cn78xx.trp = trp_value & 0x1f;
2928 	tp0.cn78xx.tcksre =
2929 	    divide_roundup(max(5ull * tclk_psecs, 10000ull), tclk_psecs) - 1;
2930 
2931 	if (ddr_type == DDR4_DRAM) {
2932 		int tzqinit = 4;	// Default to 4, for all DDR4 speed bins
2933 
2934 		s = lookup_env(priv, "ddr_tzqinit");
2935 		if (s)
2936 			tzqinit = simple_strtoul(s, NULL, 0);
2937 
2938 		tp0.cn78xx.tzqinit = tzqinit;
2939 		/* Always 8. */
2940 		tp0.cn78xx.tzqcs = divide_roundup(128 * tclk_psecs,
2941 						  (16 * tclk_psecs));
2942 		tp0.cn78xx.tcke =
2943 		    divide_roundup(max(3 * tclk_psecs, (ulong)DDR3_TCKE),
2944 				   tclk_psecs) - 1;
2945 		tp0.cn78xx.tmrd =
2946 		    divide_roundup((DDR4_TMRD * tclk_psecs), tclk_psecs) - 1;
2947 		tp0.cn78xx.tmod = 25;	/* 25 is the max allowed */
2948 		tp0.cn78xx.tdllk = divide_roundup(DDR4_TDLLK, 256);
2949 	} else {
2950 		tp0.cn78xx.tzqinit =
2951 		    divide_roundup(max(512ull * tclk_psecs, 640000ull),
2952 				   (256 * tclk_psecs));
2953 		tp0.cn78xx.tzqcs =
2954 		    divide_roundup(max(64ull * tclk_psecs, DDR3_ZQCS),
2955 				   (16 * tclk_psecs));
2956 		tp0.cn78xx.tcke = divide_roundup(DDR3_TCKE, tclk_psecs) - 1;
2957 		tp0.cn78xx.tmrd =
2958 		    divide_roundup((DDR3_TMRD * tclk_psecs), tclk_psecs) - 1;
2959 		tp0.cn78xx.tmod =
2960 		    divide_roundup(max(12ull * tclk_psecs, 15000ull),
2961 				   tclk_psecs) - 1;
2962 		tp0.cn78xx.tdllk = divide_roundup(DDR3_TDLLK, 256);
2963 	}
2964 
2965 	s = lookup_env_ull(priv, "ddr_timing_params0");
2966 	if (s)
2967 		tp0.u64 = simple_strtoull(s, NULL, 0);
2968 	debug("TIMING_PARAMS0                                : 0x%016llx\n",
2969 	      tp0.u64);
2970 	lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS0(if_num), tp0.u64);
2971 }
2972 
2973 static void lmc_timing_params1(struct ddr_priv *priv)
2974 {
2975 	union cvmx_lmcx_timing_params1 tp1;
2976 	unsigned int txp, temp_trcd, trfc_dlr;
2977 	char *s;
2978 
2979 	tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
2980 
2981 	/* .cn70xx. */
2982 	tp1.s.tmprr = divide_roundup(DDR3_TMPRR * tclk_psecs, tclk_psecs) - 1;
2983 
2984 	tp1.cn78xx.tras = divide_roundup(tras, tclk_psecs) - 1;
2985 
2986 	temp_trcd = divide_roundup(trcd, tclk_psecs);
2987 	if (temp_trcd > 15) {
2988 		debug("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n",
2989 		      temp_trcd);
2990 	}
2991 	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trcd > 15) {
2992 		/*
2993 		 * Let .trcd=0 serve as a flag that the field has
2994 		 * overflowed. Must use Additive Latency mode as a
2995 		 * workaround.
2996 		 */
2997 		temp_trcd = 0;
2998 	}
2999 	tp1.cn78xx.trcd = (temp_trcd >> 0) & 0xf;
3000 	tp1.cn78xx.trcd_ext = (temp_trcd >> 4) & 0x1;
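	/*
	 * For example, a hypothetical trcd of 13750 psecs at tclk_psecs = 833
	 * gives temp_trcd = 17 (0x11), so trcd = 1 and trcd_ext = 1.
	 */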
3001 
3002 	tp1.cn78xx.twtr = divide_roundup(twtr, tclk_psecs) - 1;
3003 	tp1.cn78xx.trfc = divide_roundup(trfc, 8 * tclk_psecs);
3004 
3005 	if (ddr_type == DDR4_DRAM) {
3006 		/* Workaround bug 24006. Use Trrd_l. */
3007 		tp1.cn78xx.trrd =
3008 		    divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
3009 	} else {
3010 		tp1.cn78xx.trrd = divide_roundup(trrd, tclk_psecs) - 2;
3011 	}
3012 
3013 	/*
3014 	 * tXP = max( 3nCK, 7.5 ns)     DDR3-800   tCLK = 2500 psec
3015 	 * tXP = max( 3nCK, 7.5 ns)     DDR3-1066  tCLK = 1875 psec
3016 	 * tXP = max( 3nCK, 6.0 ns)     DDR3-1333  tCLK = 1500 psec
3017 	 * tXP = max( 3nCK, 6.0 ns)     DDR3-1600  tCLK = 1250 psec
3018 	 * tXP = max( 3nCK, 6.0 ns)     DDR3-1866  tCLK = 1071 psec
3019 	 * tXP = max( 3nCK, 6.0 ns)     DDR3-2133  tCLK =  937 psec
3020 	 */
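	/*
	 * For example, at tclk_psecs = 1250 (DDR3-1600): tXP = max(3750, 6000)
	 * = 6000 psecs, so txp = roundup(6000 / 1250) - 1 = 4.
	 */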
3021 	txp = (tclk_psecs < 1875) ? 6000 : 7500;
3022 	txp = divide_roundup(max((unsigned int)(3 * tclk_psecs), txp),
3023 			     tclk_psecs) - 1;
3024 	if (txp > 7) {
3025 		debug("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n",
3026 		      txp);
3027 	}
3028 	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && txp > 7)
3029 		txp = 7;	// max it out
3030 	tp1.cn78xx.txp = (txp >> 0) & 7;
3031 	tp1.cn78xx.txp_ext = (txp >> 3) & 1;
3032 
3033 	tp1.cn78xx.twlmrd = divide_roundup(DDR3_TWLMRD * tclk_psecs,
3034 					   4 * tclk_psecs);
3035 	tp1.cn78xx.twldqsen = divide_roundup(DDR3_TWLDQSEN * tclk_psecs,
3036 					     4 * tclk_psecs);
3037 	tp1.cn78xx.tfaw = divide_roundup(tfaw, 4 * tclk_psecs);
3038 	tp1.cn78xx.txpdll = divide_roundup(max(10ull * tclk_psecs, 24000ull),
3039 					   tclk_psecs) - 1;
3040 
3041 	if (ddr_type == DDR4_DRAM && is_3ds_dimm) {
3042 		/*
3043 		 * 4 Gb: tRFC_DLR = 90 ns
3044 		 * 8 Gb: tRFC_DLR = 120 ns
3045 		 * 16 Gb: tRFC_DLR = 190 ns FIXME?
3046 		 */
3047 		if (die_capacity == 0x1000)	// 4 Gbit
3048 			trfc_dlr = 90;
3049 		else if (die_capacity == 0x2000)	// 8 Gbit
3050 			trfc_dlr = 120;
3051 		else if (die_capacity == 0x4000)	// 16 Gbit
3052 			trfc_dlr = 190;
3053 		else
3054 			trfc_dlr = 0;
3055 
3056 		if (trfc_dlr == 0) {
3057 			debug("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n",
3058 			      node, if_num, die_capacity);
3059 		} else {
3060 			tp1.cn78xx.trfc_dlr =
3061 			    divide_roundup(trfc_dlr * 1000UL, 8 * tclk_psecs);
3062 			debug("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n",
3063 			      node, if_num, tp1.cn78xx.trfc_dlr);
3064 		}
3065 	}
3066 
3067 	s = lookup_env_ull(priv, "ddr_timing_params1");
3068 	if (s)
3069 		tp1.u64 = simple_strtoull(s, NULL, 0);
3070 
3071 	debug("TIMING_PARAMS1                                : 0x%016llx\n",
3072 	      tp1.u64);
3073 	lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
3074 }
3075 
3076 static void lmc_timing_params2(struct ddr_priv *priv)
3077 {
3078 	if (ddr_type == DDR4_DRAM) {
3079 		union cvmx_lmcx_timing_params1 tp1;
3080 		union cvmx_lmcx_timing_params2 tp2;
3081 		int temp_trrd_l;
3082 
3083 		tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
3084 		tp2.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS2(if_num));
3085 		debug("TIMING_PARAMS2                                : 0x%016llx\n",
3086 		      tp2.u64);
3087 
3088 		temp_trrd_l = divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
3089 		if (temp_trrd_l > 7)
3090 			debug("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n",
3091 			      temp_trrd_l);
3092 		if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trrd_l > 7)
3093 			temp_trrd_l = 7;	// max it out
3094 		tp2.cn78xx.trrd_l = (temp_trrd_l >> 0) & 7;
3095 		tp2.cn78xx.trrd_l_ext = (temp_trrd_l >> 3) & 1;
3096 
3097 		// correct for 1600-2400
3098 		tp2.s.twtr_l = divide_nint(max(4ull * tclk_psecs, 7500ull),
3099 					   tclk_psecs) - 1;
3100 		tp2.s.t_rw_op_max = 7;
3101 		tp2.s.trtp = divide_roundup(max(4ull * tclk_psecs, 7500ull),
3102 					    tclk_psecs) - 1;
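		/*
		 * Example: at tclk_psecs = 1250 (DDR4-1600),
		 * max(4 * 1250, 7500) = 7500, so both TWTR_L and TRTP are
		 * programmed as 7500/1250 - 1 = 5, i.e. 6 CK.
		 */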
3103 
3104 		debug("TIMING_PARAMS2                                : 0x%016llx\n",
3105 		      tp2.u64);
3106 		lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS2(if_num), tp2.u64);
3107 
3108 		/*
3109 		 * Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met
3110 		 * for Write-to-Read operations to the same Bank Group
3111 		 */
3112 		if (tp1.cn78xx.twtr < (tp2.s.twtr_l - 4)) {
3113 			tp1.cn78xx.twtr = tp2.s.twtr_l - 4;
3114 			debug("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n",
3115 			      tp1.cn78xx.twtr, tp2.s.twtr_l);
3116 			debug("TIMING_PARAMS1                                : 0x%016llx\n",
3117 			      tp1.u64);
3118 			lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
3119 		}
3120 	}
3121 }
3122 
3123 static void lmc_modereg_params0(struct ddr_priv *priv)
3124 {
3125 	union cvmx_lmcx_modereg_params0 mp0;
3126 	int param;
3127 	char *s;
3128 
3129 	mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
3130 
3131 	if (ddr_type == DDR4_DRAM) {
3132 		mp0.s.cwl = 0;	/* 1600 (1250ps) */
3133 		if (tclk_psecs < 1250)
3134 			mp0.s.cwl = 1;	/* 1866 (1072ps) */
3135 		if (tclk_psecs < 1072)
3136 			mp0.s.cwl = 2;	/* 2133 (938ps) */
3137 		if (tclk_psecs < 938)
3138 			mp0.s.cwl = 3;	/* 2400 (833ps) */
3139 		if (tclk_psecs < 833)
3140 			mp0.s.cwl = 4;	/* 2666 (750ps) */
3141 		if (tclk_psecs < 750)
3142 			mp0.s.cwl = 5;	/* 3200 (625ps) */
3143 	} else {
3144 		/*
3145 		 ** CSR   CWL         CAS write Latency
3146 		 ** ===   ===   =================================
3147 		 **  0      5   (           tCK(avg) >=   2.5 ns)
3148 		 **  1      6   (2.5 ns   > tCK(avg) >= 1.875 ns)
3149 		 **  2      7   (1.875 ns > tCK(avg) >= 1.5   ns)
3150 		 **  3      8   (1.5 ns   > tCK(avg) >= 1.25  ns)
3151 		 **  4      9   (1.25 ns  > tCK(avg) >= 1.07  ns)
3152 		 **  5     10   (1.07 ns  > tCK(avg) >= 0.935 ns)
3153 		 **  6     11   (0.935 ns > tCK(avg) >= 0.833 ns)
3154 		 **  7     12   (0.833 ns > tCK(avg) >= 0.75  ns)
3155 		 */
3156 
3157 		mp0.s.cwl = 0;
3158 		if (tclk_psecs < 2500)
3159 			mp0.s.cwl = 1;
3160 		if (tclk_psecs < 1875)
3161 			mp0.s.cwl = 2;
3162 		if (tclk_psecs < 1500)
3163 			mp0.s.cwl = 3;
3164 		if (tclk_psecs < 1250)
3165 			mp0.s.cwl = 4;
3166 		if (tclk_psecs < 1070)
3167 			mp0.s.cwl = 5;
3168 		if (tclk_psecs < 935)
3169 			mp0.s.cwl = 6;
3170 		if (tclk_psecs < 833)
3171 			mp0.s.cwl = 7;
3172 	}
3173 
3174 	s = lookup_env(priv, "ddr_cwl");
3175 	if (s)
3176 		mp0.s.cwl = simple_strtoul(s, NULL, 0) - 5;
3177 
3178 	if (ddr_type == DDR4_DRAM) {
3179 		debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
3180 		      mp0.s.cwl + 9
3181 		      + ((mp0.s.cwl > 2) ? (mp0.s.cwl - 3) * 2 : 0), mp0.s.cwl);
3182 	} else {
3183 		debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
3184 		      mp0.s.cwl + 5, mp0.s.cwl);
3185 	}
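	/*
	 * The DDR4 decode above maps CSR encodings 0..5 to CWL values of
	 * 9, 10, 11, 12, 14 and 16 CK respectively (the step becomes 2 CK
	 * once cwl > 2), matching the speed grades noted where CWL is set.
	 */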
3186 
3187 	mp0.s.mprloc = 0;
3188 	mp0.s.mpr = 0;
3189 	mp0.s.dll = (ddr_type == DDR4_DRAM);	/* 0 for DDR3 and 1 for DDR4 */
3190 	mp0.s.al = 0;
3191 	mp0.s.wlev = 0;		/* Read Only */
3192 	if (octeon_is_cpuid(OCTEON_CN70XX) || ddr_type == DDR4_DRAM)
3193 		mp0.s.tdqs = 0;
3194 	else
3195 		mp0.s.tdqs = 1;
3196 	mp0.s.qoff = 0;
3197 
3198 	s = lookup_env(priv, "ddr_cl");
3199 	if (s) {
3200 		cl = simple_strtoul(s, NULL, 0);
3201 		debug("CAS Latency                                   : %6d\n",
3202 		      cl);
3203 	}
3204 
3205 	if (ddr_type == DDR4_DRAM) {
3206 		mp0.s.cl = 0x0;
3207 		if (cl > 9)
3208 			mp0.s.cl = 0x1;
3209 		if (cl > 10)
3210 			mp0.s.cl = 0x2;
3211 		if (cl > 11)
3212 			mp0.s.cl = 0x3;
3213 		if (cl > 12)
3214 			mp0.s.cl = 0x4;
3215 		if (cl > 13)
3216 			mp0.s.cl = 0x5;
3217 		if (cl > 14)
3218 			mp0.s.cl = 0x6;
3219 		if (cl > 15)
3220 			mp0.s.cl = 0x7;
3221 		if (cl > 16)
3222 			mp0.s.cl = 0x8;
3223 		if (cl > 18)
3224 			mp0.s.cl = 0x9;
3225 		if (cl > 20)
3226 			mp0.s.cl = 0xA;
3227 		if (cl > 24)
3228 			mp0.s.cl = 0xB;
3229 	} else {
3230 		mp0.s.cl = 0x2;
3231 		if (cl > 5)
3232 			mp0.s.cl = 0x4;
3233 		if (cl > 6)
3234 			mp0.s.cl = 0x6;
3235 		if (cl > 7)
3236 			mp0.s.cl = 0x8;
3237 		if (cl > 8)
3238 			mp0.s.cl = 0xA;
3239 		if (cl > 9)
3240 			mp0.s.cl = 0xC;
3241 		if (cl > 10)
3242 			mp0.s.cl = 0xE;
3243 		if (cl > 11)
3244 			mp0.s.cl = 0x1;
3245 		if (cl > 12)
3246 			mp0.s.cl = 0x3;
3247 		if (cl > 13)
3248 			mp0.s.cl = 0x5;
3249 		if (cl > 14)
3250 			mp0.s.cl = 0x7;
3251 		if (cl > 15)
3252 			mp0.s.cl = 0x9;
3253 	}
3254 
3255 	mp0.s.rbt = 0;		/* Read Only. */
3256 	mp0.s.tm = 0;
3257 	mp0.s.dllr = 0;
3258 
3259 	param = divide_roundup(twr, tclk_psecs);
3260 
3261 	if (ddr_type == DDR4_DRAM) {	/* DDR4 */
3262 		mp0.s.wrp = 1;
3263 		if (param > 12)
3264 			mp0.s.wrp = 2;
3265 		if (param > 14)
3266 			mp0.s.wrp = 3;
3267 		if (param > 16)
3268 			mp0.s.wrp = 4;
3269 		if (param > 18)
3270 			mp0.s.wrp = 5;
3271 		if (param > 20)
3272 			mp0.s.wrp = 6;
3273 		if (param > 24)	/* RESERVED in DDR4 spec */
3274 			mp0.s.wrp = 7;
3275 	} else {		/* DDR3 */
3276 		mp0.s.wrp = 1;
3277 		if (param > 5)
3278 			mp0.s.wrp = 2;
3279 		if (param > 6)
3280 			mp0.s.wrp = 3;
3281 		if (param > 7)
3282 			mp0.s.wrp = 4;
3283 		if (param > 8)
3284 			mp0.s.wrp = 5;
3285 		if (param > 10)
3286 			mp0.s.wrp = 6;
3287 		if (param > 12)
3288 			mp0.s.wrp = 7;
3289 	}
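	/*
	 * param is tWR rounded up to whole CK cycles; e.g. a 15 ns tWR at
	 * tclk_psecs = 1250 gives param = 12, which the DDR4 decode above
	 * leaves encoded as WRP = 1.
	 */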
3290 
3291 	mp0.s.ppd = 0;
3292 
3293 	s = lookup_env(priv, "ddr_wrp");
3294 	if (s)
3295 		mp0.s.wrp = simple_strtoul(s, NULL, 0);
3296 
3297 	debug("%-45s : %d, [0x%x]\n",
3298 	      "Write recovery for auto precharge WRP, [CSR]", param, mp0.s.wrp);
3299 
3300 	s = lookup_env_ull(priv, "ddr_modereg_params0");
3301 	if (s)
3302 		mp0.u64 = simple_strtoull(s, NULL, 0);
3303 
3304 	debug("MODEREG_PARAMS0                               : 0x%016llx\n",
3305 	      mp0.u64);
3306 	lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
3307 }
3308 
3309 static void lmc_modereg_params1(struct ddr_priv *priv)
3310 {
3311 	union cvmx_lmcx_modereg_params1 mp1;
3312 	char *s;
3313 	int i;
3314 
3315 	mp1.u64 = odt_config[odt_idx].modereg_params1.u64;
3316 
3317 	/*
3318 	 * Special request: mismatched DIMM support. Slot 0: 2-Rank,
3319 	 * Slot 1: 1-Rank
3320 	 */
3321 	if (rank_mask == 0x7) {	/* 2-Rank, 1-Rank */
3322 		mp1.s.rtt_nom_00 = 0;
3323 		mp1.s.rtt_nom_01 = 3;	/* rttnom_40ohm */
3324 		mp1.s.rtt_nom_10 = 3;	/* rttnom_40ohm */
3325 		mp1.s.rtt_nom_11 = 0;
3326 		dyn_rtt_nom_mask = 0x6;
3327 	}
3328 
3329 	s = lookup_env(priv, "ddr_rtt_nom_mask");
3330 	if (s)
3331 		dyn_rtt_nom_mask = simple_strtoul(s, NULL, 0);
3332 
3333 	/*
3334 	 * Save the original rtt_nom settings before sweeping through
3335 	 * settings.
3336 	 */
3337 	default_rtt_nom[0] = mp1.s.rtt_nom_00;
3338 	default_rtt_nom[1] = mp1.s.rtt_nom_01;
3339 	default_rtt_nom[2] = mp1.s.rtt_nom_10;
3340 	default_rtt_nom[3] = mp1.s.rtt_nom_11;
3341 
3342 	ddr_rtt_nom_auto = c_cfg->ddr_rtt_nom_auto;
3343 
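	/*
	 * The open-coded shifts below rely on each rank's settings
	 * occupying a 12-bit slice of MODEREG_PARAMS1: the 3-bit RTT_NOM
	 * field sits at bit (rank * 12 + 9), and the 2-bit DIC field used
	 * by the override loops further down sits at bit (rank * 12 + 7).
	 */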
3344 	for (i = 0; i < 4; ++i) {
3345 		u64 value;
3346 
3347 		s = lookup_env(priv, "ddr_rtt_nom_%1d%1d", !!(i & 2),
3348 			       !!(i & 1));
3349 		if (!s)
3350 			s = lookup_env(priv, "ddr%d_rtt_nom_%1d%1d", if_num,
3351 				       !!(i & 2), !!(i & 1));
3352 		if (s) {
3353 			value = simple_strtoul(s, NULL, 0);
3354 			mp1.u64 &= ~((u64)0x7 << (i * 12 + 9));
3355 			mp1.u64 |= ((value & 0x7) << (i * 12 + 9));
3356 			default_rtt_nom[i] = value;
3357 			ddr_rtt_nom_auto = 0;
3358 		}
3359 	}
3360 
3361 	s = lookup_env(priv, "ddr_rtt_nom");
3362 	if (!s)
3363 		s = lookup_env(priv, "ddr%d_rtt_nom", if_num);
3364 	if (s) {
3365 		u64 value;
3366 
3367 		value = simple_strtoul(s, NULL, 0);
3368 
3369 		if (dyn_rtt_nom_mask & 1) {
3370 			default_rtt_nom[0] = value;
3371 			mp1.s.rtt_nom_00 = value;
3372 		}
3373 		if (dyn_rtt_nom_mask & 2) {
3374 			default_rtt_nom[1] = value;
3375 			mp1.s.rtt_nom_01 = value;
3376 		}
3377 		if (dyn_rtt_nom_mask & 4) {
3378 			default_rtt_nom[2] = value;
3379 			mp1.s.rtt_nom_10 = value;
3380 		}
3381 		if (dyn_rtt_nom_mask & 8) {
3382 			default_rtt_nom[3] = value;
3383 			mp1.s.rtt_nom_11 = value;
3384 		}
3385 
3386 		ddr_rtt_nom_auto = 0;
3387 	}
3388 
3389 	for (i = 0; i < 4; ++i) {
3390 		u64 value;
3391 
3392 		s = lookup_env(priv, "ddr_rtt_wr_%1d%1d", !!(i & 2), !!(i & 1));
3393 		if (!s)
3394 			s = lookup_env(priv, "ddr%d_rtt_wr_%1d%1d", if_num,
3395 				       !!(i & 2), !!(i & 1));
3396 		if (s) {
3397 			value = simple_strtoul(s, NULL, 0);
3398 			insrt_wr(&mp1.u64, i, value);
3399 		}
3400 	}
3401 
3402 	// Make sure 78XX pass 1 has valid RTT_WR settings, because
3403 	// configuration files may be set up for later chips, and
3404 	// 78XX pass 1 supports no RTT_WR extension bits
3405 	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
3406 		for (i = 0; i < 4; ++i) {
3407 			// if 80 or undefined
3408 			if (extr_wr(mp1.u64, i) > 3) {
3409 				// FIXME? always insert 120
3410 				insrt_wr(&mp1.u64, i, 1);
3411 				debug("RTT_WR_%d%d set to 120 for CN78XX pass 1\n",
3412 				      !!(i & 2), i & 1);
3413 			}
3414 		}
3415 	}
3416 
3417 	s = lookup_env(priv, "ddr_dic");
3418 	if (s) {
3419 		u64 value = simple_strtoul(s, NULL, 0);
3420 
3421 		for (i = 0; i < 4; ++i) {
3422 			mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
3423 			mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
3424 		}
3425 	}
3426 
3427 	for (i = 0; i < 4; ++i) {
3428 		u64 value;
3429 
3430 		s = lookup_env(priv, "ddr_dic_%1d%1d", !!(i & 2), !!(i & 1));
3431 		if (s) {
3432 			value = simple_strtoul(s, NULL, 0);
3433 			mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
3434 			mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
3435 		}
3436 	}
3437 
3438 	s = lookup_env_ull(priv, "ddr_modereg_params1");
3439 	if (s)
3440 		mp1.u64 = simple_strtoull(s, NULL, 0);
3441 
3442 	debug("RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
3443 	      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
3444 	      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
3445 	      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
3446 	      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
3447 	      mp1.s.rtt_nom_11,
3448 	      mp1.s.rtt_nom_10, mp1.s.rtt_nom_01, mp1.s.rtt_nom_00);
3449 
3450 	debug("RTT_WR      %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
3451 	      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
3452 	      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
3453 	      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
3454 	      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
3455 	      extr_wr(mp1.u64, 3),
3456 	      extr_wr(mp1.u64, 2), extr_wr(mp1.u64, 1), extr_wr(mp1.u64, 0));
3457 
3458 	debug("DIC         %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
3459 	      imp_val->dic_ohms[mp1.s.dic_11],
3460 	      imp_val->dic_ohms[mp1.s.dic_10],
3461 	      imp_val->dic_ohms[mp1.s.dic_01],
3462 	      imp_val->dic_ohms[mp1.s.dic_00],
3463 	      mp1.s.dic_11, mp1.s.dic_10, mp1.s.dic_01, mp1.s.dic_00);
3464 
3465 	debug("MODEREG_PARAMS1                               : 0x%016llx\n",
3466 	      mp1.u64);
3467 	lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), mp1.u64);
3468 }
3469 
3470 static void lmc_modereg_params2(struct ddr_priv *priv)
3471 {
3472 	char *s;
3473 	int i;
3474 
3475 	if (ddr_type == DDR4_DRAM) {
3476 		union cvmx_lmcx_modereg_params2 mp2;
3477 
3478 		mp2.u64 = odt_config[odt_idx].modereg_params2.u64;
3479 
3480 		s = lookup_env(priv, "ddr_rtt_park");
3481 		if (s) {
3482 			u64 value = simple_strtoul(s, NULL, 0);
3483 
3484 			for (i = 0; i < 4; ++i) {
3485 				mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
3486 				mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
3487 			}
3488 		}
3489 
3490 		for (i = 0; i < 4; ++i) {
3491 			u64 value;
3492 
3493 			s = lookup_env(priv, "ddr_rtt_park_%1d%1d", !!(i & 2),
3494 				       !!(i & 1));
3495 			if (s) {
3496 				value = simple_strtoul(s, NULL, 0);
3497 				mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
3498 				mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
3499 			}
3500 		}
3501 
3502 		s = lookup_env_ull(priv, "ddr_modereg_params2");
3503 		if (s)
3504 			mp2.u64 = simple_strtoull(s, NULL, 0);
3505 
3506 		debug("RTT_PARK    %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
3507 		      imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
3508 		      imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
3509 		      imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
3510 		      imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
3511 		      mp2.s.rtt_park_11, mp2.s.rtt_park_10, mp2.s.rtt_park_01,
3512 		      mp2.s.rtt_park_00);
3513 
3514 		debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
3515 		      mp2.s.vref_range_11,
3516 		      mp2.s.vref_range_10,
3517 		      mp2.s.vref_range_01, mp2.s.vref_range_00);
3518 
3519 		debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
3520 		      mp2.s.vref_value_11,
3521 		      mp2.s.vref_value_10,
3522 		      mp2.s.vref_value_01, mp2.s.vref_value_00);
3523 
3524 		debug("MODEREG_PARAMS2                               : 0x%016llx\n",
3525 		      mp2.u64);
3526 		lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num), mp2.u64);
3527 	}
3528 }
3529 
3530 static void lmc_modereg_params3(struct ddr_priv *priv)
3531 {
3532 	char *s;
3533 
3534 	if (ddr_type == DDR4_DRAM) {
3535 		union cvmx_lmcx_modereg_params3 mp3;
3536 
3537 		mp3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num));
3538 		/* Disable as workaround to Errata 20547 */
3539 		mp3.s.rd_dbi = 0;
3540 		mp3.s.tccd_l = max(divide_roundup(ddr4_tccd_lmin, tclk_psecs),
3541 				   5ull) - 4;
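		/*
		 * TCCD_L is stored as (tCCD_L in CK) - 4, with the max()
		 * clamping tCCD_L to at least 5 CK; e.g. a 6250 ps
		 * ddr4_tccd_lmin at tclk_psecs = 1250 also rounds up to
		 * 5 CK, so the field is written as 1.
		 */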
3542 
3543 		s = lookup_env(priv, "ddr_rd_preamble");
3544 		if (s)
3545 			mp3.s.rd_preamble = !!simple_strtoul(s, NULL, 0);
3546 
3547 		if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
3548 			int delay = 0;
3549 
3550 			if (lranks_per_prank == 4 && ddr_hertz >= 1000000000)
3551 				delay = 1;
3552 
3553 			mp3.s.xrank_add_tccd_l = delay;
3554 			mp3.s.xrank_add_tccd_s = delay;
3555 		}
3556 
3557 		lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num), mp3.u64);
3558 		debug("MODEREG_PARAMS3                               : 0x%016llx\n",
3559 		      mp3.u64);
3560 	}
3561 }
3562 
3563 static void lmc_nxm(struct ddr_priv *priv)
3564 {
3565 	union cvmx_lmcx_nxm lmc_nxm;
3566 	int num_bits = row_lsb + row_bits + lranks_bits - 26;
3567 	char *s;
3568 
3569 	lmc_nxm.u64 = lmc_rd(priv, CVMX_LMCX_NXM(if_num));
3570 
3571 	/* .cn78xx. */
3572 	if (rank_mask & 0x1)
3573 		lmc_nxm.cn78xx.mem_msb_d0_r0 = num_bits;
3574 	if (rank_mask & 0x2)
3575 		lmc_nxm.cn78xx.mem_msb_d0_r1 = num_bits;
3576 	if (rank_mask & 0x4)
3577 		lmc_nxm.cn78xx.mem_msb_d1_r0 = num_bits;
3578 	if (rank_mask & 0x8)
3579 		lmc_nxm.cn78xx.mem_msb_d1_r1 = num_bits;
3580 
3581 	/* Set the mask for non-existent ranks. */
3582 	lmc_nxm.cn78xx.cs_mask = ~rank_mask & 0xff;
3583 
3584 	s = lookup_env_ull(priv, "ddr_nxm");
3585 	if (s)
3586 		lmc_nxm.u64 = simple_strtoull(s, NULL, 0);
3587 
3588 	debug("LMC_NXM                                       : 0x%016llx\n",
3589 	      lmc_nxm.u64);
3590 	lmc_wr(priv, CVMX_LMCX_NXM(if_num), lmc_nxm.u64);
3591 }
3592 
3593 static void lmc_wodt_mask(struct ddr_priv *priv)
3594 {
3595 	union cvmx_lmcx_wodt_mask wodt_mask;
3596 	char *s;
3597 
3598 	wodt_mask.u64 = odt_config[odt_idx].odt_mask;
3599 
3600 	s = lookup_env_ull(priv, "ddr_wodt_mask");
3601 	if (s)
3602 		wodt_mask.u64 = simple_strtoull(s, NULL, 0);
3603 
3604 	debug("WODT_MASK                                     : 0x%016llx\n",
3605 	      wodt_mask.u64);
3606 	lmc_wr(priv, CVMX_LMCX_WODT_MASK(if_num), wodt_mask.u64);
3607 }
3608 
3609 static void lmc_rodt_mask(struct ddr_priv *priv)
3610 {
3611 	union cvmx_lmcx_rodt_mask rodt_mask;
3612 	int rankx;
3613 	char *s;
3614 
3615 	rodt_mask.u64 = odt_config[odt_idx].rodt_ctl;
3616 
3617 	s = lookup_env_ull(priv, "ddr_rodt_mask");
3618 	if (s)
3619 		rodt_mask.u64 = simple_strtoull(s, NULL, 0);
3620 
3621 	debug("%-45s : 0x%016llx\n", "RODT_MASK", rodt_mask.u64);
3622 	lmc_wr(priv, CVMX_LMCX_RODT_MASK(if_num), rodt_mask.u64);
3623 
3624 	dyn_rtt_nom_mask = 0;
3625 	for (rankx = 0; rankx < dimm_count * 4; rankx++) {
3626 		if (!(rank_mask & (1 << rankx)))
3627 			continue;
3628 		dyn_rtt_nom_mask |= ((rodt_mask.u64 >> (8 * rankx)) & 0xff);
3629 	}
3630 	if (num_ranks == 4) {
3631 		/*
3632 		 * Normally ODT1 is wired to rank 1. For quad-ranked DIMMs
3633 		 * ODT1 is wired to the third rank (rank 2).  The mask,
3634 		 * dyn_rtt_nom_mask, is used to indicate for which ranks
3635 		 * to sweep RTT_NOM during read-leveling. Shift the bit
3636 		 * from the ODT1 position over to the "ODT2" position so
3637 		 * that the read-leveling analysis comes out right.
3638 		 */
3639 		int odt1_bit = dyn_rtt_nom_mask & 2;
3640 
3641 		dyn_rtt_nom_mask &= ~2;
3642 		dyn_rtt_nom_mask |= odt1_bit << 1;
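		/* e.g. a mask of 0x3 (ODT0/ODT1) becomes 0x5 (ODT0/"ODT2") */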
3643 	}
3644 	debug("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask);
3645 }
3646 
3647 static void lmc_comp_ctl2(struct ddr_priv *priv)
3648 {
3649 	union cvmx_lmcx_comp_ctl2 cc2;
3650 	char *s;
3651 
3652 	cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
3653 
3654 	cc2.cn78xx.dqx_ctl = odt_config[odt_idx].odt_ena;
3655 	/* Default 4=34.3 ohm */
3656 	cc2.cn78xx.ck_ctl = (c_cfg->ck_ctl == 0) ? 4 : c_cfg->ck_ctl;
3657 	/* Default 4=34.3 ohm */
3658 	cc2.cn78xx.cmd_ctl = (c_cfg->cmd_ctl == 0) ? 4 : c_cfg->cmd_ctl;
3659 	/* Default 4=34.3 ohm */
3660 	cc2.cn78xx.control_ctl = (c_cfg->ctl_ctl == 0) ? 4 : c_cfg->ctl_ctl;
3661 
3662 	ddr_rodt_ctl_auto = c_cfg->ddr_rodt_ctl_auto;
3663 	s = lookup_env(priv, "ddr_rodt_ctl_auto");
3664 	if (s)
3665 		ddr_rodt_ctl_auto = !!simple_strtoul(s, NULL, 0);
3666 
3667 	default_rodt_ctl = odt_config[odt_idx].qs_dic;
3668 	s = lookup_env(priv, "ddr_rodt_ctl");
3669 	if (!s)
3670 		s = lookup_env(priv, "ddr%d_rodt_ctl", if_num);
3671 	if (s) {
3672 		default_rodt_ctl = simple_strtoul(s, NULL, 0);
3673 		ddr_rodt_ctl_auto = 0;
3674 	}
3675 
3676 	cc2.cn70xx.rodt_ctl = default_rodt_ctl;
3677 
3678 	// if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms,
3679 	// and DCLK speed is 1 GHz or more...
3680 	if (ddr_type == DDR4_DRAM && cc2.s.ck_ctl == ddr4_driver_34_ohm &&
3681 	    ddr_hertz >= 1000000000) {
3682 		// lowest for DDR4 is 26 ohms
3683 		cc2.s.ck_ctl = ddr4_driver_26_ohm;
3684 		debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n",
3685 		      node, if_num, cc2.s.ck_ctl,
3686 		      imp_val->drive_strength[cc2.s.ck_ctl]);
3687 	}
3688 
3689 	// if DDR4, 2DPC, UDIMM, force CONTROL_CTL and CMD_CTL to 26 ohms,
3690 	// if DCLK speed is 1 GHz or more...
3691 	if (ddr_type == DDR4_DRAM && dimm_count == 2 &&
3692 	    (spd_dimm_type == 2 || spd_dimm_type == 6) &&
3693 	    ddr_hertz >= 1000000000) {
3694 		// lowest for DDR4 is 26 ohms
3695 		cc2.cn78xx.control_ctl = ddr4_driver_26_ohm;
3696 		// lowest for DDR4 is 26 ohms
3697 		cc2.cn78xx.cmd_ctl = ddr4_driver_26_ohm;
3698 		debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CONTROL_CTL,CMD_CTL] to %d, %d ohms\n",
3699 		      node, if_num, ddr4_driver_26_ohm,
3700 		      imp_val->drive_strength[ddr4_driver_26_ohm]);
3701 	}
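	/*
	 * In the SPD module-type encoding, values 2 and 6 appear to
	 * correspond to UDIMM and mini-UDIMM respectively, which is why
	 * only those two types are matched above.
	 */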
3702 
3703 	s = lookup_env(priv, "ddr_ck_ctl");
3704 	if (s)
3705 		cc2.cn78xx.ck_ctl = simple_strtoul(s, NULL, 0);
3706 
3707 	s = lookup_env(priv, "ddr_cmd_ctl");
3708 	if (s)
3709 		cc2.cn78xx.cmd_ctl = simple_strtoul(s, NULL, 0);
3710 
3711 	s = lookup_env(priv, "ddr_control_ctl");
3712 	if (s)
3713 		cc2.cn70xx.control_ctl = simple_strtoul(s, NULL, 0);
3714 
3715 	s = lookup_env(priv, "ddr_dqx_ctl");
3716 	if (s)
3717 		cc2.cn78xx.dqx_ctl = simple_strtoul(s, NULL, 0);
3718 
3719 	debug("%-45s : %d, %d ohms\n", "DQX_CTL           ", cc2.cn78xx.dqx_ctl,
3720 	      imp_val->drive_strength[cc2.cn78xx.dqx_ctl]);
3721 	debug("%-45s : %d, %d ohms\n", "CK_CTL            ", cc2.cn78xx.ck_ctl,
3722 	      imp_val->drive_strength[cc2.cn78xx.ck_ctl]);
3723 	debug("%-45s : %d, %d ohms\n", "CMD_CTL           ", cc2.cn78xx.cmd_ctl,
3724 	      imp_val->drive_strength[cc2.cn78xx.cmd_ctl]);
3725 	debug("%-45s : %d, %d ohms\n", "CONTROL_CTL       ",
3726 	      cc2.cn78xx.control_ctl,
3727 	      imp_val->drive_strength[cc2.cn78xx.control_ctl]);
3728 	debug("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
3729 	      cc2.cn78xx.rodt_ctl, imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
3730 
3731 	debug("%-45s : 0x%016llx\n", "COMP_CTL2", cc2.u64);
3732 	lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
3733 }
3734 
3735 static void lmc_phy_ctl(struct ddr_priv *priv)
3736 {
3737 	union cvmx_lmcx_phy_ctl phy_ctl;
3738 
3739 	phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
3740 	phy_ctl.s.ts_stagger = 0;
3741 	// FIXME: are there others TBD?
3742 	phy_ctl.s.dsk_dbg_overwrt_ena = 0;
3743 
3744 	if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
3745 		// C0 is TEN, C1 is A17
3746 		phy_ctl.s.c0_sel = 2;
3747 		phy_ctl.s.c1_sel = 2;
3748 		debug("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n",
3749 		      node, if_num, phy_ctl.s.c1_sel);
3750 	}
3751 
3752 	debug("PHY_CTL                                       : 0x%016llx\n",
3753 	      phy_ctl.u64);
3754 	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
3755 }
3756 
3757 static void lmc_ext_config(struct ddr_priv *priv)
3758 {
3759 	union cvmx_lmcx_ext_config ext_cfg;
3760 	char *s;
3761 
3762 	ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
3763 	ext_cfg.s.vrefint_seq_deskew = 0;
3764 	ext_cfg.s.read_ena_bprch = 1;
3765 	ext_cfg.s.read_ena_fprch = 1;
3766 	ext_cfg.s.drive_ena_fprch = 1;
3767 	ext_cfg.s.drive_ena_bprch = 1;
3768 	// make sure this is OFF for all current chips
3769 	ext_cfg.s.invert_data = 0;
3770 
3771 	s = lookup_env(priv, "ddr_read_fprch");
3772 	if (s)
3773 		ext_cfg.s.read_ena_fprch = strtoul(s, NULL, 0);
3774 
3775 	s = lookup_env(priv, "ddr_read_bprch");
3776 	if (s)
3777 		ext_cfg.s.read_ena_bprch = strtoul(s, NULL, 0);
3778 
3779 	s = lookup_env(priv, "ddr_drive_fprch");
3780 	if (s)
3781 		ext_cfg.s.drive_ena_fprch = strtoul(s, NULL, 0);
3782 
3783 	s = lookup_env(priv, "ddr_drive_bprch");
3784 	if (s)
3785 		ext_cfg.s.drive_ena_bprch = strtoul(s, NULL, 0);
3786 
3787 	if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
3788 		ext_cfg.s.dimm0_cid = lranks_bits;
3789 		ext_cfg.s.dimm1_cid = lranks_bits;
3790 		debug("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
3791 		      node, if_num, ext_cfg.s.dimm0_cid);
3792 	}
3793 
3794 	lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
3795 	debug("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_cfg.u64);
3796 }
3797 
3798 static void lmc_ext_config2(struct ddr_priv *priv)
3799 {
3800 	char *s;
3801 
3802 	// NOTE: all chips have this register, but not necessarily the
3803 	// fields we modify...
3804 	if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
3805 	    !octeon_is_cpuid(OCTEON_CN73XX)) {
3806 		union cvmx_lmcx_ext_config2 ext_cfg2;
3807 		int value = 1;	// default to 1
3808 
3809 		ext_cfg2.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG2(if_num));
3810 
3811 		s = lookup_env(priv, "ddr_ext2_delay_unload");
3812 		if (s)
3813 			value = !!simple_strtoul(s, NULL, 0);
3814 
3815 		ext_cfg2.s.delay_unload_r0 = value;
3816 		ext_cfg2.s.delay_unload_r1 = value;
3817 		ext_cfg2.s.delay_unload_r2 = value;
3818 		ext_cfg2.s.delay_unload_r3 = value;
3819 
3820 		lmc_wr(priv, CVMX_LMCX_EXT_CONFIG2(if_num), ext_cfg2.u64);
3821 		debug("%-45s : 0x%016llx\n", "EXT_CONFIG2", ext_cfg2.u64);
3822 	}
3823 }
3824 
3825 static void lmc_dimm01_params_loop(struct ddr_priv *priv)
3826 {
3827 	union cvmx_lmcx_dimmx_params dimm_p;
3828 	int dimmx = didx;
3829 	char *s;
3830 	int rc;
3831 	int i;
3832 
3833 	dimm_p.u64 = lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num));
3834 
3835 	if (ddr_type == DDR4_DRAM) {
3836 		union cvmx_lmcx_dimmx_ddr4_params0 ddr4_p0;
3837 		union cvmx_lmcx_dimmx_ddr4_params1 ddr4_p1;
3838 		union cvmx_lmcx_ddr4_dimm_ctl ddr4_ctl;
3839 
3840 		dimm_p.s.rc0 = 0;
3841 		dimm_p.s.rc1 = 0;
3842 		dimm_p.s.rc2 = 0;
3843 
3844 		rc = read_spd(&dimm_config_table[didx], 0,
3845 			      DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL);
3846 		dimm_p.s.rc3 = (rc >> 4) & 0xf;
3847 		dimm_p.s.rc4 = ((rc >> 0) & 0x3) << 2;
3848 		dimm_p.s.rc4 |= ((rc >> 2) & 0x3) << 0;
3849 
3850 		rc = read_spd(&dimm_config_table[didx], 0,
3851 			      DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK);
3852 		dimm_p.s.rc5 = ((rc >> 0) & 0x3) << 2;
3853 		dimm_p.s.rc5 |= ((rc >> 2) & 0x3) << 0;
3854 
3855 		dimm_p.s.rc6 = 0;
3856 		dimm_p.s.rc7 = 0;
3857 		dimm_p.s.rc8 = 0;
3858 		dimm_p.s.rc9 = 0;
3859 
3860 		/*
3861 		 * rc10               DDR4 RDIMM Operating Speed
3862 		 * ===  ===================================================
3863 		 *  0               tclk_psecs >= 1250 psec DDR4-1600 (1250 ps)
3864 		 *  1   1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps)
3865 		 *  2   1071 psec > tclk_psecs >=  938 psec DDR4-2133 ( 938 ps)
3866 		 *  3    938 psec > tclk_psecs >=  833 psec DDR4-2400 ( 833 ps)
3867 		 *  4    833 psec > tclk_psecs >=  750 psec DDR4-2666 ( 750 ps)
3868 		 *  5    750 psec > tclk_psecs >=  625 psec DDR4-3200 ( 625 ps)
3869 		 */
3870 		dimm_p.s.rc10 = 0;
3871 		if (tclk_psecs < 1250)
3872 			dimm_p.s.rc10 = 1;
3873 		if (tclk_psecs < 1071)
3874 			dimm_p.s.rc10 = 2;
3875 		if (tclk_psecs < 938)
3876 			dimm_p.s.rc10 = 3;
3877 		if (tclk_psecs < 833)
3878 			dimm_p.s.rc10 = 4;
3879 		if (tclk_psecs < 750)
3880 			dimm_p.s.rc10 = 5;
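		/* e.g. tclk_psecs = 833 (DDR4-2400) selects rc10 = 3 */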
3881 
3882 		dimm_p.s.rc11 = 0;
3883 		dimm_p.s.rc12 = 0;
3884 		/* 0=LRDIMM, 1=RDIMM */
3885 		dimm_p.s.rc13 = (spd_dimm_type == 4) ? 0 : 4;
3886 		dimm_p.s.rc13 |= (ddr_type == DDR4_DRAM) ?
3887 			(spd_addr_mirror << 3) : 0;
3888 		dimm_p.s.rc14 = 0;
3889 		dimm_p.s.rc15 = 0;	/* 1 nCK latency adder */
3890 
3891 		ddr4_p0.u64 = 0;
3892 
3893 		ddr4_p0.s.rc8x = 0;
3894 		ddr4_p0.s.rc7x = 0;
3895 		ddr4_p0.s.rc6x = 0;
3896 		ddr4_p0.s.rc5x = 0;
3897 		ddr4_p0.s.rc4x = 0;
3898 
3899 		ddr4_p0.s.rc3x = compute_rc3x(tclk_psecs);
3900 
3901 		ddr4_p0.s.rc2x = 0;
3902 		ddr4_p0.s.rc1x = 0;
3903 
3904 		ddr4_p1.u64 = 0;
3905 
3906 		ddr4_p1.s.rcbx = 0;
3907 		ddr4_p1.s.rcax = 0;
3908 		ddr4_p1.s.rc9x = 0;
3909 
3910 		ddr4_ctl.u64 = 0;
3911 		ddr4_ctl.cn70xx.ddr4_dimm0_wmask = 0x004;
3912 		ddr4_ctl.cn70xx.ddr4_dimm1_wmask =
3913 		    (dimm_count > 1) ? 0x004 : 0x0000;
3914 
3915 		/*
3916 		 * Handle any overrides from envvars here...
3917 		 */
3918 		s = lookup_env(priv, "ddr_ddr4_params0");
3919 		if (s)
3920 			ddr4_p0.u64 = simple_strtoul(s, NULL, 0);
3921 
3922 		s = lookup_env(priv, "ddr_ddr4_params1");
3923 		if (s)
3924 			ddr4_p1.u64 = simple_strtoul(s, NULL, 0);
3925 
3926 		s = lookup_env(priv, "ddr_ddr4_dimm_ctl");
3927 		if (s)
3928 			ddr4_ctl.u64 = simple_strtoul(s, NULL, 0);
3929 
3930 		for (i = 0; i < 11; ++i) {
3931 			u64 value;
3932 
3933 			s = lookup_env(priv, "ddr_ddr4_rc%1xx", i + 1);
3934 			if (s) {
3935 				value = simple_strtoul(s, NULL, 0);
3936 				if (i < 8) {
3937 					ddr4_p0.u64 &= ~((u64)0xff << (i * 8));
3938 					ddr4_p0.u64 |= (value << (i * 8));
3939 				} else {
3940 					ddr4_p1.u64 &=
3941 					    ~((u64)0xff << ((i - 8) * 8));
3942 					ddr4_p1.u64 |= (value << ((i - 8) * 8));
3943 				}
3944 			}
3945 		}
3946 
3947 		/*
3948 		 * write the final CSR values
3949 		 */
3950 		lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS0(dimmx, if_num),
3951 		       ddr4_p0.u64);
3952 
3953 		lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), ddr4_ctl.u64);
3954 
3955 		lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS1(dimmx, if_num),
3956 		       ddr4_p1.u64);
3957 
3958 		debug("DIMM%d Register Control Words        RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n",
3959 		      dimmx, ddr4_p1.s.rcbx, ddr4_p1.s.rcax,
3960 		      ddr4_p1.s.rc9x, ddr4_p0.s.rc8x,
3961 		      ddr4_p0.s.rc7x, ddr4_p0.s.rc6x,
3962 		      ddr4_p0.s.rc5x, ddr4_p0.s.rc4x,
3963 		      ddr4_p0.s.rc3x, ddr4_p0.s.rc2x, ddr4_p0.s.rc1x);
3964 
3965 	} else {
3966 		rc = read_spd(&dimm_config_table[didx], 0, 69);
3967 		dimm_p.s.rc0 = (rc >> 0) & 0xf;
3968 		dimm_p.s.rc1 = (rc >> 4) & 0xf;
3969 
3970 		rc = read_spd(&dimm_config_table[didx], 0, 70);
3971 		dimm_p.s.rc2 = (rc >> 0) & 0xf;
3972 		dimm_p.s.rc3 = (rc >> 4) & 0xf;
3973 
3974 		rc = read_spd(&dimm_config_table[didx], 0, 71);
3975 		dimm_p.s.rc4 = (rc >> 0) & 0xf;
3976 		dimm_p.s.rc5 = (rc >> 4) & 0xf;
3977 
3978 		rc = read_spd(&dimm_config_table[didx], 0, 72);
3979 		dimm_p.s.rc6 = (rc >> 0) & 0xf;
3980 		dimm_p.s.rc7 = (rc >> 4) & 0xf;
3981 
3982 		rc = read_spd(&dimm_config_table[didx], 0, 73);
3983 		dimm_p.s.rc8 = (rc >> 0) & 0xf;
3984 		dimm_p.s.rc9 = (rc >> 4) & 0xf;
3985 
3986 		rc = read_spd(&dimm_config_table[didx], 0, 74);
3987 		dimm_p.s.rc10 = (rc >> 0) & 0xf;
3988 		dimm_p.s.rc11 = (rc >> 4) & 0xf;
3989 
3990 		rc = read_spd(&dimm_config_table[didx], 0, 75);
3991 		dimm_p.s.rc12 = (rc >> 0) & 0xf;
3992 		dimm_p.s.rc13 = (rc >> 4) & 0xf;
3993 
3994 		rc = read_spd(&dimm_config_table[didx], 0, 76);
3995 		dimm_p.s.rc14 = (rc >> 0) & 0xf;
3996 		dimm_p.s.rc15 = (rc >> 4) & 0xf;
3997 
3998 		s = ddr_getenv_debug(priv, "ddr_clk_drive");
3999 		if (s) {
4000 			if (strcmp(s, "light") == 0)
4001 				dimm_p.s.rc5 = 0x0;	/* Light Drive */
4002 			if (strcmp(s, "moderate") == 0)
4003 				dimm_p.s.rc5 = 0x5;	/* Moderate Drive */
4004 			if (strcmp(s, "strong") == 0)
4005 				dimm_p.s.rc5 = 0xA;	/* Strong Drive */
4006 			printf("Parameter found in environment. ddr_clk_drive = %s\n",
4007 			       s);
4008 		}
4009 
4010 		s = ddr_getenv_debug(priv, "ddr_cmd_drive");
4011 		if (s) {
4012 			if (strcmp(s, "light") == 0)
4013 				dimm_p.s.rc3 = 0x0;	/* Light Drive */
4014 			if (strcmp(s, "moderate") == 0)
4015 				dimm_p.s.rc3 = 0x5;	/* Moderate Drive */
4016 			if (strcmp(s, "strong") == 0)
4017 				dimm_p.s.rc3 = 0xA;	/* Strong Drive */
4018 			printf("Parameter found in environment. ddr_cmd_drive = %s\n",
4019 			       s);
4020 		}
4021 
4022 		s = ddr_getenv_debug(priv, "ddr_ctl_drive");
4023 		if (s) {
4024 			if (strcmp(s, "light") == 0)
4025 				dimm_p.s.rc4 = 0x0;	/* Light Drive */
4026 			if (strcmp(s, "moderate") == 0)
4027 				dimm_p.s.rc4 = 0x5;	/* Moderate Drive */
4028 			printf("Parameter found in environment. ddr_ctl_drive = %s\n",
4029 			       s);
4030 		}
4031 
4032 		/*
4033 		 * rc10               DDR3 RDIMM Operating Speed
4034 		 * ==   =====================================================
4035 		 *  0               tclk_psecs >= 2500 psec DDR3/DDR3L-800 def
4036 		 *  1   2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066
4037 		 *  2   1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333
4038 		 *  3   1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600
4039 		 *  4   1250 psec > tclk_psecs >= 1071 psec DDR3-1866
4040 		 */
4041 		dimm_p.s.rc10 = 0;
4042 		if (tclk_psecs < 2500)
4043 			dimm_p.s.rc10 = 1;
4044 		if (tclk_psecs < 1875)
4045 			dimm_p.s.rc10 = 2;
4046 		if (tclk_psecs < 1500)
4047 			dimm_p.s.rc10 = 3;
4048 		if (tclk_psecs < 1250)
4049 			dimm_p.s.rc10 = 4;
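		/* e.g. tclk_psecs = 1250 (DDR3-1600) selects rc10 = 3 */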
4050 	}
4051 
4052 	s = lookup_env(priv, "ddr_dimmx_params", i);
4053 	if (s)
4054 		dimm_p.u64 = simple_strtoul(s, NULL, 0);
4055 
4056 	for (i = 0; i < 16; ++i) {
4057 		u64 value;
4058 
4059 		s = lookup_env(priv, "ddr_rc%d", i);
4060 		if (s) {
4061 			value = simple_strtoul(s, NULL, 0);
4062 			dimm_p.u64 &= ~((u64)0xf << (i * 4));
4063 			dimm_p.u64 |= (value << (i * 4));
4064 		}
4065 	}
4066 
4067 	lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num), dimm_p.u64);
4068 
4069 	debug("DIMM%d Register Control Words         RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
4070 	      dimmx, dimm_p.s.rc15, dimm_p.s.rc14, dimm_p.s.rc13,
4071 	      dimm_p.s.rc12, dimm_p.s.rc11, dimm_p.s.rc10,
4072 	      dimm_p.s.rc9, dimm_p.s.rc8, dimm_p.s.rc7,
4073 	      dimm_p.s.rc6, dimm_p.s.rc5, dimm_p.s.rc4,
4074 	      dimm_p.s.rc3, dimm_p.s.rc2, dimm_p.s.rc1, dimm_p.s.rc0);
4075 
4076 	// FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 registers,
4077 	// and treat it specially
4078 	if (ddr_type == DDR3_DRAM && num_ranks == 4 &&
4079 	    spd_rdimm_registers == 2 && dimmx == 0) {
4080 		debug("DDR3: Copying DIMM0_PARAMS to DIMM1_PARAMS for pseudo-DIMM #1...\n");
4081 		lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(1, if_num), dimm_p.u64);
4082 	}
4083 }
4084 
4085 static void lmc_dimm01_params(struct ddr_priv *priv)
4086 {
4087 	union cvmx_lmcx_dimm_ctl dimm_ctl;
4088 	char *s;
4089 
4090 	if (spd_rdimm) {
4091 		for (didx = 0; didx < (unsigned int)dimm_count; ++didx)
4092 			lmc_dimm01_params_loop(priv);
4093 
4094 		if (ddr_type == DDR4_DRAM) {
4095 			/* LMC0_DIMM_CTL */
4096 			dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4097 			dimm_ctl.s.dimm0_wmask = 0xdf3f;
4098 			dimm_ctl.s.dimm1_wmask =
4099 			    (dimm_count > 1) ? 0xdf3f : 0x0000;
4100 			dimm_ctl.s.tcws = 0x4e0;
4101 			dimm_ctl.s.parity = c_cfg->parity;
4102 
4103 			s = lookup_env(priv, "ddr_dimm0_wmask");
4104 			if (s) {
4105 				dimm_ctl.s.dimm0_wmask =
4106 				    simple_strtoul(s, NULL, 0);
4107 			}
4108 
4109 			s = lookup_env(priv, "ddr_dimm1_wmask");
4110 			if (s) {
4111 				dimm_ctl.s.dimm1_wmask =
4112 				    simple_strtoul(s, NULL, 0);
4113 			}
4114 
4115 			s = lookup_env(priv, "ddr_dimm_ctl_parity");
4116 			if (s)
4117 				dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
4118 
4119 			s = lookup_env(priv, "ddr_dimm_ctl_tcws");
4120 			if (s)
4121 				dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
4122 
4123 			debug("LMC DIMM_CTL                                  : 0x%016llx\n",
4124 			      dimm_ctl.u64);
4125 			lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4126 
4127 			/* Init RCW */
4128 			oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4129 
4130 			/* Write RC0D last */
4131 			dimm_ctl.s.dimm0_wmask = 0x2000;
4132 			dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ?
4133 				0x2000 : 0x0000;
4134 			debug("LMC DIMM_CTL                                  : 0x%016llx\n",
4135 			      dimm_ctl.u64);
4136 			lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4137 
4138 			/*
4139 			 * Don't write any extended registers the second time
4140 			 */
4141 			lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), 0);
4142 
4143 			/* Init RCW */
4144 			oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4145 		} else {
4146 			/* LMC0_DIMM_CTL */
4147 			dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4148 			dimm_ctl.s.dimm0_wmask = 0xffff;
4149 			// FIXME: recognize a DDR3 RDIMM with 4 ranks and 2
4150 			// registers, and treat it specially
4151 			if (num_ranks == 4 && spd_rdimm_registers == 2) {
4152 				debug("DDR3: Activating DIMM_CTL[dimm1_mask] bits...\n");
4153 				dimm_ctl.s.dimm1_wmask = 0xffff;
4154 			} else {
4155 				dimm_ctl.s.dimm1_wmask =
4156 				    (dimm_count > 1) ? 0xffff : 0x0000;
4157 			}
4158 			dimm_ctl.s.tcws = 0x4e0;
4159 			dimm_ctl.s.parity = c_cfg->parity;
4160 
4161 			s = lookup_env(priv, "ddr_dimm0_wmask");
4162 			if (s) {
4163 				dimm_ctl.s.dimm0_wmask =
4164 				    simple_strtoul(s, NULL, 0);
4165 			}
4166 
4167 			s = lookup_env(priv, "ddr_dimm1_wmask");
4168 			if (s) {
4169 				dimm_ctl.s.dimm1_wmask =
4170 				    simple_strtoul(s, NULL, 0);
4171 			}
4172 
4173 			s = lookup_env(priv, "ddr_dimm_ctl_parity");
4174 			if (s)
4175 				dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
4176 
4177 			s = lookup_env(priv, "ddr_dimm_ctl_tcws");
4178 			if (s)
4179 				dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
4180 
4181 			debug("LMC DIMM_CTL                                  : 0x%016llx\n",
4182 			      dimm_ctl.u64);
4183 			lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4184 
4185 			/* Init RCW */
4186 			oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4187 		}
4188 
4189 	} else {
4190 		/* Disable register control writes for unbuffered */
4191 		union cvmx_lmcx_dimm_ctl dimm_ctl;
4192 
4193 		dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4194 		dimm_ctl.s.dimm0_wmask = 0;
4195 		dimm_ctl.s.dimm1_wmask = 0;
4196 		lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4197 	}
4198 }
4199 
4200 static int lmc_rank_init(struct ddr_priv *priv)
4201 {
4202 	char *s;
4203 
4204 	if (enable_by_rank_init) {
4205 		by_rank = 3;
4206 		saved_rank_mask = rank_mask;
4207 	}
4208 
4209 start_by_rank_init:
4210 
4211 	if (enable_by_rank_init) {
4212 		rank_mask = (1 << by_rank);
4213 		if (!(rank_mask & saved_rank_mask))
4214 			goto end_by_rank_init;
4215 		if (by_rank == 0)
4216 			rank_mask = saved_rank_mask;
4217 
4218 		debug("\n>>>>> BY_RANK: starting rank %d with mask 0x%02x\n\n",
4219 		      by_rank, rank_mask);
4220 	}
4221 
4222 	/*
4223 	 * Comments (steps 3 through 5) continue in oct3_ddr3_seq()
4224 	 */
4225 	union cvmx_lmcx_modereg_params0 mp0;
4226 
4227 	if (ddr_memory_preserved(priv)) {
4228 		/*
4229 		 * Contents are being preserved. Take DRAM out of self-refresh
4230 		 * first. Then init steps can proceed normally.
4231 		 */
4232 		/* self-refresh exit */
4233 		oct3_ddr3_seq(priv, rank_mask, if_num, 3);
4234 	}
4235 
4236 	mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
4237 	mp0.s.dllr = 1;		/* Set during first init sequence */
4238 	lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
4239 
4240 	ddr_init_seq(priv, rank_mask, if_num);
4241 
4242 	mp0.s.dllr = 0;		/* Clear for normal operation */
4243 	lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
4244 
4245 	if (spd_rdimm && ddr_type == DDR4_DRAM &&
4246 	    octeon_is_cpuid(OCTEON_CN7XXX)) {
4247 		debug("Running init sequence 1\n");
4248 		change_rdimm_mpr_pattern(priv, rank_mask, if_num, dimm_count);
4249 	}
4250 
4251 	memset(lanes, 0, sizeof(lanes));
4252 	for (lane = 0; lane < last_lane; lane++) {
4253 		// init all lanes to reset value
4254 		dac_settings[lane] = 127;
4255 	}
4256 
4257 	// FIXME: disable internal VREF if deskew is disabled?
4258 	if (disable_deskew_training) {
4259 		debug("N%d.LMC%d: internal VREF Training disabled, leaving them in RESET.\n",
4260 		      node, if_num);
4261 		num_samples = 0;
4262 	} else if (ddr_type == DDR4_DRAM &&
4263 		   !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
4264 		num_samples = DEFAULT_DAC_SAMPLES;
4265 	} else {
4266 		// if DDR3 or no ability to write DAC values
4267 		num_samples = 1;
4268 	}
4269 
4270 perform_internal_vref_training:
4271 
4272 	total_dac_eval_retries = 0;
4273 	dac_eval_exhausted = 0;
4274 
4275 	for (sample = 0; sample < num_samples; sample++) {
4276 		dac_eval_retries = 0;
4277 
4278 		// make offset and internal vref training repeatable
4279 		do {
4280 			/*
4281 			 * 6.9.8 LMC Offset Training
4282 			 * LMC requires input-receiver offset training.
4283 			 */
4284 			perform_offset_training(priv, rank_mask, if_num);
4285 
4286 			/*
4287 			 * 6.9.9 LMC Internal vref Training
4288 			 * LMC requires input-reference-voltage training.
4289 			 */
4290 			perform_internal_vref_training(priv, rank_mask, if_num);
4291 
4292 			// read and maybe display the DAC values for a sample
4293 			read_dac_dbi_settings(priv, if_num, /*DAC*/ 1,
4294 					      dac_settings);
4295 			if (num_samples == 1 || ddr_verbose(priv)) {
4296 				display_dac_dbi_settings(if_num, /*DAC*/ 1,
4297 							 use_ecc, dac_settings,
4298 							 "Internal VREF");
4299 			}
4300 
4301 			// for DDR4, evaluate the DAC settings and retry
4302 			// if any issues
4303 			if (ddr_type == DDR4_DRAM) {
4304 				if (evaluate_dac_settings
4305 				    (if_64b, use_ecc, dac_settings)) {
4306 					dac_eval_retries += 1;
4307 					if (dac_eval_retries >
4308 					    DAC_RETRIES_LIMIT) {
4309 						debug("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
4310 						      node, if_num);
4311 						dac_eval_exhausted += 1;
4312 					} else {
4313 						debug("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
4314 						      node, if_num);
4315 						total_dac_eval_retries += 1;
4316 						// try another sample
4317 						continue;
4318 					}
4319 				}
4320 
4321 				// taking multiple samples, otherwise do nothing
4322 				if (num_samples > 1) {
4323 					// good sample or exhausted retries,
4324 					// record it
4325 					for (lane = 0; lane < last_lane;
4326 					     lane++) {
4327 						lanes[lane].bytes[sample] =
4328 						    dac_settings[lane];
4329 					}
4330 				}
4331 			}
4332 			// done if DDR3, or good sample, or exhausted retries
4333 			break;
4334 		} while (1);
4335 	}
4336 
4337 	if (ddr_type == DDR4_DRAM && dac_eval_exhausted > 0) {
4338 		debug("N%d.LMC%d: DDR internal VREF DAC settings: total retries %d, exhausted %d\n",
4339 		      node, if_num, total_dac_eval_retries, dac_eval_exhausted);
4340 	}
4341 
4342 	if (num_samples > 1) {
4343 		debug("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n",
4344 		      node, if_num);
4345 
4346 		for (lane = 0; lane < last_lane; lane++) {
4347 			dac_settings[lane] =
4348 			    process_samples_average(&lanes[lane].bytes[0],
4349 						    num_samples, if_num, lane);
4350 		}
4351 		display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
4352 					 dac_settings, "Averaged VREF");
4353 
4354 		// finally, write the final DAC values
4355 		for (lane = 0; lane < last_lane; lane++) {
4356 			load_dac_override(priv, if_num, dac_settings[lane],
4357 					  lane);
4358 		}
4359 	}
4360 
4361 	// allow override of any byte-lane internal VREF
4362 	int overrode_vref_dac = 0;
4363 
4364 	for (lane = 0; lane < last_lane; lane++) {
4365 		s = lookup_env(priv, "ddr%d_vref_dac_byte%d", if_num, lane);
4366 		if (s) {
4367 			dac_settings[lane] = simple_strtoul(s, NULL, 0);
4368 			overrode_vref_dac = 1;
4369 			// finally, write the new DAC value
4370 			load_dac_override(priv, if_num, dac_settings[lane],
4371 					  lane);
4372 		}
4373 	}
4374 	if (overrode_vref_dac) {
4375 		display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
4376 					 dac_settings, "Override VREF");
4377 	}
4378 
4379 	// as a second step, after internal VREF training, before starting
4380 	// deskew training:
4381 	// for DDR3 and OCTEON3 not O78 pass 1.x, override the DAC setting
4382 	// to 127
4383 	if (ddr_type == DDR3_DRAM && !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
4384 	    !disable_deskew_training) {
4385 		load_dac_override(priv, if_num, 127, /* all */ 0x0A);
4386 		debug("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127.\n",
4387 		      node, if_num);
4388 	}
4389 
4390 	/*
4391 	 * 4.8.8 LMC Deskew Training
4392 	 *
4393 	 * LMC requires input-read-data deskew training.
4394 	 */
4395 	if (!disable_deskew_training) {
4396 		deskew_training_errors =
4397 		    perform_deskew_training(priv, rank_mask, if_num,
4398 					    spd_rawcard_aorb);
4399 
4400 		// All the Deskew lock and saturation retries (may) have
4401 		// been done, but we ended up with nibble errors; so,
4402 		// as a last-ditch effort, try the Internal vref
4403 		// Training again...
4404 		if (deskew_training_errors) {
4405 			if (internal_retries <
4406 			    DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) {
4407 				internal_retries++;
4408 				debug("N%d.LMC%d: Deskew training results still unsettled - retrying internal vref training (%d)\n",
4409 				      node, if_num, internal_retries);
4410 				goto perform_internal_vref_training;
4411 			} else {
4412 				if (restart_if_dsk_incomplete) {
4413 					debug("N%d.LMC%d: INFO: Deskew training incomplete - %d retries exhausted, Restarting LMC init...\n",
4414 					      node, if_num, internal_retries);
4415 					return -EAGAIN;
4416 				}
4417 				debug("N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n",
4418 				      node, if_num, internal_retries);
4419 			}
4420 		}		/* if (deskew_training_errors) */
4421 
4422 		// FIXME: treat this as the final DSK print from now on,
4423 		// and print if VBL_NORM or above also, save the results
4424 		// of the original training in case we want them later
4425 		validate_deskew_training(priv, rank_mask, if_num,
4426 					 &deskew_training_results, 1);
4427 	} else {		/* if (! disable_deskew_training) */
4428 		debug("N%d.LMC%d: Deskew Training disabled, printing settings before HWL.\n",
4429 		      node, if_num);
4430 		validate_deskew_training(priv, rank_mask, if_num,
4431 					 &deskew_training_results, 1);
4432 	}			/* if (! disable_deskew_training) */
4433 
4434 	if (enable_by_rank_init) {
4435 		read_dac_dbi_settings(priv, if_num, /*dac */ 1,
4436 				      &rank_dac[by_rank].bytes[0]);
4437 		get_deskew_settings(priv, if_num, &rank_dsk[by_rank]);
4438 		debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
4439 	}
4440 
4441 end_by_rank_init:
4442 
4443 	if (enable_by_rank_init) {
4444 		//debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
4445 
4446 		by_rank--;
4447 		if (by_rank >= 0)
4448 			goto start_by_rank_init;
4449 
4450 		rank_mask = saved_rank_mask;
4451 		ddr_init_seq(priv, rank_mask, if_num);
4452 
4453 		process_by_rank_dac(priv, if_num, rank_mask, rank_dac);
4454 		process_by_rank_dsk(priv, if_num, rank_mask, rank_dsk);
4455 
4456 		// FIXME: set this to prevent later checking!!!
4457 		disable_deskew_training = 1;
4458 
4459 		debug("\n>>>>> BY_RANK: FINISHED!!\n\n");
4460 	}
4461 
4462 	return 0;
4463 }
4464 
4465 static void lmc_config_2(struct ddr_priv *priv)
4466 {
4467 	union cvmx_lmcx_config lmc_config;
4468 	int save_ref_zqcs_int;
4469 	u64 temp_delay_usecs;
4470 
4471 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4472 
4473 	/*
4474 	 * Temporarily select the minimum ZQCS interval and wait
4475 	 * long enough for a few ZQCS calibrations to occur.  This
4476 	 * should ensure that the calibration circuitry is
4477 	 * stabilized before read/write leveling occurs.
4478 	 */
4479 	if (octeon_is_cpuid(OCTEON_CN7XXX)) {
4480 		save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
4481 		/* set smallest interval */
4482 		lmc_config.cn78xx.ref_zqcs_int = 1 | (32 << 7);
4483 	} else {
4484 		save_ref_zqcs_int = lmc_config.cn63xx.ref_zqcs_int;
4485 		/* set smallest interval */
4486 		lmc_config.cn63xx.ref_zqcs_int = 1 | (32 << 7);
4487 	}
4488 	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
4489 	lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4490 
4491 	/*
4492 	 * Compute an appropriate delay based on the current ZQCS
4493 	 * interval. The delay should be long enough for the
4494 	 * current ZQCS delay counter to expire plus ten of the
4495 	 * minimum intervals to ensure that some calibrations
4496 	 * occur.
4497 	 */
4498 	temp_delay_usecs = (((u64)save_ref_zqcs_int >> 7) * tclk_psecs *
4499 			    100 * 512 * 128) / (10000 * 10000) + 10 *
4500 		((u64)32 * tclk_psecs * 100 * 512 * 128) / (10000 * 10000);
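	/*
	 * The "* 100 / (10000 * 10000)" factors reduce to a plain
	 * picosecond-to-microsecond conversion (divide by 10^6); the
	 * second term adds ten intervals at the minimum setting (32)
	 * written above.
	 */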
4501 
4502 	debug("Waiting %lld usecs for ZQCS calibrations to start\n",
4503 	      temp_delay_usecs);
4504 	udelay(temp_delay_usecs);
4505 
4506 	if (octeon_is_cpuid(OCTEON_CN7XXX)) {
4507 		/* Restore computed interval */
4508 		lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
4509 	} else {
4510 		/* Restore computed interval */
4511 		lmc_config.cn63xx.ref_zqcs_int = save_ref_zqcs_int;
4512 	}
4513 
4514 	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
4515 	lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4516 }
4517 
4518 static union cvmx_lmcx_wlevel_ctl wl_ctl __section(".data");
4519 static union cvmx_lmcx_wlevel_rankx wl_rank __section(".data");
4520 static union cvmx_lmcx_modereg_params1 mp1 __section(".data");
4521 
4522 static int wl_mask[9] __section(".data");
4523 static int byte_idx __section(".data");
4524 static int ecc_ena __section(".data");
4525 static int wl_roundup __section(".data");
4526 static int save_mode32b __section(".data");
4527 static int disable_hwl_validity __section(".data");
4528 static int default_wl_rtt_nom __section(".data");
4529 static int wl_pbm_pump __section(".data");
4530 
4531 static void lmc_write_leveling_loop(struct ddr_priv *priv, int rankx)
4532 {
4533 	int wloop = 0;
4534 	// retries per sample for HW-related issues with bitmasks or values
4535 	int wloop_retries = 0;
4536 	int wloop_retries_total = 0;
4537 	int wloop_retries_exhausted = 0;
4538 #define WLOOP_RETRIES_DEFAULT 5
4539 	int wl_val_err;
4540 	int wl_mask_err_rank = 0;
4541 	int wl_val_err_rank = 0;
4542 	// array to collect counts of byte-lane values
4543 	// delays use only the low-order 3 bits and are even, so really only 2-bit values
4544 	struct wlevel_bitcnt wl_bytes[9], wl_bytes_extra[9];
4545 	int extra_bumps, extra_mask;
4546 	int rank_nom = 0;
4547 
4548 	if (!(rank_mask & (1 << rankx)))
4549 		return;
4550 
4551 	if (match_wl_rtt_nom) {
4552 		if (rankx == 0)
4553 			rank_nom = mp1.s.rtt_nom_00;
4554 		if (rankx == 1)
4555 			rank_nom = mp1.s.rtt_nom_01;
4556 		if (rankx == 2)
4557 			rank_nom = mp1.s.rtt_nom_10;
4558 		if (rankx == 3)
4559 			rank_nom = mp1.s.rtt_nom_11;
4560 
4561 		debug("N%d.LMC%d.R%d: Setting WLEVEL_CTL[rtt_nom] to %d (%d)\n",
4562 		      node, if_num, rankx, rank_nom,
4563 		      imp_val->rtt_nom_ohms[rank_nom]);
4564 	}
4565 
4566 	memset(wl_bytes, 0, sizeof(wl_bytes));
4567 	memset(wl_bytes_extra, 0, sizeof(wl_bytes_extra));
4568 
4569 	// restructure the looping so we can keep trying until we get the
4570 	// samples we want
4571 	while (wloop < wl_loops) {
4572 		wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
4573 
4574 		wl_ctl.cn78xx.rtt_nom =
4575 		    (default_wl_rtt_nom > 0) ? (default_wl_rtt_nom - 1) : 7;
4576 
4577 		if (match_wl_rtt_nom) {
4578 			wl_ctl.cn78xx.rtt_nom =
4579 			    (rank_nom > 0) ? (rank_nom - 1) : 7;
4580 		}
4581 
4582 		/* Clear write-level delays */
4583 		lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), 0);
4584 
4585 		wl_mask_err = 0;	/* Reset error counters */
4586 		wl_val_err = 0;
4587 
4588 		for (byte_idx = 0; byte_idx < 9; ++byte_idx)
4589 			wl_mask[byte_idx] = 0;	/* Reset bitmasks */
4590 
4591 		// do all the byte-lanes at the same time
4592 		wl_ctl.cn78xx.lanemask = 0x1ff;
4593 
4594 		lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
4595 
4596 		/*
4597 		 * Read and write values back in order to update the
4598 		 * status field. This ensures that we read the updated
4599 		 * values after write-leveling has completed.
4600 		 */
4601 		lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
4602 		       lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)));
4603 
4604 		/* write-leveling */
4605 		oct3_ddr3_seq(priv, 1 << rankx, if_num, 6);
4606 
4607 		do {
4608 			wl_rank.u64 = lmc_rd(priv,
4609 					     CVMX_LMCX_WLEVEL_RANKX(rankx,
4610 								    if_num));
4611 		} while (wl_rank.cn78xx.status != 3);
4612 
4613 		wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
4614 								  if_num));
4615 
4616 		for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
4617 			wl_mask[byte_idx] = lmc_ddr3_wl_dbg_read(priv,
4618 								 if_num,
4619 								 byte_idx);
4620 			if (wl_mask[byte_idx] == 0)
4621 				++wl_mask_err;
4622 		}
4623 
4624 		// check validity only if no bitmask errors
4625 		if (wl_mask_err == 0) {
4626 			if ((spd_dimm_type == 1 || spd_dimm_type == 2) &&
4627 			    dram_width != 16 && if_64b &&
4628 			    !disable_hwl_validity) {
4629 				// bypass if [mini|SO]-[RU]DIMM or x16 or
4630 				// 32-bit
4631 				wl_val_err =
4632 				    validate_hw_wl_settings(if_num,
4633 							    &wl_rank,
4634 							    spd_rdimm, ecc_ena);
4635 				wl_val_err_rank += (wl_val_err != 0);
4636 			}
4637 		} else {
4638 			wl_mask_err_rank++;
4639 		}
4640 
4641 		// before we print, if we had bitmask or validity errors,
4642 		// do a retry...
4643 		if (wl_mask_err != 0 || wl_val_err != 0) {
4644 			if (wloop_retries < WLOOP_RETRIES_DEFAULT) {
4645 				wloop_retries++;
4646 				wloop_retries_total++;
4647 				// this printout is per-retry: only when VBL
4648 				// is high enough (DEV?)
4649 				// FIXME: do we want to show the bad bitmaps
4650 				// or delays here also?
4651 				debug("N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n",
4652 				      node, if_num, rankx,
4653 				      (wl_mask_err) ? "Bitmask" : "Validity");
4654 				// this takes us back to the top without
4655 				// counting a sample
4656 				return;
4657 			}
4658 
4659 			// retries exhausted, do not print at normal VBL
4660 			debug("N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n",
4661 			      node, if_num, rankx,
4662 			      (wl_mask_err) ? "Bitmask" : "Validity");
4663 			wloop_retries_exhausted++;
4664 		}
4665 		// no errors or exhausted retries, use this sample
4666 		wloop_retries = 0;	//reset for next sample
4667 
4668 		// when only 1 sample or forced, print the bitmasks then
4669 		// current HW WL
4670 		if (wl_loops == 1 || wl_print) {
4671 			if (wl_print > 1)
4672 				display_wl_bm(if_num, rankx, wl_mask);
4673 			display_wl(if_num, wl_rank, rankx);
4674 		}
4675 
4676 		if (wl_roundup) {	/* Round up odd bitmask delays */
4677 			for (byte_idx = 0; byte_idx < (8 + ecc_ena);
4678 			     ++byte_idx) {
4679 				if (!(if_bytemask & (1 << byte_idx)))
4680 					return;
4681 				upd_wl_rank(&wl_rank, byte_idx,
4682 					    roundup_ddr3_wlevel_bitmask
4683 					    (wl_mask[byte_idx]));
4684 			}
4685 			lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
4686 			       wl_rank.u64);
4687 			display_wl(if_num, wl_rank, rankx);
4688 		}
4689 
4690 		// OK, we have a decent sample, no bitmask or validity errors
4691 		extra_bumps = 0;
4692 		extra_mask = 0;
4693 		for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
4694 			int ix;
4695 
4696 			if (!(if_bytemask & (1 << byte_idx)))
4697 				return;
4698 
4699 			// increment count of byte-lane value
4700 			// only 4 values
4701 			ix = (get_wl_rank(&wl_rank, byte_idx) >> 1) & 3;
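			// HW WL delays land on even settings 0/2/4/6, so
			// (delay >> 1) & 3 yields a compact 0..3 vote index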
4702 			wl_bytes[byte_idx].bitcnt[ix]++;
4703 			wl_bytes_extra[byte_idx].bitcnt[ix]++;
4704 			// if perfect...
4705 			if (__builtin_popcount(wl_mask[byte_idx]) == 4) {
4706 				wl_bytes_extra[byte_idx].bitcnt[ix] +=
4707 				    wl_pbm_pump;
4708 				extra_bumps++;
4709 				extra_mask |= 1 << byte_idx;
4710 			}
4711 		}
4712 
4713 		if (extra_bumps) {
4714 			if (wl_print > 1) {
4715 				debug("N%d.LMC%d.R%d: HWL sample had %d bumps (0x%02x).\n",
4716 				      node, if_num, rankx, extra_bumps,
4717 				      extra_mask);
4718 			}
4719 		}
4720 
4721 		// if we get here, we have taken a decent sample
4722 		wloop++;
4723 
4724 	}			/* while (wloop < wl_loops) */
4725 
4726 	// if we did sample more than once, try to pick a majority vote
4727 	if (wl_loops > 1) {
4728 		// look for the majority in each byte-lane
4729 		for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
4730 			int mx, mc, xc, cc;
4731 			int ix, alts;
4732 			int maj, xmaj, xmx, xmc, xxc, xcc;
4733 
4734 			if (!(if_bytemask & (1 << byte_idx)))
4735 				return;
4736 			maj = find_wl_majority(&wl_bytes[byte_idx], &mx,
4737 					       &mc, &xc, &cc);
4738 			xmaj = find_wl_majority(&wl_bytes_extra[byte_idx],
4739 						&xmx, &xmc, &xxc, &xcc);
4740 			if (maj != xmaj) {
4741 				if (wl_print) {
4742 					debug("N%d.LMC%d.R%d: Byte %d: HWL maj %d(%d), USING xmaj %d(%d)\n",
4743 					      node, if_num, rankx,
4744 					      byte_idx, maj, xc, xmaj, xxc);
4745 				}
4746 				mx = xmx;
4747 				mc = xmc;
4748 				xc = xxc;
4749 				cc = xcc;
4750 			}
4751 
4752 			// see if there was an alternate
4753 			// take out the majority choice
4754 			alts = (mc & ~(1 << mx));
4755 			if (alts != 0) {
4756 				for (ix = 0; ix < 4; ix++) {
4757 					// FIXME: could be done multiple times?
4758 					// bad if so
4759 					if (alts & (1 << ix)) {
4760 						// set the mask
4761 						hwl_alts[rankx].hwl_alt_mask |=
4762 							(1 << byte_idx);
4763 						// record the value
4764 						hwl_alts[rankx].hwl_alt_delay[byte_idx] =
4765 							ix << 1;
4766 						if (wl_print > 1) {
4767 							debug("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n",
4768 							      node,
4769 							      if_num,
4770 							      rankx,
4771 							      byte_idx,
4772 							      mx << 1,
4773 							      xc,
4774 							      ix << 1,
4775 							      wl_bytes
4776 							      [byte_idx].bitcnt
4777 							      [ix]);
4778 						}
4779 					}
4780 				}
4781 			}
4782 
4783 			if (cc > 2) {	// unlikely, but...
4784 				// assume: counts for 3 indices are all 1
4785 				// possibilities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6
4786 				// and the desired?:   2  ,   4  ,     6, 0
4787 				// we choose the middle, assuming one of the
4788 				// outliers is bad
4789 				// NOTE: this is an ugly hack at the moment;
4790 				// there must be a better way
4791 				switch (mc) {
4792 				case 0x7:
4793 					mx = 1;
4794 					break;	// was 0/2/4, choose 2
4795 				case 0xb:
4796 					mx = 0;
4797 					break;	// was 0/2/6, choose 0
4798 				case 0xd:
4799 					mx = 3;
4800 					break;	// was 0/4/6, choose 6
4801 				case 0xe:
4802 					mx = 2;
4803 					break;	// was 2/4/6, choose 4
4804 				default:
4805 				case 0xf:
4806 					mx = 1;
4807 					break;	// was 0/2/4/6, choose 2?
4808 				}
4809 				printf("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
4810 				       node, if_num, rankx, byte_idx, mc,
4811 				       mx << 1);
4812 			}
4813 			upd_wl_rank(&wl_rank, byte_idx, mx << 1);
4814 		}
4815 
4816 		lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
4817 		       wl_rank.u64);
4818 		display_wl_with_final(if_num, wl_rank, rankx);
4819 
4820 		// FIXME: does this help make the output a little easier
4821 		// to focus?
4822 		if (wl_print > 0)
4823 			debug("-----------\n");
4824 
4825 	}			/* if (wl_loops > 1) */
4826 
4827 	// maybe print an error summary for the rank
4828 	if (wl_mask_err_rank != 0 || wl_val_err_rank != 0) {
4829 		debug("N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
4830 		      node, if_num, rankx, wl_mask_err_rank,
4831 		      wl_val_err_rank, wloop_retries_total,
4832 		      wloop_retries_exhausted);
4833 	}
4834 }
4835 
4836 static void lmc_write_leveling(struct ddr_priv *priv)
4837 {
4838 	union cvmx_lmcx_config cfg;
4839 	int rankx;
4840 	char *s;
4841 
4842 	/*
4843 	 * 4.8.9 LMC Write Leveling
4844 	 *
4845 	 * LMC supports an automatic write leveling like that described in the
4846 	 * JEDEC DDR3 specifications separately per byte-lane.
4847 	 *
4848 	 * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations
4849 	 * must be completed prior to starting this LMC write-leveling sequence.
4850 	 *
4851 	 * There are many possible procedures that will write-level all the
4852 	 * attached DDR3 DRAM parts. One possibility is for software to simply
4853 	 * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
4854 	 * describes one possible sequence that uses LMC's auto write-leveling
4855 	 * capabilities.
4856 	 *
4857 	 * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
4858 	 *    delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
4859 	 *    point.
4860 	 *
4861 	 * Do the remaining steps 2-7 separately for each rank i with attached
4862 	 * DRAM.
4863 	 *
4864 	 * 2. Write LMC(0)_WLEVEL_RANKi = 0.
4865 	 *
4866 	 * 3. For x8 parts:
4867 	 *
4868 	 *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4869 	 *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
4870 	 *    DRAM.
4871 	 *
4872 	 *    For x16 parts:
4873 	 *
4874 	 *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4875 	 *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
4876 	 *    attached DRAM.
4877 	 *
4878 	 * 4. Without changing any other fields in LMC(0)_CONFIG,
4879 	 *
4880 	 *    o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
4881 	 *
4882 	 *    o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
4883 	 *
4884 	 *    o write LMC(0)_SEQ_CTL[INIT_START] = 1
4885 	 *
4886 	 *    LMC will initiate write-leveling at this point. Assuming
4887 	 *    LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on
4888 	 *    the selected DRAM rank via a DDR3 MR1 write, then sequences
4889 	 *    through
4890 	 *    and accumulates write-leveling results for eight different delay
4891 	 *    settings twice, starting at a delay of zero in this case since
4892 	 *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
4893 	 *    setting, covering a total distance of one CK, then disables the
4894 	 *    write-leveling via another DDR3 MR1 write.
4895 	 *
4896 	 *    After the sequence through 16 delay settings is complete:
4897 	 *
4898 	 *    o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
4899 	 *
4900 	 *    o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
4901 	 *      by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
4902 	 *      leveling result of 1 that followed result of 0 during the
4903 	 *      sequence, except that the LMC always writes
4904 	 *      LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
4905 	 *
4906 	 *    o Software can read the eight write-leveling results from the
4907 	 *      first pass through the delay settings by reading
4908 	 *      LMC(0)_WLEVEL_DBG[BITMASK] (after writing
4909 	 *      LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the write-leveling
4910 	 *      results from the second pass through the eight delay
4911 	 *      settings. They should often be identical to the
4912 	 *      LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
4913 	 *
4914 	 * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
4915 	 *
4916 	 *    LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
4917 	 *    lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
4918 	 *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
4919 	 *    software wrote in substep 2 above, which is 0.
4920 	 *
4921 	 * 6. For x16 parts:
4922 	 *
4923 	 *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4924 	 *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
4925 	 *    attached DRAM.
4926 	 *
4927 	 *    Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
4928 	 *    setting. Skip to substep 7 if this has already been done.
4929 	 *
4930 	 *    For x8 parts:
4931 	 *
4932 	 *    Skip this substep. Go to substep 7.
4933 	 *
4934 	 * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
4935 	 *    lanes on all ranks with attached DRAM.
4936 	 *
4937 	 *    At this point, all byte lanes on rank i with attached DRAM should
4938 	 *    have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
4939 	 *    the result for each byte lane.
4940 	 *
4941 	 *    But note that the DDR3 write-leveling sequence will only determine
4942 	 *    the delay modulo the CK cycle time, and cannot determine how many
4943 	 *    additional CK cycles of delay are present. Software must calculate
4944 	 *    the number of CK cycles, or equivalently, the
4945 	 *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings.
4946 	 *
4947 	 *    This BYTE*<4:3> calculation is system/board specific.
4948 	 *
4949 	 * Many techniques can be used to calculate write-leveling BYTE*<4:3>
4950 	 * values, including:
4951 	 *
4952 	 *    o Known values for some byte lanes.
4953 	 *
4954 	 *    o Relative values for some byte lanes relative to others.
4955 	 *
4956 	 *    For example, suppose lane X is likely to require a larger
4957 	 *    write-leveling delay than lane Y. A BYTEX<2:0> value that is much
4958 	 *    smaller than the BYTEY<2:0> value may then indicate that the
4959 	 *    required lane X delay wrapped into the next CK, so BYTEX<4:3>
4960 	 *    should be set to BYTEY<4:3>+1.
4961 	 *
4962 	 *    When ECC DRAM is not present (i.e. when DRAM is not attached to
4963 	 *    the DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the
4964 	 *    DDR_DQS_<4>_* and DDR_DQ<35:32> chip signals), write
4965 	 *    LMC(0)_WLEVEL_RANK*[BYTE8] = LMC(0)_WLEVEL_RANK*[BYTE0],
4966 	 *    using the final calculated BYTE0 value.
4967 	 *    Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0],
4968 	 *    using the final calculated BYTE0 value.
4969 	 *
4970 	 * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks.
4971 	 *
4972 	 *    Let rank i be a rank with attached DRAM.
4973 	 *
4974 	 *    For all ranks j that do not have attached DRAM, set
4975 	 *    LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi.
4976 	 */
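	/*
	 * The per-rank work (steps 2-7 above) is performed by
	 * lmc_write_leveling_loop(), invoked for each rank further below;
	 * this function only sets up the common controls and overrides.
	 */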
4977 
4978 	rankx = 0;
4979 	wl_roundup = 0;
4980 	disable_hwl_validity = 0;
4981 
4982 	// wl_pbm_pump: weight for write-leveling PBMs...
4983 	// 0 causes original behavior
4984 	// 1 allows a minority of 2 pbms to outscore a majority of 3 non-pbms
4985 	// 4 would allow a minority of 1 pbm to outscore a majority of 4
4986 	// non-pbms
4987 	wl_pbm_pump = 4;	// FIXME: is 4 too much?
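	// Worked example with wl_pbm_pump = 4: one perfect-bitmask sample
	// contributes 1 + 4 = 5 to its bucket in wl_bytes_extra[], so it
	// outscores four ordinary samples of a different value (4 x 1 = 4)
	// when the "extra" majority is computed.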
4988 
4989 	if (wl_loops) {
4990 		debug("N%d.LMC%d: Performing Hardware Write-Leveling\n", node,
4991 		      if_num);
4992 	} else {
4993 		/* Force software write-leveling to run */
4994 		wl_mask_err = 1;
4995 		debug("N%d.LMC%d: Forcing software Write-Leveling\n", node,
4996 		      if_num);
4997 	}
4998 
4999 	default_wl_rtt_nom = (ddr_type == DDR3_DRAM) ?
5000 		rttnom_20ohm : ddr4_rttnom_40ohm;
5001 
5002 	cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5003 	ecc_ena = cfg.s.ecc_ena;
5004 	save_mode32b = cfg.cn78xx.mode32b;
5005 	cfg.cn78xx.mode32b = (!if_64b);
5006 	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5007 	debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
5008 
5009 	s = lookup_env(priv, "ddr_wlevel_roundup");
5010 	if (s)
5011 		wl_roundup = simple_strtoul(s, NULL, 0);
5012 
5013 	s = lookup_env(priv, "ddr_wlevel_printall");
5014 	if (s)
5015 		wl_print = strtoul(s, NULL, 0);
5016 
5017 	s = lookup_env(priv, "ddr_wlevel_pbm_bump");
5018 	if (s)
5019 		wl_pbm_pump = strtoul(s, NULL, 0);
5020 
5021 	// default to disabled when the RL sequential delay check is disabled
5022 	disable_hwl_validity = disable_sequential_delay_check;
5023 	s = lookup_env(priv, "ddr_disable_hwl_validity");
5024 	if (s)
5025 		disable_hwl_validity = !!strtoul(s, NULL, 0);
5026 
5027 	s = lookup_env(priv, "ddr_wl_rtt_nom");
5028 	if (s)
5029 		default_wl_rtt_nom = simple_strtoul(s, NULL, 0);
5030 
5031 	s = lookup_env(priv, "ddr_match_wl_rtt_nom");
5032 	if (s)
5033 		match_wl_rtt_nom = !!simple_strtoul(s, NULL, 0);
5034 
5035 	if (match_wl_rtt_nom)
5036 		mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
5037 
5038 	// For DDR3, we do not touch WLEVEL_CTL fields OR_DIS or BITMASK
5039 	// For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here
5040 	if (ddr_type == DDR4_DRAM) {
5041 		int default_or_dis = 1;
5042 		int default_bitmask = 0xff;
5043 
5044 		// when x4, use only the lower nibble
5045 		if (dram_width == 4) {
5046 			default_bitmask = 0x0f;
5047 			if (wl_print) {
5048 				debug("N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%02x for DDR4 x4\n",
5049 				      node, if_num, default_bitmask);
5050 			}
5051 		}
5052 
5053 		wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
5054 		wl_ctl.s.or_dis = default_or_dis;
5055 		wl_ctl.s.bitmask = default_bitmask;
5056 
5057 		// allow overrides
5058 		s = lookup_env(priv, "ddr_wlevel_ctl_or_dis");
5059 		if (s)
5060 			wl_ctl.s.or_dis = !!strtoul(s, NULL, 0);
5061 
5062 		s = lookup_env(priv, "ddr_wlevel_ctl_bitmask");
5063 		if (s)
5064 			wl_ctl.s.bitmask = simple_strtoul(s, NULL, 0);
5065 
5066 		// print only if not defaults
5067 		if (wl_ctl.s.or_dis != default_or_dis ||
5068 		    wl_ctl.s.bitmask != default_bitmask) {
5069 			debug("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
5070 			      node, if_num, wl_ctl.s.or_dis, wl_ctl.s.bitmask);
5071 		}
5072 
5073 		// always write
5074 		lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
5075 	}
5076 
5077 	// Start the hardware write-leveling loop per rank
5078 	for (rankx = 0; rankx < dimm_count * 4; rankx++)
5079 		lmc_write_leveling_loop(priv, rankx);
5080 
5081 	cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5082 	cfg.cn78xx.mode32b = save_mode32b;
5083 	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5084 	debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
5085 
5086 	// At the end of HW Write Leveling, check on some DESKEW things...
5087 	if (!disable_deskew_training) {
5088 		struct deskew_counts dsk_counts;
5089 		int retry_count = 0;
5090 
5091 		debug("N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n",
5092 		      node, if_num);
5093 
5094 		do {
5095 			validate_deskew_training(priv, rank_mask, if_num,
5096 						 &dsk_counts, 1);
5097 
5098 			// RAWCARD A or B does not benefit from retraining
5099 			// when only saturation is seen; any rawcard is
5100 			// retrained if there is a nibble error
5101 			if ((!spd_rawcard_aorb && dsk_counts.saturated > 0) ||
5102 			    (dsk_counts.nibrng_errs != 0 ||
5103 			     dsk_counts.nibunl_errs != 0)) {
5104 				retry_count++;
5105 				debug("N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
5106 				      node, if_num, retry_count);
5107 				perform_deskew_training(priv, rank_mask, if_num,
5108 							spd_rawcard_aorb);
5109 			} else {
5110 				break;
5111 			}
5112 		} while (retry_count < 5);
5113 	}
5114 }
5115 
5116 static void lmc_workaround(struct ddr_priv *priv)
5117 {
5118 	/* Workaround Trcd overflow by using Additive latency. */
5119 	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
5120 		union cvmx_lmcx_modereg_params0 mp0;
5121 		union cvmx_lmcx_timing_params1 tp1;
5122 		union cvmx_lmcx_control ctrl;
5123 		int rankx;
5124 
5125 		tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
5126 		mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
5127 		ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
5128 
5129 		if (tp1.cn78xx.trcd == 0) {
5130 			debug("Workaround Trcd overflow by using Additive latency.\n");
5131 			/* Hard code this to 12 and enable additive latency */
5132 			tp1.cn78xx.trcd = 12;
5133 			mp0.s.al = 2;	/* CL-2 */
5134 			ctrl.s.pocas = 1;
5135 
5136 			debug("MODEREG_PARAMS0                               : 0x%016llx\n",
5137 			      mp0.u64);
5138 			lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
5139 			       mp0.u64);
5140 			debug("TIMING_PARAMS1                                : 0x%016llx\n",
5141 			      tp1.u64);
5142 			lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
5143 
5144 			debug("LMC_CONTROL                                   : 0x%016llx\n",
5145 			      ctrl.u64);
5146 			lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
5147 
5148 			for (rankx = 0; rankx < dimm_count * 4; rankx++) {
5149 				if (!(rank_mask & (1 << rankx)))
5150 					continue;
5151 
5152 				/* MR1 */
5153 				ddr4_mrw(priv, if_num, rankx, -1, 1, 0);
5154 			}
5155 		}
5156 	}
5157 
5158 	// this is here just for output, to allow check of the Deskew
5159 	// settings one last time...
5160 	if (!disable_deskew_training) {
5161 		struct deskew_counts dsk_counts;
5162 
5163 		debug("N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n",
5164 		      node, if_num);
5165 		validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
5166 					 3);
5167 	}
5168 
5169 	/*
5170 	 * Workaround Errata 26304 (T88@2.0, O75@1.x, O78@2.x)
5171 	 *
5172 	 * When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND
5173 	 * LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set
5174 	 * LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1.
5175 	 */
5176 	if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
5177 	    octeon_is_cpuid(OCTEON_CNF75XX_PASS1_X)) {
5178 		union cvmx_lmcx_dll_ctl3 dll_ctl3;
5179 		union cvmx_lmcx_phy_ctl2 phy_ctl2;
5180 		union cvmx_lmcx_ext_config ext_cfg;
5181 		int increased_dsk_adj = 0;
5182 		int byte;
5183 
5184 		phy_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL2(if_num));
5185 		ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
5186 		dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
5187 
5188 		for (byte = 0; byte < 8; ++byte) {
5189 			if (!(if_bytemask & (1 << byte)))
5190 				continue;
5191 			increased_dsk_adj |=
5192 			    (((phy_ctl2.u64 >> (byte * 3)) & 0x7) > 4);
5193 		}
5194 
5195 		if (dll_ctl3.s.wr_deskew_ena == 1 && increased_dsk_adj) {
5196 			ext_cfg.s.drive_ena_bprch = 1;
5197 			lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
5198 			debug("LMC%d: Forcing DRIVE_ENA_BPRCH for Workaround Errata 26304.\n",
5199 			      if_num);
5200 		}
5201 	}
5202 }
5203 
5204 // Software Write-Leveling block
5205 
5206 #define VREF_RANGE1_LIMIT 0x33	// range1 is valid for 0x00 - 0x32
5207 #define VREF_RANGE2_LIMIT 0x18	// range2 is valid for 0x00 - 0x17
5208 // full window is valid for 0x00 to 0x4A
5209 // let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1
5210 #define VREF_LIMIT        (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT)
5211 #define VREF_FINAL        (VREF_LIMIT - 1)
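/*
 * The vref training loop walks a single linear index from 0 up to
 * VREF_FINAL: indices below 0x18 select DDR4 VrefDQ range 2 with
 * value = index, higher indices select range 1 with value = index - 0x18
 * (see the mapping in ddr4_vref_loop() below); the last pass, with the
 * index equal to VREF_FINAL, programs the chosen final setting instead.
 */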
5212 
5213 enum sw_wl_status {
5214 	WL_ESTIMATED = 0, /* HW/SW wleveling failed. Result estimated */
5215 	WL_HARDWARE = 1,	/* H/W wleveling succeeded */
5216 	WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous settings */
5217 	WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal setting */
5218 };
5219 
5220 static u64 rank_addr __section(".data");
5221 static int vref_val __section(".data");
5222 static int final_vref_val __section(".data");
5223 static int final_vref_range __section(".data");
5224 static int start_vref_val __section(".data");
5225 static int computed_final_vref_val __section(".data");
5226 static char best_vref_val_count __section(".data");
5227 static char vref_val_count __section(".data");
5228 static char best_vref_val_start __section(".data");
5229 static char vref_val_start __section(".data");
5230 static int bytes_failed __section(".data");
5231 static enum sw_wl_status byte_test_status[9] __section(".data");
5232 static enum sw_wl_status sw_wl_rank_status __section(".data");
5233 static int sw_wl_failed __section(".data");
5234 static int sw_wl_hw __section(".data");
5235 static int measured_vref_flag __section(".data");
5236 
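/*
 * Per-rank vref step: for index values below VREF_FINAL simply program
 * the next VrefDQ (range, value) pair; on the final pass pick the value
 * to keep - the computed one, the midpoint of the best measured passing
 * window, or the MODEREG_PARAMS2 default if nothing passed - allow an
 * environment override, and program it.
 */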
5237 static void ddr4_vref_loop(struct ddr_priv *priv, int rankx)
5238 {
5239 	char *s;
5240 
5241 	if (vref_val < VREF_FINAL) {
5242 		int vrange, vvalue;
5243 
5244 		if (vref_val < VREF_RANGE2_LIMIT) {
5245 			vrange = 1;
5246 			vvalue = vref_val;
5247 		} else {
5248 			vrange = 0;
5249 			vvalue = vref_val - VREF_RANGE2_LIMIT;
5250 		}
5251 
5252 		set_vref(priv, if_num, rankx, vrange, vvalue);
5253 	} else {		/* if (vref_val < VREF_FINAL) */
5254 		/* Print the final vref value first. */
5255 
5256 		/* Always print the computed value first, if it's valid */
5257 		if (computed_final_vref_val >= 0) {
5258 			debug("N%d.LMC%d.R%d: vref Computed Summary                 :              %2d (0x%02x)\n",
5259 			      node, if_num, rankx,
5260 			      computed_final_vref_val, computed_final_vref_val);
5261 		}
5262 
5263 		if (!measured_vref_flag) {	// setup to use the computed
5264 			best_vref_val_count = 1;
5265 			final_vref_val = computed_final_vref_val;
5266 		} else {	// setup to use the measured
5267 			if (best_vref_val_count > 0) {
5268 				best_vref_val_count =
5269 				    max(best_vref_val_count, (char)2);
5270 				final_vref_val = best_vref_val_start +
5271 					divide_nint(best_vref_val_count - 1, 2);
5272 
5273 				if (final_vref_val < VREF_RANGE2_LIMIT) {
5274 					final_vref_range = 1;
5275 				} else {
5276 					final_vref_range = 0;
5277 					final_vref_val -= VREF_RANGE2_LIMIT;
5278 				}
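				// e.g. a passing window starting at index
				// 0x20 with count 9 gives a midpoint of
				// 0x24, which encodes as range 1,
				// value 0x24 - 0x18 = 0x0c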
5279 
5280 				int vvlo = best_vref_val_start;
5281 				int vrlo;
5282 				int vvhi = best_vref_val_start +
5283 					best_vref_val_count - 1;
5284 				int vrhi;
5285 
5286 				if (vvlo < VREF_RANGE2_LIMIT) {
5287 					vrlo = 2;
5288 				} else {
5289 					vrlo = 1;
5290 					vvlo -= VREF_RANGE2_LIMIT;
5291 				}
5292 
5293 				if (vvhi < VREF_RANGE2_LIMIT) {
5294 					vrhi = 2;
5295 				} else {
5296 					vrhi = 1;
5297 					vvhi -= VREF_RANGE2_LIMIT;
5298 				}
5299 				debug("N%d.LMC%d.R%d: vref Training Summary                 :  0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n",
5300 				      node, if_num, rankx, vvlo, vrlo,
5301 				      final_vref_val,
5302 				      final_vref_range + 1, vvhi, vrhi,
5303 				      best_vref_val_count - 1);
5304 
5305 			} else {
5306 				/*
5307 				 * If nothing passed use the default vref
5308 				 * value for this rank
5309 				 */
5310 				union cvmx_lmcx_modereg_params2 mp2;
5311 
5312 				mp2.u64 =
5313 					lmc_rd(priv,
5314 					       CVMX_LMCX_MODEREG_PARAMS2(if_num));
5315 				final_vref_val = (mp2.u64 >>
5316 						  (rankx * 10 + 3)) & 0x3f;
5317 				final_vref_range = (mp2.u64 >>
5318 						    (rankx * 10 + 9)) & 0x01;
5319 
5320 				debug("N%d.LMC%d.R%d: vref Using Default                    :    %2d <----- %2d (0x%02x) -----> %2d, range%1d\n",
5321 				      node, if_num, rankx, final_vref_val,
5322 				      final_vref_val, final_vref_val,
5323 				      final_vref_val, final_vref_range + 1);
5324 			}
5325 		}
5326 
5327 		// allow override
5328 		s = lookup_env(priv, "ddr%d_vref_val_%1d%1d",
5329 			       if_num, !!(rankx & 2), !!(rankx & 1));
5330 		if (s)
5331 			final_vref_val = strtoul(s, NULL, 0);
5332 
5333 		set_vref(priv, if_num, rankx, final_vref_range, final_vref_val);
5334 	}
5335 }
5336 
5337 #define WL_MIN_NO_ERRORS_COUNT 3	// FIXME? three passes without errors
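/*
 * The coarse software write-leveling search below is driven from
 * lmc_sw_write_leveling(): lmc_sw_write_leveling_loop() is called
 * repeatedly until WL_MIN_NO_ERRORS_COUNT consecutive passes complete
 * with no errors on the byte lanes still under test.
 */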
5338 
5339 static int errors __section(".data");
5340 static int byte_delay[9] __section(".data");
5341 static u64 bytemask __section(".data");
5342 static int bytes_todo __section(".data");
5343 static int no_errors_count __section(".data");
5344 static u64 bad_bits[2] __section(".data");
5345 static u64 sum_dram_dclk __section(".data");
5346 static u64 sum_dram_ops __section(".data");
5347 static u64 start_dram_dclk __section(".data");
5348 static u64 stop_dram_dclk __section(".data");
5349 static u64 start_dram_ops __section(".data");
5350 static u64 stop_dram_ops __section(".data");
5351 
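/*
 * One pass of the coarse search: write the current per-byte WLEVEL
 * delays, run the pattern test, and for each failing byte bump its delay
 * by 8 (adjusting the high-order delay bits) until 32 is reached; at that
 * point fall back to a recorded hardware write-leveling alternate if one
 * exists, otherwise drop the byte from the test mask and mark it
 * WL_ESTIMATED.
 */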
5352 static void lmc_sw_write_leveling_loop(struct ddr_priv *priv, int rankx)
5353 {
5354 	int delay;
5355 	int b;
5356 
5357 	// write the current set of WL delays
5358 	lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), wl_rank.u64);
5359 	wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
5360 
5361 	// do the test
5362 	if (sw_wl_hw) {
5363 		errors = run_best_hw_patterns(priv, if_num, rank_addr,
5364 					      DBTRAIN_TEST, bad_bits);
5365 		errors &= bytes_todo;	// keep only the ones we are still doing
5366 	} else {
5367 		start_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
5368 		start_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
5369 		errors = test_dram_byte64(priv, if_num, rank_addr, bytemask,
5370 					  bad_bits);
5371 
5372 		stop_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
5373 		stop_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
5374 		sum_dram_dclk += stop_dram_dclk - start_dram_dclk;
5375 		sum_dram_ops += stop_dram_ops - start_dram_ops;
5376 	}
5377 
5378 	debug("WL pass1: test_dram_byte returned 0x%x\n", errors);
5379 
5380 	// remember, errors will not be returned for byte-lanes that have
5381 	// maxed out...
5382 	if (errors == 0) {
5383 		no_errors_count++;	// bump
5384 		// bypass check/update completely
5385 		if (no_errors_count > 1)
5386 			return;	// to end of do-while
5387 	} else {
5388 		no_errors_count = 0;	// reset
5389 	}
5390 
5391 	// check errors by byte
5392 	for (b = 0; b < 9; ++b) {
5393 		if (!(bytes_todo & (1 << b)))
5394 			continue;
5395 
5396 		delay = byte_delay[b];
5397 		// yes, an error in this byte lane
5398 		if (errors & (1 << b)) {
5399 			debug("        byte %d delay %2d Errors\n", b, delay);
5400 			// since this byte had an error, we move to the next
5401 			// delay value, unless done with it
5402 			delay += 8;	// incr by 8 to do delay high-order bits
5403 			if (delay < 32) {
5404 				upd_wl_rank(&wl_rank, b, delay);
5405 				debug("        byte %d delay %2d New\n",
5406 				      b, delay);
5407 				byte_delay[b] = delay;
5408 			} else {
5409 				// reached max delay, maybe really done with
5410 				// this byte
5411 				// consider an alt only for computed VREF, and only if one exists
5412 				if (!measured_vref_flag &&
5413 				    (hwl_alts[rankx].hwl_alt_mask & (1 << b))) {
5414 					// if an alt exists...
5415 					// just orig low-3 bits
5416 					int bad_delay = delay & 0x6;
5417 
5418 					// yes, use it
5419 					delay =	hwl_alts[rankx].hwl_alt_delay[b];
5420 					// clear that flag
5421 					hwl_alts[rankx].hwl_alt_mask &=
5422 						~(1 << b);
5423 					upd_wl_rank(&wl_rank, b, delay);
5424 					byte_delay[b] = delay;
5425 					debug("        byte %d delay %2d ALTERNATE\n",
5426 					      b, delay);
5427 					debug("N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n",
5428 					      node, if_num,
5429 					      rankx, b, bad_delay, delay);
5430 
5431 				} else {
5432 					unsigned int bits_bad;
5433 
5434 					if (b < 8) {
5435 						// test no longer, remove from
5436 						// byte mask
5437 						bytemask &=
5438 							~(0xffULL << (8 * b));
5439 						bits_bad = (unsigned int)
5440 							((bad_bits[0] >>
5441 							  (8 * b)) & 0xffUL);
5442 					} else {
5443 						bits_bad = (unsigned int)
5444 						    (bad_bits[1] & 0xffUL);
5445 					}
5446 
5447 					// remove from bytes to do
5448 					bytes_todo &= ~(1 << b);
5449 					// make sure this is set for this case
5450 					byte_test_status[b] = WL_ESTIMATED;
5451 					debug("        byte %d delay %2d Exhausted\n",
5452 					      b, delay);
5453 					if (!measured_vref_flag) {
5454 						// this is too noisy when doing
5455 						// measured VREF
5456 						debug("N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED\n",
5457 						      node, if_num, rankx,
5458 						      b, bits_bad, delay);
5459 					}
5460 				}
5461 			}
5462 		} else {
5463 			// no error, stay with current delay, but keep testing
5464 			// it...
5465 			debug("        byte %d delay %2d Passed\n", b, delay);
5466 			byte_test_status[b] = WL_HARDWARE;	// change status
5467 		}
5468 	}			/* for (b = 0; b < 9; ++b) */
5469 }
5470 
5471 static void sw_write_lvl_use_ecc(struct ddr_priv *priv, int rankx)
5472 {
5473 	int save_byte8 = wl_rank.s.byte8;
5474 
5475 	byte_test_status[8] = WL_HARDWARE;	/* H/W delay value */
5476 
5477 	if (save_byte8 != wl_rank.s.byte3 &&
5478 	    save_byte8 != wl_rank.s.byte4) {
5479 		int test_byte8 = save_byte8;
5480 		int test_byte8_error;
5481 		int byte8_error = 0x1f;
5482 		int adder;
5483 		int avg_bytes = divide_nint(wl_rank.s.byte3 + wl_rank.s.byte4,
5484 					    2);
5485 
5486 		for (adder = 0; adder <= 32; adder += 8) {
5487 			test_byte8_error = abs((adder + save_byte8) -
5488 					       avg_bytes);
5489 			if (test_byte8_error < byte8_error) {
5490 				byte8_error = test_byte8_error;
5491 				test_byte8 = save_byte8 + adder;
5492 			}
5493 		}
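		// e.g. with byte3 = 14, byte4 = 18 (average 16) and a
		// hardware byte8 of 2, the adders 0/8/16/24/32 give
		// candidates 2/10/18/26/34, and 18 (adder 16) is closest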
5494 
5495 		// only do the check if we are not using measured VREF
5496 		if (!measured_vref_flag) {
5497 			/* Use only even settings, rounding down... */
5498 			test_byte8 &= ~1;
5499 
5500 			// do validity check on the calculated ECC delay value
5501 			// this depends on the DIMM type
5502 			if (spd_rdimm) {	// RDIMM
5503 				// but not mini-RDIMM
5504 				if (spd_dimm_type != 5) {
5505 					// it can be > byte4, but should never
5506 					// be > byte3
5507 					if (test_byte8 > wl_rank.s.byte3) {
5508 						/* say it is still estimated */
5509 						byte_test_status[8] =
5510 							WL_ESTIMATED;
5511 					}
5512 				}
5513 			} else {	// UDIMM
5514 				if (test_byte8 < wl_rank.s.byte3 ||
5515 				    test_byte8 > wl_rank.s.byte4) {
5516 					// should never be outside the
5517 					// byte 3-4 range
5518 					/* say it is still estimated */
5519 					byte_test_status[8] = WL_ESTIMATED;
5520 				}
5521 			}
5522 			/*
5523 			 * Report whenever the calculation appears bad.
5524 			 * This happens if some of the original values were off,
5525 			 * or unexpected geometry from DIMM type, or custom
5526 			 * circuitry (NIC225E, I am looking at you!).
5527 			 * We will trust the calculated value, and depend on
5528 			 * later testing to catch any instances when that
5529 			 * value is truly bad.
5530 			 */
5531 			// ESTIMATED means there may be an issue
5532 			if (byte_test_status[8] == WL_ESTIMATED) {
5533 				debug("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n",
5534 				      node, if_num, rankx,
5535 				      (spd_rdimm ? 'R' : 'U'), wl_rank.s.byte4,
5536 				      test_byte8, wl_rank.s.byte3);
5537 				byte_test_status[8] = WL_HARDWARE;
5538 			}
5539 		}
5540 		/* Use only even settings */
5541 		wl_rank.s.byte8 = test_byte8 & ~1;
5542 	}
5543 
5544 	if (wl_rank.s.byte8 != save_byte8) {
5545 		/* Change the status if s/w adjusted the delay */
5546 		byte_test_status[8] = WL_SOFTWARE;	/* Estimated delay */
5547 	}
5548 }
5549 
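/*
 * Fine-grained software write-leveling search (currently __maybe_unused):
 * step every remaining byte-lane delay from 0 upward in increments of 2,
 * requiring 1 + wl_offset consecutive passing settings before a byte is
 * considered done; bytes that never pass fall back to an estimate derived
 * from the RLEVEL settings.
 */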
5550 static __maybe_unused void parallel_wl_block_delay(struct ddr_priv *priv,
5551 						   int rankx)
5552 {
5553 	int errors;
5554 	int byte_delay[8];
5555 	int byte_passed[8];
5556 	u64 bytemask;
5557 	u64 bitmask;
5558 	int wl_offset;
5559 	int bytes_todo;
5560 	int sw_wl_offset = 1;
5561 	int delay;
5562 	int b;
5563 
5564 	for (b = 0; b < 8; ++b)
5565 		byte_passed[b] = 0;
5566 
5567 	bytes_todo = if_bytemask;
5568 
5569 	for (wl_offset = sw_wl_offset; wl_offset >= 0; --wl_offset) {
5570 		debug("Starting wl_offset for-loop: %d\n", wl_offset);
5571 
5572 		bytemask = 0;
5573 
5574 		for (b = 0; b < 8; ++b) {
5575 			byte_delay[b] = 0;
5576 			// this does not contain fully passed bytes
5577 			if (!(bytes_todo & (1 << b)))
5578 				continue;
5579 
5580 			// reset across passes if not fully passed
5581 			byte_passed[b] = 0;
5582 			upd_wl_rank(&wl_rank, b, 0);	// all delays start at 0
5583 			bitmask = ((!if_64b) && (b == 4)) ? 0x0f : 0xff;
5584 			// set the bytes bits in the bytemask
5585 			bytemask |= bitmask << (8 * b);
5586 		}		/* for (b = 0; b < 8; ++b) */
5587 
5588 		// start a pass if there is any byte lane to test
5589 		while (bytemask != 0) {
5590 			debug("Starting bytemask while-loop: 0x%llx\n",
5591 			      bytemask);
5592 
5593 			// write this set of WL delays
5594 			lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
5595 			       wl_rank.u64);
5596 			wl_rank.u64 = lmc_rd(priv,
5597 					     CVMX_LMCX_WLEVEL_RANKX(rankx,
5598 								    if_num));
5599 
5600 			// do the test
5601 			if (sw_wl_hw) {
5602 				errors = run_best_hw_patterns(priv, if_num,
5603 							      rank_addr,
5604 							      DBTRAIN_TEST,
5605 							      NULL) & 0xff;
5606 			} else {
5607 				errors = test_dram_byte64(priv, if_num,
5608 							  rank_addr, bytemask,
5609 							  NULL);
5610 			}
5611 
5612 			debug("test_dram_byte returned 0x%x\n", errors);
5613 
5614 			// check errors by byte
5615 			for (b = 0; b < 8; ++b) {
5616 				if (!(bytes_todo & (1 << b)))
5617 					continue;
5618 
5619 				delay = byte_delay[b];
5620 				if (errors & (1 << b)) {	// yes, an error
5621 					debug("        byte %d delay %2d Errors\n",
5622 					      b, delay);
5623 					byte_passed[b] = 0;
5624 				} else {	// no error
5625 					byte_passed[b] += 1;
5626 					// Look for consecutive working settings
5627 					if (byte_passed[b] == (1 + wl_offset)) {
5628 						debug("        byte %d delay %2d FULLY Passed\n",
5629 						      b, delay);
5630 						if (wl_offset == 1) {
5631 							byte_test_status[b] =
5632 								WL_SOFTWARE;
5633 						} else if (wl_offset == 0) {
5634 							byte_test_status[b] =
5635 								WL_SOFTWARE1;
5636 						}
5637 
5638 						// test no longer, remove
5639 						// from byte mask this pass
5640 						bytemask &= ~(0xffULL <<
5641 							      (8 * b));
5642 						// remove completely from
5643 						// concern
5644 						bytes_todo &= ~(1 << b);
5645 						// on to the next byte, bypass
5646 						// delay updating!!
5647 						continue;
5648 					} else {
5649 						debug("        byte %d delay %2d Passed\n",
5650 						      b, delay);
5651 					}
5652 				}
5653 
5654 				// error or not, move to the next delay value
5655 				// for this byte unless all delays are done;
5656 				// only a byte that has "fully passed" bypasses
5657 				// this via the continue above
5658 				delay += 2;
5659 				if (delay < 32) {
5660 					upd_wl_rank(&wl_rank, b, delay);
5661 					debug("        byte %d delay %2d New\n",
5662 					      b, delay);
5663 					byte_delay[b] = delay;
5664 				} else {
5665 					// reached max delay, done with this
5666 					// byte
5667 					debug("        byte %d delay %2d Exhausted\n",
5668 					      b, delay);
5669 					// test no longer, remove from byte
5670 					// mask this pass
5671 					bytemask &= ~(0xffULL << (8 * b));
5672 				}
5673 			}	/* for (b = 0; b < 8; ++b) */
5674 			debug("End of for-loop: bytemask 0x%llx\n", bytemask);
5675 		}		/* while (bytemask != 0) */
5676 	}
5677 
5678 	for (b = 0; b < 8; ++b) {
5679 		// any bytes left in bytes_todo did not pass
5680 		if (bytes_todo & (1 << b)) {
5681 			union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank;
5682 
5683 			/*
5684 			 * Last resort. Use Rlevel settings to estimate
5685 			 * Wlevel if software write-leveling fails
5686 			 */
5687 			debug("Using RLEVEL as WLEVEL estimate for byte %d\n",
5688 			      b);
5689 			lmc_rlevel_rank.u64 =
5690 				lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
5691 								    if_num));
5692 			rlevel_to_wlevel(&lmc_rlevel_rank, &wl_rank, b);
5693 		}
5694 	}			/* for (b = 0; b < 8; ++b) */
5695 }
5696 
5697 static int lmc_sw_write_leveling(struct ddr_priv *priv)
5698 {
5699 	/* Try to determine/optimize write-level delays experimentally. */
5700 	union cvmx_lmcx_wlevel_rankx wl_rank_hw_res;
5701 	union cvmx_lmcx_config cfg;
5702 	int rankx;
5703 	int byte;
5704 	char *s;
5705 	int i;
5706 
5707 	int active_rank;
5708 	int sw_wl_enable = 1;	/* FIX... Should be customizable. */
5709 	int interfaces;
5710 
5711 	static const char * const wl_status_strings[] = {
5712 		"(e)",
5713 		"   ",
5714 		"   ",
5715 		"(1)"
5716 	};
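	/*
	 * Indexed by enum sw_wl_status: WL_ESTIMATED prints "(e)",
	 * WL_HARDWARE and WL_SOFTWARE print blanks, and WL_SOFTWARE1 prints
	 * "(1)"; a trailing "(s)" is appended to the rank summary line when
	 * the rank as a whole was not left at the hardware result.
	 */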
5717 
5718 	// FIXME: make HW-assist the default now?
5719 	int sw_wl_hw_default = SW_WLEVEL_HW_DEFAULT;
5720 	int dram_connection = c_cfg->dram_connection;
5721 
5722 	s = lookup_env(priv, "ddr_sw_wlevel_hw");
5723 	if (s)
5724 		sw_wl_hw_default = !!strtoul(s, NULL, 0);
5725 	if (!if_64b)		// must use SW algo if 32-bit mode
5726 		sw_wl_hw_default = 0;
5727 
5728 	// can never use hw-assist
5729 	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
5730 		sw_wl_hw_default = 0;
5731 
5732 	s = lookup_env(priv, "ddr_software_wlevel");
5733 	if (s)
5734 		sw_wl_enable = strtoul(s, NULL, 0);
5735 
5736 	s = lookup_env(priv, "ddr%d_dram_connection", if_num);
5737 	if (s)
5738 		dram_connection = !!strtoul(s, NULL, 0);
5739 
5740 	cvmx_rng_enable();
5741 
5742 	/*
5743 	 * Get the measured_vref setting from the config, check for an
5744 	 * override...
5745 	 */
5746 	/* NOTE: measured_vref=1 (ON) means force use of MEASURED vref... */
5747 	// NOTE: measured VREF can only be done for DDR4
5748 	if (ddr_type == DDR4_DRAM) {
5749 		measured_vref_flag = c_cfg->measured_vref;
5750 		s = lookup_env(priv, "ddr_measured_vref");
5751 		if (s)
5752 			measured_vref_flag = !!strtoul(s, NULL, 0);
5753 	} else {
5754 		measured_vref_flag = 0;	// OFF for DDR3
5755 	}
5756 
5757 	/*
5758 	 * Ensure ECC is disabled for the DRAM tests when using the SW algo;
5759 	 * otherwise leave it untouched
5760 	 */
5761 	if (!sw_wl_hw_default) {
5762 		cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5763 		cfg.cn78xx.ecc_ena = 0;
5764 		lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5765 	}
5766 
5767 	/*
5768 	 * We need to track absolute rank number, as well as how many
5769 	 * active ranks we have.  Two single rank DIMMs show up as
5770 	 * ranks 0 and 2, but only 2 ranks are active.
5771 	 */
5772 	active_rank = 0;
5773 
5774 	interfaces = __builtin_popcount(if_mask);
5775 
5776 	for (rankx = 0; rankx < dimm_count * 4; rankx++) {
5777 		final_vref_range = 0;
5778 		start_vref_val = 0;
5779 		computed_final_vref_val = -1;
5780 		sw_wl_rank_status = WL_HARDWARE;
5781 		sw_wl_failed = 0;
5782 		sw_wl_hw = sw_wl_hw_default;
5783 
5784 		if (!sw_wl_enable)
5785 			break;
5786 
5787 		if (!(rank_mask & (1 << rankx)))
5788 			continue;
5789 
5790 		debug("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n",
5791 		      node, if_num, rankx,
5792 		      (sw_wl_hw) ? "with H/W assist" :
5793 		      "with S/W algorithm");
5794 
5795 		if (ddr_type == DDR4_DRAM && num_ranks != 4) {
5796 			// always compute when we can...
5797 			computed_final_vref_val =
5798 			    compute_vref_val(priv, if_num, rankx, dimm_count,
5799 					     num_ranks, imp_val,
5800 					     is_stacked_die, dram_connection);
5801 
5802 			// but only use it if allowed
5803 			if (!measured_vref_flag) {
5804 				// skip all the measured vref processing,
5805 				// just the final setting
5806 				start_vref_val = VREF_FINAL;
5807 			}
5808 		}
5809 
5810 		/* Save off the h/w wl results */
5811 		wl_rank_hw_res.u64 = lmc_rd(priv,
5812 					    CVMX_LMCX_WLEVEL_RANKX(rankx,
5813 								   if_num));
5814 
5815 		vref_val_count = 0;
5816 		vref_val_start = 0;
5817 		best_vref_val_count = 0;
5818 		best_vref_val_start = 0;
5819 
5820 		/* Loop one extra time using the Final vref value. */
5821 		for (vref_val = start_vref_val; vref_val < VREF_LIMIT;
5822 		     ++vref_val) {
5823 			if (ddr_type == DDR4_DRAM)
5824 				ddr4_vref_loop(priv, rankx);
5825 
5826 			/* Restore the saved value */
5827 			wl_rank.u64 = wl_rank_hw_res.u64;
5828 
5829 			for (byte = 0; byte < 9; ++byte)
5830 				byte_test_status[byte] = WL_ESTIMATED;
5831 
5832 			if (wl_mask_err == 0) {
5833 				/*
5834 				 * Determine address of DRAM to test for
5835 				 * pass 1 of software write leveling.
5836 				 */
5837 				rank_addr = active_rank *
5838 					(1ull << (pbank_lsb - bunk_enable +
5839 						  (interfaces / 2)));
5840 
5841 				/*
5842 				 * Adjust address for boot bus hole in memory
5843 				 * map.
5844 				 */
5845 				if (rank_addr > 0x10000000)
5846 					rank_addr += 0x10000000;
5847 
5848 				debug("N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n",
5849 				      node, if_num, rankx, active_rank,
5850 				      rank_addr);
5851 
5852 				// start parallel write-leveling block for
5853 				// delay high-order bits
5854 				errors = 0;
5855 				no_errors_count = 0;
5856 				sum_dram_dclk = 0;
5857 				sum_dram_ops = 0;
5858 
5859 				if (if_64b) {
5860 					bytes_todo = (sw_wl_hw) ?
5861 						if_bytemask : 0xFF;
5862 					bytemask = ~0ULL;
5863 				} else {
5864 					// 32-bit, must be using SW algo,
5865 					// only data bytes
5866 					bytes_todo = 0x0f;
5867 					bytemask = 0x00000000ffffffffULL;
5868 				}
5869 
5870 				for (byte = 0; byte < 9; ++byte) {
5871 					if (!(bytes_todo & (1 << byte))) {
5872 						byte_delay[byte] = 0;
5873 					} else {
5874 						byte_delay[byte] =
5875 						    get_wl_rank(&wl_rank, byte);
5876 					}
5877 				}	/* for (byte = 0; byte < 9; ++byte) */
5878 
5879 				do {
5880 					lmc_sw_write_leveling_loop(priv, rankx);
5881 				} while (no_errors_count <
5882 					 WL_MIN_NO_ERRORS_COUNT);
5883 
5884 				if (!sw_wl_hw) {
5885 					u64 percent_x10;
5886 
5887 					if (sum_dram_dclk == 0)
5888 						sum_dram_dclk = 1;
5889 					percent_x10 = sum_dram_ops * 1000 /
5890 						sum_dram_dclk;
5891 					debug("N%d.LMC%d.R%d: ops %llu, cycles %llu, used %llu.%llu%%\n",
5892 					      node, if_num, rankx, sum_dram_ops,
5893 					      sum_dram_dclk, percent_x10 / 10,
5894 					      percent_x10 % 10);
5895 				}
5896 				if (errors) {
5897 					debug("End WLEV_64 while loop: vref_val %d(0x%x), errors 0x%02x\n",
5898 					      vref_val, vref_val, errors);
5899 				}
5900 				// end parallel write-leveling block for
5901 				// delay high-order bits
5902 
5903 				// if we used HW-assist, we did the ECC byte
5904 				// when appropriate
5905 				if (sw_wl_hw) {
5906 					if (wl_print) {
5907 						debug("N%d.LMC%d.R%d: HW-assisted SWL - ECC estimate not needed.\n",
5908 						      node, if_num, rankx);
5909 					}
5910 					goto no_ecc_estimate;
5911 				}
5912 
5913 				if ((if_bytemask & 0xff) == 0xff) {
5914 					if (use_ecc) {
5915 						sw_write_lvl_use_ecc(priv,
5916 								     rankx);
5917 					} else {
5918 						/* H/W delay value */
5919 						byte_test_status[8] =
5920 							WL_HARDWARE;
5921 						/* ECC is not used */
5922 						wl_rank.s.byte8 =
5923 							wl_rank.s.byte0;
5924 					}
5925 				} else {
5926 					if (use_ecc) {
5927 						/* Estimate the ECC byte dly */
5928 						// add hi-order to b4
5929 						wl_rank.s.byte4 |=
5930 							(wl_rank.s.byte3 &
5931 							 0x38);
5932 						if ((wl_rank.s.byte4 & 0x06) <
5933 						    (wl_rank.s.byte3 & 0x06)) {
5934 							// must be next clock
5935 							wl_rank.s.byte4 += 8;
5936 						}
5937 					} else {
5938 						/* ECC is not used */
5939 						wl_rank.s.byte4 =
5940 							wl_rank.s.byte0;
5941 					}
5942 
5943 					/*
5944 					 * Change the status if s/w adjusted
5945 					 * the delay
5946 					 */
5947 					/* Estimated delay */
5948 					byte_test_status[4] = WL_SOFTWARE;
5949 				}	/* if ((if_bytemask & 0xff) == 0xff) */
5950 			}	/* if (wl_mask_err == 0) */
5951 
5952 no_ecc_estimate:
5953 
5954 			bytes_failed = 0;
5955 			for (byte = 0; byte < 9; ++byte) {
5956 				/* Don't accumulate errors for untested bytes */
5957 				if (!(if_bytemask & (1 << byte)))
5958 					continue;
5959 				bytes_failed +=
5960 				    (byte_test_status[byte] == WL_ESTIMATED);
5961 			}
5962 
5963 			/* vref training loop is only used for DDR4  */
5964 			if (ddr_type != DDR4_DRAM)
5965 				break;
5966 
5967 			if (bytes_failed == 0) {
5968 				if (vref_val_count == 0)
5969 					vref_val_start = vref_val;
5970 
5971 				++vref_val_count;
5972 				if (vref_val_count > best_vref_val_count) {
5973 					best_vref_val_count = vref_val_count;
5974 					best_vref_val_start = vref_val_start;
5975 					debug("N%d.LMC%d.R%d: vref Training                    (%2d) :    0x%02x <----- ???? -----> 0x%02x\n",
5976 					      node, if_num, rankx, vref_val,
5977 					      best_vref_val_start,
5978 					      best_vref_val_start +
5979 					      best_vref_val_count - 1);
5980 				}
5981 			} else {
5982 				vref_val_count = 0;
5983 				debug("N%d.LMC%d.R%d: vref Training                    (%2d) :    failed\n",
5984 				      node, if_num, rankx, vref_val);
5985 			}
5986 		}
5987 
5988 		/*
5989 		 * Determine address of DRAM to test for software write
5990 		 * leveling.
5991 		 */
5992 		rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable +
5993 						    (interfaces / 2)));
5994 		/* Adjust address for boot bus hole in memory map. */
5995 		if (rank_addr > 0x10000000)
5996 			rank_addr += 0x10000000;
5997 
5998 		debug("Rank Address: 0x%llx\n", rank_addr);
5999 
6000 		if (bytes_failed) {
6001 			// FIXME? the big hammer, did not even try SW WL pass2,
6002 			// assume only chip reset will help
6003 			debug("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n",
6004 			      node, if_num, rankx);
6005 			sw_wl_failed = 1;
6006 		} else {	/* if (bytes_failed) */
6007 			// SW WL pass 1 was OK, write the settings
6008 			lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
6009 			       wl_rank.u64);
6010 			wl_rank.u64 = lmc_rd(priv,
6011 					     CVMX_LMCX_WLEVEL_RANKX(rankx,
6012 								    if_num));
6013 
6014 			// do validity check on the delay values by running
6015 			// the test 1 more time...
6016 			// FIXME: we really need to check the ECC byte setting
6017 			// here as well, so we need to enable ECC for this test!
6018 			// if there are any errors, claim SW WL failure
6019 			u64 datamask = (if_64b) ? 0xffffffffffffffffULL :
6020 				0x00000000ffffffffULL;
6021 			int errors;
6022 
6023 			// do the test
6024 			if (sw_wl_hw) {
6025 				errors = run_best_hw_patterns(priv, if_num,
6026 							      rank_addr,
6027 							      DBTRAIN_TEST,
6028 							      NULL) & 0xff;
6029 			} else {
6030 				errors = test_dram_byte64(priv, if_num,
6031 							  rank_addr, datamask,
6032 							  NULL);
6033 			}
6034 
6035 			if (errors) {
6036 				debug("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%03x\n",
6037 				      node, if_num, rankx, errors);
6038 				sw_wl_failed = 1;
6039 			}
6040 		}		/* if (bytes_failed) */
6041 
6042 		// FIXME? dump the WL settings, so we get more of a clue
6043 		// as to what happened where
6044 		debug("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX  : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n",
6045 		      node, if_num, rankx, wl_rank.s.status, wl_rank.u64,
6046 		      wl_rank.s.byte8, wl_status_strings[byte_test_status[8]],
6047 		      wl_rank.s.byte7, wl_status_strings[byte_test_status[7]],
6048 		      wl_rank.s.byte6, wl_status_strings[byte_test_status[6]],
6049 		      wl_rank.s.byte5, wl_status_strings[byte_test_status[5]],
6050 		      wl_rank.s.byte4, wl_status_strings[byte_test_status[4]],
6051 		      wl_rank.s.byte3, wl_status_strings[byte_test_status[3]],
6052 		      wl_rank.s.byte2, wl_status_strings[byte_test_status[2]],
6053 		      wl_rank.s.byte1, wl_status_strings[byte_test_status[1]],
6054 		      wl_rank.s.byte0, wl_status_strings[byte_test_status[0]],
6055 		      (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)");
6056 
6057 		// finally, check for fatal conditions: either chip reset
6058 		// right here, or return error flag
6059 		if ((ddr_type == DDR4_DRAM && best_vref_val_count == 0) ||
6060 		    sw_wl_failed) {
6061 			if (!ddr_disable_chip_reset) {	// do chip RESET
6062 				printf("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Resetting node...\n",
6063 				       node, if_num, rankx);
6064 				mdelay(500);
6065 				do_reset(NULL, 0, 0, NULL);
6066 			} else {
6067 				// return error flag so LMC init can be retried.
6068 				debug("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Restarting LMC init...\n",
6069 				      node, if_num, rankx);
6070 				return -EAGAIN;	// -EAGAIN indicates restart possible.
6071 			}
6072 		}
6073 		active_rank++;
6074 	}
6075 
6076 	for (rankx = 0; rankx < dimm_count * 4; rankx++) {
6077 		int parameter_set = 0;
6078 		u64 value;
6079 
6080 		if (!(rank_mask & (1 << rankx)))
6081 			continue;
6082 
6083 		wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
6084 								  if_num));
6085 
6086 		for (i = 0; i < 9; ++i) {
6087 			s = lookup_env(priv, "ddr%d_wlevel_rank%d_byte%d",
6088 				       if_num, rankx, i);
6089 			if (s) {
6090 				parameter_set |= 1;
6091 				value = strtoul(s, NULL, 0);
6092 
6093 				upd_wl_rank(&wl_rank, i, value);
6094 			}
6095 		}
6096 
6097 		s = lookup_env_ull(priv, "ddr%d_wlevel_rank%d", if_num, rankx);
6098 		if (s) {
6099 			parameter_set |= 1;
6100 			value = strtoull(s, NULL, 0);
6101 			wl_rank.u64 = value;
6102 		}
6103 
6104 		if (parameter_set) {
6105 			lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
6106 			       wl_rank.u64);
6107 			wl_rank.u64 =
6108 			    lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
6109 			display_wl(if_num, wl_rank, rankx);
6110 		}
6111 		// if there are unused entries to be filled
6112 		if ((rank_mask & 0x0F) != 0x0F) {
6113 			if (rankx < 3) {
6114 				debug("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n",
6115 				      node, if_num, rankx);
6116 
6117 				// if rank 0, write ranks 1 and 2 here if empty
6118 				if (rankx == 0) {
6119 					// check that rank 1 is empty
6120 					if (!(rank_mask & (1 << 1))) {
6121 						debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6122 						      node, if_num, rankx, 1);
6123 						lmc_wr(priv,
6124 						       CVMX_LMCX_WLEVEL_RANKX(1,
6125 								if_num),
6126 						       wl_rank.u64);
6127 					}
6128 
6129 					// check that rank 2 is empty
6130 					if (!(rank_mask & (1 << 2))) {
6131 						debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6132 						      node, if_num, rankx, 2);
6133 						lmc_wr(priv,
6134 						       CVMX_LMCX_WLEVEL_RANKX(2,
6135 								if_num),
6136 						       wl_rank.u64);
6137 					}
6138 				}
6139 
6140 				// if rank 0, 1 or 2, write rank 3 here if empty
6141 				// check that rank 3 is empty
6142 				if (!(rank_mask & (1 << 3))) {
6143 					debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6144 					      node, if_num, rankx, 3);
6145 					lmc_wr(priv,
6146 					       CVMX_LMCX_WLEVEL_RANKX(3,
6147 								      if_num),
6148 					       wl_rank.u64);
6149 				}
6150 			}
6151 		}
6152 	}
6153 
6154 	/* Enable 32-bit mode if required. */
6155 	cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
6156 	cfg.cn78xx.mode32b = (!if_64b);
6157 	debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
6158 
6159 	/* Restore the ECC configuration */
6160 	if (!sw_wl_hw_default)
6161 		cfg.cn78xx.ecc_ena = use_ecc;
6162 
6163 	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
6164 
6165 	return 0;
6166 }
6167 
6168 static void lmc_dll(struct ddr_priv *priv)
6169 {
6170 	union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
6171 	int setting[9];
6172 	int i;
6173 
6174 	ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6175 
6176 	for (i = 0; i < 9; ++i) {
6177 		SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i));
6178 		lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
6179 		lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6180 		ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6181 		setting[i] = GET_DDR_DLL_CTL3(dll90_setting);
6182 		debug("%d. LMC%d_DLL_CTL3[%d] = %016llx %d\n", i, if_num,
6183 		      GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u64,
6184 		      setting[i]);
6185 	}
6186 
6187 	debug("N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
6188 	      node, if_num, "DLL90 Setting 8:0",
6189 	      setting[8], setting[7], setting[6], setting[5], setting[4],
6190 	      setting[3], setting[2], setting[1], setting[0]);
6191 
6192 	process_custom_dll_offsets(priv, if_num, "ddr_dll_write_offset",
6193 				   c_cfg->dll_write_offset,
6194 				   "ddr%d_dll_write_offset_byte%d", 1);
6195 	process_custom_dll_offsets(priv, if_num, "ddr_dll_read_offset",
6196 				   c_cfg->dll_read_offset,
6197 				   "ddr%d_dll_read_offset_byte%d", 2);
6198 }
6199 
6200 #define SLOT_CTL_INCR(csr, chip, field, incr)				\
6201 	csr.chip.field = (csr.chip.field < (64 - incr)) ?		\
6202 		(csr.chip.field + incr) : 63
6203 
6204 #define INCR(csr, chip, field, incr)                                    \
6205 	csr.chip.field = (csr.chip.field < (64 - incr)) ?		\
6206 		(csr.chip.field + incr) : 63
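/*
 * Both helpers saturate the 6-bit slot-control fields: a field already at
 * or above 64 - incr is clamped to 63 rather than wrapping, e.g. adding 2
 * to a w2r_xrank_init of 63 leaves it at 63.
 */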
6207 
6208 static void lmc_workaround_2(struct ddr_priv *priv)
6209 {
6210 	/* Workaround Errata 21063 */
6211 	if (octeon_is_cpuid(OCTEON_CN78XX) ||
6212 	    octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
6213 		union cvmx_lmcx_slot_ctl0 slot_ctl0;
6214 		union cvmx_lmcx_slot_ctl1 slot_ctl1;
6215 		union cvmx_lmcx_slot_ctl2 slot_ctl2;
6216 		union cvmx_lmcx_ext_config ext_cfg;
6217 
6218 		slot_ctl0.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL0(if_num));
6219 		slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
6220 		slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
6221 
6222 		ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
6223 
6224 		/* When ext_cfg.s.read_ena_bprch is set add 1 */
6225 		if (ext_cfg.s.read_ena_bprch) {
6226 			SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_init, 1);
6227 			SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_l_init, 1);
6228 			SLOT_CTL_INCR(slot_ctl1, cn78xx, r2w_xrank_init, 1);
6229 			SLOT_CTL_INCR(slot_ctl2, cn78xx, r2w_xdimm_init, 1);
6230 		}
6231 
6232 		/* Always add 2 */
6233 		SLOT_CTL_INCR(slot_ctl1, cn78xx, w2r_xrank_init, 2);
6234 		SLOT_CTL_INCR(slot_ctl2, cn78xx, w2r_xdimm_init, 2);
6235 
6236 		lmc_wr(priv, CVMX_LMCX_SLOT_CTL0(if_num), slot_ctl0.u64);
6237 		lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
6238 		lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
6239 	}
6240 
6241 	/* Workaround Errata 21216 */
6242 	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) ||
6243 	    octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
6244 		union cvmx_lmcx_slot_ctl1 slot_ctl1;
6245 		union cvmx_lmcx_slot_ctl2 slot_ctl2;
6246 
6247 		slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
6248 		slot_ctl1.cn78xx.w2w_xrank_init =
6249 		    max(10, (int)slot_ctl1.cn78xx.w2w_xrank_init);
6250 		lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
6251 
6252 		slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
6253 		slot_ctl2.cn78xx.w2w_xdimm_init =
6254 		    max(10, (int)slot_ctl2.cn78xx.w2w_xdimm_init);
6255 		lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
6256 	}
6257 }
6258 
6259 static void lmc_final(struct ddr_priv *priv)
6260 {
6261 	/*
6262 	 * 4.8.11 Final LMC Initialization
6263 	 *
6264 	 * Early LMC initialization, LMC write-leveling, and LMC read-leveling
6265 	 * must be completed prior to starting this final LMC initialization.
6266 	 *
6267 	 * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1,
6268 	 * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected
6269 	 * read-leveling and write-leveling settings. Software should not write
6270 	 * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2
6271 	 * values until after the final read-leveling and write-leveling
6272 	 * settings are written.
6273 	 *
6274 	 * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and
6275 	 * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs
6276 	 * select the minimum gaps between read operations and write operations
6277 	 * of various types.
6278 	 *
6279 	 * Software must not reduce the values in these CSR fields below the
6280 	 * values previously selected by the LMC hardware (during write-leveling
6281 	 * and read-leveling steps above).
6282 	 *
6283 	 * All sections in this chapter may be used to derive proper settings
6284 	 * for these registers.
6285 	 *
6286 	 * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed
6287 	 * properly. This should be done prior to the first read.
6288 	 */
6289 
6290 	/* Clear any residual ECC errors */
6291 	int num_tads = 1;
6292 	int tad;
6293 	int num_mcis = 1;
6294 	int mci;
6295 
6296 	if (octeon_is_cpuid(OCTEON_CN78XX)) {
6297 		num_tads = 8;
6298 		num_mcis = 4;
6299 	} else if (octeon_is_cpuid(OCTEON_CN70XX)) {
6300 		num_tads = 1;
6301 		num_mcis = 1;
6302 	} else if (octeon_is_cpuid(OCTEON_CN73XX) ||
6303 		   octeon_is_cpuid(OCTEON_CNF75XX)) {
6304 		num_tads = 4;
6305 		num_mcis = 3;
6306 	}
6307 
6308 	lmc_wr(priv, CVMX_LMCX_INT(if_num), -1ULL);
6309 	lmc_rd(priv, CVMX_LMCX_INT(if_num));
6310 
6311 	for (tad = 0; tad < num_tads; tad++) {
6312 		l2c_wr(priv, CVMX_L2C_TADX_INT_REL(tad),
6313 		       l2c_rd(priv, CVMX_L2C_TADX_INT_REL(tad)));
6314 		debug("%-45s : (%d) 0x%08llx\n", "CVMX_L2C_TAD_INT", tad,
6315 		      l2c_rd(priv, CVMX_L2C_TADX_INT_REL(tad)));
6316 	}
6317 
6318 	for (mci = 0; mci < num_mcis; mci++) {
6319 		l2c_wr(priv, CVMX_L2C_MCIX_INT_REL(mci),
6320 		       l2c_rd(priv, CVMX_L2C_MCIX_INT_REL(mci)));
6321 		debug("%-45s : (%d) 0x%08llx\n", "L2C_MCI_INT", mci,
6322 		      l2c_rd(priv, CVMX_L2C_MCIX_INT_REL(mci)));
6323 	}
6324 
6325 	debug("%-45s : 0x%08llx\n", "LMC_INT",
6326 	      lmc_rd(priv, CVMX_LMCX_INT(if_num)));
6327 }
6328 
6329 static void lmc_scrambling(struct ddr_priv *priv)
6330 {
6331 	// Make sure scrambling is disabled during init...
6332 	union cvmx_lmcx_control ctrl;
6333 	union cvmx_lmcx_scramble_cfg0 lmc_scramble_cfg0;
6334 	union cvmx_lmcx_scramble_cfg1 lmc_scramble_cfg1;
6335 	union cvmx_lmcx_scramble_cfg2 lmc_scramble_cfg2;
6336 	union cvmx_lmcx_ns_ctl lmc_ns_ctl;
6337 	int use_scramble = 0;	// default OFF
6338 	char *s;
6339 
6340 	ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
6341 	lmc_scramble_cfg0.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num));
6342 	lmc_scramble_cfg1.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num));
6343 	lmc_scramble_cfg2.u64 = 0;	// quiet compiler
6344 	if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
6345 		lmc_scramble_cfg2.u64 =
6346 		    lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num));
6347 	}
6348 	lmc_ns_ctl.u64 = lmc_rd(priv, CVMX_LMCX_NS_CTL(if_num));
6349 
6350 	s = lookup_env_ull(priv, "ddr_use_scramble");
6351 	if (s)
6352 		use_scramble = simple_strtoull(s, NULL, 0);
6353 
6354 	/* Generate random values if scrambling is needed */
6355 	if (use_scramble) {
6356 		lmc_scramble_cfg0.u64 = cvmx_rng_get_random64();
6357 		lmc_scramble_cfg1.u64 = cvmx_rng_get_random64();
6358 		lmc_scramble_cfg2.u64 = cvmx_rng_get_random64();
6359 		lmc_ns_ctl.s.ns_scramble_dis = 0;
6360 		lmc_ns_ctl.s.adr_offset = 0;
6361 		ctrl.s.scramble_ena = 1;
6362 	}
6363 
6364 	s = lookup_env_ull(priv, "ddr_scramble_cfg0");
6365 	if (s) {
6366 		lmc_scramble_cfg0.u64 = simple_strtoull(s, NULL, 0);
6367 		ctrl.s.scramble_ena = 1;
6368 	}
6369 	debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0",
6370 	      lmc_scramble_cfg0.u64);
6371 
6372 	lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), lmc_scramble_cfg0.u64);
6373 
6374 	s = lookup_env_ull(priv, "ddr_scramble_cfg1");
6375 	if (s) {
6376 		lmc_scramble_cfg1.u64 = simple_strtoull(s, NULL, 0);
6377 		ctrl.s.scramble_ena = 1;
6378 	}
6379 	debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1",
6380 	      lmc_scramble_cfg1.u64);
6381 	lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), lmc_scramble_cfg1.u64);
6382 
6383 	if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
6384 		s = lookup_env_ull(priv, "ddr_scramble_cfg2");
6385 		if (s) {
6386 			lmc_scramble_cfg2.u64 = simple_strtoull(s, NULL, 0);
6387 			ctrl.s.scramble_ena = 1;
6388 		}
6389 		debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2",
6390 		      lmc_scramble_cfg2.u64);
6391 		lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num),
6392 		       lmc_scramble_cfg2.u64);
6393 	}
6394 
6395 	s = lookup_env_ull(priv, "ddr_ns_ctl");
6396 	if (s)
6397 		lmc_ns_ctl.u64 = simple_strtoull(s, NULL, 0);
6398 	debug("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u64);
6399 	lmc_wr(priv, CVMX_LMCX_NS_CTL(if_num), lmc_ns_ctl.u64);
6400 
6401 	lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
6402 }
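
/*
 * Illustrative sketch only -- not used by lmc_scrambling() above.  The
 * scramble keys come straight from the on-chip RNG; a variant that
 * refuses a degenerate all-zero key (an extra check that is an
 * assumption here, not something the driver performs) might look like:
 */
static inline u64 get_nonzero_scramble_key(void)
{
	u64 key;

	do {
		key = cvmx_rng_get_random64();
	} while (key == 0);

	return key;
}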
6403 
6404 struct rl_score {
6405 	u64 setting;
6406 	int score;
6407 };
6408 
6409 static union cvmx_lmcx_rlevel_rankx rl_rank __section(".data");
6410 static union cvmx_lmcx_rlevel_ctl rl_ctl __section(".data");
6411 static unsigned char rodt_ctl __section(".data");
6412 
6413 static int rl_rodt_err __section(".data");
6414 static unsigned char rtt_nom __section(".data");
6415 static unsigned char rtt_idx __section(".data");
6416 static char min_rtt_nom_idx __section(".data");
6417 static char max_rtt_nom_idx __section(".data");
6418 static char min_rodt_ctl __section(".data");
6419 static char max_rodt_ctl __section(".data");
6420 static int rl_dbg_loops __section(".data");
6421 static unsigned char save_ddr2t __section(".data");
6422 static int rl_samples __section(".data");
6423 static char rl_compute __section(".data");
6424 static char saved_ddr__ptune __section(".data");
6425 static char saved_ddr__ntune __section(".data");
6426 static char rl_comp_offs __section(".data");
6427 static char saved_int_zqcs_dis __section(".data");
6428 static int max_adj_rl_del_inc __section(".data");
6429 static int print_nom_ohms __section(".data");
6430 static int rl_print __section(".data");
6431 
6432 #ifdef ENABLE_HARDCODED_RLEVEL
6433 static char part_number[21] __section(".data");
6434 #endif /* ENABLE_HARDCODED_RLEVEL */
6435 
6436 struct perfect_counts {
6437 	u16 count[9][32]; // 8+ECC by 64 values
6438 	u32 mask[9];      // 8+ECC, bitmask of perfect delays
6439 };
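
/*
 * Illustrative sketch only -- the driver does this bookkeeping inline in
 * rodt_loop() below.  When a byte lane produces an error-free ("perfect")
 * bitmask, the delay it settled on is tallied in count[][] and remembered
 * in the mask[] bitmap, assuming the delay fits the array bounds above:
 */
static inline void record_perfect_delay(struct perfect_counts *pc,
					int byte, int delay)
{
	pc->count[byte][delay] += 1;	/* how often this delay was seen */
	pc->mask[byte] |= 1U << delay;	/* which delays were seen at all */
}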
6440 
6441 static struct perfect_counts rank_perf[4] __section(".data");
6442 static struct perfect_counts rodt_perfect_counts __section(".data");
6443 static int pbm_lowsum_limit __section(".data");
6444 // FIXME: PBM skip for RODT 240 and 34
6445 static u32 pbm_rodt_skip __section(".data");
6446 
6447 // control rank majority processing
6448 static int disable_rank_majority __section(".data");
6449 
6450 // default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE
6451 // for DDR3
6452 static int enable_rldelay_bump __section(".data");
6453 static int rldelay_bump_incr __section(".data");
6454 static int disable_rlv_bump_this_byte __section(".data");
6455 static u64 value_mask __section(".data");
6456 
6457 static struct rlevel_byte_data rl_byte[9] __section(".data");
6458 static int sample_loops __section(".data");
6459 static int max_samples __section(".data");
6460 static int rl_rank_errors __section(".data");
6461 static int rl_mask_err __section(".data");
6462 static int rl_nonseq_err __section(".data");
6463 static struct rlevel_bitmask rl_mask[9] __section(".data");
6464 static int rl_best_rank_score __section(".data");
6465 
6466 static int rodt_row_skip_mask __section(".data");
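
/*
 * Illustrative sketch only -- a condensed form of the "RLV bump" applied
 * near the end of rank_major_loop() below.  enable_rldelay_bump doubles
 * as a bit mask: when the low bits of a final byte delay all match the
 * mask, the delay is nudged by rldelay_bump_incr (+1, or -1 when
 * RLEVEL_CTL[OFFSET] = 1).  The real code additionally honors
 * disable_rlv_bump_this_byte, which this sketch omits.
 */
static inline int apply_rldelay_bump(int byte_delay, int bump_mask,
				     int bump_incr)
{
	if (bump_mask && (byte_delay & bump_mask) == bump_mask)
		return byte_delay + bump_incr;

	return byte_delay;
}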
6467 
6468 static void rodt_loop(struct ddr_priv *priv, int rankx, struct rl_score
6469 		      rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
6470 {
6471 	union cvmx_lmcx_comp_ctl2 cc2;
6472 	const int rl_separate_ab = 1;
6473 	int i;
6474 
6475 	rl_best_rank_score = DEFAULT_BEST_RANK_SCORE;
6476 	rl_rodt_err = 0;
6477 	cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
6478 	cc2.cn78xx.rodt_ctl = rodt_ctl;
6479 	lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
6480 	cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
6481 	udelay(1); /* Give it a little time to take effect */
6482 	if (rl_print > 1) {
6483 		debug("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
6484 		      cc2.cn78xx.rodt_ctl,
6485 		      imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
6486 	}
6487 
6488 	memset(rl_byte, 0, sizeof(rl_byte));
6489 	memset(&rodt_perfect_counts, 0, sizeof(rodt_perfect_counts));
6490 
6491 	// when iter RODT is the target RODT, take more samples...
6492 	max_samples = rl_samples;
6493 	if (rodt_ctl == default_rodt_ctl)
6494 		max_samples += rl_samples + 1;
6495 
6496 	for (sample_loops = 0; sample_loops < max_samples; sample_loops++) {
6497 		int redoing_nonseq_errs = 0;
6498 
6499 		rl_mask_err = 0;
6500 
6501 		if (!(rl_separate_ab && spd_rdimm &&
6502 		      ddr_type == DDR4_DRAM)) {
6503 			/* Clear read-level delays */
6504 			lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6505 
6506 			/* read-leveling */
6507 			oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6508 
6509 			do {
6510 				rl_rank.u64 =
6511 					lmc_rd(priv,
6512 					       CVMX_LMCX_RLEVEL_RANKX(rankx,
6513 								      if_num));
6514 			} while (rl_rank.cn78xx.status != 3);
6515 		}
6516 
6517 		rl_rank.u64 =
6518 			lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
6519 
6520 		// start bitmask interpretation block
6521 
6522 		memset(rl_mask, 0, sizeof(rl_mask));
6523 
6524 		if (rl_separate_ab && spd_rdimm && ddr_type == DDR4_DRAM) {
6525 			union cvmx_lmcx_rlevel_rankx rl_rank_aside;
6526 			union cvmx_lmcx_modereg_params0 mp0;
6527 
6528 			/* A-side */
6529 			mp0.u64 =
6530 				lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6531 			mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6532 			lmc_wr(priv,
6533 			       CVMX_LMCX_MODEREG_PARAMS0(if_num),
6534 			       mp0.u64);
6535 
6536 			/* Clear read-level delays */
6537 			lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6538 
6539 			/* read-leveling */
6540 			oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6541 
6542 			do {
6543 				rl_rank.u64 =
6544 					lmc_rd(priv,
6545 					       CVMX_LMCX_RLEVEL_RANKX(rankx,
6546 								      if_num));
6547 			} while (rl_rank.cn78xx.status != 3);
6548 
6549 			rl_rank.u64 =
6550 				lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
6551 								    if_num));
6552 
6553 			rl_rank_aside.u64 = rl_rank.u64;
6554 
6555 			rl_mask[0].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 0);
6556 			rl_mask[1].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 1);
6557 			rl_mask[2].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 2);
6558 			rl_mask[3].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 3);
6559 			rl_mask[8].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 8);
6560 			/* A-side complete */
6561 
6562 			/* B-side */
6563 			mp0.u64 =
6564 				lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6565 			mp0.s.mprloc = 3; /* MPR Page 0 Location 3 */
6566 			lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
6567 			       mp0.u64);
6568 
6569 			/* Clear read-level delays */
6570 			lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6571 
6572 			/* read-leveling */
6573 			oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6574 
6575 			do {
6576 				rl_rank.u64 =
6577 					lmc_rd(priv,
6578 					       CVMX_LMCX_RLEVEL_RANKX(rankx,
6579 								      if_num));
6580 			} while (rl_rank.cn78xx.status != 3);
6581 
6582 			rl_rank.u64 =
6583 				lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
6584 								    if_num));
6585 
6586 			rl_mask[4].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 4);
6587 			rl_mask[5].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 5);
6588 			rl_mask[6].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 6);
6589 			rl_mask[7].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 7);
6590 			/* B-side complete */
6591 
6592 			upd_rl_rank(&rl_rank, 0, rl_rank_aside.s.byte0);
6593 			upd_rl_rank(&rl_rank, 1, rl_rank_aside.s.byte1);
6594 			upd_rl_rank(&rl_rank, 2, rl_rank_aside.s.byte2);
6595 			upd_rl_rank(&rl_rank, 3, rl_rank_aside.s.byte3);
6596 			/* ECC A-side */
6597 			upd_rl_rank(&rl_rank, 8, rl_rank_aside.s.byte8);
6598 
6599 			mp0.u64 =
6600 				lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6601 			mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6602 			lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
6603 			       mp0.u64);
6604 		}
6605 
6606 		/*
6607 		 * Evaluate the quality of the read-leveling delays from the
6608 		 * bitmasks. Also save off a software computed read-leveling
6609 		 * mask that may be used later to qualify the delay results
6610 		 * from Octeon.
6611 		 */
6612 		for (i = 0; i < (8 + ecc_ena); ++i) {
6613 			int bmerr;
6614 
6615 			if (!(if_bytemask & (1 << i)))
6616 				continue;
6617 			if (!(rl_separate_ab && spd_rdimm &&
6618 			      ddr_type == DDR4_DRAM)) {
6619 				rl_mask[i].bm =
6620 					lmc_ddr3_rl_dbg_read(priv, if_num, i);
6621 			}
6622 			bmerr = validate_ddr3_rlevel_bitmask(&rl_mask[i],
6623 							     ddr_type);
6624 			rl_mask[i].errs = bmerr;
6625 			rl_mask_err += bmerr;
6626 			// count only the "perfect" bitmasks
6627 			if (ddr_type == DDR4_DRAM && !bmerr) {
6628 				int delay;
6629 				// FIXME: for now, simple filtering:
6630 				// do NOT count PBMs for RODTs in skip mask
6631 				if ((1U << rodt_ctl) & pbm_rodt_skip)
6632 					continue;
6633 				// FIXME: could optimize this a bit?
6634 				delay = get_rl_rank(&rl_rank, i);
6635 				rank_perf[rankx].count[i][delay] += 1;
6636 				rank_perf[rankx].mask[i] |=
6637 					(1ULL << delay);
6638 				rodt_perfect_counts.count[i][delay] += 1;
6639 				rodt_perfect_counts.mask[i] |= (1ULL << delay);
6640 			}
6641 		}
6642 
6643 		/* Set delays for unused bytes to match byte 0. */
6644 		for (i = 0; i < 9; ++i) {
6645 			if (if_bytemask & (1 << i))
6646 				continue;
6647 			upd_rl_rank(&rl_rank, i, rl_rank.s.byte0);
6648 		}
6649 
6650 		/*
6651 		 * Save a copy of the byte delays in physical
6652 		 * order for sequential evaluation.
6653 		 */
6654 		unpack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, rl_rank);
6655 
6656 	redo_nonseq_errs:
6657 
6658 		rl_nonseq_err  = 0;
6659 		if (!disable_sequential_delay_check) {
6660 			for (i = 0; i < 9; ++i)
6661 				rl_byte[i].sqerrs = 0;
6662 
6663 			if ((if_bytemask & 0xff) == 0xff) {
6664 				/*
6665 				 * Evaluate delay sequence across the whole
6666 				 * range of bytes for standard dimms.
6667 				 */
6668 				/* 1=RDIMM, 5=Mini-RDIMM */
6669 				if (spd_dimm_type == 1 || spd_dimm_type == 5) {
6670 					int reg_adj_del = abs(rl_byte[4].delay -
6671 							      rl_byte[5].delay);
6672 
6673 					/*
6674 					 * Registered dimm topology routes
6675 					 * from the center.
6676 					 */
6677 					rl_nonseq_err +=
6678 						nonseq_del(rl_byte, 0,
6679 							   3 + ecc_ena,
6680 							   max_adj_rl_del_inc);
6681 					rl_nonseq_err +=
6682 						nonseq_del(rl_byte, 5,
6683 							   7 + ecc_ena,
6684 							   max_adj_rl_del_inc);
6685 					// byte 5 sqerrs never gets cleared
6686 					// for RDIMMs
6687 					rl_byte[5].sqerrs = 0;
6688 					if (reg_adj_del > 1) {
6689 						/*
6690 						 * Assess proximity of bytes on
6691 						 * opposite sides of register
6692 						 */
6693 						rl_nonseq_err += (reg_adj_del -
6694 								  1) *
6695 							RLEVEL_ADJACENT_DELAY_ERROR;
6696 						// update byte 5 error
6697 						rl_byte[5].sqerrs +=
6698 							(reg_adj_del - 1) *
6699 							RLEVEL_ADJACENT_DELAY_ERROR;
6700 					}
6701 				}
6702 
6703 				/* 2=UDIMM, 6=Mini-UDIMM */
6704 				if (spd_dimm_type == 2 || spd_dimm_type == 6) {
6705 					/*
6706 					 * Unbuffered dimm topology routes
6707 					 * from end to end.
6708 					 */
6709 					rl_nonseq_err += nonseq_del(rl_byte, 0,
6710 								    7 + ecc_ena,
6711 								    max_adj_rl_del_inc);
6712 				}
6713 			} else {
6714 				rl_nonseq_err += nonseq_del(rl_byte, 0,
6715 							    3 + ecc_ena,
6716 							    max_adj_rl_del_inc);
6717 			}
6718 		} /* if (! disable_sequential_delay_check) */
6719 
6720 		rl_rank_errors = rl_mask_err + rl_nonseq_err;
6721 
6722 		// print original sample here only if we are not really
6723 		// averaging or picking best
6724 		// also do not print if we were redoing the NONSEQ score
6725 		// for using COMPUTED
6726 		if (!redoing_nonseq_errs && rl_samples < 2) {
6727 			if (rl_print > 1) {
6728 				display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
6729 				display_rl_bm_scores(if_num, rankx, rl_mask,
6730 						     ecc_ena);
6731 				display_rl_seq_scores(if_num, rankx, rl_byte,
6732 						      ecc_ena);
6733 			}
6734 			display_rl_with_score(if_num, rl_rank, rankx,
6735 					      rl_rank_errors);
6736 		}
6737 
6738 		if (rl_compute) {
6739 			if (!redoing_nonseq_errs) {
6740 				/* Recompute the delays based on the bitmask */
6741 				for (i = 0; i < (8 + ecc_ena); ++i) {
6742 					if (!(if_bytemask & (1 << i)))
6743 						continue;
6744 
6745 					upd_rl_rank(&rl_rank, i,
6746 						    compute_ddr3_rlevel_delay(
6747 							    rl_mask[i].mstart,
6748 							    rl_mask[i].width,
6749 							    rl_ctl));
6750 				}
6751 
6752 				/*
6753 				 * Override the copy of byte delays with the
6754 				 * computed results.
6755 				 */
6756 				unpack_rlevel_settings(if_bytemask, ecc_ena,
6757 						       rl_byte, rl_rank);
6758 
6759 				redoing_nonseq_errs = 1;
6760 				goto redo_nonseq_errs;
6761 
6762 			} else {
6763 				/*
6764 				 * now print this if already printed the
6765 				 * original sample
6766 				 */
6767 				if (rl_samples < 2 || rl_print) {
6768 					display_rl_with_computed(if_num,
6769 								 rl_rank, rankx,
6770 								 rl_rank_errors);
6771 				}
6772 			}
6773 		} /* if (rl_compute) */
6774 
6775 		// end bitmask interpretation block
6776 
6777 		// if it is a better (lower) score, then  keep it
6778 		if (rl_rank_errors < rl_best_rank_score) {
6779 			rl_best_rank_score = rl_rank_errors;
6780 
6781 			// save the new best delays and best errors
6782 			for (i = 0; i < (8 + ecc_ena); ++i) {
6783 				rl_byte[i].best = rl_byte[i].delay;
6784 				rl_byte[i].bestsq = rl_byte[i].sqerrs;
6785 				// save bitmasks and their scores as well
6786 				// xlate UNPACKED index to PACKED index to
6787 				// get from rl_mask
6788 				rl_byte[i].bm = rl_mask[XUP(i, !!ecc_ena)].bm;
6789 				rl_byte[i].bmerrs =
6790 					rl_mask[XUP(i, !!ecc_ena)].errs;
6791 			}
6792 		}
6793 
6794 		rl_rodt_err += rl_rank_errors;
6795 	}
6796 
6797 	/* We recorded the best score across the averaging loops */
6798 	rl_score[rtt_nom][rodt_ctl][rankx].score = rl_best_rank_score;
6799 
6800 	/*
6801 	 * Restore the delays from the best fields that go with the best
6802 	 * score
6803 	 */
6804 	for (i = 0; i < 9; ++i) {
6805 		rl_byte[i].delay = rl_byte[i].best;
6806 		rl_byte[i].sqerrs = rl_byte[i].bestsq;
6807 	}
6808 
6809 	rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
6810 
6811 	pack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, &rl_rank);
6812 
6813 	if (rl_samples > 1) {
6814 		// restore the "best" bitmasks and their scores for printing
6815 		for (i = 0; i < 9; ++i) {
6816 			if ((if_bytemask & (1 << i)) == 0)
6817 				continue;
6818 			// xlate PACKED index to UNPACKED index to get from
6819 			// rl_byte
6820 			rl_mask[i].bm   = rl_byte[XPU(i, !!ecc_ena)].bm;
6821 			rl_mask[i].errs = rl_byte[XPU(i, !!ecc_ena)].bmerrs;
6822 		}
6823 
6824 		// maybe print bitmasks/scores here
6825 		if (rl_print > 1) {
6826 			display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
6827 			display_rl_bm_scores(if_num, rankx, rl_mask, ecc_ena);
6828 			display_rl_seq_scores(if_num, rankx, rl_byte, ecc_ena);
6829 
6830 			display_rl_with_rodt(if_num, rl_rank, rankx,
6831 					     rl_score[rtt_nom][rodt_ctl][rankx].score,
6832 					     print_nom_ohms,
6833 					     imp_val->rodt_ohms[rodt_ctl],
6834 					     WITH_RODT_BESTSCORE);
6835 
6836 			debug("-----------\n");
6837 		}
6838 	}
6839 
6840 	rl_score[rtt_nom][rodt_ctl][rankx].setting = rl_rank.u64;
6841 
6842 	// print out the PBMs for the current RODT
6843 	if (ddr_type == DDR4_DRAM && rl_print > 1) { // verbosity?
6844 		// FIXME: change verbosity level after debug complete...
6845 
6846 		for (i = 0; i < 9; i++) {
6847 			u64 temp_mask;
6848 			int num_values, delay;
6849 
6850 			// FIXME: PBM skip for RODTs in mask
6851 			if ((1U << rodt_ctl) & pbm_rodt_skip)
6852 				continue;
6853 
6854 			temp_mask = rodt_perfect_counts.mask[i];
6855 			if (!temp_mask) // no perfect bitmasks for this byte
6856 				continue;
6857 			num_values = __builtin_popcountll(temp_mask);
6858 			delay = __builtin_ffsll(temp_mask) - 1;
6859 			debug("N%d.LMC%d.R%d: PERFECT: RODT %3d: Byte %d: mask 0x%02llx (%d): ",
6860 			      node, if_num, rankx,
6861 			      imp_val->rodt_ohms[rodt_ctl],
6862 			      i, temp_mask >> delay, num_values);
6863 			while (temp_mask != 0) {
6864 				delay = __builtin_ffsll(temp_mask) - 1;
6865 				debug("%2d(%2d) ", delay,
6866 				      rodt_perfect_counts.count[i][delay]);
6867 				temp_mask &= ~(1ULL << delay);
6868 			} /* while (temp_mask != 0) */
6869 			debug("\n");
6870 		}
6871 	}
6872 }
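
/*
 * Illustrative sketch only -- a simplified stand-in for what
 * validate_ddr3_rlevel_bitmask()/compute_ddr3_rlevel_delay() do with the
 * 64 pass/fail bits read via lmc_ddr3_rl_dbg_read() in rodt_loop() above.
 * Under the simplifying assumption that the best setting is just the
 * longest contiguous run of passing (1) bits, that run can be found with:
 */
static inline void longest_run_of_ones(u64 bm, int *mstart, int *width)
{
	int start = 0, len = 0, best_start = 0, best_len = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (bm & (1ULL << i)) {
			if (!len)
				start = i;	/* a new run begins here */
			len++;
			if (len > best_len) {
				best_len = len;
				best_start = start;
			}
		} else {
			len = 0;	/* run broken by a failing setting */
		}
	}

	*mstart = best_start;
	*width = best_len;
}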
6873 
6874 static void rank_major_loop(struct ddr_priv *priv, int rankx, struct rl_score
6875 			    rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
6876 {
6877 	/* Start with an arbitrarily high score */
6878 	int best_rank_score = DEFAULT_BEST_RANK_SCORE;
6879 	int best_rank_rtt_nom = 0;
6880 	int best_rank_ctl = 0;
6881 	int best_rank_ohms = 0;
6882 	int best_rankx = 0;
6883 	int dimm_rank_mask;
6884 	int max_rank_score;
6885 	union cvmx_lmcx_rlevel_rankx saved_rl_rank;
6886 	int next_ohms;
6887 	int orankx;
6888 	int next_score = 0;
6889 	int best_byte, new_byte, temp_byte, orig_best_byte;
6890 	int rank_best_bytes[9];
6891 	int byte_sh;
6892 	int avg_byte;
6893 	int avg_diff;
6894 	int i;
6895 
6896 	if (!(rank_mask & (1 << rankx)))
6897 		return;
6898 
6899 	// some of the rank-related loops below need to operate only on
6900 	// the ranks of a single DIMM,
6901 	// so create a mask for their use here
6902 	if (num_ranks == 4) {
6903 		dimm_rank_mask = rank_mask; // should be 1111
6904 	} else {
6905 		dimm_rank_mask = rank_mask & 3; // should be 01 or 11
6906 		if (rankx >= 2) {
6907 			// doing a rank on the second DIMM, should be
6908 			// 0100 or 1100
6909 			dimm_rank_mask <<= 2;
6910 		}
6911 	}
6912 	debug("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n",
6913 	      dimm_rank_mask, rank_mask, rankx);
6914 
6915 	// this is the start of the BEST ROW SCORE LOOP
6916 
6917 	for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6918 		rtt_nom = imp_val->rtt_nom_table[rtt_idx];
6919 
6920 		debug("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
6921 		      node, if_num, rankx, rtt_nom,
6922 		      imp_val->rtt_nom_ohms[rtt_nom]);
6923 
6924 		for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
6925 		     --rodt_ctl) {
6926 			next_ohms = imp_val->rodt_ohms[rodt_ctl];
6927 
6928 			// skip RODT rows in mask, but *NOT* rows with too
6929 			// high a score;
6930 			// we will not use the skipped ones for printing or
6931 			// evaluating, but we need to allow all the
6932 			// non-skipped ones to be candidates for "best"
6933 			if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
6934 				debug("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d) with rank_score:%d\n",
6935 				      node, if_num, rankx, rodt_ctl,
6936 				      next_ohms, next_score);
6937 				continue;
6938 			}
6939 
6940 			// this is ROFFIX-0528
6941 			for (orankx = 0; orankx < dimm_count * 4; orankx++) {
6942 				// stay on the same DIMM
6943 				if (!(dimm_rank_mask & (1 << orankx)))
6944 					continue;
6945 
6946 				next_score = rl_score[rtt_nom][rodt_ctl][orankx].score;
6947 
6948 				// always skip a higher score
6949 				if (next_score > best_rank_score)
6950 					continue;
6951 
6952 				// if scores are equal
6953 				if (next_score == best_rank_score) {
6954 					// always skip lower ohms
6955 					if (next_ohms < best_rank_ohms)
6956 						continue;
6957 
6958 					// if same ohms
6959 					if (next_ohms == best_rank_ohms) {
6960 						// always skip the other rank(s)
6961 						if (orankx != rankx)
6962 							continue;
6963 					}
6964 					// else next_ohms are greater,
6965 					// always choose it
6966 				}
6967 				// else next_score is less than current best,
6968 				// so always choose it
6969 				debug("N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
6970 				      node, if_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
6971 				      best_rank_score, best_rank_ohms);
6972 				best_rank_score	    = next_score;
6973 				best_rank_rtt_nom   = rtt_nom;
6974 				//best_rank_nom_ohms  = rtt_nom_ohms;
6975 				best_rank_ctl       = rodt_ctl;
6976 				best_rank_ohms      = next_ohms;
6977 				best_rankx          = orankx;
6978 				rl_rank.u64 =
6979 					rl_score[rtt_nom][rodt_ctl][orankx].setting;
6980 			}
6981 		}
6982 	}
6983 
6984 	// this is the end of the BEST ROW SCORE LOOP
6985 
6986 	// DANGER, Will Robinson!! Abort now if we did not find a best
6987 	// score at all...
6988 	if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
6989 		printf("N%d.LMC%d.R%d: WARNING: no best rank score found - resetting node...\n",
6990 		       node, if_num, rankx);
6991 		mdelay(500);
6992 		do_reset(NULL, 0, 0, NULL);
6993 	}
6994 
6995 	// FIXME: relative now, but still arbitrary...
6996 	max_rank_score = best_rank_score;
6997 	if (ddr_type == DDR4_DRAM) {
6998 		// halve the range if 2 DIMMs unless they are single rank...
6999 		max_rank_score += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ?
7000 							   dimm_count : 1));
7001 	} else {
7002 		// Since DDR3 typically has a wider score range,
7003 		// keep more of them always
7004 		max_rank_score += MAX_RANK_SCORE_LIMIT;
7005 	}
7006 
7007 	if (!ecc_ena) {
7008 		/* ECC is not used */
7009 		rl_rank.s.byte8 = rl_rank.s.byte0;
7010 	}
7011 
7012 	// at the end, write the best row settings to the current rank
7013 	lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), rl_rank.u64);
7014 	rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
7015 
7016 	saved_rl_rank.u64 = rl_rank.u64;
7017 
7018 	// this is the start of the PRINT LOOP
7019 	int pass;
7020 
7021 	// for pass==0, print current rank, pass==1 print other rank(s)
7022 	// this is done because we want to show each rank's RODT values
7023 	// together, not interleaved
7024 	// keep separate counters per pass - pass=0 target rank, pass=1 other
7025 	// rank on DIMM
7026 	int mask_skipped[2] = {0, 0};
7027 	int score_skipped[2] = {0, 0};
7028 	int selected_rows[2] = {0, 0};
7029 	int zero_scores[2] = {0, 0};
7030 	for (pass = 0; pass < 2; pass++) {
7031 		for (orankx = 0; orankx < dimm_count * 4; orankx++) {
7032 			// stay on the same DIMM
7033 			if (!(dimm_rank_mask & (1 << orankx)))
7034 				continue;
7035 
7036 			if ((pass == 0 && orankx != rankx) ||
7037 			    (pass != 0 && orankx == rankx))
7038 				continue;
7039 
7040 			for (rtt_idx = min_rtt_nom_idx;
7041 			     rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
7042 				rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7043 				if (dyn_rtt_nom_mask == 0) {
7044 					print_nom_ohms = -1;
7045 				} else {
7046 					print_nom_ohms =
7047 						imp_val->rtt_nom_ohms[rtt_nom];
7048 				}
7049 
7050 				// cycle through all the RODT values...
7051 				for (rodt_ctl = max_rodt_ctl;
7052 				     rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
7053 					union cvmx_lmcx_rlevel_rankx
7054 						temp_rl_rank;
7055 					int temp_score =
7056 						rl_score[rtt_nom][rodt_ctl][orankx].score;
7057 					int skip_row;
7058 
7059 					temp_rl_rank.u64 =
7060 						rl_score[rtt_nom][rodt_ctl][orankx].setting;
7061 
7062 					// skip RODT rows in mask, or rows
7063 					// with too high a score;
7064 					// we will not use them for printing
7065 					// or evaluating...
7066 					if ((1 << rodt_ctl) &
7067 					    rodt_row_skip_mask) {
7068 						skip_row = WITH_RODT_SKIPPING;
7069 						++mask_skipped[pass];
7070 					} else if (temp_score >
7071 						   max_rank_score) {
7072 						skip_row = WITH_RODT_SKIPPING;
7073 						++score_skipped[pass];
7074 					} else {
7075 						skip_row = WITH_RODT_BLANK;
7076 						++selected_rows[pass];
7077 						if (temp_score == 0)
7078 							++zero_scores[pass];
7079 					}
7080 
7081 					// identify and print the BEST ROW
7082 					// when it comes up
7083 					if (skip_row == WITH_RODT_BLANK &&
7084 					    best_rankx == orankx &&
7085 					    best_rank_rtt_nom == rtt_nom &&
7086 					    best_rank_ctl == rodt_ctl)
7087 						skip_row = WITH_RODT_BESTROW;
7088 
7089 					if (rl_print) {
7090 						display_rl_with_rodt(if_num,
7091 								     temp_rl_rank, orankx, temp_score,
7092 								     print_nom_ohms,
7093 								     imp_val->rodt_ohms[rodt_ctl],
7094 								     skip_row);
7095 					}
7096 				}
7097 			}
7098 		}
7099 	}
7100 	debug("N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n",
7101 	      node, if_num, rankx, selected_rows[0], selected_rows[1],
7102 	      zero_scores[0], zero_scores[1], mask_skipped[0], mask_skipped[1],
7103 	      score_skipped[0], score_skipped[1]);
7104 	// this is the end of the PRINT LOOP
7105 
7106 	// now evaluate which bytes need adjusting
7107 	// collect the new byte values; first init with current best for
7108 	// neighbor use
7109 	for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
7110 		rank_best_bytes[i] = (int)(rl_rank.u64 >> byte_sh) &
7111 			RLEVEL_BYTE_MSK;
7112 	}
7113 
7114 	// this is the start of the BEST BYTE LOOP
7115 
7116 	for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
7117 		int sum = 0, count = 0;
7118 		int count_less = 0, count_same = 0, count_more = 0;
7119 		int count_byte; // save the value we counted around
7120 		// for rank majority use
7121 		int rank_less = 0, rank_same = 0, rank_more = 0;
7122 		int neighbor;
7123 		int neigh_byte;
7124 
7125 		best_byte = rank_best_bytes[i];
7126 		orig_best_byte = rank_best_bytes[i];
7127 
7128 		// this is the start of the BEST BYTE AVERAGING LOOP
7129 
7130 		// validate the initial "best" byte by looking at the
7131 		// average of the unskipped byte-column entries
7132 		// we want to do this before we go further, so we can
7133 		// try to start with a better initial value
7134 		// this is the so-called "BESTBUY" patch set
7135 
7136 		for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
7137 		     ++rtt_idx) {
7138 			rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7139 
7140 			for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
7141 			     --rodt_ctl) {
7142 				union cvmx_lmcx_rlevel_rankx temp_rl_rank;
7143 				int temp_score;
7144 
7145 				// average over all the ranks
7146 				for (orankx = 0; orankx < dimm_count * 4;
7147 				     orankx++) {
7148 					// stay on the same DIMM
7149 					if (!(dimm_rank_mask & (1 << orankx)))
7150 						continue;
7151 
7152 					temp_score =
7153 						rl_score[rtt_nom][rodt_ctl][orankx].score;
7154 					// skip RODT rows in mask, or rows with
7155 					// too high a score;
7156 					// we will not use them for printing or
7157 					// evaluating...
7158 
7159 					if (!((1 << rodt_ctl) &
7160 					      rodt_row_skip_mask) &&
7161 					    temp_score <= max_rank_score) {
7162 						temp_rl_rank.u64 =
7163 							rl_score[rtt_nom][rodt_ctl][orankx].setting;
7164 						temp_byte =
7165 							(int)(temp_rl_rank.u64 >> byte_sh) &
7166 							RLEVEL_BYTE_MSK;
7167 						sum += temp_byte;
7168 						count++;
7169 					}
7170 				}
7171 			}
7172 		}
7173 
7174 		// this is the end of the BEST BYTE AVERAGING LOOP
7175 
7176 		// FIXME: validate count and sum??
7177 		avg_byte = (int)divide_nint(sum, count);
7178 		avg_diff = best_byte - avg_byte;
7179 		new_byte = best_byte;
7180 		if (avg_diff != 0) {
7181 			// bump best up/dn by 1, not necessarily all the
7182 			// way to avg
7183 			new_byte = best_byte + ((avg_diff > 0) ? -1 : 1);
7184 		}
7185 
7186 		if (rl_print) {
7187 			debug("N%d.LMC%d.R%d: START:   Byte %d: best %d is different by %d from average %d, using %d.\n",
7188 			      node, if_num, rankx,
7189 			      i, best_byte, avg_diff, avg_byte, new_byte);
7190 		}
7191 		best_byte = new_byte;
7192 		count_byte = new_byte; // save the value we will count around
7193 
7194 		// At this point best_byte is either:
7195 		// 1. the original byte-column value from the best scoring
7196 		//    RODT row, OR
7197 		// 2. that value bumped toward the average of all the
7198 		//    byte-column values
7199 		//
7200 		// best_byte will not change from here on...
7201 
7202 		// this is the start of the BEST BYTE COUNTING LOOP
7203 
7204 		// NOTE: we do this next loop separately from above, because
7205 		// we count relative to "best_byte"
7206 		// which may have been modified by the above averaging
7207 		// operation...
7208 
7209 		for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
7210 		     ++rtt_idx) {
7211 			rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7212 
7213 			for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
7214 			     --rodt_ctl) {
7215 				union cvmx_lmcx_rlevel_rankx temp_rl_rank;
7216 				int temp_score;
7217 
7218 				for (orankx = 0; orankx < dimm_count * 4;
7219 				     orankx++) { // count over all the ranks
7220 					// stay on the same DIMM
7221 					if (!(dimm_rank_mask & (1 << orankx)))
7222 						continue;
7223 
7224 					temp_score =
7225 						rl_score[rtt_nom][rodt_ctl][orankx].score;
7226 					// skip RODT rows in mask, or rows
7227 					// with too high a score;
7228 					// we will not use them for printing
7229 					// or evaluating...
7230 					if (((1 << rodt_ctl) &
7231 					     rodt_row_skip_mask) ||
7232 					    temp_score > max_rank_score)
7233 						continue;
7234 
7235 					temp_rl_rank.u64 =
7236 						rl_score[rtt_nom][rodt_ctl][orankx].setting;
7237 					temp_byte = (temp_rl_rank.u64 >>
7238 						     byte_sh) & RLEVEL_BYTE_MSK;
7239 
7240 					if (temp_byte == 0)
7241 						;  // do not count it if illegal
7242 					else if (temp_byte == best_byte)
7243 						count_same++;
7244 					else if (temp_byte == best_byte - 1)
7245 						count_less++;
7246 					else if (temp_byte == best_byte + 1)
7247 						count_more++;
7248 					// else do not count anything more
7249 					// than 1 away from the best
7250 
7251 					// no rank counting if disabled
7252 					if (disable_rank_majority)
7253 						continue;
7254 
7255 					// FIXME? count is relative to
7256 					// best_byte; should it be rank-based?
7257 					// rank counts only on main rank
7258 					if (orankx != rankx)
7259 						continue;
7260 					else if (temp_byte == best_byte)
7261 						rank_same++;
7262 					else if (temp_byte == best_byte - 1)
7263 						rank_less++;
7264 					else if (temp_byte == best_byte + 1)
7265 						rank_more++;
7266 				}
7267 			}
7268 		}
7269 
7270 		if (rl_print) {
7271 			debug("N%d.LMC%d.R%d: COUNT:   Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n",
7272 			      node, if_num, rankx,
7273 			      i, orig_best_byte, best_byte,
7274 			      count_more, count_same, count_less,
7275 			      rank_more, rank_same, rank_less);
7276 		}
7277 
7278 		// this is the end of the BEST BYTE COUNTING LOOP
7279 
7280 		// choose the new byte value
7281 		// we need to check that there is no gap greater than 2
7282 		// between adjacent bytes (adjacency depends on DIMM type)
7283 		// use the neighbor value to help decide
7284 		// initially, the rank_best_bytes[] will contain values from
7285 		// the chosen lowest score rank
7286 		new_byte = 0;
7287 
7288 		// neighbor is index-1 unless we are index 0 or index 8 (ECC)
7289 		neighbor = (i == 8) ? 3 : ((i == 0) ? 1 : i - 1);
7290 		neigh_byte = rank_best_bytes[neighbor];
7291 
7292 		// can go up or down or stay the same, so look at a numeric
7293 		// average to help
7294 		new_byte = (int)divide_nint(((count_more * (best_byte + 1)) +
7295 					     (count_same * (best_byte + 0)) +
7296 					     (count_less * (best_byte - 1))),
7297 					    max(1, (count_more + count_same +
7298 						    count_less)));
7299 
7300 		// use neighbor to help choose with average
7301 		if (i > 0 && (abs(neigh_byte - new_byte) > 2) &&
7302 		    !disable_sequential_delay_check) {
7303 			// but not for byte 0
7304 			int avg_pick = new_byte;
7305 
7306 			if ((new_byte - best_byte) != 0) {
7307 				// back to best, average did not get better
7308 				new_byte = best_byte;
7309 			} else {
7310 				// avg was the same, still too far, now move
7311 				// it towards the neighbor
7312 				new_byte += (neigh_byte > new_byte) ? 1 : -1;
7313 			}
7314 
7315 			if (rl_print) {
7316 				debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n",
7317 				      node, if_num, rankx,
7318 				      i, neighbor, neigh_byte, avg_pick,
7319 				      new_byte);
7320 			}
7321 		} else {
7322 			// NOTE:
7323 			// For now, we let the neighbor processing above trump
7324 			// the new simple majority processing here.
7325 			// This is mostly because we have seen no smoking gun
7326 			// for a neighbor bad choice (yet?).
7327 			// Also note that we will ALWAYS be using byte 0
7328 			// majority, because of the if clause above.
7329 
7330 			// majority is dependent on the counts, which are
7331 			// relative to best_byte, so start there
7332 			int maj_byte = best_byte;
7333 			int rank_maj;
7334 			int rank_sum;
7335 
7336 			if (count_more > count_same &&
7337 			    count_more > count_less) {
7338 				maj_byte++;
7339 			} else if (count_less > count_same &&
7340 				   count_less > count_more) {
7341 				maj_byte--;
7342 			}
7343 
7344 			if (maj_byte != new_byte) {
7345 				// print only when majority choice is
7346 				// different from average
7347 				if (rl_print) {
7348 					debug("N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n",
7349 					      node, if_num, rankx, i, maj_byte,
7350 					      new_byte);
7351 				}
7352 				new_byte = maj_byte;
7353 			} else {
7354 				if (rl_print) {
7355 					debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
7356 					      node, if_num, rankx, i, new_byte);
7357 				}
7358 			}
7359 
7360 			if (!disable_rank_majority) {
7361 				// rank majority is dependent on the rank
7362 				// counts, which are relative to best_byte,
7363 				// so start there, and adjust according to the
7364 				// rank counts majority
7365 				rank_maj = best_byte;
7366 				if (rank_more > rank_same &&
7367 				    rank_more > rank_less) {
7368 					rank_maj++;
7369 				} else if (rank_less > rank_same &&
7370 					   rank_less > rank_more) {
7371 					rank_maj--;
7372 				}
7373 				rank_sum = rank_more + rank_same + rank_less;
7374 
7375 				// now, let rank majority possibly rule over
7376 				// the current new_byte however we got it
7377 				if (rank_maj != new_byte) { // only if different
7378 					// Here is where we decide whether to
7379 					// completely apply RANK_MAJORITY or not
7380 					// ignore if less than
7381 					if (rank_maj < new_byte) {
7382 						if (rl_print) {
7383 							debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: LESS: NOT using %d over %d.\n",
7384 							      node, if_num,
7385 							      rankx, i,
7386 							      rank_maj,
7387 							      new_byte);
7388 						}
7389 					} else {
7390 						// For the moment, we do it
7391 						// ONLY when running 2-slot
7392 						// configs
7393 						//  OR when rank_sum is big
7394 						// enough
7395 						if (dimm_count > 1 ||
7396 						    rank_sum > 2) {
7397 							// print only when rank
7398 							// majority choice is
7399 							// selected
7400 							if (rl_print) {
7401 								debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n",
7402 								      node,
7403 								      if_num,
7404 								      rankx,
7405 								      i,
7406 								      rank_maj,
7407 								      new_byte);
7408 							}
7409 							new_byte = rank_maj;
7410 						} else {
7411 							// FIXME: print some
7412 							// info when we could
7413 							// have chosen RANKMAJ
7414 							// but did not
7415 							if (rl_print) {
7416 								debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n",
7417 								      node,
7418 								      if_num,
7419 								      rankx,
7420 								      i,
7421 								      rank_maj,
7422 								      new_byte,
7423 								      best_byte,
7424 								      rank_sum);
7425 							}
7426 						}
7427 					}
7428 				}
7429 			} /* if (!disable_rank_majority) */
7430 		}
7431 		// one last check:
7432 		// if new_byte is still count_byte, BUT there was no count
7433 		// for that value, DO SOMETHING!!!
7434 		// FIXME: go back to original best byte from the best row
7435 		if (new_byte == count_byte && count_same == 0) {
7436 			new_byte = orig_best_byte;
7437 			if (rl_print) {
7438 				debug("N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n",
7439 				      node, if_num, rankx, i, new_byte);
7440 			}
7441 		}
7442 		// Look at counts for "perfect" bitmasks (PBMs) if we had
7443 		// any for this byte-lane.
7444 		// Remember, we only counted for DDR4, so zero means none
7445 		// or DDR3, and we bypass this...
7446 		value_mask = rank_perf[rankx].mask[i];
7447 		disable_rlv_bump_this_byte = 0;
7448 
7449 		if (value_mask != 0 && rl_ctl.cn78xx.offset == 1) {
7450 			int i, delay_count, delay_max = 0, del_val = 0;
7451 			int num_values = __builtin_popcountll(value_mask);
7452 			int sum_counts = 0;
7453 			u64 temp_mask = value_mask;
7454 
7455 			disable_rlv_bump_this_byte = 1;
7456 			i = __builtin_ffsll(temp_mask) - 1;
7457 			if (rl_print)
7458 				debug("N%d.LMC%d.R%d: PERFECT: Byte %d: OFF1: mask 0x%02llx (%d): ",
7459 				      node, if_num, rankx, i, value_mask >> i,
7460 				      num_values);
7461 
7462 			while (temp_mask != 0) {
7463 				i = __builtin_ffsll(temp_mask) - 1;
7464 				delay_count = rank_perf[rankx].count[i][i];
7465 				sum_counts += delay_count;
7466 				if (rl_print)
7467 					debug("%2d(%2d) ", i, delay_count);
7468 				if (delay_count >= delay_max) {
7469 					delay_max = delay_count;
7470 					del_val = i;
7471 				}
7472 				temp_mask &= ~(1UL << i);
7473 			} /* while (temp_mask != 0) */
7474 
7475 			// if sum_counts is small, just use NEW_BYTE
7476 			if (sum_counts < pbm_lowsum_limit) {
7477 				if (rl_print)
7478 					debug(": LOWSUM (%2d), choose ORIG ",
7479 					      sum_counts);
7480 				del_val = new_byte;
7481 				delay_max = rank_perf[rankx].count[i][del_val];
7482 			}
7483 
7484 			// finish printing here...
7485 			if (rl_print) {
7486 				debug(": USING %2d (%2d) D%d\n", del_val,
7487 				      delay_max, disable_rlv_bump_this_byte);
7488 			}
7489 
7490 			new_byte = del_val; // override with best PBM choice
7491 
7492 		} else if ((value_mask != 0) && (rl_ctl.cn78xx.offset == 2)) {
7493 			//                        if (value_mask != 0) {
7494 			int i, delay_count, del_val;
7495 			int num_values = __builtin_popcountll(value_mask);
7496 			int sum_counts = 0;
7497 			u64 temp_mask = value_mask;
7498 
7499 			i = __builtin_ffsll(temp_mask) - 1;
7500 			if (rl_print)
7501 				debug("N%d.LMC%d.R%d: PERFECT: Byte %d: mask 0x%02llx (%d): ",
7502 				      node, if_num, rankx, i, value_mask >> i,
7503 				      num_values);
7504 			while (temp_mask != 0) {
7505 				i = __builtin_ffsll(temp_mask) - 1;
7506 				delay_count = rank_perf[rankx].count[i][i];
7507 				sum_counts += delay_count;
7508 				if (rl_print)
7509 					debug("%2d(%2d) ", i, delay_count);
7510 				temp_mask &= ~(1UL << i);
7511 			} /* while (temp_mask != 0) */
7512 
7513 			del_val = __builtin_ffsll(value_mask) - 1;
7514 			delay_count =
7515 				rank_perf[rankx].count[i][del_val];
7516 
7517 			// overkill, normally only 1-4 bits
7518 			i = (value_mask >> del_val) & 0x1F;
7519 
7520 			// if sum_counts is small, treat as special and use
7521 			// NEW_BYTE
7522 			if (sum_counts < pbm_lowsum_limit) {
7523 				if (rl_print)
7524 					debug(": LOWSUM (%2d), choose ORIG",
7525 					      sum_counts);
7526 				i = 99; // SPECIAL case...
7527 			}
7528 
7529 			switch (i) {
7530 			case 0x01 /* 00001b */:
7531 				// allow BUMP
7532 				break;
7533 
7534 			case 0x13 /* 10011b */:
7535 			case 0x0B /* 01011b */:
7536 			case 0x03 /* 00011b */:
7537 				del_val += 1; // take the second
7538 				disable_rlv_bump_this_byte = 1; // allow no BUMP
7539 				break;
7540 
7541 			case 0x0D /* 01101b */:
7542 			case 0x05 /* 00101b */:
7543 				// test count of lowest and all
7544 				if (delay_count >= 5 || sum_counts <= 5)
7545 					del_val += 1; // take the hole
7546 				else
7547 					del_val += 2; // take the next set
7548 				disable_rlv_bump_this_byte = 1; // allow no BUMP
7549 				break;
7550 
7551 			case 0x0F /* 01111b */:
7552 			case 0x17 /* 10111b */:
7553 			case 0x07 /* 00111b */:
7554 				del_val += 1; // take the second
7555 				if (delay_count < 5) { // lowest count is small
7556 					int second =
7557 						rank_perf[rankx].count[i][del_val];
7558 					int third =
7559 						rank_perf[rankx].count[i][del_val + 1];
7560 					// test if middle is more than 1 OR
7561 					// top is more than 1;
7562 					// this means if they are BOTH 1,
7563 					// then we keep the second...
7564 					if (second > 1 || third > 1) {
7565 						// if middle is small OR top
7566 						// is large
7567 						if (second < 5 ||
7568 						    third > 1) {
7569 							// take the top
7570 							del_val += 1;
7571 							if (rl_print)
7572 								debug(": TOP7 ");
7573 						}
7574 					}
7575 				}
7576 				disable_rlv_bump_this_byte = 1; // allow no BUMP
7577 				break;
7578 
7579 			default: // all others...
7580 				if (rl_print)
7581 					debug(": ABNORMAL, choose ORIG");
7582 
7583 			case 99: // special
7584 				 // FIXME: choose original choice?
7585 				del_val = new_byte;
7586 				disable_rlv_bump_this_byte = 1; // allow no BUMP
7587 				break;
7588 			}
7589 			delay_count =
7590 				rank_perf[rankx].count[i][del_val];
7591 
7592 			// finish printing here...
7593 			if (rl_print)
7594 				debug(": USING %2d (%2d) D%d\n", del_val,
7595 				      delay_count, disable_rlv_bump_this_byte);
7596 			new_byte = del_val; // override with best PBM choice
7597 		} else {
7598 			if (ddr_type == DDR4_DRAM) { // only report when DDR4
7599 				// FIXME: remove or increase VBL for this
7600 				// output...
7601 				if (rl_print)
7602 					debug("N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO PBMs, USING %d\n",
7603 					      node, if_num, rankx, i,
7604 					      new_byte);
7605 				// prevent ODD bump, rely on original
7606 				disable_rlv_bump_this_byte = 1;
7607 			}
7608 		} /* if (value_mask != 0) */
7609 
7610 		// optionally bump the delay value
7611 		if (enable_rldelay_bump && !disable_rlv_bump_this_byte) {
7612 			if ((new_byte & enable_rldelay_bump) ==
7613 			    enable_rldelay_bump) {
7614 				int bump_value = new_byte + rldelay_bump_incr;
7615 
7616 				if (rl_print) {
7617 					debug("N%d.LMC%d.R%d: RLVBUMP: Byte %d: CHANGING %d to %d (%s)\n",
7618 					      node, if_num, rankx, i,
7619 					      new_byte, bump_value,
7620 					      (value_mask &
7621 					       (1 << bump_value)) ?
7622 					      "PBM" : "NOPBM");
7623 				}
7624 				new_byte = bump_value;
7625 			}
7626 		}
7627 
7628 		// last checks for count-related purposes
7629 		if (new_byte == best_byte && count_more > 0 &&
7630 		    count_less == 0) {
7631 			// we really should take best_byte + 1
7632 			if (rl_print) {
7633 				debug("N%d.LMC%d.R%d: CADJMOR: Byte %d: CHANGING %d to %d\n",
7634 				      node, if_num, rankx, i,
7635 				      new_byte, best_byte + 1);
7636 			}
7637 			new_byte = best_byte + 1;
7638 		} else if ((new_byte < best_byte) && (count_same > 0)) {
7639 			// we really should take best_byte
7640 			if (rl_print) {
7641 				debug("N%d.LMC%d.R%d: CADJSAM: Byte %d: CHANGING %d to %d\n",
7642 				      node, if_num, rankx, i,
7643 				      new_byte, best_byte);
7644 			}
7645 			new_byte = best_byte;
7646 		} else if (new_byte > best_byte) {
7647 			if ((new_byte == (best_byte + 1)) &&
7648 			    count_more == 0 && count_less > 0) {
7649 				// we really should take best_byte
7650 				if (rl_print) {
7651 					debug("N%d.LMC%d.R%d: CADJLE1: Byte %d: CHANGING %d to %d\n",
7652 					      node, if_num, rankx, i,
7653 					      new_byte, best_byte);
7654 				}
7655 				new_byte = best_byte;
7656 			} else if ((new_byte >= (best_byte + 2)) &&
7657 				   ((count_more > 0) || (count_same > 0))) {
7658 				if (rl_print) {
7659 					debug("N%d.LMC%d.R%d: CADJLE2: Byte %d: CHANGING %d to %d\n",
7660 					      node, if_num, rankx, i,
7661 					      new_byte, best_byte + 1);
7662 				}
7663 				new_byte = best_byte + 1;
7664 			}
7665 		}
7666 
7667 		if (rl_print) {
7668 			debug("N%d.LMC%d.R%d: SUMMARY: Byte %d: orig %d now %d, more %d same %d less %d, using %d\n",
7669 			      node, if_num, rankx, i, orig_best_byte,
7670 			      best_byte, count_more, count_same, count_less,
7671 			      new_byte);
7672 		}
7673 
7674 		// update the byte with the new value (NOTE: orig value in
7675 		// the CSR may not be current "best")
7676 		upd_rl_rank(&rl_rank, i, new_byte);
7677 
7678 		// save new best for neighbor use
7679 		rank_best_bytes[i] = new_byte;
7680 	} /* for (i = 0; i < 8+ecc_ena; i++) */
7681 
7682 	////////////////// this is the end of the BEST BYTE LOOP
7683 
7684 	if (saved_rl_rank.u64 != rl_rank.u64) {
7685 		lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
7686 		       rl_rank.u64);
7687 		rl_rank.u64 = lmc_rd(priv,
7688 				     CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
7689 		debug("Adjusting Read-Leveling per-RANK settings.\n");
7690 	} else {
7691 		debug("Not Adjusting Read-Leveling per-RANK settings.\n");
7692 	}
7693 	display_rl_with_final(if_num, rl_rank, rankx);
7694 
7695 	// FIXME: does this help make the output a little easier to focus?
7696 	if (rl_print > 0)
7697 		debug("-----------\n");
7698 
7699 #define RLEVEL_RANKX_EXTRAS_INCR  0
7700 	// if there are unused entries to be filled
7701 	if ((rank_mask & 0x0f) != 0x0f) {
7702 		// copy the current rank
7703 		union cvmx_lmcx_rlevel_rankx temp_rl_rank = rl_rank;
7704 
7705 		if (rankx < 3) {
7706 #if RLEVEL_RANKX_EXTRAS_INCR > 0
7707 			int byte, delay;
7708 
7709 			// modify the copy in prep for writing to empty slot(s)
7710 			for (byte = 0; byte < 9; byte++) {
7711 				delay = get_rl_rank(&temp_rl_rank, byte) +
7712 					RLEVEL_RANKX_EXTRAS_INCR;
7713 				if (delay > RLEVEL_BYTE_MSK)
7714 					delay = RLEVEL_BYTE_MSK;
7715 				upd_rl_rank(&temp_rl_rank, byte, delay);
7716 			}
7717 #endif
7718 
7719 			// if rank 0, write rank 1 and rank 2 here if empty
7720 			if (rankx == 0) {
7721 				// check that rank 1 is empty
7722 				if (!(rank_mask & (1 << 1))) {
7723 					debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7724 					      node, if_num, rankx, 1);
7725 					lmc_wr(priv,
7726 					       CVMX_LMCX_RLEVEL_RANKX(1,
7727 								      if_num),
7728 					       temp_rl_rank.u64);
7729 				}
7730 
7731 				// check that rank 2 is empty
7732 				if (!(rank_mask & (1 << 2))) {
7733 					debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7734 					      node, if_num, rankx, 2);
7735 					lmc_wr(priv,
7736 					       CVMX_LMCX_RLEVEL_RANKX(2,
7737 								      if_num),
7738 					       temp_rl_rank.u64);
7739 				}
7740 			}
7741 
7742 			// if ranks 0, 1 or 2, write rank 3 here if empty
7743 			// check that rank 3 is empty
7744 			if (!(rank_mask & (1 << 3))) {
7745 				debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7746 				      node, if_num, rankx, 3);
7747 				lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(3, if_num),
7748 				       temp_rl_rank.u64);
7749 			}
7750 		}
7751 	}
7752 }
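
/*
 * Illustrative sketch only -- the arithmetic behind the BEST BYTE loop in
 * rank_major_loop() above.  Each byte delay is settled by a
 * nearest-integer weighted average of how many non-skipped rows landed
 * one below, on, or one above best_byte.  The rounded divide below is a
 * local stand-in for the driver's divide_nint().
 */
static inline int average_best_byte(int best_byte, int count_less,
				    int count_same, int count_more)
{
	int total = count_less + count_same + count_more;
	int sum = count_less * (best_byte - 1) +
		  count_same * best_byte +
		  count_more * (best_byte + 1);

	if (total < 1)
		total = 1;	/* avoid dividing by zero, as the driver does */

	/* counts and delays are non-negative here, so this rounds to nearest */
	return (sum + total / 2) / total;
}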
7753 
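/*
 * Illustrative sketch only -- nothing calls this.  It condenses the
 * hardware handshake that rodt_loop() above performs for each rank, and
 * that the long comment in lmc_read_leveling() below documents as steps
 * 2-3 of the read-leveling sequence: clear the per-rank delays, kick off
 * the sequence, then poll the rank's STATUS field until it reports done.
 */
static inline u64 rlevel_sequence_sketch(struct ddr_priv *priv, int rank)
{
	union cvmx_lmcx_rlevel_rankx rl;

	/* clear read-level delays for the rank */
	lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rank, if_num), 0);

	/* start the hardware read-leveling sequence for this rank */
	oct3_ddr3_seq(priv, 1 << rank, if_num, 1);

	/* wait for the sequence to complete (status == 3 means done) */
	do {
		rl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rank, if_num));
	} while (rl.cn78xx.status != 3);

	return rl.u64;
}
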
7754 static void lmc_read_leveling(struct ddr_priv *priv)
7755 {
7756 	struct rl_score rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
7757 	union cvmx_lmcx_control ctl;
7758 	union cvmx_lmcx_config cfg;
7759 	int rankx;
7760 	char *s;
7761 	int i;
7762 
7763 	/*
7764 	 * 4.8.10 LMC Read Leveling
7765 	 *
7766 	 * LMC supports an automatic read-leveling separately per byte-lane
7767 	 * using the DDR3 multipurpose register predefined pattern for system
7768 	 * calibration defined in the JEDEC DDR3 specifications.
7769 	 *
7770 	 * All of DDR PLL, LMC CK, and LMC DRESET, and early LMC initializations
7771 	 * must be completed prior to starting this LMC read-leveling sequence.
7772 	 *
7773 	 * Software could simply write the desired read-leveling values into
7774 	 * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
7775 	 * LMC's automatic read-leveling capabilities.
7776 	 *
7777 	 * When LMC does the read-leveling sequence for a rank, it first enables
7778 	 * the DDR3 multipurpose register predefined pattern for system
7779 	 * calibration on the selected DRAM rank via a DDR3 MR3 write, then
7780 	 * executes 64 RD operations at different internal delay settings, then
7781 	 * disables the predefined pattern via another DDR3 MR3 write
7782 	 * operation. LMC determines the pass or fail of each of the 64 settings
7783 	 * independently for each byte lane, then writes appropriate
7784 	 * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank.
7785 	 *
7786 	 * After read-leveling for a rank, software can read the 64 pass/fail
7787 	 * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK].
7788 	 * Software can observe all pass/fail results for all byte lanes in a
7789 	 * rank via separate read-leveling sequences on the rank with different
7790 	 * LMC(0)_RLEVEL_CTL[BYTE] values.
7791 	 *
7792 	 * The 64 pass/fail results will typically have failures for the low
7793 	 * delays, followed by a run of some passing settings, followed by more
7794 	 * failures in the remaining high delays.  LMC sets
7795 	 * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings.
7796 	 * First, LMC selects the longest run of successes in the 64 results.
7797 	 * (In the unlikely event that there is more than one longest run, LMC
7798 	 * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and
7799 	 * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes,
7800 	 * LMC selects the last passing setting in the run minus
7801 	 * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting
7802 	 * in the run (rounding earlier when necessary). We expect the
7803 	 * read-leveling sequence to produce good results with the reset values
7804 	 * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2.
7805 	 *
7806 	 * The read-leveling sequence has the following steps:
7807 	 *
7808 	 * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings.
7809 	 *    Do the remaining substeps 2-4 separately for each rank i with
7810 	 *    attached DRAM.
7811 	 *
7812 	 * 2. Without changing any other fields in LMC(0)_CONFIG,
7813 	 *
7814 	 *    o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling
7815 	 *
7816 	 *    o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
7817 	 *
7818 	 *    o write LMC(0)_SEQ_CTL[INIT_START] = 1
7819 	 *
7820 	 *    This initiates the previously-described read-leveling.
7821 	 *
7822 	 * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2
7823 	 *
7824 	 *    LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte
7825 	 *    lanes at this point.
7826 	 *
7827 	 *    If ECC DRAM is not present (i.e. when DRAM is not attached to the
7828 	 *    DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
7829 	 *    DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] =
7830 	 *    LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] =
7831 	 *    LMC(0)_RLEVEL_RANK*[BYTE0].
7832 	 *
7833 	 * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to
7834 	 *    LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by
7835 	 *    LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify
7836 	 *    LMC(0)_RLEVEL_CTL[BYTE] to a new value and repeat so that all
7837 	 *    BITMASKs can be observed.
7838 	 *
7839 	 * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks.
7840 	 *
7841 	 *    Let rank i be a rank with attached DRAM.
7842 	 *
7843 	 *    For all ranks j that do not have attached DRAM, set
7844 	 *    LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi.
7845 	 *
7846 	 * This read-leveling sequence can help select the proper CN70XX ODT
7847 	 * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated
7848 	 * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
7849 	 * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
7850 	 * (for a used byte lane k) can indicate that the CN70XX ODT value is
7851 	 * bad. It is possible to simultaneously optimize both
7852 	 * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
7853 	 * performing this read-leveling sequence for several
7854 	 * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the
7855 	 * best LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
7856 	 */
7857 
7858 	rl_rodt_err = 0;
7859 	rl_dbg_loops = 1;
7860 	saved_int_zqcs_dis = 0;
7861 	max_adj_rl_del_inc = 0;
7862 	rl_print = RLEVEL_PRINTALL_DEFAULT;
7863 
7864 #ifdef ENABLE_HARDCODED_RLEVEL
7865 	memset(part_number, 0, sizeof(part_number));
7866 #endif /* ENABLE_HARDCODED_RLEVEL */
7867 
7868 	pbm_lowsum_limit = 5; // FIXME: is this a good default?
7869 	// FIXME: PBM skip for RODT 240 and 34
7870 	pbm_rodt_skip = (1U << ddr4_rodt_ctl_240_ohm) |
7871 		(1U << ddr4_rodt_ctl_34_ohm);
7872 
7873 	disable_rank_majority = 0; // control rank majority processing
7874 
7875 	// default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE
7876 	// for DDR3
7877 	rldelay_bump_incr = 0;
7878 	disable_rlv_bump_this_byte = 0;
7879 
7880 	enable_rldelay_bump = (ddr_type == DDR4_DRAM) ?
7881 		((octeon_is_cpuid(OCTEON_CN73XX)) ? 1 : 3) : 0;
7882 
7883 	s = lookup_env(priv, "ddr_disable_rank_majority");
7884 	if (s)
7885 		disable_rank_majority = !!simple_strtoul(s, NULL, 0);
7886 
7887 	s = lookup_env(priv, "ddr_pbm_lowsum_limit");
7888 	if (s)
7889 		pbm_lowsum_limit = simple_strtoul(s, NULL, 0);
7890 
7891 	s = lookup_env(priv, "ddr_pbm_rodt_skip");
7892 	if (s)
7893 		pbm_rodt_skip = simple_strtoul(s, NULL, 0);
7894 	memset(rank_perf, 0, sizeof(rank_perf));
7895 
7896 	ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
7897 	save_ddr2t = ctl.cn78xx.ddr2t;
7898 
7899 	cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
7900 	ecc_ena = cfg.cn78xx.ecc_ena;
7901 
7902 	s = lookup_env(priv, "ddr_rlevel_2t");
7903 	if (s)
7904 		ctl.cn78xx.ddr2t = simple_strtoul(s, NULL, 0);
7905 
7906 	lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
7907 
7908 	debug("LMC%d: Performing Read-Leveling\n", if_num);
7909 
7910 	rl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
7911 
7912 	rl_samples = c_cfg->rlevel_average_loops;
7913 	if (rl_samples == 0) {
7914 		rl_samples = RLEVEL_SAMPLES_DEFAULT;
7915 		// up the samples for these cases
7916 		if (dimm_count == 1 || num_ranks == 1)
7917 			rl_samples = rl_samples * 2 + 1;
7918 	}
7919 
7920 	rl_compute = c_cfg->rlevel_compute;
7921 	rl_ctl.cn78xx.offset_en = c_cfg->offset_en;
7922 	rl_ctl.cn78xx.offset    = spd_rdimm
7923 		? c_cfg->offset_rdimm
7924 		: c_cfg->offset_udimm;
7925 
7926 	int value = 1; // should ALWAYS be set
7927 
7928 	s = lookup_env(priv, "ddr_rlevel_delay_unload");
7929 	if (s)
7930 		value = !!simple_strtoul(s, NULL, 0);
7931 	rl_ctl.cn78xx.delay_unload_0 = value;
7932 	rl_ctl.cn78xx.delay_unload_1 = value;
7933 	rl_ctl.cn78xx.delay_unload_2 = value;
7934 	rl_ctl.cn78xx.delay_unload_3 = value;
7935 
7936 	// use OR_DIS=1 to try for better results
7937 	rl_ctl.cn78xx.or_dis = 1;
7938 
7939 	/*
7940 	 * If we will be switching to 32-bit mode, level based on only
7941 	 * four bits, because there are only 4 ECC bits.
7942 	 */
7943 	rl_ctl.cn78xx.bitmask = (if_64b) ? 0xFF : 0x0F;
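	/*
	 * Example: in 64-bit mode all eight bits of the leveling bitmask
	 * are used (0xFF); in 32-bit mode only the low four (0x0F).
	 */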
7944 
7945 	// allow overrides
7946 	s = lookup_env(priv, "ddr_rlevel_ctl_or_dis");
7947 	if (s)
7948 		rl_ctl.cn78xx.or_dis = simple_strtoul(s, NULL, 0);
7949 
7950 	s = lookup_env(priv, "ddr_rlevel_ctl_bitmask");
7951 	if (s)
7952 		rl_ctl.cn78xx.bitmask = simple_strtoul(s, NULL, 0);
7953 
7954 	rl_comp_offs = spd_rdimm
7955 		? c_cfg->rlevel_comp_offset_rdimm
7956 		: c_cfg->rlevel_comp_offset_udimm;
7957 	s = lookup_env(priv, "ddr_rlevel_comp_offset");
7958 	if (s)
7959 		rl_comp_offs = strtoul(s, NULL, 0);
7960 
7961 	s = lookup_env(priv, "ddr_rlevel_offset");
7962 	if (s)
7963 		rl_ctl.cn78xx.offset   = simple_strtoul(s, NULL, 0);
7964 
7965 	s = lookup_env(priv, "ddr_rlevel_offset_en");
7966 	if (s)
7967 		rl_ctl.cn78xx.offset_en   = simple_strtoul(s, NULL, 0);
7968 
7969 	s = lookup_env(priv, "ddr_rlevel_ctl");
7970 	if (s)
7971 		rl_ctl.u64   = simple_strtoul(s, NULL, 0);
7972 
7973 	lmc_wr(priv,
7974 	       CVMX_LMCX_RLEVEL_CTL(if_num),
7975 	       rl_ctl.u64);
7976 
7977 	// do this here so we can look at final RLEVEL_CTL[offset] setting...
7978 	s = lookup_env(priv, "ddr_enable_rldelay_bump");
7979 	if (s) {
7980 		// also use as mask bits
7981 		enable_rldelay_bump = strtoul(s, NULL, 0);
7982 	}
7983 
7984 	if (enable_rldelay_bump != 0)
7985 		rldelay_bump_incr = (rl_ctl.cn78xx.offset == 1) ? -1 : 1;
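	/*
	 * Note: the bump direction follows RLEVEL_CTL[OFFSET]; with an
	 * offset of 1 the delay is bumped down (-1), otherwise up (+1).
	 */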
7986 
7987 	s = lookup_env(priv, "ddr%d_rlevel_debug_loops", if_num);
7988 	if (s)
7989 		rl_dbg_loops = simple_strtoul(s, NULL, 0);
7990 
7991 	s = lookup_env(priv, "ddr_rtt_nom_auto");
7992 	if (s)
7993 		ddr_rtt_nom_auto = !!simple_strtoul(s, NULL, 0);
7994 
7995 	s = lookup_env(priv, "ddr_rlevel_average");
7996 	if (s)
7997 		rl_samples = simple_strtoul(s, NULL, 0);
7998 
7999 	s = lookup_env(priv, "ddr_rlevel_compute");
8000 	if (s)
8001 		rl_compute = simple_strtoul(s, NULL, 0);
8002 
8003 	s = lookup_env(priv, "ddr_rlevel_printall");
8004 	if (s)
8005 		rl_print = simple_strtoul(s, NULL, 0);
8006 
8007 	debug("RLEVEL_CTL                                    : 0x%016llx\n",
8008 	      rl_ctl.u64);
8009 	debug("RLEVEL_OFFSET                                 : %6d\n",
8010 	      rl_ctl.cn78xx.offset);
8011 	debug("RLEVEL_OFFSET_EN                              : %6d\n",
8012 	      rl_ctl.cn78xx.offset_en);
8013 
8014 	/*
8015 	 * The purpose of the indexed table is to sort the settings
8016 	 * by the ohm value to simplify the testing when incrementing
8017 	 * through the settings.  (index => ohms) 1=120, 2=60, 3=40,
8018 	 * 4=30, 5=20
8019 	 */
8020 	min_rtt_nom_idx = (c_cfg->min_rtt_nom_idx == 0) ?
8021 		1 : c_cfg->min_rtt_nom_idx;
8022 	max_rtt_nom_idx = (c_cfg->max_rtt_nom_idx == 0) ?
8023 		5 : c_cfg->max_rtt_nom_idx;
8024 
8025 	min_rodt_ctl = (c_cfg->min_rodt_ctl == 0) ? 1 : c_cfg->min_rodt_ctl;
8026 	max_rodt_ctl = (c_cfg->max_rodt_ctl == 0) ? 5 : c_cfg->max_rodt_ctl;
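	/*
	 * Example: with all four limits left at 0 in the board config,
	 * the sweeps below cover RTT_NOM indices 1..5 (120/60/40/30/20
	 * ohms per the table above) and RODT_CTL settings 1..5.
	 */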
8027 
8028 	s = lookup_env(priv, "ddr_min_rodt_ctl");
8029 	if (s)
8030 		min_rodt_ctl = simple_strtoul(s, NULL, 0);
8031 
8032 	s = lookup_env(priv, "ddr_max_rodt_ctl");
8033 	if (s)
8034 		max_rodt_ctl = simple_strtoul(s, NULL, 0);
8035 
8036 	s = lookup_env(priv, "ddr_min_rtt_nom_idx");
8037 	if (s)
8038 		min_rtt_nom_idx = simple_strtoul(s, NULL, 0);
8039 
8040 	s = lookup_env(priv, "ddr_max_rtt_nom_idx");
8041 	if (s)
8042 		max_rtt_nom_idx = simple_strtoul(s, NULL, 0);
8043 
8044 #ifdef ENABLE_HARDCODED_RLEVEL
8045 	if (c_cfg->rl_tbl) {
8046 		/* Check for hard-coded read-leveling settings */
8047 		get_dimm_part_number(part_number, &dimm_config_table[0],
8048 				     0, ddr_type);
8049 		for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8050 			if (!(rank_mask & (1 << rankx)))
8051 				continue;
8052 
8053 			rl_rank.u64 = lmc_rd(priv,
8054 					     CVMX_LMCX_RLEVEL_RANKX(rankx,
8055 								    if_num));
8056 
8057 			i = 0;
8058 			while (c_cfg->rl_tbl[i].part) {
8059 				debug("DIMM part number:\"%s\", SPD: \"%s\"\n",
8060 				      c_cfg->rl_tbl[i].part, part_number);
8061 				if ((strcmp(part_number,
8062 					    c_cfg->rl_tbl[i].part) == 0) &&
8063 				    (abs(c_cfg->rl_tbl[i].speed -
8064 					 2 * ddr_hertz / (1000 * 1000)) < 10)) {
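					/*
					 * The speed match is in MT/s: e.g. a
					 * 1200 MHz DDR clock is 2400 MT/s and
					 * matches a table entry within +/-10.
					 */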
8065 					debug("Using hard-coded read leveling for DIMM part number: \"%s\"\n",
8066 					      part_number);
8067 					rl_rank.u64 =
8068 						c_cfg->rl_tbl[i].rl_rank[if_num][rankx];
8069 					lmc_wr(priv,
8070 					       CVMX_LMCX_RLEVEL_RANKX(rankx,
8071 								      if_num),
8072 					       rl_rank.u64);
8073 					rl_rank.u64 =
8074 						lmc_rd(priv,
8075 						       CVMX_LMCX_RLEVEL_RANKX(rankx,
8076 									      if_num));
8077 					display_rl(if_num, rl_rank, rankx);
8078 					/* Disable h/w read-leveling */
8079 					rl_dbg_loops = 0;
8080 					break;
8081 				}
8082 				++i;
8083 			}
8084 		}
8085 	}
8086 #endif /* ENABLE_HARDCODED_RLEVEL */
8087 
8088 	max_adj_rl_del_inc = c_cfg->maximum_adjacent_rlevel_delay_increment;
8089 	s = lookup_env(priv, "ddr_maximum_adjacent_rlevel_delay_increment");
8090 	if (s)
8091 		max_adj_rl_del_inc = strtoul(s, NULL, 0);
8092 
8093 	while (rl_dbg_loops--) {
8094 		union cvmx_lmcx_modereg_params1 mp1;
8095 		union cvmx_lmcx_comp_ctl2 cc2;
8096 
8097 		/* Initialize the error scoreboard */
8098 		memset(rl_score, 0, sizeof(rl_score));
8099 
8100 		cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8101 		saved_ddr__ptune = cc2.cn78xx.ddr__ptune;
8102 		saved_ddr__ntune = cc2.cn78xx.ddr__ntune;
8103 
8104 		/* Disable dynamic compensation settings */
8105 		if (rl_comp_offs != 0) {
8106 			cc2.cn78xx.ptune = saved_ddr__ptune;
8107 			cc2.cn78xx.ntune = saved_ddr__ntune;
8108 
8109 			/*
8110 			 * Round up the ptune calculation to bias the odd
8111 			 * cases toward ptune
8112 			 */
8113 			cc2.cn78xx.ptune += divide_roundup(rl_comp_offs, 2);
8114 			cc2.cn78xx.ntune -= rl_comp_offs / 2;
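			/*
			 * Example: an offset of 3 adds 2 to ptune
			 * (divide_roundup(3, 2)) and subtracts 1 from ntune,
			 * biasing the odd amount toward ptune.
			 */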
8115 
8116 			ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8117 			saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
8118 			/* Disable ZQCS while in bypass. */
8119 			ctl.s.int_zqcs_dis = 1;
8120 			lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8121 
8122 			cc2.cn78xx.byp = 1; /* Enable bypass mode */
8123 			lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8124 			lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8125 			/* Read again */
8126 			cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8127 			debug("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
8128 			      cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
8129 		}
8130 
8131 		mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
8132 
8133 		for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
8134 		     ++rtt_idx) {
8135 			rtt_nom = imp_val->rtt_nom_table[rtt_idx];
8136 
8137 			/*
8138 			 * When the read ODT mask is zero, the dyn_rtt_nom_mask
8139 			 * is zero and RTT_NOM will not be changing during
8140 			 * read-leveling.  Since the value is fixed we only need
8141 			 * to test it once.
8142 			 */
8143 			if (dyn_rtt_nom_mask == 0) {
8144 				// flag not to print NOM ohms
8145 				print_nom_ohms = -1;
8146 			} else {
8147 				if (dyn_rtt_nom_mask & 1)
8148 					mp1.s.rtt_nom_00 = rtt_nom;
8149 				if (dyn_rtt_nom_mask & 2)
8150 					mp1.s.rtt_nom_01 = rtt_nom;
8151 				if (dyn_rtt_nom_mask & 4)
8152 					mp1.s.rtt_nom_10 = rtt_nom;
8153 				if (dyn_rtt_nom_mask & 8)
8154 					mp1.s.rtt_nom_11 = rtt_nom;
8155 				// FIXME? rank 0 ohms always?
8156 				print_nom_ohms =
8157 					imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00];
8158 			}
8159 
8160 			lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
8161 			       mp1.u64);
8162 
8163 			if (print_nom_ohms >= 0 && rl_print > 1) {
8164 				debug("\n");
8165 				debug("RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
8166 				      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
8167 				      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
8168 				      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
8169 				      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
8170 				      mp1.s.rtt_nom_11,
8171 				      mp1.s.rtt_nom_10,
8172 				      mp1.s.rtt_nom_01,
8173 				      mp1.s.rtt_nom_00);
8174 			}
8175 
8176 			ddr_init_seq(priv, rank_mask, if_num);
8177 
8178 			// Try RANK outside RODT to rearrange the output...
8179 			for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8180 				if (!(rank_mask & (1 << rankx)))
8181 					continue;
8182 
8183 				for (rodt_ctl = max_rodt_ctl;
8184 				     rodt_ctl >= min_rodt_ctl; --rodt_ctl)
8185 					rodt_loop(priv, rankx, rl_score);
8186 			}
8187 		}
8188 
8189 		/* Re-enable dynamic compensation settings. */
8190 		if (rl_comp_offs != 0) {
8191 			cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8192 
8193 			cc2.cn78xx.ptune = 0;
8194 			cc2.cn78xx.ntune = 0;
8195 			cc2.cn78xx.byp = 0; /* Disable bypass mode */
8196 			lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8197 			/* Read once */
8198 			lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8199 
8200 			/* Read again */
8201 			cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8202 			debug("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
8203 			      cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
8204 
8205 			ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8206 			/* Restore original setting */
8207 			ctl.s.int_zqcs_dis = saved_int_zqcs_dis;
8208 			lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8209 		}
8210 
8211 		int override_compensation = 0;
8212 
8213 		s = lookup_env(priv, "ddr__ptune");
8214 		if (s)
8215 			saved_ddr__ptune = strtoul(s, NULL, 0);
8216 
8217 		s = lookup_env(priv, "ddr__ntune");
8218 		if (s) {
8219 			saved_ddr__ntune = strtoul(s, NULL, 0);
8220 			override_compensation = 1;
8221 		}
8222 
8223 		if (override_compensation) {
8224 			cc2.cn78xx.ptune = saved_ddr__ptune;
8225 			cc2.cn78xx.ntune = saved_ddr__ntune;
8226 
8227 			ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8228 			saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
8229 			/* Disable ZQCS while in bypass. */
8230 			ctl.s.int_zqcs_dis = 1;
8231 			lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8232 
8233 			cc2.cn78xx.byp = 1; /* Enable bypass mode */
8234 			lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8235 			/* Read again */
8236 			cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8237 
8238 			debug("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
8239 			      cc2.cn78xx.ptune, cc2.cn78xx.ntune);
8240 		}
8241 
8242 		/* Evaluation block */
8243 		/* Still at initial value? */
8244 		int best_rodt_score = DEFAULT_BEST_RANK_SCORE;
8245 		int auto_rodt_ctl = 0;
8246 		int auto_rtt_nom  = 0;
8247 		int rodt_score;
8248 
8249 		rodt_row_skip_mask = 0;
8250 
8251 		// just add specific RODT rows to the skip mask for DDR4
8252 		// at this time...
8253 		if (ddr_type == DDR4_DRAM) {
8254 			// skip RODT row 34 ohms for all DDR4 types
8255 			rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm);
8256 			// skip RODT row 40 ohms for all DDR4 types
8257 			rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm);
8258 			// For now, do not skip RODT row 40 or 48 ohm when
8259 			// ddr_hertz is above 1075 MHz
8260 			if (ddr_hertz > 1075000000) {
8261 				// noskip RODT row 40 ohms
8262 				rodt_row_skip_mask &=
8263 					~(1 << ddr4_rodt_ctl_40_ohm);
8264 				// noskip RODT row 48 ohms
8265 				rodt_row_skip_mask &=
8266 					~(1 << ddr4_rodt_ctl_48_ohm);
8267 			}
8268 			// For now, do not skip RODT row 48 ohm for 2Rx4
8269 			// stacked die DIMMs
8270 			if (is_stacked_die && num_ranks == 2 &&
8271 			    dram_width == 4) {
8272 				// noskip RODT row 48 ohms
8273 				rodt_row_skip_mask &=
8274 					~(1 << ddr4_rodt_ctl_48_ohm);
8275 			}
8276 			// for now, leave all rows eligible when we have
8277 			// mini-DIMMs...
8278 			if (spd_dimm_type == 5 || spd_dimm_type == 6)
8279 				rodt_row_skip_mask = 0;
8280 			// for now, leave all rows eligible when we have
8281 			// a 2-slot 1-rank config
8282 			if (dimm_count == 2 && num_ranks == 1)
8283 				rodt_row_skip_mask = 0;
8284 
8285 			debug("Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
8286 			for (rtt_idx = min_rtt_nom_idx;
8287 			     rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
8288 				rtt_nom = imp_val->rtt_nom_table[rtt_idx];
8289 
8290 				for (rodt_ctl = max_rodt_ctl;
8291 				     rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
8292 					rodt_score = 0;
8293 					for (rankx = 0; rankx < dimm_count * 4;
8294 					     rankx++) {
8295 						if (!(rank_mask & (1 << rankx)))
8296 							continue;
8297 
8298 						debug("rl_score[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
8299 						      rtt_nom, rodt_ctl, rankx,
8300 						      rl_score[rtt_nom][rodt_ctl][rankx].score);
8301 						rodt_score +=
8302 							rl_score[rtt_nom][rodt_ctl][rankx].score;
8303 					}
8304 					// FIXME: do we need to skip RODT rows
8305 					// here, like we do below in the
8306 					// by-RANK settings?
8307 
8308 					/*
8309 					 * When using automatic ODT settings use
8310 					 * the ODT settings associated with the
8311 					 * best score for all of the tested ODT
8312 					 * combinations.
8313 					 */
8314 
8315 					if (rodt_score < best_rodt_score ||
8316 					    (rodt_score == best_rodt_score &&
8317 					     (imp_val->rodt_ohms[rodt_ctl] >
8318 					      imp_val->rodt_ohms[auto_rodt_ctl]))) {
8319 						debug("AUTO: new best score for rodt:%d (%d), new score:%d, previous score:%d\n",
8320 						      rodt_ctl,
8321 						      imp_val->rodt_ohms[rodt_ctl],
8322 						      rodt_score,
8323 						      best_rodt_score);
8324 						best_rodt_score = rodt_score;
8325 						auto_rodt_ctl   = rodt_ctl;
8326 						auto_rtt_nom    = rtt_nom;
8327 					}
8328 				}
8329 			}
8330 
8331 			mp1.u64 = lmc_rd(priv,
8332 					 CVMX_LMCX_MODEREG_PARAMS1(if_num));
8333 
8334 			if (ddr_rtt_nom_auto) {
8335 				/* Store the automatically set RTT_NOM value */
8336 				if (dyn_rtt_nom_mask & 1)
8337 					mp1.s.rtt_nom_00 = auto_rtt_nom;
8338 				if (dyn_rtt_nom_mask & 2)
8339 					mp1.s.rtt_nom_01 = auto_rtt_nom;
8340 				if (dyn_rtt_nom_mask & 4)
8341 					mp1.s.rtt_nom_10 = auto_rtt_nom;
8342 				if (dyn_rtt_nom_mask & 8)
8343 					mp1.s.rtt_nom_11 = auto_rtt_nom;
8344 			} else {
8345 				/*
8346 				 * restore the manual settings to the register
8347 				 */
8348 				mp1.s.rtt_nom_00 = default_rtt_nom[0];
8349 				mp1.s.rtt_nom_01 = default_rtt_nom[1];
8350 				mp1.s.rtt_nom_10 = default_rtt_nom[2];
8351 				mp1.s.rtt_nom_11 = default_rtt_nom[3];
8352 			}
8353 
8354 			lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
8355 			       mp1.u64);
8356 			debug("RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
8357 			      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
8358 			      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
8359 			      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
8360 			      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
8361 			      mp1.s.rtt_nom_11,
8362 			      mp1.s.rtt_nom_10,
8363 			      mp1.s.rtt_nom_01,
8364 			      mp1.s.rtt_nom_00);
8365 
8366 			debug("RTT_WR      %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
8367 			      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
8368 			      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
8369 			      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
8370 			      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
8371 			      extr_wr(mp1.u64, 3),
8372 			      extr_wr(mp1.u64, 2),
8373 			      extr_wr(mp1.u64, 1),
8374 			      extr_wr(mp1.u64, 0));
8375 
8376 			debug("DIC         %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
8377 			      imp_val->dic_ohms[mp1.s.dic_11],
8378 			      imp_val->dic_ohms[mp1.s.dic_10],
8379 			      imp_val->dic_ohms[mp1.s.dic_01],
8380 			      imp_val->dic_ohms[mp1.s.dic_00],
8381 			      mp1.s.dic_11,
8382 			      mp1.s.dic_10,
8383 			      mp1.s.dic_01,
8384 			      mp1.s.dic_00);
8385 
8386 			if (ddr_type == DDR4_DRAM) {
8387 				union cvmx_lmcx_modereg_params2 mp2;
8388 				/*
8389 				 * We must read the CSR, and not depend on
8390 				 * odt_config[odt_idx].odt_mask2, since we could
8391 				 * have overridden values with envvars.
8392 				 * NOTE: this corrects the printout, since the
8393 				 * CSR is not written with the old values...
8394 				 */
8395 				mp2.u64 = lmc_rd(priv,
8396 						 CVMX_LMCX_MODEREG_PARAMS2(if_num));
8397 
8398 				debug("RTT_PARK    %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
8399 				      imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
8400 				      imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
8401 				      imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
8402 				      imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
8403 				      mp2.s.rtt_park_11,
8404 				      mp2.s.rtt_park_10,
8405 				      mp2.s.rtt_park_01,
8406 				      mp2.s.rtt_park_00);
8407 
8408 				debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n",
8409 				      "VREF_RANGE",
8410 				      mp2.s.vref_range_11,
8411 				      mp2.s.vref_range_10,
8412 				      mp2.s.vref_range_01,
8413 				      mp2.s.vref_range_00);
8414 
8415 				debug("%-45s :  0x%x,0x%x,0x%x,0x%x\n",
8416 				      "VREF_VALUE",
8417 				      mp2.s.vref_value_11,
8418 				      mp2.s.vref_value_10,
8419 				      mp2.s.vref_value_01,
8420 				      mp2.s.vref_value_00);
8421 			}
8422 
8423 			cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8424 			if (ddr_rodt_ctl_auto) {
8425 				cc2.cn78xx.rodt_ctl = auto_rodt_ctl;
8426 			} else {
8427 				// back to the original setting
8428 				cc2.cn78xx.rodt_ctl = default_rodt_ctl;
8429 			}
8430 			lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8431 			cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8432 			debug("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
8433 			      cc2.cn78xx.rodt_ctl,
8434 			      imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
8435 
8436 			/*
8437 			 * Use the delays associated with the best score for
8438 			 * each individual rank
8439 			 */
8440 			debug("Evaluating Read-Leveling Scoreboard for per-RANK settings.\n");
8441 
8442 			// this is the RANK MAJOR LOOP
8443 			for (rankx = 0; rankx < dimm_count * 4; rankx++)
8444 				rank_major_loop(priv, rankx, rl_score);
8445 		}  /* Evaluation block */
8446 	} /* while(rl_dbg_loops--) */
8447 
8448 	ctl.cn78xx.ddr2t = save_ddr2t;
8449 	lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8450 	ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8451 	/* Display final 2T value */
8452 	debug("DDR2T                                         : %6d\n",
8453 	      ctl.cn78xx.ddr2t);
8454 
8455 	ddr_init_seq(priv, rank_mask, if_num);
8456 
8457 	for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8458 		u64 value;
8459 		int parameter_set = 0;
8460 
8461 		if (!(rank_mask & (1 << rankx)))
8462 			continue;
8463 
8464 		rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
8465 								  if_num));
8466 
8467 		for (i = 0; i < 9; ++i) {
8468 			s = lookup_env(priv, "ddr%d_rlevel_rank%d_byte%d",
8469 				       if_num, rankx, i);
8470 			if (s) {
8471 				parameter_set |= 1;
8472 				value = simple_strtoul(s, NULL, 0);
8473 
8474 				upd_rl_rank(&rl_rank, i, value);
8475 			}
8476 		}
8477 
8478 		s = lookup_env_ull(priv, "ddr%d_rlevel_rank%d", if_num, rankx);
8479 		if (s) {
8480 			parameter_set |= 1;
8481 			value = simple_strtoull(s, NULL, 0);
8482 			rl_rank.u64 = value;
8483 		}
8484 
8485 		if (parameter_set) {
8486 			lmc_wr(priv,
8487 			       CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
8488 			       rl_rank.u64);
8489 			rl_rank.u64 = lmc_rd(priv,
8490 					     CVMX_LMCX_RLEVEL_RANKX(rankx,
8491 								    if_num));
8492 			display_rl(if_num, rl_rank, rankx);
8493 		}
8494 	}
8495 }
8496 
8497 int init_octeon3_ddr3_interface(struct ddr_priv *priv,
8498 				struct ddr_conf *_ddr_conf, u32 _ddr_hertz,
8499 				u32 cpu_hertz, u32 ddr_ref_hertz, int _if_num,
8500 				u32 _if_mask)
8501 {
8502 	union cvmx_lmcx_control ctrl;
8503 	int ret;
8504 	char *s;
8505 	int i;
8506 
8507 	if_num = _if_num;
8508 	ddr_hertz = _ddr_hertz;
8509 	ddr_conf = _ddr_conf;
8510 	if_mask = _if_mask;
8511 	odt_1rank_config = ddr_conf->odt_1rank_config;
8512 	odt_2rank_config = ddr_conf->odt_2rank_config;
8513 	odt_4rank_config = ddr_conf->odt_4rank_config;
8514 	dimm_config_table = ddr_conf->dimm_config_table;
8515 	c_cfg = &ddr_conf->custom_lmc_config;
8516 
8517 	/*
8518 	 * Compute clock rates to the nearest picosecond.
8519 	 */
8520 	tclk_psecs = hertz_to_psecs(ddr_hertz);	/* Clock in psecs */
8521 	eclk_psecs = hertz_to_psecs(cpu_hertz);	/* Clock in psecs */
8522 
8523 	dimm_count = 0;
8524 	/* Accumulate and report all the errors before giving up */
8525 	fatal_error = 0;
8526 
8527 	/* Flag that indicates safe DDR settings should be used */
8528 	safe_ddr_flag = 0;
8529 	if_64b = 1;		/* Octeon II Default: 64bit interface width */
8530 	mem_size_mbytes = 0;
8531 	bank_bits = 0;
8532 	column_bits_start = 1;
8533 	use_ecc = 1;
8534 	min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0;
8535 	spd_package = 0;
8536 	spd_rawcard = 0;
8537 	spd_rawcard_aorb = 0;
8538 	spd_rdimm_registers = 0;
8539 	is_stacked_die = 0;
8540 	is_3ds_dimm = 0;	// 3DS
8541 	lranks_per_prank = 1;	// 3DS: logical ranks per package rank
8542 	lranks_bits = 0;	// 3DS: logical ranks bits
8543 	die_capacity = 0;	// in Mbits; only used for 3DS
8544 
8545 	wl_mask_err = 0;
8546 	dyn_rtt_nom_mask = 0;
8547 	ddr_disable_chip_reset = 1;
8548 	match_wl_rtt_nom = 0;
8549 
8550 	internal_retries = 0;
8551 
8552 	disable_deskew_training = 0;
8553 	restart_if_dsk_incomplete = 0;
8554 	last_lane = ((if_64b) ? 8 : 4) + use_ecc;
8555 
8556 	disable_sequential_delay_check = 0;
8557 	wl_print = WLEVEL_PRINTALL_DEFAULT;
8558 
8559 	enable_by_rank_init = 1;	// FIXME: default by-rank ON
8560 	saved_rank_mask = 0;
8561 
8562 	node = 0;
8563 
8564 	memset(hwl_alts, 0, sizeof(hwl_alts));
8565 
8566 	/*
8567 	 * Initialize these to shut up the compiler. They are configured
8568 	 * and used only for DDR4
8569 	 */
8570 	ddr4_trrd_lmin = 6000;
8571 	ddr4_tccd_lmin = 6000;
8572 
8573 	debug("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d, CPUID 0x%08x\n",
8574 	      node, if_num, ddr_hertz, ddr_ref_hertz, read_c0_prid());
8575 
8576 	if (dimm_config_table[0].spd_addrs[0] == 0 &&
8577 	    !dimm_config_table[0].spd_ptrs[0]) {
8578 		printf("ERROR: No dimms specified in the dimm_config_table.\n");
8579 		return -1;
8580 	}
8581 
8582 	// allow some overrides to be done
8583 
8584 	// this one controls several things related to DIMM geometry: HWL and RL
8585 	disable_sequential_delay_check = c_cfg->disable_sequential_delay_check;
8586 	s = lookup_env(priv, "ddr_disable_sequential_delay_check");
8587 	if (s)
8588 		disable_sequential_delay_check = strtoul(s, NULL, 0);
8589 
8590 	// this one controls whether chip RESET is done, or LMC init restarted
8591 	// from step 6.9.6
8592 	s = lookup_env(priv, "ddr_disable_chip_reset");
8593 	if (s)
8594 		ddr_disable_chip_reset = !!strtoul(s, NULL, 0);
8595 
8596 	// this one controls whether Deskew Training is performed
8597 	s = lookup_env(priv, "ddr_disable_deskew_training");
8598 	if (s)
8599 		disable_deskew_training = !!strtoul(s, NULL, 0);
8600 
8601 	if (ddr_verbose(priv)) {
8602 		printf("DDR SPD Table:");
8603 		for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
8604 			if (dimm_config_table[didx].spd_addrs[0] == 0)
8605 				break;
8606 
8607 			printf(" --ddr%dspd=0x%02x", if_num,
8608 			       dimm_config_table[didx].spd_addrs[0]);
8609 			if (dimm_config_table[didx].spd_addrs[1] != 0)
8610 				printf(",0x%02x",
8611 				       dimm_config_table[didx].spd_addrs[1]);
8612 		}
8613 		printf("\n");
8614 	}
8615 
8616 	/*
8617 	 * Walk the DRAM Socket Configuration Table to see what is installed.
8618 	 */
8619 	for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
8620 		/* Check for lower DIMM socket populated */
8621 		if (validate_dimm(priv, &dimm_config_table[didx], 0)) {
8622 			if (ddr_verbose(priv))
8623 				report_dimm(&dimm_config_table[didx], 0,
8624 					    dimm_count, if_num);
8625 			++dimm_count;
8626 		} else {
8627 			break;
8628 		}		/* Finished when there is no lower DIMM */
8629 	}
8630 
8631 	initialize_ddr_clock(priv, ddr_conf, cpu_hertz, ddr_hertz,
8632 			     ddr_ref_hertz, if_num, if_mask);
8633 
8634 	if (!odt_1rank_config)
8635 		odt_1rank_config = disable_odt_config;
8636 	if (!odt_2rank_config)
8637 		odt_2rank_config = disable_odt_config;
8638 	if (!odt_4rank_config)
8639 		odt_4rank_config = disable_odt_config;
8640 
8641 	s = env_get("ddr_safe");
8642 	if (s) {
8643 		safe_ddr_flag = !!simple_strtoul(s, NULL, 0);
8644 		printf("Parameter found in environment. ddr_safe = %d\n",
8645 		       safe_ddr_flag);
8646 	}
8647 
8648 	if (dimm_count == 0) {
8649 		printf("ERROR: DIMM 0 not detected.\n");
8650 		return (-1);
8651 	}
8652 
8653 	if (c_cfg->mode32b)
8654 		if_64b = 0;
8655 
8656 	s = lookup_env(priv, "if_64b");
8657 	if (s)
8658 		if_64b = !!simple_strtoul(s, NULL, 0);
8659 
8660 	if (if_64b == 1) {
8661 		if (octeon_is_cpuid(OCTEON_CN70XX)) {
8662 			printf("64-bit interface width is not supported for this Octeon model\n");
8663 			++fatal_error;
8664 		}
8665 	}
8666 
8667 	/* ddr_type only indicates DDR4 or DDR3 */
8668 	ddr_type = (read_spd(&dimm_config_table[0], 0,
8669 			     DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == 0x0C) ? 4 : 3;
8670 	debug("DRAM Device Type: DDR%d\n", ddr_type);
8671 
8672 	if (ddr_type == DDR4_DRAM) {
8673 		int spd_module_type;
8674 		int asymmetric;
8675 		const char *signal_load[4] = { "", "MLS", "3DS", "RSV" };
8676 
8677 		imp_val = &ddr4_impedence_val;
8678 
8679 		spd_addr =
8680 		    read_spd(&dimm_config_table[0], 0,
8681 			     DDR4_SPD_ADDRESSING_ROW_COL_BITS);
8682 		spd_org =
8683 		    read_spd(&dimm_config_table[0], 0,
8684 			     DDR4_SPD_MODULE_ORGANIZATION);
8685 		spd_banks =
8686 		    0xFF & read_spd(&dimm_config_table[0], 0,
8687 				    DDR4_SPD_DENSITY_BANKS);
8688 
8689 		bank_bits =
8690 		    (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3);
8691 		/* Controller can only address 4 bits. */
8692 		bank_bits = min((int)bank_bits, 4);
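		/*
		 * Example (assuming the usual JEDEC SPD encoding): a DDR4
		 * device with 4 bank groups of 4 banks yields 2 bank-address
		 * bits plus 2 group bits, i.e. bank_bits = 4.
		 */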
8693 
8694 		spd_package =
8695 		    0xFF & read_spd(&dimm_config_table[0], 0,
8696 				    DDR4_SPD_PACKAGE_TYPE);
8697 		if (spd_package & 0x80) {	// non-monolithic device
8698 			is_stacked_die = ((spd_package & 0x73) == 0x11);
8699 			debug("DDR4: Package Type 0x%02x (%s), %d die\n",
8700 			      spd_package, signal_load[(spd_package & 3)],
8701 			      ((spd_package >> 4) & 7) + 1);
8702 			is_3ds_dimm = ((spd_package & 3) == 2);	// is it 3DS?
8703 			if (is_3ds_dimm) {	// is it 3DS?
8704 				lranks_per_prank = ((spd_package >> 4) & 7) + 1;
8705 				// FIXME: should make sure it is only 2H or 4H
8706 				// or 8H?
8707 				lranks_bits = lranks_per_prank >> 1;
8708 				if (lranks_bits == 4)
8709 					lranks_bits = 3;
8710 			}
8711 		} else if (spd_package != 0) {
8712 			// FIXME: print non-zero monolithic device definition
8713 			debug("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n",
8714 			      ((spd_package >> 4) & 7) + 1, (spd_package & 3));
8715 		}
8716 
8717 		asymmetric = (spd_org >> 6) & 1;
8718 		if (asymmetric) {
8719 			int spd_secondary_pkg =
8720 			    read_spd(&dimm_config_table[0], 0,
8721 				     DDR4_SPD_SECONDARY_PACKAGE_TYPE);
8722 			debug("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%02x\n",
8723 			      spd_secondary_pkg);
8724 		} else {
8725 			u64 bus_width =
8726 				8 << (0x07 &
8727 				read_spd(&dimm_config_table[0], 0,
8728 					 DDR4_SPD_MODULE_MEMORY_BUS_WIDTH));
8729 			u64 ddr_width = 4 << ((spd_org >> 0) & 0x7);
8730 			u64 module_cap;
8731 			int shift = (spd_banks & 0x0F);
8732 
8733 			die_capacity = (shift < 8) ? (256UL << shift) :
8734 				((12UL << (shift & 1)) << 10);
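			/*
			 * Example: a density/banks shift of 5 decodes to
			 * 256 << 5 = 8192 Mbit (8 Gbit) per die; shifts of
			 * 8 and 9 decode to 12 Gbit and 24 Gbit.
			 */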
8735 			debug("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n",
8736 			      (die_capacity > 512) ? (die_capacity >> 10) :
8737 			      die_capacity, (die_capacity > 512) ? 'G' : 'M');
8738 			module_cap = ((u64)die_capacity << 20) / 8UL *
8739 				bus_width / ddr_width *
8740 				(1UL + ((spd_org >> 3) & 0x7));
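			/*
			 * Example: 8 Gbit dies on a 64-bit bus with x8
			 * devices and 2 package ranks give
			 * 1 GiB * (64 / 8) * 2 = 16 GiB per module.
			 */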
8741 
8742 			// is it 3DS?
8743 			if (is_3ds_dimm) {
8744 				module_cap *= (u64)(((spd_package >> 4) & 7) +
8745 						    1);
8746 			}
8747 			debug("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n",
8748 			      module_cap >> 30);
8749 		}
8750 
8751 		spd_rawcard =
8752 		    0xFF & read_spd(&dimm_config_table[0], 0,
8753 				    DDR4_SPD_REFERENCE_RAW_CARD);
8754 		debug("DDR4: Reference Raw Card 0x%02x\n", spd_rawcard);
8755 
8756 		spd_module_type =
8757 		    read_spd(&dimm_config_table[0], 0,
8758 			     DDR4_SPD_KEY_BYTE_MODULE_TYPE);
8759 		if (spd_module_type & 0x80) {	// HYBRID module
8760 			debug("DDR4: HYBRID module, type %s\n",
8761 			      ((spd_module_type & 0x70) ==
8762 			       0x10) ? "NVDIMM" : "UNKNOWN");
8763 		}
8764 		spd_thermal_sensor =
8765 		    read_spd(&dimm_config_table[0], 0,
8766 			     DDR4_SPD_MODULE_THERMAL_SENSOR);
8767 		spd_dimm_type = spd_module_type & 0x0F;
8768 		spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
8769 			(spd_dimm_type == 8);
8770 		if (spd_rdimm) {
8771 			u16 spd_mfgr_id, spd_register_rev, spd_mod_attr;
8772 			static const u16 manu_ids[4] = {
8773 				0xb380, 0x3286, 0x9780, 0xb304
8774 			};
8775 			static const char *manu_names[4] = {
8776 				"XXX", "XXXXXXX", "XX", "XXXXX"
8777 			};
8778 			int mc;
8779 
8780 			spd_mfgr_id =
8781 			    (0xFFU &
8782 			     read_spd(&dimm_config_table[0], 0,
8783 				      DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
8784 			    ((0xFFU &
8785 			      read_spd(&dimm_config_table[0], 0,
8786 				       DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB))
8787 			     << 8);
8788 			spd_register_rev =
8789 			    0xFFU & read_spd(&dimm_config_table[0], 0,
8790 					     DDR4_SPD_REGISTER_REVISION_NUMBER);
8791 			for (mc = 0; mc < 4; mc++)
8792 				if (manu_ids[mc] == spd_mfgr_id)
8793 					break;
8794 
8795 			debug("DDR4: RDIMM Register Manufacturer ID: %s, Revision: 0x%02x\n",
8796 			      (mc >= 4) ? "UNKNOWN" : manu_names[mc],
8797 			      spd_register_rev);
8798 
8799 			// RAWCARD A or B must be bit 7=0 and bits 4-0
8800 			// either 00000(A) or 00001(B)
8801 			spd_rawcard_aorb = ((spd_rawcard & 0x9fUL) <= 1);
8802 			// RDIMM Module Attributes
8803 			spd_mod_attr =
8804 			    0xFFU & read_spd(&dimm_config_table[0], 0,
8805 					DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE);
8806 			spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
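			/*
			 * (1 << (attr & 3)) >> 1 maps the 2-bit register
			 * count field to 0, 1, 2 or 4 registers.
			 */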
8807 			debug("DDR4: RDIMM Module Attributes (0x%02x): Register Type DDR4RCD%02d, DRAM rows %d, Registers %d\n",
8808 			      spd_mod_attr, (spd_mod_attr >> 4) + 1,
8809 			      ((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
8810 			      spd_rdimm_registers);
8811 		}
8812 		dimm_type_name = ddr4_dimm_types[spd_dimm_type];
8813 	} else {		/* if (ddr_type == DDR4_DRAM) */
8814 		const char *signal_load[4] = { "UNK", "MLS", "SLS", "RSV" };
8815 
8816 		imp_val = &ddr3_impedence_val;
8817 
8818 		spd_addr =
8819 		    read_spd(&dimm_config_table[0], 0,
8820 			     DDR3_SPD_ADDRESSING_ROW_COL_BITS);
8821 		spd_org =
8822 		    read_spd(&dimm_config_table[0], 0,
8823 			     DDR3_SPD_MODULE_ORGANIZATION);
8824 		spd_banks =
8825 		    read_spd(&dimm_config_table[0], 0,
8826 			     DDR3_SPD_DENSITY_BANKS) & 0xff;
8827 
8828 		bank_bits = 3 + ((spd_banks >> 4) & 0x7);
8829 		/* Controller can only address 3 bits. */
8830 		bank_bits = min((int)bank_bits, 3);
8831 		spd_dimm_type =
8832 		    0x0f & read_spd(&dimm_config_table[0], 0,
8833 				    DDR3_SPD_KEY_BYTE_MODULE_TYPE);
8834 		spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
8835 			(spd_dimm_type == 9);
8836 
8837 		spd_package =
8838 		    0xFF & read_spd(&dimm_config_table[0], 0,
8839 				    DDR3_SPD_SDRAM_DEVICE_TYPE);
8840 		if (spd_package & 0x80) {	// non-standard device
8841 			debug("DDR3: Device Type 0x%02x (%s), %d die\n",
8842 			      spd_package, signal_load[(spd_package & 3)],
8843 			      ((1 << ((spd_package >> 4) & 7)) >> 1));
8844 		} else if (spd_package != 0) {
8845 			// FIXME: print non-zero monolithic device definition
8846 			debug("DDR3: Device Type MONOLITHIC: %d die, signal load %d\n",
8847 			      ((1 << ((spd_package >> 4) & 7)) >> 1),
8848 			      (spd_package & 3));
8849 		}
8850 
8851 		spd_rawcard =
8852 		    0xFF & read_spd(&dimm_config_table[0], 0,
8853 				    DDR3_SPD_REFERENCE_RAW_CARD);
8854 		debug("DDR3: Reference Raw Card 0x%02x\n", spd_rawcard);
8855 		spd_thermal_sensor =
8856 		    read_spd(&dimm_config_table[0], 0,
8857 			     DDR3_SPD_MODULE_THERMAL_SENSOR);
8858 
8859 		if (spd_rdimm) {
8860 			int spd_mfgr_id, spd_register_rev, spd_mod_attr;
8861 
8862 			spd_mfgr_id =
8863 			    (0xFFU &
8864 			     read_spd(&dimm_config_table[0], 0,
8865 				      DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
8866 			    ((0xFFU &
8867 			      read_spd(&dimm_config_table[0], 0,
8868 				       DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB))
8869 			     << 8);
8870 			spd_register_rev =
8871 			    0xFFU & read_spd(&dimm_config_table[0], 0,
8872 					     DDR3_SPD_REGISTER_REVISION_NUMBER);
8873 			debug("DDR3: RDIMM Register Manufacturer ID 0x%x Revision 0x%02x\n",
8874 			      spd_mfgr_id, spd_register_rev);
8875 			// Module Attributes
8876 			spd_mod_attr =
8877 			    0xFFU & read_spd(&dimm_config_table[0], 0,
8878 					     DDR3_SPD_ADDRESS_MAPPING);
8879 			spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
8880 			debug("DDR3: RDIMM Module Attributes (0x%02x): DRAM rows %d, Registers %d\n",
8881 			      spd_mod_attr,
8882 			      ((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
8883 			      spd_rdimm_registers);
8884 		}
8885 		dimm_type_name = ddr3_dimm_types[spd_dimm_type];
8886 	}
8887 
8888 	if (spd_thermal_sensor & 0x80) {
8889 		debug("DDR%d: SPD: Thermal Sensor PRESENT\n",
8890 		      (ddr_type == DDR4_DRAM) ? 4 : 3);
8891 	}
8892 
8893 	debug("spd_addr        : %#06x\n", spd_addr);
8894 	debug("spd_org         : %#06x\n", spd_org);
8895 	debug("spd_banks       : %#06x\n", spd_banks);
8896 
8897 	row_bits = 12 + ((spd_addr >> 3) & 0x7);
8898 	col_bits = 9 + ((spd_addr >> 0) & 0x7);
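	/*
	 * Example: spd_addr 0x21 decodes to 12 + 4 = 16 row bits and
	 * 9 + 1 = 10 column bits.
	 */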
8899 
8900 	num_ranks = 1 + ((spd_org >> 3) & 0x7);
8901 	dram_width = 4 << ((spd_org >> 0) & 0x7);
8902 	num_banks = 1 << bank_bits;
8903 
8904 	s = lookup_env(priv, "ddr_num_ranks");
8905 	if (s)
8906 		num_ranks = simple_strtoul(s, NULL, 0);
8907 
8908 	s = lookup_env(priv, "ddr_enable_by_rank_init");
8909 	if (s)
8910 		enable_by_rank_init = !!simple_strtoul(s, NULL, 0);
8911 
8912 	// FIXME: for now, we can only handle a DDR4 2rank-1slot config
8913 	// FIXME: also, by-rank init does not work correctly if 32-bit mode...
8914 	if (enable_by_rank_init && (ddr_type != DDR4_DRAM ||
8915 				    dimm_count != 1 || if_64b != 1 ||
8916 				    num_ranks != 2))
8917 		enable_by_rank_init = 0;
8918 
8919 	if (enable_by_rank_init) {
8920 		struct dimm_odt_config *odt_config;
8921 		union cvmx_lmcx_modereg_params1 mp1;
8922 		union cvmx_lmcx_modereg_params2 modereg_params2;
8923 		int by_rank_rodt, by_rank_wr, by_rank_park;
8924 
8925 		// Do ODT settings changes which work best for 2R-1S configs
8926 		debug("DDR4: 2R-1S special BY-RANK init ODT settings updated\n");
8927 
8928 		// setup for modifying config table values - 2 ranks and 1 DIMM
8929 		odt_config =
8930 		    (struct dimm_odt_config *)&ddr_conf->odt_2rank_config[0];
8931 
8932 		// original was 80, first try was 60
8933 		by_rank_rodt = ddr4_rodt_ctl_48_ohm;
8934 		s = lookup_env(priv, "ddr_by_rank_rodt");
8935 		if (s)
8936 			by_rank_rodt = strtoul(s, NULL, 0);
8937 
8938 		odt_config->qs_dic = /*RODT_CTL */ by_rank_rodt;
8939 
8940 		// this is for MODEREG_PARAMS1 fields
8941 		// fetch the original settings
8942 		mp1.u64 = odt_config->modereg_params1.u64;
8943 
8944 		by_rank_wr = ddr4_rttwr_80ohm;	// originals were 240
8945 		s = lookup_env(priv, "ddr_by_rank_wr");
8946 		if (s)
8947 			by_rank_wr = simple_strtoul(s, NULL, 0);
8948 
8949 		// change specific settings here...
8950 		insrt_wr(&mp1.u64, /*rank */ 00, by_rank_wr);
8951 		insrt_wr(&mp1.u64, /*rank */ 01, by_rank_wr);
8952 
8953 		// save final settings
8954 		odt_config->modereg_params1.u64 = mp1.u64;
8955 
8956 		// this is for MODEREG_PARAMS2 fields
8957 		// fetch the original settings
8958 		modereg_params2.u64 = odt_config->modereg_params2.u64;
8959 
8960 		by_rank_park = ddr4_rttpark_none;	// originals were 120
8961 		s = lookup_env(priv, "ddr_by_rank_park");
8962 		if (s)
8963 			by_rank_park = simple_strtoul(s, NULL, 0);
8964 
8965 		// change specific settings here...
8966 		modereg_params2.s.rtt_park_00 = by_rank_park;
8967 		modereg_params2.s.rtt_park_01 = by_rank_park;
8968 
8969 		// save final settings
8970 		odt_config->modereg_params2.u64 = modereg_params2.u64;
8971 	}
8972 
8973 	/*
8974 	 * FIX
8975 	 * Check that values are within some theoretical limits.
8976 	 * col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) =
8977 	 *   14 - 3 - 4 = 7
8978 	 * col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) =
8979 	 *   18 - 2 - 3 = 13
8980 	 */
8981 	if (col_bits > 13 || col_bits < 7) {
8982 		printf("Unsupported number of Col Bits: %d\n", col_bits);
8983 		++fatal_error;
8984 	}
8985 
8986 	/*
8987 	 * FIX
8988 	 * Check that values are within some theoretical limits.
8989 	 * row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits =
8990 	 *   26 - 18 - 1 = 7
8991 	 * row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits =
8992 	 *   33 - 14 - 1 = 18
8993 	 */
8994 	if (row_bits > 18 || row_bits < 7) {
8995 		printf("Unsupported number of Row Bits: %d\n", row_bits);
8996 		++fatal_error;
8997 	}
8998 
8999 	s = lookup_env(priv, "ddr_rdimm_ena");
9000 	if (s)
9001 		spd_rdimm = !!simple_strtoul(s, NULL, 0);
9002 
9003 	wl_loops = WLEVEL_LOOPS_DEFAULT;
9004 	// accept generic or interface-specific override
9005 	s = lookup_env(priv, "ddr_wlevel_loops");
9006 	if (!s)
9007 		s = lookup_env(priv, "ddr%d_wlevel_loops", if_num);
9008 
9009 	if (s)
9010 		wl_loops = strtoul(s, NULL, 0);
9011 
9012 	s = lookup_env(priv, "ddr_ranks");
9013 	if (s)
9014 		num_ranks = simple_strtoul(s, NULL, 0);
9015 
9016 	bunk_enable = (num_ranks > 1);
9017 
9018 	if (octeon_is_cpuid(OCTEON_CN7XXX))
9019 		column_bits_start = 3;
9020 	else
9021 		printf("ERROR: Unsupported Octeon model: 0x%x\n",
9022 		       read_c0_prid());
9023 
9024 	row_lsb = column_bits_start + col_bits + bank_bits - (!if_64b);
9025 	debug("row_lsb = column_bits_start + col_bits + bank_bits = %d\n",
9026 	      row_lsb);
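	/*
	 * Example: 3 + 10 column bits + 4 bank bits = 17 for a 64-bit
	 * interface (one less in 32-bit mode).
	 */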
9027 
9028 	pbank_lsb = row_lsb + row_bits + bunk_enable;
9029 	debug("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);
9030 
9031 	if (lranks_per_prank > 1) {
9032 		pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
9033 		debug("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
9034 		      row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
9035 	}
9036 
9037 	mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20);
9038 	if (num_ranks == 4) {
9039 		/*
9040 		 * Quad rank dimm capacity is equivalent to two dual-rank
9041 		 * dimms.
9042 		 */
9043 		mem_size_mbytes *= 2;
9044 	}
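	/*
	 * Example: row_lsb = 17, 16 row bits and dual-rank DIMMs give
	 * pbank_lsb = 34, i.e. (1 << 34) >> 20 = 16384 MiB per DIMM.
	 */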
9045 
9046 	/*
9047 	 * Mask with 1 bits set for each active rank, allowing 2 bits
9048 	 * per dimm. This makes later calculations simpler, as a variety
9049 	 * of CSRs use this layout. This init needs to be updated for dual
9050 	 * configs (ie non-identical DIMMs).
9051 	 *
9052 	 * Bit 0 = dimm0, rank 0
9053 	 * Bit 1 = dimm0, rank 1
9054 	 * Bit 2 = dimm1, rank 0
9055 	 * Bit 3 = dimm1, rank 1
9056 	 * ...
9057 	 */
9058 	rank_mask = 0x1;
9059 	if (num_ranks > 1)
9060 		rank_mask = 0x3;
9061 	if (num_ranks > 2)
9062 		rank_mask = 0xf;
9063 
9064 	for (i = 1; i < dimm_count; i++)
9065 		rank_mask |= ((rank_mask & 0x3) << (2 * i));
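	/*
	 * Example: two dual-rank DIMMs give rank_mask = 0x3 | (0x3 << 2)
	 * = 0xf.
	 */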
9066 
9067 	/*
9068 	 * If we are booting from RAM, the DRAM controller is
9069 	 * already set up.  Just return the memory size
9070 	 */
9071 	if (priv->flags & FLAG_RAM_RESIDENT) {
9072 		debug("Ram Boot: Skipping LMC config\n");
9073 		return mem_size_mbytes;
9074 	}
9075 
9076 	if (ddr_type == DDR4_DRAM) {
9077 		spd_ecc =
9078 		    !!(read_spd
9079 		       (&dimm_config_table[0], 0,
9080 			DDR4_SPD_MODULE_MEMORY_BUS_WIDTH) & 8);
9081 	} else {
9082 		spd_ecc =
9083 		    !!(read_spd
9084 		       (&dimm_config_table[0], 0,
9085 			DDR3_SPD_MEMORY_BUS_WIDTH) & 8);
9086 	}
9087 
9088 	char rank_spec[8];
9089 
9090 	printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package);
9091 	debug("Summary: %d %s%s %s %s, row bits=%d, col bits=%d, bank bits=%d\n",
9092 	      dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "",
9093 	      rank_spec,
9094 	      (spd_ecc) ? "ECC" : "non-ECC", row_bits, col_bits, bank_bits);
9095 
9096 	if (ddr_type == DDR4_DRAM) {
9097 		spd_cas_latency =
9098 		    ((0xff &
9099 		      read_spd(&dimm_config_table[0], 0,
9100 			       DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0);
9101 		spd_cas_latency |=
9102 		    ((0xff &
9103 		      read_spd(&dimm_config_table[0], 0,
9104 			       DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8);
9105 		spd_cas_latency |=
9106 		    ((0xff &
9107 		      read_spd(&dimm_config_table[0], 0,
9108 			       DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16);
9109 		spd_cas_latency |=
9110 		    ((0xff &
9111 		      read_spd(&dimm_config_table[0], 0,
9112 			       DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24);
9113 	} else {
9114 		spd_cas_latency =
9115 		    0xff & read_spd(&dimm_config_table[0], 0,
9116 				    DDR3_SPD_CAS_LATENCIES_LSB);
9117 		spd_cas_latency |=
9118 		    ((0xff &
9119 		      read_spd(&dimm_config_table[0], 0,
9120 			       DDR3_SPD_CAS_LATENCIES_MSB)) << 8);
9121 	}
9122 	debug("spd_cas_latency : %#06x\n", spd_cas_latency);
9123 
9124 	if (ddr_type == DDR4_DRAM) {
9125 		/*
9126 		 * No other values for DDR4 MTB and FTB are specified at the
9127 		 * current time so don't bother reading them. Can't speculate
9128 		 * how new values will be represented.
9129 		 */
9130 		int spdmtb = 125;
9131 		int spdftb = 1;
9132 
9133 		taamin = spdmtb * read_spd(&dimm_config_table[0], 0,
9134 					   DDR4_SPD_MIN_CAS_LATENCY_TAAMIN) +
9135 			 spdftb * (signed char)read_spd(&dimm_config_table[0],
9136 			 0, DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN);
9137 
9138 		ddr4_tckavgmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9139 			DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) +
9140 			spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9141 			DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
9142 
9143 		ddr4_tckavgmax = spdmtb * read_spd(&dimm_config_table[0], 0,
9144 			DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX) +
9145 			spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9146 			DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX);
9147 
9148 		ddr4_trdcmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9149 			DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN) +
9150 			spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9151 			DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN);
9152 
9153 		ddr4_trpmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9154 			DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN) +
9155 			spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9156 			DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN);
9157 
9158 		ddr4_trasmin = spdmtb *
9159 			(((read_spd
9160 			   (&dimm_config_table[0], 0,
9161 			    DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) +
9162 			 (read_spd
9163 			  (&dimm_config_table[0], 0,
9164 			   DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff));
9165 
9166 		ddr4_trcmin = spdmtb *
9167 			((((read_spd
9168 			    (&dimm_config_table[0], 0,
9169 			     DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) <<
9170 			  8) + (read_spd
9171 				(&dimm_config_table[0], 0,
9172 				 DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) &
9173 				0xff))
9174 			+ spdftb * (signed char)read_spd(&dimm_config_table[0],
9175 							 0,
9176 			DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN);
9177 
9178 		ddr4_trfc1min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9179 			DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) <<
9180 			8) + (read_spd(&dimm_config_table[0], 0,
9181 			DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff));
9182 
9183 		ddr4_trfc2min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9184 			DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) <<
9185 			8) + (read_spd(&dimm_config_table[0], 0,
9186 			DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff));
9187 
9188 		ddr4_trfc4min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9189 			DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) <<
9190 			8) + (read_spd(&dimm_config_table[0], 0,
9191 			DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff));
9192 
9193 		ddr4_tfawmin = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9194 			DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) <<
9195 			8) + (read_spd(&dimm_config_table[0], 0,
9196 			DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff));
9197 
9198 		ddr4_trrd_smin = spdmtb * read_spd(&dimm_config_table[0], 0,
9199 			DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN) +
9200 			spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9201 			DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN);
9202 
9203 		ddr4_trrd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9204 			DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN) +
9205 			spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9206 			DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN);
9207 
9208 		ddr4_tccd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9209 			DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN) +
9210 			spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9211 			DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN);
9212 
9213 		debug("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdmtb);
9214 		debug("%-45s : %6d ps\n", "Fine Timebase   (FTB)", spdftb);
9215 
9216 		debug("%-45s : %6d ps (%ld MT/s)\n",
9217 		      "SDRAM Minimum Cycle Time (tCKAVGmin)", ddr4_tckavgmin,
9218 		      pretty_psecs_to_mts(ddr4_tckavgmin));
9219 		debug("%-45s : %6d ps\n",
9220 		      "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tckavgmax);
9221 		debug("%-45s : %6d ps\n", "Minimum CAS Latency Time (taamin)",
9222 		      taamin);
9223 		debug("%-45s : %6d ps\n",
9224 		      "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_trdcmin);
9225 		debug("%-45s : %6d ps\n",
9226 		      "Minimum Row Precharge Delay Time (tRPmin)", ddr4_trpmin);
9227 		debug("%-45s : %6d ps\n",
9228 		      "Minimum Active to Precharge Delay (tRASmin)",
9229 		      ddr4_trasmin);
9230 		debug("%-45s : %6d ps\n",
9231 		      "Minimum Active to Active/Refr. Delay (tRCmin)",
9232 		      ddr4_trcmin);
9233 		debug("%-45s : %6d ps\n",
9234 		      "Minimum Refresh Recovery Delay (tRFC1min)",
9235 		      ddr4_trfc1min);
9236 		debug("%-45s : %6d ps\n",
9237 		      "Minimum Refresh Recovery Delay (tRFC2min)",
9238 		      ddr4_trfc2min);
9239 		debug("%-45s : %6d ps\n",
9240 		      "Minimum Refresh Recovery Delay (tRFC4min)",
9241 		      ddr4_trfc4min);
9242 		debug("%-45s : %6d ps\n",
9243 		      "Minimum Four Activate Window Time (tFAWmin)",
9244 		      ddr4_tfawmin);
9245 		debug("%-45s : %6d ps\n",
9246 		      "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_trrd_smin);
9247 		debug("%-45s : %6d ps\n",
9248 		      "Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_trrd_lmin);
9249 		debug("%-45s : %6d ps\n",
9250 		      "Minimum CAS to CAS Delay Time (tCCD_Lmin)",
9251 		      ddr4_tccd_lmin);
9252 
9253 #define DDR4_TWR 15000
9254 #define DDR4_TWTR_S 2500
9255 
9256 		tckmin = ddr4_tckavgmin;
9257 		twr = DDR4_TWR;
9258 		trcd = ddr4_trdcmin;
9259 		trrd = ddr4_trrd_smin;
9260 		trp = ddr4_trpmin;
9261 		tras = ddr4_trasmin;
9262 		trc = ddr4_trcmin;
9263 		trfc = ddr4_trfc1min;
9264 		twtr = DDR4_TWTR_S;
9265 		tfaw = ddr4_tfawmin;
9266 
9267 		if (spd_rdimm) {
9268 			spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
9269 			DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) &
9270 			0x1;
9271 		} else {
9272 			spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
9273 				DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1;
9274 		}
9275 		debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
9276 	} else {
9277 		spd_mtb_dividend =
9278 		    0xff & read_spd(&dimm_config_table[0], 0,
9279 				    DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
9280 		spd_mtb_divisor =
9281 		    0xff & read_spd(&dimm_config_table[0], 0,
9282 				    DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
9283 		spd_tck_min =
9284 		    0xff & read_spd(&dimm_config_table[0], 0,
9285 				    DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
9286 		spd_taa_min =
9287 		    0xff & read_spd(&dimm_config_table[0], 0,
9288 				    DDR3_SPD_MIN_CAS_LATENCY_TAAMIN);
9289 
9290 		spd_twr =
9291 		    0xff & read_spd(&dimm_config_table[0], 0,
9292 				    DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN);
9293 		spd_trcd =
9294 		    0xff & read_spd(&dimm_config_table[0], 0,
9295 				    DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN);
9296 		spd_trrd =
9297 		    0xff & read_spd(&dimm_config_table[0], 0,
9298 				    DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN);
9299 		spd_trp =
9300 		    0xff & read_spd(&dimm_config_table[0], 0,
9301 				    DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN);
9302 		spd_tras =
9303 		    0xff & read_spd(&dimm_config_table[0], 0,
9304 				    DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN);
9305 		spd_tras |=
9306 		    ((0xff &
9307 		      read_spd(&dimm_config_table[0], 0,
9308 			       DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8);
9309 		spd_trc =
9310 		    0xff & read_spd(&dimm_config_table[0], 0,
9311 				    DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN);
9312 		spd_trc |=
9313 		    ((0xff &
9314 		      read_spd(&dimm_config_table[0], 0,
9315 			       DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf0) << 4);
9316 		spd_trfc =
9317 		    0xff & read_spd(&dimm_config_table[0], 0,
9318 				    DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN);
9319 		spd_trfc |=
9320 		    ((0xff &
9321 		      read_spd(&dimm_config_table[0], 0,
9322 			       DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) <<
9323 		     8);
9324 		spd_twtr =
9325 		    0xff & read_spd(&dimm_config_table[0], 0,
9326 				DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN);
9327 		spd_trtp =
9328 		    0xff & read_spd(&dimm_config_table[0], 0,
9329 			DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN);
9330 		spd_tfaw =
9331 		    0xff & read_spd(&dimm_config_table[0], 0,
9332 				    DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN);
9333 		spd_tfaw |=
9334 		    ((0xff &
9335 		      read_spd(&dimm_config_table[0], 0,
9336 			       DDR3_SPD_UPPER_NIBBLE_TFAW) & 0xf) << 8);
9337 		spd_addr_mirror =
9338 		    0xff & read_spd(&dimm_config_table[0], 0,
9339 				    DDR3_SPD_ADDRESS_MAPPING) & 0x1;
9340 		/* Only address mirror unbuffered dimms.  */
9341 		spd_addr_mirror = spd_addr_mirror && !spd_rdimm;
9342 		ftb_dividend =
9343 		    read_spd(&dimm_config_table[0], 0,
9344 			     DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
9345 		ftb_divisor =
9346 		    read_spd(&dimm_config_table[0], 0,
9347 			     DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
9348 		/* Make sure that it is not 0 */
9349 		ftb_divisor = (ftb_divisor == 0) ? 1 : ftb_divisor;
9350 
9351 		debug("spd_twr         : %#06x\n", spd_twr);
9352 		debug("spd_trcd        : %#06x\n", spd_trcd);
9353 		debug("spd_trrd        : %#06x\n", spd_trrd);
9354 		debug("spd_trp         : %#06x\n", spd_trp);
9355 		debug("spd_tras        : %#06x\n", spd_tras);
9356 		debug("spd_trc         : %#06x\n", spd_trc);
9357 		debug("spd_trfc        : %#06x\n", spd_trfc);
9358 		debug("spd_twtr        : %#06x\n", spd_twtr);
9359 		debug("spd_trtp        : %#06x\n", spd_trtp);
9360 		debug("spd_tfaw        : %#06x\n", spd_tfaw);
9361 		debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
9362 
9363 		mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
9364 		taamin = mtb_psec * spd_taa_min;
9365 		taamin += ftb_dividend *
9366 			(signed char)read_spd(&dimm_config_table[0],
9367 				0, DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) /
9368 			ftb_divisor;
9369 		tckmin = mtb_psec * spd_tck_min;
9370 		tckmin += ftb_dividend *
9371 			(signed char)read_spd(&dimm_config_table[0],
9372 				0, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) /
9373 			ftb_divisor;
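		/*
		 * Example: a typical DDR3-1600 SPD has MTB = 1/8 ns
		 * (125 ps) and tCKmin = 10 MTB, i.e. 1250 ps, before the
		 * fine-timebase correction.
		 */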
9374 
9375 		twr = spd_twr * mtb_psec;
9376 		trcd = spd_trcd * mtb_psec;
9377 		trrd = spd_trrd * mtb_psec;
9378 		trp = spd_trp * mtb_psec;
9379 		tras = spd_tras * mtb_psec;
9380 		trc = spd_trc * mtb_psec;
9381 		trfc = spd_trfc * mtb_psec;
9382 		if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) && trfc < 260000) {
9383 			// default to this - because it works...
9384 			int new_trfc = 260000;
9385 
9386 			s = env_get("ddr_trfc");
9387 			if (s) {
9388 				new_trfc = simple_strtoul(s, NULL, 0);
9389 				printf("Parameter found in environment. ddr_trfc = %d\n",
9390 				       new_trfc);
9391 				if (new_trfc < 160000 || new_trfc > 260000) {
9392 					// back to default if out of range
9393 					new_trfc = 260000;
9394 				}
9395 			}
9396 			debug("N%d.LMC%d: Adjusting tRFC from %d to %d, for CN78XX Pass 2.x\n",
9397 			      node, if_num, trfc, new_trfc);
9398 			trfc = new_trfc;
9399 		}
9400 
9401 		twtr = spd_twtr * mtb_psec;
9402 		trtp = spd_trtp * mtb_psec;
9403 		tfaw = spd_tfaw * mtb_psec;
9404 
9405 		debug("Medium Timebase (MTB)                         : %6d ps\n",
9406 		      mtb_psec);
9407 		debug("Minimum Cycle Time (tckmin)                   : %6d ps (%ld MT/s)\n",
9408 		      tckmin, pretty_psecs_to_mts(tckmin));
9409 		debug("Minimum CAS Latency Time (taamin)             : %6d ps\n",
9410 		      taamin);
9411 		debug("Write Recovery Time (tWR)                     : %6d ps\n",
9412 		      twr);
9413 		debug("Minimum RAS to CAS delay (tRCD)               : %6d ps\n",
9414 		      trcd);
9415 		debug("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n",
9416 		      trrd);
9417 		debug("Minimum Row Precharge Delay (tRP)             : %6d ps\n",
9418 		      trp);
9419 		debug("Minimum Active to Precharge (tRAS)            : %6d ps\n",
9420 		      tras);
9421 		debug("Minimum Active to Active/Refresh Delay (tRC)  : %6d ps\n",
9422 		      trc);
9423 		debug("Minimum Refresh Recovery Delay (tRFC)         : %6d ps\n",
9424 		      trfc);
9425 		debug("Internal write to read command delay (tWTR)   : %6d ps\n",
9426 		      twtr);
9427 		debug("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n",
9428 		      trtp);
9429 		debug("Minimum Four Activate Window Delay (tFAW)     : %6d ps\n",
9430 		      tfaw);
9431 	}
9432 
9433 	/*
9434 	 * When the cycle time is within 1 psec of the minimum, accept it
9435 	 * as a slight rounding error and adjust it to exactly the minimum
9436 	 * cycle time. This avoids an unnecessary warning.
9437 	 */
9438 	if (abs(tclk_psecs - tckmin) < 2)
9439 		tclk_psecs = tckmin;
9440 
9441 	if (tclk_psecs < (u64)tckmin) {
9442 		printf("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin: %ld)!!!!\n",
9443 		       tclk_psecs, (ulong)tckmin);
9444 	}
9445 
9446 	debug("DDR Clock Rate (tCLK)                         : %6ld ps\n",
9447 	      tclk_psecs);
9448 	debug("Core Clock Rate (eCLK)                        : %6ld ps\n",
9449 	      eclk_psecs);
9450 
9451 	s = env_get("ddr_use_ecc");
9452 	if (s) {
9453 		use_ecc = !!simple_strtoul(s, NULL, 0);
9454 		printf("Parameter found in environment. ddr_use_ecc = %d\n",
9455 		       use_ecc);
9456 	}
9457 	use_ecc = use_ecc && spd_ecc;
9458 
9459 	if_bytemask = if_64b ? (use_ecc ? 0x1ff : 0xff)
9460 	    : (use_ecc ? 0x01f : 0x0f);
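	/*
	 * Example: a 64-bit interface with ECC enables all nine byte
	 * lanes (0x1ff); 32-bit with ECC enables five (0x01f).
	 */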
9461 
9462 	debug("DRAM Interface width: %d bits %s bytemask 0x%03x\n",
9463 	      if_64b ? 64 : 32, use_ecc ? "+ECC" : "", if_bytemask);
9464 
9465 	debug("\n------ Board Custom Configuration Settings ------\n");
9466 	debug("%-45s : %d\n", "MIN_RTT_NOM_IDX   ", c_cfg->min_rtt_nom_idx);
9467 	debug("%-45s : %d\n", "MAX_RTT_NOM_IDX   ", c_cfg->max_rtt_nom_idx);
9468 	debug("%-45s : %d\n", "MIN_RODT_CTL      ", c_cfg->min_rodt_ctl);
9469 	debug("%-45s : %d\n", "MAX_RODT_CTL      ", c_cfg->max_rodt_ctl);
9470 	debug("%-45s : %d\n", "MIN_CAS_LATENCY   ", c_cfg->min_cas_latency);
9471 	debug("%-45s : %d\n", "OFFSET_EN         ", c_cfg->offset_en);
9472 	debug("%-45s : %d\n", "OFFSET_UDIMM      ", c_cfg->offset_udimm);
9473 	debug("%-45s : %d\n", "OFFSET_RDIMM      ", c_cfg->offset_rdimm);
9474 	debug("%-45s : %d\n", "DDR_RTT_NOM_AUTO  ", c_cfg->ddr_rtt_nom_auto);
9475 	debug("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", c_cfg->ddr_rodt_ctl_auto);
9476 	if (spd_rdimm)
9477 		debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
9478 		      c_cfg->rlevel_comp_offset_rdimm);
9479 	else
9480 		debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
9481 		      c_cfg->rlevel_comp_offset_udimm);
9482 	debug("%-45s : %d\n", "RLEVEL_COMPUTE    ", c_cfg->rlevel_compute);
9483 	debug("%-45s : %d\n", "DDR2T_UDIMM       ", c_cfg->ddr2t_udimm);
9484 	debug("%-45s : %d\n", "DDR2T_RDIMM       ", c_cfg->ddr2t_rdimm);
9485 	debug("%-45s : %d\n", "FPRCH2            ", c_cfg->fprch2);
9486 	debug("%-45s : %d\n", "PTUNE_OFFSET      ", c_cfg->ptune_offset);
9487 	debug("%-45s : %d\n", "NTUNE_OFFSET      ", c_cfg->ntune_offset);
9488 	debug("-------------------------------------------------\n");
9489 
9490 	cl = divide_roundup(taamin, tclk_psecs);
9491 
9492 	debug("Desired CAS Latency                           : %6d\n", cl);
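	/*
	 * Worked example with made-up numbers: taamin = 13750 ps and
	 * tclk_psecs = 938 ps give divide_roundup(13750, 938) = 15,
	 * i.e. the smallest CL whose CL * tCK still meets tAAmin.
	 */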
9493 
9494 	min_cas_latency = c_cfg->min_cas_latency;
9495 
9496 	s = lookup_env(priv, "ddr_min_cas_latency");
9497 	if (s)
9498 		min_cas_latency = simple_strtoul(s, NULL, 0);
9499 
9500 	debug("CAS Latencies supported in DIMM               :");
9501 	base_cl = (ddr_type == DDR4_DRAM) ? 7 : 4;
9502 	for (i = 0; i < 32; ++i) {
9503 		if ((spd_cas_latency >> i) & 1) {
9504 			debug(" %d", i + base_cl);
9505 			max_cas_latency = i + base_cl;
9506 			if (min_cas_latency == 0)
9507 				min_cas_latency = i + base_cl;
9508 		}
9509 	}
9510 	debug("\n");
9511 
9512 	/*
9513 	 * Use relaxed timing when running slower than the minimum
9514 	 * supported speed.  Adjust timing to match the smallest supported
9515 	 * CAS Latency.
9516 	 */
9517 	if (min_cas_latency > cl) {
9518 		ulong adjusted_tclk = taamin / min_cas_latency;
9519 
9520 		cl = min_cas_latency;
9521 		debug("Slow clock speed. Adjusting timing: tClk = %ld, Adjusted tClk = %ld\n",
9522 		      tclk_psecs, adjusted_tclk);
9523 		tclk_psecs = adjusted_tclk;
9524 	}
9525 
9526 	s = env_get("ddr_cas_latency");
9527 	if (s) {
9528 		override_cas_latency = simple_strtoul(s, NULL, 0);
9529 		printf("Parameter found in environment. ddr_cas_latency = %d\n",
9530 		       override_cas_latency);
9531 	}
9532 
9533 	/* Make sure that the selected cas latency is legal */
9534 	for (i = (cl - base_cl); i < 32; ++i) {
9535 		if ((spd_cas_latency >> i) & 1) {
9536 			cl = i + base_cl;
9537 			break;
9538 		}
9539 	}
9540 
9541 	if (max_cas_latency < cl)
9542 		cl = max_cas_latency;
9543 
9544 	if (override_cas_latency != 0)
9545 		cl = override_cas_latency;
9546 
9547 	debug("CAS Latency                                   : %6d\n", cl);
9548 
9549 	if ((cl * tckmin) > 20000) {
9550 		debug("(CLactual * tckmin) = %d exceeds 20 ns\n",
9551 		      (cl * tckmin));
9552 	}
9553 
9554 	if (tclk_psecs < (ulong)tckmin) {
9555 		printf("WARNING!!!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin:%ld)!!!!!!!!\n",
9556 		       tclk_psecs, (ulong)tckmin);
9557 	}
9558 
9559 	if (num_banks != 4 && num_banks != 8 && num_banks != 16) {
9560 		printf("Unsupported number of banks %d. Must be 4, 8 or 16.\n",
9561 		       num_banks);
9562 		++fatal_error;
9563 	}
9564 
9565 	if (num_ranks != 1 && num_ranks != 2 && num_ranks != 4) {
9566 		printf("Unsupported number of ranks: %d\n", num_ranks);
9567 		++fatal_error;
9568 	}
9569 
9570 	if (octeon_is_cpuid(OCTEON_CN78XX) ||
9571 	    octeon_is_cpuid(OCTEON_CN73XX) ||
9572 	    octeon_is_cpuid(OCTEON_CNF75XX)) {
9573 		if (dram_width != 8 && dram_width != 16 && dram_width != 4) {
9574 			printf("Unsupported SDRAM Width, %d.  Must be 4, 8 or 16.\n",
9575 			       dram_width);
9576 			++fatal_error;
9577 		}
9578 	} else if (dram_width != 8 && dram_width != 16) {
9579 		printf("Unsupported SDRAM Width, %d.  Must be 8 or 16.\n",
9580 		       dram_width);
9581 		++fatal_error;
9582 	}
9583 
9584 	/*
9585 	 * Bail out here if things are not copacetic.
9586 	 */
9587 	if (fatal_error)
9588 		return -1;
9589 
9590 	/*
9591 	 * 4.8.4 LMC RESET Initialization
9592 	 *
9593 	 * The purpose of this step is to assert/deassert the RESET# pin at the
9594 	 * DDR3/DDR4 parts.
9595 	 *
9596 	 * This LMC RESET step is done for all enabled LMCs.
9597 	 */
9598 	perform_lmc_reset(priv, node, if_num);
9599 
9600 	// Make sure scrambling is disabled during init...
9601 	ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
9602 	ctrl.s.scramble_ena = 0;
9603 	lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
9604 
9605 	lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), 0);
9606 	lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), 0);
9607 	if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
9608 		lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), 0);
9609 
9610 	odt_idx = min(dimm_count - 1, 3);
9611 
9612 	switch (num_ranks) {
9613 	case 1:
9614 		odt_config = odt_1rank_config;
9615 		break;
9616 	case 2:
9617 		odt_config = odt_2rank_config;
9618 		break;
9619 	case 4:
9620 		odt_config = odt_4rank_config;
9621 		break;
9622 	default:
9623 		odt_config = disable_odt_config;
9624 		printf("Unsupported number of ranks: %d\n", num_ranks);
9625 		++fatal_error;
9626 	}
9627 
9628 	/*
9629 	 * 4.8.5 Early LMC Initialization
9630 	 *
9631 	 * All of DDR PLL, LMC CK, and LMC DRESET initializations must be
9632 	 * completed prior to starting this LMC initialization sequence.
9633 	 *
9634 	 * Perform the following five substeps for early LMC initialization:
9635 	 *
9636 	 * 1. Software must ensure there are no pending DRAM transactions.
9637 	 *
9638 	 * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0,
9639 	 *    LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0,
9640 	 *    LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM,
9641 	 *    LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2,
9642 	 *    LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with
9643 	 *    appropriate values. All sections in this chapter can be used to
9644 	 *    derive proper register settings.
9645 	 */
9646 
9647 	/* LMC(0)_CONFIG */
9648 	lmc_config(priv);
9649 
9650 	/* LMC(0)_CONTROL */
9651 	lmc_control(priv);
9652 
9653 	/* LMC(0)_TIMING_PARAMS0 */
9654 	lmc_timing_params0(priv);
9655 
9656 	/* LMC(0)_TIMING_PARAMS1 */
9657 	lmc_timing_params1(priv);
9658 
9659 	/* LMC(0)_TIMING_PARAMS2 */
9660 	lmc_timing_params2(priv);
9661 
9662 	/* LMC(0)_MODEREG_PARAMS0 */
9663 	lmc_modereg_params0(priv);
9664 
9665 	/* LMC(0)_MODEREG_PARAMS1 */
9666 	lmc_modereg_params1(priv);
9667 
9668 	/* LMC(0)_MODEREG_PARAMS2 */
9669 	lmc_modereg_params2(priv);
9670 
9671 	/* LMC(0)_MODEREG_PARAMS3 */
9672 	lmc_modereg_params3(priv);
9673 
9674 	/* LMC(0)_NXM */
9675 	lmc_nxm(priv);
9676 
9677 	/* LMC(0)_WODT_MASK */
9678 	lmc_wodt_mask(priv);
9679 
9680 	/* LMC(0)_RODT_MASK */
9681 	lmc_rodt_mask(priv);
9682 
9683 	/* LMC(0)_COMP_CTL2 */
9684 	lmc_comp_ctl2(priv);
9685 
9686 	/* LMC(0)_PHY_CTL */
9687 	lmc_phy_ctl(priv);
9688 
9689 	/* LMC(0)_EXT_CONFIG */
9690 	lmc_ext_config(priv);
9691 
9692 	/* LMC(0)_EXT_CONFIG2 */
9693 	lmc_ext_config2(priv);
9694 
9695 	/* LMC(0)_DIMM0/1_PARAMS */
9696 	lmc_dimm01_params(priv);
9697 
9698 	ret = lmc_rank_init(priv);
9699 	if (ret < 0)
9700 		return 0;	/* 0 indicates problem */
9701 
9702 	lmc_config_2(priv);
9703 
9704 	lmc_write_leveling(priv);
9705 
9706 	lmc_read_leveling(priv);
9707 
9708 	lmc_workaround(priv);
9709 
9710 	ret = lmc_sw_write_leveling(priv);
9711 	if (ret < 0)
9712 		return 0;	/* 0 indicates problem */
9713 
9714 	// this sometimes causes stack overflow crashes, so display
9715 	// only for DDR4 RDIMMs.
9716 	if (ddr_type == DDR4_DRAM && spd_rdimm) {
9717 		int i;
9718 
9719 		for (i = 0; i < 3; i += 2)	// just pages 0 and 2 for now..
9720 			display_mpr_page(priv, rank_mask, if_num, i);
9721 	}
9722 
9723 	lmc_dll(priv);
9724 
9725 	lmc_workaround_2(priv);
9726 
9727 	lmc_final(priv);
9728 
9729 	lmc_scrambling(priv);
9730 
9731 	return mem_size_mbytes;
9732 }
9733 
9734 /////    HW-assist byte DLL offset tuning   //////
9735 
9736 static int cvmx_dram_get_num_lmc(struct ddr_priv *priv)
9737 {
9738 	union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
9739 
9740 	if (octeon_is_cpuid(OCTEON_CN70XX))
9741 		return 1;
9742 
9743 	if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) {
9744 		// sample LMC1
9745 		lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(1));
9746 		if (lmcx_dll_ctl2.cn78xx.intf_en)
9747 			return 2;
9748 		else
9749 			return 1;
9750 	}
9751 
9752 	// for CN78XX, LMCs are always active in pairs, and always LMC0/1
9753 	// so, we sample LMC2 to see if 2 and 3 are active
9754 	lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(2));
9755 	if (lmcx_dll_ctl2.cn78xx.intf_en)
9756 		return 4;
9757 	else
9758 		return 2;
9759 }
9760 
9761 // got to do these here, even though already defined in BDK
9762 
9763 // all DDR3, and DDR4 x16 today, use only 3 bank bits;
9764 // DDR4 x4 and x8 always have 4 bank bits
9765 // NOTE: this will change in the future, when DDR4 x16 devices can
9766 // come with 16 banks!! FIXME!!
9767 static int cvmx_dram_get_num_bank_bits(struct ddr_priv *priv, int lmc)
9768 {
9769 	union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
9770 	union cvmx_lmcx_config lmcx_config;
9771 	union cvmx_lmcx_ddr_pll_ctl lmcx_ddr_pll_ctl;
9772 	int bank_width;
9773 
9774 	// can always read this
9775 	lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc));
9776 
9777 	if (lmcx_dll_ctl2.cn78xx.dreset)	// check LMCn
9778 		return 0;
9779 
9780 	lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
9781 	lmcx_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(lmc));
9782 
9783 	bank_width = ((lmcx_ddr_pll_ctl.s.ddr4_mode != 0) &&
9784 		      (lmcx_config.s.bg2_enable)) ? 4 : 3;
9785 
9786 	return bank_width;
9787 }
9788 
9789 #define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1))
9790 #define ADDRESS_HOLE 0x10000000ULL
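/*
 * EXTRACT() usage, for illustration: EXTRACT(a, lsb, width) returns the
 * width-bit field of 'a' starting at bit 'lsb'.  For instance,
 * EXTRACT(3ULL << 40, 40, 2) == 3 (the node bits [41:40] pulled out
 * below), and EXTRACT(0x123ULL, 4, 4) == 0x2.
 */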
9791 
9792 static void cvmx_dram_address_extract_info(struct ddr_priv *priv, u64 address,
9793 					   int *node, int *lmc, int *dimm,
9794 					   int *prank, int *lrank, int *bank,
9795 					   int *row, int *col)
9796 {
9797 	int bank_lsb, xbits;
9798 	union cvmx_l2c_ctl l2c_ctl;
9799 	union cvmx_lmcx_config lmcx_config;
9800 	union cvmx_lmcx_control lmcx_control;
9801 	union cvmx_lmcx_ext_config ext_config;
9802 	int bitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
9803 	int bank_width;
9804 	int dimm_lsb;
9805 	int dimm_width;
9806 	int prank_lsb, lrank_lsb;
9807 	int prank_width, lrank_width;
9808 	int row_lsb;
9809 	int row_width;
9810 	int col_hi_lsb;
9811 	int col_hi_width;
9812 	int col_hi;
9813 
9814 	if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
9815 		bitno = 18;
9816 
9817 	*node = EXTRACT(address, 40, 2);	/* Address bits [41:40] */
9818 
9819 	address &= (1ULL << 40) - 1;	// lop off any node bits or above
9820 	if (address >= ADDRESS_HOLE)	// adjust down if at HOLE or above
9821 		address -= ADDRESS_HOLE;
9822 
9823 	/* Determine the LMC controllers */
9824 	l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL);
9825 
9826 	/* xbits depends on number of LMCs */
9827 	xbits = cvmx_dram_get_num_lmc(priv) >> 1;	// 4->2, 2->1, 1->0
9828 	bank_lsb = 7 + xbits;
9829 
9830 	/* LMC number is probably aliased */
9831 	if (l2c_ctl.s.disidxalias) {
9832 		*lmc = EXTRACT(address, 7, xbits);
9833 	} else {
9834 		*lmc = EXTRACT(address, 7, xbits) ^
9835 			EXTRACT(address, bitno, xbits) ^
9836 			EXTRACT(address, 12, xbits);
9837 	}
9838 
9839 	/* Figure out the bank field width */
9840 	lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(*lmc));
9841 	ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(*lmc));
9842 	bank_width = cvmx_dram_get_num_bank_bits(priv, *lmc);
9843 
9844 	/* Extract additional info from the LMC_CONFIG CSR */
9845 	dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits;
9846 	dimm_width = 40 - dimm_lsb;
9847 	prank_lsb = dimm_lsb - lmcx_config.s.rank_ena;
9848 	prank_width = dimm_lsb - prank_lsb;
9849 	lrank_lsb = prank_lsb - ext_config.s.dimm0_cid;
9850 	lrank_width = prank_lsb - lrank_lsb;
9851 	row_lsb = 14 + lmcx_config.s.row_lsb + xbits;
9852 	row_width = lrank_lsb - row_lsb;
9853 	col_hi_lsb = bank_lsb + bank_width;
9854 	col_hi_width = row_lsb - col_hi_lsb;
9855 
9856 	/* Extract the parts of the address */
9857 	*dimm = EXTRACT(address, dimm_lsb, dimm_width);
9858 	*prank = EXTRACT(address, prank_lsb, prank_width);
9859 	*lrank = EXTRACT(address, lrank_lsb, lrank_width);
9860 	*row = EXTRACT(address, row_lsb, row_width);
9861 
9862 	/* bank calculation may be aliased... */
9863 	lmcx_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(*lmc));
9864 	if (lmcx_control.s.xor_bank) {
9865 		*bank = EXTRACT(address, bank_lsb, bank_width) ^
9866 			EXTRACT(address, 12 + xbits, bank_width);
9867 	} else {
9868 		*bank = EXTRACT(address, bank_lsb, bank_width);
9869 	}
9870 
9871 	/* LMC number already extracted */
9872 	col_hi = EXTRACT(address, col_hi_lsb, col_hi_width);
9873 	*col = EXTRACT(address, 3, 4) | (col_hi << 4);
9874 	/* Bus byte is address bits [2:0]. Unused here */
9875 }
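/*
 * Usage sketch (illustrative only, not called from the init flow):
 * decode a failing physical address into its DRAM coordinates so it
 * can be reported in DIMM/rank/bank/row/column terms:
 *
 *	int node, lmc, dimm, prank, lrank, bank, row, col;
 *
 *	cvmx_dram_address_extract_info(priv, addr, &node, &lmc, &dimm,
 *				       &prank, &lrank, &bank, &row, &col);
 *	printf("A:0x%012llx -> N%d L%d D%d R%d/%d B%1x Row:0x%05x Col:0x%05x\n",
 *	       addr, node, lmc, dimm, prank, lrank, bank, row, col);
 */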
9876 
9877 // end of added workarounds
9878 
9879 // NOTE: "mode" argument:
9880 //         DBTRAIN_TEST: for testing using GP patterns, includes ECC
9881 //         DBTRAIN_DBI:  for DBI deskew training behavior (uses GP patterns)
9882 //         DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC
9883 // NOTE: trust the caller to specify the correct/supported mode
9884 //
9885 static int test_dram_byte_hw(struct ddr_priv *priv, int if_num, u64 p,
9886 			     int mode, u64 *xor_data)
9887 {
9888 	u64 p1;
9889 	u64 k;
9890 	int errors = 0;
9891 
9892 	u64 mpr_data0, mpr_data1;
9893 	u64 bad_bits[2] = { 0, 0 };
9894 
9895 	int node_address, lmc, dimm;
9896 	int prank, lrank;
9897 	int bank, row, col;
9898 	int save_or_dis;
9899 	int byte;
9900 	int ba_loop, ba_bits;
9901 
9902 	union cvmx_lmcx_rlevel_ctl rlevel_ctl;
9903 	union cvmx_lmcx_dbtrain_ctl dbtrain_ctl;
9904 	union cvmx_lmcx_phy_ctl phy_ctl;
9905 
9906 	int biter_errs;
9907 
9908 	// FIXME: K iterations set to 4 for now.
9909 	// FIXME: decrement to increase iterations.
9910 	// FIXME: must be no less than 22 to stay above an LMC hash field.
9911 	int kshift = 27;
9912 
9913 	const char *s;
9914 	int node = 0;
9915 
9916 	// allow override default setting for kshift
9917 	s = env_get("ddr_tune_set_kshift");
9918 	if (s) {
9919 		int temp = simple_strtoul(s, NULL, 0);
9920 
9921 		if (temp < 22 || temp > 28) {
9922 			debug("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n",
9923 			      node, if_num, temp, kshift);
9924 		} else {
9925 			debug("N%d.LMC%d: overriding kshift (%d) to %d\n",
9926 			      node, if_num, kshift, temp);
9927 			kshift = temp;
9928 		}
9929 	}
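	/*
	 * With the default kshift of 27, the loop below covers the
	 * 512 MB (1 << 29) test region in (1 << 29) / (1 << 27) = 4
	 * steps of 128 MB; lowering kshift (no less than 22) samples
	 * more addresses per call.
	 */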
9930 
9931 	/*
9932 	 * 1) Make sure that RLEVEL_CTL[OR_DIS] = 0.
9933 	 */
9934 	rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
9935 	save_or_dis = rlevel_ctl.s.or_dis;
9936 	/* or_dis must be disabled for this sequence */
9937 	rlevel_ctl.s.or_dis = 0;
9938 	lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
9939 
9940 	/*
9941 	 * NOTE: this step done in the calling routine(s)...
9942 	 * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
9943 	 * of choice.
9944 	 * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower
9945 	 * (rising edge) 64 bits of data.
9946 	 * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper
9947 	 * (falling edge) 64 bits of data.
9948 	 * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower
9949 	 * (rising edge <7:0>) and upper (falling edge <15:8>) ECC data.
9950 	 */
9951 
9952 	// final address must include LMC and node
9953 	p |= (if_num << 7);	/* Map address into proper interface */
9954 	p |= (u64)node << CVMX_NODE_MEM_SHIFT;	// map to node
9955 
9956 	/*
9957 	 * Add base offset to both test regions to not clobber u-boot stuff
9958 	 * when running from L2 for NAND boot.
9959 	 */
9960 	p += 0x20000000;	// offset to 512MB, ie above THE HOLE!!!
9961 	p |= 1ull << 63;	// needed for OCTEON
9962 
9963 	errors = 0;
9964 
9965 	cvmx_dram_address_extract_info(priv, p, &node_address, &lmc, &dimm,
9966 				       &prank, &lrank, &bank, &row, &col);
9967 	debug("%s: START at A:0x%012llx, N%d L%d D%d/%d R%d B%1x Row:%05x Col:%05x\n",
9968 	      __func__, p, node_address, lmc, dimm, prank, lrank, bank,
9969 	      row, col);
9970 
9971 	// only check once per call, and ignore if no match...
9972 	if ((int)node != node_address) {
9973 		printf("ERROR: Node address mismatch\n");
9974 		return 0;
9975 	}
9976 	if (lmc != if_num) {
9977 		printf("ERROR: LMC address mismatch\n");
9978 		return 0;
9979 	}
9980 
9981 	/*
9982 	 * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as
9983 	 * it’s a one-shot operation). This is to get into the habit of
9984 	 * resetting PHY’s SILO to the original 0 location.
9985 	 */
9986 	phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
9987 	phy_ctl.s.phy_reset = 1;
9988 	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
9989 
9990 	/*
9991 	 * Walk through a range of addresses avoiding bits that alias
9992 	 * interfaces on the CN88XX.
9993 	 */
9994 
9995 	// FIXME: want to try to keep the K increment from affecting the
9996 	// LMC via hash, so keep it above bit 21; we also want to keep k
9997 	// less than the base offset of bit 29 (512MB)
9998 
9999 	for (k = 0; k < (1UL << 29); k += (1UL << kshift)) {
10000 		// FIXME: the sequence will iterate over 1/2 cacheline
10001 		// FIXME: for each unit specified in "read_cmd_count",
10002 		// FIXME: so, we setup each sequence to do the max cachelines
10003 		// it can
10004 
10005 		p1 = p + k;
10006 
10007 		cvmx_dram_address_extract_info(priv, p1, &node_address, &lmc,
10008 					       &dimm, &prank, &lrank, &bank,
10009 					       &row, &col);
10010 
10011 		/*
10012 		 * 2) Setup the fields of the CSR DBTRAIN_CTL as follows:
10013 		 * a. COL, ROW, BA, BG, PRANK points to the starting point
10014 		 * of the address.
10015 		 * You can just set them to all 0.
10016 		 * b. RW_TRAIN – set this to 1.
10017 		 * c. TCCD_L – set this to 0.
10018 		 * d. READ_CMD_COUNT – instructs the sequence how many
10019 		 * writes/reads to issue.
10020 		 * It is a 5-bit field, so set it to 31 for the maximum # of r/w.
10021 		 */
10022 		dbtrain_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DBTRAIN_CTL(if_num));
10023 		dbtrain_ctl.s.column_a = col;
10024 		dbtrain_ctl.s.row_a = row;
10025 		dbtrain_ctl.s.bg = (bank >> 2) & 3;
10026 		dbtrain_ctl.s.prank = (dimm * 2) + prank;	// FIXME?
10027 		dbtrain_ctl.s.lrank = lrank;	// FIXME?
10028 		dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI);
10029 		dbtrain_ctl.s.write_ena = 1;
10030 		dbtrain_ctl.s.read_cmd_count = 31;	// max count pass 1.x
10031 		if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
10032 		    octeon_is_cpuid(OCTEON_CNF75XX)) {
10033 			// max count on chips that support it
10034 			dbtrain_ctl.s.cmd_count_ext = 3;
10035 		} else {
10036 			// max count pass 1.x
10037 			dbtrain_ctl.s.cmd_count_ext = 0;
10038 		}
10039 
10040 		dbtrain_ctl.s.rw_train = 1;
10041 		dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI);
10042 		// LFSR should only be on when chip supports it...
10043 		dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0;
10044 
10045 		biter_errs = 0;
10046 
10047 		// for each address, iterate over the 4 "banks" in the BA
10048 		for (ba_loop = 0, ba_bits = bank & 3;
10049 		     ba_loop < 4; ba_loop++, ba_bits = (ba_bits + 1) & 3) {
10050 			dbtrain_ctl.s.ba = ba_bits;
10051 			lmc_wr(priv, CVMX_LMCX_DBTRAIN_CTL(if_num),
10052 			       dbtrain_ctl.u64);
10053 
10054 			/*
10055 			 * We will use the RW_TRAINING sequence (14) for
10056 			 * this task.
10057 			 *
10058 			 * 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14,
10059 			 *    SEQ_CTL[INIT_START] = 1).
10060 			 * 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion.
10061 			 */
10062 			oct3_ddr3_seq(priv, prank, if_num, 14);
10063 
10064 			/*
10065 			 * 6) Read MPR_DATA0 and MPR_DATA1 for results.
10066 			 * a. MPR_DATA0[MPR_DATA<63:0>] – comparison results
10067 			 *    for DQ63:DQ0. (1 means MATCH, 0 means FAIL).
10068 			 * b. MPR_DATA1[MPR_DATA<7:0>] – comparison results
10069 			 *    for ECC bit7:0.
10070 			 */
10071 			mpr_data0 = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
10072 			mpr_data1 = lmc_rd(priv, CVMX_LMCX_MPR_DATA1(if_num));
10073 
10074 			/*
10075 			 * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically
10076 			 * clears this as it’s a one-shot operation).
10077 			 * This is to get into the habit of resetting PHY’s
10078 			 * SILO to the original 0 location.
10079 			 */
10080 			phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
10081 			phy_ctl.s.phy_reset = 1;
10082 			lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
10083 
10084 			// bypass any error checking or updating when DBI mode
10085 			if (mode == DBTRAIN_DBI)
10086 				continue;
10087 
10088 			// data bytes
10089 			if (~mpr_data0) {
10090 				for (byte = 0; byte < 8; byte++) {
10091 					if ((~mpr_data0 >> (8 * byte)) & 0xffUL)
10092 						biter_errs |= (1 << byte);
10093 				}
10094 				// accumulate bad bits
10095 				bad_bits[0] |= ~mpr_data0;
10096 			}
10097 
10098 			// include ECC byte errors
10099 			if (~mpr_data1 & 0xffUL) {
10100 				biter_errs |= (1 << 8);
10101 				bad_bits[1] |= ~mpr_data1 & 0xffUL;
10102 			}
10103 		}
10104 
10105 		errors |= biter_errs;
10106 	}			/* end for (k=...) */
10107 
10108 	rlevel_ctl.s.or_dis = save_or_dis;
10109 	lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
10110 
10111 	// send the bad bits back...
10112 	if (mode != DBTRAIN_DBI && xor_data) {
10113 		xor_data[0] = bad_bits[0];
10114 		xor_data[1] = bad_bits[1];
10115 	}
10116 
10117 	return errors;
10118 }
10119 
10120 // setup default for byte test pattern array
10121 // take these from the HRM section 6.9.13
10122 static const u64 byte_pattern_0[] = {
10123 	0xFFAAFFFFFF55FFFFULL,	// GP0
10124 	0x55555555AAAAAAAAULL,	// GP1
10125 	0xAA55AAAAULL,		// GP2
10126 };
10127 
10128 static const u64 byte_pattern_1[] = {
10129 	0xFBF7EFDFBF7FFEFDULL,	// GP0
10130 	0x0F1E3C78F0E1C387ULL,	// GP1
10131 	0xF0E1BF7FULL,		// GP2
10132 };
10133 
10134 // this is from Andrew via LFSR with PRBS=0xFFFFAAAA
10135 static const u64 byte_pattern_2[] = {
10136 	0xEE55AADDEE55AADDULL,	// GP0
10137 	0x55AADDEE55AADDEEULL,	// GP1
10138 	0x55EEULL,		// GP2
10139 };
10140 
10141 // this is from Mike via LFSR with PRBS=0x4A519909
10142 static const u64 byte_pattern_3[] = {
10143 	0x0088CCEE0088CCEEULL,	// GP0
10144 	0xBB552211BB552211ULL,	// GP1
10145 	0xBB00ULL,		// GP2
10146 };
10147 
10148 static const u64 *byte_patterns[4] = {
10149 	byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3
10150 };
10151 
10152 static const u32 lfsr_patterns[4] = {
10153 	0xFFFFAAAAUL, 0x06000000UL, 0xAAAAFFFFUL, 0x4A519909UL
10154 };
10155 
10156 #define NUM_BYTE_PATTERNS 4
10157 
10158 #define DEFAULT_BYTE_BURSTS 32	// compromise between time and rigor
10159 
10160 static void setup_hw_pattern(struct ddr_priv *priv, int lmc,
10161 			     const u64 *pattern_p)
10162 {
10163 	/*
10164 	 * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
10165 	 * of choice.
10166 	 * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower
10167 	 *    (rising edge) 64 bits of data.
10168 	 * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper
10169 	 *    (falling edge) 64 bits of data.
10170 	 * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower
10171 	 *    (rising edge <7:0>) and upper
10172 	 * (falling edge <15:8>) ECC data.
10173 	 */
10174 	lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]);
10175 	lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]);
10176 	lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]);
10177 }
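/*
 * Usage sketch: setup_hw_pattern(priv, lmc, byte_pattern_0) would program
 * GENERAL_PURPOSE0/1/2 with 0xFFAAFFFFFF55FFFF, 0x55555555AAAAAAAA and
 * 0xAA55AAAA before a GP-pattern dbtrain run.
 */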
10178 
10179 static void setup_lfsr_pattern(struct ddr_priv *priv, int lmc, u32 data)
10180 {
10181 	union cvmx_lmcx_char_ctl char_ctl;
10182 	u32 prbs;
10183 	const char *s;
10184 
10185 	s = env_get("ddr_lfsr_prbs");
10186 	if (s)
10187 		prbs = simple_strtoul(s, NULL, 0);
10188 	else
10189 		prbs = data;
10190 
10191 	/*
10192 	 * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
10193 	 * here data comes from the LFSR generating a PRBS pattern
10194 	 * CHAR_CTL.EN = 0
10195 	 * CHAR_CTL.SEL = 0; // for PRBS
10196 	 * CHAR_CTL.DR = 1;
10197 	 * CHAR_CTL.PRBS = setup for whatever type of PRBS to send
10198 	 * CHAR_CTL.SKEW_ON = 1;
10199 	 */
10200 	char_ctl.u64 = lmc_rd(priv, CVMX_LMCX_CHAR_CTL(lmc));
10201 	char_ctl.s.en = 0;
10202 	char_ctl.s.sel = 0;
10203 	char_ctl.s.dr = 1;
10204 	char_ctl.s.prbs = prbs;
10205 	char_ctl.s.skew_on = 1;
10206 	lmc_wr(priv, CVMX_LMCX_CHAR_CTL(lmc), char_ctl.u64);
10207 }
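/*
 * The PRBS seed may be overridden from the environment before tuning,
 * e.g. "setenv ddr_lfsr_prbs 0x4A519909"; otherwise the caller-supplied
 * value (normally one of lfsr_patterns[]) is used.
 */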
10208 
10209 static int choose_best_hw_patterns(int lmc, int mode)
10210 {
10211 	int new_mode = mode;
10212 	const char *s;
10213 
10214 	switch (mode) {
10215 	case DBTRAIN_TEST:	// always choose LFSR if chip supports it
10216 		if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
10217 			int lfsr_enable = 1;
10218 
10219 			s = env_get("ddr_allow_lfsr");
10220 			if (s) {
10221 				// override?
10222 				lfsr_enable = !!strtoul(s, NULL, 0);
10223 			}
10224 
10225 			if (lfsr_enable)
10226 				new_mode = DBTRAIN_LFSR;
10227 		}
10228 		break;
10229 
10230 	case DBTRAIN_DBI:	// possibly can allow LFSR use?
10231 		break;
10232 
10233 	case DBTRAIN_LFSR:	// forced already
10234 		if (!octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
10235 			debug("ERROR: illegal HW assist mode %d\n", mode);
10236 			new_mode = DBTRAIN_TEST;
10237 		}
10238 		break;
10239 
10240 	default:
10241 		debug("ERROR: unknown HW assist mode %d\n", mode);
10242 	}
10243 
10244 	if (new_mode != mode)
10245 		debug("%s: changing mode %d to %d\n", __func__, mode, new_mode);
10246 
10247 	return new_mode;
10248 }
10249 
10250 int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
10251 			 int mode, u64 *xor_data)
10252 {
10253 	int pattern;
10254 	const u64 *pattern_p;
10255 	int errs, errors = 0;
10256 
10257 	// FIXME? always choose LFSR if chip supports it???
10258 	mode = choose_best_hw_patterns(lmc, mode);
10259 
10260 	for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
10261 		if (mode == DBTRAIN_LFSR) {
10262 			setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
10263 		} else {
10264 			pattern_p = byte_patterns[pattern];
10265 			setup_hw_pattern(priv, lmc, pattern_p);
10266 		}
10267 		errs = test_dram_byte_hw(priv, lmc, phys_addr, mode, xor_data);
10268 
10269 		debug("%s: PATTERN %d at A:0x%012llx errors 0x%x\n",
10270 		      __func__, pattern, phys_addr, errs);
10271 
10272 		errors |= errs;
10273 	}
10274 
10275 	return errors;
10276 }
10277 
10278 static void hw_assist_test_dll_offset(struct ddr_priv *priv,
10279 				      int dll_offset_mode, int lmc,
10280 				      int bytelane,
10281 				      int if_64b,
10282 				      u64 dram_tune_rank_offset,
10283 				      int dram_tune_byte_bursts)
10284 {
10285 	int byte_offset, new_best_offset[9];
10286 	int rank_delay_start[4][9];
10287 	int rank_delay_count[4][9];
10288 	int rank_delay_best_start[4][9];
10289 	int rank_delay_best_count[4][9];
10290 	int errors[4], off_errors, tot_errors;
10291 	int rank_mask, rankx, active_ranks;
10292 	int pattern;
10293 	const u64 *pattern_p;
10294 	int byte;
10295 	char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
10296 	int pat_best_offset[9];
10297 	u64 phys_addr;
10298 	int pat_beg, pat_end;
10299 	int rank_beg, rank_end;
10300 	int byte_lo, byte_hi;
10301 	union cvmx_lmcx_config lmcx_config;
10302 	u64 hw_rank_offset;
10303 	int num_lmcs = cvmx_dram_get_num_lmc(priv);
10304 	// FIXME? always choose LFSR if chip supports it???
10305 	int mode = choose_best_hw_patterns(lmc, DBTRAIN_TEST);
10306 	int node = 0;
10307 
10308 	if (bytelane == 0x0A) {	// all bytelanes
10309 		byte_lo = 0;
10310 		byte_hi = 8;
10311 	} else {		// just 1
10312 		byte_lo = bytelane;
10313 		byte_hi = bytelane;
10314 	}
10315 
10316 	lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10317 	rank_mask = lmcx_config.s.init_status;
10318 
10319 	// this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10320 	hw_rank_offset =
10321 	    1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena +
10322 		     (num_lmcs / 2));
10323 
10324 	debug("N%d: %s: starting LMC%d with rank offset 0x%016llx\n",
10325 	      node, __func__, lmc, (unsigned long long)hw_rank_offset);
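	/*
	 * Example of the offset math above (hypothetical values): with
	 * pbank_lsb = 4, rank_ena = 1 and two LMCs, hw_rank_offset is
	 * 1ull << (28 + 4 - 1 + 1) = 4 GB, so rank N is tested at
	 * phys_addr = N * 4 GB below.
	 */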
10326 
10327 	// start of pattern loop
10328 	// we do the set of tests for each pattern supplied...
10329 
10330 	memset(new_best_offset, 0, sizeof(new_best_offset));
10331 	for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
10332 		memset(pat_best_offset, 0, sizeof(pat_best_offset));
10333 
10334 		if (mode == DBTRAIN_TEST) {
10335 			pattern_p = byte_patterns[pattern];
10336 			setup_hw_pattern(priv, lmc, pattern_p);
10337 		} else {
10338 			setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
10339 		}
10340 
10341 		// now loop through all legal values for the DLL byte offset...
10342 
10343 #define BYTE_OFFSET_INCR 3	// FIXME: make this tunable?
10344 
10345 		tot_errors = 0;
10346 
10347 		memset(rank_delay_count, 0, sizeof(rank_delay_count));
10348 		memset(rank_delay_start, 0, sizeof(rank_delay_start));
10349 		memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count));
10350 		memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start));
10351 
10352 		for (byte_offset = -63; byte_offset < 64;
10353 		     byte_offset += BYTE_OFFSET_INCR) {
10354 			// do the setup on the active LMC
10355 			// set the bytelanes DLL offsets
10356 			change_dll_offset_enable(priv, lmc, 0);
10357 			// FIXME? bytelane?
10358 			load_dll_offset(priv, lmc, dll_offset_mode,
10359 					byte_offset, bytelane);
10360 			change_dll_offset_enable(priv, lmc, 1);
10361 
10362 			//bdk_watchdog_poke();
10363 
10364 			// run the test on each rank
10365 			// only 1 call per rank should be enough, let the
10366 			// bursts, loops, etc, control the load...
10367 
10368 			// errors for this byte_offset, all ranks
10369 			off_errors = 0;
10370 
10371 			active_ranks = 0;
10372 
10373 			for (rankx = 0; rankx < 4; rankx++) {
10374 				if (!(rank_mask & (1 << rankx)))
10375 					continue;
10376 
10377 				phys_addr = hw_rank_offset * active_ranks;
10378 				// FIXME: now done by test_dram_byte_hw()
10379 				//phys_addr |= (lmc << 7);
10380 				//phys_addr |= (u64)node << CVMX_NODE_MEM_SHIFT;
10381 
10382 				active_ranks++;
10383 
10384 				// NOTE: return is a now a bitmask of the
10385 				// erroring bytelanes.
10386 				errors[rankx] =
10387 				    test_dram_byte_hw(priv, lmc, phys_addr,
10388 						      mode, NULL);
10389 
10390 				// process any errors in the bytelane(s) that
10391 				// are being tested
10392 				for (byte = byte_lo; byte <= byte_hi; byte++) {
10393 					// check errors
10394 					// yes, an error in the byte lane in
10395 					// this rank
10396 					if (errors[rankx] & (1 << byte)) {
10397 						off_errors |= (1 << byte);
10398 
10399 						debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors\n",
10400 						      node, lmc, rankx, byte,
10401 						      mode_str, byte_offset,
10402 						      phys_addr);
10403 
10404 						// had started run
10405 						if (rank_delay_count
10406 						    [rankx][byte] > 0) {
10407 							debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n",
10408 							      node, lmc, rankx,
10409 							      byte, mode_str,
10410 							      byte_offset);
10411 							// stop now
10412 							rank_delay_count
10413 								[rankx][byte] =
10414 								0;
10415 						}
10416 						// FIXME: else had not started
10417 						// run - nothing else to do?
10418 					} else {
10419 						// no error in the byte lane
10420 						// first success, set run start
10421 						if (rank_delay_count[rankx]
10422 						    [byte] == 0) {
10423 							debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n",
10424 							      node, lmc, rankx,
10425 							      byte, mode_str,
10426 							      byte_offset);
10427 							rank_delay_start[rankx]
10428 								[byte] =
10429 								byte_offset;
10430 						}
10431 						// bump run length
10432 						rank_delay_count[rankx][byte]
10433 							+= BYTE_OFFSET_INCR;
10434 
10435 						// is this now the biggest
10436 						// window?
10437 						if (rank_delay_count[rankx]
10438 						    [byte] >
10439 						    rank_delay_best_count[rankx]
10440 						    [byte]) {
10441 							rank_delay_best_count
10442 							    [rankx][byte] =
10443 							    rank_delay_count
10444 							    [rankx][byte];
10445 							rank_delay_best_start
10446 							    [rankx][byte] =
10447 							    rank_delay_start
10448 							    [rankx][byte];
10449 							debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n",
10450 							      node, lmc, rankx,
10451 							      byte, mode_str,
10452 							      byte_offset,
10453 							      rank_delay_best_start
10454 							      [rankx][byte],
10455 							      rank_delay_best_count
10456 							      [rankx][byte]);
10457 						}
10458 					}
10459 				}
10460 			} /* for (rankx = 0; rankx < 4; rankx++) */
10461 
10462 			tot_errors |= off_errors;
10463 		}
10464 
10465 		// set the bytelanes DLL offsets all back to 0
10466 		change_dll_offset_enable(priv, lmc, 0);
10467 		load_dll_offset(priv, lmc, dll_offset_mode, 0, bytelane);
10468 		change_dll_offset_enable(priv, lmc, 1);
10469 
10470 		// now choose the best byte_offsets for this pattern
10471 		// according to the best windows of the tested ranks
10472 		// calculate offset by constructing an average window
10473 		// from the rank windows
10474 		for (byte = byte_lo; byte <= byte_hi; byte++) {
10475 			pat_beg = -999;
10476 			pat_end = 999;
10477 
10478 			for (rankx = 0; rankx < 4; rankx++) {
10479 				if (!(rank_mask & (1 << rankx)))
10480 					continue;
10481 
10482 				rank_beg = rank_delay_best_start[rankx][byte];
10483 				pat_beg = max(pat_beg, rank_beg);
10484 				rank_end = rank_beg +
10485 					rank_delay_best_count[rankx][byte] -
10486 					BYTE_OFFSET_INCR;
10487 				pat_end = min(pat_end, rank_end);
10488 
10489 				debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test:  Rank Window %3d:%3d\n",
10490 				      node, lmc, rankx, byte, mode_str,
10491 				      rank_beg, rank_end);
10492 
10493 			}	/* for (rankx = 0; rankx < 4; rankx++) */
10494 
10495 			pat_best_offset[byte] = (pat_end + pat_beg) / 2;
10496 
10497 			// sum the pattern averages
10498 			new_best_offset[byte] += pat_best_offset[byte];
10499 		}
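		/*
		 * Example of the window math above: rank 0 passing from
		 * -15..21 and rank 1 from -9..27 intersect to -9..21,
		 * so this pattern votes for an offset of (21 + -9) / 2 = 6.
		 */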
10500 
10501 		// now print them on 1 line, descending order...
10502 		debug("N%d.LMC%d: HW DLL %s Offset Pattern %d :",
10503 		      node, lmc, mode_str, pattern);
10504 		for (byte = byte_hi; byte >= byte_lo; --byte)
10505 			debug(" %4d", pat_best_offset[byte]);
10506 		debug("\n");
10507 	}
10508 	// end of pattern loop
10509 
10510 	debug("N%d.LMC%d: HW DLL %s Offset Average  : ", node, lmc, mode_str);
10511 
10512 	// print in descending byte index order
10513 	for (byte = byte_hi; byte >= byte_lo; --byte) {
10514 		// create the new average NINT
10515 		new_best_offset[byte] = divide_nint(new_best_offset[byte],
10516 						    NUM_BYTE_PATTERNS);
10517 
10518 		// print the best offsets from all patterns
10519 
10520 		// print just the offset of all the bytes
10521 		if (bytelane == 0x0A)
10522 			debug("%4d ", new_best_offset[byte]);
10523 		else		// print the bytelanes also
10524 			debug("(byte %d) %4d ", byte, new_best_offset[byte]);
10525 
10526 		// done with testing, load up the best offsets we found...
10527 		// disable offsets while we load...
10528 		change_dll_offset_enable(priv, lmc, 0);
10529 		load_dll_offset(priv, lmc, dll_offset_mode,
10530 				new_best_offset[byte], byte);
10531 		// re-enable the offsets now that we are done loading
10532 		change_dll_offset_enable(priv, lmc, 1);
10533 	}
10534 
10535 	debug("\n");
10536 }
10537 
10538 /*
10539  * Automatically adjust the DLL offset for the selected bytelane using
10540  * hardware-assist
10541  */
10542 static int perform_HW_dll_offset_tuning(struct ddr_priv *priv,
10543 					int dll_offset_mode, int bytelane)
10544 {
10545 	int if_64b;
10546 	int save_ecc_ena[4];
10547 	union cvmx_lmcx_config lmc_config;
10548 	int lmc, num_lmcs = cvmx_dram_get_num_lmc(priv);
10549 	const char *s;
10550 	int loops = 1, loop;
10551 	int by;
10552 	u64 dram_tune_rank_offset;
10553 	int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS;
10554 	int node = 0;
10555 
10556 	// see if we want to do the tuning more than once per LMC...
10557 	s = env_get("ddr_tune_ecc_loops");
10558 	if (s)
10559 		loops = strtoul(s, NULL, 0);
10560 
10561 	// allow override of the test repeats (bursts)
10562 	s = env_get("ddr_tune_byte_bursts");
10563 	if (s)
10564 		dram_tune_byte_bursts = strtoul(s, NULL, 10);
10565 
10566 	// print current working values
10567 	debug("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n",
10568 	      node, bytelane, loops, dram_tune_byte_bursts, NUM_BYTE_PATTERNS);
10569 
10570 	// FIXME? get flag from LMC0 only
10571 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0));
10572 	if_64b = !lmc_config.s.mode32b;
10573 
10574 	// this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10575 	dram_tune_rank_offset =
10576 	    1ull << (28 + lmc_config.s.pbank_lsb - lmc_config.s.rank_ena +
10577 		     (num_lmcs / 2));
10578 
10579 	// do once for each active LMC
10580 
10581 	for (lmc = 0; lmc < num_lmcs; lmc++) {
10582 		debug("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n",
10583 		      node, lmc, bytelane);
10584 
10585 		/* Enable ECC for the HW tests */
10586 		// NOTE: we do enable ECC, but the HW tests used will not
10587 		// generate "visible" errors
10588 		lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10589 		save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
10590 		lmc_config.s.ecc_ena = 1;
10591 		lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
10592 		lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10593 
10594 		// testing is done on a single LMC at a time
10595 		// FIXME: for now, loop here to show what happens multiple times
10596 		for (loop = 0; loop < loops; loop++) {
10597 			/* Perform DLL offset tuning */
10598 			hw_assist_test_dll_offset(priv, 2 /* 2=read */, lmc,
10599 						  bytelane,
10600 						  if_64b, dram_tune_rank_offset,
10601 						  dram_tune_byte_bursts);
10602 		}
10603 
10604 		// perform cleanup on active LMC
10605 		debug("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n",
10606 		      node, lmc, bytelane);
10607 
10608 		/* Restore ECC for DRAM tests */
10609 		lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10610 		lmc_config.s.ecc_ena = save_ecc_ena[lmc];
10611 		lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
10612 		lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10613 
10614 		// finally, see if there are any read offset overrides
10615 		// after tuning
10616 		for (by = 0; by < 9; by++) {
10617 			s = lookup_env(priv, "ddr%d_tune_byte%d", lmc, by);
10618 			if (s) {
10619 				int dllro = strtoul(s, NULL, 10);
10620 
10621 				change_dll_offset_enable(priv, lmc, 0);
10622 				load_dll_offset(priv, lmc, 2, dllro, by);
10623 				change_dll_offset_enable(priv, lmc, 1);
10624 			}
10625 		}
10626 
10627 	}			/* for (lmc = 0; lmc < num_lmcs; lmc++) */
10628 
10629 	// finish up...
10630 
10631 	return 0;
10632 
10633 }				/* perform_HW_dll_offset_tuning */
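/*
 * Environment knobs consumed above (values shown are only examples):
 *   setenv ddr_tune_ecc_loops 2      - repeat the tuning pass per LMC
 *   setenv ddr_tune_byte_bursts 64   - override DEFAULT_BYTE_BURSTS
 *   setenv ddr0_tune_byte3 5         - force a read DLL offset for LMC0
 *                                      byte lane 3 after tuning
 */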
10634 
10635 // this routine simply makes the calls to the tuning routine and returns
10636 // any errors
10637 static int cvmx_tune_node(struct ddr_priv *priv)
10638 {
10639 	int errs, tot_errs;
10640 	int do_dllwo = 0;	// default to NO
10641 	const char *str;
10642 	int node = 0;
10643 
10644 	// Automatically tune the data and ECC byte DLL read offsets
10645 	debug("N%d: Starting DLL Read Offset Tuning for LMCs\n", node);
10646 	errs = perform_HW_dll_offset_tuning(priv, 2, 0x0A /* all bytelanes */);
10647 	debug("N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n",
10648 	      node, errs);
10649 	tot_errs = errs;
10650 
10651 	// disabled by default for now, does not seem to be needed?
10652 	// Automatically tune the data and ECC byte DLL write offsets
10653 	// allow override of default setting
10654 	str = env_get("ddr_tune_write_offsets");
10655 	if (str)
10656 		do_dllwo = !!strtoul(str, NULL, 0);
10657 	if (do_dllwo) {
10658 		debug("N%d: Starting DLL Write Offset Tuning for LMCs\n", node);
10659 		errs =
10660 		    perform_HW_dll_offset_tuning(priv, 1,
10661 						 0x0A /* all bytelanes */);
10662 		debug("N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n",
10663 		      node, errs);
10664 		tot_errs += errs;
10665 	}
10666 
10667 	return tot_errs;
10668 }
10669 
10670 // this routine makes the calls to the tuning routines when criteria are met
10671 // intended to be called for automated tuning, to apply filtering...
10672 
10673 #define IS_DDR4  1
10674 #define IS_DDR3  0
10675 #define IS_RDIMM 1
10676 #define IS_UDIMM 0
10677 #define IS_1SLOT 1
10678 #define IS_2SLOT 0
10679 
10680 // FIXME: DDR3 is not tuned
10681 static const u32 ddr_speed_filter[2][2][2] = {
10682 	[IS_DDR4] = {
10683 		     [IS_RDIMM] = {
10684 				   [IS_1SLOT] = 940,
10685 				   [IS_2SLOT] = 800},
10686 		     [IS_UDIMM] = {
10687 				   [IS_1SLOT] = 1050,
10688 				   [IS_2SLOT] = 940},
10689 		      },
10690 	[IS_DDR3] = {
10691 		     [IS_RDIMM] = {
10692 				   [IS_1SLOT] = 0,	// disabled
10693 				   [IS_2SLOT] = 0	// disabled
10694 				   },
10695 		     [IS_UDIMM] = {
10696 				   [IS_1SLOT] = 0,	// disabled
10697 				   [IS_2SLOT] = 0	// disabled
10698 				}
10699 		}
10700 };
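/*
 * Reading the filter table: ddr_speed_filter[IS_DDR4][IS_RDIMM][IS_1SLOT]
 * is 940, so a single-slot DDR4 RDIMM configuration is only auto-tuned
 * when running above 940 MHz; the zero entries leave DDR3 untuned.
 */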
10701 
10702 void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed)
10703 {
10704 	const char *s;
10705 	union cvmx_lmcx_config lmc_config;
10706 	union cvmx_lmcx_control lmc_control;
10707 	union cvmx_lmcx_ddr_pll_ctl lmc_ddr_pll_ctl;
10708 	int is_ddr4;
10709 	int is_rdimm;
10710 	int is_1slot;
10711 	int do_tune = 0;
10712 	u32 ddr_min_speed;
10713 	int node = 0;
10714 
10715 	// scale it down from Hz to MHz
10716 	ddr_speed = divide_nint(ddr_speed, 1000000);
10717 
10718 	// FIXME: allow an override here so that all configs can be tuned
10719 	// or none
10720 	// If the envvar is defined, always either force it or avoid it
10721 	// accordingly
10722 	s = env_get("ddr_tune_all_configs");
10723 	if (s) {
10724 		do_tune = !!strtoul(s, NULL, 0);
10725 		printf("N%d: DRAM auto-tuning %s.\n", node,
10726 		       (do_tune) ? "forced" : "disabled");
10727 		if (do_tune)
10728 			cvmx_tune_node(priv);
10729 
10730 		return;
10731 	}
10732 
10733 	// filter the tuning calls here...
10734 	// determine if we should/can run automatically for this configuration
10735 	//
10736 	// FIXME: tune only when the configuration indicates it will help:
10737 	//    DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed
10738 	//
10739 	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0));	// sample LMC0
10740 	lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(0));	// sample LMC0
10741 	// sample LMC0
10742 	lmc_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
10743 
10744 	is_ddr4 = (lmc_ddr_pll_ctl.s.ddr4_mode != 0);
10745 	is_rdimm = (lmc_control.s.rdimm_ena != 0);
10746 	// HACK, should do better
10747 	is_1slot = (lmc_config.s.init_status < 4);
10748 
10749 	ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot];
10750 	do_tune = ((ddr_min_speed != 0) && (ddr_speed > ddr_min_speed));
10751 
10752 	debug("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n",
10753 	      node, (is_ddr4) ? 4 : 3, (is_rdimm) ? 'R' : 'U',
10754 	      (is_1slot) ? 1 : 2, ddr_speed, (do_tune) ? "is" : "is not");
10755 
10756 	// call the tuning routine, filtering is done...
10757 	if (do_tune)
10758 		cvmx_tune_node(priv);
10759 }
10760 
10761 /*
10762  * first pattern example:
10763  * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
10764  * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
10765  * GENERAL_PURPOSE2.DATA == 16'h0000;
10766  */
10767 
10768 static const u64 dbi_pattern[3] = {
10769 	0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL };
10770 
10771 // Perform switchover to DBI
10772 static void cvmx_dbi_switchover_interface(struct ddr_priv *priv, int lmc)
10773 {
10774 	union cvmx_lmcx_modereg_params0 modereg_params0;
10775 	union cvmx_lmcx_modereg_params3 modereg_params3;
10776 	union cvmx_lmcx_phy_ctl phy_ctl;
10777 	union cvmx_lmcx_config lmcx_config;
10778 	union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl;
10779 	int rank_mask, rankx, active_ranks;
10780 	u64 phys_addr, rank_offset;
10781 	int num_lmcs, errors;
10782 	int dbi_settings[9], byte, unlocked, retries;
10783 	int ecc_ena;
10784 	int rank_max = 1;	// FIXME: make this 4 to try all the ranks
10785 	int node = 0;
10786 
10787 	ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
10788 
10789 	lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10790 	rank_mask = lmcx_config.s.init_status;
10791 	ecc_ena = lmcx_config.s.ecc_ena;
10792 
10793 	// FIXME: must filter out any non-supported configs
10794 	//        ie, no DDR3, no x4 devices
10795 	if (ddr_pll_ctl.s.ddr4_mode == 0 || lmcx_config.s.mode_x4dev == 1) {
10796 		debug("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n",
10797 		      node, lmc);
10798 		return;
10799 	}
10800 
10801 	// this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10802 	num_lmcs = cvmx_dram_get_num_lmc(priv);
10803 	rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb -
10804 			       lmcx_config.s.rank_ena + (num_lmcs / 2));
10805 
10806 	debug("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n",
10807 	      node, lmc, rank_mask, (unsigned long long)rank_offset);
10808 
10809 	/*
10810 	 * 1. conduct the current init sequence as usual all the way
10811 	 * after software write leveling.
10812 	 */
10813 
10814 	read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
10815 
10816 	display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
10817 				 " INIT");
10818 
10819 	/*
10820 	 * 2. set DBI related CSRs as below and issue MR write.
10821 	 * MODEREG_PARAMS3.WR_DBI=1
10822 	 * MODEREG_PARAMS3.RD_DBI=1
10823 	 * PHY_CTL.DBI_MODE_ENA=1
10824 	 */
10825 	modereg_params0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc));
10826 
10827 	modereg_params3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc));
10828 	modereg_params3.s.wr_dbi = 1;
10829 	modereg_params3.s.rd_dbi = 1;
10830 	lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u64);
10831 
10832 	phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(lmc));
10833 	phy_ctl.s.dbi_mode_ena = 1;
10834 	lmc_wr(priv, CVMX_LMCX_PHY_CTL(lmc), phy_ctl.u64);
10835 
10836 	/*
10837 	 * there are two options for data to send.  Lets start with (1)
10838 	 * and could move to (2) in the future:
10839 	 *
10840 	 * 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where
10841 	 * this does not exist) set data directly in these registers.
10842 	 * this will yield a clk/2 pattern:
10843 	 * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
10844 	 * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
10845 	 * GENERAL_PURPOSE2.DATA == 16'h0000;
10846 	 * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
10847 	 * here data comes from the LFSR generating a PRBS pattern
10848 	 * CHAR_CTL.EN = 0
10849 	 * CHAR_CTL.SEL = 0; // for PRBS
10850 	 * CHAR_CTL.DR = 1;
10851 	 * CHAR_CTL.PRBS = setup for whatever type of PRBS to send
10852 	 * CHAR_CTL.SKEW_ON = 1;
10853 	 */
10854 	lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]);
10855 	lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]);
10856 	lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]);
10857 
10858 	/*
10859 	 * 3. adjust cas_latency (only necessary if RD_DBI is set).
10860 	 * here is my code for doing this:
10861 	 *
10862 	 * if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin
10863 	 * case (csr_model.MODEREG_PARAMS0.CL.value)
10864 	 * 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2;
10865 	 * // CL 9-13 -> 11-15
10866 	 * 5: begin
10867 	 * // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
10868 	 * if((csr_model.MODEREG_PARAMS0.CWL.value==1 ||
10869 	 * csr_model.MODEREG_PARAMS0.CWL.value==3))
10870 	 * csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16
10871 	 * else
10872 	 * csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17
10873 	 * end
10874 	 * 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18
10875 	 * 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19
10876 	 * 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21
10877 	 * default:
10878 	 * `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1,
10879 	 * I am not sure what to do.",
10880 	 * mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value))
10881 	 * endcase
10882 	 * end
10883 	 */
10884 
10885 	if (modereg_params3.s.rd_dbi == 1) {
10886 		int old_cl, new_cl, old_cwl;
10887 
10888 		old_cl = modereg_params0.s.cl;
10889 		old_cwl = modereg_params0.s.cwl;
10890 
10891 		switch (old_cl) {
10892 		case 0:
10893 		case 1:
10894 		case 2:
10895 		case 3:
10896 		case 4:
10897 			new_cl = old_cl + 2;
10898 			break;	// 9-13->11-15
10899 			// CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
10900 		case 5:
10901 			new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13;
10902 			break;
10903 		case 6:
10904 			new_cl = 8;
10905 			break;	// 15->18
10906 		case 7:
10907 			new_cl = 14;
10908 			break;	// 16->19
10909 		case 8:
10910 			new_cl = 15;
10911 			break;	// 18->21
10912 		default:
10913 			printf("ERROR: Bad CL value (%d) for DBI switchover.\n",
10914 			       old_cl);
10915 			// FIXME: need to error exit here...
10916 			old_cl = -1;
10917 			new_cl = -1;
10918 			break;
10919 		}
10920 		debug("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n",
10921 		      node, lmc, old_cl, old_cwl, new_cl);
10922 		modereg_params0.s.cl = new_cl;
10923 		lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc),
10924 		       modereg_params0.u64);
10925 	}
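	/*
	 * Example of the remapping above: an encoded CL of 0 (CL 9)
	 * becomes 2 (CL 11), and an encoded CL of 6 (CL 15) becomes 8
	 * (CL 18), reflecting the extra read latency needed once read
	 * DBI is enabled.
	 */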
10926 
10927 	/*
10928 	 * 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence
10929 	 * SEQ_CTL[SEQ_SEL] = MRW.
10930 	 */
10931 	// Use the default values, from the CSRs fields
10932 	// also, do B-sides for RDIMMs...
10933 
10934 	for (rankx = 0; rankx < 4; rankx++) {
10935 		if (!(rank_mask & (1 << rankx)))
10936 			continue;
10937 
10938 		// for RDIMMs, B-side writes should get done automatically
10939 		// when the A-side is written
10940 		ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
10941 			 0 /*MRreg */, 0 /*A-side */);	/* MR0 */
10942 		ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
10943 			 5 /*MRreg */, 0 /*A-side */);	/* MR5 */
10944 	}
10945 
10946 	/*
10947 	 * 5. conduct DBI bit deskew training via the General Purpose
10948 	 * R/W sequence (dbtrain). may need to run this over and over to get
10949 	 * a lock (I need up to 5 in simulation):
10950 	 * SEQ_CTL[SEQ_SEL] = RW_TRAINING (15)
10951 	 * DBTRAIN_CTL.CMD_COUNT_EXT = all 1's
10952 	 * DBTRAIN_CTL.READ_CMD_COUNT = all 1's
10953 	 * DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L]
10954 	 * DBTRAIN_CTL.RW_TRAIN = 1
10955 	 * DBTRAIN_CTL.READ_DQ_COUNT = don't care
10956 	 * DBTRAIN_CTL.WRITE_ENA = 1;
10957 	 * DBTRAIN_CTL.ACTIVATE = 1;
10958 	 * DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a
10959 	 * valid address
10960 	 */
10961 
10962 	// NOW - do the training
10963 	debug("N%d.LMC%d: DBI switchover: TRAINING begins...\n", node, lmc);
10964 
10965 	active_ranks = 0;
10966 	for (rankx = 0; rankx < rank_max; rankx++) {
10967 		if (!(rank_mask & (1 << rankx)))
10968 			continue;
10969 
10970 		phys_addr = rank_offset * active_ranks;
10971 		// FIXME: now done by test_dram_byte_hw()
10972 
10973 		active_ranks++;
10974 
10975 		retries = 0;
10976 
10977 restart_training:
10978 
10979 		// NOTE: return is a bitmask of the erroring bytelanes -
10980 		// we only print it
10981 		errors =
10982 		    test_dram_byte_hw(priv, lmc, phys_addr, DBTRAIN_DBI, NULL);
10983 
10984 		debug("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%llx, errors 0x%x.\n",
10985 		      node, lmc, rankx, (unsigned long long)phys_addr, errors);
10986 
10987 		// NEXT - check for locking
10988 		unlocked = 0;
10989 		read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
10990 
10991 		for (byte = 0; byte < (8 + ecc_ena); byte++)
10992 			unlocked += (dbi_settings[byte] & 1) ^ 1;
10993 
10994 		// FIXME: print out the DBI settings array after each rank?
10995 		if (rank_max > 1)	// only when doing more than 1 rank
10996 			display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena,
10997 						 dbi_settings, " RANK");
10998 
10999 		if (unlocked > 0) {
11000 			debug("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n",
11001 			      node, lmc, unlocked);
11002 			retries++;
11003 			if (retries < 10) {
11004 				goto restart_training;
11005 			} else {
11006 				debug("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n",
11007 				      node, lmc, retries);
11008 			}
11009 		}
11010 	}			/* for (rankx = 0; rankx < rank_max; rankx++) */
11011 
11012 	// print out the final DBI settings array
11013 	display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
11014 				 "FINAL");
11015 }
11016 
11017 void cvmx_dbi_switchover(struct ddr_priv *priv)
11018 {
11019 	int lmc;
11020 	int num_lmcs = cvmx_dram_get_num_lmc(priv);
11021 
11022 	for (lmc = 0; lmc < num_lmcs; lmc++)
11023 		cvmx_dbi_switchover_interface(priv, lmc);
11024 }
11025