1 // SPDX-License-Identifier: BSD-3-Clause
2 /*
3  * Copyright Altera Corporation (C) 2012-2015
4  */
5 
6 #include <common.h>
7 #include <log.h>
8 #include <asm/io.h>
9 #include <asm/arch/sdram.h>
10 #include <errno.h>
11 #include <hang.h>
12 #include "sequencer.h"
13 
/*
 * Pointers to the memory-mapped register blocks accessed in this file.
 * Each one is a fixed address constant (for some blocks OR-ed with an
 * offset) cast to the structure type describing that block.
 */

/* Block at SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800; holds the load_cntrN fields */
static const struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
	(struct socfpga_sdr_rw_load_manager *)
		(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);
/* Block at SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00; holds the load_jump_addN fields */
static const struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs
	= (struct socfpga_sdr_rw_load_jump_manager *)
		(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);
/* Register file; cur_stage is updated by the reg_file_set_*() helpers */
static const struct socfpga_sdr_reg_file *sdr_reg_file =
	(struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;
/* SCC manager block at SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00 (enables, update) */
static const struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
	(struct socfpga_sdr_scc_mgr *)
		(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);
/* PHY manager command block at the PHY manager base address */
static const struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
	(struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;
/* PHY manager configuration block at the PHY manager base | 0x40 */
static const struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
	(struct socfpga_phy_mgr_cfg *)
		(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);
/* Data manager block */
static const struct socfpga_data_mgr *data_mgr =
	(struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;
/* SDRAM controller block */
static const struct socfpga_sdr_ctrl *sdr_ctrl =
	(struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;
34 
/* NOTE(review): meaning not evident from this part of the file - confirm. */
#define DELTA_D		1

/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode, where
 * we skip everything except the bare minimum, we need a few of the steps to
 * be dynamic.  In those cases, we either use the DYNAMIC_CALIB_STEPS for the
 * check, which is based on the rtl-supplied value, or we dynamically compute
 * the value to use based on the dynamically-chosen calibration mode
 */

/* Debug level compared against in the debug_cond(DLEVEL >= n, ...) calls. */
#define DLEVEL 0
/* Compile-time contributions to STATIC_CALIB_STEPS; both are zero here. */
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

/* Bitwise OR of the calibration step flags that are fixed at compile time. */
#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
	STATIC_SKIP_DELAY_LOOPS)

/*
 * Mask a counter value with seq->skip_delay_mask. The expansion refers to a
 * variable named "seq", so one must be in scope wherever this is used.
 */
#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
	((non_skip_value) & seq->skip_delay_mask)
58 
dram_is_ddr(const u8 ddr)59 bool dram_is_ddr(const u8 ddr)
60 {
61 	const struct socfpga_sdram_config *cfg = socfpga_get_sdram_config();
62 	const u8 type = (cfg->ctrl_cfg >> SDR_CTRLGRP_CTRLCFG_MEMTYPE_LSB) &
63 			SDR_CTRLGRP_CTRLCFG_MEMTYPE_MASK;
64 
65 	if (ddr == 2 && type == 1)	/* DDR2 */
66 		return true;
67 
68 	if (ddr == 3 && type == 2)	/* DDR3 */
69 		return true;
70 
71 	return false;
72 }
73 
/**
 * set_failing_group_stage() - Record the first calibration failure
 * @seq:	Sequencer context whose gbl error fields are updated
 * @group:	Group that failed
 * @stage:	Stage in which the failure happened
 * @substage:	Sub-stage in which the failure happened
 *
 * The failure is stored only while the recorded error stage is still
 * CAL_STAGE_NIL, so a previously recorded failure is never overwritten.
 */
static void set_failing_group_stage(struct socfpga_sdrseq *seq,
				    u32 group, u32 stage, u32 substage)
{
	/* Some other group has already failed; keep that record. */
	if (seq->gbl.error_stage != CAL_STAGE_NIL)
		return;

	seq->gbl.error_substage = substage;
	seq->gbl.error_stage = stage;
	seq->gbl.error_group = group;
}
87 
/**
 * reg_file_set_group() - Store the current group in the register file
 * @set_group:	Group number to store
 *
 * Replaces bits 31:16 of the register file's cur_stage word with @set_group.
 */
static void reg_file_set_group(u16 set_group)
{
	/*
	 * Widen before shifting: a u16 operand is promoted to signed int, and
	 * shifting a value with bit 15 set left by 16 would overflow it.
	 */
	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000,
			(u32)set_group << 16);
}
92 
/**
 * reg_file_set_stage() - Store the current stage in the register file
 * @set_stage:	Stage number to store
 *
 * Clears bits 15:0 of the register file's cur_stage word and sets the low
 * byte to @set_stage. The cleared range also covers bits 15:8, the field
 * written by reg_file_set_sub_stage().
 */
static void reg_file_set_stage(u8 set_stage)
{
	const u32 stage_bits = set_stage & 0xff;

	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, stage_bits);
}
97 
/**
 * reg_file_set_sub_stage() - Store the current sub-stage in the register file
 * @set_sub_stage:	Sub-stage number to store
 *
 * Replaces bits 15:8 of the register file's cur_stage word with
 * @set_sub_stage.
 */
static void reg_file_set_sub_stage(u8 set_sub_stage)
{
	const u32 sub_stage_bits = (set_sub_stage & 0xff) << 8;

	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, sub_stage_bits);
}
103 
104 /**
105  * phy_mgr_initialize() - Initialize PHY Manager
106  *
107  * Initialize PHY Manager.
108  */
phy_mgr_initialize(struct socfpga_sdrseq * seq)109 static void phy_mgr_initialize(struct socfpga_sdrseq *seq)
110 {
111 	u32 ratio;
112 
113 	debug("%s:%d\n", __func__, __LINE__);
114 	/* Calibration has control over path to memory */
115 	/*
116 	 * In Hard PHY this is a 2-bit control:
117 	 * 0: AFI Mux Select
118 	 * 1: DDIO Mux Select
119 	 */
120 	writel(0x3, &phy_mgr_cfg->mux_sel);
121 
122 	/* USER memory clock is not stable we begin initialization  */
123 	writel(0, &phy_mgr_cfg->reset_mem_stbl);
124 
125 	/* USER calibration status all set to zero */
126 	writel(0, &phy_mgr_cfg->cal_status);
127 
128 	writel(0, &phy_mgr_cfg->cal_debug_info);
129 
130 	/* Init params only if we do NOT skip calibration. */
131 	if ((seq->dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL)
132 		return;
133 
134 	ratio = seq->rwcfg->mem_dq_per_read_dqs /
135 		seq->rwcfg->mem_virtual_groups_per_read_dqs;
136 	seq->param.read_correct_mask_vg = (1 << ratio) - 1;
137 	seq->param.write_correct_mask_vg = (1 << ratio) - 1;
138 	seq->param.read_correct_mask = (1 << seq->rwcfg->mem_dq_per_read_dqs)
139 		- 1;
140 	seq->param.write_correct_mask = (1 << seq->rwcfg->mem_dq_per_write_dqs)
141 		- 1;
142 }
143 
144 /**
145  * set_rank_and_odt_mask() - Set Rank and ODT mask
146  * @rank:	Rank mask
147  * @odt_mode:	ODT mode, OFF or READ_WRITE
148  *
149  * Set Rank and ODT mask (On-Die Termination).
150  */
set_rank_and_odt_mask(struct socfpga_sdrseq * seq,const u32 rank,const u32 odt_mode)151 static void set_rank_and_odt_mask(struct socfpga_sdrseq *seq,
152 				  const u32 rank, const u32 odt_mode)
153 {
154 	u32 odt_mask_0 = 0;
155 	u32 odt_mask_1 = 0;
156 	u32 cs_and_odt_mask;
157 
158 	if (odt_mode == RW_MGR_ODT_MODE_OFF) {
159 		odt_mask_0 = 0x0;
160 		odt_mask_1 = 0x0;
161 	} else {	/* RW_MGR_ODT_MODE_READ_WRITE */
162 		switch (seq->rwcfg->mem_number_of_ranks) {
163 		case 1:	/* 1 Rank */
164 			/* Read: ODT = 0 ; Write: ODT = 1 */
165 			odt_mask_0 = 0x0;
166 			odt_mask_1 = 0x1;
167 			break;
168 		case 2:	/* 2 Ranks */
169 			if (seq->rwcfg->mem_number_of_cs_per_dimm == 1) {
170 				/*
171 				 * - Dual-Slot , Single-Rank (1 CS per DIMM)
172 				 *   OR
173 				 * - RDIMM, 4 total CS (2 CS per DIMM, 2 DIMM)
174 				 *
175 				 * Since MEM_NUMBER_OF_RANKS is 2, they
176 				 * are both single rank with 2 CS each
177 				 * (special for RDIMM).
178 				 *
179 				 * Read: Turn on ODT on the opposite rank
180 				 * Write: Turn on ODT on all ranks
181 				 */
182 				odt_mask_0 = 0x3 & ~(1 << rank);
183 				odt_mask_1 = 0x3;
184 				if (dram_is_ddr(2))
185 					odt_mask_1 &= ~(1 << rank);
186 			} else {
187 				/*
188 				 * - Single-Slot , Dual-Rank (2 CS per DIMM)
189 				 *
190 				 * Read: Turn on ODT off on all ranks
191 				 * Write: Turn on ODT on active rank
192 				 */
193 				odt_mask_0 = 0x0;
194 				odt_mask_1 = 0x3 & (1 << rank);
195 			}
196 			break;
197 		case 4:	/* 4 Ranks */
198 			/*
199 			 * DDR3 Read, DDR2 Read/Write:
200 			 * ----------+-----------------------+
201 			 *           |         ODT           |
202 			 *           +-----------------------+
203 			 *   Rank    |  3  |  2  |  1  |  0  |
204 			 * ----------+-----+-----+-----+-----+
205 			 *     0     |  0  |  1  |  0  |  0  |
206 			 *     1     |  1  |  0  |  0  |  0  |
207 			 *     2     |  0  |  0  |  0  |  1  |
208 			 *     3     |  0  |  0  |  1  |  0  |
209 			 * ----------+-----+-----+-----+-----+
210 			 *
211 			 * DDR3 Write:
212 			 * ----------+-----------------------+
213 			 *           |         ODT           |
214 			 * Write To  +-----------------------+
215 			 *   Rank    |  3  |  2  |  1  |  0  |
216 			 * ----------+-----+-----+-----+-----+
217 			 *     0     |  0  |  1  |  0  |  1  |
218 			 *     1     |  1  |  0  |  1  |  0  |
219 			 *     2     |  0  |  1  |  0  |  1  |
220 			 *     3     |  1  |  0  |  1  |  0  |
221 			 * ----------+-----+-----+-----+-----+
222 			 */
223 			switch (rank) {
224 			case 0:
225 				odt_mask_0 = 0x4;
226 				if (dram_is_ddr(2))
227 					odt_mask_1 = 0x4;
228 				else if (dram_is_ddr(3))
229 					odt_mask_1 = 0x5;
230 				break;
231 			case 1:
232 				odt_mask_0 = 0x8;
233 				if (dram_is_ddr(2))
234 					odt_mask_1 = 0x8;
235 				else if (dram_is_ddr(3))
236 					odt_mask_1 = 0xA;
237 				break;
238 			case 2:
239 				odt_mask_0 = 0x1;
240 				if (dram_is_ddr(2))
241 					odt_mask_1 = 0x1;
242 				else if (dram_is_ddr(3))
243 					odt_mask_1 = 0x5;
244 				break;
245 			case 3:
246 				odt_mask_0 = 0x2;
247 				if (dram_is_ddr(2))
248 					odt_mask_1 = 0x2;
249 				else if (dram_is_ddr(3))
250 					odt_mask_1 = 0xA;
251 				break;
252 			}
253 			break;
254 		}
255 	}
256 
257 	cs_and_odt_mask = (0xFF & ~(1 << rank)) |
258 			  ((0xFF & odt_mask_0) << 8) |
259 			  ((0xFF & odt_mask_1) << 16);
260 	writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
261 				RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
262 }
263 
264 /**
265  * scc_mgr_set() - Set SCC Manager register
266  * @off:	Base offset in SCC Manager space
267  * @grp:	Read/Write group
268  * @val:	Value to be set
269  *
270  * This function sets the SCC Manager (Scan Chain Control Manager) register.
271  */
scc_mgr_set(u32 off,u32 grp,u32 val)272 static void scc_mgr_set(u32 off, u32 grp, u32 val)
273 {
274 	writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
275 }
276 
277 /**
278  * scc_mgr_initialize() - Initialize SCC Manager registers
279  *
280  * Initialize SCC Manager registers.
281  */
scc_mgr_initialize(void)282 static void scc_mgr_initialize(void)
283 {
284 	/*
285 	 * Clear register file for HPS. 16 (2^4) is the size of the
286 	 * full register file in the scc mgr:
287 	 *	RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
288 	 *                             MEM_IF_READ_DQS_WIDTH - 1);
289 	 */
290 	int i;
291 
292 	for (i = 0; i < 16; i++) {
293 		debug_cond(DLEVEL >= 1, "%s:%d: Clearing SCC RFILE index %u\n",
294 			   __func__, __LINE__, i);
295 		scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, i, 0);
296 	}
297 }
298 
/**
 * scc_mgr_set_dqdqs_output_phase() - Set DQ/DQS output phase
 * @write_group:	Write group
 * @phase:		Phase value
 *
 * Stores @phase in the SCC_MGR_DQDQS_OUT_PHASE_OFFSET entry of @write_group.
 */
static void scc_mgr_set_dqdqs_output_phase(u32 write_group, u32 phase)
{
	const u32 off = SCC_MGR_DQDQS_OUT_PHASE_OFFSET;

	scc_mgr_set(off, write_group, phase);
}
303 
/**
 * scc_mgr_set_dqs_bus_in_delay() - Set DQS bus input delay
 * @read_group:	Read group
 * @delay:	Delay value
 *
 * Stores @delay in the SCC_MGR_DQS_IN_DELAY_OFFSET entry of @read_group.
 */
static void scc_mgr_set_dqs_bus_in_delay(u32 read_group, u32 delay)
{
	const u32 off = SCC_MGR_DQS_IN_DELAY_OFFSET;

	scc_mgr_set(off, read_group, delay);
}
308 
/**
 * scc_mgr_set_dqs_en_phase() - Set DQS enable phase
 * @read_group:	Read group
 * @phase:	Phase value
 *
 * Stores @phase in the SCC_MGR_DQS_EN_PHASE_OFFSET entry of @read_group.
 */
static void scc_mgr_set_dqs_en_phase(u32 read_group, u32 phase)
{
	const u32 off = SCC_MGR_DQS_EN_PHASE_OFFSET;

	scc_mgr_set(off, read_group, phase);
}
313 
/**
 * scc_mgr_set_dqs_en_delay() - Set DQS enable delay
 * @read_group:	Read group
 * @delay:	Delay value
 *
 * Stores @delay in the SCC_MGR_DQS_EN_DELAY_OFFSET entry of @read_group.
 */
static void scc_mgr_set_dqs_en_delay(u32 read_group, u32 delay)
{
	const u32 off = SCC_MGR_DQS_EN_DELAY_OFFSET;

	scc_mgr_set(off, read_group, delay);
}
318 
/**
 * scc_mgr_set_dq_in_delay() - Set DQ input delay
 * @dq_in_group:	Index of the DQ pin
 * @delay:		Delay value
 *
 * Stores @delay in the SCC_MGR_IO_IN_DELAY_OFFSET entry of @dq_in_group.
 */
static void scc_mgr_set_dq_in_delay(u32 dq_in_group, u32 delay)
{
	const u32 off = SCC_MGR_IO_IN_DELAY_OFFSET;

	scc_mgr_set(off, dq_in_group, delay);
}
323 
/**
 * scc_mgr_set_dqs_io_in_delay() - Set DQS IO input delay
 * @seq:	Sequencer context; rwcfg supplies the DQ count per write DQS
 * @delay:	Delay value
 *
 * Stores @delay in the SCC_MGR_IO_IN_DELAY_OFFSET entry whose index is
 * mem_dq_per_write_dqs, i.e. the entry right after the DQ entries.
 */
static void scc_mgr_set_dqs_io_in_delay(struct socfpga_sdrseq *seq,
					u32 delay)
{
	const u32 dqs_idx = seq->rwcfg->mem_dq_per_write_dqs;

	scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dqs_idx, delay);
}
330 
/**
 * scc_mgr_set_dm_in_delay() - Set DM input delay
 * @seq:	Sequencer context; rwcfg supplies the DQ count per write DQS
 * @dm:		Index of the DM pin
 * @delay:	Delay value
 *
 * Stores @delay in the SCC_MGR_IO_IN_DELAY_OFFSET entry whose index is
 * mem_dq_per_write_dqs + 1 + @dm.
 */
static void scc_mgr_set_dm_in_delay(struct socfpga_sdrseq *seq, u32 dm,
				    u32 delay)
{
	const u32 dm_idx = seq->rwcfg->mem_dq_per_write_dqs + 1 + dm;

	scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dm_idx, delay);
}
338 
/**
 * scc_mgr_set_dq_out1_delay() - Set DQ output (out1) delay
 * @dq_in_group:	Index of the DQ pin
 * @delay:		Delay value
 *
 * Stores @delay in the SCC_MGR_IO_OUT1_DELAY_OFFSET entry of @dq_in_group.
 */
static void scc_mgr_set_dq_out1_delay(u32 dq_in_group, u32 delay)
{
	const u32 off = SCC_MGR_IO_OUT1_DELAY_OFFSET;

	scc_mgr_set(off, dq_in_group, delay);
}
343 
/**
 * scc_mgr_set_dqs_out1_delay() - Set DQS output (out1) delay
 * @seq:	Sequencer context; rwcfg supplies the DQ count per write DQS
 * @delay:	Delay value
 *
 * Stores @delay in the SCC_MGR_IO_OUT1_DELAY_OFFSET entry whose index is
 * mem_dq_per_write_dqs, i.e. the entry right after the DQ entries.
 */
static void scc_mgr_set_dqs_out1_delay(struct socfpga_sdrseq *seq,
				       u32 delay)
{
	const u32 dqs_idx = seq->rwcfg->mem_dq_per_write_dqs;

	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dqs_idx, delay);
}
350 
/**
 * scc_mgr_set_dm_out1_delay() - Set DM output (out1) delay
 * @seq:	Sequencer context; rwcfg supplies the DQ count per write DQS
 * @dm:		Index of the DM pin
 * @delay:	Delay value
 *
 * Stores @delay in the SCC_MGR_IO_OUT1_DELAY_OFFSET entry whose index is
 * mem_dq_per_write_dqs + 1 + @dm.
 */
static void scc_mgr_set_dm_out1_delay(struct socfpga_sdrseq *seq, u32 dm,
				      u32 delay)
{
	const u32 dm_idx = seq->rwcfg->mem_dq_per_write_dqs + 1 + dm;

	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dm_idx, delay);
}
358 
359 /* load up dqs config settings */
scc_mgr_load_dqs(u32 dqs)360 static void scc_mgr_load_dqs(u32 dqs)
361 {
362 	writel(dqs, &sdr_scc_mgr->dqs_ena);
363 }
364 
365 /* load up dqs io config settings */
scc_mgr_load_dqs_io(void)366 static void scc_mgr_load_dqs_io(void)
367 {
368 	writel(0, &sdr_scc_mgr->dqs_io_ena);
369 }
370 
371 /* load up dq config settings */
scc_mgr_load_dq(u32 dq_in_group)372 static void scc_mgr_load_dq(u32 dq_in_group)
373 {
374 	writel(dq_in_group, &sdr_scc_mgr->dq_ena);
375 }
376 
377 /* load up dm config settings */
scc_mgr_load_dm(u32 dm)378 static void scc_mgr_load_dm(u32 dm)
379 {
380 	writel(dm, &sdr_scc_mgr->dm_ena);
381 }
382 
383 /**
384  * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
385  * @off:	Base offset in SCC Manager space
386  * @grp:	Read/Write group
387  * @val:	Value to be set
388  * @update:	If non-zero, trigger SCC Manager update for all ranks
389  *
390  * This function sets the SCC Manager (Scan Chain Control Manager) register
391  * and optionally triggers the SCC update for all ranks.
392  */
scc_mgr_set_all_ranks(struct socfpga_sdrseq * seq,const u32 off,const u32 grp,const u32 val,const int update)393 static void scc_mgr_set_all_ranks(struct socfpga_sdrseq *seq,
394 				  const u32 off, const u32 grp, const u32 val,
395 				  const int update)
396 {
397 	u32 r;
398 
399 	for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
400 	     r += NUM_RANKS_PER_SHADOW_REG) {
401 		scc_mgr_set(off, grp, val);
402 
403 		if (update || (r == 0)) {
404 			writel(grp, &sdr_scc_mgr->dqs_ena);
405 			writel(0, &sdr_scc_mgr->update);
406 		}
407 	}
408 }
409 
/**
 * scc_mgr_set_dqs_en_phase_all_ranks() - Set DQS enable phase for all ranks
 * @seq:	Sequencer context
 * @read_group:	Read group
 * @phase:	Phase value
 *
 * Sets the DQS enable phase through scc_mgr_set_all_ranks() with the update
 * flag cleared.
 */
static void scc_mgr_set_dqs_en_phase_all_ranks(struct socfpga_sdrseq *seq,
					       u32 read_group, u32 phase)
{
	/*
	 * Although the h/w doesn't support different phases per shadow
	 * register, for simplicity our scc manager modeling keeps different
	 * phase settings per shadow reg, and it's important for us to keep
	 * them in sync to match h/w. For efficiency, the scan chain update
	 * should occur only once to sr0.
	 */
	const int update_every_rank = 0;

	scc_mgr_set_all_ranks(seq, SCC_MGR_DQS_EN_PHASE_OFFSET,
			      read_group, phase, update_every_rank);
}
424 
/**
 * scc_mgr_set_dqdqs_output_phase_all_ranks() - Set DQ/DQS output phase for
 * all ranks
 * @seq:		Sequencer context
 * @write_group:	Write group
 * @phase:		Phase value
 *
 * Sets the DQ/DQS output phase through scc_mgr_set_all_ranks() with the
 * update flag cleared.
 */
static void scc_mgr_set_dqdqs_output_phase_all_ranks(struct socfpga_sdrseq *seq,
						     u32 write_group, u32 phase)
{
	/*
	 * Although the h/w doesn't support different phases per shadow
	 * register, for simplicity our scc manager modeling keeps different
	 * phase settings per shadow reg, and it's important for us to keep
	 * them in sync to match h/w. For efficiency, the scan chain update
	 * should occur only once to sr0.
	 */
	const int update_every_rank = 0;

	scc_mgr_set_all_ranks(seq, SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
			      write_group, phase, update_every_rank);
}
439 
/**
 * scc_mgr_set_dqs_en_delay_all_ranks() - Set DQS enable delay for all ranks
 * @seq:	Sequencer context
 * @read_group:	Read group
 * @delay:	Delay value
 *
 * Sets the DQS enable delay through scc_mgr_set_all_ranks() with the update
 * flag set, so the DQS enable and update registers are written on every
 * pass.
 */
static void scc_mgr_set_dqs_en_delay_all_ranks(struct socfpga_sdrseq *seq,
					       u32 read_group, u32 delay)
{
	/*
	 * In shadow register mode, the T11 settings are stored in
	 * registers in the core, which are updated by the DQS_ENA
	 * signals. Not issuing the SCC_MGR_UPD command allows us to
	 * save lots of rank switching overhead, by calling
	 * select_shadow_regs_for_update with update_scan_chains
	 * set to 0.
	 *
	 * NOTE(review): the paragraph above describes skipping the update,
	 * yet the update flag passed below is 1 - confirm which is intended.
	 */
	const int update_every_rank = 1;

	scc_mgr_set_all_ranks(seq, SCC_MGR_DQS_EN_DELAY_OFFSET,
			      read_group, delay, update_every_rank);
}
454 
455 /**
456  * scc_mgr_set_oct_out1_delay() - Set OCT output delay
457  * @write_group:	Write group
458  * @delay:		Delay value
459  *
460  * This function sets the OCT output delay in SCC manager.
461  */
scc_mgr_set_oct_out1_delay(struct socfpga_sdrseq * seq,const u32 write_group,const u32 delay)462 static void scc_mgr_set_oct_out1_delay(struct socfpga_sdrseq *seq,
463 				       const u32 write_group, const u32 delay)
464 {
465 	const int ratio = seq->rwcfg->mem_if_read_dqs_width /
466 			 seq->rwcfg->mem_if_write_dqs_width;
467 	const int base = write_group * ratio;
468 	int i;
469 	/*
470 	 * Load the setting in the SCC manager
471 	 * Although OCT affects only write data, the OCT delay is controlled
472 	 * by the DQS logic block which is instantiated once per read group.
473 	 * For protocols where a write group consists of multiple read groups,
474 	 * the setting must be set multiple times.
475 	 */
476 	for (i = 0; i < ratio; i++)
477 		scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
478 }
479 
480 /**
481  * scc_mgr_set_hhp_extras() - Set HHP extras.
482  *
483  * Load the fixed setting in the SCC manager HHP extras.
484  */
scc_mgr_set_hhp_extras(void)485 static void scc_mgr_set_hhp_extras(void)
486 {
487 	/*
488 	 * Load the fixed setting in the SCC manager
489 	 * bits: 0:0 = 1'b1	- DQS bypass
490 	 * bits: 1:1 = 1'b1	- DQ bypass
491 	 * bits: 4:2 = 3'b001	- rfifo_mode
492 	 * bits: 6:5 = 2'b01	- rfifo clock_select
493 	 * bits: 7:7 = 1'b0	- separate gating from ungating setting
494 	 * bits: 8:8 = 1'b0	- separate OE from Output delay setting
495 	 */
496 	const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
497 			  (1 << 2) | (1 << 1) | (1 << 0);
498 	const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
499 			 SCC_MGR_HHP_GLOBALS_OFFSET |
500 			 SCC_MGR_HHP_EXTRAS_OFFSET;
501 
502 	debug_cond(DLEVEL >= 1, "%s:%d Setting HHP Extras\n",
503 		   __func__, __LINE__);
504 	writel(value, addr);
505 	debug_cond(DLEVEL >= 1, "%s:%d Done Setting HHP Extras\n",
506 		   __func__, __LINE__);
507 }
508 
509 /**
510  * scc_mgr_zero_all() - Zero all DQS config
511  *
512  * Zero all DQS config.
513  */
scc_mgr_zero_all(struct socfpga_sdrseq * seq)514 static void scc_mgr_zero_all(struct socfpga_sdrseq *seq)
515 {
516 	int i, r;
517 
518 	/*
519 	 * USER Zero all DQS config settings, across all groups and all
520 	 * shadow registers
521 	 */
522 	for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
523 	     r += NUM_RANKS_PER_SHADOW_REG) {
524 		for (i = 0; i < seq->rwcfg->mem_if_read_dqs_width; i++) {
525 			/*
526 			 * The phases actually don't exist on a per-rank basis,
527 			 * but there's no harm updating them several times, so
528 			 * let's keep the code simple.
529 			 */
530 			scc_mgr_set_dqs_bus_in_delay(i,
531 						     seq->iocfg->dqs_in_reserve
532 						     );
533 			scc_mgr_set_dqs_en_phase(i, 0);
534 			scc_mgr_set_dqs_en_delay(i, 0);
535 		}
536 
537 		for (i = 0; i < seq->rwcfg->mem_if_write_dqs_width; i++) {
538 			scc_mgr_set_dqdqs_output_phase(i, 0);
539 			/* Arria V/Cyclone V don't have out2. */
540 			scc_mgr_set_oct_out1_delay(seq, i,
541 						   seq->iocfg->dqs_out_reserve);
542 		}
543 	}
544 
545 	/* Multicast to all DQS group enables. */
546 	writel(0xff, &sdr_scc_mgr->dqs_ena);
547 	writel(0, &sdr_scc_mgr->update);
548 }
549 
550 /**
551  * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
552  * @write_group:	Write group
553  *
554  * Set bypass mode and trigger SCC update.
555  */
scc_set_bypass_mode(const u32 write_group)556 static void scc_set_bypass_mode(const u32 write_group)
557 {
558 	/* Multicast to all DQ enables. */
559 	writel(0xff, &sdr_scc_mgr->dq_ena);
560 	writel(0xff, &sdr_scc_mgr->dm_ena);
561 
562 	/* Update current DQS IO enable. */
563 	writel(0, &sdr_scc_mgr->dqs_io_ena);
564 
565 	/* Update the DQS logic. */
566 	writel(write_group, &sdr_scc_mgr->dqs_ena);
567 
568 	/* Hit update. */
569 	writel(0, &sdr_scc_mgr->update);
570 }
571 
572 /**
573  * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
574  * @write_group:	Write group
575  *
576  * Load DQS settings for Write Group, do not trigger SCC update.
577  */
scc_mgr_load_dqs_for_write_group(struct socfpga_sdrseq * seq,const u32 write_group)578 static void scc_mgr_load_dqs_for_write_group(struct socfpga_sdrseq *seq,
579 					     const u32 write_group)
580 {
581 	const int ratio = seq->rwcfg->mem_if_read_dqs_width /
582 			  seq->rwcfg->mem_if_write_dqs_width;
583 	const int base = write_group * ratio;
584 	int i;
585 	/*
586 	 * Load the setting in the SCC manager
587 	 * Although OCT affects only write data, the OCT delay is controlled
588 	 * by the DQS logic block which is instantiated once per read group.
589 	 * For protocols where a write group consists of multiple read groups,
590 	 * the setting must be set multiple times.
591 	 */
592 	for (i = 0; i < ratio; i++)
593 		writel(base + i, &sdr_scc_mgr->dqs_ena);
594 }
595 
596 /**
597  * scc_mgr_zero_group() - Zero all configs for a group
598  *
599  * Zero DQ, DM, DQS and OCT configs for a group.
600  */
scc_mgr_zero_group(struct socfpga_sdrseq * seq,const u32 write_group,const int out_only)601 static void scc_mgr_zero_group(struct socfpga_sdrseq *seq,
602 			       const u32 write_group, const int out_only)
603 {
604 	int i, r;
605 
606 	for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
607 	     r += NUM_RANKS_PER_SHADOW_REG) {
608 		/* Zero all DQ config settings. */
609 		for (i = 0; i < seq->rwcfg->mem_dq_per_write_dqs; i++) {
610 			scc_mgr_set_dq_out1_delay(i, 0);
611 			if (!out_only)
612 				scc_mgr_set_dq_in_delay(i, 0);
613 		}
614 
615 		/* Multicast to all DQ enables. */
616 		writel(0xff, &sdr_scc_mgr->dq_ena);
617 
618 		/* Zero all DM config settings. */
619 		for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
620 			if (!out_only)
621 				scc_mgr_set_dm_in_delay(seq, i, 0);
622 			scc_mgr_set_dm_out1_delay(seq, i, 0);
623 		}
624 
625 		/* Multicast to all DM enables. */
626 		writel(0xff, &sdr_scc_mgr->dm_ena);
627 
628 		/* Zero all DQS IO settings. */
629 		if (!out_only)
630 			scc_mgr_set_dqs_io_in_delay(seq, 0);
631 
632 		/* Arria V/Cyclone V don't have out2. */
633 		scc_mgr_set_dqs_out1_delay(seq, seq->iocfg->dqs_out_reserve);
634 		scc_mgr_set_oct_out1_delay(seq, write_group,
635 					   seq->iocfg->dqs_out_reserve);
636 		scc_mgr_load_dqs_for_write_group(seq, write_group);
637 
638 		/* Multicast to all DQS IO enables (only 1 in total). */
639 		writel(0, &sdr_scc_mgr->dqs_io_ena);
640 
641 		/* Hit update to zero everything. */
642 		writel(0, &sdr_scc_mgr->update);
643 	}
644 }
645 
646 /*
647  * apply and load a particular input delay for the DQ pins in a group
648  * group_bgn is the index of the first dq pin (in the write group)
649  */
scc_mgr_apply_group_dq_in_delay(struct socfpga_sdrseq * seq,u32 group_bgn,u32 delay)650 static void scc_mgr_apply_group_dq_in_delay(struct socfpga_sdrseq *seq,
651 					    u32 group_bgn, u32 delay)
652 {
653 	u32 i, p;
654 
655 	for (i = 0, p = group_bgn; i < seq->rwcfg->mem_dq_per_read_dqs;
656 	     i++, p++) {
657 		scc_mgr_set_dq_in_delay(p, delay);
658 		scc_mgr_load_dq(p);
659 	}
660 }
661 
662 /**
663  * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the
664  * DQ pins in a group
665  * @delay:		Delay value
666  *
667  * Apply and load a particular output delay for the DQ pins in a group.
668  */
scc_mgr_apply_group_dq_out1_delay(struct socfpga_sdrseq * seq,const u32 delay)669 static void scc_mgr_apply_group_dq_out1_delay(struct socfpga_sdrseq *seq,
670 					      const u32 delay)
671 {
672 	int i;
673 
674 	for (i = 0; i < seq->rwcfg->mem_dq_per_write_dqs; i++) {
675 		scc_mgr_set_dq_out1_delay(i, delay);
676 		scc_mgr_load_dq(i);
677 	}
678 }
679 
680 /* apply and load a particular output delay for the DM pins in a group */
scc_mgr_apply_group_dm_out1_delay(struct socfpga_sdrseq * seq,u32 delay1)681 static void scc_mgr_apply_group_dm_out1_delay(struct socfpga_sdrseq *seq,
682 					      u32 delay1)
683 {
684 	u32 i;
685 
686 	for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
687 		scc_mgr_set_dm_out1_delay(seq, i, delay1);
688 		scc_mgr_load_dm(i);
689 	}
690 }
691 
692 
693 /* apply and load delay on both DQS and OCT out1 */
scc_mgr_apply_group_dqs_io_and_oct_out1(struct socfpga_sdrseq * seq,u32 write_group,u32 delay)694 static void scc_mgr_apply_group_dqs_io_and_oct_out1(struct socfpga_sdrseq *seq,
695 						    u32 write_group, u32 delay)
696 {
697 	scc_mgr_set_dqs_out1_delay(seq, delay);
698 	scc_mgr_load_dqs_io();
699 
700 	scc_mgr_set_oct_out1_delay(seq, write_group, delay);
701 	scc_mgr_load_dqs_for_write_group(seq, write_group);
702 }
703 
704 /**
705  * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output
706  * side: DQ, DM, DQS, OCT
707  * @write_group:	Write group
708  * @delay:		Delay value
709  *
710  * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
711  */
scc_mgr_apply_group_all_out_delay_add(struct socfpga_sdrseq * seq,const u32 write_group,const u32 delay)712 static void scc_mgr_apply_group_all_out_delay_add(struct socfpga_sdrseq *seq,
713 						  const u32 write_group,
714 						  const u32 delay)
715 {
716 	u32 i, new_delay;
717 
718 	/* DQ shift */
719 	for (i = 0; i < seq->rwcfg->mem_dq_per_write_dqs; i++)
720 		scc_mgr_load_dq(i);
721 
722 	/* DM shift */
723 	for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
724 		scc_mgr_load_dm(i);
725 
726 	/* DQS shift */
727 	new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
728 	if (new_delay > seq->iocfg->io_out2_delay_max) {
729 		debug_cond(DLEVEL >= 1,
730 			   "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
731 			   __func__, __LINE__, write_group, delay, new_delay,
732 			   seq->iocfg->io_out2_delay_max,
733 			   new_delay - seq->iocfg->io_out2_delay_max);
734 		new_delay -= seq->iocfg->io_out2_delay_max;
735 		scc_mgr_set_dqs_out1_delay(seq, new_delay);
736 	}
737 
738 	scc_mgr_load_dqs_io();
739 
740 	/* OCT shift */
741 	new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
742 	if (new_delay > seq->iocfg->io_out2_delay_max) {
743 		debug_cond(DLEVEL >= 1,
744 			   "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
745 			   __func__, __LINE__, write_group, delay,
746 			   new_delay, seq->iocfg->io_out2_delay_max,
747 			   new_delay - seq->iocfg->io_out2_delay_max);
748 		new_delay -= seq->iocfg->io_out2_delay_max;
749 		scc_mgr_set_oct_out1_delay(seq, write_group, new_delay);
750 	}
751 
752 	scc_mgr_load_dqs_for_write_group(seq, write_group);
753 }
754 
755 /**
756  * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output
757  * side to all ranks
758  * @write_group:	Write group
759  * @delay:		Delay value
760  *
761  * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
762  */
763 static void
scc_mgr_apply_group_all_out_delay_add_all_ranks(struct socfpga_sdrseq * seq,const u32 write_group,const u32 delay)764 scc_mgr_apply_group_all_out_delay_add_all_ranks(struct socfpga_sdrseq *seq,
765 						const u32 write_group,
766 						const u32 delay)
767 {
768 	int r;
769 
770 	for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
771 	     r += NUM_RANKS_PER_SHADOW_REG) {
772 		scc_mgr_apply_group_all_out_delay_add(seq, write_group, delay);
773 		writel(0, &sdr_scc_mgr->update);
774 	}
775 }
776 
777 /**
778  * set_jump_as_return() - Return instruction optimization
779  *
780  * Optimization used to recover some slots in ddr3 inst_rom could be
781  * applied to other protocols if we wanted to
782  */
set_jump_as_return(struct socfpga_sdrseq * seq)783 static void set_jump_as_return(struct socfpga_sdrseq *seq)
784 {
785 	/*
786 	 * To save space, we replace return with jump to special shared
787 	 * RETURN instruction so we set the counter to large value so that
788 	 * we always jump.
789 	 */
790 	writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
791 	writel(seq->rwcfg->rreturn, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
792 }
793 
794 /**
795  * delay_for_n_mem_clocks() - Delay for N memory clocks
796  * @clocks:	Length of the delay
797  *
798  * Delay for N memory clocks.
799  */
delay_for_n_mem_clocks(struct socfpga_sdrseq * seq,const u32 clocks)800 static void delay_for_n_mem_clocks(struct socfpga_sdrseq *seq,
801 				   const u32 clocks)
802 {
803 	u32 afi_clocks;
804 	u16 c_loop;
805 	u8 inner;
806 	u8 outer;
807 
808 	debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);
809 
810 	/* Scale (rounding up) to get afi clocks. */
811 	afi_clocks = DIV_ROUND_UP(clocks, seq->misccfg->afi_rate_ratio);
812 	if (afi_clocks)	/* Temporary underflow protection */
813 		afi_clocks--;
814 
815 	/*
816 	 * Note, we don't bother accounting for being off a little
817 	 * bit because of a few extra instructions in outer loops.
818 	 * Note, the loops have a test at the end, and do the test
819 	 * before the decrement, and so always perform the loop
820 	 * 1 time more than the counter value
821 	 */
822 	c_loop = afi_clocks >> 16;
823 	outer = c_loop ? 0xff : (afi_clocks >> 8);
824 	inner = outer ? 0xff : afi_clocks;
825 
826 	/*
827 	 * rom instructions are structured as follows:
828 	 *
829 	 *    IDLE_LOOP2: jnz cntr0, TARGET_A
830 	 *    IDLE_LOOP1: jnz cntr1, TARGET_B
831 	 *                return
832 	 *
833 	 * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
834 	 * TARGET_B is set to IDLE_LOOP2 as well
835 	 *
836 	 * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
837 	 * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
838 	 *
839 	 * a little confusing, but it helps save precious space in the inst_rom
840 	 * and sequencer rom and keeps the delays more accurate and reduces
841 	 * overhead
842 	 */
843 	if (afi_clocks < 0x100) {
844 		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
845 		       &sdr_rw_load_mgr_regs->load_cntr1);
846 
847 		writel(seq->rwcfg->idle_loop1,
848 		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
849 
850 		writel(seq->rwcfg->idle_loop1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
851 					  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
852 	} else {
853 		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
854 		       &sdr_rw_load_mgr_regs->load_cntr0);
855 
856 		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
857 		       &sdr_rw_load_mgr_regs->load_cntr1);
858 
859 		writel(seq->rwcfg->idle_loop2,
860 		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
861 
862 		writel(seq->rwcfg->idle_loop2,
863 		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
864 
865 		do {
866 			writel(seq->rwcfg->idle_loop2,
867 			       SDR_PHYGRP_RWMGRGRP_ADDRESS |
868 			       RW_MGR_RUN_SINGLE_GROUP_OFFSET);
869 		} while (c_loop-- != 0);
870 	}
871 	debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
872 }
873 
delay_for_n_ns(struct socfpga_sdrseq * seq,const u32 ns)874 static void delay_for_n_ns(struct socfpga_sdrseq *seq, const u32 ns)
875 {
876 	delay_for_n_mem_clocks(seq, (ns * seq->misccfg->afi_clk_freq *
877 				seq->misccfg->afi_rate_ratio) / 1000);
878 }
879 
880 /**
881  * rw_mgr_mem_init_load_regs() - Load instruction registers
882  * @cntr0:	Counter 0 value
883  * @cntr1:	Counter 1 value
884  * @cntr2:	Counter 2 value
885  * @jump:	Jump instruction value
886  *
887  * Load instruction registers.
888  */
rw_mgr_mem_init_load_regs(struct socfpga_sdrseq * seq,u32 cntr0,u32 cntr1,u32 cntr2,u32 jump)889 static void rw_mgr_mem_init_load_regs(struct socfpga_sdrseq *seq,
890 				      u32 cntr0, u32 cntr1, u32 cntr2, u32 jump)
891 {
892 	u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
893 			   RW_MGR_RUN_SINGLE_GROUP_OFFSET;
894 
895 	/* Load counters */
896 	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0),
897 	       &sdr_rw_load_mgr_regs->load_cntr0);
898 	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1),
899 	       &sdr_rw_load_mgr_regs->load_cntr1);
900 	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2),
901 	       &sdr_rw_load_mgr_regs->load_cntr2);
902 
903 	/* Load jump address */
904 	writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
905 	writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1);
906 	writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
907 
908 	/* Execute count instruction */
909 	writel(jump, grpaddr);
910 }
911 
912 /**
913  * rw_mgr_mem_load_user_ddr2() - Load user calibration values for DDR2
914  * @handoff:	Indicate whether this is initialization or handoff phase
915  *
916  * Load user calibration values and optionally precharge the banks.
917  */
rw_mgr_mem_load_user_ddr2(struct socfpga_sdrseq * seq,const int handoff)918 static void rw_mgr_mem_load_user_ddr2(struct socfpga_sdrseq *seq,
919 				      const int handoff)
920 {
921 	u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
922 		      RW_MGR_RUN_SINGLE_GROUP_OFFSET;
923 	u32 r;
924 
925 	for (r = 0; r < seq->rwcfg->mem_number_of_ranks; r++) {
926 		/* set rank */
927 		set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_OFF);
928 
929 		/* precharge all banks ... */
930 		writel(seq->rwcfg->precharge_all, grpaddr);
931 
932 		writel(seq->rwcfg->emr2, grpaddr);
933 		writel(seq->rwcfg->emr3, grpaddr);
934 		writel(seq->rwcfg->emr, grpaddr);
935 
936 		if (handoff) {
937 			writel(seq->rwcfg->mr_user, grpaddr);
938 			continue;
939 		}
940 
941 		writel(seq->rwcfg->mr_dll_reset, grpaddr);
942 
943 		writel(seq->rwcfg->precharge_all, grpaddr);
944 
945 		writel(seq->rwcfg->refresh, grpaddr);
946 		delay_for_n_ns(seq, 200);
947 		writel(seq->rwcfg->refresh, grpaddr);
948 		delay_for_n_ns(seq, 200);
949 
950 		writel(seq->rwcfg->mr_calib, grpaddr);
951 		writel(/*seq->rwcfg->*/0x0b, grpaddr);	// EMR_OCD_ENABLE
952 		writel(seq->rwcfg->emr, grpaddr);
953 		delay_for_n_mem_clocks(seq, 200);
954 	}
955 }
956 
957 /**
958  * rw_mgr_mem_load_user_ddr3() - Load user calibration values
959  * @fin1:	Final instruction 1
960  * @fin2:	Final instruction 2
961  * @precharge:	If 1, precharge the banks at the end
962  *
963  * Load user calibration values and optionally precharge the banks.
964  */
rw_mgr_mem_load_user_ddr3(struct socfpga_sdrseq * seq,const u32 fin1,const u32 fin2,const int precharge)965 static void rw_mgr_mem_load_user_ddr3(struct socfpga_sdrseq *seq,
966 				 const u32 fin1, const u32 fin2,
967 				 const int precharge)
968 {
969 	u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
970 		      RW_MGR_RUN_SINGLE_GROUP_OFFSET;
971 	u32 r;
972 
973 	for (r = 0; r < seq->rwcfg->mem_number_of_ranks; r++) {
974 		/* set rank */
975 		set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_OFF);
976 
977 		/* precharge all banks ... */
978 		if (precharge)
979 			writel(seq->rwcfg->precharge_all, grpaddr);
980 
981 		/*
982 		 * USER Use Mirror-ed commands for odd ranks if address
983 		 * mirrorring is on
984 		 */
985 		if ((seq->rwcfg->mem_address_mirroring >> r) & 0x1) {
986 			set_jump_as_return(seq);
987 			writel(seq->rwcfg->mrs2_mirr, grpaddr);
988 			delay_for_n_mem_clocks(seq, 4);
989 			set_jump_as_return(seq);
990 			writel(seq->rwcfg->mrs3_mirr, grpaddr);
991 			delay_for_n_mem_clocks(seq, 4);
992 			set_jump_as_return(seq);
993 			writel(seq->rwcfg->mrs1_mirr, grpaddr);
994 			delay_for_n_mem_clocks(seq, 4);
995 			set_jump_as_return(seq);
996 			writel(fin1, grpaddr);
997 		} else {
998 			set_jump_as_return(seq);
999 			writel(seq->rwcfg->mrs2, grpaddr);
1000 			delay_for_n_mem_clocks(seq, 4);
1001 			set_jump_as_return(seq);
1002 			writel(seq->rwcfg->mrs3, grpaddr);
1003 			delay_for_n_mem_clocks(seq, 4);
1004 			set_jump_as_return(seq);
1005 			writel(seq->rwcfg->mrs1, grpaddr);
1006 			set_jump_as_return(seq);
1007 			writel(fin2, grpaddr);
1008 		}
1009 
1010 		if (precharge)
1011 			continue;
1012 
1013 		set_jump_as_return(seq);
1014 		writel(seq->rwcfg->zqcl, grpaddr);
1015 
1016 		/* tZQinit = tDLLK = 512 ck cycles */
1017 		delay_for_n_mem_clocks(seq, 512);
1018 	}
1019 }
1020 
1021 /**
1022  * rw_mgr_mem_load_user() - Load user calibration values
1023  * @fin1:	Final instruction 1
1024  * @fin2:	Final instruction 2
1025  * @precharge:	If 1, precharge the banks at the end
1026  *
1027  * Load user calibration values and optionally precharge the banks.
1028  */
rw_mgr_mem_load_user(struct socfpga_sdrseq * seq,const u32 fin1,const u32 fin2,const int precharge)1029 static void rw_mgr_mem_load_user(struct socfpga_sdrseq *seq,
1030 				 const u32 fin1, const u32 fin2,
1031 				 const int precharge)
1032 {
1033 	if (dram_is_ddr(2))
1034 		rw_mgr_mem_load_user_ddr2(seq, precharge);
1035 	else if (dram_is_ddr(3))
1036 		rw_mgr_mem_load_user_ddr3(seq, fin1, fin2, precharge);
1037 	else
1038 		hang();
1039 }
1040 /**
1041  * rw_mgr_mem_initialize() - Initialize RW Manager
1042  *
1043  * Initialize RW Manager.
1044  */
rw_mgr_mem_initialize(struct socfpga_sdrseq * seq)1045 static void rw_mgr_mem_initialize(struct socfpga_sdrseq *seq)
1046 {
1047 	debug("%s:%d\n", __func__, __LINE__);
1048 
1049 	/* The reset / cke part of initialization is broadcasted to all ranks */
1050 	if (dram_is_ddr(3)) {
1051 		writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1052 					RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
1053 	}
1054 
1055 	/*
1056 	 * Here's how you load register for a loop
1057 	 * Counters are located @ 0x800
1058 	 * Jump address are located @ 0xC00
1059 	 * For both, registers 0 to 3 are selected using bits 3 and 2, like
1060 	 * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
1061 	 * I know this ain't pretty, but Avalon bus throws away the 2 least
1062 	 * significant bits
1063 	 */
1064 
1065 	/* Start with memory RESET activated */
1066 
1067 	/* tINIT = 200us */
1068 
1069 	/*
1070 	 * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
1071 	 * If a and b are the number of iteration in 2 nested loops
1072 	 * it takes the following number of cycles to complete the operation:
1073 	 * number_of_cycles = ((2 + n) * a + 2) * b
1074 	 * where n is the number of instruction in the inner loop
1075 	 * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
1076 	 * b = 6A
1077 	 */
1078 	rw_mgr_mem_init_load_regs(seq, seq->misccfg->tinit_cntr0_val,
1079 				  seq->misccfg->tinit_cntr1_val,
1080 				  seq->misccfg->tinit_cntr2_val,
1081 				  seq->rwcfg->init_reset_0_cke_0);
1082 
1083 	/* Indicate that memory is stable. */
1084 	writel(1, &phy_mgr_cfg->reset_mem_stbl);
1085 
1086 	if (dram_is_ddr(2)) {
1087 		writel(seq->rwcfg->nop, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1088 					RW_MGR_RUN_SINGLE_GROUP_OFFSET);
1089 
1090 		/* Bring up clock enable. */
1091 
1092 		/* tXRP < 400 ck cycles */
1093 		delay_for_n_ns(seq, 400);
1094 	} else if (dram_is_ddr(3)) {
1095 		/*
1096 		 * transition the RESET to high
1097 		 * Wait for 500us
1098 		 */
1099 
1100 		/*
1101 		 * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
1102 		 * If a and b are the number of iteration in 2 nested loops
1103 		 * it takes the following number of cycles to complete the
1104 		 * operation number_of_cycles = ((2 + n) * a + 2) * b
1105 		 * where n is the number of instruction in the inner loop
1106 		 * One possible solution is
1107 		 * n = 2 , a = 131 , b = 256 => a = 83, b = FF
1108 		 */
1109 		rw_mgr_mem_init_load_regs(seq, seq->misccfg->treset_cntr0_val,
1110 					  seq->misccfg->treset_cntr1_val,
1111 					  seq->misccfg->treset_cntr2_val,
1112 					  seq->rwcfg->init_reset_1_cke_0);
1113 		/* Bring up clock enable. */
1114 
1115 		/* tXRP < 250 ck cycles */
1116 		delay_for_n_mem_clocks(seq, 250);
1117 	}
1118 
1119 	rw_mgr_mem_load_user(seq, seq->rwcfg->mrs0_dll_reset_mirr,
1120 			     seq->rwcfg->mrs0_dll_reset, 0);
1121 }
1122 
1123 /**
1124  * rw_mgr_mem_handoff() - Hand off the memory to user
1125  *
1126  * At the end of calibration we have to program the user settings in
1127  * and hand off the memory to the user.
1128  */
rw_mgr_mem_handoff(struct socfpga_sdrseq * seq)1129 static void rw_mgr_mem_handoff(struct socfpga_sdrseq *seq)
1130 {
1131 	rw_mgr_mem_load_user(seq, seq->rwcfg->mrs0_user_mirr,
1132 			     seq->rwcfg->mrs0_user, 1);
1133 	/*
1134 	 * Need to wait tMOD (12CK or 15ns) time before issuing other
1135 	 * commands, but we will have plenty of NIOS cycles before actual
1136 	 * handoff so its okay.
1137 	 */
1138 }
1139 
1140 /**
1141  * rw_mgr_mem_calibrate_write_test_issue() - Issue write test command
1142  * @group:	Write Group
1143  * @use_dm:	Use DM
1144  *
1145  * Issue write test command. Two variants are provided, one that just tests
1146  * a write pattern and another that tests datamask functionality.
1147  */
rw_mgr_mem_calibrate_write_test_issue(struct socfpga_sdrseq * seq,u32 group,u32 test_dm)1148 static void rw_mgr_mem_calibrate_write_test_issue(struct socfpga_sdrseq *seq,
1149 						  u32 group, u32 test_dm)
1150 {
1151 	const u32 quick_write_mode =
1152 		(STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) &&
1153 		seq->misccfg->enable_super_quick_calibration;
1154 	u32 mcc_instruction;
1155 	u32 rw_wl_nop_cycles;
1156 
1157 	/*
1158 	 * Set counter and jump addresses for the right
1159 	 * number of NOP cycles.
1160 	 * The number of supported NOP cycles can range from -1 to infinity
1161 	 * Three different cases are handled:
1162 	 *
1163 	 * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
1164 	 *    mechanism will be used to insert the right number of NOPs
1165 	 *
1166 	 * 2. For a number of NOP cycles equals to 0, the micro-instruction
1167 	 *    issuing the write command will jump straight to the
1168 	 *    micro-instruction that turns on DQS (for DDRx), or outputs write
1169 	 *    data (for RLD), skipping
1170 	 *    the NOP micro-instruction all together
1171 	 *
1172 	 * 3. A number of NOP cycles equal to -1 indicates that DQS must be
1173 	 *    turned on in the same micro-instruction that issues the write
1174 	 *    command. Then we need
1175 	 *    to directly jump to the micro-instruction that sends out the data
1176 	 *
1177 	 * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
1178 	 *       (2 and 3). One jump-counter (0) is used to perform multiple
1179 	 *       write-read operations.
1180 	 *       one counter left to issue this command in "multiple-group" mode
1181 	 */
1182 
1183 	rw_wl_nop_cycles = seq->gbl.rw_wl_nop_cycles;
1184 
1185 	if (rw_wl_nop_cycles == -1) {
1186 		/*
1187 		 * CNTR 2 - We want to execute the special write operation that
1188 		 * turns on DQS right away and then skip directly to the
1189 		 * instruction that sends out the data. We set the counter to a
1190 		 * large number so that the jump is always taken.
1191 		 */
1192 		writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
1193 
1194 		/* CNTR 3 - Not used */
1195 		if (test_dm) {
1196 			mcc_instruction = seq->rwcfg->lfsr_wr_rd_dm_bank_0_wl_1;
1197 			writel(seq->rwcfg->lfsr_wr_rd_dm_bank_0_data,
1198 			       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1199 			writel(seq->rwcfg->lfsr_wr_rd_dm_bank_0_nop,
1200 			       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1201 		} else {
1202 			mcc_instruction = seq->rwcfg->lfsr_wr_rd_bank_0_wl_1;
1203 			writel(seq->rwcfg->lfsr_wr_rd_bank_0_data,
1204 			       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1205 			writel(seq->rwcfg->lfsr_wr_rd_bank_0_nop,
1206 			       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1207 		}
1208 	} else if (rw_wl_nop_cycles == 0) {
1209 		/*
1210 		 * CNTR 2 - We want to skip the NOP operation and go straight
1211 		 * to the DQS enable instruction. We set the counter to a large
1212 		 * number so that the jump is always taken.
1213 		 */
1214 		writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
1215 
1216 		/* CNTR 3 - Not used */
1217 		if (test_dm) {
1218 			mcc_instruction = seq->rwcfg->lfsr_wr_rd_dm_bank_0;
1219 			writel(seq->rwcfg->lfsr_wr_rd_dm_bank_0_dqs,
1220 			       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1221 		} else {
1222 			mcc_instruction = seq->rwcfg->lfsr_wr_rd_bank_0;
1223 			writel(seq->rwcfg->lfsr_wr_rd_bank_0_dqs,
1224 			       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1225 		}
1226 	} else {
1227 		/*
1228 		 * CNTR 2 - In this case we want to execute the next instruction
1229 		 * and NOT take the jump. So we set the counter to 0. The jump
1230 		 * address doesn't count.
1231 		 */
1232 		writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
1233 		writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1234 
1235 		/*
1236 		 * CNTR 3 - Set the nop counter to the number of cycles we
1237 		 * need to loop for, minus 1.
1238 		 */
1239 		writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
1240 		if (test_dm) {
1241 			mcc_instruction = seq->rwcfg->lfsr_wr_rd_dm_bank_0;
1242 			writel(seq->rwcfg->lfsr_wr_rd_dm_bank_0_nop,
1243 			       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1244 		} else {
1245 			mcc_instruction = seq->rwcfg->lfsr_wr_rd_bank_0;
1246 			writel(seq->rwcfg->lfsr_wr_rd_bank_0_nop,
1247 			       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1248 		}
1249 	}
1250 
1251 	writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1252 		  RW_MGR_RESET_READ_DATAPATH_OFFSET);
1253 
1254 	if (quick_write_mode)
1255 		writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
1256 	else
1257 		writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);
1258 
1259 	writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1260 
1261 	/*
1262 	 * CNTR 1 - This is used to ensure enough time elapses
1263 	 * for read data to come back.
1264 	 */
1265 	writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);
1266 
1267 	if (test_dm) {
1268 		writel(seq->rwcfg->lfsr_wr_rd_dm_bank_0_wait,
1269 		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1270 	} else {
1271 		writel(seq->rwcfg->lfsr_wr_rd_bank_0_wait,
1272 		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1273 	}
1274 
1275 	writel(mcc_instruction, (SDR_PHYGRP_RWMGRGRP_ADDRESS |
1276 				RW_MGR_RUN_SINGLE_GROUP_OFFSET) +
1277 				(group << 2));
1278 }
1279 
1280 /**
1281  * rw_mgr_mem_calibrate_write_test() - Test writes, check for single/multiple
1282  * pass
1283  * @rank_bgn:		Rank number
1284  * @write_group:	Write Group
1285  * @use_dm:		Use DM
1286  * @all_correct:	All bits must be correct in the mask
1287  * @bit_chk:		Resulting bit mask after the test
1288  * @all_ranks:		Test all ranks
1289  *
1290  * Test writes, can check for a single bit pass or multiple bit pass.
1291  */
1292 static int
rw_mgr_mem_calibrate_write_test(struct socfpga_sdrseq * seq,const u32 rank_bgn,const u32 write_group,const u32 use_dm,const u32 all_correct,u32 * bit_chk,const u32 all_ranks)1293 rw_mgr_mem_calibrate_write_test(struct socfpga_sdrseq *seq,
1294 				const u32 rank_bgn, const u32 write_group,
1295 				const u32 use_dm, const u32 all_correct,
1296 				u32 *bit_chk, const u32 all_ranks)
1297 {
1298 	const u32 rank_end = all_ranks ?
1299 				seq->rwcfg->mem_number_of_ranks :
1300 				(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1301 	const u32 shift_ratio = seq->rwcfg->mem_dq_per_write_dqs /
1302 				seq->rwcfg->mem_virtual_groups_per_write_dqs;
1303 	const u32 correct_mask_vg = seq->param.write_correct_mask_vg;
1304 
1305 	u32 tmp_bit_chk, base_rw_mgr, group;
1306 	int vg, r;
1307 
1308 	*bit_chk = seq->param.write_correct_mask;
1309 
1310 	for (r = rank_bgn; r < rank_end; r++) {
1311 		/* Set rank */
1312 		set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_READ_WRITE);
1313 
1314 		tmp_bit_chk = 0;
1315 		for (vg = seq->rwcfg->mem_virtual_groups_per_write_dqs - 1;
1316 		     vg >= 0; vg--) {
1317 			/* Reset the FIFOs to get pointers to known state. */
1318 			writel(0, &phy_mgr_cmd->fifo_reset);
1319 
1320 			group = write_group *
1321 				seq->rwcfg->mem_virtual_groups_per_write_dqs
1322 				+ vg;
1323 			rw_mgr_mem_calibrate_write_test_issue(seq, group,
1324 							      use_dm);
1325 
1326 			base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1327 			tmp_bit_chk <<= shift_ratio;
1328 			tmp_bit_chk |= (correct_mask_vg & ~(base_rw_mgr));
1329 		}
1330 
1331 		*bit_chk &= tmp_bit_chk;
1332 	}
1333 
1334 	set_rank_and_odt_mask(seq, 0, RW_MGR_ODT_MODE_OFF);
1335 	if (all_correct) {
1336 		debug_cond(DLEVEL >= 2,
1337 			   "write_test(%u,%u,ALL) : %u == %u => %i\n",
1338 			   write_group, use_dm, *bit_chk,
1339 			   seq->param.write_correct_mask,
1340 			   *bit_chk == seq->param.write_correct_mask);
1341 		return *bit_chk == seq->param.write_correct_mask;
1342 	} else {
1343 		debug_cond(DLEVEL >= 2,
1344 			   "write_test(%u,%u,ONE) : %u != %i => %i\n",
1345 			   write_group, use_dm, *bit_chk, 0, *bit_chk != 0);
1346 		return *bit_chk != 0x00;
1347 	}
1348 }
1349 
1350 /**
1351  * rw_mgr_mem_calibrate_read_test_patterns() - Read back test patterns
1352  * @rank_bgn:	Rank number
1353  * @group:	Read/Write Group
1354  * @all_ranks:	Test all ranks
1355  *
1356  * Performs a guaranteed read on the patterns we are going to use during a
1357  * read test to ensure memory works.
1358  */
1359 static int
rw_mgr_mem_calibrate_read_test_patterns(struct socfpga_sdrseq * seq,const u32 rank_bgn,const u32 group,const u32 all_ranks)1360 rw_mgr_mem_calibrate_read_test_patterns(struct socfpga_sdrseq *seq,
1361 					const u32 rank_bgn, const u32 group,
1362 					const u32 all_ranks)
1363 {
1364 	const u32 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1365 			 RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1366 	const u32 addr_offset =
1367 			 (group * seq->rwcfg->mem_virtual_groups_per_read_dqs)
1368 			 << 2;
1369 	const u32 rank_end = all_ranks ?
1370 				seq->rwcfg->mem_number_of_ranks :
1371 				(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1372 	const u32 shift_ratio = seq->rwcfg->mem_dq_per_read_dqs /
1373 				seq->rwcfg->mem_virtual_groups_per_read_dqs;
1374 	const u32 correct_mask_vg = seq->param.read_correct_mask_vg;
1375 
1376 	u32 tmp_bit_chk, base_rw_mgr, bit_chk;
1377 	int vg, r;
1378 	int ret = 0;
1379 
1380 	bit_chk = seq->param.read_correct_mask;
1381 
1382 	for (r = rank_bgn; r < rank_end; r++) {
1383 		/* Set rank */
1384 		set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_READ_WRITE);
1385 
1386 		/* Load up a constant bursts of read commands */
1387 		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1388 		writel(seq->rwcfg->guaranteed_read,
1389 		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1390 
1391 		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1392 		writel(seq->rwcfg->guaranteed_read_cont,
1393 		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1394 
1395 		tmp_bit_chk = 0;
1396 		for (vg = seq->rwcfg->mem_virtual_groups_per_read_dqs - 1;
1397 		     vg >= 0; vg--) {
1398 			/* Reset the FIFOs to get pointers to known state. */
1399 			writel(0, &phy_mgr_cmd->fifo_reset);
1400 			writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1401 				  RW_MGR_RESET_READ_DATAPATH_OFFSET);
1402 			writel(seq->rwcfg->guaranteed_read,
1403 			       addr + addr_offset + (vg << 2));
1404 
1405 			base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1406 			tmp_bit_chk <<= shift_ratio;
1407 			tmp_bit_chk |= correct_mask_vg & ~base_rw_mgr;
1408 		}
1409 
1410 		bit_chk &= tmp_bit_chk;
1411 	}
1412 
1413 	writel(seq->rwcfg->clear_dqs_enable, addr + (group << 2));
1414 
1415 	set_rank_and_odt_mask(seq, 0, RW_MGR_ODT_MODE_OFF);
1416 
1417 	if (bit_chk != seq->param.read_correct_mask)
1418 		ret = -EIO;
1419 
1420 	debug_cond(DLEVEL >= 1,
1421 		   "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %i\n",
1422 		   __func__, __LINE__, group, bit_chk,
1423 		   seq->param.read_correct_mask, ret);
1424 
1425 	return ret;
1426 }
1427 
1428 /**
1429  * rw_mgr_mem_calibrate_read_load_patterns() - Load up the patterns for read
1430  * test
1431  * @rank_bgn:	Rank number
1432  * @all_ranks:	Test all ranks
1433  *
1434  * Load up the patterns we are going to use during a read test.
1435  */
rw_mgr_mem_calibrate_read_load_patterns(struct socfpga_sdrseq * seq,const u32 rank_bgn,const int all_ranks)1436 static void rw_mgr_mem_calibrate_read_load_patterns(struct socfpga_sdrseq *seq,
1437 						    const u32 rank_bgn,
1438 						    const int all_ranks)
1439 {
1440 	const u32 rank_end = all_ranks ?
1441 			seq->rwcfg->mem_number_of_ranks :
1442 			(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1443 	u32 r;
1444 
1445 	debug("%s:%d\n", __func__, __LINE__);
1446 
1447 	for (r = rank_bgn; r < rank_end; r++) {
1448 		/* set rank */
1449 		set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_READ_WRITE);
1450 
1451 		/* Load up a constant bursts */
1452 		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1453 
1454 		writel(seq->rwcfg->guaranteed_write_wait0,
1455 		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1456 
1457 		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1458 
1459 		writel(seq->rwcfg->guaranteed_write_wait1,
1460 		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1461 
1462 		writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);
1463 
1464 		writel(seq->rwcfg->guaranteed_write_wait2,
1465 		       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1466 
1467 		writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);
1468 
1469 		writel(seq->rwcfg->guaranteed_write_wait3,
1470 		       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1471 
1472 		writel(seq->rwcfg->guaranteed_write,
1473 		       SDR_PHYGRP_RWMGRGRP_ADDRESS |
1474 		       RW_MGR_RUN_SINGLE_GROUP_OFFSET);
1475 	}
1476 
1477 	set_rank_and_odt_mask(seq, 0, RW_MGR_ODT_MODE_OFF);
1478 }
1479 
1480 /**
1481  * rw_mgr_mem_calibrate_read_test() - Perform READ test on single rank
1482  * @rank_bgn:		Rank number
1483  * @group:		Read/Write group
1484  * @num_tries:		Number of retries of the test
1485  * @all_correct:	All bits must be correct in the mask
1486  * @bit_chk:		Resulting bit mask after the test
1487  * @all_groups:		Test all R/W groups
1488  * @all_ranks:		Test all ranks
1489  *
1490  * Try a read and see if it returns correct data back. Test has dummy reads
1491  * inserted into the mix used to align DQS enable. Test has more thorough
1492  * checks than the regular read test.
1493  */
1494 static int
rw_mgr_mem_calibrate_read_test(struct socfpga_sdrseq * seq,const u32 rank_bgn,const u32 group,const u32 num_tries,const u32 all_correct,u32 * bit_chk,const u32 all_groups,const u32 all_ranks)1495 rw_mgr_mem_calibrate_read_test(struct socfpga_sdrseq *seq,
1496 			       const u32 rank_bgn, const u32 group,
1497 			       const u32 num_tries, const u32 all_correct,
1498 			       u32 *bit_chk,
1499 			       const u32 all_groups, const u32 all_ranks)
1500 {
1501 	const u32 rank_end = all_ranks ? seq->rwcfg->mem_number_of_ranks :
1502 		(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1503 	const u32 quick_read_mode =
1504 		((STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS) &&
1505 		 seq->misccfg->enable_super_quick_calibration);
1506 	u32 correct_mask_vg = seq->param.read_correct_mask_vg;
1507 	u32 tmp_bit_chk;
1508 	u32 base_rw_mgr;
1509 	u32 addr;
1510 
1511 	int r, vg, ret;
1512 
1513 	*bit_chk = seq->param.read_correct_mask;
1514 
1515 	for (r = rank_bgn; r < rank_end; r++) {
1516 		/* set rank */
1517 		set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_READ_WRITE);
1518 
1519 		writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);
1520 
1521 		writel(seq->rwcfg->read_b2b_wait1,
1522 		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1523 
1524 		writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
1525 		writel(seq->rwcfg->read_b2b_wait2,
1526 		       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1527 
1528 		if (quick_read_mode)
1529 			writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
1530 			/* need at least two (1+1) reads to capture failures */
1531 		else if (all_groups)
1532 			writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
1533 		else
1534 			writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);
1535 
1536 		writel(seq->rwcfg->read_b2b,
1537 		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1538 		if (all_groups)
1539 			writel(seq->rwcfg->mem_if_read_dqs_width *
1540 			       seq->rwcfg->mem_virtual_groups_per_read_dqs - 1,
1541 			       &sdr_rw_load_mgr_regs->load_cntr3);
1542 		else
1543 			writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);
1544 
1545 		writel(seq->rwcfg->read_b2b,
1546 		       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1547 
1548 		tmp_bit_chk = 0;
1549 		for (vg = seq->rwcfg->mem_virtual_groups_per_read_dqs - 1;
1550 		     vg >= 0; vg--) {
1551 			/* Reset the FIFOs to get pointers to known state. */
1552 			writel(0, &phy_mgr_cmd->fifo_reset);
1553 			writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1554 				  RW_MGR_RESET_READ_DATAPATH_OFFSET);
1555 
1556 			if (all_groups) {
1557 				addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1558 				       RW_MGR_RUN_ALL_GROUPS_OFFSET;
1559 			} else {
1560 				addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1561 				       RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1562 			}
1563 
1564 			writel(seq->rwcfg->read_b2b, addr +
1565 			       ((group *
1566 				 seq->rwcfg->mem_virtual_groups_per_read_dqs +
1567 				 vg) << 2));
1568 
1569 			base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1570 			tmp_bit_chk <<=
1571 				seq->rwcfg->mem_dq_per_read_dqs /
1572 				seq->rwcfg->mem_virtual_groups_per_read_dqs;
1573 			tmp_bit_chk |= correct_mask_vg & ~(base_rw_mgr);
1574 		}
1575 
1576 		*bit_chk &= tmp_bit_chk;
1577 	}
1578 
1579 	addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1580 	writel(seq->rwcfg->clear_dqs_enable, addr + (group << 2));
1581 
1582 	set_rank_and_odt_mask(seq, 0, RW_MGR_ODT_MODE_OFF);
1583 
1584 	if (all_correct) {
1585 		ret = (*bit_chk == seq->param.read_correct_mask);
1586 		debug_cond(DLEVEL >= 2,
1587 			   "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %i\n",
1588 			   __func__, __LINE__, group, all_groups, *bit_chk,
1589 			   seq->param.read_correct_mask, ret);
1590 	} else	{
1591 		ret = (*bit_chk != 0x00);
1592 		debug_cond(DLEVEL >= 2,
1593 			   "%s:%d read_test(%u,ONE,%u) => (%u != %u) => %i\n",
1594 			   __func__, __LINE__, group, all_groups, *bit_chk,
1595 			   0, ret);
1596 	}
1597 
1598 	return ret;
1599 }
1600 
1601 /**
1602  * rw_mgr_mem_calibrate_read_test_all_ranks() - Perform READ test on all ranks
1603  * @grp:		Read/Write group
1604  * @num_tries:		Number of retries of the test
1605  * @all_correct:	All bits must be correct in the mask
1606  * @all_groups:		Test all R/W groups
1607  *
1608  * Perform a READ test across all memory ranks.
1609  */
1610 static int
rw_mgr_mem_calibrate_read_test_all_ranks(struct socfpga_sdrseq * seq,const u32 grp,const u32 num_tries,const u32 all_correct,const u32 all_groups)1611 rw_mgr_mem_calibrate_read_test_all_ranks(struct socfpga_sdrseq *seq,
1612 					 const u32 grp, const u32 num_tries,
1613 					 const u32 all_correct,
1614 					 const u32 all_groups)
1615 {
1616 	u32 bit_chk;
1617 	return rw_mgr_mem_calibrate_read_test(seq, 0, grp, num_tries,
1618 					      all_correct, &bit_chk, all_groups,
1619 					      1);
1620 }
1621 
1622 /**
1623  * rw_mgr_incr_vfifo() - Increase VFIFO value
1624  * @grp:	Read/Write group
1625  *
1626  * Increase VFIFO value.
1627  */
rw_mgr_incr_vfifo(const u32 grp)1628 static void rw_mgr_incr_vfifo(const u32 grp)
1629 {
1630 	writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
1631 }
1632 
1633 /**
1634  * rw_mgr_decr_vfifo() - Decrease VFIFO value
1635  * @grp:	Read/Write group
1636  *
1637  * Decrease VFIFO value.
1638  */
rw_mgr_decr_vfifo(struct socfpga_sdrseq * seq,const u32 grp)1639 static void rw_mgr_decr_vfifo(struct socfpga_sdrseq *seq, const u32 grp)
1640 {
1641 	u32 i;
1642 
1643 	for (i = 0; i < seq->misccfg->read_valid_fifo_size - 1; i++)
1644 		rw_mgr_incr_vfifo(grp);
1645 }
1646 
1647 /**
1648  * find_vfifo_failing_read() - Push VFIFO to get a failing read
1649  * @grp:	Read/Write group
1650  *
1651  * Push VFIFO until a failing read happens.
1652  */
find_vfifo_failing_read(struct socfpga_sdrseq * seq,const u32 grp)1653 static int find_vfifo_failing_read(struct socfpga_sdrseq *seq,
1654 				   const u32 grp)
1655 {
1656 	u32 v, ret, fail_cnt = 0;
1657 
1658 	for (v = 0; v < seq->misccfg->read_valid_fifo_size; v++) {
1659 		debug_cond(DLEVEL >= 2, "%s:%d: vfifo %u\n",
1660 			   __func__, __LINE__, v);
1661 		ret = rw_mgr_mem_calibrate_read_test_all_ranks(seq, grp, 1,
1662 							       PASS_ONE_BIT, 0);
1663 		if (!ret) {
1664 			fail_cnt++;
1665 
1666 			if (fail_cnt == 2)
1667 				return v;
1668 		}
1669 
1670 		/* Fiddle with FIFO. */
1671 		rw_mgr_incr_vfifo(grp);
1672 	}
1673 
1674 	/* No failing read found! Something must have gone wrong. */
1675 	debug_cond(DLEVEL >= 2, "%s:%d: vfifo failed\n", __func__, __LINE__);
1676 	return 0;
1677 }
1678 
1679 /**
1680  * sdr_find_phase_delay() - Find DQS enable phase or delay
1681  * @working:	If 1, look for working phase/delay, if 0, look for non-working
1682  * @delay:	If 1, look for delay, if 0, look for phase
1683  * @grp:	Read/Write group
1684  * @work:	Working window position
1685  * @work_inc:	Working window increment
1686  * @pd:		DQS Phase/Delay Iterator
1687  *
1688  * Find working or non-working DQS enable phase setting.
1689  */
sdr_find_phase_delay(struct socfpga_sdrseq * seq,int working,int delay,const u32 grp,u32 * work,const u32 work_inc,u32 * pd)1690 static int sdr_find_phase_delay(struct socfpga_sdrseq *seq, int working,
1691 				int delay, const u32 grp, u32 *work,
1692 				const u32 work_inc, u32 *pd)
1693 {
1694 	const u32 max = delay ? seq->iocfg->dqs_en_delay_max :
1695 				seq->iocfg->dqs_en_phase_max;
1696 	u32 ret;
1697 
1698 	for (; *pd <= max; (*pd)++) {
1699 		if (delay)
1700 			scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, *pd);
1701 		else
1702 			scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, *pd);
1703 
1704 		ret = rw_mgr_mem_calibrate_read_test_all_ranks(seq, grp, 1,
1705 							       PASS_ONE_BIT, 0);
1706 		if (!working)
1707 			ret = !ret;
1708 
1709 		if (ret)
1710 			return 0;
1711 
1712 		if (work)
1713 			*work += work_inc;
1714 	}
1715 
1716 	return -EINVAL;
1717 }
1718 /**
1719  * sdr_find_phase() - Find DQS enable phase
1720  * @working:	If 1, look for working phase, if 0, look for non-working phase
1721  * @grp:	Read/Write group
1722  * @work:	Working window position
1723  * @i:		Iterator
1724  * @p:		DQS Phase Iterator
1725  *
1726  * Find working or non-working DQS enable phase setting.
1727  */
sdr_find_phase(struct socfpga_sdrseq * seq,int working,const u32 grp,u32 * work,u32 * i,u32 * p)1728 static int sdr_find_phase(struct socfpga_sdrseq *seq, int working,
1729 			  const u32 grp, u32 *work, u32 *i, u32 *p)
1730 {
1731 	const u32 end = seq->misccfg->read_valid_fifo_size + (working ? 0 : 1);
1732 	int ret;
1733 
1734 	for (; *i < end; (*i)++) {
1735 		if (working)
1736 			*p = 0;
1737 
1738 		ret = sdr_find_phase_delay(seq, working, 0, grp, work,
1739 					   seq->iocfg->delay_per_opa_tap, p);
1740 		if (!ret)
1741 			return 0;
1742 
1743 		if (*p > seq->iocfg->dqs_en_phase_max) {
1744 			/* Fiddle with FIFO. */
1745 			rw_mgr_incr_vfifo(grp);
1746 			if (!working)
1747 				*p = 0;
1748 		}
1749 	}
1750 
1751 	return -EINVAL;
1752 }
1753 
1754 /**
1755  * sdr_working_phase() - Find working DQS enable phase
1756  * @grp:	Read/Write group
1757  * @work_bgn:	Working window start position
1758  * @d:		dtaps output value
1759  * @p:		DQS Phase Iterator
1760  * @i:		Iterator
1761  *
1762  * Find working DQS enable phase setting.
1763  */
sdr_working_phase(struct socfpga_sdrseq * seq,const u32 grp,u32 * work_bgn,u32 * d,u32 * p,u32 * i)1764 static int sdr_working_phase(struct socfpga_sdrseq *seq, const u32 grp,
1765 			     u32 *work_bgn, u32 *d, u32 *p, u32 *i)
1766 {
1767 	const u32 dtaps_per_ptap = seq->iocfg->delay_per_opa_tap /
1768 				   seq->iocfg->delay_per_dqs_en_dchain_tap;
1769 	int ret;
1770 
1771 	*work_bgn = 0;
1772 
1773 	for (*d = 0; *d <= dtaps_per_ptap; (*d)++) {
1774 		*i = 0;
1775 		scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, *d);
1776 		ret = sdr_find_phase(seq, 1, grp, work_bgn, i, p);
1777 		if (!ret)
1778 			return 0;
1779 		*work_bgn += seq->iocfg->delay_per_dqs_en_dchain_tap;
1780 	}
1781 
1782 	/* Cannot find working solution */
1783 	debug_cond(DLEVEL >= 2, "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n",
1784 		   __func__, __LINE__);
1785 	return -EINVAL;
1786 }
1787 
1788 /**
1789  * sdr_backup_phase() - Find DQS enable backup phase
1790  * @grp:	Read/Write group
1791  * @work_bgn:	Working window start position
1792  * @p:		DQS Phase Iterator
1793  *
1794  * Find DQS enable backup phase setting.
1795  */
sdr_backup_phase(struct socfpga_sdrseq * seq,const u32 grp,u32 * work_bgn,u32 * p)1796 static void sdr_backup_phase(struct socfpga_sdrseq *seq, const u32 grp,
1797 			     u32 *work_bgn, u32 *p)
1798 {
1799 	u32 tmp_delay, d;
1800 	int ret;
1801 
1802 	/* Special case code for backing up a phase */
1803 	if (*p == 0) {
1804 		*p = seq->iocfg->dqs_en_phase_max;
1805 		rw_mgr_decr_vfifo(seq, grp);
1806 	} else {
1807 		(*p)--;
1808 	}
1809 	tmp_delay = *work_bgn - seq->iocfg->delay_per_opa_tap;
1810 	scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, *p);
1811 
1812 	for (d = 0; d <= seq->iocfg->dqs_en_delay_max && tmp_delay < *work_bgn;
1813 	     d++) {
1814 		scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, d);
1815 
1816 		ret = rw_mgr_mem_calibrate_read_test_all_ranks(seq, grp, 1,
1817 							       PASS_ONE_BIT, 0);
1818 		if (ret) {
1819 			*work_bgn = tmp_delay;
1820 			break;
1821 		}
1822 
1823 		tmp_delay += seq->iocfg->delay_per_dqs_en_dchain_tap;
1824 	}
1825 
1826 	/* Restore VFIFO to old state before we decremented it (if needed). */
1827 	(*p)++;
1828 	if (*p > seq->iocfg->dqs_en_phase_max) {
1829 		*p = 0;
1830 		rw_mgr_incr_vfifo(grp);
1831 	}
1832 
1833 	scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, 0);
1834 }
1835 
1836 /**
1837  * sdr_nonworking_phase() - Find non-working DQS enable phase
1838  * @grp:	Read/Write group
1839  * @work_end:	Working window end position
1840  * @p:		DQS Phase Iterator
1841  * @i:		Iterator
1842  *
1843  * Find non-working DQS enable phase setting.
1844  */
sdr_nonworking_phase(struct socfpga_sdrseq * seq,const u32 grp,u32 * work_end,u32 * p,u32 * i)1845 static int sdr_nonworking_phase(struct socfpga_sdrseq *seq,
1846 				const u32 grp, u32 *work_end, u32 *p, u32 *i)
1847 {
1848 	int ret;
1849 
1850 	(*p)++;
1851 	*work_end += seq->iocfg->delay_per_opa_tap;
1852 	if (*p > seq->iocfg->dqs_en_phase_max) {
1853 		/* Fiddle with FIFO. */
1854 		*p = 0;
1855 		rw_mgr_incr_vfifo(grp);
1856 	}
1857 
1858 	ret = sdr_find_phase(seq, 0, grp, work_end, i, p);
1859 	if (ret) {
1860 		/* Cannot see edge of failing read. */
1861 		debug_cond(DLEVEL >= 2, "%s:%d: end: failed\n",
1862 			   __func__, __LINE__);
1863 	}
1864 
1865 	return ret;
1866 }
1867 
1868 /**
1869  * sdr_find_window_center() - Find center of the working DQS window.
1870  * @grp:	Read/Write group
1871  * @work_bgn:	First working settings
1872  * @work_end:	Last working settings
1873  *
1874  * Find center of the working DQS enable window.
1875  */
sdr_find_window_center(struct socfpga_sdrseq * seq,const u32 grp,const u32 work_bgn,const u32 work_end)1876 static int sdr_find_window_center(struct socfpga_sdrseq *seq,
1877 				  const u32 grp, const u32 work_bgn,
1878 				  const u32 work_end)
1879 {
1880 	u32 work_mid;
1881 	int tmp_delay = 0;
1882 	int i, p, d;
1883 
1884 	work_mid = (work_bgn + work_end) / 2;
1885 
1886 	debug_cond(DLEVEL >= 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1887 		   work_bgn, work_end, work_mid);
1888 	/* Get the middle delay to be less than a VFIFO delay */
1889 	tmp_delay = (seq->iocfg->dqs_en_phase_max + 1)
1890 		* seq->iocfg->delay_per_opa_tap;
1891 
1892 	debug_cond(DLEVEL >= 2, "vfifo ptap delay %d\n", tmp_delay);
1893 	work_mid %= tmp_delay;
1894 	debug_cond(DLEVEL >= 2, "new work_mid %d\n", work_mid);
1895 
1896 	tmp_delay = rounddown(work_mid, seq->iocfg->delay_per_opa_tap);
1897 	if (tmp_delay > seq->iocfg->dqs_en_phase_max
1898 		* seq->iocfg->delay_per_opa_tap) {
1899 		tmp_delay = seq->iocfg->dqs_en_phase_max
1900 			* seq->iocfg->delay_per_opa_tap;
1901 	}
1902 	p = tmp_delay / seq->iocfg->delay_per_opa_tap;
1903 
1904 	debug_cond(DLEVEL >= 2, "new p %d, tmp_delay=%d\n", p, tmp_delay);
1905 
1906 	d = DIV_ROUND_UP(work_mid - tmp_delay,
1907 			 seq->iocfg->delay_per_dqs_en_dchain_tap);
1908 	if (d > seq->iocfg->dqs_en_delay_max)
1909 		d = seq->iocfg->dqs_en_delay_max;
1910 	tmp_delay += d * seq->iocfg->delay_per_dqs_en_dchain_tap;
1911 
1912 	debug_cond(DLEVEL >= 2, "new d %d, tmp_delay=%d\n", d, tmp_delay);
1913 
1914 	scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, p);
1915 	scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, d);
1916 
1917 	/*
1918 	 * push vfifo until we can successfully calibrate. We can do this
1919 	 * because the largest possible margin in 1 VFIFO cycle.
1920 	 */
1921 	for (i = 0; i < seq->misccfg->read_valid_fifo_size; i++) {
1922 		debug_cond(DLEVEL >= 2, "find_dqs_en_phase: center\n");
1923 		if (rw_mgr_mem_calibrate_read_test_all_ranks(seq, grp, 1,
1924 							     PASS_ONE_BIT,
1925 							     0)) {
1926 			debug_cond(DLEVEL >= 2,
1927 				   "%s:%d center: found: ptap=%u dtap=%u\n",
1928 				   __func__, __LINE__, p, d);
1929 			return 0;
1930 		}
1931 
1932 		/* Fiddle with FIFO. */
1933 		rw_mgr_incr_vfifo(grp);
1934 	}
1935 
1936 	debug_cond(DLEVEL >= 2, "%s:%d center: failed.\n",
1937 		   __func__, __LINE__);
1938 	return -EINVAL;
1939 }
1940 
1941 /**
1942  * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() - Find a good DQS enable to
1943  * use
1944  * @grp:	Read/Write Group
1945  *
1946  * Find a good DQS enable to use.
1947  */
1948 static int
rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(struct socfpga_sdrseq * seq,const u32 grp)1949 rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(struct socfpga_sdrseq *seq,
1950 					     const u32 grp)
1951 {
1952 	u32 d, p, i;
1953 	u32 dtaps_per_ptap;
1954 	u32 work_bgn, work_end;
1955 	u32 found_passing_read, found_failing_read = 0, initial_failing_dtap;
1956 	int ret;
1957 
1958 	debug("%s:%d %u\n", __func__, __LINE__, grp);
1959 
1960 	reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1961 
1962 	scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, 0);
1963 	scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, 0);
1964 
1965 	/* Step 0: Determine number of delay taps for each phase tap. */
1966 	dtaps_per_ptap = seq->iocfg->delay_per_opa_tap /
1967 			 seq->iocfg->delay_per_dqs_en_dchain_tap;
1968 
1969 	/* Step 1: First push vfifo until we get a failing read. */
1970 	find_vfifo_failing_read(seq, grp);
1971 
1972 	/* Step 2: Find first working phase, increment in ptaps. */
1973 	work_bgn = 0;
1974 	ret = sdr_working_phase(seq, grp, &work_bgn, &d, &p, &i);
1975 	if (ret)
1976 		return ret;
1977 
1978 	work_end = work_bgn;
1979 
1980 	/*
1981 	 * If d is 0 then the working window covers a phase tap and we can
1982 	 * follow the old procedure. Otherwise, we've found the beginning
1983 	 * and we need to increment the dtaps until we find the end.
1984 	 */
1985 	if (d == 0) {
1986 		/*
1987 		 * Step 3a: If we have room, back off by one and
1988 		 *          increment in dtaps.
1989 		 */
1990 		sdr_backup_phase(seq, grp, &work_bgn, &p);
1991 
1992 		/*
1993 		 * Step 4a: go forward from working phase to non working
1994 		 * phase, increment in ptaps.
1995 		 */
1996 		ret = sdr_nonworking_phase(seq, grp, &work_end, &p, &i);
1997 		if (ret)
1998 			return ret;
1999 
2000 		/* Step 5a: Back off one from last, increment in dtaps. */
2001 
2002 		/* Special case code for backing up a phase */
2003 		if (p == 0) {
2004 			p = seq->iocfg->dqs_en_phase_max;
2005 			rw_mgr_decr_vfifo(seq, grp);
2006 		} else {
2007 			p = p - 1;
2008 		}
2009 
2010 		work_end -= seq->iocfg->delay_per_opa_tap;
2011 		scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, p);
2012 
2013 		d = 0;
2014 
2015 		debug_cond(DLEVEL >= 2, "%s:%d p: ptap=%u\n",
2016 			   __func__, __LINE__, p);
2017 	}
2018 
2019 	/* The dtap increment to find the failing edge is done here. */
2020 	sdr_find_phase_delay(seq, 0, 1, grp, &work_end,
2021 			     seq->iocfg->delay_per_dqs_en_dchain_tap, &d);
2022 
2023 	/* Go back to working dtap */
2024 	if (d != 0)
2025 		work_end -= seq->iocfg->delay_per_dqs_en_dchain_tap;
2026 
2027 	debug_cond(DLEVEL >= 2,
2028 		   "%s:%d p/d: ptap=%u dtap=%u end=%u\n",
2029 		   __func__, __LINE__, p, d - 1, work_end);
2030 
2031 	if (work_end < work_bgn) {
2032 		/* nil range */
2033 		debug_cond(DLEVEL >= 2, "%s:%d end-2: failed\n",
2034 			   __func__, __LINE__);
2035 		return -EINVAL;
2036 	}
2037 
2038 	debug_cond(DLEVEL >= 2, "%s:%d found range [%u,%u]\n",
2039 		   __func__, __LINE__, work_bgn, work_end);
2040 
2041 	/*
2042 	 * We need to calculate the number of dtaps that equal a ptap.
2043 	 * To do that we'll back up a ptap and re-find the edge of the
2044 	 * window using dtaps
2045 	 */
2046 	debug_cond(DLEVEL >= 2, "%s:%d calculate dtaps_per_ptap for tracking\n",
2047 		   __func__, __LINE__);
2048 
2049 	/* Special case code for backing up a phase */
2050 	if (p == 0) {
2051 		p = seq->iocfg->dqs_en_phase_max;
2052 		rw_mgr_decr_vfifo(seq, grp);
2053 		debug_cond(DLEVEL >= 2, "%s:%d backedup cycle/phase: p=%u\n",
2054 			   __func__, __LINE__, p);
2055 	} else {
2056 		p = p - 1;
2057 		debug_cond(DLEVEL >= 2, "%s:%d backedup phase only: p=%u",
2058 			   __func__, __LINE__, p);
2059 	}
2060 
2061 	scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, p);
2062 
2063 	/*
2064 	 * Increase dtap until we first see a passing read (in case the
2065 	 * window is smaller than a ptap), and then a failing read to
2066 	 * mark the edge of the window again.
2067 	 */
2068 
2069 	/* Find a passing read. */
2070 	debug_cond(DLEVEL >= 2, "%s:%d find passing read\n",
2071 		   __func__, __LINE__);
2072 
2073 	initial_failing_dtap = d;
2074 
2075 	found_passing_read = !sdr_find_phase_delay(seq, 1, 1, grp, NULL, 0, &d);
2076 	if (found_passing_read) {
2077 		/* Find a failing read. */
2078 		debug_cond(DLEVEL >= 2, "%s:%d find failing read\n",
2079 			   __func__, __LINE__);
2080 		d++;
2081 		found_failing_read = !sdr_find_phase_delay(seq, 0, 1, grp, NULL,
2082 							   0, &d);
2083 	} else {
2084 		debug_cond(DLEVEL >= 1,
2085 			   "%s:%d failed to calculate dtaps per ptap. Fall back on static value\n",
2086 			   __func__, __LINE__);
2087 	}
2088 
2089 	/*
2090 	 * The dynamically calculated dtaps_per_ptap is only valid if we
2091 	 * found a passing/failing read. If we didn't, it means d hit the max
2092 	 * (seq->iocfg->dqs_en_delay_max). Otherwise, dtaps_per_ptap retains its
2093 	 * statically calculated value.
2094 	 */
2095 	if (found_passing_read && found_failing_read)
2096 		dtaps_per_ptap = d - initial_failing_dtap;
2097 
2098 	writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
2099 	debug_cond(DLEVEL >= 2, "%s:%d dtaps_per_ptap=%u - %u = %u",
2100 		   __func__, __LINE__, d, initial_failing_dtap, dtaps_per_ptap);
2101 
2102 	/* Step 6: Find the centre of the window. */
2103 	ret = sdr_find_window_center(seq, grp, work_bgn, work_end);
2104 
2105 	return ret;
2106 }
2107 
2108 /**
2109  * search_stop_check() - Check if the detected edge is valid
2110  * @write:		Perform read (Stage 2) or write (Stage 3) calibration
2111  * @d:			DQS delay
2112  * @rank_bgn:		Rank number
2113  * @write_group:	Write Group
2114  * @read_group:		Read Group
2115  * @bit_chk:		Resulting bit mask after the test
2116  * @sticky_bit_chk:	Resulting sticky bit mask after the test
2117  * @use_read_test:	Perform read test
2118  *
2119  * Test if the found edge is valid.
2120  */
search_stop_check(struct socfpga_sdrseq * seq,const int write,const int d,const int rank_bgn,const u32 write_group,const u32 read_group,u32 * bit_chk,u32 * sticky_bit_chk,const u32 use_read_test)2121 static u32 search_stop_check(struct socfpga_sdrseq *seq, const int write,
2122 			     const int d, const int rank_bgn,
2123 			     const u32 write_group, const u32 read_group,
2124 			     u32 *bit_chk, u32 *sticky_bit_chk,
2125 			     const u32 use_read_test)
2126 {
2127 	const u32 ratio = seq->rwcfg->mem_if_read_dqs_width /
2128 			  seq->rwcfg->mem_if_write_dqs_width;
2129 	const u32 correct_mask = write ? seq->param.write_correct_mask :
2130 					 seq->param.read_correct_mask;
2131 	const u32 per_dqs = write ? seq->rwcfg->mem_dq_per_write_dqs :
2132 				    seq->rwcfg->mem_dq_per_read_dqs;
2133 	u32 ret;
2134 	/*
2135 	 * Stop searching when the read test doesn't pass AND when
2136 	 * we've seen a passing read on every bit.
2137 	 */
2138 	if (write) {			/* WRITE-ONLY */
2139 		ret = !rw_mgr_mem_calibrate_write_test(seq, rank_bgn,
2140 							 write_group, 0,
2141 							 PASS_ONE_BIT, bit_chk,
2142 							 0);
2143 	} else if (use_read_test) {	/* READ-ONLY */
2144 		ret = !rw_mgr_mem_calibrate_read_test(seq, rank_bgn, read_group,
2145 							NUM_READ_PB_TESTS,
2146 							PASS_ONE_BIT, bit_chk,
2147 							0, 0);
2148 	} else {			/* READ-ONLY */
2149 		rw_mgr_mem_calibrate_write_test(seq, rank_bgn, write_group, 0,
2150 						PASS_ONE_BIT, bit_chk, 0);
2151 		*bit_chk = *bit_chk >> (per_dqs *
2152 			(read_group - (write_group * ratio)));
2153 		ret = (*bit_chk == 0);
2154 	}
2155 	*sticky_bit_chk = *sticky_bit_chk | *bit_chk;
2156 	ret = ret && (*sticky_bit_chk == correct_mask);
2157 	debug_cond(DLEVEL >= 2,
2158 		   "%s:%d center(left): dtap=%u => %u == %u && %u",
2159 		   __func__, __LINE__, d,
2160 		   *sticky_bit_chk, correct_mask, ret);
2161 	return ret;
2162 }
2163 
2164 /**
2165  * search_left_edge() - Find left edge of DQ/DQS working phase
2166  * @write:		Perform read (Stage 2) or write (Stage 3) calibration
2167  * @rank_bgn:		Rank number
2168  * @write_group:	Write Group
2169  * @read_group:		Read Group
2170  * @test_bgn:		Rank number to begin the test
2171  * @sticky_bit_chk:	Resulting sticky bit mask after the test
2172  * @left_edge:		Left edge of the DQ/DQS phase
2173  * @right_edge:		Right edge of the DQ/DQS phase
2174  * @use_read_test:	Perform read test
2175  *
2176  * Find left edge of DQ/DQS working phase.
2177  */
search_left_edge(struct socfpga_sdrseq * seq,const int write,const int rank_bgn,const u32 write_group,const u32 read_group,const u32 test_bgn,u32 * sticky_bit_chk,int * left_edge,int * right_edge,const u32 use_read_test)2178 static void search_left_edge(struct socfpga_sdrseq *seq, const int write,
2179 			     const int rank_bgn, const u32 write_group,
2180 			     const u32 read_group, const u32 test_bgn,
2181 			     u32 *sticky_bit_chk, int *left_edge,
2182 			     int *right_edge, const u32 use_read_test)
2183 {
2184 	const u32 delay_max = write ? seq->iocfg->io_out1_delay_max :
2185 				      seq->iocfg->io_in_delay_max;
2186 	const u32 dqs_max = write ? seq->iocfg->io_out1_delay_max :
2187 				    seq->iocfg->dqs_in_delay_max;
2188 	const u32 per_dqs = write ? seq->rwcfg->mem_dq_per_write_dqs :
2189 				    seq->rwcfg->mem_dq_per_read_dqs;
2190 	u32 stop, bit_chk;
2191 	int i, d;
2192 
2193 	for (d = 0; d <= dqs_max; d++) {
2194 		if (write)
2195 			scc_mgr_apply_group_dq_out1_delay(seq, d);
2196 		else
2197 			scc_mgr_apply_group_dq_in_delay(seq, test_bgn, d);
2198 
2199 		writel(0, &sdr_scc_mgr->update);
2200 
2201 		stop = search_stop_check(seq, write, d, rank_bgn, write_group,
2202 					 read_group, &bit_chk, sticky_bit_chk,
2203 					 use_read_test);
2204 		if (stop == 1)
2205 			break;
2206 
2207 		/* stop != 1 */
2208 		for (i = 0; i < per_dqs; i++) {
2209 			if (bit_chk & 1) {
2210 				/*
2211 				 * Remember a passing test as
2212 				 * the left_edge.
2213 				 */
2214 				left_edge[i] = d;
2215 			} else {
2216 				/*
2217 				 * If a left edge has not been seen
2218 				 * yet, then a future passing test
2219 				 * will mark this edge as the right
2220 				 * edge.
2221 				 */
2222 				if (left_edge[i] == delay_max + 1)
2223 					right_edge[i] = -(d + 1);
2224 			}
2225 			bit_chk >>= 1;
2226 		}
2227 	}
2228 
2229 	/* Reset DQ delay chains to 0 */
2230 	if (write)
2231 		scc_mgr_apply_group_dq_out1_delay(seq, 0);
2232 	else
2233 		scc_mgr_apply_group_dq_in_delay(seq, test_bgn, 0);
2234 
2235 	*sticky_bit_chk = 0;
2236 	for (i = per_dqs - 1; i >= 0; i--) {
2237 		debug_cond(DLEVEL >= 2,
2238 			   "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n",
2239 			   __func__, __LINE__, i, left_edge[i],
2240 			   i, right_edge[i]);
2241 
2242 		/*
2243 		 * Check for cases where we haven't found the left edge,
2244 		 * which makes our assignment of the the right edge invalid.
2245 		 * Reset it to the illegal value.
2246 		 */
2247 		if ((left_edge[i] == delay_max + 1) &&
2248 		    (right_edge[i] != delay_max + 1)) {
2249 			right_edge[i] = delay_max + 1;
2250 			debug_cond(DLEVEL >= 2,
2251 				   "%s:%d vfifo_center: reset right_edge[%u]: %d\n",
2252 				   __func__, __LINE__, i, right_edge[i]);
2253 		}
2254 
2255 		/*
2256 		 * Reset sticky bit
2257 		 * READ: except for bits where we have seen both
2258 		 *       the left and right edge.
2259 		 * WRITE: except for bits where we have seen the
2260 		 *        left edge.
2261 		 */
2262 		*sticky_bit_chk <<= 1;
2263 		if (write) {
2264 			if (left_edge[i] != delay_max + 1)
2265 				*sticky_bit_chk |= 1;
2266 		} else {
2267 			if ((left_edge[i] != delay_max + 1) &&
2268 			    (right_edge[i] != delay_max + 1))
2269 				*sticky_bit_chk |= 1;
2270 		}
2271 	}
2272 }
2273 
2274 /**
2275  * search_right_edge() - Find right edge of DQ/DQS working phase
2276  * @write:		Perform read (Stage 2) or write (Stage 3) calibration
2277  * @rank_bgn:		Rank number
2278  * @write_group:	Write Group
2279  * @read_group:		Read Group
2280  * @start_dqs:		DQS start phase
2281  * @start_dqs_en:	DQS enable start phase
2282  * @sticky_bit_chk:	Resulting sticky bit mask after the test
2283  * @left_edge:		Left edge of the DQ/DQS phase
2284  * @right_edge:		Right edge of the DQ/DQS phase
2285  * @use_read_test:	Perform read test
2286  *
2287  * Find right edge of DQ/DQS working phase.
2288  */
search_right_edge(struct socfpga_sdrseq * seq,const int write,const int rank_bgn,const u32 write_group,const u32 read_group,const int start_dqs,const int start_dqs_en,u32 * sticky_bit_chk,int * left_edge,int * right_edge,const u32 use_read_test)2289 static int search_right_edge(struct socfpga_sdrseq *seq, const int write,
2290 			     const int rank_bgn, const u32 write_group,
2291 			     const u32 read_group, const int start_dqs,
2292 			     const int start_dqs_en, u32 *sticky_bit_chk,
2293 			     int *left_edge, int *right_edge,
2294 			     const u32 use_read_test)
2295 {
2296 	const u32 delay_max = write ? seq->iocfg->io_out1_delay_max :
2297 				      seq->iocfg->io_in_delay_max;
2298 	const u32 dqs_max = write ? seq->iocfg->io_out1_delay_max :
2299 				    seq->iocfg->dqs_in_delay_max;
2300 	const u32 per_dqs = write ? seq->rwcfg->mem_dq_per_write_dqs :
2301 				    seq->rwcfg->mem_dq_per_read_dqs;
2302 	u32 stop, bit_chk;
2303 	int i, d;
2304 
2305 	for (d = 0; d <= dqs_max - start_dqs; d++) {
2306 		if (write) {	/* WRITE-ONLY */
2307 			scc_mgr_apply_group_dqs_io_and_oct_out1(seq,
2308 								write_group,
2309 								d + start_dqs);
2310 		} else {	/* READ-ONLY */
2311 			scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
2312 			if (seq->iocfg->shift_dqs_en_when_shift_dqs) {
2313 				u32 delay = d + start_dqs_en;
2314 				if (delay > seq->iocfg->dqs_en_delay_max)
2315 					delay = seq->iocfg->dqs_en_delay_max;
2316 				scc_mgr_set_dqs_en_delay(read_group, delay);
2317 			}
2318 			scc_mgr_load_dqs(read_group);
2319 		}
2320 
2321 		writel(0, &sdr_scc_mgr->update);
2322 
2323 		stop = search_stop_check(seq, write, d, rank_bgn, write_group,
2324 					 read_group, &bit_chk, sticky_bit_chk,
2325 					 use_read_test);
2326 		if (stop == 1) {
2327 			if (write && (d == 0)) {	/* WRITE-ONLY */
2328 				for (i = 0;
2329 				     i < seq->rwcfg->mem_dq_per_write_dqs;
2330 				     i++) {
2331 					/*
2332 					 * d = 0 failed, but it passed when
2333 					 * testing the left edge, so it must be
2334 					 * marginal, set it to -1
2335 					 */
2336 					if (right_edge[i] == delay_max + 1 &&
2337 					    left_edge[i] != delay_max + 1)
2338 						right_edge[i] = -1;
2339 				}
2340 			}
2341 			break;
2342 		}
2343 
2344 		/* stop != 1 */
2345 		for (i = 0; i < per_dqs; i++) {
2346 			if (bit_chk & 1) {
2347 				/*
2348 				 * Remember a passing test as
2349 				 * the right_edge.
2350 				 */
2351 				right_edge[i] = d;
2352 			} else {
2353 				if (d != 0) {
2354 					/*
2355 					 * If a right edge has not
2356 					 * been seen yet, then a future
2357 					 * passing test will mark this
2358 					 * edge as the left edge.
2359 					 */
2360 					if (right_edge[i] == delay_max + 1)
2361 						left_edge[i] = -(d + 1);
2362 				} else {
2363 					/*
2364 					 * d = 0 failed, but it passed
2365 					 * when testing the left edge,
2366 					 * so it must be marginal, set
2367 					 * it to -1
2368 					 */
2369 					if (right_edge[i] == delay_max + 1 &&
2370 					    left_edge[i] != delay_max + 1)
2371 						right_edge[i] = -1;
2372 					/*
2373 					 * If a right edge has not been
2374 					 * seen yet, then a future
2375 					 * passing test will mark this
2376 					 * edge as the left edge.
2377 					 */
2378 					else if (right_edge[i] == delay_max + 1)
2379 						left_edge[i] = -(d + 1);
2380 				}
2381 			}
2382 
2383 			debug_cond(DLEVEL >= 2, "%s:%d center[r,d=%u]: ",
2384 				   __func__, __LINE__, d);
2385 			debug_cond(DLEVEL >= 2,
2386 				   "bit_chk_test=%i left_edge[%u]: %d ",
2387 				   bit_chk & 1, i, left_edge[i]);
2388 			debug_cond(DLEVEL >= 2, "right_edge[%u]: %d\n", i,
2389 				   right_edge[i]);
2390 			bit_chk >>= 1;
2391 		}
2392 	}
2393 
2394 	/* Check that all bits have a window */
2395 	for (i = 0; i < per_dqs; i++) {
2396 		debug_cond(DLEVEL >= 2,
2397 			   "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d",
2398 			   __func__, __LINE__, i, left_edge[i],
2399 			   i, right_edge[i]);
2400 		if ((left_edge[i] == dqs_max + 1) ||
2401 		    (right_edge[i] == dqs_max + 1))
2402 			return i + 1;	/* FIXME: If we fail, retval > 0 */
2403 	}
2404 
2405 	return 0;
2406 }
2407 
2408 /**
2409  * get_window_mid_index() - Find the best middle setting of DQ/DQS phase
2410  * @write:		Perform read (Stage 2) or write (Stage 3) calibration
2411  * @left_edge:		Left edge of the DQ/DQS phase
2412  * @right_edge:		Right edge of the DQ/DQS phase
2413  * @mid_min:		Best DQ/DQS phase middle setting
2414  *
2415  * Find index and value of the middle of the DQ/DQS working phase.
2416  */
get_window_mid_index(struct socfpga_sdrseq * seq,const int write,int * left_edge,int * right_edge,int * mid_min)2417 static int get_window_mid_index(struct socfpga_sdrseq *seq,
2418 				const int write, int *left_edge,
2419 				int *right_edge, int *mid_min)
2420 {
2421 	const u32 per_dqs = write ? seq->rwcfg->mem_dq_per_write_dqs :
2422 				    seq->rwcfg->mem_dq_per_read_dqs;
2423 	int i, mid, min_index;
2424 
2425 	/* Find middle of window for each DQ bit */
2426 	*mid_min = left_edge[0] - right_edge[0];
2427 	min_index = 0;
2428 	for (i = 1; i < per_dqs; i++) {
2429 		mid = left_edge[i] - right_edge[i];
2430 		if (mid < *mid_min) {
2431 			*mid_min = mid;
2432 			min_index = i;
2433 		}
2434 	}
2435 
2436 	/*
2437 	 * -mid_min/2 represents the amount that we need to move DQS.
2438 	 * If mid_min is odd and positive we'll need to add one to make
2439 	 * sure the rounding in further calculations is correct (always
2440 	 * bias to the right), so just add 1 for all positive values.
2441 	 */
2442 	if (*mid_min > 0)
2443 		(*mid_min)++;
2444 	*mid_min = *mid_min / 2;
2445 
2446 	debug_cond(DLEVEL >= 1, "%s:%d vfifo_center: *mid_min=%d (index=%u)\n",
2447 		   __func__, __LINE__, *mid_min, min_index);
2448 	return min_index;
2449 }
2450 
2451 /**
2452  * center_dq_windows() - Center the DQ/DQS windows
2453  * @write:		Perform read (Stage 2) or write (Stage 3) calibration
2454  * @left_edge:		Left edge of the DQ/DQS phase
2455  * @right_edge:		Right edge of the DQ/DQS phase
2456  * @mid_min:		Adjusted DQ/DQS phase middle setting
2457  * @orig_mid_min:	Original DQ/DQS phase middle setting
2458  * @min_index:		DQ/DQS phase middle setting index
2459  * @test_bgn:		Rank number to begin the test
2460  * @dq_margin:		Amount of shift for the DQ
2461  * @dqs_margin:		Amount of shift for the DQS
2462  *
2463  * Align the DQ/DQS windows in each group.
2464  */
center_dq_windows(struct socfpga_sdrseq * seq,const int write,int * left_edge,int * right_edge,const int mid_min,const int orig_mid_min,const int min_index,const int test_bgn,int * dq_margin,int * dqs_margin)2465 static void center_dq_windows(struct socfpga_sdrseq *seq,
2466 			      const int write, int *left_edge, int *right_edge,
2467 			      const int mid_min, const int orig_mid_min,
2468 			      const int min_index, const int test_bgn,
2469 			      int *dq_margin, int *dqs_margin)
2470 {
2471 	const s32 delay_max = write ? seq->iocfg->io_out1_delay_max :
2472 				      seq->iocfg->io_in_delay_max;
2473 	const s32 per_dqs = write ? seq->rwcfg->mem_dq_per_write_dqs :
2474 				    seq->rwcfg->mem_dq_per_read_dqs;
2475 	const s32 delay_off = write ? SCC_MGR_IO_OUT1_DELAY_OFFSET :
2476 				      SCC_MGR_IO_IN_DELAY_OFFSET;
2477 	const s32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | delay_off;
2478 
2479 	s32 temp_dq_io_delay1;
2480 	int shift_dq, i, p;
2481 
2482 	/* Initialize data for export structures */
2483 	*dqs_margin = delay_max + 1;
2484 	*dq_margin  = delay_max + 1;
2485 
2486 	/* add delay to bring centre of all DQ windows to the same "level" */
2487 	for (i = 0, p = test_bgn; i < per_dqs; i++, p++) {
2488 		/* Use values before divide by 2 to reduce round off error */
2489 		shift_dq = (left_edge[i] - right_edge[i] -
2490 			(left_edge[min_index] - right_edge[min_index]))/2  +
2491 			(orig_mid_min - mid_min);
2492 
2493 		debug_cond(DLEVEL >= 2,
2494 			   "vfifo_center: before: shift_dq[%u]=%d\n",
2495 			   i, shift_dq);
2496 
2497 		temp_dq_io_delay1 = readl(addr + (i << 2));
2498 
2499 		if (shift_dq + temp_dq_io_delay1 > delay_max)
2500 			shift_dq = delay_max - temp_dq_io_delay1;
2501 		else if (shift_dq + temp_dq_io_delay1 < 0)
2502 			shift_dq = -temp_dq_io_delay1;
2503 
2504 		debug_cond(DLEVEL >= 2,
2505 			   "vfifo_center: after: shift_dq[%u]=%d\n",
2506 			   i, shift_dq);
2507 
2508 		if (write)
2509 			scc_mgr_set_dq_out1_delay(i,
2510 						  temp_dq_io_delay1 + shift_dq);
2511 		else
2512 			scc_mgr_set_dq_in_delay(p,
2513 						temp_dq_io_delay1 + shift_dq);
2514 
2515 		scc_mgr_load_dq(p);
2516 
2517 		debug_cond(DLEVEL >= 2,
2518 			   "vfifo_center: margin[%u]=[%d,%d]\n", i,
2519 			   left_edge[i] - shift_dq + (-mid_min),
2520 			   right_edge[i] + shift_dq - (-mid_min));
2521 
2522 		/* To determine values for export structures */
2523 		if (left_edge[i] - shift_dq + (-mid_min) < *dq_margin)
2524 			*dq_margin = left_edge[i] - shift_dq + (-mid_min);
2525 
2526 		if (right_edge[i] + shift_dq - (-mid_min) < *dqs_margin)
2527 			*dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2528 	}
2529 }
2530 
2531 /**
2532  * rw_mgr_mem_calibrate_vfifo_center() - Per-bit deskew DQ and centering
2533  * @rank_bgn:		Rank number
2534  * @rw_group:		Read/Write Group
2535  * @test_bgn:		Rank at which the test begins
2536  * @use_read_test:	Perform a read test
2537  * @update_fom:		Update FOM
2538  *
2539  * Per-bit deskew DQ and centering.
2540  */
rw_mgr_mem_calibrate_vfifo_center(struct socfpga_sdrseq * seq,const u32 rank_bgn,const u32 rw_group,const u32 test_bgn,const int use_read_test,const int update_fom)2541 static int rw_mgr_mem_calibrate_vfifo_center(struct socfpga_sdrseq *seq,
2542 					     const u32 rank_bgn,
2543 					     const u32 rw_group,
2544 					     const u32 test_bgn,
2545 					     const int use_read_test,
2546 					     const int update_fom)
2547 {
2548 	const u32 addr =
2549 		SDR_PHYGRP_SCCGRP_ADDRESS + SCC_MGR_DQS_IN_DELAY_OFFSET +
2550 		(rw_group << 2);
2551 	/*
2552 	 * Store these as signed since there are comparisons with
2553 	 * signed numbers.
2554 	 */
2555 	u32 sticky_bit_chk;
2556 	s32 left_edge[seq->rwcfg->mem_dq_per_read_dqs];
2557 	s32 right_edge[seq->rwcfg->mem_dq_per_read_dqs];
2558 	s32 orig_mid_min, mid_min;
2559 	s32 new_dqs, start_dqs, start_dqs_en = 0, final_dqs_en;
2560 	s32 dq_margin, dqs_margin;
2561 	int i, min_index;
2562 	int ret;
2563 
2564 	debug("%s:%d: %u %u", __func__, __LINE__, rw_group, test_bgn);
2565 
2566 	start_dqs = readl(addr);
2567 	if (seq->iocfg->shift_dqs_en_when_shift_dqs)
2568 		start_dqs_en = readl(addr - seq->iocfg->dqs_en_delay_offset);
2569 
2570 	/* set the left and right edge of each bit to an illegal value */
2571 	/* use (seq->iocfg->io_in_delay_max + 1) as an illegal value */
2572 	sticky_bit_chk = 0;
2573 	for (i = 0; i < seq->rwcfg->mem_dq_per_read_dqs; i++) {
2574 		left_edge[i]  = seq->iocfg->io_in_delay_max + 1;
2575 		right_edge[i] = seq->iocfg->io_in_delay_max + 1;
2576 	}
2577 
2578 	/* Search for the left edge of the window for each bit */
2579 	search_left_edge(seq, 0, rank_bgn, rw_group, rw_group, test_bgn,
2580 			 &sticky_bit_chk,
2581 			 left_edge, right_edge, use_read_test);
2582 
2583 
2584 	/* Search for the right edge of the window for each bit */
2585 	ret = search_right_edge(seq, 0, rank_bgn, rw_group, rw_group,
2586 				start_dqs, start_dqs_en,
2587 				&sticky_bit_chk,
2588 				left_edge, right_edge, use_read_test);
2589 	if (ret) {
2590 		/*
2591 		 * Restore delay chain settings before letting the loop
2592 		 * in rw_mgr_mem_calibrate_vfifo to retry different
2593 		 * dqs/ck relationships.
2594 		 */
2595 		scc_mgr_set_dqs_bus_in_delay(rw_group, start_dqs);
2596 		if (seq->iocfg->shift_dqs_en_when_shift_dqs)
2597 			scc_mgr_set_dqs_en_delay(rw_group, start_dqs_en);
2598 
2599 		scc_mgr_load_dqs(rw_group);
2600 		writel(0, &sdr_scc_mgr->update);
2601 
2602 		debug_cond(DLEVEL >= 1,
2603 			   "%s:%d vfifo_center: failed to find edge [%u]: %d %d",
2604 			   __func__, __LINE__, i, left_edge[i], right_edge[i]);
2605 		if (use_read_test) {
2606 			set_failing_group_stage(seq, rw_group *
2607 				seq->rwcfg->mem_dq_per_read_dqs + i,
2608 				CAL_STAGE_VFIFO,
2609 				CAL_SUBSTAGE_VFIFO_CENTER);
2610 		} else {
2611 			set_failing_group_stage(seq, rw_group *
2612 				seq->rwcfg->mem_dq_per_read_dqs + i,
2613 				CAL_STAGE_VFIFO_AFTER_WRITES,
2614 				CAL_SUBSTAGE_VFIFO_CENTER);
2615 		}
2616 		return -EIO;
2617 	}
2618 
2619 	min_index = get_window_mid_index(seq, 0, left_edge, right_edge,
2620 					 &mid_min);
2621 
2622 	/* Determine the amount we can change DQS (which is -mid_min) */
2623 	orig_mid_min = mid_min;
2624 	new_dqs = start_dqs - mid_min;
2625 	if (new_dqs > seq->iocfg->dqs_in_delay_max)
2626 		new_dqs = seq->iocfg->dqs_in_delay_max;
2627 	else if (new_dqs < 0)
2628 		new_dqs = 0;
2629 
2630 	mid_min = start_dqs - new_dqs;
2631 	debug_cond(DLEVEL >= 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2632 		   mid_min, new_dqs);
2633 
2634 	if (seq->iocfg->shift_dqs_en_when_shift_dqs) {
2635 		if (start_dqs_en - mid_min > seq->iocfg->dqs_en_delay_max)
2636 			mid_min += start_dqs_en - mid_min -
2637 				   seq->iocfg->dqs_en_delay_max;
2638 		else if (start_dqs_en - mid_min < 0)
2639 			mid_min += start_dqs_en - mid_min;
2640 	}
2641 	new_dqs = start_dqs - mid_min;
2642 
2643 	debug_cond(DLEVEL >= 1,
2644 		   "vfifo_center: start_dqs=%d start_dqs_en=%d new_dqs=%d mid_min=%d\n",
2645 		   start_dqs,
2646 		   seq->iocfg->shift_dqs_en_when_shift_dqs ? start_dqs_en : -1,
2647 		   new_dqs, mid_min);
2648 
2649 	/* Add delay to bring centre of all DQ windows to the same "level". */
2650 	center_dq_windows(seq, 0, left_edge, right_edge, mid_min, orig_mid_min,
2651 			  min_index, test_bgn, &dq_margin, &dqs_margin);
2652 
2653 	/* Move DQS-en */
2654 	if (seq->iocfg->shift_dqs_en_when_shift_dqs) {
2655 		final_dqs_en = start_dqs_en - mid_min;
2656 		scc_mgr_set_dqs_en_delay(rw_group, final_dqs_en);
2657 		scc_mgr_load_dqs(rw_group);
2658 	}
2659 
2660 	/* Move DQS */
2661 	scc_mgr_set_dqs_bus_in_delay(rw_group, new_dqs);
2662 	scc_mgr_load_dqs(rw_group);
2663 	debug_cond(DLEVEL >= 2,
2664 		   "%s:%d vfifo_center: dq_margin=%d dqs_margin=%d",
2665 		   __func__, __LINE__, dq_margin, dqs_margin);
2666 
2667 	/*
2668 	 * Do not remove this line as it makes sure all of our decisions
2669 	 * have been applied. Apply the update bit.
2670 	 */
2671 	writel(0, &sdr_scc_mgr->update);
2672 
2673 	if ((dq_margin < 0) || (dqs_margin < 0))
2674 		return -EINVAL;
2675 
2676 	return 0;
2677 }
2678 
2679 /**
2680  * rw_mgr_mem_calibrate_guaranteed_write() - Perform guaranteed write into the
2681  * device
2682  * @rw_group:	Read/Write Group
2683  * @phase:	DQ/DQS phase
2684  *
2685  * Because initially no communication ca be reliably performed with the memory
2686  * device, the sequencer uses a guaranteed write mechanism to write data into
2687  * the memory device.
2688  */
rw_mgr_mem_calibrate_guaranteed_write(struct socfpga_sdrseq * seq,const u32 rw_group,const u32 phase)2689 static int rw_mgr_mem_calibrate_guaranteed_write(struct socfpga_sdrseq *seq,
2690 						 const u32 rw_group,
2691 						 const u32 phase)
2692 {
2693 	int ret;
2694 
2695 	/* Set a particular DQ/DQS phase. */
2696 	scc_mgr_set_dqdqs_output_phase_all_ranks(seq, rw_group, phase);
2697 
2698 	debug_cond(DLEVEL >= 1, "%s:%d guaranteed write: g=%u p=%u\n",
2699 		   __func__, __LINE__, rw_group, phase);
2700 
2701 	/*
2702 	 * Altera EMI_RM 2015.05.04 :: Figure 1-25
2703 	 * Load up the patterns used by read calibration using the
2704 	 * current DQDQS phase.
2705 	 */
2706 	rw_mgr_mem_calibrate_read_load_patterns(seq, 0, 1);
2707 
2708 	if (seq->gbl.phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)
2709 		return 0;
2710 
2711 	/*
2712 	 * Altera EMI_RM 2015.05.04 :: Figure 1-26
2713 	 * Back-to-Back reads of the patterns used for calibration.
2714 	 */
2715 	ret = rw_mgr_mem_calibrate_read_test_patterns(seq, 0, rw_group, 1);
2716 	if (ret)
2717 		debug_cond(DLEVEL >= 1,
2718 			   "%s:%d Guaranteed read test failed: g=%u p=%u\n",
2719 			   __func__, __LINE__, rw_group, phase);
2720 	return ret;
2721 }
2722 
2723 /**
2724  * rw_mgr_mem_calibrate_dqs_enable_calibration() - DQS Enable Calibration
2725  * @rw_group:	Read/Write Group
2726  * @test_bgn:	Rank at which the test begins
2727  *
2728  * DQS enable calibration ensures reliable capture of the DQ signal without
2729  * glitches on the DQS line.
2730  */
2731 static int
rw_mgr_mem_calibrate_dqs_enable_calibration(struct socfpga_sdrseq * seq,const u32 rw_group,const u32 test_bgn)2732 rw_mgr_mem_calibrate_dqs_enable_calibration(struct socfpga_sdrseq *seq,
2733 					    const u32 rw_group,
2734 					    const u32 test_bgn)
2735 {
2736 	/*
2737 	 * Altera EMI_RM 2015.05.04 :: Figure 1-27
2738 	 * DQS and DQS Eanble Signal Relationships.
2739 	 */
2740 
2741 	/* We start at zero, so have one less dq to devide among */
2742 	const u32 delay_step = seq->iocfg->io_in_delay_max /
2743 			       (seq->rwcfg->mem_dq_per_read_dqs - 1);
2744 	int ret;
2745 	u32 i, p, d, r;
2746 
2747 	debug("%s:%d (%u,%u)\n", __func__, __LINE__, rw_group, test_bgn);
2748 
2749 	/* Try different dq_in_delays since the DQ path is shorter than DQS. */
2750 	for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
2751 	     r += NUM_RANKS_PER_SHADOW_REG) {
2752 		for (i = 0, p = test_bgn, d = 0;
2753 		     i < seq->rwcfg->mem_dq_per_read_dqs;
2754 		     i++, p++, d += delay_step) {
2755 			debug_cond(DLEVEL >= 1,
2756 				   "%s:%d: g=%u r=%u i=%u p=%u d=%u\n",
2757 				   __func__, __LINE__, rw_group, r, i, p, d);
2758 
2759 			scc_mgr_set_dq_in_delay(p, d);
2760 			scc_mgr_load_dq(p);
2761 		}
2762 
2763 		writel(0, &sdr_scc_mgr->update);
2764 	}
2765 
2766 	/*
2767 	 * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
2768 	 * dq_in_delay values
2769 	 */
2770 	ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(seq, rw_group);
2771 
2772 	debug_cond(DLEVEL >= 1,
2773 		   "%s:%d: g=%u found=%u; Reseting delay chain to zero\n",
2774 		   __func__, __LINE__, rw_group, !ret);
2775 
2776 	for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
2777 	     r += NUM_RANKS_PER_SHADOW_REG) {
2778 		scc_mgr_apply_group_dq_in_delay(seq, test_bgn, 0);
2779 		writel(0, &sdr_scc_mgr->update);
2780 	}
2781 
2782 	return ret;
2783 }
2784 
2785 /**
2786  * rw_mgr_mem_calibrate_dq_dqs_centering() - Centering DQ/DQS
2787  * @rw_group:		Read/Write Group
2788  * @test_bgn:		Rank at which the test begins
2789  * @use_read_test:	Perform a read test
2790  * @update_fom:		Update FOM
2791  *
2792  * The centerin DQ/DQS stage attempts to align DQ and DQS signals on reads
2793  * within a group.
2794  */
2795 static int
rw_mgr_mem_calibrate_dq_dqs_centering(struct socfpga_sdrseq * seq,const u32 rw_group,const u32 test_bgn,const int use_read_test,const int update_fom)2796 rw_mgr_mem_calibrate_dq_dqs_centering(struct socfpga_sdrseq *seq,
2797 				      const u32 rw_group, const u32 test_bgn,
2798 				      const int use_read_test,
2799 				      const int update_fom)
2800 
2801 {
2802 	int ret, grp_calibrated;
2803 	u32 rank_bgn, sr;
2804 
2805 	/*
2806 	 * Altera EMI_RM 2015.05.04 :: Figure 1-28
2807 	 * Read per-bit deskew can be done on a per shadow register basis.
2808 	 */
2809 	grp_calibrated = 1;
2810 	for (rank_bgn = 0, sr = 0;
2811 	     rank_bgn < seq->rwcfg->mem_number_of_ranks;
2812 	     rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
2813 		ret = rw_mgr_mem_calibrate_vfifo_center(seq, rank_bgn, rw_group,
2814 							test_bgn,
2815 							use_read_test,
2816 							update_fom);
2817 		if (!ret)
2818 			continue;
2819 
2820 		grp_calibrated = 0;
2821 	}
2822 
2823 	if (!grp_calibrated)
2824 		return -EIO;
2825 
2826 	return 0;
2827 }
2828 
2829 /**
2830  * rw_mgr_mem_calibrate_vfifo() - Calibrate the read valid prediction FIFO
2831  * @rw_group:		Read/Write Group
2832  * @test_bgn:		Rank at which the test begins
2833  *
2834  * Stage 1: Calibrate the read valid prediction FIFO.
2835  *
2836  * This function implements UniPHY calibration Stage 1, as explained in
2837  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2838  *
2839  * - read valid prediction will consist of finding:
2840  *   - DQS enable phase and DQS enable delay (DQS Enable Calibration)
2841  *   - DQS input phase  and DQS input delay (DQ/DQS Centering)
2842  *  - we also do a per-bit deskew on the DQ lines.
2843  */
rw_mgr_mem_calibrate_vfifo(struct socfpga_sdrseq * seq,const u32 rw_group,const u32 test_bgn)2844 static int rw_mgr_mem_calibrate_vfifo(struct socfpga_sdrseq *seq,
2845 				      const u32 rw_group, const u32 test_bgn)
2846 {
2847 	u32 p, d;
2848 	u32 dtaps_per_ptap;
2849 	u32 failed_substage;
2850 
2851 	int ret;
2852 
2853 	debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2854 
2855 	/* Update info for sims */
2856 	reg_file_set_group(rw_group);
2857 	reg_file_set_stage(CAL_STAGE_VFIFO);
2858 	reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2859 
2860 	failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2861 
2862 	/* USER Determine number of delay taps for each phase tap. */
2863 	dtaps_per_ptap = DIV_ROUND_UP(seq->iocfg->delay_per_opa_tap,
2864 				      seq->iocfg->delay_per_dqs_en_dchain_tap)
2865 				      - 1;
2866 
2867 	for (d = 0; d <= dtaps_per_ptap; d += 2) {
2868 		/*
2869 		 * In RLDRAMX we may be messing the delay of pins in
2870 		 * the same write rw_group but outside of the current read
2871 		 * the rw_group, but that's ok because we haven't calibrated
2872 		 * output side yet.
2873 		 */
2874 		if (d > 0) {
2875 			scc_mgr_apply_group_all_out_delay_add_all_ranks(seq,
2876 									rw_group,
2877 									d);
2878 		}
2879 
2880 		for (p = 0; p <= seq->iocfg->dqdqs_out_phase_max; p++) {
2881 			/* 1) Guaranteed Write */
2882 			ret = rw_mgr_mem_calibrate_guaranteed_write(seq,
2883 								    rw_group,
2884 								    p);
2885 			if (ret)
2886 				break;
2887 
2888 			/* 2) DQS Enable Calibration */
2889 			ret = rw_mgr_mem_calibrate_dqs_enable_calibration(seq,
2890 									  rw_group,
2891 									  test_bgn);
2892 			if (ret) {
2893 				failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2894 				continue;
2895 			}
2896 
2897 			/* 3) Centering DQ/DQS */
2898 			/*
2899 			 * If doing read after write calibration, do not update
2900 			 * FOM now. Do it then.
2901 			 */
2902 			ret = rw_mgr_mem_calibrate_dq_dqs_centering(seq,
2903 								    rw_group,
2904 								    test_bgn,
2905 								    1, 0);
2906 			if (ret) {
2907 				failed_substage = CAL_SUBSTAGE_VFIFO_CENTER;
2908 				continue;
2909 			}
2910 
2911 			/* All done. */
2912 			goto cal_done_ok;
2913 		}
2914 	}
2915 
2916 	/* Calibration Stage 1 failed. */
2917 	set_failing_group_stage(seq, rw_group, CAL_STAGE_VFIFO,
2918 				failed_substage);
2919 	return 0;
2920 
2921 	/* Calibration Stage 1 completed OK. */
2922 cal_done_ok:
2923 	/*
2924 	 * Reset the delay chains back to zero if they have moved > 1
2925 	 * (check for > 1 because loop will increase d even when pass in
2926 	 * first case).
2927 	 */
2928 	if (d > 2)
2929 		scc_mgr_zero_group(seq, rw_group, 1);
2930 
2931 	return 1;
2932 }
2933 
2934 /**
2935  * rw_mgr_mem_calibrate_vfifo_end() - DQ/DQS Centering.
2936  * @rw_group:		Read/Write Group
2937  * @test_bgn:		Rank at which the test begins
2938  *
2939  * Stage 3: DQ/DQS Centering.
2940  *
2941  * This function implements UniPHY calibration Stage 3, as explained in
2942  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2943  */
rw_mgr_mem_calibrate_vfifo_end(struct socfpga_sdrseq * seq,const u32 rw_group,const u32 test_bgn)2944 static int rw_mgr_mem_calibrate_vfifo_end(struct socfpga_sdrseq *seq,
2945 					  const u32 rw_group,
2946 					  const u32 test_bgn)
2947 {
2948 	int ret;
2949 
2950 	debug("%s:%d %u %u", __func__, __LINE__, rw_group, test_bgn);
2951 
2952 	/* Update info for sims. */
2953 	reg_file_set_group(rw_group);
2954 	reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2955 	reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2956 
2957 	ret = rw_mgr_mem_calibrate_dq_dqs_centering(seq, rw_group, test_bgn, 0,
2958 						    1);
2959 	if (ret)
2960 		set_failing_group_stage(seq, rw_group,
2961 					CAL_STAGE_VFIFO_AFTER_WRITES,
2962 					CAL_SUBSTAGE_VFIFO_CENTER);
2963 	return ret;
2964 }
2965 
2966 /**
2967  * rw_mgr_mem_calibrate_lfifo() - Minimize latency
2968  *
2969  * Stage 4: Minimize latency.
2970  *
2971  * This function implements UniPHY calibration Stage 4, as explained in
2972  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2973  * Calibrate LFIFO to find smallest read latency.
2974  */
rw_mgr_mem_calibrate_lfifo(struct socfpga_sdrseq * seq)2975 static u32 rw_mgr_mem_calibrate_lfifo(struct socfpga_sdrseq *seq)
2976 {
2977 	int found_one = 0;
2978 
2979 	debug("%s:%d\n", __func__, __LINE__);
2980 
2981 	/* Update info for sims. */
2982 	reg_file_set_stage(CAL_STAGE_LFIFO);
2983 	reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2984 
2985 	/* Load up the patterns used by read calibration for all ranks */
2986 	rw_mgr_mem_calibrate_read_load_patterns(seq, 0, 1);
2987 
2988 	do {
2989 		writel(seq->gbl.curr_read_lat, &phy_mgr_cfg->phy_rlat);
2990 		debug_cond(DLEVEL >= 2, "%s:%d lfifo: read_lat=%u",
2991 			   __func__, __LINE__, seq->gbl.curr_read_lat);
2992 
2993 		if (!rw_mgr_mem_calibrate_read_test_all_ranks(seq, 0,
2994 							      NUM_READ_TESTS,
2995 							      PASS_ALL_BITS, 1))
2996 			break;
2997 
2998 		found_one = 1;
2999 		/*
3000 		 * Reduce read latency and see if things are
3001 		 * working correctly.
3002 		 */
3003 		seq->gbl.curr_read_lat--;
3004 	} while (seq->gbl.curr_read_lat > 0);
3005 
3006 	/* Reset the fifos to get pointers to known state. */
3007 	writel(0, &phy_mgr_cmd->fifo_reset);
3008 
3009 	if (found_one) {
3010 		/* Add a fudge factor to the read latency that was determined */
3011 		seq->gbl.curr_read_lat += 2;
3012 		writel(seq->gbl.curr_read_lat, &phy_mgr_cfg->phy_rlat);
3013 		debug_cond(DLEVEL >= 2,
3014 			   "%s:%d lfifo: success: using read_lat=%u\n",
3015 			   __func__, __LINE__, seq->gbl.curr_read_lat);
3016 	} else {
3017 		set_failing_group_stage(seq, 0xff, CAL_STAGE_LFIFO,
3018 					CAL_SUBSTAGE_READ_LATENCY);
3019 
3020 		debug_cond(DLEVEL >= 2,
3021 			   "%s:%d lfifo: failed at initial read_lat=%u\n",
3022 			   __func__, __LINE__, seq->gbl.curr_read_lat);
3023 	}
3024 
3025 	return found_one;
3026 }
3027 
3028 /**
3029  * search_window() - Search for the/part of the window with DM/DQS shift
3030  * @search_dm:		If 1, search for the DM shift, if 0, search for DQS
3031  *			shift
3032  * @rank_bgn:		Rank number
3033  * @write_group:	Write Group
3034  * @bgn_curr:		Current window begin
3035  * @end_curr:		Current window end
3036  * @bgn_best:		Current best window begin
3037  * @end_best:		Current best window end
3038  * @win_best:		Size of the best window
3039  * @new_dqs:		New DQS value (only applicable if search_dm = 0).
3040  *
3041  * Search for the/part of the window with DM/DQS shift.
3042  */
search_window(struct socfpga_sdrseq * seq,const int search_dm,const u32 rank_bgn,const u32 write_group,int * bgn_curr,int * end_curr,int * bgn_best,int * end_best,int * win_best,int new_dqs)3043 static void search_window(struct socfpga_sdrseq *seq,
3044 			  const int search_dm, const u32 rank_bgn,
3045 			  const u32 write_group, int *bgn_curr, int *end_curr,
3046 			  int *bgn_best, int *end_best, int *win_best,
3047 			  int new_dqs)
3048 {
3049 	u32 bit_chk;
3050 	const int max = seq->iocfg->io_out1_delay_max - new_dqs;
3051 	int d, di;
3052 
3053 	/* Search for the/part of the window with DM/DQS shift. */
3054 	for (di = max; di >= 0; di -= DELTA_D) {
3055 		if (search_dm) {
3056 			d = di;
3057 			scc_mgr_apply_group_dm_out1_delay(seq, d);
3058 		} else {
3059 			/* For DQS, we go from 0...max */
3060 			d = max - di;
3061 			/*
3062 			 * Note: This only shifts DQS, so are we limiting
3063 			 *       ourselves to width of DQ unnecessarily.
3064 			 */
3065 			scc_mgr_apply_group_dqs_io_and_oct_out1(seq,
3066 								write_group,
3067 								d + new_dqs);
3068 		}
3069 
3070 		writel(0, &sdr_scc_mgr->update);
3071 
3072 		if (rw_mgr_mem_calibrate_write_test(seq, rank_bgn, write_group,
3073 						    1, PASS_ALL_BITS, &bit_chk,
3074 						    0)) {
3075 			/* Set current end of the window. */
3076 			*end_curr = search_dm ? -d : d;
3077 
3078 			/*
3079 			 * If a starting edge of our window has not been seen
3080 			 * this is our current start of the DM window.
3081 			 */
3082 			if (*bgn_curr == seq->iocfg->io_out1_delay_max + 1)
3083 				*bgn_curr = search_dm ? -d : d;
3084 
3085 			/*
3086 			 * If current window is bigger than best seen.
3087 			 * Set best seen to be current window.
3088 			 */
3089 			if ((*end_curr - *bgn_curr + 1) > *win_best) {
3090 				*win_best = *end_curr - *bgn_curr + 1;
3091 				*bgn_best = *bgn_curr;
3092 				*end_best = *end_curr;
3093 			}
3094 		} else {
3095 			/* We just saw a failing test. Reset temp edge. */
3096 			*bgn_curr = seq->iocfg->io_out1_delay_max + 1;
3097 			*end_curr = seq->iocfg->io_out1_delay_max + 1;
3098 
3099 			/* Early exit is only applicable to DQS. */
3100 			if (search_dm)
3101 				continue;
3102 
3103 			/*
3104 			 * Early exit optimization: if the remaining delay
3105 			 * chain space is less than already seen largest
3106 			 * window we can exit.
3107 			 */
3108 			if (*win_best - 1 > seq->iocfg->io_out1_delay_max
3109 				- new_dqs - d)
3110 				break;
3111 		}
3112 	}
3113 }
3114 
/**
 * rw_mgr_mem_calibrate_writes_center() - Center all windows
 * @seq:		Sequencer context (rwcfg, iocfg and gbl are used)
 * @rank_bgn:		Rank number
 * @write_group:	Write group
 * @test_bgn:		Start index passed to search_left_edge(); also used
 *			to derive the failing group reported when
 *			search_right_edge() fails
 *
 * Center all windows. Do per-bit-deskew to possibly increase size of
 * certain windows. The DQ windows are centred first, then the DM window is
 * searched (DM shift followed by DQS shift) and the DM delay is set to the
 * middle of the window that was found.
 *
 * Return: 0 on success, -EINVAL if search_right_edge() fails or if any of
 * the DQ, DQS or DM margins is negative.
 */
static int
rw_mgr_mem_calibrate_writes_center(struct socfpga_sdrseq *seq,
				   const u32 rank_bgn, const u32 write_group,
				   const u32 test_bgn)
{
	int i;
	u32 sticky_bit_chk;
	u32 min_index;
	/* Per-bit window edges, one entry per DQ of the write group. */
	int left_edge[seq->rwcfg->mem_dq_per_write_dqs];
	int right_edge[seq->rwcfg->mem_dq_per_write_dqs];
	int mid;
	int mid_min, orig_mid_min;
	int new_dqs, start_dqs;
	int dq_margin, dqs_margin, dm_margin;
	/* (io_out1_delay_max + 1) marks "no edge seen yet" for the DM search. */
	int bgn_curr = seq->iocfg->io_out1_delay_max + 1;
	int end_curr = seq->iocfg->io_out1_delay_max + 1;
	int bgn_best = seq->iocfg->io_out1_delay_max + 1;
	int end_best = seq->iocfg->io_out1_delay_max + 1;
	int win_best = 0;

	int ret;

	debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn);

	dm_margin = 0;

	/* Read back the current DQS setting from the SCC IO_OUT1 registers. */
	start_dqs = readl((SDR_PHYGRP_SCCGRP_ADDRESS |
			  SCC_MGR_IO_OUT1_DELAY_OFFSET) +
			  (seq->rwcfg->mem_dq_per_write_dqs << 2));

	/* Per-bit deskew. */

	/*
	 * Set the left and right edge of each bit to an illegal value.
	 * Use (seq->iocfg->io_out1_delay_max + 1) as an illegal value.
	 */
	sticky_bit_chk = 0;
	for (i = 0; i < seq->rwcfg->mem_dq_per_write_dqs; i++) {
		left_edge[i]  = seq->iocfg->io_out1_delay_max + 1;
		right_edge[i] = seq->iocfg->io_out1_delay_max + 1;
	}

	/* Search for the left edge of the window for each bit. */
	search_left_edge(seq, 1, rank_bgn, write_group, 0, test_bgn,
			 &sticky_bit_chk,
			 left_edge, right_edge, 0);

	/* Search for the right edge of the window for each bit. */
	ret = search_right_edge(seq, 1, rank_bgn, write_group, 0,
				start_dqs, 0,
				&sticky_bit_chk,
				left_edge, right_edge, 0);
	if (ret) {
		set_failing_group_stage(seq, test_bgn + ret - 1,
					CAL_STAGE_WRITES,
					CAL_SUBSTAGE_WRITES_CENTER);
		return -EINVAL;
	}

	min_index = get_window_mid_index(seq, 1, left_edge, right_edge,
					 &mid_min);

	/*
	 * Determine the amount we can change DQS (which is -mid_min).
	 * For writes DQS is left where it started, so the shift is zero.
	 */
	orig_mid_min = mid_min;
	new_dqs = start_dqs;
	mid_min = 0;
	debug_cond(DLEVEL >= 1,
		   "%s:%d write_center: start_dqs=%d new_dqs=%d mid_min=%d\n",
		   __func__, __LINE__, start_dqs, new_dqs, mid_min);

	/* Add delay to bring centre of all DQ windows to the same "level". */
	center_dq_windows(seq, 1, left_edge, right_edge, mid_min, orig_mid_min,
			  min_index, 0, &dq_margin, &dqs_margin);

	/* Move DQS */
	scc_mgr_apply_group_dqs_io_and_oct_out1(seq, write_group, new_dqs);
	writel(0, &sdr_scc_mgr->update);

	/* Centre DM */
	debug_cond(DLEVEL >= 2, "%s:%d write_center: DM\n", __func__, __LINE__);

	/* Search for the/part of the window with DM shift. */
	search_window(seq, 1, rank_bgn, write_group, &bgn_curr, &end_curr,
		      &bgn_best, &end_best, &win_best, 0);

	/* Reset DM delay chains to 0. */
	scc_mgr_apply_group_dm_out1_delay(seq, 0);

	/*
	 * Check to see if the current window nudges up against 0 delay.
	 * If so we need to continue the search by shifting DQS otherwise DQS
	 * search begins as a new search.
	 */
	if (end_curr != 0) {
		bgn_curr = seq->iocfg->io_out1_delay_max + 1;
		end_curr = seq->iocfg->io_out1_delay_max + 1;
	}

	/* Search for the/part of the window with DQS shifts. */
	search_window(seq, 0, rank_bgn, write_group, &bgn_curr, &end_curr,
		      &bgn_best, &end_best, &win_best, new_dqs);

	/*
	 * Assign left and right edge for cal and reporting.
	 * Entry 0 of the per-bit arrays is reused to hold the DM window.
	 */
	left_edge[0] = -1 * bgn_best;
	right_edge[0] = end_best;

	debug_cond(DLEVEL >= 2, "%s:%d dm_calib: left=%d right=%d\n",
		   __func__, __LINE__, left_edge[0], right_edge[0]);

	/* Move DQS (back to orig). */
	scc_mgr_apply_group_dqs_io_and_oct_out1(seq, write_group, new_dqs);

	/* Move DM */

	/* Find middle of window for the DM bit. */
	mid = (left_edge[0] - right_edge[0]) / 2;

	/* Only move right, since we are not moving DQS/DQ. */
	if (mid < 0)
		mid = 0;

	/* dm_margin should fail if we never find a window. */
	if (win_best == 0)
		dm_margin = -1;
	else
		dm_margin = left_edge[0] - mid;

	scc_mgr_apply_group_dm_out1_delay(seq, mid);
	writel(0, &sdr_scc_mgr->update);

	debug_cond(DLEVEL >= 2,
		   "%s:%d dm_calib: left=%d right=%d mid=%d dm_margin=%d\n",
		   __func__, __LINE__, left_edge[0], right_edge[0],
		   mid, dm_margin);
	/* Export values. */
	seq->gbl.fom_out += dq_margin + dqs_margin;

	debug_cond(DLEVEL >= 2,
		   "%s:%d write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n",
		   __func__, __LINE__, dq_margin, dqs_margin, dm_margin);

	/*
	 * Do not remove this line as it makes sure all of our
	 * decisions have been applied.
	 */
	writel(0, &sdr_scc_mgr->update);

	if ((dq_margin < 0) || (dqs_margin < 0) || (dm_margin < 0))
		return -EINVAL;

	return 0;
}
3276 
3277 /**
3278  * rw_mgr_mem_calibrate_writes() - Write Calibration Part One
3279  * @rank_bgn:		Rank number
3280  * @group:		Read/Write Group
3281  * @test_bgn:		Rank at which the test begins
3282  *
3283  * Stage 2: Write Calibration Part One.
3284  *
3285  * This function implements UniPHY calibration Stage 2, as explained in
3286  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
3287  */
rw_mgr_mem_calibrate_writes(struct socfpga_sdrseq * seq,const u32 rank_bgn,const u32 group,const u32 test_bgn)3288 static int rw_mgr_mem_calibrate_writes(struct socfpga_sdrseq *seq,
3289 				       const u32 rank_bgn, const u32 group,
3290 				       const u32 test_bgn)
3291 {
3292 	int ret;
3293 
3294 	/* Update info for sims */
3295 	debug("%s:%d %u %u\n", __func__, __LINE__, group, test_bgn);
3296 
3297 	reg_file_set_group(group);
3298 	reg_file_set_stage(CAL_STAGE_WRITES);
3299 	reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3300 
3301 	ret = rw_mgr_mem_calibrate_writes_center(seq, rank_bgn, group,
3302 						 test_bgn);
3303 	if (ret)
3304 		set_failing_group_stage(seq, group, CAL_STAGE_WRITES,
3305 					CAL_SUBSTAGE_WRITES_CENTER);
3306 
3307 	return ret;
3308 }
3309 
3310 /**
3311  * mem_precharge_and_activate() - Precharge all banks and activate
3312  *
3313  * Precharge all banks and activate row 0 in bank "000..." and bank "111...".
3314  */
mem_precharge_and_activate(struct socfpga_sdrseq * seq)3315 static void mem_precharge_and_activate(struct socfpga_sdrseq *seq)
3316 {
3317 	int r;
3318 
3319 	for (r = 0; r < seq->rwcfg->mem_number_of_ranks; r++) {
3320 		/* Set rank. */
3321 		set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_OFF);
3322 
3323 		/* Precharge all banks. */
3324 		writel(seq->rwcfg->precharge_all, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3325 					     RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3326 
3327 		writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3328 		writel(seq->rwcfg->activate_0_and_1_wait1,
3329 		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3330 
3331 		writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3332 		writel(seq->rwcfg->activate_0_and_1_wait2,
3333 		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3334 
3335 		/* Activate rows. */
3336 		writel(seq->rwcfg->activate_0_and_1,
3337 		       SDR_PHYGRP_RWMGRGRP_ADDRESS |
3338 		       RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3339 	}
3340 }
3341 
3342 /**
3343  * mem_init_latency() - Configure memory RLAT and WLAT settings
3344  *
3345  * Configure memory RLAT and WLAT parameters.
3346  */
mem_init_latency(struct socfpga_sdrseq * seq)3347 static void mem_init_latency(struct socfpga_sdrseq *seq)
3348 {
3349 	/*
3350 	 * For AV/CV, LFIFO is hardened and always runs at full rate
3351 	 * so max latency in AFI clocks, used here, is correspondingly
3352 	 * smaller.
3353 	 */
3354 	const u32 max_latency = (1 << seq->misccfg->max_latency_count_width)
3355 		- 1;
3356 	u32 rlat, wlat;
3357 
3358 	debug("%s:%d\n", __func__, __LINE__);
3359 
3360 	/*
3361 	 * Read in write latency.
3362 	 * WL for Hard PHY does not include additive latency.
3363 	 */
3364 	wlat = readl(&data_mgr->t_wl_add);
3365 	wlat += readl(&data_mgr->mem_t_add);
3366 
3367 	seq->gbl.rw_wl_nop_cycles = wlat - 1;
3368 
3369 	/* Read in readl latency. */
3370 	rlat = readl(&data_mgr->t_rl_add);
3371 
3372 	/* Set a pretty high read latency initially. */
3373 	seq->gbl.curr_read_lat = rlat + 16;
3374 	if (seq->gbl.curr_read_lat > max_latency)
3375 		seq->gbl.curr_read_lat = max_latency;
3376 
3377 	writel(seq->gbl.curr_read_lat, &phy_mgr_cfg->phy_rlat);
3378 
3379 	/* Advertise write latency. */
3380 	writel(wlat, &phy_mgr_cfg->afi_wlat);
3381 }
3382 
/**
 * mem_skip_calibrate() - Set VFIFO and LFIFO to instant-on settings
 * @seq:	Sequencer context (rwcfg, iocfg and misccfg are read,
 *		gbl.curr_read_lat is written)
 *
 * Set VFIFO and LFIFO to instant-on settings in skip calibration mode.
 */
static void mem_skip_calibrate(struct socfpga_sdrseq *seq)
{
	u32 vfifo_offset;
	u32 i, j, r;

	debug("%s:%d\n", __func__, __LINE__);
	/* Need to update every shadow register set used by the interface */
	for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		/*
		 * Set output phase alignment settings appropriate for
		 * skip calibration.
		 */
		for (i = 0; i < seq->rwcfg->mem_if_read_dqs_width; i++) {
			scc_mgr_set_dqs_en_phase(i, 0);
			/*
			 * NOTE(review): the phase written here (6 or 7) is
			 * followed by a second scc_mgr_set_dqdqs_output_phase()
			 * call for the same group below, which presumably
			 * supersedes it - confirm that both are intended.
			 */
			if (seq->iocfg->dll_chain_length == 6)
				scc_mgr_set_dqdqs_output_phase(i, 6);
			else
				scc_mgr_set_dqdqs_output_phase(i, 7);
			/*
			 * Case:33398
			 *
			 * Write data arrives to the I/O two cycles before write
			 * latency is reached (720 deg).
			 *   -> due to bit-slip in a/c bus
			 *   -> to allow board skew where dqs is longer than ck
			 *      -> how often can this happen!?
			 *      -> can claim back some ptaps for high freq
			 *       support if we can relax this, but i digress...
			 *
			 * The write_clk leads mem_ck by 90 deg
			 * The minimum ptap of the OPA is 180 deg
			 * Each ptap has (360 / IO_DLL_CHAIN_LENGTH) deg of
			 * delay
			 * The write_clk is always delayed by 2 ptaps
			 *
			 * Hence, to make DQS aligned to CK, we need to delay
			 * DQS by:
			 *    720 - 90 - 180 -
			 *      2 * (360 / seq->iocfg->dll_chain_length)
			 *
			 * Dividing the above by
			 * (360 / seq->iocfg->dll_chain_length)
			 * gives us the number of ptaps, which simplifies to:
			 *
			 *    (1.25 * seq->iocfg->dll_chain_length - 2)
			 */
			scc_mgr_set_dqdqs_output_phase(i,
				       ((125 * seq->iocfg->dll_chain_length)
				       / 100) - 2);
		}
		writel(0xff, &sdr_scc_mgr->dqs_ena);
		writel(0xff, &sdr_scc_mgr->dqs_io_ena);

		/* Write every write-group index to the SCC group counter. */
		for (i = 0; i < seq->rwcfg->mem_if_write_dqs_width; i++) {
			writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
				  SCC_MGR_GROUP_COUNTER_OFFSET);
		}
		writel(0xff, &sdr_scc_mgr->dq_ena);
		writel(0xff, &sdr_scc_mgr->dm_ena);
		writel(0, &sdr_scc_mgr->update);
	}

	/* Compensate for simulation model behaviour */
	for (i = 0; i < seq->rwcfg->mem_if_read_dqs_width; i++) {
		scc_mgr_set_dqs_bus_in_delay(i, 10);
		scc_mgr_load_dqs(i);
	}
	writel(0, &sdr_scc_mgr->update);

	/*
	 * ArriaV has hard FIFOs that can only be initialized by incrementing
	 * in sequencer.
	 */
	vfifo_offset = seq->misccfg->calib_vfifo_offset;
	for (j = 0; j < vfifo_offset; j++)
		writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
	writel(0, &phy_mgr_cmd->fifo_reset);

	/*
	 * For Arria V and Cyclone V with hard LFIFO, we get the skip-cal
	 * setting from generation-time constant.
	 */
	seq->gbl.curr_read_lat = seq->misccfg->calib_lfifo_offset;
	writel(seq->gbl.curr_read_lat, &phy_mgr_cfg->phy_rlat);
}
3473 
/**
 * mem_calibrate() - Memory calibration entry point.
 * @seq:	Sequencer context (rwcfg, dyn_calib_steps and gbl are used)
 *
 * Perform memory calibration. After the common initialisation, either the
 * skip-calibration settings are applied, or for each write group the VFIFO
 * stage, the write stage and the post-write VFIFO centering stage are run,
 * followed by the LFIFO stage once all groups have passed.
 *
 * Return: 1 on success (including the skip-calibration path), 0 on failure.
 */
static u32 mem_calibrate(struct socfpga_sdrseq *seq)
{
	u32 i;
	u32 rank_bgn, sr;
	u32 write_group, write_test_bgn;
	u32 read_group, read_test_bgn;
	u32 run_groups, current_run;
	u32 failing_groups = 0;
	u32 group_failed = 0;

	/* Number of read DQS groups per write DQS group. */
	const u32 rwdqs_ratio = seq->rwcfg->mem_if_read_dqs_width /
				seq->rwcfg->mem_if_write_dqs_width;

	debug("%s:%d\n", __func__, __LINE__);

	/* Initialize the data settings */
	seq->gbl.error_substage = CAL_SUBSTAGE_NIL;
	seq->gbl.error_stage = CAL_STAGE_NIL;
	seq->gbl.error_group = 0xff;
	seq->gbl.fom_in = 0;
	seq->gbl.fom_out = 0;

	/* Initialize WLAT and RLAT. */
	mem_init_latency(seq);

	/* Initialize bit slips. */
	mem_precharge_and_activate(seq);

	for (i = 0; i < seq->rwcfg->mem_if_read_dqs_width; i++) {
		writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
			  SCC_MGR_GROUP_COUNTER_OFFSET);
		/* Only needed once to set all groups, pins, DQ, DQS, DM. */
		if (i == 0)
			scc_mgr_set_hhp_extras();

		scc_set_bypass_mode(i);
	}

	/* Calibration is skipped. */
	if ((seq->dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
		/*
		 * Set VFIFO and LFIFO to instant-on settings in skip
		 * calibration mode.
		 */
		mem_skip_calibrate(seq);

		/*
		 * Do not remove this line as it makes sure all of our
		 * decisions have been applied.
		 */
		writel(0, &sdr_scc_mgr->update);
		return 1;
	}

	/* Calibration is not skipped. */
	for (i = 0; i < NUM_CALIB_REPEAT; i++) {
		/*
		 * Zero all delay chain/phase settings for all
		 * groups and all shadow register sets.
		 */
		scc_mgr_zero_all(seq);

		/* Bitmask of groups still to run; all bits set initially. */
		run_groups = ~0;

		for (write_group = 0, write_test_bgn = 0; write_group
			< seq->rwcfg->mem_if_write_dqs_width; write_group++,
			write_test_bgn += seq->rwcfg->mem_dq_per_write_dqs) {
			/* Initialize the group failure */
			group_failed = 0;

			/* Peel this write group's bits off the run mask. */
			current_run = run_groups & ((1 <<
				RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
			run_groups = run_groups >>
				RW_MGR_NUM_DQS_PER_WRITE_GROUP;

			if (current_run == 0)
				continue;

			writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
					    SCC_MGR_GROUP_COUNTER_OFFSET);
			scc_mgr_zero_group(seq, write_group, 0);

			/* Stage 1: VFIFO, for each read group of this write group. */
			for (read_group = write_group * rwdqs_ratio,
			     read_test_bgn = 0;
			     read_group < (write_group + 1) * rwdqs_ratio;
			     read_group++,
			     read_test_bgn += seq->rwcfg->mem_dq_per_read_dqs) {
				if (STATIC_CALIB_STEPS & CALIB_SKIP_VFIFO)
					continue;

				/*
				 * Calibrate the VFIFO; a non-zero return
				 * means this read group passed.
				 */
				if (rw_mgr_mem_calibrate_vfifo(seq, read_group,
							       read_test_bgn))
					continue;

				/* Unless sweeping all groups, fail at once. */
				if (!(seq->gbl.phy_debug_mode_flags &
				      PHY_DEBUG_SWEEP_ALL_GROUPS))
					return 0;

				/* The group failed, we're done. */
				goto grp_failed;
			}

			/* Calibrate the output side */
			for (rank_bgn = 0, sr = 0;
			     rank_bgn < seq->rwcfg->mem_number_of_ranks;
			     rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
				if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
					continue;

				/* Not needed in quick mode! */
				if (STATIC_CALIB_STEPS &
				    CALIB_SKIP_DELAY_SWEEPS)
					continue;

				/*
				 * Calibrate WRITEs; a zero return means
				 * success.
				 */
				if (!rw_mgr_mem_calibrate_writes(seq, rank_bgn,
								 write_group,
								 write_test_bgn))
					continue;

				group_failed = 1;
				if (!(seq->gbl.phy_debug_mode_flags &
				      PHY_DEBUG_SWEEP_ALL_GROUPS))
					return 0;
			}

			/* Some group failed, we're done. */
			if (group_failed)
				goto grp_failed;

			/* Stage 3: re-centre the read side after the writes. */
			for (read_group = write_group * rwdqs_ratio,
			     read_test_bgn = 0;
			     read_group < (write_group + 1) * rwdqs_ratio;
			     read_group++,
			     read_test_bgn += seq->rwcfg->mem_dq_per_read_dqs) {
				if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
					continue;

				/* A zero return means success. */
				if (!rw_mgr_mem_calibrate_vfifo_end(seq,
								    read_group,
								    read_test_bgn))
					continue;

				if (!(seq->gbl.phy_debug_mode_flags &
				      PHY_DEBUG_SWEEP_ALL_GROUPS))
					return 0;

				/* The group failed, we're done. */
				goto grp_failed;
			}

			/* No group failed, continue as usual. */
			continue;

grp_failed:		/* A group failed, increment the counter. */
			failing_groups++;
		}

		/*
		 * USER If there are any failing groups then report
		 * the failure.
		 */
		if (failing_groups != 0)
			return 0;

		if (STATIC_CALIB_STEPS & CALIB_SKIP_LFIFO)
			continue;

		/* Calibrate the LFIFO; a zero return means failure. */
		if (!rw_mgr_mem_calibrate_lfifo(seq))
			return 0;
	}

	/*
	 * Do not remove this line as it makes sure all of our decisions
	 * have been applied.
	 */
	writel(0, &sdr_scc_mgr->update);
	return 1;
}
3660 
3661 /**
3662  * run_mem_calibrate() - Perform memory calibration
3663  *
3664  * This function triggers the entire memory calibration procedure.
3665  */
run_mem_calibrate(struct socfpga_sdrseq * seq)3666 static int run_mem_calibrate(struct socfpga_sdrseq *seq)
3667 {
3668 	int pass;
3669 	u32 ctrl_cfg;
3670 
3671 	debug("%s:%d\n", __func__, __LINE__);
3672 
3673 	/* Reset pass/fail status shown on afi_cal_success/fail */
3674 	writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3675 
3676 	/* Stop tracking manager. */
3677 	ctrl_cfg = readl(&sdr_ctrl->ctrl_cfg);
3678 	writel(ctrl_cfg & ~SDR_CTRLGRP_CTRLCFG_DQSTRKEN_MASK,
3679 	       &sdr_ctrl->ctrl_cfg);
3680 
3681 	phy_mgr_initialize(seq);
3682 	rw_mgr_mem_initialize(seq);
3683 
3684 	/* Perform the actual memory calibration. */
3685 	pass = mem_calibrate(seq);
3686 
3687 	mem_precharge_and_activate(seq);
3688 	writel(0, &phy_mgr_cmd->fifo_reset);
3689 
3690 	/* Handoff. */
3691 	rw_mgr_mem_handoff(seq);
3692 	/*
3693 	 * In Hard PHY this is a 2-bit control:
3694 	 * 0: AFI Mux Select
3695 	 * 1: DDIO Mux Select
3696 	 */
3697 	writel(0x2, &phy_mgr_cfg->mux_sel);
3698 
3699 	/* Start tracking manager. */
3700 	writel(ctrl_cfg, &sdr_ctrl->ctrl_cfg);
3701 
3702 	return pass;
3703 }
3704 
3705 /**
3706  * debug_mem_calibrate() - Report result of memory calibration
3707  * @pass:	Value indicating whether calibration passed or failed
3708  *
3709  * This function reports the results of the memory calibration
3710  * and writes debug information into the register file.
3711  */
debug_mem_calibrate(struct socfpga_sdrseq * seq,int pass)3712 static void debug_mem_calibrate(struct socfpga_sdrseq *seq, int pass)
3713 {
3714 	u32 debug_info;
3715 
3716 	if (pass) {
3717 		debug("%s: CALIBRATION PASSED\n", __FILE__);
3718 
3719 		seq->gbl.fom_in /= 2;
3720 		seq->gbl.fom_out /= 2;
3721 
3722 		if (seq->gbl.fom_in > 0xff)
3723 			seq->gbl.fom_in = 0xff;
3724 
3725 		if (seq->gbl.fom_out > 0xff)
3726 			seq->gbl.fom_out = 0xff;
3727 
3728 		/* Update the FOM in the register file */
3729 		debug_info = seq->gbl.fom_in;
3730 		debug_info |= seq->gbl.fom_out << 8;
3731 		writel(debug_info, &sdr_reg_file->fom);
3732 
3733 		writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3734 		writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3735 	} else {
3736 		debug("%s: CALIBRATION FAILED\n", __FILE__);
3737 
3738 		debug_info = seq->gbl.error_stage;
3739 		debug_info |= seq->gbl.error_substage << 8;
3740 		debug_info |= seq->gbl.error_group << 16;
3741 
3742 		writel(debug_info, &sdr_reg_file->failing_stage);
3743 		writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3744 		writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
3745 
3746 		/* Update the failing group/stage in the register file */
3747 		debug_info = seq->gbl.error_stage;
3748 		debug_info |= seq->gbl.error_substage << 8;
3749 		debug_info |= seq->gbl.error_group << 16;
3750 		writel(debug_info, &sdr_reg_file->failing_stage);
3751 	}
3752 
3753 	debug("%s: Calibration complete\n", __FILE__);
3754 }
3755 
3756 /**
3757  * hc_initialize_rom_data() - Initialize ROM data
3758  *
3759  * Initialize ROM data.
3760  */
hc_initialize_rom_data(void)3761 static void hc_initialize_rom_data(void)
3762 {
3763 	unsigned int nelem = 0;
3764 	const u32 *rom_init;
3765 	u32 i, addr;
3766 
3767 	socfpga_get_seq_inst_init(&rom_init, &nelem);
3768 	addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
3769 	for (i = 0; i < nelem; i++)
3770 		writel(rom_init[i], addr + (i << 2));
3771 
3772 	socfpga_get_seq_ac_init(&rom_init, &nelem);
3773 	addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3774 	for (i = 0; i < nelem; i++)
3775 		writel(rom_init[i], addr + (i << 2));
3776 }
3777 
3778 /**
3779  * initialize_reg_file() - Initialize SDR register file
3780  *
3781  * Initialize SDR register file.
3782  */
initialize_reg_file(struct socfpga_sdrseq * seq)3783 static void initialize_reg_file(struct socfpga_sdrseq *seq)
3784 {
3785 	/* Initialize the register file with the correct data */
3786 	writel(seq->misccfg->reg_file_init_seq_signature,
3787 	       &sdr_reg_file->signature);
3788 	writel(0, &sdr_reg_file->debug_data_addr);
3789 	writel(0, &sdr_reg_file->cur_stage);
3790 	writel(0, &sdr_reg_file->fom);
3791 	writel(0, &sdr_reg_file->failing_stage);
3792 	writel(0, &sdr_reg_file->debug1);
3793 	writel(0, &sdr_reg_file->debug2);
3794 }
3795 
3796 /**
3797  * initialize_hps_phy() - Initialize HPS PHY
3798  *
3799  * Initialize HPS PHY.
3800  */
initialize_hps_phy(void)3801 static void initialize_hps_phy(void)
3802 {
3803 	u32 reg;
3804 	/*
3805 	 * Tracking also gets configured here because it's in the
3806 	 * same register.
3807 	 */
3808 	u32 trk_sample_count = 7500;
3809 	u32 trk_long_idle_sample_count = (10 << 16) | 100;
3810 	/*
3811 	 * Format is number of outer loops in the 16 MSB, sample
3812 	 * count in 16 LSB.
3813 	 */
3814 
3815 	reg = 0;
3816 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3817 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3818 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3819 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3820 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3821 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3822 	/*
3823 	 * This field selects the intrinsic latency to RDATA_EN/FULL path.
3824 	 * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3825 	 */
3826 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3827 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3828 		trk_sample_count);
3829 	writel(reg, &sdr_ctrl->phy_ctrl0);
3830 
3831 	reg = 0;
3832 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3833 		trk_sample_count >>
3834 		SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3835 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3836 		trk_long_idle_sample_count);
3837 	writel(reg, &sdr_ctrl->phy_ctrl1);
3838 
3839 	reg = 0;
3840 	reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3841 		trk_long_idle_sample_count >>
3842 		SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3843 	writel(reg, &sdr_ctrl->phy_ctrl2);
3844 }
3845 
3846 /**
3847  * initialize_tracking() - Initialize tracking
3848  *
3849  * Initialize the register file with usable initial data.
3850  */
initialize_tracking(struct socfpga_sdrseq * seq)3851 static void initialize_tracking(struct socfpga_sdrseq *seq)
3852 {
3853 	/*
3854 	 * Initialize the register file with the correct data.
3855 	 * Compute usable version of value in case we skip full
3856 	 * computation later.
3857 	 */
3858 	writel(DIV_ROUND_UP(seq->iocfg->delay_per_opa_tap,
3859 			    seq->iocfg->delay_per_dchain_tap) - 1,
3860 	       &sdr_reg_file->dtaps_per_ptap);
3861 
3862 	/* trk_sample_count */
3863 	writel(7500, &sdr_reg_file->trk_sample_count);
3864 
3865 	/* longidle outer loop [15:0] */
3866 	writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle);
3867 
3868 	/*
3869 	 * longidle sample count [31:24]
3870 	 * trfc, worst case of 933Mhz 4Gb [23:16]
3871 	 * trcd, worst case [15:8]
3872 	 * vfifo wait [7:0]
3873 	 */
3874 	writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0),
3875 	       &sdr_reg_file->delays);
3876 
3877 	/* mux delay */
3878 	if (dram_is_ddr(2)) {
3879 		writel(0, &sdr_reg_file->trk_rw_mgr_addr);
3880 	} else if (dram_is_ddr(3)) {
3881 		writel((seq->rwcfg->idle << 24) |
3882 		       (seq->rwcfg->activate_1 << 16) |
3883 		       (seq->rwcfg->sgle_read << 8) |
3884 		       (seq->rwcfg->precharge_all << 0),
3885 		       &sdr_reg_file->trk_rw_mgr_addr);
3886 	}
3887 
3888 	writel(seq->rwcfg->mem_if_read_dqs_width,
3889 	       &sdr_reg_file->trk_read_dqs_width);
3890 
3891 	/* trefi [7:0] */
3892 	if (dram_is_ddr(2)) {
3893 		writel(1000 << 0, &sdr_reg_file->trk_rfsh);
3894 	} else if (dram_is_ddr(3)) {
3895 		writel((seq->rwcfg->refresh_all << 24) | (1000 << 0),
3896 		       &sdr_reg_file->trk_rfsh);
3897 	}
3898 }
3899 
sdram_calibration_full(struct socfpga_sdr * sdr)3900 int sdram_calibration_full(struct socfpga_sdr *sdr)
3901 {
3902 	u32 pass;
3903 	struct socfpga_sdrseq seq;
3904 
3905 	/*
3906 	 * For size reasons, this file uses hard coded addresses.
3907 	 * Check if we are called with the correct address.
3908 	 */
3909 	if (sdr != (struct socfpga_sdr *)SOCFPGA_SDR_ADDRESS)
3910 		return -ENODEV;
3911 
3912 	memset(&seq, 0, sizeof(seq));
3913 
3914 	seq.rwcfg = socfpga_get_sdram_rwmgr_config();
3915 	seq.iocfg = socfpga_get_sdram_io_config();
3916 	seq.misccfg = socfpga_get_sdram_misc_config();
3917 
3918 	/* Set the calibration enabled by default */
3919 	seq.gbl.phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3920 	/*
3921 	 * Only sweep all groups (regardless of fail state) by default
3922 	 * Set enabled read test by default.
3923 	 */
3924 #if DISABLE_GUARANTEED_READ
3925 	seq.gbl.phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3926 #endif
3927 	/* Initialize the register file */
3928 	initialize_reg_file(&seq);
3929 
3930 	/* Initialize any PHY CSR */
3931 	initialize_hps_phy();
3932 
3933 	scc_mgr_initialize();
3934 
3935 	initialize_tracking(&seq);
3936 
3937 	debug("%s: Preparing to start memory calibration\n", __FILE__);
3938 
3939 	debug("%s:%d\n", __func__, __LINE__);
3940 	debug_cond(DLEVEL >= 1,
3941 		   "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3942 		   seq.rwcfg->mem_number_of_ranks,
3943 		   seq.rwcfg->mem_number_of_cs_per_dimm,
3944 		   seq.rwcfg->mem_dq_per_read_dqs,
3945 		   seq.rwcfg->mem_dq_per_write_dqs,
3946 		   seq.rwcfg->mem_virtual_groups_per_read_dqs,
3947 		   seq.rwcfg->mem_virtual_groups_per_write_dqs);
3948 	debug_cond(DLEVEL >= 1,
3949 		   "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3950 		   seq.rwcfg->mem_if_read_dqs_width,
3951 		   seq.rwcfg->mem_if_write_dqs_width,
3952 		   seq.rwcfg->mem_data_width, seq.rwcfg->mem_data_mask_width,
3953 		   seq.iocfg->delay_per_opa_tap,
3954 		   seq.iocfg->delay_per_dchain_tap);
3955 	debug_cond(DLEVEL >= 1, "dtap_dqsen_delay=%u, dll=%u",
3956 		   seq.iocfg->delay_per_dqs_en_dchain_tap,
3957 		   seq.iocfg->dll_chain_length);
3958 	debug_cond(DLEVEL >= 1,
3959 		   "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3960 		   seq.iocfg->dqs_en_phase_max, seq.iocfg->dqdqs_out_phase_max,
3961 		   seq.iocfg->dqs_en_delay_max, seq.iocfg->dqs_in_delay_max);
3962 	debug_cond(DLEVEL >= 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3963 		   seq.iocfg->io_in_delay_max, seq.iocfg->io_out1_delay_max,
3964 		   seq.iocfg->io_out2_delay_max);
3965 	debug_cond(DLEVEL >= 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3966 		   seq.iocfg->dqs_in_reserve, seq.iocfg->dqs_out_reserve);
3967 
3968 	hc_initialize_rom_data();
3969 
3970 	/* update info for sims */
3971 	reg_file_set_stage(CAL_STAGE_NIL);
3972 	reg_file_set_group(0);
3973 
3974 	/*
3975 	 * Load global needed for those actions that require
3976 	 * some dynamic calibration support.
3977 	 */
3978 	seq.dyn_calib_steps = STATIC_CALIB_STEPS;
3979 	/*
3980 	 * Load global to allow dynamic selection of delay loop settings
3981 	 * based on calibration mode.
3982 	 */
3983 	if (!(seq.dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3984 		seq.skip_delay_mask = 0xff;
3985 	else
3986 		seq.skip_delay_mask = 0x0;
3987 
3988 	pass = run_mem_calibrate(&seq);
3989 	debug_mem_calibrate(&seq, pass);
3990 	return pass;
3991 }
3992