1 /************************************************************************
2  *
3  * Copyright (c) 2013-2015 Intel Corporation.
4  *
5 * SPDX-License-Identifier: BSD-2-Clause-Patent
6  *
7  ***************************************************************************/
8 
9 #include "mrc.h"
10 #include "memory_options.h"
11 
12 #include "meminit_utils.h"
13 #include "hte.h"
14 #include "io.h"
15 
16 void select_hte(
17     MRCParams_t *mrc_params);
18 
19 static uint8_t first_run = 0;
20 
// Translation table from a Vref setting (array index, lowest to highest) to
// the 6-bit code written into B0VREFCTL/B1VREFCTL[7:2] by set_vref().
// Indices 0-31 hold 0x3F counting down to 0x20, indices 32-63 hold 0x00
// counting up to 0x1F.  get_vref() searches this table to map a code back to
// its index.
const uint8_t vref_codes[64] =
{ // lowest to highest
    0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, // 00 - 15
    0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, // 16 - 31
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, // 32 - 47
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F  // 48 - 63
};
28 
#ifdef EMU
// Track current post code for debugging purpose (only built when EMU is defined)
uint32_t PostCode;
#endif
33 
34 // set_rcvn:
35 //
36 // This function will program the RCVEN delays.
37 // (currently doesn't comprehend rank)
set_rcvn(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)38 void set_rcvn(
39     uint8_t channel,
40     uint8_t rank,
41     uint8_t byte_lane,
42     uint32_t pi_count)
43 {
44   uint32_t reg;
45   uint32_t msk;
46   uint32_t tempD;
47 
48   ENTERFN();
49   DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
50 
51   // RDPTR (1/2 MCLK, 64 PIs)
52   // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
53   // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
54   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
55   msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : (BIT11 | BIT10 | BIT9 | BIT8);
56   tempD = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : ((pi_count / HALF_CLK) << 8);
57   isbM32m(DDRPHY, reg, tempD, msk);
58 
59   // Adjust PI_COUNT
60   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
61 
62   // PI (1/64 MCLK, 1 PIs)
63   // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
64   // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
65   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
66   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
67   msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
68   tempD = pi_count << 24;
69   isbM32m(DDRPHY, reg, tempD, msk);
70 
71   // DEADBAND
72   // BL0/1 -> B01DBCTL1[08/11] (+1 select)
73   // BL0/1 -> B01DBCTL1[02/05] (enable)
74   reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
75   msk = 0x00;
76   tempD = 0x00;
77   // enable
78   msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2);
79   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
80   {
81     tempD |= msk;
82   }
83   // select
84   msk |= (byte_lane & BIT0) ? (BIT11) : (BIT8);
85   if (pi_count < EARLY_DB)
86   {
87     tempD |= msk;
88   }
89   isbM32m(DDRPHY, reg, tempD, msk);
90 
91   // error check
92   if (pi_count > 0x3F)
93   {
94     training_message(channel, rank, byte_lane);
95     post_code(0xEE, 0xE0);
96   }
97 
98   LEAVEFN();
99   return;
100 }
101 
102 // get_rcvn:
103 //
104 // This function will return the current RCVEN delay on the given channel, rank, byte_lane as an absolute PI count.
105 // (currently doesn't comprehend rank)
get_rcvn(uint8_t channel,uint8_t rank,uint8_t byte_lane)106 uint32_t get_rcvn(
107     uint8_t channel,
108     uint8_t rank,
109     uint8_t byte_lane)
110 {
111   uint32_t reg;
112   uint32_t tempD;
113   uint32_t pi_count;
114 
115   ENTERFN();
116 
117   // RDPTR (1/2 MCLK, 64 PIs)
118   // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
119   // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
120   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
121   tempD = isbR32m(DDRPHY, reg);
122   tempD >>= (byte_lane & BIT0) ? (20) : (8);
123   tempD &= 0xF;
124 
125   // Adjust PI_COUNT
126   pi_count = tempD * HALF_CLK;
127 
128   // PI (1/64 MCLK, 1 PIs)
129   // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
130   // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
131   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
132   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
133   tempD = isbR32m(DDRPHY, reg);
134   tempD >>= 24;
135   tempD &= 0x3F;
136 
137   // Adjust PI_COUNT
138   pi_count += tempD;
139 
140   LEAVEFN();
141   return pi_count;
142 }
143 
144 // set_rdqs:
145 //
146 // This function will program the RDQS delays based on an absolute amount of PIs.
147 // (currently doesn't comprehend rank)
set_rdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)148 void set_rdqs(
149     uint8_t channel,
150     uint8_t rank,
151     uint8_t byte_lane,
152     uint32_t pi_count)
153 {
154   uint32_t reg;
155   uint32_t msk;
156   uint32_t tempD;
157 
158   ENTERFN();
159   DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
160 
161   // PI (1/128 MCLK)
162   // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
163   // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
164   reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);
165   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
166   msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);
167   tempD = pi_count << 0;
168   isbM32m(DDRPHY, reg, tempD, msk);
169 
170   // error check (shouldn't go above 0x3F)
171   if (pi_count > 0x47)
172   {
173     training_message(channel, rank, byte_lane);
174     post_code(0xEE, 0xE1);
175   }
176 
177   LEAVEFN();
178   return;
179 }
180 
181 // get_rdqs:
182 //
183 // This function will return the current RDQS delay on the given channel, rank, byte_lane as an absolute PI count.
184 // (currently doesn't comprehend rank)
get_rdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane)185 uint32_t get_rdqs(
186     uint8_t channel,
187     uint8_t rank,
188     uint8_t byte_lane)
189 {
190   uint32_t reg;
191   uint32_t tempD;
192   uint32_t pi_count;
193 
194   ENTERFN();
195 
196   // PI (1/128 MCLK)
197   // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
198   // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
199   reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);
200   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
201   tempD = isbR32m(DDRPHY, reg);
202 
203   // Adjust PI_COUNT
204   pi_count = tempD & 0x7F;
205 
206   LEAVEFN();
207   return pi_count;
208 }
209 
210 // set_wdqs:
211 //
212 // This function will program the WDQS delays based on an absolute amount of PIs.
213 // (currently doesn't comprehend rank)
set_wdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)214 void set_wdqs(
215     uint8_t channel,
216     uint8_t rank,
217     uint8_t byte_lane,
218     uint32_t pi_count)
219 {
220   uint32_t reg;
221   uint32_t msk;
222   uint32_t tempD;
223 
224   ENTERFN();
225   DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
226 
227   // RDPTR (1/2 MCLK, 64 PIs)
228   // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
229   // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
230   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
231   msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : (BIT7 | BIT6 | BIT5 | BIT4);
232   tempD = pi_count / HALF_CLK;
233   tempD <<= (byte_lane & BIT0) ? (16) : (4);
234   isbM32m(DDRPHY, reg, tempD, msk);
235 
236   // Adjust PI_COUNT
237   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
238 
239   // PI (1/64 MCLK, 1 PIs)
240   // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
241   // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
242   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
243   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
244   msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16);
245   tempD = pi_count << 16;
246   isbM32m(DDRPHY, reg, tempD, msk);
247 
248   // DEADBAND
249   // BL0/1 -> B01DBCTL1[07/10] (+1 select)
250   // BL0/1 -> B01DBCTL1[01/04] (enable)
251   reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
252   msk = 0x00;
253   tempD = 0x00;
254   // enable
255   msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1);
256   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
257   {
258     tempD |= msk;
259   }
260   // select
261   msk |= (byte_lane & BIT0) ? (BIT10) : (BIT7);
262   if (pi_count < EARLY_DB)
263   {
264     tempD |= msk;
265   }
266   isbM32m(DDRPHY, reg, tempD, msk);
267 
268   // error check
269   if (pi_count > 0x3F)
270   {
271     training_message(channel, rank, byte_lane);
272     post_code(0xEE, 0xE2);
273   }
274 
275   LEAVEFN();
276   return;
277 }
278 
279 // get_wdqs:
280 //
281 // This function will return the amount of WDQS delay on the given channel, rank, byte_lane as an absolute PI count.
282 // (currently doesn't comprehend rank)
get_wdqs(uint8_t channel,uint8_t rank,uint8_t byte_lane)283 uint32_t get_wdqs(
284     uint8_t channel,
285     uint8_t rank,
286     uint8_t byte_lane)
287 {
288   uint32_t reg;
289   uint32_t tempD;
290   uint32_t pi_count;
291 
292   ENTERFN();
293 
294   // RDPTR (1/2 MCLK, 64 PIs)
295   // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
296   // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
297   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
298   tempD = isbR32m(DDRPHY, reg);
299   tempD >>= (byte_lane & BIT0) ? (16) : (4);
300   tempD &= 0xF;
301 
302   // Adjust PI_COUNT
303   pi_count = (tempD * HALF_CLK);
304 
305   // PI (1/64 MCLK, 1 PIs)
306   // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
307   // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
308   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
309   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
310   tempD = isbR32m(DDRPHY, reg);
311   tempD >>= 16;
312   tempD &= 0x3F;
313 
314   // Adjust PI_COUNT
315   pi_count += tempD;
316 
317   LEAVEFN();
318   return pi_count;
319 }
320 
321 // set_wdq:
322 //
323 // This function will program the WDQ delays based on an absolute number of PIs.
324 // (currently doesn't comprehend rank)
set_wdq(uint8_t channel,uint8_t rank,uint8_t byte_lane,uint32_t pi_count)325 void set_wdq(
326     uint8_t channel,
327     uint8_t rank,
328     uint8_t byte_lane,
329     uint32_t pi_count)
330 {
331   uint32_t reg;
332   uint32_t msk;
333   uint32_t tempD;
334 
335   ENTERFN();
336   DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
337 
338   // RDPTR (1/2 MCLK, 64 PIs)
339   // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
340   // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
341   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
342   msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : (BIT3 | BIT2 | BIT1 | BIT0);
343   tempD = pi_count / HALF_CLK;
344   tempD <<= (byte_lane & BIT0) ? (12) : (0);
345   isbM32m(DDRPHY, reg, tempD, msk);
346 
347   // Adjust PI_COUNT
348   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
349 
350   // PI (1/64 MCLK, 1 PIs)
351   // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
352   // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
353   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
354   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
355   msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
356   tempD = pi_count << 8;
357   isbM32m(DDRPHY, reg, tempD, msk);
358 
359   // DEADBAND
360   // BL0/1 -> B01DBCTL1[06/09] (+1 select)
361   // BL0/1 -> B01DBCTL1[00/03] (enable)
362   reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
363   msk = 0x00;
364   tempD = 0x00;
365   // enable
366   msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0);
367   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
368   {
369     tempD |= msk;
370   }
371   // select
372   msk |= (byte_lane & BIT0) ? (BIT9) : (BIT6);
373   if (pi_count < EARLY_DB)
374   {
375     tempD |= msk;
376   }
377   isbM32m(DDRPHY, reg, tempD, msk);
378 
379   // error check
380   if (pi_count > 0x3F)
381   {
382     training_message(channel, rank, byte_lane);
383     post_code(0xEE, 0xE3);
384   }
385 
386   LEAVEFN();
387   return;
388 }
389 
390 // get_wdq:
391 //
392 // This function will return the amount of WDQ delay on the given channel, rank, byte_lane as an absolute PI count.
393 // (currently doesn't comprehend rank)
get_wdq(uint8_t channel,uint8_t rank,uint8_t byte_lane)394 uint32_t get_wdq(
395     uint8_t channel,
396     uint8_t rank,
397     uint8_t byte_lane)
398 {
399   uint32_t reg;
400   uint32_t tempD;
401   uint32_t pi_count;
402 
403   ENTERFN();
404 
405   // RDPTR (1/2 MCLK, 64 PIs)
406   // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
407   // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
408   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
409   tempD = isbR32m(DDRPHY, reg);
410   tempD >>= (byte_lane & BIT0) ? (12) : (0);
411   tempD &= 0xF;
412 
413   // Adjust PI_COUNT
414   pi_count = (tempD * HALF_CLK);
415 
416   // PI (1/64 MCLK, 1 PIs)
417   // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
418   // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
419   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
420   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
421   tempD = isbR32m(DDRPHY, reg);
422   tempD >>= 8;
423   tempD &= 0x3F;
424 
425   // Adjust PI_COUNT
426   pi_count += tempD;
427 
428   LEAVEFN();
429   return pi_count;
430 }
431 
432 // set_wcmd:
433 //
434 // This function will program the WCMD delays based on an absolute number of PIs.
set_wcmd(uint8_t channel,uint32_t pi_count)435 void set_wcmd(
436     uint8_t channel,
437     uint32_t pi_count)
438 {
439   uint32_t reg;
440   uint32_t msk;
441   uint32_t tempD;
442 
443   ENTERFN();
444   // RDPTR (1/2 MCLK, 64 PIs)
445   // CMDPTRREG[11:08] (0x0-0xF)
446   reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);
447   msk = (BIT11 | BIT10 | BIT9 | BIT8);
448   tempD = pi_count / HALF_CLK;
449   tempD <<= 8;
450   isbM32m(DDRPHY, reg, tempD, msk);
451 
452   // Adjust PI_COUNT
453   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
454 
455   // PI (1/64 MCLK, 1 PIs)
456   // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
457   // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
458   // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
459   // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
460   // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
461   // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
462   // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
463   // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
464   reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
465 
466   msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24) | (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16)
467       | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8) | (BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);
468 
469   tempD = (pi_count << 24) | (pi_count << 16) | (pi_count << 8) | (pi_count << 0);
470 
471   isbM32m(DDRPHY, reg, tempD, msk);
472   reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); // PO
473   isbM32m(DDRPHY, reg, tempD, msk);
474 
475   // DEADBAND
476   // CMDCFGREG0[17] (+1 select)
477   // CMDCFGREG0[16] (enable)
478   reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET);
479   msk = 0x00;
480   tempD = 0x00;
481   // enable
482   msk |= BIT16;
483   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
484   {
485     tempD |= msk;
486   }
487   // select
488   msk |= BIT17;
489   if (pi_count < EARLY_DB)
490   {
491     tempD |= msk;
492   }
493   isbM32m(DDRPHY, reg, tempD, msk);
494 
495   // error check
496   if (pi_count > 0x3F)
497   {
498     post_code(0xEE, 0xE4);
499   }
500 
501   LEAVEFN();
502   return;
503 }
504 
505 // get_wcmd:
506 //
507 // This function will return the amount of WCMD delay on the given channel as an absolute PI count.
get_wcmd(uint8_t channel)508 uint32_t get_wcmd(
509     uint8_t channel)
510 {
511   uint32_t reg;
512   uint32_t tempD;
513   uint32_t pi_count;
514 
515   ENTERFN();
516   // RDPTR (1/2 MCLK, 64 PIs)
517   // CMDPTRREG[11:08] (0x0-0xF)
518   reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);
519   tempD = isbR32m(DDRPHY, reg);
520   tempD >>= 8;
521   tempD &= 0xF;
522 
523   // Adjust PI_COUNT
524   pi_count = tempD * HALF_CLK;
525 
526   // PI (1/64 MCLK, 1 PIs)
527   // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
528   // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
529   // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
530   // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
531   // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
532   // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
533   // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
534   // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
535   reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
536   tempD = isbR32m(DDRPHY, reg);
537   tempD >>= 16;
538   tempD &= 0x3F;
539 
540   // Adjust PI_COUNT
541   pi_count += tempD;
542 
543   LEAVEFN();
544   return pi_count;
545 }
546 
547 // set_wclk:
548 //
549 // This function will program the WCLK delays based on an absolute number of PIs.
set_wclk(uint8_t channel,uint8_t rank,uint32_t pi_count)550 void set_wclk(
551     uint8_t channel,
552     uint8_t rank,
553     uint32_t pi_count)
554 {
555   uint32_t reg;
556   uint32_t msk;
557   uint32_t tempD;
558 
559   ENTERFN();
560   // RDPTR (1/2 MCLK, 64 PIs)
561   // CCPTRREG[15:12] -> CLK1 (0x0-0xF)
562   // CCPTRREG[11:08] -> CLK0 (0x0-0xF)
563   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
564   msk = (BIT15 | BIT14 | BIT13 | BIT12) | (BIT11 | BIT10 | BIT9 | BIT8);
565   tempD = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);
566   isbM32m(DDRPHY, reg, tempD, msk);
567 
568   // Adjust PI_COUNT
569   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
570 
571   // PI (1/64 MCLK, 1 PIs)
572   // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
573   // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
574   reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);
575   reg += (channel * DDRIOCCC_CH_OFFSET);
576   msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16) | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
577   tempD = (pi_count << 16) | (pi_count << 8);
578   isbM32m(DDRPHY, reg, tempD, msk);
579   reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1);
580   reg += (channel * DDRIOCCC_CH_OFFSET);
581   isbM32m(DDRPHY, reg, tempD, msk);
582   reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2);
583   reg += (channel * DDRIOCCC_CH_OFFSET);
584   isbM32m(DDRPHY, reg, tempD, msk);
585   reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3);
586   reg += (channel * DDRIOCCC_CH_OFFSET);
587   isbM32m(DDRPHY, reg, tempD, msk);
588 
589   // DEADBAND
590   // CCCFGREG1[11:08] (+1 select)
591   // CCCFGREG1[03:00] (enable)
592   reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);
593   msk = 0x00;
594   tempD = 0x00;
595   // enable
596   msk |= (BIT3 | BIT2 | BIT1 | BIT0); // only ??? matters
597   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
598   {
599     tempD |= msk;
600   }
601   // select
602   msk |= (BIT11 | BIT10 | BIT9 | BIT8); // only ??? matters
603   if (pi_count < EARLY_DB)
604   {
605     tempD |= msk;
606   }
607   isbM32m(DDRPHY, reg, tempD, msk);
608 
609   // error check
610   if (pi_count > 0x3F)
611   {
612     post_code(0xEE, 0xE5);
613   }
614 
615   LEAVEFN();
616   return;
617 }
618 
619 // get_wclk:
620 //
621 // This function will return the amout of WCLK delay on the given channel, rank as an absolute PI count.
get_wclk(uint8_t channel,uint8_t rank)622 uint32_t get_wclk(
623     uint8_t channel,
624     uint8_t rank)
625 {
626   uint32_t reg;
627   uint32_t tempD;
628   uint32_t pi_count;
629 
630   ENTERFN();
631   // RDPTR (1/2 MCLK, 64 PIs)
632   // CCPTRREG[15:12] -> CLK1 (0x0-0xF)
633   // CCPTRREG[11:08] -> CLK0 (0x0-0xF)
634   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
635   tempD = isbR32m(DDRPHY, reg);
636   tempD >>= (rank) ? (12) : (8);
637   tempD &= 0xF;
638 
639   // Adjust PI_COUNT
640   pi_count = tempD * HALF_CLK;
641 
642   // PI (1/64 MCLK, 1 PIs)
643   // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
644   // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
645   reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);
646   reg += (channel * DDRIOCCC_CH_OFFSET);
647   tempD = isbR32m(DDRPHY, reg);
648   tempD >>= (rank) ? (16) : (8);
649   tempD &= 0x3F;
650 
651   pi_count += tempD;
652 
653   LEAVEFN();
654   return pi_count;
655 }
656 
657 // set_wctl:
658 //
659 // This function will program the WCTL delays based on an absolute number of PIs.
660 // (currently doesn't comprehend rank)
set_wctl(uint8_t channel,uint8_t rank,uint32_t pi_count)661 void set_wctl(
662     uint8_t channel,
663     uint8_t rank,
664     uint32_t pi_count)
665 {
666   uint32_t reg;
667   uint32_t msk;
668   uint32_t tempD;
669 
670   ENTERFN();
671 
672   // RDPTR (1/2 MCLK, 64 PIs)
673   // CCPTRREG[31:28] (0x0-0xF)
674   // CCPTRREG[27:24] (0x0-0xF)
675   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
676   msk = (BIT31 | BIT30 | BIT29 | BIT28) | (BIT27 | BIT26 | BIT25 | BIT24);
677   tempD = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);
678   isbM32m(DDRPHY, reg, tempD, msk);
679 
680   // Adjust PI_COUNT
681   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
682 
683   // PI (1/64 MCLK, 1 PIs)
684   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
685   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
686   reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);
687   msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
688   tempD = (pi_count << 24);
689   isbM32m(DDRPHY, reg, tempD, msk);
690   reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
691   isbM32m(DDRPHY, reg, tempD, msk);
692   reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET);
693   isbM32m(DDRPHY, reg, tempD, msk);
694   reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET);
695   isbM32m(DDRPHY, reg, tempD, msk);
696 
697   // DEADBAND
698   // CCCFGREG1[13:12] (+1 select)
699   // CCCFGREG1[05:04] (enable)
700   reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);
701   msk = 0x00;
702   tempD = 0x00;
703   // enable
704   msk |= (BIT5 | BIT4); // only ??? matters
705   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
706   {
707     tempD |= msk;
708   }
709   // select
710   msk |= (BIT13 | BIT12); // only ??? matters
711   if (pi_count < EARLY_DB)
712   {
713     tempD |= msk;
714   }
715   isbM32m(DDRPHY, reg, tempD, msk);
716 
717   // error check
718   if (pi_count > 0x3F)
719   {
720     post_code(0xEE, 0xE6);
721   }
722 
723   LEAVEFN();
724   return;
725 }
726 
727 // get_wctl:
728 //
729 // This function will return the amount of WCTL delay on the given channel, rank as an absolute PI count.
730 // (currently doesn't comprehend rank)
get_wctl(uint8_t channel,uint8_t rank)731 uint32_t get_wctl(
732     uint8_t channel,
733     uint8_t rank)
734 {
735   uint32_t reg;
736   uint32_t tempD;
737   uint32_t pi_count;
738 
739   ENTERFN();
740 
741   // RDPTR (1/2 MCLK, 64 PIs)
742   // CCPTRREG[31:28] (0x0-0xF)
743   // CCPTRREG[27:24] (0x0-0xF)
744   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
745   tempD = isbR32m(DDRPHY, reg);
746   tempD >>= 24;
747   tempD &= 0xF;
748 
749   // Adjust PI_COUNT
750   pi_count = tempD * HALF_CLK;
751 
752   // PI (1/64 MCLK, 1 PIs)
753   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
754   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
755   reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);
756   tempD = isbR32m(DDRPHY, reg);
757   tempD >>= 24;
758   tempD &= 0x3F;
759 
760   // Adjust PI_COUNT
761   pi_count += tempD;
762 
763   LEAVEFN();
764   return pi_count;
765 }
766 
767 // set_vref:
768 //
769 // This function will program the internal Vref setting in a given byte lane in a given channel.
set_vref(uint8_t channel,uint8_t byte_lane,uint32_t setting)770 void set_vref(
771     uint8_t channel,
772     uint8_t byte_lane,
773     uint32_t setting)
774 {
775   uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);
776 
777   ENTERFN();
778   DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", channel, byte_lane, setting);
779 
780   isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)),
781       (vref_codes[setting] << 2), (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2));
782   //isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), (setting<<2), (BIT7|BIT6|BIT5|BIT4|BIT3|BIT2));
783   // need to wait ~300ns for Vref to settle (check that this is necessary)
784   delay_n(300);
785   // ??? may need to clear pointers ???
786   LEAVEFN();
787   return;
788 }
789 
790 // get_vref:
791 //
792 // This function will return the internal Vref setting for the given channel, byte_lane;
get_vref(uint8_t channel,uint8_t byte_lane)793 uint32_t get_vref(
794     uint8_t channel,
795     uint8_t byte_lane)
796 {
797   uint8_t j;
798   uint32_t ret_val = sizeof(vref_codes) / 2;
799   uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);
800 
801   uint32_t tempD;
802 
803   ENTERFN();
804   tempD = isbR32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)));
805   tempD >>= 2;
806   tempD &= 0x3F;
807   for (j = 0; j < sizeof(vref_codes); j++)
808   {
809     if (vref_codes[j] == tempD)
810     {
811       ret_val = j;
812       break;
813     }
814   }
815   LEAVEFN();
816   return ret_val;
817 }
818 
819 // clear_pointers:
820 //
821 // This function will be used to clear the pointers in a given byte lane in a given channel.
clear_pointers(void)822 void clear_pointers(
823     void)
824 {
825   uint8_t channel_i;
826   uint8_t bl_i;
827 
828   ENTERFN();
829   for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++)
830   {
831     for (bl_i = 0; bl_i < NUM_BYTE_LANES; bl_i++)
832     {
833       isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), ~(BIT8),
834           (BIT8));
835       //delay_m(1); // DEBUG
836       isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), (BIT8),
837           (BIT8));
838     }
839   }
840   LEAVEFN();
841   return;
842 }
843 
// enable_cache:
//
// Intentionally empty: cache control is not used in Quark MRC.
void enable_cache(
    void)
{
  // Cache control not used in Quark MRC
  return;
}
851 
// disable_cache:
//
// Intentionally empty: cache control is not used in Quark MRC.
void disable_cache(
    void)
{
  // Cache control not used in Quark MRC
  return;
}
859 
860 // Send DRAM command, data should be formated
861 // using DCMD_Xxxx macro or emrsXCommand structure.
dram_init_command(uint32_t data)862 static void dram_init_command(
863     uint32_t data)
864 {
865   Wr32(DCMD, 0, data);
866 }
867 
868 // find_rising_edge:
869 //
870 // This function will find the rising edge transition on RCVN or WDQS.
find_rising_edge(MRCParams_t * mrc_params,uint32_t delay[],uint8_t channel,uint8_t rank,bool rcvn)871 void find_rising_edge(
872     MRCParams_t *mrc_params,
873     uint32_t delay[],
874     uint8_t channel,
875     uint8_t rank,
876     bool rcvn)
877 {
878 
879 #define SAMPLE_CNT 3   // number of sample points
880 #define SAMPLE_DLY 26  // number of PIs to increment per sample
881 #define FORWARD true   // indicates to increase delays when looking for edge
882 #define BACKWARD false // indicates to decrease delays when looking for edge
883 
884   bool all_edges_found; // determines stop condition
885   bool direction[NUM_BYTE_LANES]; // direction indicator
886   uint8_t sample_i; // sample counter
887   uint8_t bl_i; // byte lane counter
888   uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor
889   uint32_t sample_result[SAMPLE_CNT]; // results of "sample_dqs()"
890   uint32_t tempD; // temporary DWORD
891   uint32_t transition_pattern;
892 
893   ENTERFN();
894 
895   // select hte and request initial configuration
896   select_hte(mrc_params);
897   first_run = 1;
898 
899   // Take 3 sample points (T1,T2,T3) to obtain a transition pattern.
900   for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)
901   {
902     // program the desired delays for sample
903     for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
904     {
905       // increase sample delay by 26 PI (0.2 CLK)
906       if (rcvn)
907       {
908         set_rcvn(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));
909       }
910       else
911       {
912         set_wdqs(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));
913       }
914     } // bl_i loop
915     // take samples (Tsample_i)
916     sample_result[sample_i] = sample_dqs(mrc_params, channel, rank, rcvn);
917 
918     DPF(D_TRN, "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",
919         (rcvn ? "RCVN" : "WDQS"), channel, rank,
920         sample_i, sample_i * SAMPLE_DLY, sample_result[sample_i]);
921 
922   } // sample_i loop
923 
924   // This pattern will help determine where we landed and ultimately how to place RCVEN/WDQS.
925   for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
926   {
927     // build "transition_pattern" (MSB is 1st sample)
928     transition_pattern = 0x00;
929     for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)
930     {
931       transition_pattern |= ((sample_result[sample_i] & (1 << bl_i)) >> bl_i) << (SAMPLE_CNT - 1 - sample_i);
932     } // sample_i loop
933 
934     DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);
935 
936     // set up to look for rising edge based on "transition_pattern"
937     switch (transition_pattern)
938     {
939     case 0x00: // sampled 0->0->0
940       // move forward from T3 looking for 0->1
941       delay[bl_i] += 2 * SAMPLE_DLY;
942       direction[bl_i] = FORWARD;
943       break;
944     case 0x01: // sampled 0->0->1
945     case 0x05: // sampled 1->0->1 (bad duty cycle) *HSD#237503*
946       // move forward from T2 looking for 0->1
947       delay[bl_i] += 1 * SAMPLE_DLY;
948       direction[bl_i] = FORWARD;
949       break;
950 // HSD#237503
951 //      case 0x02: // sampled 0->1->0 (bad duty cycle)
952 //        training_message(channel, rank, bl_i);
953 //        post_code(0xEE, 0xE8);
954 //        break;
955     case 0x02: // sampled 0->1->0 (bad duty cycle) *HSD#237503*
956     case 0x03: // sampled 0->1->1
957       // move forward from T1 looking for 0->1
958       delay[bl_i] += 0 * SAMPLE_DLY;
959       direction[bl_i] = FORWARD;
960       break;
961     case 0x04: // sampled 1->0->0 (assumes BL8, HSD#234975)
962       // move forward from T3 looking for 0->1
963       delay[bl_i] += 2 * SAMPLE_DLY;
964       direction[bl_i] = FORWARD;
965       break;
966 // HSD#237503
967 //      case 0x05: // sampled 1->0->1 (bad duty cycle)
968 //        training_message(channel, rank, bl_i);
969 //        post_code(0xEE, 0xE9);
970 //        break;
971     case 0x06: // sampled 1->1->0
972     case 0x07: // sampled 1->1->1
973       // move backward from T1 looking for 1->0
974       delay[bl_i] += 0 * SAMPLE_DLY;
975       direction[bl_i] = BACKWARD;
976       break;
977     default:
978       post_code(0xEE, 0xEE);
979       break;
980     } // transition_pattern switch
981     // program delays
982     if (rcvn)
983     {
984       set_rcvn(channel, rank, bl_i, delay[bl_i]);
985     }
986     else
987     {
988       set_wdqs(channel, rank, bl_i, delay[bl_i]);
989     }
990   } // bl_i loop
991 
992   // Based on the observed transition pattern on the byte lane,
993   // begin looking for a rising edge with single PI granularity.
994   do
995   {
996     all_edges_found = true; // assume all byte lanes passed
997     tempD = sample_dqs(mrc_params, channel, rank, rcvn); // take a sample
998     // check all each byte lane for proper edge
999     for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
1000     {
1001       if (tempD & (1 << bl_i))
1002       {
1003         // sampled "1"
1004         if (direction[bl_i] == BACKWARD)
1005         {
1006           // keep looking for edge on this byte lane
1007           all_edges_found = false;
1008           delay[bl_i] -= 1;
1009           if (rcvn)
1010           {
1011             set_rcvn(channel, rank, bl_i, delay[bl_i]);
1012           }
1013           else
1014           {
1015             set_wdqs(channel, rank, bl_i, delay[bl_i]);
1016           }
1017         }
1018       }
1019       else
1020       {
1021         // sampled "0"
1022         if (direction[bl_i] == FORWARD)
1023         {
1024           // keep looking for edge on this byte lane
1025           all_edges_found = false;
1026           delay[bl_i] += 1;
1027           if (rcvn)
1028           {
1029             set_rcvn(channel, rank, bl_i, delay[bl_i]);
1030           }
1031           else
1032           {
1033             set_wdqs(channel, rank, bl_i, delay[bl_i]);
1034           }
1035         }
1036       }
1037     } // bl_i loop
1038   } while (!all_edges_found);
1039 
1040   // restore DDR idle state
1041   dram_init_command(DCMD_PREA(rank));
1042 
1043   DPF(D_TRN, "Delay %03X %03X %03X %03X\n",
1044       delay[0], delay[1], delay[2], delay[3]);
1045 
1046   LEAVEFN();
1047   return;
1048 }
1049 
1050 // sample_dqs:
1051 //
1052 // This function will sample the DQTRAINSTS registers in the given channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.
1053 // It will return an encoded DWORD in which each bit corresponds to the sampled value on the byte lane.
sample_dqs(MRCParams_t * mrc_params,uint8_t channel,uint8_t rank,bool rcvn)1054 uint32_t sample_dqs(
1055     MRCParams_t *mrc_params,
1056     uint8_t channel,
1057     uint8_t rank,
1058     bool rcvn)
1059 {
1060   uint8_t j; // just a counter
1061   uint8_t bl_i; // which BL in the module (always 2 per module)
1062   uint8_t bl_grp; // which BL module
1063   uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor
1064   uint32_t msk[2]; // BLx in module
1065   uint32_t sampled_val[SAMPLE_SIZE]; // DQTRAINSTS register contents for each sample
1066   uint32_t num_0s; // tracks the number of '0' samples
1067   uint32_t num_1s; // tracks the number of '1' samples
1068   uint32_t ret_val = 0x00; // assume all '0' samples
1069   uint32_t address = get_addr(mrc_params, channel, rank);
1070 
1071   // initialise "msk[]"
1072   msk[0] = (rcvn) ? (BIT1) : (BIT9); // BL0
1073   msk[1] = (rcvn) ? (BIT0) : (BIT8); // BL1
1074 
1075 
1076   // cycle through each byte lane group
1077   for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++)
1078   {
1079     // take SAMPLE_SIZE samples
1080     for (j = 0; j < SAMPLE_SIZE; j++)
1081     {
1082       HteMemOp(address, first_run, rcvn?0:1);
1083       first_run = 0;
1084 
1085       // record the contents of the proper DQTRAINSTS register
1086       sampled_val[j] = isbR32m(DDRPHY, (DQTRAINSTS + (bl_grp * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)));
1087     }
1088     // look for a majority value ( (SAMPLE_SIZE/2)+1 ) on the byte lane
1089     // and set that value in the corresponding "ret_val" bit
1090     for (bl_i = 0; bl_i < 2; bl_i++)
1091     {
1092       num_0s = 0x00; // reset '0' tracker for byte lane
1093       num_1s = 0x00; // reset '1' tracker for byte lane
1094       for (j = 0; j < SAMPLE_SIZE; j++)
1095       {
1096         if (sampled_val[j] & msk[bl_i])
1097         {
1098           num_1s++;
1099         }
1100         else
1101         {
1102           num_0s++;
1103         }
1104       }
1105       if (num_1s > num_0s)
1106       {
1107         ret_val |= (1 << (bl_i + (bl_grp * 2)));
1108       }
1109     }
1110   }
1111 
1112   // "ret_val.0" contains the status of BL0
1113   // "ret_val.1" contains the status of BL1
1114   // "ret_val.2" contains the status of BL2
1115   // etc.
1116   return ret_val;
1117 }
1118 
1119 // get_addr:
1120 //
1121 // This function will return a 32 bit address in the desired channel and rank.
get_addr(MRCParams_t * mrc_params,uint8_t channel,uint8_t rank)1122 uint32_t get_addr(
1123     MRCParams_t *mrc_params,
1124     uint8_t channel,
1125     uint8_t rank)
1126 {
1127   uint32_t offset = 0x02000000; // 32MB
1128 
1129   // Begin product specific code
1130   if (channel > 0)
1131   {
1132     DPF(D_ERROR, "ILLEGAL CHANNEL\n");
1133     DEAD_LOOP();
1134   }
1135 
1136   if (rank > 1)
1137   {
1138     DPF(D_ERROR, "ILLEGAL RANK\n");
1139     DEAD_LOOP();
1140   }
1141 
1142   // use 256MB lowest density as per DRP == 0x0003
1143   offset += rank * (256 * 1024 * 1024);
1144 
1145   return offset;
1146 }
1147 
1148 // byte_lane_mask:
1149 //
1150 // This function will return a 32 bit mask that will be used to check for byte lane failures.
byte_lane_mask(MRCParams_t * mrc_params)1151 uint32_t byte_lane_mask(
1152     MRCParams_t *mrc_params)
1153 {
1154   uint32_t j;
1155   uint32_t ret_val = 0x00;
1156 
1157   // set "ret_val" based on NUM_BYTE_LANES such that you will check only BL0 in "result"
1158   // (each bit in "result" represents a byte lane)
1159   for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)
1160   {
1161     ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));
1162   }
1163 
1164   // HSD#235037
1165   // need to adjust the mask for 16-bit mode
1166   if (mrc_params->channel_width == x16)
1167   {
1168     ret_val |= (ret_val << 2);
1169   }
1170 
1171   return ret_val;
1172 }
1173 
1174 
// read_tsc:
//
// Returns the current contents of the TSC register as a uint64_t.
// SIM/GCC builds execute RDTSC through inline assembly (EDX = upper 32 bits,
// EAX = lower 32 bits); other builds use the __rdtsc() intrinsic.
uint64_t read_tsc(
    void)
{
#if defined (SIM) || defined (GCC)
  uint32_t tscH; // EDX
  uint32_t tscL; // EAX

  // The asm must be "volatile": it has outputs but no inputs, so without the
  // qualifier the compiler is allowed to assume it always produces the same
  // result and reuse a previous value or hoist it out of a polling loop
  // (such as the one in delay_n()).
  asm volatile("rdtsc" : "=a"(tscL), "=d"(tscH));

  return (((uint64_t) tscH) << 32) | tscL;
#else
  return __rdtsc();
#endif
}
1196 
// get_tsc_freq:
//
// Returns the TSC frequency in MHz.
// The value is looked up in a four entry table; the fuse based selection is
// compiled out, so entry 0 (533) is always returned.
uint32_t get_tsc_freq(
    void)
{
  static const uint32_t mhz_by_fuse[] =
  { 533, 400, 200, 100 };
#if 0
  uint32_t fuse = (isbR32m(FUSE, 0) >> 12) & (BIT1|BIT0);
#else
  // todo!!! Fixed 533MHz for emulation or debugging
  uint32_t fuse = 0;
#endif

  return mhz_by_fuse[fuse];
}
1214 
1215 #ifndef SIM
// delay_n:
//
// This is a simple busy-wait delay function.
// It takes "nanoseconds" as a parameter and spins until the TSC has advanced
// by (get_tsc_freq() * nanoseconds) / 1000 ticks (get_tsc_freq() is in MHz,
// i.e. ticks per microsecond).
void delay_n(
    uint32_t nanoseconds)
{
  uint32_t freq = get_tsc_freq();
  uint32_t ticks;
  uint64_t final_tsc = read_tsc();

  // Computing (freq * nanoseconds) directly in 32 bits wraps for delays
  // above roughly 8ms at 533MHz, which silently shortens the wait.
  // Splitting "nanoseconds" into whole microseconds and a remainder gives
  // exactly the same quotient without needing 64 bit multiplication or
  // division, and cannot overflow for any frequency up to 1000MHz.
  ticks = (nanoseconds / 1000) * freq;
  ticks += ((nanoseconds % 1000) * freq) / 1000;

  final_tsc += ticks;

  while (read_tsc() < final_tsc)
  {
    // spin
  }
  return;
}
1231 #endif
1232 
// delay_u:
//
// This is a simple delay function.
// It takes "microseconds" as a parameter and calls delay_n(1000) once per
// requested microsecond.
void delay_u(
    uint32_t microseconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, so wait one microsecond at a time
  for (remaining = microseconds; remaining != 0; remaining--)
  {
    delay_n(1000);
  }
}
1247 
// delay_m:
//
// This is a simple delay function.
// It takes "milliseconds" as a parameter and calls delay_u(1000) once per
// requested millisecond.
void delay_m(
    uint32_t milliseconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, so wait one millisecond at a time
  for (remaining = milliseconds; remaining != 0; remaining--)
  {
    delay_u(1000);
  }
}
1262 
// delay_s:
//
// This is a simple delay function.
// It takes "seconds" as a parameter and calls delay_m(1000) once per
// requested second.
void delay_s(
    uint32_t seconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, so wait one second at a time
  for (remaining = seconds; remaining != 0; remaining--)
  {
    delay_m(1000);
  }
}
1277 
1278 // post_code:
1279 //
1280 // This function will output the POST CODE to the four 7-Segment LED displays.
post_code(uint8_t major,uint8_t minor)1281 void post_code(
1282     uint8_t major,
1283     uint8_t minor)
1284 {
1285 #ifdef EMU
1286   // Update global variable for execution tracking in debug env
1287   PostCode = ((major << 8) | minor);
1288 #endif
1289 
1290   // send message to UART
1291   DPF(D_INFO, "POST: 0x%01X%02X\n", major, minor);
1292 
1293   // error check:
1294   if (major == 0xEE)
1295   {
1296     // todo!!! Consider updating error status and exit MRC
1297 #ifdef SIM
1298     // enable Ctrl-C handling
1299     for(;;) delay_n(100);
1300 #else
1301     DEAD_LOOP();
1302 #endif
1303   }
1304 }
1305 
training_message(uint8_t channel,uint8_t rank,uint8_t byte_lane)1306 void training_message(
1307     uint8_t channel,
1308     uint8_t rank,
1309     uint8_t byte_lane)
1310 {
1311   // send message to UART
1312   DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);
1313   return;
1314 }
1315 
// print_timings:
//
// Prints a table of the currently programmed timing values through DPF at
// D_INFO level.
// One row is printed per algorithm, enabled channel and enabled rank; each
// row holds one column per active byte lane (NUM_BYTE_LANES, halved in x16
// mode). VREF is read per channel / byte lane only; WCMD, WCTL and WCLK do
// not take a byte lane, so their value is repeated in every column.
void print_timings(
    MRCParams_t *mrc_params)
{
  uint8_t algo;
  uint8_t ch;
  uint8_t rk;
  uint8_t bl;
  uint8_t lane_div;

  if (mrc_params->channel_width == x16)
  {
    lane_div = 2;
  }
  else
  {
    lane_div = 1;
  }

  DPF(D_INFO, "\n---------------------------");
  DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");
  DPF(D_INFO, "\n===========================");

  for (algo = 0; algo < eMAX_ALGOS; algo++)
  {
    for (ch = 0; ch < NUM_CHANNELS; ch++)
    {
      // skip disabled channels
      if (!(mrc_params->channel_enables & (1 << ch)))
      {
        continue;
      }

      for (rk = 0; rk < NUM_RANKS; rk++)
      {
        // skip disabled ranks
        if (!(mrc_params->rank_enables & (1 << rk)))
        {
          continue;
        }

        // row label
        switch (algo)
        {
        case eRCVN:
          DPF(D_INFO, "\nRCVN[%02d:%02d]", ch, rk);
          break;
        case eWDQS:
          DPF(D_INFO, "\nWDQS[%02d:%02d]", ch, rk);
          break;
        case eWDQx:
          DPF(D_INFO, "\nWDQx[%02d:%02d]", ch, rk);
          break;
        case eRDQS:
          DPF(D_INFO, "\nRDQS[%02d:%02d]", ch, rk);
          break;
        case eVREF:
          DPF(D_INFO, "\nVREF[%02d:%02d]", ch, rk);
          break;
        case eWCMD:
          DPF(D_INFO, "\nWCMD[%02d:%02d]", ch, rk);
          break;
        case eWCTL:
          DPF(D_INFO, "\nWCTL[%02d:%02d]", ch, rk);
          break;
        case eWCLK:
          DPF(D_INFO, "\nWCLK[%02d:%02d]", ch, rk);
          break;
        default:
          break;
        } // row label switch

        // one column per active byte lane
        for (bl = 0; bl < (NUM_BYTE_LANES / lane_div); bl++)
        {
          switch (algo)
          {
          case eRCVN:
            DPF(D_INFO, " %03d", get_rcvn(ch, rk, bl));
            break;
          case eWDQS:
            DPF(D_INFO, " %03d", get_wdqs(ch, rk, bl));
            break;
          case eWDQx:
            DPF(D_INFO, " %03d", get_wdq(ch, rk, bl));
            break;
          case eRDQS:
            DPF(D_INFO, " %03d", get_rdqs(ch, rk, bl));
            break;
          case eVREF:
            DPF(D_INFO, " %03d", get_vref(ch, bl));
            break;
          case eWCMD:
            DPF(D_INFO, " %03d", get_wcmd(ch));
            break;
          case eWCTL:
            DPF(D_INFO, " %03d", get_wctl(ch, rk));
            break;
          case eWCLK:
            DPF(D_INFO, " %03d", get_wclk(ch, rk));
            break;
          default:
            break;
          } // column value switch
        } // byte lane loop
      } // rank loop
    } // channel loop
  } // algorithm loop

  DPF(D_INFO, "\n---------------------------");
  DPF(D_INFO, "\n");
}
1408 
// lfsr32:
//
// Advances a 32 bit LFSR by 32 single-bit steps.
// On every step the feedback bit is the inverted XOR of bits 0, 1, 2 and 22
// of the current value; the value is shifted right by one and the feedback
// bit becomes the new bit 31. Because the feedback is inverted, a value of
// all 1's maps onto itself.
// "lfsr_ptr" points to the previous value and receives the next one.
void lfsr32(
    uint32_t *lfsr_ptr)
{
  uint32_t state = *lfsr_ptr;
  uint32_t feedback;
  uint32_t steps_left;

  for (steps_left = 32; steps_left != 0; steps_left--)
  {
    feedback = 1u
        ^ (state & 1u)
        ^ ((state >> 1) & 1u)
        ^ ((state >> 2) & 1u)
        ^ ((state >> 22) & 1u);

    state = (state >> 1) | (feedback << 31);
  }

  *lfsr_ptr = state;
}
1433 
1434 // The purpose of this function is to ensure the SEC comes out of reset
1435 // and IA initiates the SEC enabling Memory Scrambling.
enable_scrambling(MRCParams_t * mrc_params)1436 void enable_scrambling(
1437     MRCParams_t *mrc_params)
1438 {
1439   uint32_t lfsr = 0;
1440   uint8_t i;
1441 
1442   if (mrc_params->scrambling_enables == 0)
1443     return;
1444 
1445   ENTERFN();
1446 
1447   // 32 bit seed is always stored in BIOS NVM.
1448   lfsr = mrc_params->timings.scrambler_seed;
1449 
1450   if (mrc_params->boot_mode == bmCold)
1451   {
1452     // factory value is 0 and in first boot, a clock based seed is loaded.
1453     if (lfsr == 0)
1454     {
1455       lfsr = read_tsc() & 0x0FFFFFFF; // get seed from system clock and make sure it is not all 1's
1456     }
1457     // need to replace scrambler
1458     // get next 32bit LFSR 16 times which is the last part of the previous scrambler vector.
1459     else
1460     {
1461       for (i = 0; i < 16; i++)
1462       {
1463         lfsr32(&lfsr);
1464       }
1465     }
1466     mrc_params->timings.scrambler_seed = lfsr;  // save new seed.
1467   } // if (cold_boot)
1468 
1469   // In warm boot or S3 exit, we have the previous seed.
1470   // In cold boot, we have the last 32bit LFSR which is the new seed.
1471   lfsr32(&lfsr); // shift to next value
1472   isbW32m(MCU, SCRMSEED, (lfsr & 0x0003FFFF));
1473   for (i = 0; i < 2; i++)
1474   {
1475     isbW32m(MCU, SCRMLO + i, (lfsr & 0xAAAAAAAA));
1476   }
1477 
1478   LEAVEFN();
1479   return;
1480 }
1481 
1482 // This function will store relevant timing data
1483 // This data will be used on subsequent boots to speed up boot times
1484 // and is required for Suspend To RAM capabilities.
store_timings(MRCParams_t * mrc_params)1485 void store_timings(
1486     MRCParams_t *mrc_params)
1487 {
1488   uint8_t ch, rk, bl;
1489   MrcTimings_t *mt = &mrc_params->timings;
1490 
1491   for (ch = 0; ch < NUM_CHANNELS; ch++)
1492   {
1493     for (rk = 0; rk < NUM_RANKS; rk++)
1494     {
1495       for (bl = 0; bl < NUM_BYTE_LANES; bl++)
1496       {
1497         mt->rcvn[ch][rk][bl] = get_rcvn(ch, rk, bl); // RCVN
1498         mt->rdqs[ch][rk][bl] = get_rdqs(ch, rk, bl); // RDQS
1499         mt->wdqs[ch][rk][bl] = get_wdqs(ch, rk, bl); // WDQS
1500         mt->wdq[ch][rk][bl] = get_wdq(ch, rk, bl);  // WDQ
1501         if (rk == 0)
1502         {
1503           mt->vref[ch][bl] = get_vref(ch, bl);  // VREF (RANK0 only)
1504         }
1505       }
1506       mt->wctl[ch][rk] = get_wctl(ch, rk); // WCTL
1507     }
1508     mt->wcmd[ch] = get_wcmd(ch); // WCMD
1509   }
1510 
1511   // need to save for a case of changing frequency after warm reset
1512   mt->ddr_speed = mrc_params->ddr_speed;
1513 
1514   return;
1515 }
1516 
1517 // This function will retrieve relevant timing data
1518 // This data will be used on subsequent boots to speed up boot times
1519 // and is required for Suspend To RAM capabilities.
restore_timings(MRCParams_t * mrc_params)1520 void restore_timings(
1521     MRCParams_t *mrc_params)
1522 {
1523   uint8_t ch, rk, bl;
1524   const MrcTimings_t *mt = &mrc_params->timings;
1525 
1526   for (ch = 0; ch < NUM_CHANNELS; ch++)
1527   {
1528     for (rk = 0; rk < NUM_RANKS; rk++)
1529     {
1530       for (bl = 0; bl < NUM_BYTE_LANES; bl++)
1531       {
1532         set_rcvn(ch, rk, bl, mt->rcvn[ch][rk][bl]); // RCVN
1533         set_rdqs(ch, rk, bl, mt->rdqs[ch][rk][bl]); // RDQS
1534         set_wdqs(ch, rk, bl, mt->wdqs[ch][rk][bl]); // WDQS
1535         set_wdq(ch, rk, bl, mt->wdq[ch][rk][bl]);  // WDQ
1536         if (rk == 0)
1537         {
1538           set_vref(ch, bl, mt->vref[ch][bl]); // VREF (RANK0 only)
1539         }
1540       }
1541       set_wctl(ch, rk, mt->wctl[ch][rk]); // WCTL
1542     }
1543     set_wcmd(ch, mt->wcmd[ch]); // WCMD
1544   }
1545 
1546   return;
1547 }
1548 
1549 // Configure default settings normally set as part of read training
1550 // Some defaults have to be set earlier as they may affect earlier
1551 // training steps.
default_timings(MRCParams_t * mrc_params)1552 void default_timings(
1553     MRCParams_t *mrc_params)
1554 {
1555   uint8_t ch, rk, bl;
1556 
1557   for (ch = 0; ch < NUM_CHANNELS; ch++)
1558   {
1559     for (rk = 0; rk < NUM_RANKS; rk++)
1560     {
1561       for (bl = 0; bl < NUM_BYTE_LANES; bl++)
1562       {
1563         set_rdqs(ch, rk, bl, 24); // RDQS
1564         if (rk == 0)
1565         {
1566           set_vref(ch, bl, 32); // VREF (RANK0 only)
1567         }
1568       }
1569     }
1570   }
1571 
1572   return;
1573 }
1574 
1575