1 // SPDX-License-Identifier: GPL-2.0
2 /* Marvell PTP driver
3  *
4  * Copyright (C) 2020 Marvell.
5  *
6  */
7 
8 #include <linux/bitfield.h>
9 #include <linux/device.h>
10 #include <linux/module.h>
11 #include <linux/pci.h>
12 #include <linux/hrtimer.h>
13 #include <linux/ktime.h>
14 
15 #include "ptp.h"
16 #include "mbox.h"
17 #include "rvu.h"
18 
19 #define DRV_NAME				"Marvell PTP Driver"
20 
21 #define PCI_DEVID_OCTEONTX2_PTP			0xA00C
22 #define PCI_SUBSYS_DEVID_OCTX2_98xx_PTP		0xB100
23 #define PCI_SUBSYS_DEVID_OCTX2_96XX_PTP		0xB200
24 #define PCI_SUBSYS_DEVID_OCTX2_95XX_PTP		0xB300
25 #define PCI_SUBSYS_DEVID_OCTX2_95XXN_PTP	0xB400
26 #define PCI_SUBSYS_DEVID_OCTX2_95MM_PTP		0xB500
27 #define PCI_SUBSYS_DEVID_OCTX2_95XXO_PTP	0xB600
28 #define PCI_DEVID_OCTEONTX2_RST			0xA085
29 #define PCI_DEVID_CN10K_PTP			0xA09E
30 #define PCI_SUBSYS_DEVID_CN10K_A_PTP		0xB900
31 #define PCI_SUBSYS_DEVID_CNF10K_A_PTP		0xBA00
32 #define PCI_SUBSYS_DEVID_CNF10K_B_PTP		0xBC00
33 
34 #define PCI_PTP_BAR_NO				0
35 
36 #define PTP_CLOCK_CFG				0xF00ULL
37 #define PTP_CLOCK_CFG_PTP_EN			BIT_ULL(0)
38 #define PTP_CLOCK_CFG_EXT_CLK_EN		BIT_ULL(1)
39 #define PTP_CLOCK_CFG_EXT_CLK_IN_MASK		GENMASK_ULL(7, 2)
40 #define PTP_CLOCK_CFG_TSTMP_EDGE		BIT_ULL(9)
41 #define PTP_CLOCK_CFG_TSTMP_EN			BIT_ULL(8)
42 #define PTP_CLOCK_CFG_TSTMP_IN_MASK		GENMASK_ULL(15, 10)
43 #define PTP_CLOCK_CFG_PPS_EN			BIT_ULL(30)
44 #define PTP_CLOCK_CFG_PPS_INV			BIT_ULL(31)
45 
46 #define PTP_PPS_HI_INCR				0xF60ULL
47 #define PTP_PPS_LO_INCR				0xF68ULL
48 #define PTP_PPS_THRESH_HI			0xF58ULL
49 
50 #define PTP_CLOCK_LO				0xF08ULL
51 #define PTP_CLOCK_HI				0xF10ULL
52 #define PTP_CLOCK_COMP				0xF18ULL
53 #define PTP_TIMESTAMP				0xF20ULL
54 #define PTP_CLOCK_SEC				0xFD0ULL
55 #define PTP_SEC_ROLLOVER			0xFD8ULL
56 
57 #define CYCLE_MULT				1000
58 
59 static struct ptp *first_ptp_block;
60 static const struct pci_device_id ptp_id_table[];
61 
62 static bool is_ptp_dev_cnf10kb(struct ptp *ptp)
63 {
64 	return (ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CNF10K_B_PTP) ? true : false;
65 }
66 
67 static bool is_ptp_dev_cn10k(struct ptp *ptp)
68 {
69 	return (ptp->pdev->device == PCI_DEVID_CN10K_PTP) ? true : false;
70 }
71 
72 static bool cn10k_ptp_errata(struct ptp *ptp)
73 {
74 	if (ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A_PTP ||
75 	    ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CNF10K_A_PTP)
76 		return true;
77 	return false;
78 }
79 
80 static bool is_ptp_tsfmt_sec_nsec(struct ptp *ptp)
81 {
82 	if (ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A_PTP ||
83 	    ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CNF10K_A_PTP)
84 		return true;
85 	return false;
86 }
87 
/* hrtimer callback used on errata silicon (see cn10k_ptp_errata()):
 * once per second it rewrites PTP_PPS_THRESH_HI so the PPS edge keeps
 * toggling at each one-second boundary.  Armed by ptp_hrtimer_start()
 * when external timestamping is switched on.
 */
static enum hrtimer_restart ptp_reset_thresh(struct hrtimer *hrtimer)
{
	struct ptp *ptp = container_of(hrtimer, struct ptp, hrtimer);
	ktime_t curr_ts = ktime_get();
	ktime_t delta_ns, period_ns;
	u64 ptp_clock_hi;

	/* calculate the elapsed time since last restart */
	delta_ns = ktime_to_ns(ktime_sub(curr_ts, ptp->last_ts));

	/* if the ptp clock value has crossed 0.5 seconds,
	 * its too late to update pps threshold value, so
	 * update threshold after 1 second.
	 */
	ptp_clock_hi = readq(ptp->reg_base + PTP_CLOCK_HI);
	if (ptp_clock_hi > 500000000) {
		/* re-fire just past the next second boundary (100ns margin) */
		period_ns = ktime_set(0, (NSEC_PER_SEC + 100 - ptp_clock_hi));
	} else {
		/* program threshold to the middle of the current second */
		writeq(500000000, ptp->reg_base + PTP_PPS_THRESH_HI);
		period_ns = ktime_set(0, (NSEC_PER_SEC + 100 - delta_ns));
	}

	hrtimer_forward_now(hrtimer, period_ns);
	ptp->last_ts = curr_ts;

	return HRTIMER_RESTART;
}
115 
116 static void ptp_hrtimer_start(struct ptp *ptp, ktime_t start_ns)
117 {
118 	ktime_t period_ns;
119 
120 	period_ns = ktime_set(0, (NSEC_PER_SEC + 100 - start_ns));
121 	hrtimer_start(&ptp->hrtimer, period_ns, HRTIMER_MODE_REL);
122 	ptp->last_ts = ktime_get();
123 }
124 
/* Read current PTP time on silicon where seconds and nanoseconds live
 * in separate registers (PTP_CLOCK_SEC / PTP_CLOCK_HI).  The seconds
 * register is sampled on both sides of the nanosecond read, under
 * ptp_lock, to detect a rollover between the two reads.
 * Returns the combined time in nanoseconds.
 */
static u64 read_ptp_tstmp_sec_nsec(struct ptp *ptp)
{
	u64 sec, sec1, nsec;
	unsigned long flags;

	spin_lock_irqsave(&ptp->ptp_lock, flags);
	sec = readq(ptp->reg_base + PTP_CLOCK_SEC) & 0xFFFFFFFFUL;
	nsec = readq(ptp->reg_base + PTP_CLOCK_HI);
	sec1 = readq(ptp->reg_base + PTP_CLOCK_SEC) & 0xFFFFFFFFUL;
	/* check nsec rollover: seconds advanced mid-sample, so re-read
	 * the nanosecond counter and take the newer seconds value
	 */
	if (sec1 > sec) {
		nsec = readq(ptp->reg_base + PTP_CLOCK_HI);
		sec = sec1;
	}
	spin_unlock_irqrestore(&ptp->ptp_lock, flags);

	return sec * NSEC_PER_SEC + nsec;
}
143 
144 static u64 read_ptp_tstmp_nsec(struct ptp *ptp)
145 {
146 	return readq(ptp->reg_base + PTP_CLOCK_HI);
147 }
148 
/* Compute the PTP_CLOCK_COMP value for errata silicon.
 * @ptp_clock_freq: input clock frequency in Hz.
 *
 * comp is 32.32 fixed point: nanoseconds (upper 32 bits) plus a
 * fractional nanosecond (lower 32 bits) added to the PTP counter each
 * input clock cycle.  The nominal value 10^9 * 2^32 / freq is tweaked
 * up or down to compensate for the two rollover issues described below.
 */
static u64 ptp_calc_adjusted_comp(u64 ptp_clock_freq)
{
	u64 comp, adj = 0, cycles_per_sec, ns_drift = 0;
	u32 ptp_clock_nsec, cycle_time;
	int cycle;

	/* Errata:
	 * Issue #1: At the time of 1 sec rollover of the nano-second counter,
	 * the nano-second counter is set to 0. However, it should be set to
	 * (existing counter_value - 10^9).
	 *
	 * Issue #2: The nano-second counter rolls over at 0x3B9A_C9FF.
	 * It should roll over at 0x3B9A_CA00.
	 */

	/* calculate ptp_clock_comp value */
	comp = ((u64)1000000000ULL << 32) / ptp_clock_freq;
	/* use CYCLE_MULT to avoid accuracy loss due to integer arithmetic */
	cycle_time = NSEC_PER_SEC * CYCLE_MULT / ptp_clock_freq;
	/* cycles per sec */
	cycles_per_sec = ptp_clock_freq;

	/* check whether ptp nanosecond counter rolls over early:
	 * simulate the counter from one cycle before the second boundary
	 * until it reaches/passes 10^9, watching for the bad 0x3B9AC9FF
	 * value from issue #2
	 */
	cycle = cycles_per_sec - 1;
	ptp_clock_nsec = (cycle * comp) >> 32;
	while (ptp_clock_nsec < NSEC_PER_SEC) {
		if (ptp_clock_nsec == 0x3B9AC9FF)
			goto calc_adj_comp;
		cycle++;
		ptp_clock_nsec = (cycle * comp) >> 32;
	}
	/* compute nanoseconds lost per second when nsec counter rolls over
	 * (issue #1: the residue above 10^9 is discarded by hardware)
	 */
	ns_drift = ptp_clock_nsec - NSEC_PER_SEC;
	/* calculate ptp_clock_comp adjustment */
	if (ns_drift > 0) {
		adj = comp * ns_drift;
		adj = adj / 1000000000ULL;
	}
	/* speed up the ptp clock to account for nanoseconds lost */
	comp += adj;
	return comp;

calc_adj_comp:
	/* slow down the ptp clock to not rollover early (issue #2):
	 * shave roughly one cycle's worth of increment off comp
	 */
	adj = comp * cycle_time;
	adj = adj / 1000000000ULL;
	adj = adj / CYCLE_MULT;
	comp -= adj;

	return comp;
}
200 
201 struct ptp *ptp_get(void)
202 {
203 	struct ptp *ptp = first_ptp_block;
204 
205 	/* Check PTP block is present in hardware */
206 	if (!pci_dev_present(ptp_id_table))
207 		return ERR_PTR(-ENODEV);
208 	/* Check driver is bound to PTP block */
209 	if (!ptp)
210 		ptp = ERR_PTR(-EPROBE_DEFER);
211 	else
212 		pci_dev_get(ptp->pdev);
213 
214 	return ptp;
215 }
216 
217 void ptp_put(struct ptp *ptp)
218 {
219 	if (!ptp)
220 		return;
221 
222 	pci_dev_put(ptp->pdev);
223 }
224 
/* Apply a fine frequency adjustment (PTP_OP_ADJFINE).
 * @scaled_ppm: requested rate offset in parts per million, scaled by
 *              2^16 (standard ptp_clock_info adjfine convention).
 * Computes a new compensation value and writes PTP_CLOCK_COMP.
 * Always returns 0.
 */
static int ptp_adjfine(struct ptp *ptp, long scaled_ppm)
{
	bool neg_adj = false;
	u32 freq, freq_adj;
	u64 comp, adj;
	s64 ppb;

	/* work with the magnitude; remember the sign separately */
	if (scaled_ppm < 0) {
		neg_adj = true;
		scaled_ppm = -scaled_ppm;
	}

	/* The hardware adds the clock compensation value to the PTP clock
	 * on every coprocessor clock cycle. Typical convention is that it
	 * represents the number of nanoseconds between each cycle. In this
	 * convention the compensation value is in 64 bit fixed-point
	 * representation where upper 32 bits are number of nanoseconds
	 * and lower is fractions of nanosecond.
	 * The scaled_ppm represents the ratio in "parts per million" by which
	 * the compensation value should be corrected.
	 * To calculate the new compensation value we use 64bit fixed point
	 * arithmetic on the following formula
	 * comp = tbase + tbase * scaled_ppm / (1M * 2^16)
	 * where tbase is the basic compensation value calculated
	 * initially in the probe function.
	 */
	/* convert scaled_ppm to ppb: * 1000 / 2^16 == * 125 >> 13 */
	ppb = 1 + scaled_ppm;
	ppb *= 125;
	ppb >>= 13;

	if (cn10k_ptp_errata(ptp)) {
		/* calculate the new frequency based on ppb and rerun the
		 * errata-aware compensation calculation against it
		 */
		freq_adj = (ptp->clock_rate * ppb) / 1000000000ULL;
		freq = neg_adj ? ptp->clock_rate + freq_adj : ptp->clock_rate - freq_adj;
		comp = ptp_calc_adjusted_comp(freq);
	} else {
		/* nominal comp, corrected by ppb in 32.32 fixed point */
		comp = ((u64)1000000000ull << 32) / ptp->clock_rate;
		adj = comp * ppb;
		adj = div_u64(adj, 1000000000ull);
		comp = neg_adj ? comp - adj : comp + adj;
	}
	writeq(comp, ptp->reg_base + PTP_CLOCK_COMP);

	return 0;
}
271 
272 static int ptp_get_clock(struct ptp *ptp, u64 *clk)
273 {
274 	/* Return the current PTP clock */
275 	*clk = ptp->read_ptp_tstmp(ptp);
276 
277 	return 0;
278 }
279 
/* Enable and configure the PTP block.
 * @ptp:          PTP block handle; NULL is tolerated (no-op).
 * @sclk:         input (coprocessor) clock in MHz; must be non-zero.
 * @ext_clk_freq: when non-zero, clock the PTP block from a GPIO at this
 *                rate (same units as clock_rate, i.e. Hz).
 * @extts:        when non-zero, enable GPIO-sourced external timestamping.
 */
void ptp_start(struct ptp *ptp, u64 sclk, u32 ext_clk_freq, u32 extts)
{
	struct pci_dev *pdev;
	u64 clock_comp;
	u64 clock_cfg;

	if (!ptp)
		return;

	pdev = ptp->pdev;

	if (!sclk) {
		dev_err(&pdev->dev, "PTP input clock cannot be zero\n");
		return;
	}

	/* sclk is in MHz */
	ptp->clock_rate = sclk * 1000000;

	/* Program the seconds rollover value to 1 second */
	if (is_ptp_dev_cnf10kb(ptp))
		writeq(0x3b9aca00, ptp->reg_base + PTP_SEC_ROLLOVER);

	/* Enable PTP clock */
	clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG);

	if (ext_clk_freq) {
		ptp->clock_rate = ext_clk_freq;
		/* Set GPIO as PTP clock source */
		clock_cfg &= ~PTP_CLOCK_CFG_EXT_CLK_IN_MASK;
		clock_cfg |= PTP_CLOCK_CFG_EXT_CLK_EN;
	}

	if (extts) {
		clock_cfg |= PTP_CLOCK_CFG_TSTMP_EDGE;
		/* Set GPIO as timestamping source */
		clock_cfg &= ~PTP_CLOCK_CFG_TSTMP_IN_MASK;
		clock_cfg |= PTP_CLOCK_CFG_TSTMP_EN;
	}

	clock_cfg |= PTP_CLOCK_CFG_PTP_EN;
	clock_cfg |= PTP_CLOCK_CFG_PPS_EN | PTP_CLOCK_CFG_PPS_INV;
	writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);

	/* Set 50% duty cycle for 1Hz output; 0x1dcd6500 is 500000000
	 * (half a second in ns) in the upper 32 bits of the increment.
	 */
	writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_HI_INCR);
	writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_LO_INCR);
	if (cn10k_ptp_errata(ptp)) {
		/* The ptp_clock_hi rollsover to zero once clock cycle before it
		 * reaches one second boundary. so, program the pps_lo_incr in
		 * such a way that the pps threshold value comparison at one
		 * second boundary will succeed and pps edge changes. After each
		 * one second boundary, the hrtimer handler will be invoked and
		 * reprograms the pps threshold value.
		 */
		ptp->clock_period = NSEC_PER_SEC / ptp->clock_rate;
		writeq((0x1dcd6500ULL - ptp->clock_period) << 32,
		       ptp->reg_base + PTP_PPS_LO_INCR);
	}

	/* errata parts need the rollover-corrected compensation value */
	if (cn10k_ptp_errata(ptp))
		clock_comp = ptp_calc_adjusted_comp(ptp->clock_rate);
	else
		clock_comp = ((u64)1000000000ull << 32) / ptp->clock_rate;

	/* Initial compensation value to start the nanosecs counter */
	writeq(clock_comp, ptp->reg_base + PTP_CLOCK_COMP);
}
348 
349 static int ptp_get_tstmp(struct ptp *ptp, u64 *clk)
350 {
351 	u64 timestamp;
352 
353 	if (is_ptp_dev_cn10k(ptp)) {
354 		timestamp = readq(ptp->reg_base + PTP_TIMESTAMP);
355 		*clk = (timestamp >> 32) * NSEC_PER_SEC + (timestamp & 0xFFFFFFFF);
356 	} else {
357 		*clk = readq(ptp->reg_base + PTP_TIMESTAMP);
358 	}
359 
360 	return 0;
361 }
362 
363 static int ptp_set_thresh(struct ptp *ptp, u64 thresh)
364 {
365 	if (!cn10k_ptp_errata(ptp))
366 		writeq(thresh, ptp->reg_base + PTP_PPS_THRESH_HI);
367 
368 	return 0;
369 }
370 
371 static int ptp_extts_on(struct ptp *ptp, int on)
372 {
373 	u64 ptp_clock_hi;
374 
375 	if (cn10k_ptp_errata(ptp)) {
376 		if (on) {
377 			ptp_clock_hi = readq(ptp->reg_base + PTP_CLOCK_HI);
378 			ptp_hrtimer_start(ptp, (ktime_t)ptp_clock_hi);
379 		} else {
380 			if (hrtimer_active(&ptp->hrtimer))
381 				hrtimer_cancel(&ptp->hrtimer);
382 		}
383 	}
384 
385 	return 0;
386 }
387 
/* PCI probe: map BAR0, remember the first PTP block for ptp_get(), and
 * select the per-silicon timestamp read helper and errata handling.
 *
 * NOTE: on failure this still returns 0 and stores ERR_PTR(err) in
 * drvdata / first_ptp_block, so ptp_get() can distinguish "probe
 * failed" from "not probed yet" (see comment at the error label).
 */
static int ptp_probe(struct pci_dev *pdev,
		     const struct pci_device_id *ent)
{
	struct device *dev = &pdev->dev;
	struct ptp *ptp;
	int err;

	ptp = devm_kzalloc(dev, sizeof(*ptp), GFP_KERNEL);
	if (!ptp) {
		err = -ENOMEM;
		goto error;
	}

	ptp->pdev = pdev;

	err = pcim_enable_device(pdev);
	if (err)
		goto error_free;

	err = pcim_iomap_regions(pdev, 1 << PCI_PTP_BAR_NO, pci_name(pdev));
	if (err)
		goto error_free;

	ptp->reg_base = pcim_iomap_table(pdev)[PCI_PTP_BAR_NO];

	pci_set_drvdata(pdev, ptp);
	/* the first successfully probed block is the one ptp_get() hands out */
	if (!first_ptp_block)
		first_ptp_block = ptp;

	spin_lock_init(&ptp->ptp_lock);
	/* pick the timestamp reader matching this silicon's register layout */
	if (is_ptp_tsfmt_sec_nsec(ptp))
		ptp->read_ptp_tstmp = &read_ptp_tstmp_sec_nsec;
	else
		ptp->read_ptp_tstmp = &read_ptp_tstmp_nsec;

	/* errata silicon needs a periodic timer to maintain the PPS threshold */
	if (cn10k_ptp_errata(ptp)) {
		hrtimer_init(&ptp->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		ptp->hrtimer.function = ptp_reset_thresh;
	}

	return 0;

error_free:
	devm_kfree(dev, ptp);

error:
	/* For `ptp_get()` we need to differentiate between the case
	 * when the core has not tried to probe this device and the case when
	 * the probe failed.  In the later case we pretend that the
	 * initialization was successful and keep the error in
	 * `dev->driver_data`.
	 */
	pci_set_drvdata(pdev, ERR_PTR(err));
	if (!first_ptp_block)
		first_ptp_block = ERR_PTR(err);

	return 0;
}
446 
447 static void ptp_remove(struct pci_dev *pdev)
448 {
449 	struct ptp *ptp = pci_get_drvdata(pdev);
450 	u64 clock_cfg;
451 
452 	if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer))
453 		hrtimer_cancel(&ptp->hrtimer);
454 
455 	if (IS_ERR_OR_NULL(ptp))
456 		return;
457 
458 	/* Disable PTP clock */
459 	clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG);
460 	clock_cfg &= ~PTP_CLOCK_CFG_PTP_EN;
461 	writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
462 }
463 
/* PCI IDs of every PTP block variant this driver binds to; also used by
 * ptp_get() via pci_dev_present() to detect whether PTP hardware exists.
 */
static const struct pci_device_id ptp_id_table[] = {
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_OCTX2_98xx_PTP) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_OCTX2_96XX_PTP) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_OCTX2_95XX_PTP) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_OCTX2_95XXN_PTP) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_OCTX2_95MM_PTP) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_OCTX2_95XXO_PTP) },
	/* CN10K parts match on device ID alone (any subsystem) */
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_PTP) },
	{ 0, }
};
486 
/* PCI driver hooks for the PTP block; non-static — presumably registered
 * by the enclosing AF driver rather than by a module_init() here (TODO
 * confirm against the registration site).
 */
struct pci_driver ptp_driver = {
	.name = DRV_NAME,
	.id_table = ptp_id_table,
	.probe = ptp_probe,
	.remove = ptp_remove,
};
493 
494 int rvu_mbox_handler_ptp_op(struct rvu *rvu, struct ptp_req *req,
495 			    struct ptp_rsp *rsp)
496 {
497 	int err = 0;
498 
499 	/* This function is the PTP mailbox handler invoked when
500 	 * called by AF consumers/netdev drivers via mailbox mechanism.
501 	 * It is used by netdev driver to get the PTP clock and to set
502 	 * frequency adjustments. Since mailbox can be called without
503 	 * notion of whether the driver is bound to ptp device below
504 	 * validation is needed as first step.
505 	 */
506 	if (!rvu->ptp)
507 		return -ENODEV;
508 
509 	switch (req->op) {
510 	case PTP_OP_ADJFINE:
511 		err = ptp_adjfine(rvu->ptp, req->scaled_ppm);
512 		break;
513 	case PTP_OP_GET_CLOCK:
514 		err = ptp_get_clock(rvu->ptp, &rsp->clk);
515 		break;
516 	case PTP_OP_GET_TSTMP:
517 		err = ptp_get_tstmp(rvu->ptp, &rsp->clk);
518 		break;
519 	case PTP_OP_SET_THRESH:
520 		err = ptp_set_thresh(rvu->ptp, req->thresh);
521 		break;
522 	case PTP_OP_EXTTS_ON:
523 		err = ptp_extts_on(rvu->ptp, req->extts_on);
524 		break;
525 	default:
526 		err = -EINVAL;
527 		break;
528 	}
529 
530 	return err;
531 }
532