/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020 Marvell International Ltd.
 *
 * Interface to the hardware Scheduling unit.
 *
 * Starting with SDK 1.7.0, cvmx-pow supports a number of extended
 * consistency checks. The define CVMX_ENABLE_POW_CHECKS controls the
 * runtime insertion of POW internal state checks to find common
 * programming errors. If CVMX_ENABLE_POW_CHECKS is not defined,
 * checks are enabled by default. For example, cvmx-pow will check for
 * the following programming errors or POW state inconsistencies:
 * - Requesting a POW operation with an active tag switch in
 *   progress.
 * - Waiting for a tag switch to complete for an excessively
 *   long period. This is normally a sign of an error in locking
 *   causing deadlock.
 * - Illegal tag switches from NULL_NULL.
 * - Illegal tag switches from NULL.
 * - Illegal deschedule request.
 * - WQE pointer not matching the one attached to the core by
 *   the POW.
 */

#ifndef __CVMX_POW_H__
#define __CVMX_POW_H__

#include "cvmx-wqe.h"
#include "cvmx-pow-defs.h"
#include "cvmx-sso-defs.h"
#include "cvmx-address.h"
#include "cvmx-coremask.h"

/* Default to having all POW consistency checks turned on */
#ifndef CVMX_ENABLE_POW_CHECKS
#define CVMX_ENABLE_POW_CHECKS 1
#endif
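
/*
 * Example (illustrative, not part of this header): the runtime checks
 * can be compiled out by defining CVMX_ENABLE_POW_CHECKS to 0 before
 * this header is included, e.g. from application code or the makefile.
 * This usage is only a sketch.
 *
 *   #define CVMX_ENABLE_POW_CHECKS 0   // disable POW state checks
 *   #include "cvmx-pow.h"
 */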

/*
 * Special type for CN78XX style SSO groups (0..255),
 * for distinction from legacy-style groups (0..15)
 */
typedef union {
	u8 xgrp;
	/* Fields that map XGRP for backwards compatibility */
	struct __attribute__((__packed__)) {
		u8 group : 5;
		u8 qus : 3;
	};
} cvmx_xgrp_t;

/*
 * Software-only structure to convey a return value
 * containing multiple information fields about a work queue entry
 */
typedef struct {
	u32 tag;
	u16 index;
	u8 grp; /* Legacy group # (0..15) */
	u8 tag_type;
} cvmx_pow_tag_info_t;

/**
 * Wait flag values for pow functions.
 */
typedef enum {
	CVMX_POW_WAIT = 1,
	CVMX_POW_NO_WAIT = 0,
} cvmx_pow_wait_t;

/**
 *  POW tag operations.  These are used in the data stored to the POW.
 */
typedef enum {
	CVMX_POW_TAG_OP_SWTAG = 0L,
	CVMX_POW_TAG_OP_SWTAG_FULL = 1L,
	CVMX_POW_TAG_OP_SWTAG_DESCH = 2L,
	CVMX_POW_TAG_OP_DESCH = 3L,
	CVMX_POW_TAG_OP_ADDWQ = 4L,
	CVMX_POW_TAG_OP_UPDATE_WQP_GRP = 5L,
	CVMX_POW_TAG_OP_SET_NSCHED = 6L,
	CVMX_POW_TAG_OP_CLR_NSCHED = 7L,
	CVMX_POW_TAG_OP_NOP = 15L
} cvmx_pow_tag_op_t;

/**
 * This structure defines the store data on a store to POW
 */
typedef union {
	u64 u64;
	struct {
		u64 no_sched : 1;
		u64 unused : 2;
		u64 index : 13;
		cvmx_pow_tag_op_t op : 4;
		u64 unused2 : 2;
		u64 qos : 3;
		u64 grp : 4;
		cvmx_pow_tag_type_t type : 3;
		u64 tag : 32;
	} s_cn38xx;
	struct {
		u64 no_sched : 1;
		cvmx_pow_tag_op_t op : 4;
		u64 unused1 : 4;
		u64 index : 11;
		u64 unused2 : 1;
		u64 grp : 6;
		u64 unused3 : 3;
		cvmx_pow_tag_type_t type : 2;
		u64 tag : 32;
	} s_cn68xx_clr;
	struct {
		u64 no_sched : 1;
		cvmx_pow_tag_op_t op : 4;
		u64 unused1 : 12;
		u64 qos : 3;
		u64 unused2 : 1;
		u64 grp : 6;
		u64 unused3 : 3;
		cvmx_pow_tag_type_t type : 2;
		u64 tag : 32;
	} s_cn68xx_add;
	struct {
		u64 no_sched : 1;
		cvmx_pow_tag_op_t op : 4;
		u64 unused1 : 16;
		u64 grp : 6;
		u64 unused3 : 3;
		cvmx_pow_tag_type_t type : 2;
		u64 tag : 32;
	} s_cn68xx_other;
	struct {
		u64 rsvd_62_63 : 2;
		u64 grp : 10;
		cvmx_pow_tag_type_t type : 2;
		u64 no_sched : 1;
		u64 rsvd_48 : 1;
		cvmx_pow_tag_op_t op : 4;
		u64 rsvd_42_43 : 2;
		u64 wqp : 42;
	} s_cn78xx_other;

} cvmx_pow_tag_req_t;

union cvmx_pow_tag_req_addr {
	u64 u64;
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 addr : 40;
	} s;
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 node : 4;
		u64 tag : 32;
		u64 reserved_0_3 : 4;
	} s_cn78xx;
};

/**
 * This structure describes the address to load stuff from POW
 */
typedef union {
	u64 u64;
	/**
	 * Address for new work request loads (did<2:0> == 0)
	 */
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 reserved_4_39 : 36;
		u64 wait : 1;
		u64 reserved_0_2 : 3;
	} swork;
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 node : 4;
		u64 reserved_32_35 : 4;
		u64 indexed : 1;
		u64 grouped : 1;
		u64 rtngrp : 1;
		u64 reserved_16_28 : 13;
		u64 index : 12;
		u64 wait : 1;
		u64 reserved_0_2 : 3;
	} swork_78xx;
	/**
	 * Address for loads to get POW internal status
	 */
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 reserved_10_39 : 30;
		u64 coreid : 4;
		u64 get_rev : 1;
		u64 get_cur : 1;
		u64 get_wqp : 1;
		u64 reserved_0_2 : 3;
	} sstatus;
	/**
	 * Address for loads to get 68XX SSO internal status
	 */
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 reserved_14_39 : 26;
		u64 coreid : 5;
		u64 reserved_6_8 : 3;
		u64 opcode : 3;
		u64 reserved_0_2 : 3;
	} sstatus_cn68xx;
	/**
	 * Address for memory loads to get POW internal state
	 */
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 reserved_16_39 : 24;
		u64 index : 11;
		u64 get_des : 1;
		u64 get_wqp : 1;
		u64 reserved_0_2 : 3;
	} smemload;
	/**
	 * Address for memory loads to get SSO internal state
	 */
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 reserved_20_39 : 20;
		u64 index : 11;
		u64 reserved_6_8 : 3;
		u64 opcode : 3;
		u64 reserved_0_2 : 3;
	} smemload_cn68xx;
	/**
	 * Address for index/pointer loads
	 */
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 reserved_9_39 : 31;
		u64 qosgrp : 4;
		u64 get_des_get_tail : 1;
		u64 get_rmt : 1;
		u64 reserved_0_2 : 3;
	} sindexload;
	/**
	 * Address for index/pointer loads to get SSO internal state
	 */
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 reserved_15_39 : 25;
		u64 qos_grp : 6;
		u64 reserved_6_8 : 3;
		u64 opcode : 3;
		u64 reserved_0_2 : 3;
	} sindexload_cn68xx;
	/**
	 * Address for NULL_RD request (did<2:0> == 4)
	 * When this is read, HW attempts to change the state to NULL if it is NULL_NULL
	 * (the hardware cannot switch from NULL_NULL to NULL if a POW entry is not available -
	 * software may need to recover by finishing another piece of work before a POW
	 * entry can ever become available.)
	 */
	struct {
		u64 mem_region : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 reserved_0_39 : 40;
	} snull_rd;
} cvmx_pow_load_addr_t;

/**
 * This structure defines the response to a load/SENDSINGLE to POW (except CSR reads)
 */
typedef union {
	u64 u64;
	/**
	 * Response to new work request loads
	 */
	struct {
		u64 no_work : 1;
		u64 pend_switch : 1;
		u64 tt : 2;
		u64 reserved_58_59 : 2;
		u64 grp : 10;
		u64 reserved_42_47 : 6;
		u64 addr : 42;
	} s_work;

	/**
	 * Result for a POW Status Load (when get_cur==0 and get_wqp==0)
	 */
	struct {
		u64 reserved_62_63 : 2;
		u64 pend_switch : 1;
		u64 pend_switch_full : 1;
		u64 pend_switch_null : 1;
		u64 pend_desched : 1;
		u64 pend_desched_switch : 1;
		u64 pend_nosched : 1;
		u64 pend_new_work : 1;
		u64 pend_new_work_wait : 1;
		u64 pend_null_rd : 1;
		u64 pend_nosched_clr : 1;
		u64 reserved_51 : 1;
		u64 pend_index : 11;
		u64 pend_grp : 4;
		u64 reserved_34_35 : 2;
		u64 pend_type : 2;
		u64 pend_tag : 32;
	} s_sstatus0;
	/**
	 * Result for a SSO Status Load (when opcode is SL_PENDTAG)
	 */
	struct {
		u64 pend_switch : 1;
		u64 pend_get_work : 1;
		u64 pend_get_work_wait : 1;
		u64 pend_nosched : 1;
		u64 pend_nosched_clr : 1;
		u64 pend_desched : 1;
		u64 pend_alloc_we : 1;
		u64 reserved_48_56 : 9;
		u64 pend_index : 11;
		u64 reserved_34_36 : 3;
		u64 pend_type : 2;
		u64 pend_tag : 32;
	} s_sstatus0_cn68xx;
	/**
	 * Result for a POW Status Load (when get_cur==0 and get_wqp==1)
	 */
	struct {
		u64 reserved_62_63 : 2;
		u64 pend_switch : 1;
		u64 pend_switch_full : 1;
		u64 pend_switch_null : 1;
		u64 pend_desched : 1;
		u64 pend_desched_switch : 1;
		u64 pend_nosched : 1;
		u64 pend_new_work : 1;
		u64 pend_new_work_wait : 1;
		u64 pend_null_rd : 1;
		u64 pend_nosched_clr : 1;
		u64 reserved_51 : 1;
		u64 pend_index : 11;
		u64 pend_grp : 4;
		u64 pend_wqp : 36;
	} s_sstatus1;
	/**
	 * Result for a SSO Status Load (when opcode is SL_PENDWQP)
	 */
	struct {
		u64 pend_switch : 1;
		u64 pend_get_work : 1;
		u64 pend_get_work_wait : 1;
		u64 pend_nosched : 1;
		u64 pend_nosched_clr : 1;
		u64 pend_desched : 1;
		u64 pend_alloc_we : 1;
		u64 reserved_51_56 : 6;
		u64 pend_index : 11;
		u64 reserved_38_39 : 2;
		u64 pend_wqp : 38;
	} s_sstatus1_cn68xx;

	struct {
		u64 pend_switch : 1;
		u64 pend_get_work : 1;
		u64 pend_get_work_wait : 1;
		u64 pend_nosched : 1;
		u64 pend_nosched_clr : 1;
		u64 pend_desched : 1;
		u64 pend_alloc_we : 1;
		u64 reserved_56 : 1;
		u64 prep_index : 12;
		u64 reserved_42_43 : 2;
		u64 pend_tag : 42;
	} s_sso_ppx_pendwqp_cn78xx;
	/**
	 * Result for a POW Status Load (when get_cur==1, get_wqp==0, and get_rev==0)
	 */
	struct {
		u64 reserved_62_63 : 2;
		u64 link_index : 11;
		u64 index : 11;
		u64 grp : 4;
		u64 head : 1;
		u64 tail : 1;
		u64 tag_type : 2;
		u64 tag : 32;
	} s_sstatus2;
	/**
	 * Result for a SSO Status Load (when opcode is SL_TAG)
	 */
	struct {
		u64 reserved_57_63 : 7;
		u64 index : 11;
		u64 reserved_45 : 1;
		u64 grp : 6;
		u64 head : 1;
		u64 tail : 1;
		u64 reserved_34_36 : 3;
		u64 tag_type : 2;
		u64 tag : 32;
	} s_sstatus2_cn68xx;

	struct {
		u64 tailc : 1;
		u64 reserved_60_62 : 3;
		u64 index : 12;
		u64 reserved_46_47 : 2;
		u64 grp : 10;
		u64 head : 1;
		u64 tail : 1;
		u64 tt : 2;
		u64 tag : 32;
	} s_sso_ppx_tag_cn78xx;
	/**
	 * Result for a POW Status Load (when get_cur==1, get_wqp==0, and get_rev==1)
	 */
	struct {
		u64 reserved_62_63 : 2;
		u64 revlink_index : 11;
		u64 index : 11;
		u64 grp : 4;
		u64 head : 1;
		u64 tail : 1;
		u64 tag_type : 2;
		u64 tag : 32;
	} s_sstatus3;
	/**
	 * Result for a SSO Status Load (when opcode is SL_WQP)
	 */
	struct {
		u64 reserved_58_63 : 6;
		u64 index : 11;
		u64 reserved_46 : 1;
		u64 grp : 6;
		u64 reserved_38_39 : 2;
		u64 wqp : 38;
	} s_sstatus3_cn68xx;

	struct {
		u64 reserved_58_63 : 6;
		u64 grp : 10;
		u64 reserved_42_47 : 6;
		u64 tag : 42;
	} s_sso_ppx_wqp_cn78xx;
	/**
	 * Result for a POW Status Load (when get_cur==1, get_wqp==1, and get_rev==0)
	 */
	struct {
		u64 reserved_62_63 : 2;
		u64 link_index : 11;
		u64 index : 11;
		u64 grp : 4;
		u64 wqp : 36;
	} s_sstatus4;
	/**
	 * Result for a SSO Status Load (when opcode is SL_LINKS)
	 */
	struct {
		u64 reserved_46_63 : 18;
		u64 index : 11;
		u64 reserved_34 : 1;
		u64 grp : 6;
		u64 head : 1;
		u64 tail : 1;
		u64 reserved_24_25 : 2;
		u64 revlink_index : 11;
		u64 reserved_11_12 : 2;
		u64 link_index : 11;
	} s_sstatus4_cn68xx;

	struct {
		u64 tailc : 1;
		u64 reserved_60_62 : 3;
		u64 index : 12;
		u64 reserved_38_47 : 10;
		u64 grp : 10;
		u64 head : 1;
		u64 tail : 1;
		u64 reserved_25 : 1;
		u64 revlink_index : 12;
		u64 link_index_vld : 1;
		u64 link_index : 12;
	} s_sso_ppx_links_cn78xx;
	/**
	 * Result for a POW Status Load (when get_cur==1, get_wqp==1, and get_rev==1)
	 */
	struct {
		u64 reserved_62_63 : 2;
		u64 revlink_index : 11;
		u64 index : 11;
		u64 grp : 4;
		u64 wqp : 36;
	} s_sstatus5;
	/**
	 * Result for POW Memory Load (get_des == 0 and get_wqp == 0)
	 */
	struct {
		u64 reserved_51_63 : 13;
		u64 next_index : 11;
		u64 grp : 4;
		u64 reserved_35 : 1;
		u64 tail : 1;
		u64 tag_type : 2;
		u64 tag : 32;
	} s_smemload0;
	/**
	 * Result for SSO Memory Load (opcode is ML_TAG)
	 */
	struct {
		u64 reserved_38_63 : 26;
		u64 tail : 1;
		u64 reserved_34_36 : 3;
		u64 tag_type : 2;
		u64 tag : 32;
	} s_smemload0_cn68xx;

	struct {
		u64 reserved_39_63 : 25;
		u64 tail : 1;
		u64 reserved_34_36 : 3;
		u64 tag_type : 2;
		u64 tag : 32;
	} s_sso_iaq_ppx_tag_cn78xx;
	/**
	 * Result for POW Memory Load (get_des == 0 and get_wqp == 1)
	 */
	struct {
		u64 reserved_51_63 : 13;
		u64 next_index : 11;
		u64 grp : 4;
		u64 wqp : 36;
	} s_smemload1;
	/**
	 * Result for SSO Memory Load (opcode is ML_WQPGRP)
	 */
	struct {
		u64 reserved_48_63 : 16;
		u64 nosched : 1;
		u64 reserved_46 : 1;
		u64 grp : 6;
		u64 reserved_38_39 : 2;
		u64 wqp : 38;
	} s_smemload1_cn68xx;

	/**
	 * Entry structures for the CN7XXX chips.
	 */
	struct {
		u64 reserved_39_63 : 25;
		u64 tailc : 1;
		u64 tail : 1;
		u64 reserved_34_36 : 3;
		u64 tt : 2;
		u64 tag : 32;
	} s_sso_ientx_tag_cn78xx;

	struct {
		u64 reserved_62_63 : 2;
		u64 head : 1;
		u64 nosched : 1;
		u64 reserved_56_59 : 4;
		u64 grp : 8;
		u64 reserved_42_47 : 6;
		u64 wqp : 42;
	} s_sso_ientx_wqpgrp_cn73xx;

	struct {
		u64 reserved_62_63 : 2;
		u64 head : 1;
		u64 nosched : 1;
		u64 reserved_58_59 : 2;
		u64 grp : 10;
		u64 reserved_42_47 : 6;
		u64 wqp : 42;
	} s_sso_ientx_wqpgrp_cn78xx;

	struct {
		u64 reserved_38_63 : 26;
		u64 pend_switch : 1;
		u64 reserved_34_36 : 3;
		u64 pend_tt : 2;
		u64 pend_tag : 32;
	} s_sso_ientx_pendtag_cn78xx;

	struct {
		u64 reserved_26_63 : 38;
		u64 prev_index : 10;
		u64 reserved_11_15 : 5;
		u64 next_index_vld : 1;
		u64 next_index : 10;
	} s_sso_ientx_links_cn73xx;

	struct {
		u64 reserved_28_63 : 36;
		u64 prev_index : 12;
		u64 reserved_13_15 : 3;
		u64 next_index_vld : 1;
		u64 next_index : 12;
	} s_sso_ientx_links_cn78xx;

	/**
	 * Result for POW Memory Load (get_des == 1)
	 */
	struct {
		u64 reserved_51_63 : 13;
		u64 fwd_index : 11;
		u64 grp : 4;
		u64 nosched : 1;
		u64 pend_switch : 1;
		u64 pend_type : 2;
		u64 pend_tag : 32;
	} s_smemload2;
	/**
	 * Result for SSO Memory Load (opcode is ML_PENTAG)
	 */
	struct {
		u64 reserved_38_63 : 26;
		u64 pend_switch : 1;
		u64 reserved_34_36 : 3;
		u64 pend_type : 2;
		u64 pend_tag : 32;
	} s_smemload2_cn68xx;

	struct {
		u64 pend_switch : 1;
		u64 pend_get_work : 1;
		u64 pend_get_work_wait : 1;
		u64 pend_nosched : 1;
		u64 pend_nosched_clr : 1;
		u64 pend_desched : 1;
		u64 pend_alloc_we : 1;
		u64 reserved_34_56 : 23;
		u64 pend_tt : 2;
		u64 pend_tag : 32;
	} s_sso_ppx_pendtag_cn78xx;
	/**
	 * Result for SSO Memory Load (opcode is ML_LINKS)
	 */
	struct {
		u64 reserved_24_63 : 40;
		u64 fwd_index : 11;
		u64 reserved_11_12 : 2;
		u64 next_index : 11;
	} s_smemload3_cn68xx;

	/**
	 * Result for POW Index/Pointer Load (get_rmt == 0/get_des_get_tail == 0)
	 */
	struct {
		u64 reserved_52_63 : 12;
		u64 free_val : 1;
		u64 free_one : 1;
		u64 reserved_49 : 1;
		u64 free_head : 11;
		u64 reserved_37 : 1;
		u64 free_tail : 11;
		u64 loc_val : 1;
		u64 loc_one : 1;
		u64 reserved_23 : 1;
		u64 loc_head : 11;
		u64 reserved_11 : 1;
		u64 loc_tail : 11;
	} sindexload0;
	/**
	 * Result for SSO Index/Pointer Load (opcode ==
	 * IPL_IQ/IPL_DESCHED/IPL_NOSCHED)
	 */
	struct {
		u64 reserved_28_63 : 36;
		u64 queue_val : 1;
		u64 queue_one : 1;
		u64 reserved_24_25 : 2;
		u64 queue_head : 11;
		u64 reserved_11_12 : 2;
		u64 queue_tail : 11;
	} sindexload0_cn68xx;
	/**
	 * Result for POW Index/Pointer Load (get_rmt == 0/get_des_get_tail == 1)
	 */
	struct {
		u64 reserved_52_63 : 12;
		u64 nosched_val : 1;
		u64 nosched_one : 1;
		u64 reserved_49 : 1;
		u64 nosched_head : 11;
		u64 reserved_37 : 1;
		u64 nosched_tail : 11;
		u64 des_val : 1;
		u64 des_one : 1;
		u64 reserved_23 : 1;
		u64 des_head : 11;
		u64 reserved_11 : 1;
		u64 des_tail : 11;
	} sindexload1;
	/**
	 * Result for SSO Index/Pointer Load (opcode == IPL_FREE0/IPL_FREE1/IPL_FREE2)
	 */
	struct {
		u64 reserved_60_63 : 4;
		u64 qnum_head : 2;
		u64 qnum_tail : 2;
		u64 reserved_28_55 : 28;
		u64 queue_val : 1;
		u64 queue_one : 1;
		u64 reserved_24_25 : 2;
		u64 queue_head : 11;
		u64 reserved_11_12 : 2;
		u64 queue_tail : 11;
	} sindexload1_cn68xx;
	/**
	 * Result for POW Index/Pointer Load (get_rmt == 1/get_des_get_tail == 0)
	 */
	struct {
		u64 reserved_39_63 : 25;
		u64 rmt_is_head : 1;
		u64 rmt_val : 1;
		u64 rmt_one : 1;
		u64 rmt_head : 36;
	} sindexload2;
	/**
	 * Result for POW Index/Pointer Load (get_rmt == 1/get_des_get_tail == 1)
	 */
	struct {
		u64 reserved_39_63 : 25;
		u64 rmt_is_head : 1;
		u64 rmt_val : 1;
		u64 rmt_one : 1;
		u64 rmt_tail : 36;
	} sindexload3;
	/**
	 * Response to NULL_RD request loads
	 */
	struct {
		u64 unused : 62;
		u64 state : 2;
	} s_null_rd;

} cvmx_pow_tag_load_resp_t;

typedef union {
	u64 u64;
	struct {
		u64 reserved_57_63 : 7;
		u64 index : 11;
		u64 reserved_45 : 1;
		u64 grp : 6;
		u64 head : 1;
		u64 tail : 1;
		u64 reserved_34_36 : 3;
		u64 tag_type : 2;
		u64 tag : 32;
	} s;
} cvmx_pow_sl_tag_resp_t;

/**
 * This structure describes the address used for stores to the POW.
 *  The store address is meaningful on stores to the POW.  The hardware assumes that an aligned
 *  64-bit store was used for all these stores.
 *  Note the assumption that the work queue entry is aligned on an 8-byte
 *  boundary (since the low-order 3 address bits must be zero).
 *  Note that not all fields are used by all operations.
 *
 *  NOTE: The following is the behavior of the pending switch bit at the PP
 *       for POW stores (i.e. when did<7:3> == 0xc)
 *     - did<2:0> == 0      => pending switch bit is set
 *     - did<2:0> == 1      => no effect on the pending switch bit
 *     - did<2:0> == 3      => pending switch bit is cleared
 *     - did<2:0> == 7      => no effect on the pending switch bit
 *     - did<2:0> == others => must not be used
 *     - No other loads/stores have an effect on the pending switch bit
 *     - The switch bus from POW can clear the pending switch bit
 *
 *  NOTE: did<2:0> == 2 is used by the HW for a special single-cycle ADDWQ command
 *  that only contains the pointer. SW must never use did<2:0> == 2.
 */
typedef union {
	u64 u64;
	struct {
		u64 mem_reg : 2;
		u64 reserved_49_61 : 13;
		u64 is_io : 1;
		u64 did : 8;
		u64 addr : 40;
	} stag;
} cvmx_pow_tag_store_addr_t; /* FIXME- this type is unused */

/**
 * Decode of the store data when an IOBDMA SENDSINGLE is sent to POW
 */
typedef union {
	u64 u64;
	struct {
		u64 scraddr : 8;
		u64 len : 8;
		u64 did : 8;
		u64 unused : 36;
		u64 wait : 1;
		u64 unused2 : 3;
	} s;
	struct {
		u64 scraddr : 8;
		u64 len : 8;
		u64 did : 8;
		u64 node : 4;
		u64 unused1 : 4;
		u64 indexed : 1;
		u64 grouped : 1;
		u64 rtngrp : 1;
		u64 unused2 : 13;
		u64 index_grp_mask : 12;
		u64 wait : 1;
		u64 unused3 : 3;
	} s_cn78xx;
} cvmx_pow_iobdma_store_t;

/* CSR typedefs have been moved to cvmx-pow-defs.h */

/* Enum for group priority parameters which need modification */
enum cvmx_sso_group_modify_mask {
	CVMX_SSO_MODIFY_GROUP_PRIORITY = 0x01,
	CVMX_SSO_MODIFY_GROUP_WEIGHT = 0x02,
	CVMX_SSO_MODIFY_GROUP_AFFINITY = 0x04
};

/**
 * @INTERNAL
 * Return the number of SSO groups for a given SoC model
 */
static inline unsigned int cvmx_sso_num_xgrp(void)
{
	if (OCTEON_IS_MODEL(OCTEON_CN78XX))
		return 256;
	if (OCTEON_IS_MODEL(OCTEON_CNF75XX))
		return 64;
	if (OCTEON_IS_MODEL(OCTEON_CN73XX))
		return 64;
	printf("ERROR: %s: Unknown model\n", __func__);
	return 0;
}

/**
 * @INTERNAL
 * Return the number of POW groups on the current model.
 * In case of CN78XX/CN73XX this is the number of equivalent
 * "legacy groups" on the chip when it is used in backward
 * compatible mode.
 */
static inline unsigned int cvmx_pow_num_groups(void)
{
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE))
		return cvmx_sso_num_xgrp() >> 3;
	else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE))
		return 64;
	else
		return 16;
}

/**
 * @INTERNAL
 * Return the number of mask-set registers.
 */
static inline unsigned int cvmx_sso_num_maskset(void)
{
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE))
		return 2;
	else
		return 1;
}

/**
 * Get the POW tag for this core. This returns the current
 * tag type, tag, group, and POW entry index associated with
 * this core. Index is only valid if the tag type isn't NULL_NULL.
 * If a tag switch is pending this routine returns the tag before
 * the tag switch, not after.
 *
 * @return Current tag
 */
static inline cvmx_pow_tag_info_t cvmx_pow_get_current_tag(void)
{
	cvmx_pow_load_addr_t load_addr;
	cvmx_pow_tag_info_t result;

	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		cvmx_sso_sl_ppx_tag_t sl_ppx_tag;
		cvmx_xgrp_t xgrp;
		int node, core;

		CVMX_SYNCS;
		node = cvmx_get_node_num();
		core = cvmx_get_local_core_num();
		sl_ppx_tag.u64 = csr_rd_node(node, CVMX_SSO_SL_PPX_TAG(core));
		result.index = sl_ppx_tag.s.index;
		result.tag_type = sl_ppx_tag.s.tt;
		result.tag = sl_ppx_tag.s.tag;

		/* Get native XGRP value */
		xgrp.xgrp = sl_ppx_tag.s.grp;

		/* Return legacy style group 0..15 */
		result.grp = xgrp.group;
	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		cvmx_pow_sl_tag_resp_t load_resp;

		load_addr.u64 = 0;
		load_addr.sstatus_cn68xx.mem_region = CVMX_IO_SEG;
		load_addr.sstatus_cn68xx.is_io = 1;
		load_addr.sstatus_cn68xx.did = CVMX_OCT_DID_TAG_TAG5;
		load_addr.sstatus_cn68xx.coreid = cvmx_get_core_num();
		load_addr.sstatus_cn68xx.opcode = 3;
		load_resp.u64 = csr_rd(load_addr.u64);
		result.grp = load_resp.s.grp;
		result.index = load_resp.s.index;
		result.tag_type = load_resp.s.tag_type;
		result.tag = load_resp.s.tag;
	} else {
		cvmx_pow_tag_load_resp_t load_resp;

		load_addr.u64 = 0;
		load_addr.sstatus.mem_region = CVMX_IO_SEG;
		load_addr.sstatus.is_io = 1;
		load_addr.sstatus.did = CVMX_OCT_DID_TAG_TAG1;
		load_addr.sstatus.coreid = cvmx_get_core_num();
		load_addr.sstatus.get_cur = 1;
		load_resp.u64 = csr_rd(load_addr.u64);
		result.grp = load_resp.s_sstatus2.grp;
		result.index = load_resp.s_sstatus2.index;
		result.tag_type = load_resp.s_sstatus2.tag_type;
		result.tag = load_resp.s_sstatus2.tag;
	}
	return result;
}
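
/*
 * Example (illustrative sketch, not part of the API): inspecting the
 * tag currently held by this core, e.g. for debug output. Only
 * cvmx_pow_get_current_tag() is from this header; the printf format
 * is an arbitrary choice.
 *
 *   cvmx_pow_tag_info_t info = cvmx_pow_get_current_tag();
 *
 *   printf("tag=0x%x type=%u grp=%u index=%u\n",
 *          info.tag, info.tag_type, info.grp, info.index);
 */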

/**
 * Get the POW WQE for this core. This returns the work queue
 * entry currently associated with this core.
 *
 * @return WQE pointer
 */
static inline cvmx_wqe_t *cvmx_pow_get_current_wqp(void)
{
	cvmx_pow_load_addr_t load_addr;
	cvmx_pow_tag_load_resp_t load_resp;

	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		cvmx_sso_sl_ppx_wqp_t sso_wqp;
		int node = cvmx_get_node_num();
		int core = cvmx_get_local_core_num();

		sso_wqp.u64 = csr_rd_node(node, CVMX_SSO_SL_PPX_WQP(core));
		if (sso_wqp.s.wqp)
			return (cvmx_wqe_t *)cvmx_phys_to_ptr(sso_wqp.s.wqp);
		return (cvmx_wqe_t *)0;
	}
	if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		load_addr.u64 = 0;
		load_addr.sstatus_cn68xx.mem_region = CVMX_IO_SEG;
		load_addr.sstatus_cn68xx.is_io = 1;
		load_addr.sstatus_cn68xx.did = CVMX_OCT_DID_TAG_TAG5;
		load_addr.sstatus_cn68xx.coreid = cvmx_get_core_num();
		load_addr.sstatus_cn68xx.opcode = 4;
		load_resp.u64 = csr_rd(load_addr.u64);
		if (load_resp.s_sstatus3_cn68xx.wqp)
			return (cvmx_wqe_t *)cvmx_phys_to_ptr(load_resp.s_sstatus3_cn68xx.wqp);
		else
			return (cvmx_wqe_t *)0;
	} else {
		load_addr.u64 = 0;
		load_addr.sstatus.mem_region = CVMX_IO_SEG;
		load_addr.sstatus.is_io = 1;
		load_addr.sstatus.did = CVMX_OCT_DID_TAG_TAG1;
		load_addr.sstatus.coreid = cvmx_get_core_num();
		load_addr.sstatus.get_cur = 1;
		load_addr.sstatus.get_wqp = 1;
		load_resp.u64 = csr_rd(load_addr.u64);
		return (cvmx_wqe_t *)cvmx_phys_to_ptr(load_resp.s_sstatus4.wqp);
	}
}

/**
 * @INTERNAL
 * Print a warning if a tag switch is pending for this core
 *
 * @param function Function name checking for a pending tag switch
 */
static inline void __cvmx_pow_warn_if_pending_switch(const char *function)
{
	u64 switch_complete;

	CVMX_MF_CHORD(switch_complete);
	cvmx_warn_if(!switch_complete, "%s called with tag switch in progress\n", function);
}

/**
 * Waits for a tag switch to complete by polling the completion bit.
 * Note that switches to NULL complete immediately and do not need
 * to be waited for.
 */
static inline void cvmx_pow_tag_sw_wait(void)
{
	const u64 TIMEOUT_MS = 10; /* 10ms timeout */
	u64 switch_complete;
	u64 start_cycle;

	if (CVMX_ENABLE_POW_CHECKS)
		start_cycle = get_timer(0);

	while (1) {
		CVMX_MF_CHORD(switch_complete);
		if (cvmx_likely(switch_complete))
			break;

		if (CVMX_ENABLE_POW_CHECKS) {
			if (cvmx_unlikely(get_timer(start_cycle) > TIMEOUT_MS)) {
				debug("WARNING: %s: Tag switch is taking a long time, possible deadlock\n",
				      __func__);
			}
		}
	}
}

/**
 * Synchronous work request.  Requests work from the POW.
 * This function does NOT wait for previous tag switches to complete,
 * so the caller must ensure that there is not a pending tag switch.
 *
 * @param wait   When set, call stalls until work becomes available, or
 *               times out. If not set, returns immediately.
 *
 * @return Returns the WQE pointer from POW. Returns NULL if no work was
 * available.
 */
static inline cvmx_wqe_t *cvmx_pow_work_request_sync_nocheck(cvmx_pow_wait_t wait)
{
	cvmx_pow_load_addr_t ptr;
	cvmx_pow_tag_load_resp_t result;

	if (CVMX_ENABLE_POW_CHECKS)
		__cvmx_pow_warn_if_pending_switch(__func__);

	ptr.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		ptr.swork_78xx.node = cvmx_get_node_num();
		ptr.swork_78xx.mem_region = CVMX_IO_SEG;
		ptr.swork_78xx.is_io = 1;
		ptr.swork_78xx.did = CVMX_OCT_DID_TAG_SWTAG;
		ptr.swork_78xx.wait = wait;
	} else {
		ptr.swork.mem_region = CVMX_IO_SEG;
		ptr.swork.is_io = 1;
		ptr.swork.did = CVMX_OCT_DID_TAG_SWTAG;
		ptr.swork.wait = wait;
	}

	result.u64 = csr_rd(ptr.u64);
	if (result.s_work.no_work)
		return NULL;
	else
		return (cvmx_wqe_t *)cvmx_phys_to_ptr(result.s_work.addr);
}

/**
 * Synchronous work request.  Requests work from the POW.
 * This function waits for any previous tag switch to complete before
 * requesting the new work.
 *
 * @param wait   When set, call stalls until work becomes available, or
 *               times out. If not set, returns immediately.
 *
 * @return Returns the WQE pointer from POW. Returns NULL if no work was
 * available.
 */
static inline cvmx_wqe_t *cvmx_pow_work_request_sync(cvmx_pow_wait_t wait)
{
	/* Must not have a switch pending when requesting work */
	cvmx_pow_tag_sw_wait();
	return (cvmx_pow_work_request_sync_nocheck(wait));
}
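
/*
 * Example (illustrative sketch, not part of the API): a minimal
 * synchronous dispatch loop. process_wqe() is a hypothetical
 * application handler, not something defined by this header.
 *
 *   while (1) {
 *           cvmx_wqe_t *wqe = cvmx_pow_work_request_sync(CVMX_POW_WAIT);
 *
 *           if (cvmx_pow_work_invalid(wqe))
 *                   continue;          // nothing available, try again
 *           process_wqe(wqe);          // application-specific handling
 *   }
 */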

/**
 * Synchronous null_rd request.  Requests a switch out of NULL_NULL POW state.
 * This function waits for any previous tag switch to complete before
 * requesting the null_rd.
 *
 * @return Returns the POW state of type cvmx_pow_tag_type_t.
 */
static inline cvmx_pow_tag_type_t cvmx_pow_work_request_null_rd(void)
{
	cvmx_pow_load_addr_t ptr;
	cvmx_pow_tag_load_resp_t result;

	/* Must not have a switch pending when requesting work */
	cvmx_pow_tag_sw_wait();

	ptr.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		ptr.swork_78xx.mem_region = CVMX_IO_SEG;
		ptr.swork_78xx.is_io = 1;
		ptr.swork_78xx.did = CVMX_OCT_DID_TAG_NULL_RD;
		ptr.swork_78xx.node = cvmx_get_node_num();
	} else {
		ptr.snull_rd.mem_region = CVMX_IO_SEG;
		ptr.snull_rd.is_io = 1;
		ptr.snull_rd.did = CVMX_OCT_DID_TAG_NULL_RD;
	}
	result.u64 = csr_rd(ptr.u64);
	return (cvmx_pow_tag_type_t)result.s_null_rd.state;
}

/**
 * Asynchronous work request.
 * Work is requested from the POW unit, and should later be checked with
 * function cvmx_pow_work_response_async.
 * This function does NOT wait for previous tag switches to complete,
 * so the caller must ensure that there is not a pending tag switch.
 *
 * @param scr_addr Scratch memory address that response will be returned to,
 *     which is either a valid WQE, or a response with the invalid bit set.
 *     Byte address, must be 8 byte aligned.
 * @param wait 1 to cause response to wait for work to become available
 *               (or timeout)
 *             0 to cause response to return immediately
 */
static inline void cvmx_pow_work_request_async_nocheck(int scr_addr, cvmx_pow_wait_t wait)
{
	cvmx_pow_iobdma_store_t data;

	if (CVMX_ENABLE_POW_CHECKS)
		__cvmx_pow_warn_if_pending_switch(__func__);

	/* scr_addr must be 8 byte aligned */
	data.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		data.s_cn78xx.node = cvmx_get_node_num();
		data.s_cn78xx.scraddr = scr_addr >> 3;
		data.s_cn78xx.len = 1;
		data.s_cn78xx.did = CVMX_OCT_DID_TAG_SWTAG;
		data.s_cn78xx.wait = wait;
	} else {
		data.s.scraddr = scr_addr >> 3;
		data.s.len = 1;
		data.s.did = CVMX_OCT_DID_TAG_SWTAG;
		data.s.wait = wait;
	}
	cvmx_send_single(data.u64);
}

/**
 * Asynchronous work request.
 * Work is requested from the POW unit, and should later be checked with
 * function cvmx_pow_work_response_async.
 * This function waits for any previous tag switch to complete before
 * requesting the new work.
 *
 * @param scr_addr Scratch memory address that response will be returned to,
 *     which is either a valid WQE, or a response with the invalid bit set.
 *     Byte address, must be 8 byte aligned.
 * @param wait 1 to cause response to wait for work to become available
 *               (or timeout)
 *             0 to cause response to return immediately
 */
static inline void cvmx_pow_work_request_async(int scr_addr, cvmx_pow_wait_t wait)
{
	/* Must not have a switch pending when requesting work */
	cvmx_pow_tag_sw_wait();
	cvmx_pow_work_request_async_nocheck(scr_addr, wait);
}

/**
 * Gets result of asynchronous work request.  Performs an IOBDMA sync
 * to wait for the response.
 *
 * @param scr_addr Scratch memory address to get result from
 *                  Byte address, must be 8 byte aligned.
 * @return Returns the WQE from the scratch register, or NULL if no work was
 *         available.
 */
static inline cvmx_wqe_t *cvmx_pow_work_response_async(int scr_addr)
{
	cvmx_pow_tag_load_resp_t result;

	CVMX_SYNCIOBDMA;
	result.u64 = cvmx_scratch_read64(scr_addr);
	if (result.s_work.no_work)
		return NULL;
	else
		return (cvmx_wqe_t *)cvmx_phys_to_ptr(result.s_work.addr);
}
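
/*
 * Example (illustrative sketch, not part of the API): overlapping the
 * IOBDMA latency of a work request with other activity. WORK_SCR is a
 * hypothetical application-chosen scratchpad byte offset (8-byte
 * aligned); process_wqe() is a hypothetical handler.
 *
 *   #define WORK_SCR 0
 *
 *   cvmx_pow_work_request_async(WORK_SCR, CVMX_POW_WAIT);
 *   while (1) {
 *           cvmx_wqe_t *wqe = cvmx_pow_work_response_async(WORK_SCR);
 *
 *           if (!cvmx_pow_work_invalid(wqe))
 *                   process_wqe(wqe);  // application-specific handling
 *           // Request the next WQE once this one is finished; the
 *           // response is read at the top of the loop, hiding part of
 *           // the request latency behind the loop overhead.
 *           cvmx_pow_work_request_async(WORK_SCR, CVMX_POW_WAIT);
 *   }
 */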

/**
 * Checks if a work queue entry pointer returned by a work
 * request is valid.  It may be invalid due to no work
 * being available or due to a timeout.
 *
 * @param wqe_ptr pointer to a work queue entry returned by the POW
 *
 * @return 0 if pointer is valid
 *         1 if invalid (no work was returned)
 */
static inline u64 cvmx_pow_work_invalid(cvmx_wqe_t *wqe_ptr)
{
	return (!wqe_ptr); /* FIXME: improve */
}

/**
 * Starts a tag switch to the provided tag value and tag type.  Completion
 * for the tag switch must be checked for separately.
 * This function does NOT update the work queue entry in DRAM to match the
 * tag value and type, so the application must keep track of these if they
 * are important to the application.
 * This tag switch command must not be used for switches to NULL, as the tag
 * switch pending bit will be set by the switch request, but never cleared by
 * the hardware.
 *
 * NOTE: This should not be used when switching from a NULL tag.  Use
 * cvmx_pow_tag_sw_full() instead.
 *
 * This function does no checks, so the caller must ensure that any previous
 * tag switch has completed.
 *
 * @param tag      new tag value
 * @param tag_type new tag type (ordered or atomic)
 */
static inline void cvmx_pow_tag_sw_nocheck(u32 tag, cvmx_pow_tag_type_t tag_type)
{
	union cvmx_pow_tag_req_addr ptr;
	cvmx_pow_tag_req_t tag_req;

	if (CVMX_ENABLE_POW_CHECKS) {
		cvmx_pow_tag_info_t current_tag;

		__cvmx_pow_warn_if_pending_switch(__func__);
		current_tag = cvmx_pow_get_current_tag();
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL_NULL,
			     "%s called with NULL_NULL tag\n", __func__);
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL,
			     "%s called with NULL tag\n", __func__);
		cvmx_warn_if((current_tag.tag_type == tag_type) && (current_tag.tag == tag),
			     "%s called to perform a tag switch to the same tag\n", __func__);
		cvmx_warn_if(
			tag_type == CVMX_POW_TAG_TYPE_NULL,
			"%s called to perform a tag switch to NULL. Use cvmx_pow_tag_sw_null() instead\n",
			__func__);
	}

	/*
	 * Note that WQE in DRAM is not updated here, as the POW does not read
	 * from DRAM once the WQE is in flight.  See hardware manual for
	 * complete details.
	 * It is the application's responsibility to keep track of the
	 * current tag value if that is important.
	 */
	tag_req.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		tag_req.s_cn78xx_other.op = CVMX_POW_TAG_OP_SWTAG;
		tag_req.s_cn78xx_other.type = tag_type;
	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		tag_req.s_cn68xx_other.op = CVMX_POW_TAG_OP_SWTAG;
		tag_req.s_cn68xx_other.tag = tag;
		tag_req.s_cn68xx_other.type = tag_type;
	} else {
		tag_req.s_cn38xx.op = CVMX_POW_TAG_OP_SWTAG;
		tag_req.s_cn38xx.tag = tag;
		tag_req.s_cn38xx.type = tag_type;
	}
	ptr.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		ptr.s_cn78xx.mem_region = CVMX_IO_SEG;
		ptr.s_cn78xx.is_io = 1;
		ptr.s_cn78xx.did = CVMX_OCT_DID_TAG_SWTAG;
		ptr.s_cn78xx.node = cvmx_get_node_num();
		ptr.s_cn78xx.tag = tag;
	} else {
		ptr.s.mem_region = CVMX_IO_SEG;
		ptr.s.is_io = 1;
		ptr.s.did = CVMX_OCT_DID_TAG_SWTAG;
	}
	/* Once this store arrives at POW, it will attempt the switch;
	 * software must wait for the switch to complete separately */
	cvmx_write_io(ptr.u64, tag_req.u64);
}

/**
 * Starts a tag switch to the provided tag value and tag type.  Completion
 * for the tag switch must be checked for separately.
 * This function does NOT update the work queue entry in DRAM to match the
 * tag value and type, so the application must keep track of these if they
 * are important to the application.
 * This tag switch command must not be used for switches to NULL, as the tag
 * switch pending bit will be set by the switch request, but never cleared by
 * the hardware.
 *
 * NOTE: This should not be used when switching from a NULL tag.  Use
 * cvmx_pow_tag_sw_full() instead.
 *
 * This function waits for any previous tag switch to complete, and also
 * displays an error on tag switches to NULL.
 *
 * @param tag      new tag value
 * @param tag_type new tag type (ordered or atomic)
 */
static inline void cvmx_pow_tag_sw(u32 tag, cvmx_pow_tag_type_t tag_type)
{
	/*
	 * Note that WQE in DRAM is not updated here, as the POW does not read
	 * from DRAM once the WQE is in flight.  See hardware manual for
	 * complete details. It is the application's responsibility to keep
	 * track of the current tag value if that is important.
	 */

	/*
	 * Ensure that there is not a pending tag switch, as a tag switch
	 * cannot be started if a previous switch is still pending.
	 */
	cvmx_pow_tag_sw_wait();
	cvmx_pow_tag_sw_nocheck(tag, tag_type);
}
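
/*
 * Example (illustrative sketch, not part of the API): serializing a
 * critical section by switching from an ordered tag to an atomic tag
 * on the same flow, then waiting for the switch before touching the
 * shared state. flow_tag and update_shared_state() are hypothetical.
 *
 *   cvmx_pow_tag_sw(flow_tag, CVMX_POW_TAG_TYPE_ATOMIC);
 *   // Work that needs no synchronization can overlap the switch here
 *   cvmx_pow_tag_sw_wait();         // only one core holds the tag now
 *   update_shared_state();          // application-specific critical section
 */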

/**
 * Starts a tag switch to the provided tag value and tag type.  Completion
 * for the tag switch must be checked for separately.
 * This function does NOT update the work queue entry in DRAM to match the
 * tag value and type, so the application must keep track of these if they
 * are important to the application.
 * This tag switch command must not be used for switches to NULL, as the tag
 * switch pending bit will be set by the switch request, but never cleared by
 * the hardware.
 *
 * This function must be used for tag switches from NULL.
 *
 * This function does no checks, so the caller must ensure that any previous
 * tag switch has completed.
 *
 * @param wqp      pointer to work queue entry to submit.  This entry is
 *                 updated to match the other parameters
 * @param tag      tag value to be assigned to work queue entry
 * @param tag_type type of tag
 * @param group    group value for the work queue entry.
 */
static inline void cvmx_pow_tag_sw_full_nocheck(cvmx_wqe_t *wqp, u32 tag,
						cvmx_pow_tag_type_t tag_type, u64 group)
{
	union cvmx_pow_tag_req_addr ptr;
	cvmx_pow_tag_req_t tag_req;
	unsigned int node = cvmx_get_node_num();
	u64 wqp_phys = cvmx_ptr_to_phys(wqp);

	if (CVMX_ENABLE_POW_CHECKS) {
		cvmx_pow_tag_info_t current_tag;

		__cvmx_pow_warn_if_pending_switch(__func__);
		current_tag = cvmx_pow_get_current_tag();
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL_NULL,
			     "%s called with NULL_NULL tag\n", __func__);
		cvmx_warn_if((current_tag.tag_type == tag_type) && (current_tag.tag == tag),
			     "%s called to perform a tag switch to the same tag\n", __func__);
		cvmx_warn_if(
			tag_type == CVMX_POW_TAG_TYPE_NULL,
			"%s called to perform a tag switch to NULL. Use cvmx_pow_tag_sw_null() instead\n",
			__func__);
		if ((wqp != cvmx_phys_to_ptr(0x80)) && cvmx_pow_get_current_wqp())
			cvmx_warn_if(wqp != cvmx_pow_get_current_wqp(),
				     "%s passed WQE(%p) doesn't match the address in the POW(%p)\n",
				     __func__, wqp, cvmx_pow_get_current_wqp());
	}

	/*
	 * Note that WQE in DRAM is not updated here, as the POW does not
	 * read from DRAM once the WQE is in flight.  See hardware manual
	 * for complete details. It is the application's responsibility to
	 * keep track of the current tag value if that is important.
	 */
	tag_req.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		unsigned int xgrp;

		if (wqp_phys != 0x80) {
			/* If WQE is valid, use its XGRP:
			 * WQE GRP is 10 bits, and is mapped
			 * to legacy GRP + QoS, includes node number.
			 */
			xgrp = wqp->word1.cn78xx.grp;
			/* Use XGRP[node] too */
			node = xgrp >> 8;
			/* Modify XGRP with legacy group # from arg */
			xgrp &= ~0xf8;
			xgrp |= 0xf8 & (group << 3);

		} else {
			/* If no WQE, build XGRP with QoS=0 and current node */
			xgrp = group << 3;
			xgrp |= node << 8;
		}
		tag_req.s_cn78xx_other.op = CVMX_POW_TAG_OP_SWTAG_FULL;
		tag_req.s_cn78xx_other.type = tag_type;
		tag_req.s_cn78xx_other.grp = xgrp;
		tag_req.s_cn78xx_other.wqp = wqp_phys;
	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		tag_req.s_cn68xx_other.op = CVMX_POW_TAG_OP_SWTAG_FULL;
		tag_req.s_cn68xx_other.tag = tag;
		tag_req.s_cn68xx_other.type = tag_type;
		tag_req.s_cn68xx_other.grp = group;
	} else {
		tag_req.s_cn38xx.op = CVMX_POW_TAG_OP_SWTAG_FULL;
		tag_req.s_cn38xx.tag = tag;
		tag_req.s_cn38xx.type = tag_type;
		tag_req.s_cn38xx.grp = group;
	}
	ptr.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		ptr.s_cn78xx.mem_region = CVMX_IO_SEG;
		ptr.s_cn78xx.is_io = 1;
		ptr.s_cn78xx.did = CVMX_OCT_DID_TAG_SWTAG;
		ptr.s_cn78xx.node = node;
		ptr.s_cn78xx.tag = tag;
	} else {
		ptr.s.mem_region = CVMX_IO_SEG;
		ptr.s.is_io = 1;
		ptr.s.did = CVMX_OCT_DID_TAG_SWTAG;
		ptr.s.addr = wqp_phys;
	}
	/* Once this store arrives at POW, it will attempt the switch;
	 * software must wait for the switch to complete separately */
	cvmx_write_io(ptr.u64, tag_req.u64);
}

/**
 * Starts a tag switch to the provided tag value and tag type.
 * Completion for the tag switch must be checked for separately.
 * This function does NOT update the work queue entry in DRAM to match the
 * tag value and type, so the application must keep track of these if they
 * are important to the application. This tag switch command must not be
 * used for switches to NULL, as the tag switch pending bit will be set by
 * the switch request, but never cleared by the hardware.
 *
 * This function must be used for tag switches from NULL.
 *
 * This function waits for any pending tag switches to complete
 * before requesting the tag switch.
 *
 * @param wqp      Pointer to work queue entry to submit.
 *     This entry is updated to match the other parameters
 * @param tag      Tag value to be assigned to work queue entry
 * @param tag_type Type of tag
 * @param group    Group value for the work queue entry.
 */
static inline void cvmx_pow_tag_sw_full(cvmx_wqe_t *wqp, u32 tag, cvmx_pow_tag_type_t tag_type,
					u64 group)
{
	/*
	 * Ensure that there is not a pending tag switch, as a tag switch cannot
	 * be started if a previous switch is still pending.
	 */
	cvmx_pow_tag_sw_wait();
	cvmx_pow_tag_sw_full_nocheck(wqp, tag, tag_type, group);
}
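
/*
 * Example (illustrative sketch, not part of the API): re-entering an
 * ordered flow after the core dropped to a NULL tag. 'wqe' is assumed
 * to be the WQE the application is still processing; the tag value and
 * group are arbitrary placeholders.
 *
 *   cvmx_pow_tag_sw_null();                 // leave the previous flow
 *   // ... processing that needs no ordering ...
 *   cvmx_pow_tag_sw_full(wqe, 0x1234, CVMX_POW_TAG_TYPE_ORDERED, 0);
 *   cvmx_pow_tag_sw_wait();                 // wait before relying on ordering
 */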

/**
 * Switch to a NULL tag, which ends any ordering or
 * synchronization provided by the POW for the current
 * work queue entry.  This operation completes immediately,
 * so completion should not be waited for.
 * This function does NOT wait for previous tag switches to complete,
 * so the caller must ensure that any previous tag switches have completed.
 */
static inline void cvmx_pow_tag_sw_null_nocheck(void)
{
	union cvmx_pow_tag_req_addr ptr;
	cvmx_pow_tag_req_t tag_req;

	if (CVMX_ENABLE_POW_CHECKS) {
		cvmx_pow_tag_info_t current_tag;

		__cvmx_pow_warn_if_pending_switch(__func__);
		current_tag = cvmx_pow_get_current_tag();
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL_NULL,
			     "%s called with NULL_NULL tag\n", __func__);
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL,
			     "%s called when we already have a NULL tag\n", __func__);
	}
	tag_req.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		tag_req.s_cn78xx_other.op = CVMX_POW_TAG_OP_SWTAG;
		tag_req.s_cn78xx_other.type = CVMX_POW_TAG_TYPE_NULL;
	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		tag_req.s_cn68xx_other.op = CVMX_POW_TAG_OP_SWTAG;
		tag_req.s_cn68xx_other.type = CVMX_POW_TAG_TYPE_NULL;
	} else {
		tag_req.s_cn38xx.op = CVMX_POW_TAG_OP_SWTAG;
		tag_req.s_cn38xx.type = CVMX_POW_TAG_TYPE_NULL;
	}
	ptr.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		ptr.s_cn78xx.mem_region = CVMX_IO_SEG;
		ptr.s_cn78xx.is_io = 1;
		ptr.s_cn78xx.did = CVMX_OCT_DID_TAG_TAG1;
		ptr.s_cn78xx.node = cvmx_get_node_num();
	} else {
		ptr.s.mem_region = CVMX_IO_SEG;
		ptr.s.is_io = 1;
		ptr.s.did = CVMX_OCT_DID_TAG_TAG1;
	}
	cvmx_write_io(ptr.u64, tag_req.u64);
}

/**
 * Switch to a NULL tag, which ends any ordering or
 * synchronization provided by the POW for the current
 * work queue entry.  This operation completes immediately,
 * so completion should not be waited for.
 * This function waits for any pending tag switches to complete
 * before requesting the switch to NULL.
 */
static inline void cvmx_pow_tag_sw_null(void)
{
	/*
	 * Ensure that there is not a pending tag switch, as a tag switch cannot
	 * be started if a previous switch is still pending.
	 */
	cvmx_pow_tag_sw_wait();
	cvmx_pow_tag_sw_null_nocheck();
}
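
/*
 * Example (illustrative sketch, not part of the API): releasing the
 * ordering guarantee early, before a long computation that needs no
 * synchronization, so other cores are not held up behind this one.
 * long_running_computation() is a hypothetical application function.
 *
 *   cvmx_pow_tag_sw_null();         // give up ordering/atomicity now
 *   long_running_computation();
 */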

/**
 * Submits work to an input queue.
 * This function updates the work queue entry in DRAM to match the arguments given.
 * Note that the tag provided is for the work queue entry submitted, and
 * is unrelated to the tag that the core currently holds.
 *
 * @param wqp      pointer to work queue entry to submit.
 *                 This entry is updated to match the other parameters
 * @param tag      tag value to be assigned to work queue entry
 * @param tag_type type of tag
 * @param qos      Input queue to add to.
 * @param grp      group value for the work queue entry.
 */
static inline void cvmx_pow_work_submit(cvmx_wqe_t *wqp, u32 tag, cvmx_pow_tag_type_t tag_type,
					u64 qos, u64 grp)
{
	union cvmx_pow_tag_req_addr ptr;
	cvmx_pow_tag_req_t tag_req;

	tag_req.u64 = 0;
	ptr.u64 = 0;

	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		unsigned int node = cvmx_get_node_num();
		unsigned int xgrp;

		xgrp = (grp & 0x1f) << 3;
		xgrp |= (qos & 7);
		xgrp |= 0x300 & (node << 8);

		wqp->word1.cn78xx.rsvd_0 = 0;
		wqp->word1.cn78xx.rsvd_1 = 0;
		wqp->word1.cn78xx.tag = tag;
		wqp->word1.cn78xx.tag_type = tag_type;
		wqp->word1.cn78xx.grp = xgrp;
		CVMX_SYNCWS;

		tag_req.s_cn78xx_other.op = CVMX_POW_TAG_OP_ADDWQ;
		tag_req.s_cn78xx_other.type = tag_type;
		tag_req.s_cn78xx_other.wqp = cvmx_ptr_to_phys(wqp);
		tag_req.s_cn78xx_other.grp = xgrp;

		ptr.s_cn78xx.did = 0x66; // CVMX_OCT_DID_TAG_TAG6;
		ptr.s_cn78xx.mem_region = CVMX_IO_SEG;
		ptr.s_cn78xx.is_io = 1;
		ptr.s_cn78xx.node = node;
		ptr.s_cn78xx.tag = tag;
	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		/* Reset all reserved bits */
		wqp->word1.cn68xx.zero_0 = 0;
		wqp->word1.cn68xx.zero_1 = 0;
		wqp->word1.cn68xx.zero_2 = 0;
		wqp->word1.cn68xx.qos = qos;
		wqp->word1.cn68xx.grp = grp;

		wqp->word1.tag = tag;
		wqp->word1.tag_type = tag_type;

		tag_req.s_cn68xx_add.op = CVMX_POW_TAG_OP_ADDWQ;
		tag_req.s_cn68xx_add.type = tag_type;
		tag_req.s_cn68xx_add.tag = tag;
		tag_req.s_cn68xx_add.qos = qos;
		tag_req.s_cn68xx_add.grp = grp;

		ptr.s.mem_region = CVMX_IO_SEG;
		ptr.s.is_io = 1;
		ptr.s.did = CVMX_OCT_DID_TAG_TAG1;
		ptr.s.addr = cvmx_ptr_to_phys(wqp);
	} else {
		/* Reset all reserved bits */
		wqp->word1.cn38xx.zero_2 = 0;
		wqp->word1.cn38xx.qos = qos;
		wqp->word1.cn38xx.grp = grp;

		wqp->word1.tag = tag;
		wqp->word1.tag_type = tag_type;

		tag_req.s_cn38xx.op = CVMX_POW_TAG_OP_ADDWQ;
		tag_req.s_cn38xx.type = tag_type;
		tag_req.s_cn38xx.tag = tag;
		tag_req.s_cn38xx.qos = qos;
		tag_req.s_cn38xx.grp = grp;

		ptr.s.mem_region = CVMX_IO_SEG;
		ptr.s.is_io = 1;
		ptr.s.did = CVMX_OCT_DID_TAG_TAG1;
		ptr.s.addr = cvmx_ptr_to_phys(wqp);
	}
	/* SYNC write to memory before the work submit.
	 * This is necessary as POW may read values from DRAM at this time */
	CVMX_SYNCWS;
	cvmx_write_io(ptr.u64, tag_req.u64);
}
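
/*
 * Example (illustrative sketch, not part of the API): handing a unit of
 * work to the scheduler. alloc_wqe() and fill_in_packet_data() stand in
 * for application-specific allocation (e.g. from an FPA pool) and setup;
 * the tag, QoS level and group are arbitrary placeholders.
 *
 *   cvmx_wqe_t *wqe = alloc_wqe();          // e.g. from an FPA pool
 *
 *   fill_in_packet_data(wqe);               // application-specific setup
 *   cvmx_pow_work_submit(wqe, 0x100, CVMX_POW_TAG_TYPE_ORDERED,
 *                        0, 1);             // qos 0, group 1
 */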

/**
 * This function sets the group mask for a core.  The group mask
 * indicates which groups each core will accept work from.
 *
 * @param core_num   core to apply mask to
 * @param mask   Group mask, one bit for up to 64 groups.
 *               Each 1 bit in the mask enables the core to accept work from
 *               the corresponding group.
 *               The CN68XX supports 64 groups, earlier models only support
 *               16 groups.
 *
 * The CN78XX in backwards compatibility mode allows up to 32 groups,
 * so the 'mask' argument has one bit for each of the legacy
 * groups, and a '1' in the mask causes a total of 8 groups,
 * which share the legacy group number and 8 QoS levels,
 * to be enabled for the calling processor core.
 * A '0' in the mask will disable the current core
 * from receiving work from the associated group.
 */
static inline void cvmx_pow_set_group_mask(u64 core_num, u64 mask)
{
	u64 valid_mask;
	int num_groups = cvmx_pow_num_groups();

	if (num_groups >= 64)
		valid_mask = ~0ull;
	else
		valid_mask = (1ull << num_groups) - 1;

	if ((mask & valid_mask) == 0) {
		printf("ERROR: %s empty group mask disables work on core# %llu, ignored.\n",
		       __func__, (unsigned long long)core_num);
		return;
	}
	cvmx_warn_if(mask & (~valid_mask), "%s group number range exceeded: %#llx\n", __func__,
		     (unsigned long long)mask);

	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		unsigned int mask_set;
		cvmx_sso_ppx_sx_grpmskx_t grp_msk;
		unsigned int core, node;
		unsigned int rix;  /* Register index */
		unsigned int grp;  /* Legacy group # */
		unsigned int bit;  /* bit index */
		unsigned int xgrp; /* native group # */

		node = cvmx_coremask_core_to_node(core_num);
		core = cvmx_coremask_core_on_node(core_num);

		/* 78xx: 256 groups divided into 4 X 64 bit registers */
		/* 73xx: 64 groups are in one register */
		for (rix = 0; rix < (cvmx_sso_num_xgrp() >> 6); rix++) {
			grp_msk.u64 = 0;
			for (bit = 0; bit < 64; bit++) {
				/* 8-bit native XGRP number */
				xgrp = (rix << 6) | bit;
				/* Legacy 5-bit group number */
				grp = (xgrp >> 3) & 0x1f;
				/* Inspect legacy mask by legacy group */
				if (mask & (1ull << grp))
					grp_msk.s.grp_msk |= 1ull << bit;
				/* Pre-set to all 0's */
			}
			for (mask_set = 0; mask_set < cvmx_sso_num_maskset(); mask_set++) {
				csr_wr_node(node, CVMX_SSO_PPX_SX_GRPMSKX(core, mask_set, rix),
					    grp_msk.u64);
			}
		}
	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		cvmx_sso_ppx_grp_msk_t grp_msk;

		grp_msk.s.grp_msk = mask;
		csr_wr(CVMX_SSO_PPX_GRP_MSK(core_num), grp_msk.u64);
	} else {
		cvmx_pow_pp_grp_mskx_t grp_msk;

		grp_msk.u64 = csr_rd(CVMX_POW_PP_GRP_MSKX(core_num));
		grp_msk.s.grp_msk = mask & 0xffff;
		csr_wr(CVMX_POW_PP_GRP_MSKX(core_num), grp_msk.u64);
	}
}
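
/*
 * Example (illustrative sketch, not part of the API): restricting the
 * calling core so it only receives work from groups 0 and 2. The group
 * choice is an arbitrary placeholder.
 *
 *   u64 mask = (1ull << 0) | (1ull << 2);
 *
 *   cvmx_pow_set_group_mask(cvmx_get_core_num(), mask);
 */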

/**
 * This function gets the group mask for a core.  The group mask
 * indicates which groups each core will accept work from.
 *
 * @param core_num   core to get the mask for
 * @return	Group mask, one bit for up to 64 groups.
 *               Each 1 bit in the mask enables the core to accept work from
 *               the corresponding group.
 *               The CN68XX supports 64 groups, earlier models only support
 *               16 groups.
 *
 * The CN78XX in backwards compatibility mode allows up to 32 groups,
 * so the returned mask has one bit for each of the legacy
 * groups, and a '1' in the mask means that a total of 8 groups,
 * which share the legacy group number and 8 QoS levels,
 * are enabled for the given processor core.
 * A '0' in the mask means the core does not receive work
 * from the associated group.
 */
static inline u64 cvmx_pow_get_group_mask(u64 core_num)
{
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		cvmx_sso_ppx_sx_grpmskx_t grp_msk;
		unsigned int core, node, i;
		int rix; /* Register index */
		u64 mask = 0;

		node = cvmx_coremask_core_to_node(core_num);
		core = cvmx_coremask_core_on_node(core_num);

		/* 78xx: 256 groups divided into 4 X 64 bit registers */
		/* 73xx: 64 groups are in one register */
		for (rix = (cvmx_sso_num_xgrp() >> 6) - 1; rix >= 0; rix--) {
			/* Read only mask_set=0 (both sets are written the same) */
			grp_msk.u64 = csr_rd_node(node, CVMX_SSO_PPX_SX_GRPMSKX(core, 0, rix));
			/* ASSUME: (this is how mask bits got written) */
			/* grp_mask[7:0]: all bits 0..7 are same */
			/* grp_mask[15:8]: all bits 8..15 are same, etc */
			/* DO: mask[7:0] = grp_mask.u64[56,48,40,32,24,16,8,0] */
			for (i = 0; i < 8; i++)
				mask |= (grp_msk.u64 & ((u64)1 << (i * 8))) >> (7 * i);
			/* we collected 8 MSBs in mask[7:0], <<=8 and continue */
			if (cvmx_likely(rix != 0))
				mask <<= 8;
		}
		return mask & 0xFFFFFFFF;
	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		cvmx_sso_ppx_grp_msk_t grp_msk;

		grp_msk.u64 = csr_rd(CVMX_SSO_PPX_GRP_MSK(core_num));
		return grp_msk.u64;
	} else {
		cvmx_pow_pp_grp_mskx_t grp_msk;

		grp_msk.u64 = csr_rd(CVMX_POW_PP_GRP_MSKX(core_num));
		return grp_msk.u64 & 0xffff;
	}
}

/*
 * Returns 0 if 78xx(73xx,75xx) is not programmed in legacy compatible mode
 * Returns 1 if 78xx(73xx,75xx) is programmed in legacy compatible mode
 * Returns 1 if the octeon model is not 78xx(73xx,75xx)
 */
static inline u64 cvmx_pow_is_legacy78mode(u64 core_num)
{
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		cvmx_sso_ppx_sx_grpmskx_t grp_msk0, grp_msk1;
		unsigned int core, node, i;
		int rix; /* Register index */
		u64 mask = 0;

		node = cvmx_coremask_core_to_node(core_num);
		core = cvmx_coremask_core_on_node(core_num);

		/* 78xx: 256 groups divided into 4 x 64-bit registers */
		/* 73xx: 64 groups are in one register */
		/* 1) For the 78_SSO to be in legacy compatible mode,
		 * both mask sets must be programmed the same.
		 */
		for (rix = (cvmx_sso_num_xgrp() >> 6) - 1; rix >= 0; rix--) {
			grp_msk0.u64 = csr_rd_node(node, CVMX_SSO_PPX_SX_GRPMSKX(core, 0, rix));
			grp_msk1.u64 = csr_rd_node(node, CVMX_SSO_PPX_SX_GRPMSKX(core, 1, rix));
			if (grp_msk0.u64 != grp_msk1.u64)
				return 0;
			/* (this is how the mask bits should be written) */
			/* grp_mask[7:0]: all bits 0..7 are the same */
			/* grp_mask[15:8]: all bits 8..15 are the same, etc. */
			/* 2) For the 78_SSO to be in legacy compatible mode,
			 * the above must also hold (test only mask_set=0).
			 */
			for (i = 0; i < 8; i++) {
				mask = (grp_msk0.u64 >> (i << 3)) & 0xFF;
				if (!(mask == 0 || mask == 0xFF))
					return 0;
			}
		}
		/* If we get here, the 78_SSO is in legacy compatible mode */
	}
	return 1; /* the SSO/POW is in legacy (or compatible) mode */
}
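
/*
 * Usage sketch (illustrative, not from the original source): fall back
 * to the legacy group API only while the SSO is still in a
 * legacy-compatible configuration.
 *
 *   if (cvmx_pow_is_legacy78mode(cvmx_get_core_num()))
 *	     cvmx_pow_set_group_mask(cvmx_get_core_num(), 0x3);
 */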

/**
 * This function sets POW static priorities for a core. Each input queue has
 * an associated priority value.
 *
 * @param core_num   core to apply priorities to
 * @param priority   Vector of 8 priorities, one per POW Input Queue (0-7).
 *                   Highest priority is 0 and lowest is 7. A priority value
 *                   of 0xF instructs POW to skip the Input Queue when
 *                   scheduling to this specific core.
 *                   NOTE: priorities should not have gaps in values, meaning
 *                         {0,1,1,1,1,1,1,1} is a valid configuration while
 *                         {0,2,2,2,2,2,2,2} is not.
 */
static inline void cvmx_pow_set_priority(u64 core_num, const u8 priority[])
{
	/* Detect gaps between priorities and flag error */
	if (!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		int i;
		u32 prio_mask = 0;

		for (i = 0; i < 8; i++)
			if (priority[i] != 0xF)
				prio_mask |= 1 << priority[i];

		if (prio_mask ^ ((1 << cvmx_pop(prio_mask)) - 1)) {
			debug("ERROR: POW static priorities should be contiguous (0x%llx)\n",
			      (unsigned long long)prio_mask);
			return;
		}
	}

	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		unsigned int group;
		unsigned int node = cvmx_get_node_num();
		cvmx_sso_grpx_pri_t grp_pri;

		/* No need to pre-set grp_pri.s.weight/affinity here;
		 * they are overwritten by the csr_rd_node() below.
		 */
		for (group = 0; group < cvmx_sso_num_xgrp(); group++) {
			grp_pri.u64 = csr_rd_node(node, CVMX_SSO_GRPX_PRI(group));
			grp_pri.s.pri = priority[group & 0x7];
			csr_wr_node(node, CVMX_SSO_GRPX_PRI(group), grp_pri.u64);
		}

	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		cvmx_sso_ppx_qos_pri_t qos_pri;

		qos_pri.u64 = csr_rd(CVMX_SSO_PPX_QOS_PRI(core_num));
		qos_pri.s.qos0_pri = priority[0];
		qos_pri.s.qos1_pri = priority[1];
		qos_pri.s.qos2_pri = priority[2];
		qos_pri.s.qos3_pri = priority[3];
		qos_pri.s.qos4_pri = priority[4];
		qos_pri.s.qos5_pri = priority[5];
		qos_pri.s.qos6_pri = priority[6];
		qos_pri.s.qos7_pri = priority[7];
		csr_wr(CVMX_SSO_PPX_QOS_PRI(core_num), qos_pri.u64);
	} else {
		/* POW priorities on CN5xxx .. CN66XX */
		cvmx_pow_pp_grp_mskx_t grp_msk;

		grp_msk.u64 = csr_rd(CVMX_POW_PP_GRP_MSKX(core_num));
		grp_msk.s.qos0_pri = priority[0];
		grp_msk.s.qos1_pri = priority[1];
		grp_msk.s.qos2_pri = priority[2];
		grp_msk.s.qos3_pri = priority[3];
		grp_msk.s.qos4_pri = priority[4];
		grp_msk.s.qos5_pri = priority[5];
		grp_msk.s.qos6_pri = priority[6];
		grp_msk.s.qos7_pri = priority[7];

		csr_wr(CVMX_POW_PP_GRP_MSKX(core_num), grp_msk.u64);
	}
}
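
/*
 * Usage sketch (illustrative, not from the original source): give input
 * queue 0 the highest priority, run queues 1..6 at the next level, and
 * skip queue 7 entirely. The values are contiguous, as required above
 * (0xF entries are ignored by the gap check).
 *
 *   static const u8 prio[8] = { 0, 1, 1, 1, 1, 1, 1, 0xF };
 *
 *   cvmx_pow_set_priority(cvmx_get_core_num(), prio);
 */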

/**
 * This function gets POW static priorities for a core. Each input queue has
 * an associated priority value.
 *
 * @param[in]  core_num core to get priorities for
 * @param[out] priority Pointer to u8[] where to return priorities
 *			Vector of 8 priorities, one per POW Input Queue (0-7).
 *			Highest priority is 0 and lowest is 7. A priority value
 *			of 0xF instructs POW to skip the Input Queue when
 *			scheduling to this specific core.
 *                   NOTE: priorities should not have gaps in values, meaning
 *                         {0,1,1,1,1,1,1,1} is a valid configuration while
 *                         {0,2,2,2,2,2,2,2} is not.
 */
static inline void cvmx_pow_get_priority(u64 core_num, u8 priority[])
{
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		unsigned int group;
		unsigned int node = cvmx_get_node_num();
		cvmx_sso_grpx_pri_t grp_pri;

		/* Read priority only from the first 8 groups; */
		/* the remaining groups are programmed the same (periodically) */
		for (group = 0; group < 8; group++) {
			grp_pri.u64 = csr_rd_node(node, CVMX_SSO_GRPX_PRI(group));
			priority[group] = grp_pri.s.pri;
		}

	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		cvmx_sso_ppx_qos_pri_t qos_pri;

		qos_pri.u64 = csr_rd(CVMX_SSO_PPX_QOS_PRI(core_num));
		priority[0] = qos_pri.s.qos0_pri;
		priority[1] = qos_pri.s.qos1_pri;
		priority[2] = qos_pri.s.qos2_pri;
		priority[3] = qos_pri.s.qos3_pri;
		priority[4] = qos_pri.s.qos4_pri;
		priority[5] = qos_pri.s.qos5_pri;
		priority[6] = qos_pri.s.qos6_pri;
		priority[7] = qos_pri.s.qos7_pri;
	} else {
		/* POW priorities on CN5xxx .. CN66XX */
		cvmx_pow_pp_grp_mskx_t grp_msk;

		grp_msk.u64 = csr_rd(CVMX_POW_PP_GRP_MSKX(core_num));
		priority[0] = grp_msk.s.qos0_pri;
		priority[1] = grp_msk.s.qos1_pri;
		priority[2] = grp_msk.s.qos2_pri;
		priority[3] = grp_msk.s.qos3_pri;
		priority[4] = grp_msk.s.qos4_pri;
		priority[5] = grp_msk.s.qos5_pri;
		priority[6] = grp_msk.s.qos6_pri;
		priority[7] = grp_msk.s.qos7_pri;
	}

	/* Detect gaps between priorities and flag error - (optional) */
	if (!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		int i;
		u32 prio_mask = 0;

		for (i = 0; i < 8; i++)
			if (priority[i] != 0xF)
				prio_mask |= 1 << priority[i];

		if (prio_mask ^ ((1 << cvmx_pop(prio_mask)) - 1)) {
			debug("ERROR:%s: POW static priorities should be contiguous (0x%llx)\n",
			      __func__, (unsigned long long)prio_mask);
			return;
		}
	}
}

static inline void cvmx_sso_get_group_priority(int node, cvmx_xgrp_t xgrp, int *priority,
					       int *weight, int *affinity)
{
	cvmx_sso_grpx_pri_t grp_pri;

	if (!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		debug("ERROR: %s is not supported on this chip\n", __func__);
		return;
	}

	grp_pri.u64 = csr_rd_node(node, CVMX_SSO_GRPX_PRI(xgrp.xgrp));
	*affinity = grp_pri.s.affinity;
	*priority = grp_pri.s.pri;
	*weight = grp_pri.s.weight;
}

/**
 * Performs a tag switch and then an immediate deschedule. This completes
 * immediately, so completion must not be waited for.  This function does NOT
 * update the wqe in DRAM to match arguments.
 *
 * This function does NOT wait for any prior tag switches to complete, so the
 * calling code must do this.
 *
 * Note the following CAVEAT of the Octeon HW behavior when
 * re-scheduling DE-SCHEDULEd items whose (next) state is
 * ORDERED:
 *   - If there are no switches pending at the time that the
 *     HW executes the de-schedule, the HW will only re-schedule
 *     the head of the FIFO associated with the given tag. This
 *     means that in many respects, the HW treats this ORDERED
 *     tag as an ATOMIC tag. Note that in the SWTAG_DESCH
 *     case (to an ORDERED tag), the HW will do the switch
 *     before the deschedule whenever it is possible to do
 *     the switch immediately, so it may often look like
 *     this case.
 *   - If there is a pending switch to ORDERED at the time
 *     the HW executes the de-schedule, the HW will perform
 *     the switch at the time it re-schedules, and will be
 *     able to reschedule any/all of the entries with the
 *     same tag.
 * Due to this behavior, the RECOMMENDATION to software is
 * that they have a (next) state of ATOMIC when they
 * DE-SCHEDULE. If an ORDERED tag is what was really desired,
 * SW can choose to immediately switch to an ORDERED tag
 * after the work (that has an ATOMIC tag) is re-scheduled.
 * Note that since there are never any tag switches pending
 * when the HW re-schedules, this switch can be IMMEDIATE upon
 * the reception of the pointer during the re-schedule.
 *
 * @param tag      New tag value
 * @param tag_type New tag type
 * @param group    New group value
 * @param no_sched Control whether this work queue entry will be rescheduled.
 *                 - 1 : don't schedule this work
 *                 - 0 : allow this work to be scheduled.
 */
static inline void cvmx_pow_tag_sw_desched_nocheck(u32 tag, cvmx_pow_tag_type_t tag_type, u64 group,
						   u64 no_sched)
{
	union cvmx_pow_tag_req_addr ptr;
	cvmx_pow_tag_req_t tag_req;

	if (CVMX_ENABLE_POW_CHECKS) {
		cvmx_pow_tag_info_t current_tag;

		__cvmx_pow_warn_if_pending_switch(__func__);
		current_tag = cvmx_pow_get_current_tag();
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL_NULL,
			     "%s called with NULL_NULL tag\n", __func__);
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL,
			     "%s called with NULL tag. Deschedule not allowed from NULL state\n",
			     __func__);
		cvmx_warn_if((current_tag.tag_type != CVMX_POW_TAG_TYPE_ATOMIC) &&
			     (tag_type != CVMX_POW_TAG_TYPE_ATOMIC),
			     "%s called where neither the before nor the after tag is ATOMIC\n",
			     __func__);
	}
	tag_req.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		cvmx_wqe_t *wqp = cvmx_pow_get_current_wqp();

		if (!wqp) {
			debug("ERROR: Failed to get WQE, %s\n", __func__);
			return;
		}
		group &= 0x1f;
		wqp->word1.cn78xx.tag = tag;
		wqp->word1.cn78xx.tag_type = tag_type;
		wqp->word1.cn78xx.grp = group << 3;
		CVMX_SYNCWS;
		tag_req.s_cn78xx_other.op = CVMX_POW_TAG_OP_SWTAG_DESCH;
		tag_req.s_cn78xx_other.type = tag_type;
		tag_req.s_cn78xx_other.grp = group << 3;
		tag_req.s_cn78xx_other.no_sched = no_sched;
	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		group &= 0x3f;
		tag_req.s_cn68xx_other.op = CVMX_POW_TAG_OP_SWTAG_DESCH;
		tag_req.s_cn68xx_other.tag = tag;
		tag_req.s_cn68xx_other.type = tag_type;
		tag_req.s_cn68xx_other.grp = group;
		tag_req.s_cn68xx_other.no_sched = no_sched;
	} else {
		group &= 0x0f;
		tag_req.s_cn38xx.op = CVMX_POW_TAG_OP_SWTAG_DESCH;
		tag_req.s_cn38xx.tag = tag;
		tag_req.s_cn38xx.type = tag_type;
		tag_req.s_cn38xx.grp = group;
		tag_req.s_cn38xx.no_sched = no_sched;
	}
	ptr.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		ptr.s.mem_region = CVMX_IO_SEG;
		ptr.s.is_io = 1;
		ptr.s.did = CVMX_OCT_DID_TAG_TAG3;
		ptr.s_cn78xx.node = cvmx_get_node_num();
		ptr.s_cn78xx.tag = tag;
	} else {
		ptr.s.mem_region = CVMX_IO_SEG;
		ptr.s.is_io = 1;
		ptr.s.did = CVMX_OCT_DID_TAG_TAG3;
	}
	cvmx_write_io(ptr.u64, tag_req.u64);
}

/**
 * Performs a tag switch and then an immediate deschedule. This completes
 * immediately, so completion must not be waited for.  This function does NOT
 * update the wqe in DRAM to match arguments.
 *
 * This function waits for any prior tag switches to complete, so the
 * calling code may call this function with a pending tag switch.
 *
 * Note the following CAVEAT of the Octeon HW behavior when
 * re-scheduling DE-SCHEDULEd items whose (next) state is
 * ORDERED:
 *   - If there are no switches pending at the time that the
 *     HW executes the de-schedule, the HW will only re-schedule
 *     the head of the FIFO associated with the given tag. This
 *     means that in many respects, the HW treats this ORDERED
 *     tag as an ATOMIC tag. Note that in the SWTAG_DESCH
 *     case (to an ORDERED tag), the HW will do the switch
 *     before the deschedule whenever it is possible to do
 *     the switch immediately, so it may often look like
 *     this case.
 *   - If there is a pending switch to ORDERED at the time
 *     the HW executes the de-schedule, the HW will perform
 *     the switch at the time it re-schedules, and will be
 *     able to reschedule any/all of the entries with the
 *     same tag.
 * Due to this behavior, the RECOMMENDATION to software is
 * that they have a (next) state of ATOMIC when they
 * DE-SCHEDULE. If an ORDERED tag is what was really desired,
 * SW can choose to immediately switch to an ORDERED tag
 * after the work (that has an ATOMIC tag) is re-scheduled.
 * Note that since there are never any tag switches pending
 * when the HW re-schedules, this switch can be IMMEDIATE upon
 * the reception of the pointer during the re-schedule.
 *
 * @param tag      New tag value
 * @param tag_type New tag type
 * @param group    New group value
 * @param no_sched Control whether this work queue entry will be rescheduled.
 *                 - 1 : don't schedule this work
 *                 - 0 : allow this work to be scheduled.
 */
static inline void cvmx_pow_tag_sw_desched(u32 tag, cvmx_pow_tag_type_t tag_type, u64 group,
					   u64 no_sched)
{
	/* Need to make sure any writes to the work queue entry are complete */
	CVMX_SYNCWS;
	/* Ensure that there is not a pending tag switch, as a tag switch cannot be started
	 * if a previous switch is still pending.
	 */
	cvmx_pow_tag_sw_wait();
	cvmx_pow_tag_sw_desched_nocheck(tag, tag_type, group, no_sched);
}
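
/*
 * Usage sketch (illustrative, not from the original source): per the
 * caveat above, deschedule with an ATOMIC (next) tag and allow the
 * entry to be rescheduled later. 'tag' and 'group' stand for the
 * application's own values.
 *
 *   cvmx_pow_tag_sw_desched(tag, CVMX_POW_TAG_TYPE_ATOMIC, group, 0);
 */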

/**
 * Deschedules the current work queue entry.
 *
 * @param no_sched no schedule flag value to be set on the work queue entry.
 *     If this is set the entry will not be rescheduled.
 */
static inline void cvmx_pow_desched(u64 no_sched)
{
	union cvmx_pow_tag_req_addr ptr;
	cvmx_pow_tag_req_t tag_req;

	if (CVMX_ENABLE_POW_CHECKS) {
		cvmx_pow_tag_info_t current_tag;

		__cvmx_pow_warn_if_pending_switch(__func__);
		current_tag = cvmx_pow_get_current_tag();
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL_NULL,
			     "%s called with NULL_NULL tag\n", __func__);
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL,
			     "%s called with NULL tag. Deschedule not expected from NULL state\n",
			     __func__);
	}
	/* Need to make sure any writes to the work queue entry are complete */
	CVMX_SYNCWS;

	tag_req.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		tag_req.s_cn78xx_other.op = CVMX_POW_TAG_OP_DESCH;
		tag_req.s_cn78xx_other.no_sched = no_sched;
	} else if (octeon_has_feature(OCTEON_FEATURE_CN68XX_WQE)) {
		tag_req.s_cn68xx_other.op = CVMX_POW_TAG_OP_DESCH;
		tag_req.s_cn68xx_other.no_sched = no_sched;
	} else {
		tag_req.s_cn38xx.op = CVMX_POW_TAG_OP_DESCH;
		tag_req.s_cn38xx.no_sched = no_sched;
	}
	ptr.u64 = 0;
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		ptr.s_cn78xx.mem_region = CVMX_IO_SEG;
		ptr.s_cn78xx.is_io = 1;
		ptr.s_cn78xx.did = CVMX_OCT_DID_TAG_TAG3;
		ptr.s_cn78xx.node = cvmx_get_node_num();
	} else {
		ptr.s.mem_region = CVMX_IO_SEG;
		ptr.s.is_io = 1;
		ptr.s.did = CVMX_OCT_DID_TAG_TAG3;
	}
	cvmx_write_io(ptr.u64, tag_req.u64);
}

/******************************************************************************/
/* OCTEON3-specific functions.                                                */
/******************************************************************************/
/**
 * This function sets the affinity of a group to the cores in 78xx.
 * It sets up all the cores in core_mask to accept work from the specified group.
 *
 * @param xgrp	Group to accept work from, 0 - 255.
 * @param core_mask	Mask of all the cores which will accept work from this group
 * @param mask_set	Every core has a set of 2 masks which can be set to accept work
 *     from 256 groups. At the time of get_work, cores can choose which mask_set
 *     to get work from. 'mask_set' values range from 0 to 3, where each of the
 *     two bits represents a mask set. Cores will be added to the mask set with
 *     corresponding bit set, and removed from the mask set with corresponding
 *     bit clear.
 * Note: cores can only accept work from SSO groups on the same node,
 * so the node number for the group is derived from the core number.
 */
static inline void cvmx_sso_set_group_core_affinity(cvmx_xgrp_t xgrp,
						    const struct cvmx_coremask *core_mask,
						    u8 mask_set)
{
	cvmx_sso_ppx_sx_grpmskx_t grp_msk;
	int core;
	int grp_index = xgrp.xgrp >> 6;
	int bit_pos = xgrp.xgrp % 64;

	if (!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		debug("ERROR: %s is not supported on this chip\n", __func__);
		return;
	}
	cvmx_coremask_for_each_core(core, core_mask)
	{
		unsigned int node, ncore;
		u64 reg_addr;

		node = cvmx_coremask_core_to_node(core);
		ncore = cvmx_coremask_core_on_node(core);

		reg_addr = CVMX_SSO_PPX_SX_GRPMSKX(ncore, 0, grp_index);
		grp_msk.u64 = csr_rd_node(node, reg_addr);

		if (mask_set & 1)
			grp_msk.s.grp_msk |= (1ull << bit_pos);
		else
			grp_msk.s.grp_msk &= ~(1ull << bit_pos);

		csr_wr_node(node, reg_addr, grp_msk.u64);

		reg_addr = CVMX_SSO_PPX_SX_GRPMSKX(ncore, 1, grp_index);
		grp_msk.u64 = csr_rd_node(node, reg_addr);

		if (mask_set & 2)
			grp_msk.s.grp_msk |= (1ull << bit_pos);
		else
			grp_msk.s.grp_msk &= ~(1ull << bit_pos);

		csr_wr_node(node, reg_addr, grp_msk.u64);
	}
}
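
/*
 * Usage sketch (illustrative, not from the original source): let every
 * core in a coremask accept work from native group 32 via mask set 0
 * only. The designated-initializer form of cvmx_xgrp_t and the use of
 * cvmx_sysinfo_get()->core_mask are assumptions made for this example.
 *
 *   cvmx_xgrp_t xgrp = { .xgrp = 32 };
 *
 *   cvmx_sso_set_group_core_affinity(xgrp, &cvmx_sysinfo_get()->core_mask, 1);
 */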

/**
 * This function sets the priority and group affinity arbitration for each group.
 *
 * @param node		Node number
 * @param xgrp	Group 0 - 255 to apply mask parameters to
 * @param priority	Priority of the group relative to other groups
 *     0x0 - highest priority
 *     0x7 - lowest priority
 * @param weight	Cross-group arbitration weight to apply to this group.
 *     valid values are 1-63
 *     h/w default is 0x3f
 * @param affinity	Processor affinity arbitration weight to apply to this group.
 *     If zero, affinity is disabled.
 *     valid values are 0-15
 *     h/w default is 0xf.
 * @param modify_mask   mask of the parameters which need to be modified.
 *     enum cvmx_sso_group_modify_mask
 *     to modify only priority -- set bit0
 *     to modify only weight   -- set bit1
 *     to modify only affinity -- set bit2
 */
static inline void cvmx_sso_set_group_priority(int node, cvmx_xgrp_t xgrp, int priority, int weight,
					       int affinity,
					       enum cvmx_sso_group_modify_mask modify_mask)
{
	cvmx_sso_grpx_pri_t grp_pri;

	if (!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		debug("ERROR: %s is not supported on this chip\n", __func__);
		return;
	}
	if (weight <= 0)
		weight = 0x3f; /* Force HW default when out of range */

	grp_pri.u64 = csr_rd_node(node, CVMX_SSO_GRPX_PRI(xgrp.xgrp));
	if (grp_pri.s.weight == 0)
		grp_pri.s.weight = 0x3f;
	if (modify_mask & CVMX_SSO_MODIFY_GROUP_PRIORITY)
		grp_pri.s.pri = priority;
	if (modify_mask & CVMX_SSO_MODIFY_GROUP_WEIGHT)
		grp_pri.s.weight = weight;
	if (modify_mask & CVMX_SSO_MODIFY_GROUP_AFFINITY)
		grp_pri.s.affinity = affinity;
	csr_wr_node(node, CVMX_SSO_GRPX_PRI(xgrp.xgrp), grp_pri.u64);
}
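
/*
 * Usage sketch (illustrative, not from the original source): raise
 * group 5 to the highest priority while leaving its weight and
 * affinity untouched (only the priority bit is set in modify_mask,
 * so the weight/affinity arguments are ignored here).
 *
 *   cvmx_xgrp_t xgrp = { .xgrp = 5 };
 *
 *   cvmx_sso_set_group_priority(cvmx_get_node_num(), xgrp, 0, 0x3f, 0xf,
 *				 CVMX_SSO_MODIFY_GROUP_PRIORITY);
 */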

/**
 * Asynchronous work request.
 * Only works on CN78XX style SSO.
 *
 * Work is requested from the SSO unit, and should later be checked with
 * function cvmx_pow_work_response_async.
 * This function does NOT wait for previous tag switches to complete,
 * so the caller must ensure that there is not a pending tag switch.
 *
 * @param scr_addr Scratch memory address that response will be returned to,
 *     which is either a valid WQE, or a response with the invalid bit set.
 *     Byte address, must be 8 byte aligned.
 * @param xgrp  Group to receive work for (0-255).
 * @param wait
 *     1 to cause response to wait for work to become available (or timeout)
 *     0 to cause response to return immediately
 */
static inline void cvmx_sso_work_request_grp_async_nocheck(int scr_addr, cvmx_xgrp_t xgrp,
							   cvmx_pow_wait_t wait)
{
	cvmx_pow_iobdma_store_t data;
	unsigned int node = cvmx_get_node_num();

	if (CVMX_ENABLE_POW_CHECKS) {
		__cvmx_pow_warn_if_pending_switch(__func__);
		cvmx_warn_if(!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE), "Not CN78XX");
	}
	/* scr_addr must be 8 byte aligned */
	data.u64 = 0;
	data.s_cn78xx.scraddr = scr_addr >> 3;
	data.s_cn78xx.len = 1;
	data.s_cn78xx.did = CVMX_OCT_DID_TAG_SWTAG;
	data.s_cn78xx.grouped = 1;
	data.s_cn78xx.index_grp_mask = (node << 8) | xgrp.xgrp;
	data.s_cn78xx.wait = wait;
	data.s_cn78xx.node = node;

	cvmx_send_single(data.u64);
}

/**
 * Synchronous work request from the node-local SSO without verifying
 * pending tag switch. It requests work from a specific SSO group.
 *
 * @param lgrp The local group number (within the SSO of the node of the caller)
 *     from which to get the work.
 * @param wait When set, call stalls until work becomes available, or times out.
 *     If not set, returns immediately.
 *
 * @return Returns the WQE pointer from SSO.
 *     Returns NULL if no work was available.
 */
static inline void *cvmx_sso_work_request_grp_sync_nocheck(unsigned int lgrp, cvmx_pow_wait_t wait)
{
	cvmx_pow_load_addr_t ptr;
	cvmx_pow_tag_load_resp_t result;
	unsigned int node = cvmx_get_node_num() & 3;

	if (CVMX_ENABLE_POW_CHECKS) {
		__cvmx_pow_warn_if_pending_switch(__func__);
		cvmx_warn_if(!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE), "Not CN78XX");
	}
	ptr.u64 = 0;
	ptr.swork_78xx.mem_region = CVMX_IO_SEG;
	ptr.swork_78xx.is_io = 1;
	ptr.swork_78xx.did = CVMX_OCT_DID_TAG_SWTAG;
	ptr.swork_78xx.node = node;
	ptr.swork_78xx.grouped = 1;
	ptr.swork_78xx.index = (lgrp & 0xff) | node << 8;
	ptr.swork_78xx.wait = wait;

	result.u64 = csr_rd(ptr.u64);
	if (result.s_work.no_work)
		return NULL;
	else
		return cvmx_phys_to_ptr(result.s_work.addr);
}

/**
 * Synchronous work request from the node-local SSO.
 * It requests work from a specific SSO group.
 * This function waits for any previous tag switch to complete before
 * requesting the new work.
 *
 * @param lgrp The node-local group number from which to get the work.
 * @param wait When set, call stalls until work becomes available, or times out.
 *     If not set, returns immediately.
 *
 * @return The WQE pointer or NULL, if work is not available.
 */
static inline void *cvmx_sso_work_request_grp_sync(unsigned int lgrp, cvmx_pow_wait_t wait)
{
	cvmx_pow_tag_sw_wait();
	return cvmx_sso_work_request_grp_sync_nocheck(lgrp, wait);
}
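
/*
 * Usage sketch (illustrative, not from the original source): a simple
 * dispatch loop pulling work from local group 0. process_wqe() is a
 * hypothetical application handler, not part of this API.
 *
 *   for (;;) {
 *	     cvmx_wqe_t *wqe = cvmx_sso_work_request_grp_sync(0, CVMX_POW_WAIT);
 *
 *	     if (wqe)
 *		     process_wqe(wqe);
 *   }
 */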

/**
 * This function sets the group mask for a core.  The group mask bits
 * indicate which groups each core will accept work from.
 *
 * @param core_num	Processor core to apply mask to.
 * @param mask_set	7XXX has 2 sets of masks per core.
 *     Bit 0 represents the first mask set, bit 1 -- the second.
 * @param xgrp_mask	Group mask array.
 *     The total number of groups is divided into a number of
 *     64-bit mask sets. Each bit in the mask, if set, enables
 *     the core to accept work from the corresponding group.
 *
 * NOTE: Each core can be configured to accept work in accordance to both
 * mask sets, with the first having higher precedence over the second,
 * or to accept work in accordance to just one of the two mask sets.
 * The 'core_num' argument represents a processor core on any node
 * in a coherent multi-chip system.
 *
 * If the 'mask_set' argument is 3, both mask sets are configured
 * with the same value (which is not typically the intention),
 * so keep in mind the function needs to be called twice
 * to set a different value into each of the mask sets,
 * once with 'mask_set=1' and a second time with 'mask_set=2'.
 */
static inline void cvmx_pow_set_xgrp_mask(u64 core_num, u8 mask_set, const u64 xgrp_mask[])
{
	unsigned int grp, node, core;
	u64 reg_addr;

	if (!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		debug("ERROR: %s is not supported on this chip\n", __func__);
		return;
	}

	if (CVMX_ENABLE_POW_CHECKS) {
		cvmx_warn_if(((mask_set < 1) || (mask_set > 3)), "Invalid mask set");
	}

	if ((mask_set < 1) || (mask_set > 3))
		mask_set = 3;

	node = cvmx_coremask_core_to_node(core_num);
	core = cvmx_coremask_core_on_node(core_num);

	for (grp = 0; grp < (cvmx_sso_num_xgrp() >> 6); grp++) {
		if (mask_set & 1) {
			reg_addr = CVMX_SSO_PPX_SX_GRPMSKX(core, 0, grp);
			csr_wr_node(node, reg_addr, xgrp_mask[grp]);
		}
		if (mask_set & 2) {
			reg_addr = CVMX_SSO_PPX_SX_GRPMSKX(core, 1, grp);
			csr_wr_node(node, reg_addr, xgrp_mask[grp]);
		}
	}
}
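
/*
 * Usage sketch (illustrative, not from the original source): program
 * mask set 0 so the calling core accepts work from native groups 0..63
 * only. The array covers all four 64-group registers on CN78XX; the
 * unused entries stay zero.
 *
 *   u64 xmask[4] = { ~0ull, 0, 0, 0 };
 *
 *   cvmx_pow_set_xgrp_mask(cvmx_get_core_num(), 1, xmask);
 */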

/**
 * This function gets the group mask for a core.  The group mask bits
 * indicate which groups each core will accept work from.
 *
 * @param core_num	Processor core to get the mask for.
 * @param mask_set	7XXX has 2 sets of masks per core.
 *     Bit 0 represents the first mask set, bit 1 -- the second.
 * @param xgrp_mask	Provide pointer to u64 mask[8] output array.
 *     The total number of groups is divided into a number of
 *     64-bit mask sets. Each set bit in the mask indicates that
 *     the core accepts work from the corresponding group.
 *
 * NOTE: Each core can be configured to accept work in accordance to both
 * mask sets, with the first having higher precedence over the second,
 * or to accept work in accordance to just one of the two mask sets.
 * The 'core_num' argument represents a processor core on any node
 * in a coherent multi-chip system.
 */
static inline void cvmx_pow_get_xgrp_mask(u64 core_num, u8 mask_set, u64 *xgrp_mask)
{
	cvmx_sso_ppx_sx_grpmskx_t grp_msk;
	unsigned int grp, node, core;
	u64 reg_addr;

	if (!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		debug("ERROR: %s is not supported on this chip\n", __func__);
		return;
	}

	if (CVMX_ENABLE_POW_CHECKS) {
		cvmx_warn_if(mask_set != 1 && mask_set != 2, "Invalid mask set");
	}

	node = cvmx_coremask_core_to_node(core_num);
	core = cvmx_coremask_core_on_node(core_num);

	for (grp = 0; grp < cvmx_sso_num_xgrp() >> 6; grp++) {
		if (mask_set & 1) {
			reg_addr = CVMX_SSO_PPX_SX_GRPMSKX(core, 0, grp);
			grp_msk.u64 = csr_rd_node(node, reg_addr);
			xgrp_mask[grp] = grp_msk.s.grp_msk;
		}
		if (mask_set & 2) {
			reg_addr = CVMX_SSO_PPX_SX_GRPMSKX(core, 1, grp);
			grp_msk.u64 = csr_rd_node(node, reg_addr);
			xgrp_mask[grp] = grp_msk.s.grp_msk;
		}
	}
}

/**
 * Executes SSO SWTAG command.
 * This is similar to cvmx_pow_tag_sw() function, but uses linear
 * (vs. integrated group-qos) group index.
 */
static inline void cvmx_pow_tag_sw_node(cvmx_wqe_t *wqp, u32 tag, cvmx_pow_tag_type_t tag_type,
					int node)
{
	union cvmx_pow_tag_req_addr ptr;
	cvmx_pow_tag_req_t tag_req;

	if (cvmx_unlikely(!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE))) {
		debug("ERROR: %s is supported on OCTEON3 only\n", __func__);
		return;
	}
	CVMX_SYNCWS;
	cvmx_pow_tag_sw_wait();

	if (CVMX_ENABLE_POW_CHECKS) {
		cvmx_pow_tag_info_t current_tag;

		__cvmx_pow_warn_if_pending_switch(__func__);
		current_tag = cvmx_pow_get_current_tag();
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL_NULL,
			     "%s called with NULL_NULL tag\n", __func__);
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL,
			     "%s called with NULL tag\n", __func__);
		cvmx_warn_if((current_tag.tag_type == tag_type) && (current_tag.tag == tag),
			     "%s called to perform a tag switch to the same tag\n", __func__);
		cvmx_warn_if(
			tag_type == CVMX_POW_TAG_TYPE_NULL,
			"%s called to perform a tag switch to NULL. Use cvmx_pow_tag_sw_null() instead\n",
			__func__);
	}
	wqp->word1.cn78xx.tag = tag;
	wqp->word1.cn78xx.tag_type = tag_type;
	CVMX_SYNCWS;

	tag_req.u64 = 0;
	tag_req.s_cn78xx_other.op = CVMX_POW_TAG_OP_SWTAG;
	tag_req.s_cn78xx_other.type = tag_type;

	ptr.u64 = 0;
	ptr.s_cn78xx.mem_region = CVMX_IO_SEG;
	ptr.s_cn78xx.is_io = 1;
	ptr.s_cn78xx.did = CVMX_OCT_DID_TAG_SWTAG;
	ptr.s_cn78xx.node = node;
	ptr.s_cn78xx.tag = tag;
	cvmx_write_io(ptr.u64, tag_req.u64);
}

/**
 * Executes SSO SWTAG_FULL command.
 * This is similar to cvmx_pow_tag_sw_full() function, but
 * uses linear (vs. integrated group-qos) group index.
 */
static inline void cvmx_pow_tag_sw_full_node(cvmx_wqe_t *wqp, u32 tag, cvmx_pow_tag_type_t tag_type,
					     u8 xgrp, int node)
{
	union cvmx_pow_tag_req_addr ptr;
	cvmx_pow_tag_req_t tag_req;
	u16 gxgrp;

	if (cvmx_unlikely(!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE))) {
		debug("ERROR: %s is supported on OCTEON3 only\n", __func__);
		return;
	}
	/* Ensure that there is not a pending tag switch, as a tag switch cannot
	 * be started if a previous switch is still pending.
	 */
	CVMX_SYNCWS;
	cvmx_pow_tag_sw_wait();

	if (CVMX_ENABLE_POW_CHECKS) {
		cvmx_pow_tag_info_t current_tag;

		__cvmx_pow_warn_if_pending_switch(__func__);
		current_tag = cvmx_pow_get_current_tag();
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL_NULL,
			     "%s called with NULL_NULL tag\n", __func__);
		cvmx_warn_if((current_tag.tag_type == tag_type) && (current_tag.tag == tag),
			     "%s called to perform a tag switch to the same tag\n", __func__);
		cvmx_warn_if(
			tag_type == CVMX_POW_TAG_TYPE_NULL,
			"%s called to perform a tag switch to NULL. Use cvmx_pow_tag_sw_null() instead\n",
			__func__);
		if ((wqp != cvmx_phys_to_ptr(0x80)) && cvmx_pow_get_current_wqp())
			cvmx_warn_if(wqp != cvmx_pow_get_current_wqp(),
				     "%s passed WQE(%p) doesn't match the address in the POW(%p)\n",
				     __func__, wqp, cvmx_pow_get_current_wqp());
	}
	gxgrp = node;
	gxgrp = gxgrp << 8 | xgrp;
	wqp->word1.cn78xx.grp = gxgrp;
	wqp->word1.cn78xx.tag = tag;
	wqp->word1.cn78xx.tag_type = tag_type;
	CVMX_SYNCWS;

	tag_req.u64 = 0;
	tag_req.s_cn78xx_other.op = CVMX_POW_TAG_OP_SWTAG_FULL;
	tag_req.s_cn78xx_other.type = tag_type;
	tag_req.s_cn78xx_other.grp = gxgrp;
	tag_req.s_cn78xx_other.wqp = cvmx_ptr_to_phys(wqp);

	ptr.u64 = 0;
	ptr.s_cn78xx.mem_region = CVMX_IO_SEG;
	ptr.s_cn78xx.is_io = 1;
	ptr.s_cn78xx.did = CVMX_OCT_DID_TAG_SWTAG;
	ptr.s_cn78xx.node = node;
	ptr.s_cn78xx.tag = tag;
	cvmx_write_io(ptr.u64, tag_req.u64);
}

/**
 * Submits work to an SSO group on any OCI node.
 * This function updates the work queue entry in DRAM to match
 * the arguments given.
 * Note that the tag provided is for the work queue entry submitted,
 * and is unrelated to the tag that the core currently holds.
 *
 * @param wqp pointer to work queue entry to submit.
 * This entry is updated to match the other parameters
 * @param tag tag value to be assigned to work queue entry
 * @param tag_type type of tag
 * @param xgrp native CN78XX group in the range 0..255
 * @param node The OCI node number for the target group
 *
 * When this function is called on a model prior to CN78XX, which does
 * not support OCI nodes, the 'node' argument is ignored, and the 'xgrp'
 * parameter is converted into 'qos' (the lower 3 bits) and 'grp' (the higher
 * 5 bits), following the backward-compatibility scheme of translating
 * between new and old style group numbers.
 */
static inline void cvmx_pow_work_submit_node(cvmx_wqe_t *wqp, u32 tag, cvmx_pow_tag_type_t tag_type,
					     u8 xgrp, u8 node)
{
	union cvmx_pow_tag_req_addr ptr;
	cvmx_pow_tag_req_t tag_req;
	u16 group;

	if (cvmx_unlikely(!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE))) {
		debug("ERROR: %s is supported on OCTEON3 only\n", __func__);
		return;
	}
	group = node;
	group = group << 8 | xgrp;
	wqp->word1.cn78xx.tag = tag;
	wqp->word1.cn78xx.tag_type = tag_type;
	wqp->word1.cn78xx.grp = group;
	CVMX_SYNCWS;

	tag_req.u64 = 0;
	tag_req.s_cn78xx_other.op = CVMX_POW_TAG_OP_ADDWQ;
	tag_req.s_cn78xx_other.type = tag_type;
	tag_req.s_cn78xx_other.wqp = cvmx_ptr_to_phys(wqp);
	tag_req.s_cn78xx_other.grp = group;

	ptr.u64 = 0;
	ptr.s_cn78xx.did = 0x66; // CVMX_OCT_DID_TAG_TAG6;
	ptr.s_cn78xx.mem_region = CVMX_IO_SEG;
	ptr.s_cn78xx.is_io = 1;
	ptr.s_cn78xx.node = node;
	ptr.s_cn78xx.tag = tag;

	/* SYNC write to memory before the work submit.  This is necessary
	 * as POW may read values from DRAM at this time.
	 */
	CVMX_SYNCWS;
	cvmx_write_io(ptr.u64, tag_req.u64);
}

/**
 * Executes the SSO SWTAG_DESCHED operation.
 * This is similar to the cvmx_pow_tag_sw_desched() function, but
 * uses linear (vs. unified group-qos) group index.
 */
static inline void cvmx_pow_tag_sw_desched_node(cvmx_wqe_t *wqe, u32 tag,
						cvmx_pow_tag_type_t tag_type, u8 xgrp, u64 no_sched,
						u8 node)
{
	union cvmx_pow_tag_req_addr ptr;
	cvmx_pow_tag_req_t tag_req;
	u16 group;

	if (cvmx_unlikely(!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE))) {
		debug("ERROR: %s is supported on OCTEON3 only\n", __func__);
		return;
	}
	/* Need to make sure any writes to the work queue entry are complete */
	CVMX_SYNCWS;
	/*
	 * Ensure that there is not a pending tag switch, as a tag switch cannot
	 * be started if a previous switch is still pending.
	 */
	cvmx_pow_tag_sw_wait();

	if (CVMX_ENABLE_POW_CHECKS) {
		cvmx_pow_tag_info_t current_tag;

		__cvmx_pow_warn_if_pending_switch(__func__);
		current_tag = cvmx_pow_get_current_tag();
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL_NULL,
			     "%s called with NULL_NULL tag\n", __func__);
		cvmx_warn_if(current_tag.tag_type == CVMX_POW_TAG_TYPE_NULL,
			     "%s called with NULL tag. Deschedule not allowed from NULL state\n",
			     __func__);
		cvmx_warn_if((current_tag.tag_type != CVMX_POW_TAG_TYPE_ATOMIC) &&
			     (tag_type != CVMX_POW_TAG_TYPE_ATOMIC),
			     "%s called where neither the before nor the after tag is ATOMIC\n",
			     __func__);
	}
	group = node;
	group = group << 8 | xgrp;
	wqe->word1.cn78xx.tag = tag;
	wqe->word1.cn78xx.tag_type = tag_type;
	wqe->word1.cn78xx.grp = group;
	CVMX_SYNCWS;

	tag_req.u64 = 0;
	tag_req.s_cn78xx_other.op = CVMX_POW_TAG_OP_SWTAG_DESCH;
	tag_req.s_cn78xx_other.type = tag_type;
	tag_req.s_cn78xx_other.grp = group;
	tag_req.s_cn78xx_other.no_sched = no_sched;

	ptr.u64 = 0;
	ptr.s.mem_region = CVMX_IO_SEG;
	ptr.s.is_io = 1;
	ptr.s.did = CVMX_OCT_DID_TAG_TAG3;
	ptr.s_cn78xx.node = node;
	ptr.s_cn78xx.tag = tag;
	cvmx_write_io(ptr.u64, tag_req.u64);
}

/* Executes the UPD_WQP_GRP SSO operation.
 *
 * @param wqp  Pointer to the new work queue entry to switch to.
 * @param xgrp SSO group in the range 0..255
 *
 * NOTE: The operation can be performed only on the local node.
 */
static inline void cvmx_sso_update_wqp_group(cvmx_wqe_t *wqp, u8 xgrp)
{
	union cvmx_pow_tag_req_addr addr;
	cvmx_pow_tag_req_t data;
	int node = cvmx_get_node_num();
	int group = node << 8 | xgrp;

	if (!octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		debug("ERROR: %s is not supported on this chip\n", __func__);
		return;
	}
	wqp->word1.cn78xx.grp = group;
	CVMX_SYNCWS;

	data.u64 = 0;
	data.s_cn78xx_other.op = CVMX_POW_TAG_OP_UPDATE_WQP_GRP;
	data.s_cn78xx_other.grp = group;
	data.s_cn78xx_other.wqp = cvmx_ptr_to_phys(wqp);

	addr.u64 = 0;
	addr.s_cn78xx.mem_region = CVMX_IO_SEG;
	addr.s_cn78xx.is_io = 1;
	addr.s_cn78xx.did = CVMX_OCT_DID_TAG_TAG1;
	addr.s_cn78xx.node = node;
	cvmx_write_io(addr.u64, data.u64);
}

/******************************************************************************/
/* Define usage of bits within the 32 bit tag values.                         */
/******************************************************************************/
/*
 * Number of bits of the tag used by software.  The SW bits
 * are always a contiguous block of the high bits, starting at bit 31.
 * The hardware bits are always the low bits.  By default, the top 8 bits
 * of the tag are reserved for software, and the low 24 are set by the IPD unit.
 */
#define CVMX_TAG_SW_BITS  (8)
#define CVMX_TAG_SW_SHIFT (32 - CVMX_TAG_SW_BITS)

/* Below is the list of values for the top 8 bits of the tag. */
/*
 * Tag values with top byte of this value are reserved for internal executive
 * uses
 */
#define CVMX_TAG_SW_BITS_INTERNAL 0x1

/*
 * The executive divides the remaining 24 bits as follows:
 * the upper 8 bits (bits 23 - 16 of the tag) define a subgroup;
 * the lower 16 bits (bits 15 - 0 of the tag) are the value within
 * the subgroup. Note that this section describes the format of tags generated
 * by software - refer to the hardware documentation for a description of the
 * tag values generated by the packet input hardware.
 * Subgroups are defined here
 */

/* Mask for the value portion of the tag */
#define CVMX_TAG_SUBGROUP_MASK	0xFFFF
#define CVMX_TAG_SUBGROUP_SHIFT 16
#define CVMX_TAG_SUBGROUP_PKO	0x1

/* End of executive tag subgroup definitions */

/* The remaining software bit values 0x2 - 0xff are available
 * for application use */

/**
 * This function creates a 32 bit tag value from the two values provided.
 *
 * @param sw_bits The upper bits (number depends on configuration) are set
 *     to this value.  The remainder of bits are set by the hw_bits parameter.
 * @param hw_bits The lower bits (number depends on configuration) are set
 *     to this value.  The remainder of bits are set by the sw_bits parameter.
 *
 * @return 32 bit value of the combined hw and sw bits.
 */
static inline u32 cvmx_pow_tag_compose(u64 sw_bits, u64 hw_bits)
{
	return (((sw_bits & cvmx_build_mask(CVMX_TAG_SW_BITS)) << CVMX_TAG_SW_SHIFT) |
		(hw_bits & cvmx_build_mask(32 - CVMX_TAG_SW_BITS)));
}

/**
 * Extracts the bits allocated for software use from the tag
 *
 * @param tag    32 bit tag value
 *
 * @return N bit software tag value, where N is configurable with
 *     the CVMX_TAG_SW_BITS define
 */
static inline u32 cvmx_pow_tag_get_sw_bits(u64 tag)
{
	return ((tag >> (32 - CVMX_TAG_SW_BITS)) & cvmx_build_mask(CVMX_TAG_SW_BITS));
}

/**
 * Extracts the bits allocated for hardware use from the tag
 *
 * @param tag    32 bit tag value
 *
 * @return (32 - N) bit hardware tag value, where N is configurable with
 *     the CVMX_TAG_SW_BITS define
 */
static inline u32 cvmx_pow_tag_get_hw_bits(u64 tag)
{
	return (tag & cvmx_build_mask(32 - CVMX_TAG_SW_BITS));
}
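
/*
 * Usage sketch (illustrative, not from the original source): compose a
 * tag from the internal-use software byte and a 24-bit value, then
 * split it back apart. 'flow_hash' is a hypothetical application value.
 *
 *   u32 tag = cvmx_pow_tag_compose(CVMX_TAG_SW_BITS_INTERNAL, flow_hash);
 *   u32 sw  = cvmx_pow_tag_get_sw_bits(tag); // == CVMX_TAG_SW_BITS_INTERNAL
 *   u32 hw  = cvmx_pow_tag_get_hw_bits(tag); // == flow_hash & 0xFFFFFF
 */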

static inline u64 cvmx_sso3_get_wqe_count(int node)
{
	cvmx_sso_grpx_aq_cnt_t aq_cnt;
	unsigned int grp = 0;
	u64 cnt = 0;

	for (grp = 0; grp < cvmx_sso_num_xgrp(); grp++) {
		aq_cnt.u64 = csr_rd_node(node, CVMX_SSO_GRPX_AQ_CNT(grp));
		cnt += aq_cnt.s.aq_cnt;
	}
	return cnt;
}

static inline u64 cvmx_sso_get_total_wqe_count(void)
{
	if (octeon_has_feature(OCTEON_FEATURE_CN78XX_WQE)) {
		int node = cvmx_get_node_num();

		return cvmx_sso3_get_wqe_count(node);
	} else if (OCTEON_IS_MODEL(OCTEON_CN68XX)) {
		cvmx_sso_iq_com_cnt_t sso_iq_com_cnt;

		sso_iq_com_cnt.u64 = csr_rd(CVMX_SSO_IQ_COM_CNT);
		return (sso_iq_com_cnt.s.iq_cnt);
	} else {
		cvmx_pow_iq_com_cnt_t pow_iq_com_cnt;

		pow_iq_com_cnt.u64 = csr_rd(CVMX_POW_IQ_COM_CNT);
		return (pow_iq_com_cnt.s.iq_cnt);
	}
}

/**
 * Store the current POW internal state into the supplied
 * buffer. It is recommended that you pass a buffer of at least
 * 128KB. The format of the capture may change based on SDK
 * version and Octeon chip.
 *
 * @param buffer Buffer to store capture into
 * @param buffer_size The size of the supplied buffer
 *
 * @return Zero on success, negative on failure
 */
int cvmx_pow_capture(void *buffer, int buffer_size);

/**
 * Dump a POW capture to the console in a human readable format.
 *
 * @param buffer POW capture from cvmx_pow_capture()
 * @param buffer_size Size of the buffer
 */
void cvmx_pow_display(void *buffer, int buffer_size);

/**
 * Return the number of POW entries supported by this chip
 *
 * @return Number of POW entries
 */
int cvmx_pow_get_num_entries(void);
int cvmx_pow_get_dump_size(void);

/**
 * This allocates 'count' SSO groups on the specified node to the
 * calling application. These groups will be for the exclusive use of the
 * application until they are freed.
 * @param node The numa node for the allocation.
 * @param base_group Pointer to the initial group, -1 to allocate anywhere.
 * @param count  The number of consecutive groups to allocate.
 * @return 0 on success and -1 on failure.
 */
int cvmx_sso_reserve_group_range(int node, int *base_group, int count);
#define cvmx_sso_allocate_group_range cvmx_sso_reserve_group_range
int cvmx_sso_reserve_group(int node);
#define cvmx_sso_allocate_group cvmx_sso_reserve_group
int cvmx_sso_release_group_range(int node, int base_group, int count);
int cvmx_sso_release_group(int node, int group);
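
/*
 * Usage sketch (illustrative, not from the original source): reserve
 * four consecutive groups anywhere on node 0, then release them when
 * the application is done with them.
 *
 *   int base = -1;
 *
 *   if (cvmx_sso_reserve_group_range(0, &base, 4) == 0) {
 *	     ... groups base .. base + 3 are now exclusively owned ...
 *	     cvmx_sso_release_group_range(0, base, 4);
 *   }
 */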

/**
 * Show integrated SSO configuration.
 *
 * @param node	   node number
 */
int cvmx_sso_config_dump(unsigned int node);

/**
 * Show integrated SSO statistics.
 *
 * @param node	   node number
 */
int cvmx_sso_stats_dump(unsigned int node);

/**
 * Clear integrated SSO statistics.
 *
 * @param node	   node number
 */
int cvmx_sso_stats_clear(unsigned int node);

/**
 * Show SSO core-group affinity and priority per node (multi-node systems)
 */
void cvmx_pow_mask_priority_dump_node(unsigned int node, struct cvmx_coremask *avail_coremask);

/**
 * Show POW/SSO core-group affinity and priority (legacy, single-node systems)
 */
static inline void cvmx_pow_mask_priority_dump(struct cvmx_coremask *avail_coremask)
{
	cvmx_pow_mask_priority_dump_node(0 /*node */, avail_coremask);
}

/**
 * Show SSO performance counters (multi-node systems)
 */
void cvmx_pow_show_perf_counters_node(unsigned int node);

/**
 * Show POW/SSO performance counters (legacy, single-node systems)
 */
static inline void cvmx_pow_show_perf_counters(void)
{
	cvmx_pow_show_perf_counters_node(0 /*node */);
}

#endif /* __CVMX_POW_H__ */