xref: /illumos-gate/usr/src/uts/sun4/io/px/px_msiq.c (revision d3d50737)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * px_msiq.c
28  */
29 
30 #include <sys/types.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/sysmacros.h>
36 #include <sys/machsystm.h>	/* intr_dist_add */
37 #include <sys/modctl.h>
38 #include <sys/disp.h>
39 #include <sys/stat.h>
40 #include <sys/ddi_impldefs.h>
41 #include "px_obj.h"
42 
43 static int px_msiq_get_props(px_t *px_p);
44 
45 /*
46  * px_msiq_attach()
47  */
48 int
49 px_msiq_attach(px_t *px_p)
50 {
51 	px_ib_t		*ib_p = px_p->px_ib_p;
52 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
53 	int		qcnt, i, ret = DDI_SUCCESS;
54 
55 	DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_attach\n");
56 
57 	/*
58 	 * Check for all MSIQ related properties and
59 	 * save all information.
60 	 *
61 	 * Avaialble MSIQs and its properties.
62 	 */
63 	if (px_msiq_get_props(px_p) != DDI_SUCCESS)
64 		return (DDI_FAILURE);
65 
66 	/*
67 	 * 10% of available MSIQs are reserved for the PCIe messages.
68 	 * Around 90% of available MSIQs are reserved for the MSI/Xs.
69 	 */
70 	msiq_state_p->msiq_msg_qcnt = howmany(msiq_state_p->msiq_cnt, 10);
71 
72 	qcnt = MIN(msiq_state_p->msiq_msg_qcnt, px_max_msiq_msgs);
73 	msiq_state_p->msiq_msg_qcnt = qcnt = MAX(qcnt, px_min_msiq_msgs);
74 	msiq_state_p->msiq_msi_qcnt = msiq_state_p->msiq_cnt - qcnt;
75 
76 	msiq_state_p->msiq_1st_msi_qid = msiq_state_p->msiq_1st_msiq_id;
77 	msiq_state_p->msiq_1st_msg_qid = msiq_state_p->msiq_1st_msiq_id +
78 	    msiq_state_p->msiq_msi_qcnt;
79 
80 	mutex_init(&msiq_state_p->msiq_mutex, NULL, MUTEX_DRIVER, NULL);
81 	msiq_state_p->msiq_p = kmem_zalloc(msiq_state_p->msiq_cnt *
82 	    sizeof (px_msiq_t), KM_SLEEP);
83 
84 	for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
85 		msiq_state_p->msiq_p[i].msiq_id =
86 		    msiq_state_p->msiq_1st_msiq_id + i;
87 		msiq_state_p->msiq_p[i].msiq_refcnt = 0;
88 		msiq_state_p->msiq_p[i].msiq_state = MSIQ_STATE_FREE;
89 		(void) px_ib_alloc_ino(ib_p, px_msiqid_to_devino(px_p,
90 		    msiq_state_p->msiq_p[i].msiq_id));
91 	}
92 
93 	if ((ret = px_lib_msiq_init(px_p->px_dip)) != DDI_SUCCESS)
94 		px_msiq_detach(px_p);
95 
96 	msiq_state_p->msiq_redist_flag = B_TRUE;
97 	return (ret);
98 }
99 
100 /*
101  * px_msiq_detach()
102  */
103 void
104 px_msiq_detach(px_t *px_p)
105 {
106 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
107 
108 	DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_detach\n");
109 
110 	if (px_lib_msiq_fini(px_p->px_dip) != DDI_SUCCESS) {
111 		DBG(DBG_MSIQ, px_p->px_dip,
112 		    "px_lib_msiq_fini: failed\n");
113 	}
114 
115 	mutex_destroy(&msiq_state_p->msiq_mutex);
116 	kmem_free(msiq_state_p->msiq_p,
117 	    msiq_state_p->msiq_cnt * sizeof (px_msiq_t));
118 
119 	bzero(msiq_state_p, sizeof (px_msiq_state_t));
120 }
121 
122 /*
123  * px_msiq_resume()
124  */
125 void
126 px_msiq_resume(px_t *px_p)
127 {
128 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
129 	int		i;
130 
131 	for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
132 		(void) px_lib_msiq_gethead(px_p->px_dip,
133 		    msiq_state_p->msiq_p[i].msiq_id,
134 		    &msiq_state_p->msiq_p[i].msiq_curr_head_index);
135 		msiq_state_p->msiq_p[i].msiq_new_head_index = 0;
136 		msiq_state_p->msiq_p[i].msiq_recs2process = 0;
137 	}
138 }
139 
140 /*
141  * px_msiq_alloc()
142  */
143 int
144 px_msiq_alloc(px_t *px_p, msiq_rec_type_t rec_type, msgcode_t msg_code,
145     msiqid_t *msiq_id_p)
146 {
147 	px_ib_t		*ib_p = px_p->px_ib_p;
148 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
149 	msiqid_t	first_msiq_id;
150 	uint_t		msiq_cnt;
151 	ushort_t	least_refcnt;
152 	int		i;
153 
154 	DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_alloc\n");
155 
156 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
157 	mutex_enter(&msiq_state_p->msiq_mutex);
158 
159 	if (rec_type == MSG_REC) {
160 		/*
161 		 * The first MSG EQ is dedicated to PCIE_MSG_CODE_ERR_COR
162 		 * messages. All other messages will be spread across
163 		 * the remaining MSG EQs.
164 		 */
165 		first_msiq_id = msiq_state_p->msiq_1st_msg_qid;
166 
167 		if (msg_code == PCIE_MSG_CODE_ERR_COR) {
168 			msiq_state_p->msiq_p[first_msiq_id].msiq_state =
169 			    MSIQ_STATE_INUSE;
170 
171 			(void) px_lib_msiq_gethead(px_p->px_dip, first_msiq_id,
172 			    &msiq_state_p->msiq_p[first_msiq_id].
173 			    msiq_curr_head_index);
174 
175 			*msiq_id_p =
176 			    msiq_state_p->msiq_p[first_msiq_id].msiq_id;
177 
178 			msiq_state_p->msiq_p[first_msiq_id].msiq_refcnt++;
179 
180 			DBG(DBG_MSIQ, px_p->px_dip,
181 			    "px_msiq_alloc: msiq_id 0x%x\n", *msiq_id_p);
182 
183 			mutex_exit(&msiq_state_p->msiq_mutex);
184 			return (DDI_SUCCESS);
185 		}
186 
187 		/* Jump past the first/dedicated EQ */
188 		first_msiq_id++;
189 		msiq_cnt = msiq_state_p->msiq_msg_qcnt - 1;
190 	} else {
191 		msiq_cnt = msiq_state_p->msiq_msi_qcnt;
192 		first_msiq_id = msiq_state_p->msiq_1st_msi_qid;
193 	}
194 
195 	*msiq_id_p = first_msiq_id;
196 	least_refcnt = msiq_state_p->msiq_p[first_msiq_id].msiq_refcnt;
197 
198 	/* Allocate MSIQs */
199 	for (i = first_msiq_id; i < (first_msiq_id + msiq_cnt); i++) {
200 		if (msiq_state_p->msiq_p[i].msiq_state == MSIQ_STATE_FREE) {
201 			msiq_state_p->msiq_p[i].msiq_state = MSIQ_STATE_INUSE;
202 			(void) px_lib_msiq_gethead(px_p->px_dip, i,
203 			    &msiq_state_p->msiq_p[i].msiq_curr_head_index);
204 			*msiq_id_p = msiq_state_p->msiq_p[i].msiq_id;
205 			break;
206 		}
207 
208 		if (least_refcnt > msiq_state_p->msiq_p[i].msiq_refcnt) {
209 			*msiq_id_p = msiq_state_p->msiq_p[i].msiq_id;
210 			least_refcnt = msiq_state_p->msiq_p[i].msiq_refcnt;
211 		}
212 	}
213 
214 	msiq_state_p->msiq_p[*msiq_id_p].msiq_refcnt++;
215 
216 	DBG(DBG_MSIQ, px_p->px_dip,
217 	    "px_msiq_alloc: msiq_id 0x%x\n", *msiq_id_p);
218 
219 	mutex_exit(&msiq_state_p->msiq_mutex);
220 	return (DDI_SUCCESS);
221 }
222 
223 /*
224  * px_msiq_alloc_based_on_cpuid()
225  */
226 int
227 px_msiq_alloc_based_on_cpuid(px_t *px_p, msiq_rec_type_t rec_type,
228     cpuid_t cpuid, msiqid_t *msiq_id_p)
229 {
230 	px_ib_t		*ib_p = px_p->px_ib_p;
231 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
232 	msiqid_t	first_msiq_id, free_msiq_id;
233 	uint_t		msiq_cnt;
234 	ushort_t	least_refcnt;
235 	px_ino_t	*ino_p;
236 	int		i;
237 
238 	DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_alloc_based_on_cpuid: "
239 	    "cpuid 0x%x\n", cpuid);
240 
241 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
242 
243 	mutex_enter(&msiq_state_p->msiq_mutex);
244 
245 	if (rec_type == MSG_REC) {
246 		msiq_cnt = msiq_state_p->msiq_msg_qcnt;
247 		first_msiq_id = msiq_state_p->msiq_1st_msg_qid;
248 	} else {
249 		msiq_cnt = msiq_state_p->msiq_msi_qcnt;
250 		first_msiq_id = msiq_state_p->msiq_1st_msi_qid;
251 	}
252 
253 	*msiq_id_p = free_msiq_id = (msiqid_t)-1;
254 	least_refcnt = (ushort_t)-1;
255 
256 	/* Allocate MSIQs */
257 	for (i = first_msiq_id; i < (first_msiq_id + msiq_cnt); i++) {
258 		ino_p = px_ib_locate_ino(ib_p, px_msiqid_to_devino(px_p, i));
259 
260 		if ((ino_p->ino_cpuid == cpuid) &&
261 		    (least_refcnt > msiq_state_p->msiq_p[i].msiq_refcnt)) {
262 			*msiq_id_p = msiq_state_p->msiq_p[i].msiq_id;
263 			least_refcnt = msiq_state_p->msiq_p[i].msiq_refcnt;
264 		}
265 
266 		if ((*msiq_id_p == -1) && (free_msiq_id == -1) &&
267 		    (msiq_state_p->msiq_p[i].msiq_state == MSIQ_STATE_FREE))
268 			free_msiq_id = msiq_state_p->msiq_p[i].msiq_id;
269 	}
270 
271 	if (*msiq_id_p == -1) {
272 		if (free_msiq_id == -1) {
273 			DBG(DBG_MSIQ, px_p->px_dip,
274 			    "px_msiq_alloc_based_on_cpuid: No EQ is available "
275 			    "for CPU 0x%x\n", cpuid);
276 
277 			mutex_exit(&msiq_state_p->msiq_mutex);
278 			return (DDI_EINVAL);
279 		}
280 
281 		*msiq_id_p = free_msiq_id;
282 		ino_p = px_ib_locate_ino(ib_p,
283 		    px_msiqid_to_devino(px_p, *msiq_id_p));
284 		ino_p->ino_cpuid = ino_p->ino_default_cpuid = cpuid;
285 	}
286 
287 	if (msiq_state_p->msiq_p[*msiq_id_p].msiq_state == MSIQ_STATE_FREE) {
288 		msiq_state_p->msiq_p[*msiq_id_p].msiq_state = MSIQ_STATE_INUSE;
289 		(void) px_lib_msiq_gethead(px_p->px_dip, *msiq_id_p,
290 		    &msiq_state_p->msiq_p[*msiq_id_p].msiq_curr_head_index);
291 	}
292 
293 	msiq_state_p->msiq_p[*msiq_id_p].msiq_refcnt++;
294 
295 	DBG(DBG_MSIQ, px_p->px_dip,
296 	    "px_msiq_alloc_based_on_cpuid: msiq_id 0x%x\n", *msiq_id_p);
297 
298 	mutex_exit(&msiq_state_p->msiq_mutex);
299 	return (DDI_SUCCESS);
300 }
301 
302 /*
303  * px_msiq_free()
304  */
305 int
306 px_msiq_free(px_t *px_p, msiqid_t msiq_id)
307 {
308 	px_ib_t		*ib_p = px_p->px_ib_p;
309 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
310 
311 	DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_free: msiq_id 0x%x", msiq_id);
312 
313 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
314 	mutex_enter(&msiq_state_p->msiq_mutex);
315 
316 	if ((msiq_id < msiq_state_p->msiq_1st_msiq_id) || (msiq_id >=
317 	    (msiq_state_p->msiq_1st_msiq_id + msiq_state_p->msiq_cnt))) {
318 		DBG(DBG_MSIQ, px_p->px_dip,
319 		    "px_msiq_free: Invalid msiq_id 0x%x", msiq_id);
320 
321 		mutex_exit(&msiq_state_p->msiq_mutex);
322 		return (DDI_FAILURE);
323 	}
324 
325 	if (--msiq_state_p->msiq_p[msiq_id].msiq_refcnt == 0)
326 		msiq_state_p->msiq_p[msiq_id].msiq_state = MSIQ_STATE_FREE;
327 
328 	mutex_exit(&msiq_state_p->msiq_mutex);
329 	return (DDI_SUCCESS);
330 }
331 
332 /*
333  * px_msiq_redist()
334  */
335 void
336 px_msiq_redist(px_t *px_p)
337 {
338 	px_ib_t		*ib_p = px_p->px_ib_p;
339 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
340 	px_ino_t	*ino_p;
341 	int		i;
342 
343 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
344 
345 	mutex_enter(&msiq_state_p->msiq_mutex);
346 
347 	if (msiq_state_p->msiq_redist_flag == B_FALSE) {
348 		mutex_exit(&msiq_state_p->msiq_mutex);
349 		return;
350 	}
351 
352 	for (i = 0; i < msiq_state_p->msiq_cnt; i++) {
353 		ino_p = px_ib_locate_ino(ib_p,
354 		    px_msiqid_to_devino(px_p, msiq_state_p->msiq_p[i].msiq_id));
355 
356 		if (ino_p) {
357 			ino_p->ino_cpuid = ino_p->ino_default_cpuid =
358 			    intr_dist_cpuid();
359 
360 			DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_redist: "
361 			    "sysino 0x%llx current cpuid 0x%x "
362 			    "default cpuid 0x%x\n", ino_p->ino_sysino,
363 			    ino_p->ino_cpuid, ino_p->ino_default_cpuid);
364 		}
365 	}
366 
367 	msiq_state_p->msiq_redist_flag = B_FALSE;
368 	mutex_exit(&msiq_state_p->msiq_mutex);
369 }
370 
371 /*
372  * px_msiqid_to_devino()
373  */
374 devino_t
375 px_msiqid_to_devino(px_t *px_p, msiqid_t msiq_id)
376 {
377 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
378 	devino_t	devino;
379 
380 	devino = msiq_state_p->msiq_1st_devino +
381 	    msiq_id - msiq_state_p->msiq_1st_msiq_id;
382 
383 	DBG(DBG_MSIQ, px_p->px_dip, "px_msiqid_to_devino: "
384 	    "msiq_id 0x%x devino 0x%x\n", msiq_id, devino);
385 
386 	return (devino);
387 }
388 
389 /*
390  * px_devino_to_msiqid()
391  */
392 msiqid_t
393 px_devino_to_msiqid(px_t *px_p, devino_t devino)
394 {
395 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
396 	msiqid_t	msiq_id;
397 
398 	msiq_id = msiq_state_p->msiq_1st_msiq_id +
399 	    devino - msiq_state_p->msiq_1st_devino;
400 
401 	DBG(DBG_MSIQ, px_p->px_dip, "px_devino_to_msiq: "
402 	    "devino 0x%x msiq_id 0x%x\n", devino, msiq_id);
403 
404 	return (msiq_id);
405 }
406 
407 /*
408  * px_msiq_get_props()
409  */
410 static int
411 px_msiq_get_props(px_t *px_p)
412 {
413 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
414 	int		length = sizeof (int);
415 	char		*valuep = NULL;
416 	int		ret;
417 
418 	DBG(DBG_MSIQ, px_p->px_dip, "px_msiq_get_props\n");
419 
420 	/* #msi-eqs */
421 	msiq_state_p->msiq_cnt = ddi_getprop(DDI_DEV_T_ANY, px_p->px_dip,
422 	    DDI_PROP_DONTPASS, "#msi-eqs", 0);
423 
424 	DBG(DBG_MSIQ, px_p->px_dip, "msiq_cnt=%d\n", msiq_state_p->msiq_cnt);
425 
426 	/* msi-eq-size */
427 	msiq_state_p->msiq_rec_cnt = ddi_getprop(DDI_DEV_T_ANY, px_p->px_dip,
428 	    DDI_PROP_DONTPASS, "msi-eq-size", 0);
429 
430 	DBG(DBG_MSIQ, px_p->px_dip, "msiq_rec_cnt=%d\n",
431 	    msiq_state_p->msiq_rec_cnt);
432 
433 	if ((msiq_state_p->msiq_cnt == 0) || (msiq_state_p->msiq_rec_cnt == 0))
434 		return (DDI_FAILURE);
435 
436 	/* msi-eq-to-devino: msi-eq#, devino# fields */
437 	ret = ddi_prop_op(DDI_DEV_T_ANY, px_p->px_dip, PROP_LEN_AND_VAL_ALLOC,
438 	    DDI_PROP_DONTPASS, "msi-eq-to-devino", (caddr_t)&valuep, &length);
439 
440 	/*
441 	 * NOTE:
442 	 * On sun4u PCIe systems, the msi-eq-to-devino property is broken and
443 	 * these systems defines this property as msi-eq-devino.
444 	 */
445 	if (ret == DDI_PROP_NOT_FOUND) {
446 		DBG(DBG_MSIQ, px_p->px_dip, "msi-eq-to-devino is not found\n");
447 		ret = ddi_prop_op(DDI_DEV_T_ANY, px_p->px_dip,
448 		    PROP_LEN_AND_VAL_ALLOC, DDI_PROP_DONTPASS, "msi-eq-devino",
449 		    (caddr_t)&valuep, &length);
450 	}
451 
452 	if (ret != DDI_PROP_SUCCESS) {
453 		return (DDI_FAILURE);
454 	}
455 
456 	msiq_state_p->msiq_1st_msiq_id =
457 	    ((px_msi_eq_to_devino_t *)valuep)->msi_eq_no;
458 	msiq_state_p->msiq_1st_devino =
459 	    ((px_msi_eq_to_devino_t *)valuep)->devino_no;
460 
461 	DBG(DBG_MSIQ, px_p->px_dip, "msiq_1st_msiq_id=%d\n",
462 	    msiq_state_p->msiq_1st_msiq_id);
463 
464 	DBG(DBG_MSIQ, px_p->px_dip, "msiq_1st_devino=%d\n",
465 	    msiq_state_p->msiq_1st_devino);
466 
467 	kmem_free(valuep, (size_t)length);
468 	return (DDI_SUCCESS);
469 }
470