xref: /dragonfly/sys/dev/crypto/ubsec/ubsec.c (revision b40e316c)
1 /* $FreeBSD: src/sys/dev/ubsec/ubsec.c,v 1.6.2.12 2003/06/04 17:56:59 sam Exp $ */
2 /* $DragonFly: src/sys/dev/crypto/ubsec/ubsec.c,v 1.6 2004/06/02 14:42:49 eirikn Exp $ */
3 /*	$OpenBSD: ubsec.c,v 1.115 2002/09/24 18:33:26 jason Exp $	*/
4 
5 /*
6  * Copyright (c) 2000 Jason L. Wright (jason@thought.net)
7  * Copyright (c) 2000 Theo de Raadt (deraadt@openbsd.org)
8  * Copyright (c) 2001 Patrik Lindergren (patrik@ipunplugged.com)
9  *
10  * All rights reserved.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by Jason L. Wright
23  * 4. The name of the author may not be used to endorse or promote products
24  *    derived from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
27  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
28  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
30  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  *
38  * Effort sponsored in part by the Defense Advanced Research Projects
39  * Agency (DARPA) and Air Force Research Laboratory, Air Force
40  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
41  *
42  */
43 
44 /*
45  * uBsec 5[56]01, 58xx hardware crypto accelerator
46  */
47 
48 #include "opt_ubsec.h"
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/proc.h>
53 #include <sys/errno.h>
54 #include <sys/malloc.h>
55 #include <sys/kernel.h>
56 #include <sys/mbuf.h>
57 #include <sys/sysctl.h>
58 #include <sys/endian.h>
59 
60 #include <vm/vm.h>
61 #include <vm/pmap.h>
62 
63 #include <machine/clock.h>
64 #include <machine/bus.h>
65 #include <machine/resource.h>
66 #include <sys/bus.h>
67 #include <sys/rman.h>
68 
69 #include <crypto/sha1.h>
70 #include <opencrypto/cryptodev.h>
71 #include <opencrypto/cryptosoft.h>
72 #include <sys/md5.h>
73 #include <sys/random.h>
74 
75 #include <bus/pci/pcivar.h>
76 #include <bus/pci/pcireg.h>
77 
78 /* grr, #defines for gratuitous incompatibility in queue.h */
79 #define	SIMPLEQ_HEAD		STAILQ_HEAD
80 #define	SIMPLEQ_ENTRY		STAILQ_ENTRY
81 #define	SIMPLEQ_INIT		STAILQ_INIT
82 #define	SIMPLEQ_INSERT_TAIL	STAILQ_INSERT_TAIL
83 #define	SIMPLEQ_EMPTY		STAILQ_EMPTY
84 #define	SIMPLEQ_FIRST		STAILQ_FIRST
85 #define	SIMPLEQ_REMOVE_HEAD	STAILQ_REMOVE_HEAD_UNTIL
86 #define	SIMPLEQ_FOREACH		STAILQ_FOREACH
87 /* ditto for endian.h */
88 #define	letoh16(x)		le16toh(x)
89 #define	letoh32(x)		le32toh(x)
90 
91 #ifdef UBSEC_RNDTEST
92 #include "../rndtest/rndtest.h"
93 #endif
94 #include "ubsecreg.h"
95 #include "ubsecvar.h"
96 
97 /*
98  * Prototypes and count for the pci_device structure
99  */
100 static	int ubsec_probe(device_t);
101 static	int ubsec_attach(device_t);
102 static	int ubsec_detach(device_t);
103 static	int ubsec_suspend(device_t);
104 static	int ubsec_resume(device_t);
105 static	void ubsec_shutdown(device_t);
106 
107 static device_method_t ubsec_methods[] = {
108 	/* Device interface */
109 	DEVMETHOD(device_probe,		ubsec_probe),
110 	DEVMETHOD(device_attach,	ubsec_attach),
111 	DEVMETHOD(device_detach,	ubsec_detach),
112 	DEVMETHOD(device_suspend,	ubsec_suspend),
113 	DEVMETHOD(device_resume,	ubsec_resume),
114 	DEVMETHOD(device_shutdown,	ubsec_shutdown),
115 
116 	/* bus interface */
117 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
118 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
119 
120 	{ 0, 0 }
121 };
122 static driver_t ubsec_driver = {
123 	"ubsec",
124 	ubsec_methods,
125 	sizeof (struct ubsec_softc)
126 };
127 static devclass_t ubsec_devclass;
128 
129 DECLARE_DUMMY_MODULE(ubsec);
130 DRIVER_MODULE(ubsec, pci, ubsec_driver, ubsec_devclass, 0, 0);
131 MODULE_DEPEND(ubsec, crypto, 1, 1, 1);
132 #ifdef UBSEC_RNDTEST
133 MODULE_DEPEND(ubsec, rndtest, 1, 1, 1);
134 #endif
135 
136 static	void ubsec_intr(void *);
137 static	int ubsec_newsession(void *, u_int32_t *, struct cryptoini *);
138 static	int ubsec_freesession(void *, u_int64_t);
139 static	int ubsec_process(void *, struct cryptop *, int);
140 static	void ubsec_callback(struct ubsec_softc *, struct ubsec_q *);
141 static	void ubsec_feed(struct ubsec_softc *);
142 static	void ubsec_mcopy(struct mbuf *, struct mbuf *, int, int);
143 static	void ubsec_callback2(struct ubsec_softc *, struct ubsec_q2 *);
144 static	int ubsec_feed2(struct ubsec_softc *);
145 static	void ubsec_rng(void *);
146 static	int ubsec_dma_malloc(struct ubsec_softc *, bus_size_t,
147 			     struct ubsec_dma_alloc *, int);
148 #define	ubsec_dma_sync(_dma, _flags) \
149 	bus_dmamap_sync((_dma)->dma_tag, (_dma)->dma_map, (_flags))
150 static	void ubsec_dma_free(struct ubsec_softc *, struct ubsec_dma_alloc *);
151 static	int ubsec_dmamap_aligned(struct ubsec_operand *op);
152 
153 static	void ubsec_reset_board(struct ubsec_softc *sc);
154 static	void ubsec_init_board(struct ubsec_softc *sc);
155 static	void ubsec_init_pciregs(device_t dev);
156 static	void ubsec_totalreset(struct ubsec_softc *sc);
157 
158 static	int ubsec_free_q(struct ubsec_softc *sc, struct ubsec_q *q);
159 
160 static	int ubsec_kprocess(void*, struct cryptkop *, int);
161 static	int ubsec_kprocess_modexp_hw(struct ubsec_softc *, struct cryptkop *, int);
162 static	int ubsec_kprocess_modexp_sw(struct ubsec_softc *, struct cryptkop *, int);
163 static	int ubsec_kprocess_rsapriv(struct ubsec_softc *, struct cryptkop *, int);
164 static	void ubsec_kfree(struct ubsec_softc *, struct ubsec_q2 *);
165 static	int ubsec_ksigbits(struct crparam *);
166 static	void ubsec_kshift_r(u_int, u_int8_t *, u_int, u_int8_t *, u_int);
167 static	void ubsec_kshift_l(u_int, u_int8_t *, u_int, u_int8_t *, u_int);
168 
169 SYSCTL_NODE(_hw, OID_AUTO, ubsec, CTLFLAG_RD, 0, "Broadcom driver parameters");
170 
171 #ifdef UBSEC_DEBUG
172 static	void ubsec_dump_pb(volatile struct ubsec_pktbuf *);
173 static	void ubsec_dump_mcr(struct ubsec_mcr *);
174 static	void ubsec_dump_ctx2(struct ubsec_ctx_keyop *);
175 
176 static	int ubsec_debug = 0;
177 SYSCTL_INT(_hw_ubsec, OID_AUTO, debug, CTLFLAG_RW, &ubsec_debug,
178 	    0, "control debugging msgs");
179 #endif
180 
181 #define	READ_REG(sc,r) \
182 	bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (r))
183 
184 #define WRITE_REG(sc,reg,val) \
185 	bus_space_write_4((sc)->sc_st, (sc)->sc_sh, reg, val)
186 
187 #define	SWAP32(x) (x) = htole32(ntohl((x)))
188 #define	HTOLE32(x) (x) = htole32(x)
189 
190 
191 struct ubsec_stats ubsecstats;
192 SYSCTL_STRUCT(_hw_ubsec, OID_AUTO, stats, CTLFLAG_RD, &ubsecstats,
193 	    ubsec_stats, "driver statistics");
194 
195 static int
196 ubsec_probe(device_t dev)
197 {
198 	if (pci_get_vendor(dev) == PCI_VENDOR_SUN &&
199 	    (pci_get_device(dev) == PCI_PRODUCT_SUN_5821 ||
200 	     pci_get_device(dev) == PCI_PRODUCT_SUN_SCA1K))
201 		return (0);
202 	if (pci_get_vendor(dev) == PCI_VENDOR_BLUESTEEL &&
203 	    (pci_get_device(dev) == PCI_PRODUCT_BLUESTEEL_5501 ||
204 	     pci_get_device(dev) == PCI_PRODUCT_BLUESTEEL_5601))
205 		return (0);
206 	if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM &&
207 	    (pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5801 ||
208 	     pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5802 ||
209 	     pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5805 ||
210 	     pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5820 ||
211 	     pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5821 ||
212 	     pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5822 ||
213 	     pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5823
214 	     ))
215 		return (0);
216 	return (ENXIO);
217 }
218 
219 static const char*
220 ubsec_partname(struct ubsec_softc *sc)
221 {
222 	/* XXX sprintf numbers when not decoded */
223 	switch (pci_get_vendor(sc->sc_dev)) {
224 	case PCI_VENDOR_BROADCOM:
225 		switch (pci_get_device(sc->sc_dev)) {
226 		case PCI_PRODUCT_BROADCOM_5801:	return "Broadcom 5801";
227 		case PCI_PRODUCT_BROADCOM_5802:	return "Broadcom 5802";
228 		case PCI_PRODUCT_BROADCOM_5805:	return "Broadcom 5805";
229 		case PCI_PRODUCT_BROADCOM_5820:	return "Broadcom 5820";
230 		case PCI_PRODUCT_BROADCOM_5821:	return "Broadcom 5821";
231 		case PCI_PRODUCT_BROADCOM_5822:	return "Broadcom 5822";
232 		case PCI_PRODUCT_BROADCOM_5823:	return "Broadcom 5823";
233 		}
234 		return "Broadcom unknown-part";
235 	case PCI_VENDOR_BLUESTEEL:
236 		switch (pci_get_device(sc->sc_dev)) {
237 		case PCI_PRODUCT_BLUESTEEL_5601: return "Bluesteel 5601";
238 		}
239 		return "Bluesteel unknown-part";
240 	case PCI_VENDOR_SUN:
241 		switch (pci_get_device(sc->sc_dev)) {
242 		case PCI_PRODUCT_SUN_5821: return "Sun Crypto 5821";
243 		case PCI_PRODUCT_SUN_SCA1K: return "Sun Crypto 1K";
244 		}
245 		return "Sun unknown-part";
246 	}
247 	return "Unknown-vendor unknown-part";
248 }
249 
250 static void
251 default_harvest(struct rndtest_state *rsp, void *buf, u_int count)
252 {
253 	u_int32_t *p = (u_int32_t *)buf;
254 	for (count /= sizeof (u_int32_t); count; count--)
255 		add_true_randomness(*p++);
256 }
257 
258 static int
259 ubsec_attach(device_t dev)
260 {
261 	struct ubsec_softc *sc = device_get_softc(dev);
262 	struct ubsec_dma *dmap;
263 	u_int32_t cmd, i;
264 	int rid;
265 
266 	KASSERT(sc != NULL, ("ubsec_attach: null software carrier!"));
267 	bzero(sc, sizeof (*sc));
268 	sc->sc_dev = dev;
269 
270 	SIMPLEQ_INIT(&sc->sc_queue);
271 	SIMPLEQ_INIT(&sc->sc_qchip);
272 	SIMPLEQ_INIT(&sc->sc_queue2);
273 	SIMPLEQ_INIT(&sc->sc_qchip2);
274 	SIMPLEQ_INIT(&sc->sc_q2free);
275 
276 	/* XXX handle power management */
277 
278 	sc->sc_statmask = BS_STAT_MCR1_DONE | BS_STAT_DMAERR;
279 
280 	if (pci_get_vendor(dev) == PCI_VENDOR_BLUESTEEL &&
281 	    pci_get_device(dev) == PCI_PRODUCT_BLUESTEEL_5601)
282 		sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG;
283 
284 	if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM &&
285 	    (pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5802 ||
286 	     pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5805))
287 		sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG;
288 
289 	if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM &&
290 	    pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5820)
291 		sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG |
292 		    UBS_FLAGS_LONGCTX | UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY;
293 
294 	if ((pci_get_vendor(dev) == PCI_VENDOR_BROADCOM &&
295 	     (pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5821 ||
296 	      pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5822 ||
297 	      pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5823)) ||
298 	    (pci_get_vendor(dev) == PCI_VENDOR_SUN &&
299 	     (pci_get_device(dev) == PCI_PRODUCT_SUN_SCA1K ||
300 	      pci_get_device(dev) == PCI_PRODUCT_SUN_5821))) {
301 		/* NB: the 5821/5822 defines some additional status bits */
302 		sc->sc_statmask |= BS_STAT_MCR1_ALLEMPTY |
303 		    BS_STAT_MCR2_ALLEMPTY;
304 		sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG |
305 		    UBS_FLAGS_LONGCTX | UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY;
306 	}
307 
308 	cmd = pci_read_config(dev, PCIR_COMMAND, 4);
309 	cmd |= PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN;
310 	pci_write_config(dev, PCIR_COMMAND, cmd, 4);
311 	cmd = pci_read_config(dev, PCIR_COMMAND, 4);
312 
313 	if (!(cmd & PCIM_CMD_MEMEN)) {
314 		device_printf(dev, "failed to enable memory mapping\n");
315 		goto bad;
316 	}
317 
318 	if (!(cmd & PCIM_CMD_BUSMASTEREN)) {
319 		device_printf(dev, "failed to enable bus mastering\n");
320 		goto bad;
321 	}
322 
323 	/*
324 	 * Setup memory-mapping of PCI registers.
325 	 */
326 	rid = BS_BAR;
327 	sc->sc_sr = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
328 				       0, ~0, 1, RF_ACTIVE);
329 	if (sc->sc_sr == NULL) {
330 		device_printf(dev, "cannot map register space\n");
331 		goto bad;
332 	}
333 	sc->sc_st = rman_get_bustag(sc->sc_sr);
334 	sc->sc_sh = rman_get_bushandle(sc->sc_sr);
335 
336 	/*
337 	 * Arrange interrupt line.
338 	 */
339 	rid = 0;
340 	sc->sc_irq = bus_alloc_resource(dev, SYS_RES_IRQ, &rid,
341 					0, ~0, 1, RF_SHAREABLE|RF_ACTIVE);
342 	if (sc->sc_irq == NULL) {
343 		device_printf(dev, "could not map interrupt\n");
344 		goto bad1;
345 	}
346 	/*
347 	 * NB: Network code assumes we are blocked with splimp()
348 	 *     so make sure the IRQ is mapped appropriately.
349 	 */
350 	if (bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET,
351 			   ubsec_intr, sc, &sc->sc_ih)) {
352 		device_printf(dev, "could not establish interrupt\n");
353 		goto bad2;
354 	}
355 
356 	sc->sc_cid = crypto_get_driverid(0);
357 	if (sc->sc_cid < 0) {
358 		device_printf(dev, "could not get crypto driver id\n");
359 		goto bad3;
360 	}
361 
362 	/*
363 	 * Setup DMA descriptor area.
364 	 */
365 	if (bus_dma_tag_create(NULL,			/* parent */
366 			       1, 0,			/* alignment, bounds */
367 			       BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
368 			       BUS_SPACE_MAXADDR,	/* highaddr */
369 			       NULL, NULL,		/* filter, filterarg */
370 			       0x3ffff,			/* maxsize */
371 			       UBS_MAX_SCATTER,		/* nsegments */
372 			       0xffff,			/* maxsegsize */
373 			       BUS_DMA_ALLOCNOW,	/* flags */
374 			       &sc->sc_dmat)) {
375 		device_printf(dev, "cannot allocate DMA tag\n");
376 		goto bad4;
377 	}
378 	SIMPLEQ_INIT(&sc->sc_freequeue);
379 	dmap = sc->sc_dmaa;
380 	for (i = 0; i < UBS_MAX_NQUEUE; i++, dmap++) {
381 		struct ubsec_q *q;
382 
383 		q = malloc(sizeof(struct ubsec_q), M_DEVBUF, M_WAITOK);
384 		if (ubsec_dma_malloc(sc, sizeof(struct ubsec_dmachunk),
385 		    &dmap->d_alloc, 0)) {
386 			device_printf(dev, "cannot allocate dma buffers\n");
387 			free(q, M_DEVBUF);
388 			break;
389 		}
390 		dmap->d_dma = (struct ubsec_dmachunk *)dmap->d_alloc.dma_vaddr;
391 
392 		q->q_dma = dmap;
393 		sc->sc_queuea[i] = q;
394 
395 		SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
396 	}
397 
398 	device_printf(sc->sc_dev, "%s\n", ubsec_partname(sc));
399 
400 	crypto_register(sc->sc_cid, CRYPTO_3DES_CBC, 0, 0,
401 	    ubsec_newsession, ubsec_freesession, ubsec_process, sc);
402 	crypto_register(sc->sc_cid, CRYPTO_DES_CBC, 0, 0,
403 	     ubsec_newsession, ubsec_freesession, ubsec_process, sc);
404 	crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC, 0, 0,
405 	     ubsec_newsession, ubsec_freesession, ubsec_process, sc);
406 	crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC, 0, 0,
407 	     ubsec_newsession, ubsec_freesession, ubsec_process, sc);
408 
409 	/*
410 	 * Reset Broadcom chip
411 	 */
412 	ubsec_reset_board(sc);
413 
414 	/*
415 	 * Init Broadcom specific PCI settings
416 	 */
417 	ubsec_init_pciregs(dev);
418 
419 	/*
420 	 * Init Broadcom chip
421 	 */
422 	ubsec_init_board(sc);
423 
424 #ifndef UBSEC_NO_RNG
425 	if (sc->sc_flags & UBS_FLAGS_RNG) {
426 		sc->sc_statmask |= BS_STAT_MCR2_DONE;
427 #ifdef UBSEC_RNDTEST
428 		sc->sc_rndtest = rndtest_attach(dev);
429 		if (sc->sc_rndtest)
430 			sc->sc_harvest = rndtest_harvest;
431 		else
432 			sc->sc_harvest = default_harvest;
433 #else
434 		sc->sc_harvest = default_harvest;
435 #endif
436 
437 		if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
438 		    &sc->sc_rng.rng_q.q_mcr, 0))
439 			goto skip_rng;
440 
441 		if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_rngbypass),
442 		    &sc->sc_rng.rng_q.q_ctx, 0)) {
443 			ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr);
444 			goto skip_rng;
445 		}
446 
447 		if (ubsec_dma_malloc(sc, sizeof(u_int32_t) *
448 		    UBSEC_RNG_BUFSIZ, &sc->sc_rng.rng_buf, 0)) {
449 			ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_ctx);
450 			ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr);
451 			goto skip_rng;
452 		}
453 
454 		if (hz >= 100)
455 			sc->sc_rnghz = hz / 100;
456 		else
457 			sc->sc_rnghz = 1;
458 		callout_init(&sc->sc_rngto);
459 		callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc);
460 skip_rng:
461 	;
462 	}
463 #endif /* UBSEC_NO_RNG */
464 
465 	if (sc->sc_flags & UBS_FLAGS_KEY) {
466 		sc->sc_statmask |= BS_STAT_MCR2_DONE;
467 
468 		crypto_kregister(sc->sc_cid, CRK_MOD_EXP, 0,
469 			ubsec_kprocess, sc);
470 #if 0
471 		crypto_kregister(sc->sc_cid, CRK_MOD_EXP_CRT, 0,
472 			ubsec_kprocess, sc);
473 #endif
474 	}
475 	return (0);
476 bad4:
477 	crypto_unregister_all(sc->sc_cid);
478 bad3:
479 	bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih);
480 bad2:
481 	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq);
482 bad1:
483 	bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr);
484 bad:
485 	return (ENXIO);
486 }
487 
488 /*
489  * Detach a device that successfully probed.
490  */
491 static int
492 ubsec_detach(device_t dev)
493 {
494 	struct ubsec_softc *sc = device_get_softc(dev);
495 	int s;
496 
497 	KASSERT(sc != NULL, ("ubsec_detach: null software carrier"));
498 
499 	/* XXX wait/abort active ops */
500 
501 	s = splimp();
502 
503 	callout_stop(&sc->sc_rngto);
504 
505 	crypto_unregister_all(sc->sc_cid);
506 
507 #ifdef UBSEC_RNDTEST
508 	if (sc->sc_rndtest)
509 		rndtest_detach(sc->sc_rndtest);
510 #endif
511 
512 	while (!SIMPLEQ_EMPTY(&sc->sc_freequeue)) {
513 		struct ubsec_q *q;
514 
515 		q = SIMPLEQ_FIRST(&sc->sc_freequeue);
516 		SIMPLEQ_REMOVE_HEAD(&sc->sc_freequeue, q, q_next);
517 		ubsec_dma_free(sc, &q->q_dma->d_alloc);
518 		free(q, M_DEVBUF);
519 	}
520 #ifndef UBSEC_NO_RNG
521 	if (sc->sc_flags & UBS_FLAGS_RNG) {
522 		ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr);
523 		ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_ctx);
524 		ubsec_dma_free(sc, &sc->sc_rng.rng_buf);
525 	}
526 #endif /* UBSEC_NO_RNG */
527 
528 	bus_generic_detach(dev);
529 	bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih);
530 	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq);
531 
532 	bus_dma_tag_destroy(sc->sc_dmat);
533 	bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr);
534 
535 	splx(s);
536 
537 	return (0);
538 }
539 
540 /*
541  * Stop all chip i/o so that the kernel's probe routines don't
542  * get confused by errant DMAs when rebooting.
543  */
544 static void
545 ubsec_shutdown(device_t dev)
546 {
547 #ifdef notyet
548 	ubsec_stop(device_get_softc(dev));
549 #endif
550 }
551 
552 /*
553  * Device suspend routine.
554  */
555 static int
556 ubsec_suspend(device_t dev)
557 {
558 	struct ubsec_softc *sc = device_get_softc(dev);
559 
560 	KASSERT(sc != NULL, ("ubsec_suspend: null software carrier"));
561 #ifdef notyet
562 	/* XXX stop the device and save PCI settings */
563 #endif
564 	sc->sc_suspended = 1;
565 
566 	return (0);
567 }
568 
569 static int
570 ubsec_resume(device_t dev)
571 {
572 	struct ubsec_softc *sc = device_get_softc(dev);
573 
574 	KASSERT(sc != NULL, ("ubsec_resume: null software carrier"));
575 #ifdef notyet
576 	/* XXX retore PCI settings and start the device */
577 #endif
578 	sc->sc_suspended = 0;
579 	return (0);
580 }
581 
582 /*
583  * UBSEC Interrupt routine
584  */
585 static void
586 ubsec_intr(void *arg)
587 {
588 	struct ubsec_softc *sc = arg;
589 	volatile u_int32_t stat;
590 	struct ubsec_q *q;
591 	struct ubsec_dma *dmap;
592 	int npkts = 0, i;
593 
594 	stat = READ_REG(sc, BS_STAT);
595 	stat &= sc->sc_statmask;
596 	if (stat == 0) {
597 		return;
598 	}
599 
600 	WRITE_REG(sc, BS_STAT, stat);		/* IACK */
601 
602 	/*
603 	 * Check to see if we have any packets waiting for us
604 	 */
605 	if ((stat & BS_STAT_MCR1_DONE)) {
606 		while (!SIMPLEQ_EMPTY(&sc->sc_qchip)) {
607 			q = SIMPLEQ_FIRST(&sc->sc_qchip);
608 			dmap = q->q_dma;
609 
610 			if ((dmap->d_dma->d_mcr.mcr_flags & htole16(UBS_MCR_DONE)) == 0)
611 				break;
612 
613 			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip, q, q_next);
614 
615 			npkts = q->q_nstacked_mcrs;
616 			sc->sc_nqchip -= 1+npkts;
617 			/*
618 			 * search for further sc_qchip ubsec_q's that share
619 			 * the same MCR, and complete them too, they must be
620 			 * at the top.
621 			 */
622 			for (i = 0; i < npkts; i++) {
623 				if(q->q_stacked_mcr[i]) {
624 					ubsec_callback(sc, q->q_stacked_mcr[i]);
625 				} else {
626 					break;
627 				}
628 			}
629 			ubsec_callback(sc, q);
630 		}
631 
632 		/*
633 		 * Don't send any more packet to chip if there has been
634 		 * a DMAERR.
635 		 */
636 		if (!(stat & BS_STAT_DMAERR))
637 			ubsec_feed(sc);
638 	}
639 
640 	/*
641 	 * Check to see if we have any key setups/rng's waiting for us
642 	 */
643 	if ((sc->sc_flags & (UBS_FLAGS_KEY|UBS_FLAGS_RNG)) &&
644 	    (stat & BS_STAT_MCR2_DONE)) {
645 		struct ubsec_q2 *q2;
646 		struct ubsec_mcr *mcr;
647 
648 		while (!SIMPLEQ_EMPTY(&sc->sc_qchip2)) {
649 			q2 = SIMPLEQ_FIRST(&sc->sc_qchip2);
650 
651 			ubsec_dma_sync(&q2->q_mcr,
652 			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
653 
654 			mcr = (struct ubsec_mcr *)q2->q_mcr.dma_vaddr;
655 			if ((mcr->mcr_flags & htole16(UBS_MCR_DONE)) == 0) {
656 				ubsec_dma_sync(&q2->q_mcr,
657 				    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
658 				break;
659 			}
660 			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip2, q2, q_next);
661 			ubsec_callback2(sc, q2);
662 			/*
663 			 * Don't send any more packet to chip if there has been
664 			 * a DMAERR.
665 			 */
666 			if (!(stat & BS_STAT_DMAERR))
667 				ubsec_feed2(sc);
668 		}
669 	}
670 
671 	/*
672 	 * Check to see if we got any DMA Error
673 	 */
674 	if (stat & BS_STAT_DMAERR) {
675 #ifdef UBSEC_DEBUG
676 		if (ubsec_debug) {
677 			volatile u_int32_t a = READ_REG(sc, BS_ERR);
678 
679 			printf("dmaerr %s@%08x\n",
680 			    (a & BS_ERR_READ) ? "read" : "write",
681 			    a & BS_ERR_ADDR);
682 		}
683 #endif /* UBSEC_DEBUG */
684 		ubsecstats.hst_dmaerr++;
685 		ubsec_totalreset(sc);
686 		ubsec_feed(sc);
687 	}
688 
689 	if (sc->sc_needwakeup) {		/* XXX check high watermark */
690 		int wakeup = sc->sc_needwakeup & (CRYPTO_SYMQ|CRYPTO_ASYMQ);
691 #ifdef UBSEC_DEBUG
692 		if (ubsec_debug)
693 			device_printf(sc->sc_dev, "wakeup crypto (%x)\n",
694 				sc->sc_needwakeup);
695 #endif /* UBSEC_DEBUG */
696 		sc->sc_needwakeup &= ~wakeup;
697 		crypto_unblock(sc->sc_cid, wakeup);
698 	}
699 }
700 
701 /*
702  * ubsec_feed() - aggregate and post requests to chip
703  */
704 static void
705 ubsec_feed(struct ubsec_softc *sc)
706 {
707 	struct ubsec_q *q, *q2;
708 	int npkts, i;
709 	void *v;
710 	u_int32_t stat;
711 
712 	/*
713 	 * Decide how many ops to combine in a single MCR.  We cannot
714 	 * aggregate more than UBS_MAX_AGGR because this is the number
715 	 * of slots defined in the data structure.  Note that
716 	 * aggregation only happens if ops are marked batch'able.
717 	 * Aggregating ops reduces the number of interrupts to the host
718 	 * but also (potentially) increases the latency for processing
719 	 * completed ops as we only get an interrupt when all aggregated
720 	 * ops have completed.
721 	 */
722 	if (sc->sc_nqueue == 0)
723 		return;
724 	if (sc->sc_nqueue > 1) {
725 		npkts = 0;
726 		SIMPLEQ_FOREACH(q, &sc->sc_queue, q_next) {
727 			npkts++;
728 			if ((q->q_crp->crp_flags & CRYPTO_F_BATCH) == 0)
729 				break;
730 		}
731 	} else
732 		npkts = 1;
733 	/*
734 	 * Check device status before going any further.
735 	 */
736 	if ((stat = READ_REG(sc, BS_STAT)) & (BS_STAT_MCR1_FULL | BS_STAT_DMAERR)) {
737 		if (stat & BS_STAT_DMAERR) {
738 			ubsec_totalreset(sc);
739 			ubsecstats.hst_dmaerr++;
740 		} else
741 			ubsecstats.hst_mcr1full++;
742 		return;
743 	}
744 	if (sc->sc_nqueue > ubsecstats.hst_maxqueue)
745 		ubsecstats.hst_maxqueue = sc->sc_nqueue;
746 	if (npkts > UBS_MAX_AGGR)
747 		npkts = UBS_MAX_AGGR;
748 	if (npkts < 2)				/* special case 1 op */
749 		goto feed1;
750 
751 	ubsecstats.hst_totbatch += npkts-1;
752 #ifdef UBSEC_DEBUG
753 	if (ubsec_debug)
754 		printf("merging %d records\n", npkts);
755 #endif /* UBSEC_DEBUG */
756 
757 	q = SIMPLEQ_FIRST(&sc->sc_queue);
758 	SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, q, q_next);
759 	--sc->sc_nqueue;
760 
761 	bus_dmamap_sync(sc->sc_dmat, q->q_src_map, BUS_DMASYNC_PREWRITE);
762 	if (q->q_dst_map != NULL)
763 		bus_dmamap_sync(sc->sc_dmat, q->q_dst_map, BUS_DMASYNC_PREREAD);
764 
765 	q->q_nstacked_mcrs = npkts - 1;		/* Number of packets stacked */
766 
767 	for (i = 0; i < q->q_nstacked_mcrs; i++) {
768 		q2 = SIMPLEQ_FIRST(&sc->sc_queue);
769 		bus_dmamap_sync(sc->sc_dmat, q2->q_src_map,
770 		    BUS_DMASYNC_PREWRITE);
771 		if (q2->q_dst_map != NULL)
772 			bus_dmamap_sync(sc->sc_dmat, q2->q_dst_map,
773 			    BUS_DMASYNC_PREREAD);
774 		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, q2, q_next);
775 		--sc->sc_nqueue;
776 
777 		v = (void*)(((char *)&q2->q_dma->d_dma->d_mcr) + sizeof(struct ubsec_mcr) -
778 		    sizeof(struct ubsec_mcr_add));
779 		bcopy(v, &q->q_dma->d_dma->d_mcradd[i], sizeof(struct ubsec_mcr_add));
780 		q->q_stacked_mcr[i] = q2;
781 	}
782 	q->q_dma->d_dma->d_mcr.mcr_pkts = htole16(npkts);
783 	SIMPLEQ_INSERT_TAIL(&sc->sc_qchip, q, q_next);
784 	sc->sc_nqchip += npkts;
785 	if (sc->sc_nqchip > ubsecstats.hst_maxqchip)
786 		ubsecstats.hst_maxqchip = sc->sc_nqchip;
787 	ubsec_dma_sync(&q->q_dma->d_alloc,
788 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
789 	WRITE_REG(sc, BS_MCR1, q->q_dma->d_alloc.dma_paddr +
790 	    offsetof(struct ubsec_dmachunk, d_mcr));
791 	return;
792 
793 feed1:
794 	q = SIMPLEQ_FIRST(&sc->sc_queue);
795 
796 	bus_dmamap_sync(sc->sc_dmat, q->q_src_map, BUS_DMASYNC_PREWRITE);
797 	if (q->q_dst_map != NULL)
798 		bus_dmamap_sync(sc->sc_dmat, q->q_dst_map, BUS_DMASYNC_PREREAD);
799 	ubsec_dma_sync(&q->q_dma->d_alloc,
800 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
801 
802 	WRITE_REG(sc, BS_MCR1, q->q_dma->d_alloc.dma_paddr +
803 	    offsetof(struct ubsec_dmachunk, d_mcr));
804 #ifdef UBSEC_DEBUG
805 	if (ubsec_debug)
806 		printf("feed1: q->chip %p %08x stat %08x\n",
807 		      q, (u_int32_t)vtophys(&q->q_dma->d_dma->d_mcr),
808 		      stat);
809 #endif /* UBSEC_DEBUG */
810 	SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, q, q_next);
811 	--sc->sc_nqueue;
812 	SIMPLEQ_INSERT_TAIL(&sc->sc_qchip, q, q_next);
813 	sc->sc_nqchip++;
814 	if (sc->sc_nqchip > ubsecstats.hst_maxqchip)
815 		ubsecstats.hst_maxqchip = sc->sc_nqchip;
816 	return;
817 }
818 
819 /*
820  * Allocate a new 'session' and return an encoded session id.  'sidp'
821  * contains our registration id, and should contain an encoded session
822  * id on successful allocation.
823  */
824 static int
825 ubsec_newsession(void *arg, u_int32_t *sidp, struct cryptoini *cri)
826 {
827 	struct cryptoini *c, *encini = NULL, *macini = NULL;
828 	struct ubsec_softc *sc = arg;
829 	struct ubsec_session *ses = NULL;
830 	MD5_CTX md5ctx;
831 	SHA1_CTX sha1ctx;
832 	int i, sesn;
833 
834 	KASSERT(sc != NULL, ("ubsec_newsession: null softc"));
835 	if (sidp == NULL || cri == NULL || sc == NULL)
836 		return (EINVAL);
837 
838 	for (c = cri; c != NULL; c = c->cri_next) {
839 		if (c->cri_alg == CRYPTO_MD5_HMAC ||
840 		    c->cri_alg == CRYPTO_SHA1_HMAC) {
841 			if (macini)
842 				return (EINVAL);
843 			macini = c;
844 		} else if (c->cri_alg == CRYPTO_DES_CBC ||
845 		    c->cri_alg == CRYPTO_3DES_CBC) {
846 			if (encini)
847 				return (EINVAL);
848 			encini = c;
849 		} else
850 			return (EINVAL);
851 	}
852 	if (encini == NULL && macini == NULL)
853 		return (EINVAL);
854 
855 	if (sc->sc_sessions == NULL) {
856 		ses = sc->sc_sessions = malloc(sizeof(struct ubsec_session),
857 						M_DEVBUF, M_INTWAIT);
858 		sesn = 0;
859 		sc->sc_nsessions = 1;
860 	} else {
861 		for (sesn = 0; sesn < sc->sc_nsessions; sesn++) {
862 			if (sc->sc_sessions[sesn].ses_used == 0) {
863 				ses = &sc->sc_sessions[sesn];
864 				break;
865 			}
866 		}
867 
868 		if (ses == NULL) {
869 			sesn = sc->sc_nsessions;
870 			ses = malloc((sesn + 1) * sizeof(struct ubsec_session),
871 					M_DEVBUF, M_INTWAIT);
872 			bcopy(sc->sc_sessions, ses, sesn *
873 			    sizeof(struct ubsec_session));
874 			bzero(sc->sc_sessions, sesn *
875 			    sizeof(struct ubsec_session));
876 			free(sc->sc_sessions, M_DEVBUF);
877 			sc->sc_sessions = ses;
878 			ses = &sc->sc_sessions[sesn];
879 			sc->sc_nsessions++;
880 		}
881 	}
882 
883 	bzero(ses, sizeof(struct ubsec_session));
884 	ses->ses_used = 1;
885 	if (encini) {
886 		/* get an IV, network byte order */
887 		/* XXX may read fewer than requested */
888 		read_random(ses->ses_iv, sizeof(ses->ses_iv));
889 
890 		/* Go ahead and compute key in ubsec's byte order */
891 		if (encini->cri_alg == CRYPTO_DES_CBC) {
892 			bcopy(encini->cri_key, &ses->ses_deskey[0], 8);
893 			bcopy(encini->cri_key, &ses->ses_deskey[2], 8);
894 			bcopy(encini->cri_key, &ses->ses_deskey[4], 8);
895 		} else
896 			bcopy(encini->cri_key, ses->ses_deskey, 24);
897 
898 		SWAP32(ses->ses_deskey[0]);
899 		SWAP32(ses->ses_deskey[1]);
900 		SWAP32(ses->ses_deskey[2]);
901 		SWAP32(ses->ses_deskey[3]);
902 		SWAP32(ses->ses_deskey[4]);
903 		SWAP32(ses->ses_deskey[5]);
904 	}
905 
906 	if (macini) {
907 		for (i = 0; i < macini->cri_klen / 8; i++)
908 			macini->cri_key[i] ^= HMAC_IPAD_VAL;
909 
910 		if (macini->cri_alg == CRYPTO_MD5_HMAC) {
911 			MD5Init(&md5ctx);
912 			MD5Update(&md5ctx, macini->cri_key,
913 			    macini->cri_klen / 8);
914 			MD5Update(&md5ctx, hmac_ipad_buffer,
915 			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
916 			bcopy(md5ctx.state, ses->ses_hminner,
917 			    sizeof(md5ctx.state));
918 		} else {
919 			SHA1Init(&sha1ctx);
920 			SHA1Update(&sha1ctx, macini->cri_key,
921 			    macini->cri_klen / 8);
922 			SHA1Update(&sha1ctx, hmac_ipad_buffer,
923 			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
924 			bcopy(sha1ctx.h.b32, ses->ses_hminner,
925 			    sizeof(sha1ctx.h.b32));
926 		}
927 
928 		for (i = 0; i < macini->cri_klen / 8; i++)
929 			macini->cri_key[i] ^= (HMAC_IPAD_VAL ^ HMAC_OPAD_VAL);
930 
931 		if (macini->cri_alg == CRYPTO_MD5_HMAC) {
932 			MD5Init(&md5ctx);
933 			MD5Update(&md5ctx, macini->cri_key,
934 			    macini->cri_klen / 8);
935 			MD5Update(&md5ctx, hmac_opad_buffer,
936 			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
937 			bcopy(md5ctx.state, ses->ses_hmouter,
938 			    sizeof(md5ctx.state));
939 		} else {
940 			SHA1Init(&sha1ctx);
941 			SHA1Update(&sha1ctx, macini->cri_key,
942 			    macini->cri_klen / 8);
943 			SHA1Update(&sha1ctx, hmac_opad_buffer,
944 			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
945 			bcopy(sha1ctx.h.b32, ses->ses_hmouter,
946 			    sizeof(sha1ctx.h.b32));
947 		}
948 
949 		for (i = 0; i < macini->cri_klen / 8; i++)
950 			macini->cri_key[i] ^= HMAC_OPAD_VAL;
951 	}
952 
953 	*sidp = UBSEC_SID(device_get_unit(sc->sc_dev), sesn);
954 	return (0);
955 }
956 
957 /*
958  * Deallocate a session.
959  */
960 static int
961 ubsec_freesession(void *arg, u_int64_t tid)
962 {
963 	struct ubsec_softc *sc = arg;
964 	int session;
965 	u_int32_t sid = ((u_int32_t) tid) & 0xffffffff;
966 
967 	KASSERT(sc != NULL, ("ubsec_freesession: null softc"));
968 	if (sc == NULL)
969 		return (EINVAL);
970 
971 	session = UBSEC_SESSION(sid);
972 	if (session >= sc->sc_nsessions)
973 		return (EINVAL);
974 
975 	bzero(&sc->sc_sessions[session], sizeof(sc->sc_sessions[session]));
976 	return (0);
977 }
978 
979 static void
980 ubsec_op_cb(void *arg, bus_dma_segment_t *seg, int nsegs, bus_size_t mapsize, int error)
981 {
982 	struct ubsec_operand *op = arg;
983 
984 	KASSERT(nsegs <= UBS_MAX_SCATTER,
985 		("Too many DMA segments returned when mapping operand"));
986 #ifdef UBSEC_DEBUG
987 	if (ubsec_debug)
988 		printf("ubsec_op_cb: mapsize %u nsegs %d\n",
989 			(u_int) mapsize, nsegs);
990 #endif
991 	op->mapsize = mapsize;
992 	op->nsegs = nsegs;
993 	bcopy(seg, op->segs, nsegs * sizeof (seg[0]));
994 }
995 
996 static int
997 ubsec_process(void *arg, struct cryptop *crp, int hint)
998 {
999 	struct ubsec_q *q = NULL;
1000 	int err = 0, i, j, s, nicealign;
1001 	struct ubsec_softc *sc = arg;
1002 	struct cryptodesc *crd1, *crd2, *maccrd, *enccrd;
1003 	int encoffset = 0, macoffset = 0, cpskip, cpoffset;
1004 	int sskip, dskip, stheend, dtheend;
1005 	int16_t coffset;
1006 	struct ubsec_session *ses;
1007 	struct ubsec_pktctx ctx;
1008 	struct ubsec_dma *dmap = NULL;
1009 
1010 	if (crp == NULL || crp->crp_callback == NULL || sc == NULL) {
1011 		ubsecstats.hst_invalid++;
1012 		return (EINVAL);
1013 	}
1014 	if (UBSEC_SESSION(crp->crp_sid) >= sc->sc_nsessions) {
1015 		ubsecstats.hst_badsession++;
1016 		return (EINVAL);
1017 	}
1018 
1019 	s = splimp();
1020 
1021 	if (SIMPLEQ_EMPTY(&sc->sc_freequeue)) {
1022 		ubsecstats.hst_queuefull++;
1023 		sc->sc_needwakeup |= CRYPTO_SYMQ;
1024 		splx(s);
1025 		return (ERESTART);
1026 	}
1027 	q = SIMPLEQ_FIRST(&sc->sc_freequeue);
1028 	SIMPLEQ_REMOVE_HEAD(&sc->sc_freequeue, q, q_next);
1029 	splx(s);
1030 
1031 	dmap = q->q_dma; /* Save dma pointer */
1032 	bzero(q, sizeof(struct ubsec_q));
1033 	bzero(&ctx, sizeof(ctx));
1034 
1035 	q->q_sesn = UBSEC_SESSION(crp->crp_sid);
1036 	q->q_dma = dmap;
1037 	ses = &sc->sc_sessions[q->q_sesn];
1038 
1039 	if (crp->crp_flags & CRYPTO_F_IMBUF) {
1040 		q->q_src_m = (struct mbuf *)crp->crp_buf;
1041 		q->q_dst_m = (struct mbuf *)crp->crp_buf;
1042 	} else if (crp->crp_flags & CRYPTO_F_IOV) {
1043 		q->q_src_io = (struct uio *)crp->crp_buf;
1044 		q->q_dst_io = (struct uio *)crp->crp_buf;
1045 	} else {
1046 		ubsecstats.hst_badflags++;
1047 		err = EINVAL;
1048 		goto errout;	/* XXX we don't handle contiguous blocks! */
1049 	}
1050 
1051 	bzero(&dmap->d_dma->d_mcr, sizeof(struct ubsec_mcr));
1052 
1053 	dmap->d_dma->d_mcr.mcr_pkts = htole16(1);
1054 	dmap->d_dma->d_mcr.mcr_flags = 0;
1055 	q->q_crp = crp;
1056 
1057 	crd1 = crp->crp_desc;
1058 	if (crd1 == NULL) {
1059 		ubsecstats.hst_nodesc++;
1060 		err = EINVAL;
1061 		goto errout;
1062 	}
1063 	crd2 = crd1->crd_next;
1064 
1065 	if (crd2 == NULL) {
1066 		if (crd1->crd_alg == CRYPTO_MD5_HMAC ||
1067 		    crd1->crd_alg == CRYPTO_SHA1_HMAC) {
1068 			maccrd = crd1;
1069 			enccrd = NULL;
1070 		} else if (crd1->crd_alg == CRYPTO_DES_CBC ||
1071 		    crd1->crd_alg == CRYPTO_3DES_CBC) {
1072 			maccrd = NULL;
1073 			enccrd = crd1;
1074 		} else {
1075 			ubsecstats.hst_badalg++;
1076 			err = EINVAL;
1077 			goto errout;
1078 		}
1079 	} else {
1080 		if ((crd1->crd_alg == CRYPTO_MD5_HMAC ||
1081 		    crd1->crd_alg == CRYPTO_SHA1_HMAC) &&
1082 		    (crd2->crd_alg == CRYPTO_DES_CBC ||
1083 			crd2->crd_alg == CRYPTO_3DES_CBC) &&
1084 		    ((crd2->crd_flags & CRD_F_ENCRYPT) == 0)) {
1085 			maccrd = crd1;
1086 			enccrd = crd2;
1087 		} else if ((crd1->crd_alg == CRYPTO_DES_CBC ||
1088 		    crd1->crd_alg == CRYPTO_3DES_CBC) &&
1089 		    (crd2->crd_alg == CRYPTO_MD5_HMAC ||
1090 			crd2->crd_alg == CRYPTO_SHA1_HMAC) &&
1091 		    (crd1->crd_flags & CRD_F_ENCRYPT)) {
1092 			enccrd = crd1;
1093 			maccrd = crd2;
1094 		} else {
1095 			/*
1096 			 * We cannot order the ubsec as requested
1097 			 */
1098 			ubsecstats.hst_badalg++;
1099 			err = EINVAL;
1100 			goto errout;
1101 		}
1102 	}
1103 
1104 	if (enccrd) {
1105 		encoffset = enccrd->crd_skip;
1106 		ctx.pc_flags |= htole16(UBS_PKTCTX_ENC_3DES);
1107 
1108 		if (enccrd->crd_flags & CRD_F_ENCRYPT) {
1109 			q->q_flags |= UBSEC_QFLAGS_COPYOUTIV;
1110 
1111 			if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
1112 				bcopy(enccrd->crd_iv, ctx.pc_iv, 8);
1113 			else {
1114 				ctx.pc_iv[0] = ses->ses_iv[0];
1115 				ctx.pc_iv[1] = ses->ses_iv[1];
1116 			}
1117 
1118 			if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) {
1119 				if (crp->crp_flags & CRYPTO_F_IMBUF)
1120 					m_copyback(q->q_src_m,
1121 					    enccrd->crd_inject,
1122 					    8, (caddr_t)ctx.pc_iv);
1123 				else if (crp->crp_flags & CRYPTO_F_IOV)
1124 					cuio_copyback(q->q_src_io,
1125 					    enccrd->crd_inject,
1126 					    8, (caddr_t)ctx.pc_iv);
1127 			}
1128 		} else {
1129 			ctx.pc_flags |= htole16(UBS_PKTCTX_INBOUND);
1130 
1131 			if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
1132 				bcopy(enccrd->crd_iv, ctx.pc_iv, 8);
1133 			else if (crp->crp_flags & CRYPTO_F_IMBUF)
1134 				m_copydata(q->q_src_m, enccrd->crd_inject,
1135 				    8, (caddr_t)ctx.pc_iv);
1136 			else if (crp->crp_flags & CRYPTO_F_IOV)
1137 				cuio_copydata(q->q_src_io,
1138 				    enccrd->crd_inject, 8,
1139 				    (caddr_t)ctx.pc_iv);
1140 		}
1141 
1142 		ctx.pc_deskey[0] = ses->ses_deskey[0];
1143 		ctx.pc_deskey[1] = ses->ses_deskey[1];
1144 		ctx.pc_deskey[2] = ses->ses_deskey[2];
1145 		ctx.pc_deskey[3] = ses->ses_deskey[3];
1146 		ctx.pc_deskey[4] = ses->ses_deskey[4];
1147 		ctx.pc_deskey[5] = ses->ses_deskey[5];
1148 		SWAP32(ctx.pc_iv[0]);
1149 		SWAP32(ctx.pc_iv[1]);
1150 	}
1151 
1152 	if (maccrd) {
1153 		macoffset = maccrd->crd_skip;
1154 
1155 		if (maccrd->crd_alg == CRYPTO_MD5_HMAC)
1156 			ctx.pc_flags |= htole16(UBS_PKTCTX_AUTH_MD5);
1157 		else
1158 			ctx.pc_flags |= htole16(UBS_PKTCTX_AUTH_SHA1);
1159 
1160 		for (i = 0; i < 5; i++) {
1161 			ctx.pc_hminner[i] = ses->ses_hminner[i];
1162 			ctx.pc_hmouter[i] = ses->ses_hmouter[i];
1163 
1164 			HTOLE32(ctx.pc_hminner[i]);
1165 			HTOLE32(ctx.pc_hmouter[i]);
1166 		}
1167 	}
1168 
1169 	if (enccrd && maccrd) {
1170 		/*
1171 		 * ubsec cannot handle packets where the end of encryption
1172 		 * and authentication are not the same, or where the
1173 		 * encrypted part begins before the authenticated part.
1174 		 */
1175 		if ((encoffset + enccrd->crd_len) !=
1176 		    (macoffset + maccrd->crd_len)) {
1177 			ubsecstats.hst_lenmismatch++;
1178 			err = EINVAL;
1179 			goto errout;
1180 		}
1181 		if (enccrd->crd_skip < maccrd->crd_skip) {
1182 			ubsecstats.hst_skipmismatch++;
1183 			err = EINVAL;
1184 			goto errout;
1185 		}
1186 		sskip = maccrd->crd_skip;
1187 		cpskip = dskip = enccrd->crd_skip;
1188 		stheend = maccrd->crd_len;
1189 		dtheend = enccrd->crd_len;
1190 		coffset = enccrd->crd_skip - maccrd->crd_skip;
1191 		cpoffset = cpskip + dtheend;
1192 #ifdef UBSEC_DEBUG
1193 		if (ubsec_debug) {
1194 			printf("mac: skip %d, len %d, inject %d\n",
1195 			    maccrd->crd_skip, maccrd->crd_len, maccrd->crd_inject);
1196 			printf("enc: skip %d, len %d, inject %d\n",
1197 			    enccrd->crd_skip, enccrd->crd_len, enccrd->crd_inject);
1198 			printf("src: skip %d, len %d\n", sskip, stheend);
1199 			printf("dst: skip %d, len %d\n", dskip, dtheend);
1200 			printf("ubs: coffset %d, pktlen %d, cpskip %d, cpoffset %d\n",
1201 			    coffset, stheend, cpskip, cpoffset);
1202 		}
1203 #endif
1204 	} else {
1205 		cpskip = dskip = sskip = macoffset + encoffset;
1206 		dtheend = stheend = (enccrd)?enccrd->crd_len:maccrd->crd_len;
1207 		cpoffset = cpskip + dtheend;
1208 		coffset = 0;
1209 	}
1210 	ctx.pc_offset = htole16(coffset >> 2);
1211 
1212 	if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &q->q_src_map)) {
1213 		ubsecstats.hst_nomap++;
1214 		err = ENOMEM;
1215 		goto errout;
1216 	}
1217 	if (crp->crp_flags & CRYPTO_F_IMBUF) {
1218 		if (bus_dmamap_load_mbuf(sc->sc_dmat, q->q_src_map,
1219 		    q->q_src_m, ubsec_op_cb, &q->q_src, BUS_DMA_NOWAIT) != 0) {
1220 			bus_dmamap_destroy(sc->sc_dmat, q->q_src_map);
1221 			q->q_src_map = NULL;
1222 			ubsecstats.hst_noload++;
1223 			err = ENOMEM;
1224 			goto errout;
1225 		}
1226 	} else if (crp->crp_flags & CRYPTO_F_IOV) {
1227 		if (bus_dmamap_load_uio(sc->sc_dmat, q->q_src_map,
1228 		    q->q_src_io, ubsec_op_cb, &q->q_src, BUS_DMA_NOWAIT) != 0) {
1229 			bus_dmamap_destroy(sc->sc_dmat, q->q_src_map);
1230 			q->q_src_map = NULL;
1231 			ubsecstats.hst_noload++;
1232 			err = ENOMEM;
1233 			goto errout;
1234 		}
1235 	}
1236 	nicealign = ubsec_dmamap_aligned(&q->q_src);
1237 
1238 	dmap->d_dma->d_mcr.mcr_pktlen = htole16(stheend);
1239 
1240 #ifdef UBSEC_DEBUG
1241 	if (ubsec_debug)
1242 		printf("src skip: %d nicealign: %u\n", sskip, nicealign);
1243 #endif
1244 	for (i = j = 0; i < q->q_src_nsegs; i++) {
1245 		struct ubsec_pktbuf *pb;
1246 		bus_size_t packl = q->q_src_segs[i].ds_len;
1247 		bus_addr_t packp = q->q_src_segs[i].ds_addr;
1248 
1249 		if (sskip >= packl) {
1250 			sskip -= packl;
1251 			continue;
1252 		}
1253 
1254 		packl -= sskip;
1255 		packp += sskip;
1256 		sskip = 0;
1257 
1258 		if (packl > 0xfffc) {
1259 			err = EIO;
1260 			goto errout;
1261 		}
1262 
1263 		if (j == 0)
1264 			pb = &dmap->d_dma->d_mcr.mcr_ipktbuf;
1265 		else
1266 			pb = &dmap->d_dma->d_sbuf[j - 1];
1267 
1268 		pb->pb_addr = htole32(packp);
1269 
1270 		if (stheend) {
1271 			if (packl > stheend) {
1272 				pb->pb_len = htole32(stheend);
1273 				stheend = 0;
1274 			} else {
1275 				pb->pb_len = htole32(packl);
1276 				stheend -= packl;
1277 			}
1278 		} else
1279 			pb->pb_len = htole32(packl);
1280 
1281 		if ((i + 1) == q->q_src_nsegs)
1282 			pb->pb_next = 0;
1283 		else
1284 			pb->pb_next = htole32(dmap->d_alloc.dma_paddr +
1285 			    offsetof(struct ubsec_dmachunk, d_sbuf[j]));
1286 		j++;
1287 	}
1288 
1289 	if (enccrd == NULL && maccrd != NULL) {
1290 		dmap->d_dma->d_mcr.mcr_opktbuf.pb_addr = 0;
1291 		dmap->d_dma->d_mcr.mcr_opktbuf.pb_len = 0;
1292 		dmap->d_dma->d_mcr.mcr_opktbuf.pb_next = htole32(dmap->d_alloc.dma_paddr +
1293 		    offsetof(struct ubsec_dmachunk, d_macbuf[0]));
1294 #ifdef UBSEC_DEBUG
1295 		if (ubsec_debug)
1296 			printf("opkt: %x %x %x\n",
1297 			    dmap->d_dma->d_mcr.mcr_opktbuf.pb_addr,
1298 			    dmap->d_dma->d_mcr.mcr_opktbuf.pb_len,
1299 			    dmap->d_dma->d_mcr.mcr_opktbuf.pb_next);
1300 #endif
1301 	} else {
1302 		if (crp->crp_flags & CRYPTO_F_IOV) {
1303 			if (!nicealign) {
1304 				ubsecstats.hst_iovmisaligned++;
1305 				err = EINVAL;
1306 				goto errout;
1307 			}
1308 			if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT,
1309 			     &q->q_dst_map)) {
1310 				ubsecstats.hst_nomap++;
1311 				err = ENOMEM;
1312 				goto errout;
1313 			}
1314 			if (bus_dmamap_load_uio(sc->sc_dmat, q->q_dst_map,
1315 			    q->q_dst_io, ubsec_op_cb, &q->q_dst, BUS_DMA_NOWAIT) != 0) {
1316 				bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map);
1317 				q->q_dst_map = NULL;
1318 				ubsecstats.hst_noload++;
1319 				err = ENOMEM;
1320 				goto errout;
1321 			}
1322 		} else if (crp->crp_flags & CRYPTO_F_IMBUF) {
1323 			if (nicealign) {
1324 				q->q_dst = q->q_src;
1325 			} else {
1326 				int totlen, len;
1327 				struct mbuf *m, *top, **mp;
1328 
1329 				ubsecstats.hst_unaligned++;
1330 				totlen = q->q_src_mapsize;
1331 				if (q->q_src_m->m_flags & M_PKTHDR) {
1332 					len = MHLEN;
1333 					MGETHDR(m, MB_DONTWAIT, MT_DATA);
1334 					if (m && !m_dup_pkthdr(m, q->q_src_m, MB_DONTWAIT)) {
1335 						m_free(m);
1336 						m = NULL;
1337 					}
1338 				} else {
1339 					len = MLEN;
1340 					MGET(m, MB_DONTWAIT, MT_DATA);
1341 				}
1342 				if (m == NULL) {
1343 					ubsecstats.hst_nombuf++;
1344 					err = sc->sc_nqueue ? ERESTART : ENOMEM;
1345 					goto errout;
1346 				}
1347 				if (totlen >= MINCLSIZE) {
1348 					MCLGET(m, MB_DONTWAIT);
1349 					if ((m->m_flags & M_EXT) == 0) {
1350 						m_free(m);
1351 						ubsecstats.hst_nomcl++;
1352 						err = sc->sc_nqueue ? ERESTART : ENOMEM;
1353 						goto errout;
1354 					}
1355 					len = MCLBYTES;
1356 				}
1357 				m->m_len = len;
1358 				top = NULL;
1359 				mp = &top;
1360 
1361 				while (totlen > 0) {
1362 					if (top) {
1363 						MGET(m, MB_DONTWAIT, MT_DATA);
1364 						if (m == NULL) {
1365 							m_freem(top);
1366 							ubsecstats.hst_nombuf++;
1367 							err = sc->sc_nqueue ? ERESTART : ENOMEM;
1368 							goto errout;
1369 						}
1370 						len = MLEN;
1371 					}
1372 					if (top && totlen >= MINCLSIZE) {
1373 						MCLGET(m, MB_DONTWAIT);
1374 						if ((m->m_flags & M_EXT) == 0) {
1375 							*mp = m;
1376 							m_freem(top);
1377 							ubsecstats.hst_nomcl++;
1378 							err = sc->sc_nqueue ? ERESTART : ENOMEM;
1379 							goto errout;
1380 						}
1381 						len = MCLBYTES;
1382 					}
1383 					m->m_len = len = min(totlen, len);
1384 					totlen -= len;
1385 					*mp = m;
1386 					mp = &m->m_next;
1387 				}
1388 				q->q_dst_m = top;
1389 				ubsec_mcopy(q->q_src_m, q->q_dst_m,
1390 				    cpskip, cpoffset);
1391 				if (bus_dmamap_create(sc->sc_dmat,
1392 				    BUS_DMA_NOWAIT, &q->q_dst_map) != 0) {
1393 					ubsecstats.hst_nomap++;
1394 					err = ENOMEM;
1395 					goto errout;
1396 				}
1397 				if (bus_dmamap_load_mbuf(sc->sc_dmat,
1398 				    q->q_dst_map, q->q_dst_m,
1399 				    ubsec_op_cb, &q->q_dst,
1400 				    BUS_DMA_NOWAIT) != 0) {
1401 					bus_dmamap_destroy(sc->sc_dmat,
1402 					q->q_dst_map);
1403 					q->q_dst_map = NULL;
1404 					ubsecstats.hst_noload++;
1405 					err = ENOMEM;
1406 					goto errout;
1407 				}
1408 			}
1409 		} else {
1410 			ubsecstats.hst_badflags++;
1411 			err = EINVAL;
1412 			goto errout;
1413 		}
1414 
1415 #ifdef UBSEC_DEBUG
1416 		if (ubsec_debug)
1417 			printf("dst skip: %d\n", dskip);
1418 #endif
1419 		for (i = j = 0; i < q->q_dst_nsegs; i++) {
1420 			struct ubsec_pktbuf *pb;
1421 			bus_size_t packl = q->q_dst_segs[i].ds_len;
1422 			bus_addr_t packp = q->q_dst_segs[i].ds_addr;
1423 
1424 			if (dskip >= packl) {
1425 				dskip -= packl;
1426 				continue;
1427 			}
1428 
1429 			packl -= dskip;
1430 			packp += dskip;
1431 			dskip = 0;
1432 
1433 			if (packl > 0xfffc) {
1434 				err = EIO;
1435 				goto errout;
1436 			}
1437 
1438 			if (j == 0)
1439 				pb = &dmap->d_dma->d_mcr.mcr_opktbuf;
1440 			else
1441 				pb = &dmap->d_dma->d_dbuf[j - 1];
1442 
1443 			pb->pb_addr = htole32(packp);
1444 
1445 			if (dtheend) {
1446 				if (packl > dtheend) {
1447 					pb->pb_len = htole32(dtheend);
1448 					dtheend = 0;
1449 				} else {
1450 					pb->pb_len = htole32(packl);
1451 					dtheend -= packl;
1452 				}
1453 			} else
1454 				pb->pb_len = htole32(packl);
1455 
1456 			if ((i + 1) == q->q_dst_nsegs) {
1457 				if (maccrd)
1458 					pb->pb_next = htole32(dmap->d_alloc.dma_paddr +
1459 					    offsetof(struct ubsec_dmachunk, d_macbuf[0]));
1460 				else
1461 					pb->pb_next = 0;
1462 			} else
1463 				pb->pb_next = htole32(dmap->d_alloc.dma_paddr +
1464 				    offsetof(struct ubsec_dmachunk, d_dbuf[j]));
1465 			j++;
1466 		}
1467 	}
1468 
1469 	dmap->d_dma->d_mcr.mcr_cmdctxp = htole32(dmap->d_alloc.dma_paddr +
1470 	    offsetof(struct ubsec_dmachunk, d_ctx));
1471 
1472 	if (sc->sc_flags & UBS_FLAGS_LONGCTX) {
1473 		struct ubsec_pktctx_long *ctxl;
1474 
1475 		ctxl = (struct ubsec_pktctx_long *)(dmap->d_alloc.dma_vaddr +
1476 		    offsetof(struct ubsec_dmachunk, d_ctx));
1477 
1478 		/* transform small context into long context */
1479 		ctxl->pc_len = htole16(sizeof(struct ubsec_pktctx_long));
1480 		ctxl->pc_type = htole16(UBS_PKTCTX_TYPE_IPSEC);
1481 		ctxl->pc_flags = ctx.pc_flags;
1482 		ctxl->pc_offset = ctx.pc_offset;
1483 		for (i = 0; i < 6; i++)
1484 			ctxl->pc_deskey[i] = ctx.pc_deskey[i];
1485 		for (i = 0; i < 5; i++)
1486 			ctxl->pc_hminner[i] = ctx.pc_hminner[i];
1487 		for (i = 0; i < 5; i++)
1488 			ctxl->pc_hmouter[i] = ctx.pc_hmouter[i];
1489 		ctxl->pc_iv[0] = ctx.pc_iv[0];
1490 		ctxl->pc_iv[1] = ctx.pc_iv[1];
1491 	} else
1492 		bcopy(&ctx, dmap->d_alloc.dma_vaddr +
1493 		    offsetof(struct ubsec_dmachunk, d_ctx),
1494 		    sizeof(struct ubsec_pktctx));
1495 
1496 	s = splimp();
1497 	SIMPLEQ_INSERT_TAIL(&sc->sc_queue, q, q_next);
1498 	sc->sc_nqueue++;
1499 	ubsecstats.hst_ipackets++;
1500 	ubsecstats.hst_ibytes += dmap->d_alloc.dma_size;
1501 	if ((hint & CRYPTO_HINT_MORE) == 0 || sc->sc_nqueue >= UBS_MAX_AGGR)
1502 		ubsec_feed(sc);
1503 	splx(s);
1504 	return (0);
1505 
1506 errout:
1507 	if (q != NULL) {
1508 		if ((q->q_dst_m != NULL) && (q->q_src_m != q->q_dst_m))
1509 			m_freem(q->q_dst_m);
1510 
1511 		if (q->q_dst_map != NULL && q->q_dst_map != q->q_src_map) {
1512 			bus_dmamap_unload(sc->sc_dmat, q->q_dst_map);
1513 			bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map);
1514 		}
1515 		if (q->q_src_map != NULL) {
1516 			bus_dmamap_unload(sc->sc_dmat, q->q_src_map);
1517 			bus_dmamap_destroy(sc->sc_dmat, q->q_src_map);
1518 		}
1519 
1520 		s = splimp();
1521 		SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
1522 		splx(s);
1523 	}
1524 	if (err != ERESTART) {
1525 		crp->crp_etype = err;
1526 		crypto_done(crp);
1527 	} else {
1528 		sc->sc_needwakeup |= CRYPTO_SYMQ;
1529 	}
1530 	return (err);
1531 }
1532 
1533 static void
1534 ubsec_callback(struct ubsec_softc *sc, struct ubsec_q *q)
1535 {
1536 	struct cryptop *crp = (struct cryptop *)q->q_crp;
1537 	struct cryptodesc *crd;
1538 	struct ubsec_dma *dmap = q->q_dma;
1539 
1540 	ubsecstats.hst_opackets++;
1541 	ubsecstats.hst_obytes += dmap->d_alloc.dma_size;
1542 
1543 	ubsec_dma_sync(&dmap->d_alloc,
1544 	    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
1545 	if (q->q_dst_map != NULL && q->q_dst_map != q->q_src_map) {
1546 		bus_dmamap_sync(sc->sc_dmat, q->q_dst_map,
1547 		    BUS_DMASYNC_POSTREAD);
1548 		bus_dmamap_unload(sc->sc_dmat, q->q_dst_map);
1549 		bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map);
1550 	}
1551 	bus_dmamap_sync(sc->sc_dmat, q->q_src_map, BUS_DMASYNC_POSTWRITE);
1552 	bus_dmamap_unload(sc->sc_dmat, q->q_src_map);
1553 	bus_dmamap_destroy(sc->sc_dmat, q->q_src_map);
1554 
1555 	if ((crp->crp_flags & CRYPTO_F_IMBUF) && (q->q_src_m != q->q_dst_m)) {
1556 		m_freem(q->q_src_m);
1557 		crp->crp_buf = (caddr_t)q->q_dst_m;
1558 	}
1559 	ubsecstats.hst_obytes += ((struct mbuf *)crp->crp_buf)->m_len;
1560 
1561 	/* copy out IV for future use */
1562 	if (q->q_flags & UBSEC_QFLAGS_COPYOUTIV) {
1563 		for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
1564 			if (crd->crd_alg != CRYPTO_DES_CBC &&
1565 			    crd->crd_alg != CRYPTO_3DES_CBC)
1566 				continue;
1567 			if (crp->crp_flags & CRYPTO_F_IMBUF)
1568 				m_copydata((struct mbuf *)crp->crp_buf,
1569 				    crd->crd_skip + crd->crd_len - 8, 8,
1570 				    (caddr_t)sc->sc_sessions[q->q_sesn].ses_iv);
1571 			else if (crp->crp_flags & CRYPTO_F_IOV) {
1572 				cuio_copydata((struct uio *)crp->crp_buf,
1573 				    crd->crd_skip + crd->crd_len - 8, 8,
1574 				    (caddr_t)sc->sc_sessions[q->q_sesn].ses_iv);
1575 			}
1576 			break;
1577 		}
1578 	}
1579 
1580 	for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
1581 		if (crd->crd_alg != CRYPTO_MD5_HMAC &&
1582 		    crd->crd_alg != CRYPTO_SHA1_HMAC)
1583 			continue;
1584 		if (crp->crp_flags & CRYPTO_F_IMBUF)
1585 			m_copyback((struct mbuf *)crp->crp_buf,
1586 			    crd->crd_inject, 12,
1587 			    (caddr_t)dmap->d_dma->d_macbuf);
1588 		else if (crp->crp_flags & CRYPTO_F_IOV && crp->crp_mac)
1589 			bcopy((caddr_t)dmap->d_dma->d_macbuf,
1590 			    crp->crp_mac, 12);
1591 		break;
1592 	}
1593 	SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
1594 	crypto_done(crp);
1595 }
1596 
1597 static void
1598 ubsec_mcopy(struct mbuf *srcm, struct mbuf *dstm, int hoffset, int toffset)
1599 {
1600 	int i, j, dlen, slen;
1601 	caddr_t dptr, sptr;
1602 
1603 	j = 0;
1604 	sptr = srcm->m_data;
1605 	slen = srcm->m_len;
1606 	dptr = dstm->m_data;
1607 	dlen = dstm->m_len;
1608 
1609 	while (1) {
1610 		for (i = 0; i < min(slen, dlen); i++) {
1611 			if (j < hoffset || j >= toffset)
1612 				*dptr++ = *sptr++;
1613 			slen--;
1614 			dlen--;
1615 			j++;
1616 		}
1617 		if (slen == 0) {
1618 			srcm = srcm->m_next;
1619 			if (srcm == NULL)
1620 				return;
1621 			sptr = srcm->m_data;
1622 			slen = srcm->m_len;
1623 		}
1624 		if (dlen == 0) {
1625 			dstm = dstm->m_next;
1626 			if (dstm == NULL)
1627 				return;
1628 			dptr = dstm->m_data;
1629 			dlen = dstm->m_len;
1630 		}
1631 	}
1632 }
1633 
1634 /*
1635  * feed the key generator, must be called at splimp() or higher.
1636  */
1637 static int
1638 ubsec_feed2(struct ubsec_softc *sc)
1639 {
1640 	struct ubsec_q2 *q;
1641 
1642 	while (!SIMPLEQ_EMPTY(&sc->sc_queue2)) {
1643 		if (READ_REG(sc, BS_STAT) & BS_STAT_MCR2_FULL)
1644 			break;
1645 		q = SIMPLEQ_FIRST(&sc->sc_queue2);
1646 
1647 		ubsec_dma_sync(&q->q_mcr,
1648 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1649 		ubsec_dma_sync(&q->q_ctx, BUS_DMASYNC_PREWRITE);
1650 
1651 		WRITE_REG(sc, BS_MCR2, q->q_mcr.dma_paddr);
1652 		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue2, q, q_next);
1653 		--sc->sc_nqueue2;
1654 		SIMPLEQ_INSERT_TAIL(&sc->sc_qchip2, q, q_next);
1655 	}
1656 	return (0);
1657 }
1658 
1659 /*
1660  * Callback for handling random numbers
1661  */
1662 static void
1663 ubsec_callback2(struct ubsec_softc *sc, struct ubsec_q2 *q)
1664 {
1665 	struct cryptkop *krp;
1666 	struct ubsec_ctx_keyop *ctx;
1667 
1668 	ctx = (struct ubsec_ctx_keyop *)q->q_ctx.dma_vaddr;
1669 	ubsec_dma_sync(&q->q_ctx, BUS_DMASYNC_POSTWRITE);
1670 
1671 	switch (q->q_type) {
1672 #ifndef UBSEC_NO_RNG
1673 	case UBS_CTXOP_RNGBYPASS: {
1674 		struct ubsec_q2_rng *rng = (struct ubsec_q2_rng *)q;
1675 
1676 		ubsec_dma_sync(&rng->rng_buf, BUS_DMASYNC_POSTREAD);
1677 		(*sc->sc_harvest)(sc->sc_rndtest,
1678 			rng->rng_buf.dma_vaddr,
1679 			UBSEC_RNG_BUFSIZ*sizeof (u_int32_t));
1680 		rng->rng_used = 0;
1681 		callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc);
1682 		break;
1683 	}
1684 #endif
1685 	case UBS_CTXOP_MODEXP: {
1686 		struct ubsec_q2_modexp *me = (struct ubsec_q2_modexp *)q;
1687 		u_int rlen, clen;
1688 
1689 		krp = me->me_krp;
1690 		rlen = (me->me_modbits + 7) / 8;
1691 		clen = (krp->krp_param[krp->krp_iparams].crp_nbits + 7) / 8;
1692 
1693 		ubsec_dma_sync(&me->me_M, BUS_DMASYNC_POSTWRITE);
1694 		ubsec_dma_sync(&me->me_E, BUS_DMASYNC_POSTWRITE);
1695 		ubsec_dma_sync(&me->me_C, BUS_DMASYNC_POSTREAD);
1696 		ubsec_dma_sync(&me->me_epb, BUS_DMASYNC_POSTWRITE);
1697 
1698 		if (clen < rlen)
1699 			krp->krp_status = E2BIG;
1700 		else {
1701 			if (sc->sc_flags & UBS_FLAGS_HWNORM) {
1702 				bzero(krp->krp_param[krp->krp_iparams].crp_p,
1703 				    (krp->krp_param[krp->krp_iparams].crp_nbits
1704 					+ 7) / 8);
1705 				bcopy(me->me_C.dma_vaddr,
1706 				    krp->krp_param[krp->krp_iparams].crp_p,
1707 				    (me->me_modbits + 7) / 8);
1708 			} else
1709 				ubsec_kshift_l(me->me_shiftbits,
1710 				    me->me_C.dma_vaddr, me->me_normbits,
1711 				    krp->krp_param[krp->krp_iparams].crp_p,
1712 				    krp->krp_param[krp->krp_iparams].crp_nbits);
1713 		}
1714 
1715 		crypto_kdone(krp);
1716 
1717 		/* bzero all potentially sensitive data */
1718 		bzero(me->me_E.dma_vaddr, me->me_E.dma_size);
1719 		bzero(me->me_M.dma_vaddr, me->me_M.dma_size);
1720 		bzero(me->me_C.dma_vaddr, me->me_C.dma_size);
1721 		bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size);
1722 
1723 		/* Can't free here, so put us on the free list. */
1724 		SIMPLEQ_INSERT_TAIL(&sc->sc_q2free, &me->me_q, q_next);
1725 		break;
1726 	}
1727 	case UBS_CTXOP_RSAPRIV: {
1728 		struct ubsec_q2_rsapriv *rp = (struct ubsec_q2_rsapriv *)q;
1729 		u_int len;
1730 
1731 		krp = rp->rpr_krp;
1732 		ubsec_dma_sync(&rp->rpr_msgin, BUS_DMASYNC_POSTWRITE);
1733 		ubsec_dma_sync(&rp->rpr_msgout, BUS_DMASYNC_POSTREAD);
1734 
1735 		len = (krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT].crp_nbits + 7) / 8;
1736 		bcopy(rp->rpr_msgout.dma_vaddr,
1737 		    krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT].crp_p, len);
1738 
1739 		crypto_kdone(krp);
1740 
1741 		bzero(rp->rpr_msgin.dma_vaddr, rp->rpr_msgin.dma_size);
1742 		bzero(rp->rpr_msgout.dma_vaddr, rp->rpr_msgout.dma_size);
1743 		bzero(rp->rpr_q.q_ctx.dma_vaddr, rp->rpr_q.q_ctx.dma_size);
1744 
1745 		/* Can't free here, so put us on the free list. */
1746 		SIMPLEQ_INSERT_TAIL(&sc->sc_q2free, &rp->rpr_q, q_next);
1747 		break;
1748 	}
1749 	default:
1750 		device_printf(sc->sc_dev, "unknown ctx op: %x\n",
1751 		    letoh16(ctx->ctx_op));
1752 		break;
1753 	}
1754 }
1755 
1756 #ifndef UBSEC_NO_RNG
1757 static void
1758 ubsec_rng(void *vsc)
1759 {
1760 	struct ubsec_softc *sc = vsc;
1761 	struct ubsec_q2_rng *rng = &sc->sc_rng;
1762 	struct ubsec_mcr *mcr;
1763 	struct ubsec_ctx_rngbypass *ctx;
1764 	int s;
1765 
1766 	s = splimp();
1767 	if (rng->rng_used) {
1768 		splx(s);
1769 		return;
1770 	}
1771 	sc->sc_nqueue2++;
1772 	if (sc->sc_nqueue2 >= UBS_MAX_NQUEUE)
1773 		goto out;
1774 
1775 	mcr = (struct ubsec_mcr *)rng->rng_q.q_mcr.dma_vaddr;
1776 	ctx = (struct ubsec_ctx_rngbypass *)rng->rng_q.q_ctx.dma_vaddr;
1777 
1778 	mcr->mcr_pkts = htole16(1);
1779 	mcr->mcr_flags = 0;
1780 	mcr->mcr_cmdctxp = htole32(rng->rng_q.q_ctx.dma_paddr);
1781 	mcr->mcr_ipktbuf.pb_addr = mcr->mcr_ipktbuf.pb_next = 0;
1782 	mcr->mcr_ipktbuf.pb_len = 0;
1783 	mcr->mcr_reserved = mcr->mcr_pktlen = 0;
1784 	mcr->mcr_opktbuf.pb_addr = htole32(rng->rng_buf.dma_paddr);
1785 	mcr->mcr_opktbuf.pb_len = htole32(((sizeof(u_int32_t) * UBSEC_RNG_BUFSIZ)) &
1786 	    UBS_PKTBUF_LEN);
1787 	mcr->mcr_opktbuf.pb_next = 0;
1788 
1789 	ctx->rbp_len = htole16(sizeof(struct ubsec_ctx_rngbypass));
1790 	ctx->rbp_op = htole16(UBS_CTXOP_RNGBYPASS);
1791 	rng->rng_q.q_type = UBS_CTXOP_RNGBYPASS;
1792 
1793 	ubsec_dma_sync(&rng->rng_buf, BUS_DMASYNC_PREREAD);
1794 
1795 	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &rng->rng_q, q_next);
1796 	rng->rng_used = 1;
1797 	ubsec_feed2(sc);
1798 	ubsecstats.hst_rng++;
1799 	splx(s);
1800 
1801 	return;
1802 
1803 out:
1804 	/*
1805 	 * Something weird happened, generate our own call back.
1806 	 */
1807 	sc->sc_nqueue2--;
1808 	splx(s);
1809 	callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc);
1810 }
1811 #endif /* UBSEC_NO_RNG */
1812 
1813 static void
1814 ubsec_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1815 {
1816 	bus_addr_t *paddr = (bus_addr_t*) arg;
1817 	*paddr = segs->ds_addr;
1818 }
1819 
1820 static int
1821 ubsec_dma_malloc(
1822 	struct ubsec_softc *sc,
1823 	bus_size_t size,
1824 	struct ubsec_dma_alloc *dma,
1825 	int mapflags
1826 )
1827 {
1828 	int r;
1829 
1830 	/* XXX could specify sc_dmat as parent but that just adds overhead */
1831 	r = bus_dma_tag_create(NULL,			/* parent */
1832 			       1, 0,			/* alignment, bounds */
1833 			       BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
1834 			       BUS_SPACE_MAXADDR,	/* highaddr */
1835 			       NULL, NULL,		/* filter, filterarg */
1836 			       size,			/* maxsize */
1837 			       1,			/* nsegments */
1838 			       size,			/* maxsegsize */
1839 			       BUS_DMA_ALLOCNOW,	/* flags */
1840 			       &dma->dma_tag);
1841 	if (r != 0) {
1842 		device_printf(sc->sc_dev, "ubsec_dma_malloc: "
1843 			"bus_dma_tag_create failed; error %u\n", r);
1844 		goto fail_0;
1845 	}
1846 
1847 	r = bus_dmamap_create(dma->dma_tag, BUS_DMA_NOWAIT, &dma->dma_map);
1848 	if (r != 0) {
1849 		device_printf(sc->sc_dev, "ubsec_dma_malloc: "
1850 			"bus_dmamap_create failed; error %u\n", r);
1851 		goto fail_1;
1852 	}
1853 
1854 	r = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
1855 			     BUS_DMA_NOWAIT, &dma->dma_map);
1856 	if (r != 0) {
1857 		device_printf(sc->sc_dev, "ubsec_dma_malloc: "
1858 			"bus_dmammem_alloc failed; size %u, error %u\n",
1859 			size, r);
1860 		goto fail_2;
1861 	}
1862 
1863 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
1864 		            size,
1865 			    ubsec_dmamap_cb,
1866 			    &dma->dma_paddr,
1867 			    mapflags | BUS_DMA_NOWAIT);
1868 	if (r != 0) {
1869 		device_printf(sc->sc_dev, "ubsec_dma_malloc: "
1870 			"bus_dmamap_load failed; error %u\n", r);
1871 		goto fail_3;
1872 	}
1873 
1874 	dma->dma_size = size;
1875 	return (0);
1876 
1877 fail_3:
1878 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
1879 fail_2:
1880 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
1881 fail_1:
1882 	bus_dmamap_destroy(dma->dma_tag, dma->dma_map);
1883 	bus_dma_tag_destroy(dma->dma_tag);
1884 fail_0:
1885 	dma->dma_map = NULL;
1886 	dma->dma_tag = NULL;
1887 	return (r);
1888 }
1889 
1890 static void
1891 ubsec_dma_free(struct ubsec_softc *sc, struct ubsec_dma_alloc *dma)
1892 {
1893 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
1894 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
1895 	bus_dmamap_destroy(dma->dma_tag, dma->dma_map);
1896 	bus_dma_tag_destroy(dma->dma_tag);
1897 }
1898 
1899 /*
1900  * Resets the board.  Values in the regesters are left as is
1901  * from the reset (i.e. initial values are assigned elsewhere).
1902  */
1903 static void
1904 ubsec_reset_board(struct ubsec_softc *sc)
1905 {
1906     volatile u_int32_t ctrl;
1907 
1908     ctrl = READ_REG(sc, BS_CTRL);
1909     ctrl |= BS_CTRL_RESET;
1910     WRITE_REG(sc, BS_CTRL, ctrl);
1911 
1912     /*
1913      * Wait aprox. 30 PCI clocks = 900 ns = 0.9 us
1914      */
1915     DELAY(10);
1916 }
1917 
1918 /*
1919  * Init Broadcom registers
1920  */
1921 static void
1922 ubsec_init_board(struct ubsec_softc *sc)
1923 {
1924 	u_int32_t ctrl;
1925 
1926 	ctrl = READ_REG(sc, BS_CTRL);
1927 	ctrl &= ~(BS_CTRL_BE32 | BS_CTRL_BE64);
1928 	ctrl |= BS_CTRL_LITTLE_ENDIAN | BS_CTRL_MCR1INT;
1929 
1930 	if (sc->sc_flags & (UBS_FLAGS_KEY|UBS_FLAGS_RNG))
1931 		ctrl |= BS_CTRL_MCR2INT;
1932 	else
1933 		ctrl &= ~BS_CTRL_MCR2INT;
1934 
1935 	if (sc->sc_flags & UBS_FLAGS_HWNORM)
1936 		ctrl &= ~BS_CTRL_SWNORM;
1937 
1938 	WRITE_REG(sc, BS_CTRL, ctrl);
1939 }
1940 
1941 /*
1942  * Init Broadcom PCI registers
1943  */
1944 static void
1945 ubsec_init_pciregs(device_t dev)
1946 {
1947 #if 0
1948 	u_int32_t misc;
1949 
1950 	misc = pci_conf_read(pc, pa->pa_tag, BS_RTY_TOUT);
1951 	misc = (misc & ~(UBS_PCI_RTY_MASK << UBS_PCI_RTY_SHIFT))
1952 	    | ((UBS_DEF_RTY & 0xff) << UBS_PCI_RTY_SHIFT);
1953 	misc = (misc & ~(UBS_PCI_TOUT_MASK << UBS_PCI_TOUT_SHIFT))
1954 	    | ((UBS_DEF_TOUT & 0xff) << UBS_PCI_TOUT_SHIFT);
1955 	pci_conf_write(pc, pa->pa_tag, BS_RTY_TOUT, misc);
1956 #endif
1957 
1958 	/*
1959 	 * This will set the cache line size to 1, this will
1960 	 * force the BCM58xx chip just to do burst read/writes.
1961 	 * Cache line read/writes are to slow
1962 	 */
1963 	pci_write_config(dev, PCIR_CACHELNSZ, UBS_DEF_CACHELINE, 1);
1964 }
1965 
1966 /*
1967  * Clean up after a chip crash.
1968  * It is assumed that the caller in splimp()
1969  */
1970 static void
1971 ubsec_cleanchip(struct ubsec_softc *sc)
1972 {
1973 	struct ubsec_q *q;
1974 
1975 	while (!SIMPLEQ_EMPTY(&sc->sc_qchip)) {
1976 		q = SIMPLEQ_FIRST(&sc->sc_qchip);
1977 		SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip, q, q_next);
1978 		ubsec_free_q(sc, q);
1979 	}
1980 	sc->sc_nqchip = 0;
1981 }
1982 
1983 /*
1984  * free a ubsec_q
1985  * It is assumed that the caller is within spimp()
1986  */
1987 static int
1988 ubsec_free_q(struct ubsec_softc *sc, struct ubsec_q *q)
1989 {
1990 	struct ubsec_q *q2;
1991 	struct cryptop *crp;
1992 	int npkts;
1993 	int i;
1994 
1995 	npkts = q->q_nstacked_mcrs;
1996 
1997 	for (i = 0; i < npkts; i++) {
1998 		if(q->q_stacked_mcr[i]) {
1999 			q2 = q->q_stacked_mcr[i];
2000 
2001 			if ((q2->q_dst_m != NULL) && (q2->q_src_m != q2->q_dst_m))
2002 				m_freem(q2->q_dst_m);
2003 
2004 			crp = (struct cryptop *)q2->q_crp;
2005 
2006 			SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q2, q_next);
2007 
2008 			crp->crp_etype = EFAULT;
2009 			crypto_done(crp);
2010 		} else {
2011 			break;
2012 		}
2013 	}
2014 
2015 	/*
2016 	 * Free header MCR
2017 	 */
2018 	if ((q->q_dst_m != NULL) && (q->q_src_m != q->q_dst_m))
2019 		m_freem(q->q_dst_m);
2020 
2021 	crp = (struct cryptop *)q->q_crp;
2022 
2023 	SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
2024 
2025 	crp->crp_etype = EFAULT;
2026 	crypto_done(crp);
2027 	return(0);
2028 }
2029 
2030 /*
2031  * Routine to reset the chip and clean up.
2032  * It is assumed that the caller is in splimp()
2033  */
2034 static void
2035 ubsec_totalreset(struct ubsec_softc *sc)
2036 {
2037 	ubsec_reset_board(sc);
2038 	ubsec_init_board(sc);
2039 	ubsec_cleanchip(sc);
2040 }
2041 
2042 static int
2043 ubsec_dmamap_aligned(struct ubsec_operand *op)
2044 {
2045 	int i;
2046 
2047 	for (i = 0; i < op->nsegs; i++) {
2048 		if (op->segs[i].ds_addr & 3)
2049 			return (0);
2050 		if ((i != (op->nsegs - 1)) &&
2051 		    (op->segs[i].ds_len & 3))
2052 			return (0);
2053 	}
2054 	return (1);
2055 }
2056 
2057 static void
2058 ubsec_kfree(struct ubsec_softc *sc, struct ubsec_q2 *q)
2059 {
2060 	switch (q->q_type) {
2061 	case UBS_CTXOP_MODEXP: {
2062 		struct ubsec_q2_modexp *me = (struct ubsec_q2_modexp *)q;
2063 
2064 		ubsec_dma_free(sc, &me->me_q.q_mcr);
2065 		ubsec_dma_free(sc, &me->me_q.q_ctx);
2066 		ubsec_dma_free(sc, &me->me_M);
2067 		ubsec_dma_free(sc, &me->me_E);
2068 		ubsec_dma_free(sc, &me->me_C);
2069 		ubsec_dma_free(sc, &me->me_epb);
2070 		free(me, M_DEVBUF);
2071 		break;
2072 	}
2073 	case UBS_CTXOP_RSAPRIV: {
2074 		struct ubsec_q2_rsapriv *rp = (struct ubsec_q2_rsapriv *)q;
2075 
2076 		ubsec_dma_free(sc, &rp->rpr_q.q_mcr);
2077 		ubsec_dma_free(sc, &rp->rpr_q.q_ctx);
2078 		ubsec_dma_free(sc, &rp->rpr_msgin);
2079 		ubsec_dma_free(sc, &rp->rpr_msgout);
2080 		free(rp, M_DEVBUF);
2081 		break;
2082 	}
2083 	default:
2084 		device_printf(sc->sc_dev, "invalid kfree 0x%x\n", q->q_type);
2085 		break;
2086 	}
2087 }
2088 
2089 static int
2090 ubsec_kprocess(void *arg, struct cryptkop *krp, int hint)
2091 {
2092 	struct ubsec_softc *sc = arg;
2093 	int r;
2094 
2095 	if (krp == NULL || krp->krp_callback == NULL)
2096 		return (EINVAL);
2097 
2098 	while (!SIMPLEQ_EMPTY(&sc->sc_q2free)) {
2099 		struct ubsec_q2 *q;
2100 
2101 		q = SIMPLEQ_FIRST(&sc->sc_q2free);
2102 		SIMPLEQ_REMOVE_HEAD(&sc->sc_q2free, q, q_next);
2103 		ubsec_kfree(sc, q);
2104 	}
2105 
2106 	switch (krp->krp_op) {
2107 	case CRK_MOD_EXP:
2108 		if (sc->sc_flags & UBS_FLAGS_HWNORM)
2109 			r = ubsec_kprocess_modexp_hw(sc, krp, hint);
2110 		else
2111 			r = ubsec_kprocess_modexp_sw(sc, krp, hint);
2112 		break;
2113 	case CRK_MOD_EXP_CRT:
2114 		return (ubsec_kprocess_rsapriv(sc, krp, hint));
2115 	default:
2116 		device_printf(sc->sc_dev, "kprocess: invalid op 0x%x\n",
2117 		    krp->krp_op);
2118 		krp->krp_status = EOPNOTSUPP;
2119 		crypto_kdone(krp);
2120 		return (0);
2121 	}
2122 	return (0);			/* silence compiler */
2123 }
2124 
2125 /*
2126  * Start computation of cr[C] = (cr[M] ^ cr[E]) mod cr[N] (sw normalization)
2127  */
2128 static int
2129 ubsec_kprocess_modexp_sw(struct ubsec_softc *sc, struct cryptkop *krp, int hint)
2130 {
2131 	struct ubsec_q2_modexp *me;
2132 	struct ubsec_mcr *mcr;
2133 	struct ubsec_ctx_modexp *ctx;
2134 	struct ubsec_pktbuf *epb;
2135 	int s, err = 0;
2136 	u_int nbits, normbits, mbits, shiftbits, ebits;
2137 
2138 	me = malloc(sizeof *me, M_DEVBUF, M_INTWAIT | M_ZERO);
2139 	me->me_krp = krp;
2140 	me->me_q.q_type = UBS_CTXOP_MODEXP;
2141 
2142 	nbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_N]);
2143 	if (nbits <= 512)
2144 		normbits = 512;
2145 	else if (nbits <= 768)
2146 		normbits = 768;
2147 	else if (nbits <= 1024)
2148 		normbits = 1024;
2149 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 1536)
2150 		normbits = 1536;
2151 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 2048)
2152 		normbits = 2048;
2153 	else {
2154 		err = E2BIG;
2155 		goto errout;
2156 	}
2157 
2158 	shiftbits = normbits - nbits;
2159 
2160 	me->me_modbits = nbits;
2161 	me->me_shiftbits = shiftbits;
2162 	me->me_normbits = normbits;
2163 
2164 	/* Sanity check: result bits must be >= true modulus bits. */
2165 	if (krp->krp_param[krp->krp_iparams].crp_nbits < nbits) {
2166 		err = ERANGE;
2167 		goto errout;
2168 	}
2169 
2170 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
2171 	    &me->me_q.q_mcr, 0)) {
2172 		err = ENOMEM;
2173 		goto errout;
2174 	}
2175 	mcr = (struct ubsec_mcr *)me->me_q.q_mcr.dma_vaddr;
2176 
2177 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_modexp),
2178 	    &me->me_q.q_ctx, 0)) {
2179 		err = ENOMEM;
2180 		goto errout;
2181 	}
2182 
2183 	mbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_M]);
2184 	if (mbits > nbits) {
2185 		err = E2BIG;
2186 		goto errout;
2187 	}
2188 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_M, 0)) {
2189 		err = ENOMEM;
2190 		goto errout;
2191 	}
2192 	ubsec_kshift_r(shiftbits,
2193 	    krp->krp_param[UBS_MODEXP_PAR_M].crp_p, mbits,
2194 	    me->me_M.dma_vaddr, normbits);
2195 
2196 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_C, 0)) {
2197 		err = ENOMEM;
2198 		goto errout;
2199 	}
2200 	bzero(me->me_C.dma_vaddr, me->me_C.dma_size);
2201 
2202 	ebits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_E]);
2203 	if (ebits > nbits) {
2204 		err = E2BIG;
2205 		goto errout;
2206 	}
2207 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_E, 0)) {
2208 		err = ENOMEM;
2209 		goto errout;
2210 	}
2211 	ubsec_kshift_r(shiftbits,
2212 	    krp->krp_param[UBS_MODEXP_PAR_E].crp_p, ebits,
2213 	    me->me_E.dma_vaddr, normbits);
2214 
2215 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_pktbuf),
2216 	    &me->me_epb, 0)) {
2217 		err = ENOMEM;
2218 		goto errout;
2219 	}
2220 	epb = (struct ubsec_pktbuf *)me->me_epb.dma_vaddr;
2221 	epb->pb_addr = htole32(me->me_E.dma_paddr);
2222 	epb->pb_next = 0;
2223 	epb->pb_len = htole32(normbits / 8);
2224 
2225 #ifdef UBSEC_DEBUG
2226 	if (ubsec_debug) {
2227 		printf("Epb ");
2228 		ubsec_dump_pb(epb);
2229 	}
2230 #endif
2231 
2232 	mcr->mcr_pkts = htole16(1);
2233 	mcr->mcr_flags = 0;
2234 	mcr->mcr_cmdctxp = htole32(me->me_q.q_ctx.dma_paddr);
2235 	mcr->mcr_reserved = 0;
2236 	mcr->mcr_pktlen = 0;
2237 
2238 	mcr->mcr_ipktbuf.pb_addr = htole32(me->me_M.dma_paddr);
2239 	mcr->mcr_ipktbuf.pb_len = htole32(normbits / 8);
2240 	mcr->mcr_ipktbuf.pb_next = htole32(me->me_epb.dma_paddr);
2241 
2242 	mcr->mcr_opktbuf.pb_addr = htole32(me->me_C.dma_paddr);
2243 	mcr->mcr_opktbuf.pb_next = 0;
2244 	mcr->mcr_opktbuf.pb_len = htole32(normbits / 8);
2245 
2246 #ifdef DIAGNOSTIC
2247 	/* Misaligned output buffer will hang the chip. */
2248 	if ((letoh32(mcr->mcr_opktbuf.pb_addr) & 3) != 0)
2249 		panic("%s: modexp invalid addr 0x%x\n",
2250 		    device_get_nameunit(sc->sc_dev),
2251 		    letoh32(mcr->mcr_opktbuf.pb_addr));
2252 	if ((letoh32(mcr->mcr_opktbuf.pb_len) & 3) != 0)
2253 		panic("%s: modexp invalid len 0x%x\n",
2254 		    device_get_nameunit(sc->sc_dev),
2255 		    letoh32(mcr->mcr_opktbuf.pb_len));
2256 #endif
2257 
2258 	ctx = (struct ubsec_ctx_modexp *)me->me_q.q_ctx.dma_vaddr;
2259 	bzero(ctx, sizeof(*ctx));
2260 	ubsec_kshift_r(shiftbits,
2261 	    krp->krp_param[UBS_MODEXP_PAR_N].crp_p, nbits,
2262 	    ctx->me_N, normbits);
2263 	ctx->me_len = htole16((normbits / 8) + (4 * sizeof(u_int16_t)));
2264 	ctx->me_op = htole16(UBS_CTXOP_MODEXP);
2265 	ctx->me_E_len = htole16(nbits);
2266 	ctx->me_N_len = htole16(nbits);
2267 
2268 #ifdef UBSEC_DEBUG
2269 	if (ubsec_debug) {
2270 		ubsec_dump_mcr(mcr);
2271 		ubsec_dump_ctx2((struct ubsec_ctx_keyop *)ctx);
2272 	}
2273 #endif
2274 
2275 	/*
2276 	 * ubsec_feed2 will sync mcr and ctx, we just need to sync
2277 	 * everything else.
2278 	 */
2279 	ubsec_dma_sync(&me->me_M, BUS_DMASYNC_PREWRITE);
2280 	ubsec_dma_sync(&me->me_E, BUS_DMASYNC_PREWRITE);
2281 	ubsec_dma_sync(&me->me_C, BUS_DMASYNC_PREREAD);
2282 	ubsec_dma_sync(&me->me_epb, BUS_DMASYNC_PREWRITE);
2283 
2284 	/* Enqueue and we're done... */
2285 	s = splimp();
2286 	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &me->me_q, q_next);
2287 	ubsec_feed2(sc);
2288 	ubsecstats.hst_modexp++;
2289 	splx(s);
2290 
2291 	return (0);
2292 
2293 errout:
2294 	if (me != NULL) {
2295 		if (me->me_q.q_mcr.dma_map != NULL)
2296 			ubsec_dma_free(sc, &me->me_q.q_mcr);
2297 		if (me->me_q.q_ctx.dma_map != NULL) {
2298 			bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size);
2299 			ubsec_dma_free(sc, &me->me_q.q_ctx);
2300 		}
2301 		if (me->me_M.dma_map != NULL) {
2302 			bzero(me->me_M.dma_vaddr, me->me_M.dma_size);
2303 			ubsec_dma_free(sc, &me->me_M);
2304 		}
2305 		if (me->me_E.dma_map != NULL) {
2306 			bzero(me->me_E.dma_vaddr, me->me_E.dma_size);
2307 			ubsec_dma_free(sc, &me->me_E);
2308 		}
2309 		if (me->me_C.dma_map != NULL) {
2310 			bzero(me->me_C.dma_vaddr, me->me_C.dma_size);
2311 			ubsec_dma_free(sc, &me->me_C);
2312 		}
2313 		if (me->me_epb.dma_map != NULL)
2314 			ubsec_dma_free(sc, &me->me_epb);
2315 		free(me, M_DEVBUF);
2316 	}
2317 	krp->krp_status = err;
2318 	crypto_kdone(krp);
2319 	return (0);
2320 }
2321 
2322 /*
2323  * Start computation of cr[C] = (cr[M] ^ cr[E]) mod cr[N] (hw normalization)
2324  */
2325 static int
2326 ubsec_kprocess_modexp_hw(struct ubsec_softc *sc, struct cryptkop *krp, int hint)
2327 {
2328 	struct ubsec_q2_modexp *me;
2329 	struct ubsec_mcr *mcr;
2330 	struct ubsec_ctx_modexp *ctx;
2331 	struct ubsec_pktbuf *epb;
2332 	int s, err = 0;
2333 	u_int nbits, normbits, mbits, shiftbits, ebits;
2334 
2335 	me = malloc(sizeof *me, M_DEVBUF, M_INTWAIT | M_ZERO);
2336 	me->me_krp = krp;
2337 	me->me_q.q_type = UBS_CTXOP_MODEXP;
2338 
2339 	nbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_N]);
2340 	if (nbits <= 512)
2341 		normbits = 512;
2342 	else if (nbits <= 768)
2343 		normbits = 768;
2344 	else if (nbits <= 1024)
2345 		normbits = 1024;
2346 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 1536)
2347 		normbits = 1536;
2348 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 2048)
2349 		normbits = 2048;
2350 	else {
2351 		err = E2BIG;
2352 		goto errout;
2353 	}
2354 
2355 	shiftbits = normbits - nbits;
2356 
2357 	/* XXX ??? */
2358 	me->me_modbits = nbits;
2359 	me->me_shiftbits = shiftbits;
2360 	me->me_normbits = normbits;
2361 
2362 	/* Sanity check: result bits must be >= true modulus bits. */
2363 	if (krp->krp_param[krp->krp_iparams].crp_nbits < nbits) {
2364 		err = ERANGE;
2365 		goto errout;
2366 	}
2367 
2368 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
2369 	    &me->me_q.q_mcr, 0)) {
2370 		err = ENOMEM;
2371 		goto errout;
2372 	}
2373 	mcr = (struct ubsec_mcr *)me->me_q.q_mcr.dma_vaddr;
2374 
2375 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_modexp),
2376 	    &me->me_q.q_ctx, 0)) {
2377 		err = ENOMEM;
2378 		goto errout;
2379 	}
2380 
2381 	mbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_M]);
2382 	if (mbits > nbits) {
2383 		err = E2BIG;
2384 		goto errout;
2385 	}
2386 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_M, 0)) {
2387 		err = ENOMEM;
2388 		goto errout;
2389 	}
2390 	bzero(me->me_M.dma_vaddr, normbits / 8);
2391 	bcopy(krp->krp_param[UBS_MODEXP_PAR_M].crp_p,
2392 	    me->me_M.dma_vaddr, (mbits + 7) / 8);
2393 
2394 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_C, 0)) {
2395 		err = ENOMEM;
2396 		goto errout;
2397 	}
2398 	bzero(me->me_C.dma_vaddr, me->me_C.dma_size);
2399 
2400 	ebits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_E]);
2401 	if (ebits > nbits) {
2402 		err = E2BIG;
2403 		goto errout;
2404 	}
2405 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_E, 0)) {
2406 		err = ENOMEM;
2407 		goto errout;
2408 	}
2409 	bzero(me->me_E.dma_vaddr, normbits / 8);
2410 	bcopy(krp->krp_param[UBS_MODEXP_PAR_E].crp_p,
2411 	    me->me_E.dma_vaddr, (ebits + 7) / 8);
2412 
2413 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_pktbuf),
2414 	    &me->me_epb, 0)) {
2415 		err = ENOMEM;
2416 		goto errout;
2417 	}
2418 	epb = (struct ubsec_pktbuf *)me->me_epb.dma_vaddr;
2419 	epb->pb_addr = htole32(me->me_E.dma_paddr);
2420 	epb->pb_next = 0;
2421 	epb->pb_len = htole32((ebits + 7) / 8);
2422 
2423 #ifdef UBSEC_DEBUG
2424 	if (ubsec_debug) {
2425 		printf("Epb ");
2426 		ubsec_dump_pb(epb);
2427 	}
2428 #endif
2429 
2430 	mcr->mcr_pkts = htole16(1);
2431 	mcr->mcr_flags = 0;
2432 	mcr->mcr_cmdctxp = htole32(me->me_q.q_ctx.dma_paddr);
2433 	mcr->mcr_reserved = 0;
2434 	mcr->mcr_pktlen = 0;
2435 
2436 	mcr->mcr_ipktbuf.pb_addr = htole32(me->me_M.dma_paddr);
2437 	mcr->mcr_ipktbuf.pb_len = htole32(normbits / 8);
2438 	mcr->mcr_ipktbuf.pb_next = htole32(me->me_epb.dma_paddr);
2439 
2440 	mcr->mcr_opktbuf.pb_addr = htole32(me->me_C.dma_paddr);
2441 	mcr->mcr_opktbuf.pb_next = 0;
2442 	mcr->mcr_opktbuf.pb_len = htole32(normbits / 8);
2443 
2444 #ifdef DIAGNOSTIC
2445 	/* Misaligned output buffer will hang the chip. */
2446 	if ((letoh32(mcr->mcr_opktbuf.pb_addr) & 3) != 0)
2447 		panic("%s: modexp invalid addr 0x%x\n",
2448 		    device_get_nameunit(sc->sc_dev),
2449 		    letoh32(mcr->mcr_opktbuf.pb_addr));
2450 	if ((letoh32(mcr->mcr_opktbuf.pb_len) & 3) != 0)
2451 		panic("%s: modexp invalid len 0x%x\n",
2452 		    device_get_nameunit(sc->sc_dev),
2453 		    letoh32(mcr->mcr_opktbuf.pb_len));
2454 #endif
2455 
2456 	ctx = (struct ubsec_ctx_modexp *)me->me_q.q_ctx.dma_vaddr;
2457 	bzero(ctx, sizeof(*ctx));
2458 	bcopy(krp->krp_param[UBS_MODEXP_PAR_N].crp_p, ctx->me_N,
2459 	    (nbits + 7) / 8);
2460 	ctx->me_len = htole16((normbits / 8) + (4 * sizeof(u_int16_t)));
2461 	ctx->me_op = htole16(UBS_CTXOP_MODEXP);
2462 	ctx->me_E_len = htole16(ebits);
2463 	ctx->me_N_len = htole16(nbits);
2464 
2465 #ifdef UBSEC_DEBUG
2466 	if (ubsec_debug) {
2467 		ubsec_dump_mcr(mcr);
2468 		ubsec_dump_ctx2((struct ubsec_ctx_keyop *)ctx);
2469 	}
2470 #endif
2471 
2472 	/*
2473 	 * ubsec_feed2 will sync mcr and ctx, we just need to sync
2474 	 * everything else.
2475 	 */
2476 	ubsec_dma_sync(&me->me_M, BUS_DMASYNC_PREWRITE);
2477 	ubsec_dma_sync(&me->me_E, BUS_DMASYNC_PREWRITE);
2478 	ubsec_dma_sync(&me->me_C, BUS_DMASYNC_PREREAD);
2479 	ubsec_dma_sync(&me->me_epb, BUS_DMASYNC_PREWRITE);
2480 
2481 	/* Enqueue and we're done... */
2482 	s = splimp();
2483 	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &me->me_q, q_next);
2484 	ubsec_feed2(sc);
2485 	splx(s);
2486 
2487 	return (0);
2488 
2489 errout:
2490 	if (me != NULL) {
2491 		if (me->me_q.q_mcr.dma_map != NULL)
2492 			ubsec_dma_free(sc, &me->me_q.q_mcr);
2493 		if (me->me_q.q_ctx.dma_map != NULL) {
2494 			bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size);
2495 			ubsec_dma_free(sc, &me->me_q.q_ctx);
2496 		}
2497 		if (me->me_M.dma_map != NULL) {
2498 			bzero(me->me_M.dma_vaddr, me->me_M.dma_size);
2499 			ubsec_dma_free(sc, &me->me_M);
2500 		}
2501 		if (me->me_E.dma_map != NULL) {
2502 			bzero(me->me_E.dma_vaddr, me->me_E.dma_size);
2503 			ubsec_dma_free(sc, &me->me_E);
2504 		}
2505 		if (me->me_C.dma_map != NULL) {
2506 			bzero(me->me_C.dma_vaddr, me->me_C.dma_size);
2507 			ubsec_dma_free(sc, &me->me_C);
2508 		}
2509 		if (me->me_epb.dma_map != NULL)
2510 			ubsec_dma_free(sc, &me->me_epb);
2511 		free(me, M_DEVBUF);
2512 	}
2513 	krp->krp_status = err;
2514 	crypto_kdone(krp);
2515 	return (0);
2516 }
2517 
2518 static int
2519 ubsec_kprocess_rsapriv(struct ubsec_softc *sc, struct cryptkop *krp, int hint)
2520 {
2521 	struct ubsec_q2_rsapriv *rp = NULL;
2522 	struct ubsec_mcr *mcr;
2523 	struct ubsec_ctx_rsapriv *ctx;
2524 	int s, err = 0;
2525 	u_int padlen, msglen;
2526 
2527 	msglen = ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_P]);
2528 	padlen = ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_Q]);
2529 	if (msglen > padlen)
2530 		padlen = msglen;
2531 
2532 	if (padlen <= 256)
2533 		padlen = 256;
2534 	else if (padlen <= 384)
2535 		padlen = 384;
2536 	else if (padlen <= 512)
2537 		padlen = 512;
2538 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && padlen <= 768)
2539 		padlen = 768;
2540 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && padlen <= 1024)
2541 		padlen = 1024;
2542 	else {
2543 		err = E2BIG;
2544 		goto errout;
2545 	}
2546 
2547 	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_DP]) > padlen) {
2548 		err = E2BIG;
2549 		goto errout;
2550 	}
2551 
2552 	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_DQ]) > padlen) {
2553 		err = E2BIG;
2554 		goto errout;
2555 	}
2556 
2557 	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_PINV]) > padlen) {
2558 		err = E2BIG;
2559 		goto errout;
2560 	}
2561 
2562 	rp = malloc(sizeof *rp, M_DEVBUF, M_INTWAIT | M_ZERO);
2563 	rp->rpr_krp = krp;
2564 	rp->rpr_q.q_type = UBS_CTXOP_RSAPRIV;
2565 
2566 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
2567 	    &rp->rpr_q.q_mcr, 0)) {
2568 		err = ENOMEM;
2569 		goto errout;
2570 	}
2571 	mcr = (struct ubsec_mcr *)rp->rpr_q.q_mcr.dma_vaddr;
2572 
2573 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_rsapriv),
2574 	    &rp->rpr_q.q_ctx, 0)) {
2575 		err = ENOMEM;
2576 		goto errout;
2577 	}
2578 	ctx = (struct ubsec_ctx_rsapriv *)rp->rpr_q.q_ctx.dma_vaddr;
2579 	bzero(ctx, sizeof *ctx);
2580 
2581 	/* Copy in p */
2582 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_P].crp_p,
2583 	    &ctx->rpr_buf[0 * (padlen / 8)],
2584 	    (krp->krp_param[UBS_RSAPRIV_PAR_P].crp_nbits + 7) / 8);
2585 
2586 	/* Copy in q */
2587 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_Q].crp_p,
2588 	    &ctx->rpr_buf[1 * (padlen / 8)],
2589 	    (krp->krp_param[UBS_RSAPRIV_PAR_Q].crp_nbits + 7) / 8);
2590 
2591 	/* Copy in dp */
2592 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_DP].crp_p,
2593 	    &ctx->rpr_buf[2 * (padlen / 8)],
2594 	    (krp->krp_param[UBS_RSAPRIV_PAR_DP].crp_nbits + 7) / 8);
2595 
2596 	/* Copy in dq */
2597 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_DQ].crp_p,
2598 	    &ctx->rpr_buf[3 * (padlen / 8)],
2599 	    (krp->krp_param[UBS_RSAPRIV_PAR_DQ].crp_nbits + 7) / 8);
2600 
2601 	/* Copy in pinv */
2602 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_PINV].crp_p,
2603 	    &ctx->rpr_buf[4 * (padlen / 8)],
2604 	    (krp->krp_param[UBS_RSAPRIV_PAR_PINV].crp_nbits + 7) / 8);
2605 
2606 	msglen = padlen * 2;
2607 
2608 	/* Copy in input message (aligned buffer/length). */
2609 	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_MSGIN]) > msglen) {
2610 		/* Is this likely? */
2611 		err = E2BIG;
2612 		goto errout;
2613 	}
2614 	if (ubsec_dma_malloc(sc, (msglen + 7) / 8, &rp->rpr_msgin, 0)) {
2615 		err = ENOMEM;
2616 		goto errout;
2617 	}
2618 	bzero(rp->rpr_msgin.dma_vaddr, (msglen + 7) / 8);
2619 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_MSGIN].crp_p,
2620 	    rp->rpr_msgin.dma_vaddr,
2621 	    (krp->krp_param[UBS_RSAPRIV_PAR_MSGIN].crp_nbits + 7) / 8);
2622 
2623 	/* Prepare space for output message (aligned buffer/length). */
2624 	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT]) < msglen) {
2625 		/* Is this likely? */
2626 		err = E2BIG;
2627 		goto errout;
2628 	}
2629 	if (ubsec_dma_malloc(sc, (msglen + 7) / 8, &rp->rpr_msgout, 0)) {
2630 		err = ENOMEM;
2631 		goto errout;
2632 	}
2633 	bzero(rp->rpr_msgout.dma_vaddr, (msglen + 7) / 8);
2634 
2635 	mcr->mcr_pkts = htole16(1);
2636 	mcr->mcr_flags = 0;
2637 	mcr->mcr_cmdctxp = htole32(rp->rpr_q.q_ctx.dma_paddr);
2638 	mcr->mcr_ipktbuf.pb_addr = htole32(rp->rpr_msgin.dma_paddr);
2639 	mcr->mcr_ipktbuf.pb_next = 0;
2640 	mcr->mcr_ipktbuf.pb_len = htole32(rp->rpr_msgin.dma_size);
2641 	mcr->mcr_reserved = 0;
2642 	mcr->mcr_pktlen = htole16(msglen);
2643 	mcr->mcr_opktbuf.pb_addr = htole32(rp->rpr_msgout.dma_paddr);
2644 	mcr->mcr_opktbuf.pb_next = 0;
2645 	mcr->mcr_opktbuf.pb_len = htole32(rp->rpr_msgout.dma_size);
2646 
2647 #ifdef DIAGNOSTIC
2648 	if (rp->rpr_msgin.dma_paddr & 3 || rp->rpr_msgin.dma_size & 3) {
2649 		panic("%s: rsapriv: invalid msgin %x(0x%x)",
2650 		    device_get_nameunit(sc->sc_dev),
2651 		    rp->rpr_msgin.dma_paddr, rp->rpr_msgin.dma_size);
2652 	}
2653 	if (rp->rpr_msgout.dma_paddr & 3 || rp->rpr_msgout.dma_size & 3) {
2654 		panic("%s: rsapriv: invalid msgout %x(0x%x)",
2655 		    device_get_nameunit(sc->sc_dev),
2656 		    rp->rpr_msgout.dma_paddr, rp->rpr_msgout.dma_size);
2657 	}
2658 #endif
2659 
2660 	ctx->rpr_len = (sizeof(u_int16_t) * 4) + (5 * (padlen / 8));
2661 	ctx->rpr_op = htole16(UBS_CTXOP_RSAPRIV);
2662 	ctx->rpr_q_len = htole16(padlen);
2663 	ctx->rpr_p_len = htole16(padlen);
2664 
2665 	/*
2666 	 * ubsec_feed2 will sync mcr and ctx, we just need to sync
2667 	 * everything else.
2668 	 */
2669 	ubsec_dma_sync(&rp->rpr_msgin, BUS_DMASYNC_PREWRITE);
2670 	ubsec_dma_sync(&rp->rpr_msgout, BUS_DMASYNC_PREREAD);
2671 
2672 	/* Enqueue and we're done... */
2673 	s = splimp();
2674 	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &rp->rpr_q, q_next);
2675 	ubsec_feed2(sc);
2676 	ubsecstats.hst_modexpcrt++;
2677 	splx(s);
2678 	return (0);
2679 
2680 errout:
2681 	if (rp != NULL) {
2682 		if (rp->rpr_q.q_mcr.dma_map != NULL)
2683 			ubsec_dma_free(sc, &rp->rpr_q.q_mcr);
2684 		if (rp->rpr_msgin.dma_map != NULL) {
2685 			bzero(rp->rpr_msgin.dma_vaddr, rp->rpr_msgin.dma_size);
2686 			ubsec_dma_free(sc, &rp->rpr_msgin);
2687 		}
2688 		if (rp->rpr_msgout.dma_map != NULL) {
2689 			bzero(rp->rpr_msgout.dma_vaddr, rp->rpr_msgout.dma_size);
2690 			ubsec_dma_free(sc, &rp->rpr_msgout);
2691 		}
2692 		free(rp, M_DEVBUF);
2693 	}
2694 	krp->krp_status = err;
2695 	crypto_kdone(krp);
2696 	return (0);
2697 }
2698 
2699 #ifdef UBSEC_DEBUG
2700 static void
2701 ubsec_dump_pb(volatile struct ubsec_pktbuf *pb)
2702 {
2703 	printf("addr 0x%x (0x%x) next 0x%x\n",
2704 	    pb->pb_addr, pb->pb_len, pb->pb_next);
2705 }
2706 
2707 static void
2708 ubsec_dump_ctx2(struct ubsec_ctx_keyop *c)
2709 {
2710 	printf("CTX (0x%x):\n", c->ctx_len);
2711 	switch (letoh16(c->ctx_op)) {
2712 	case UBS_CTXOP_RNGBYPASS:
2713 	case UBS_CTXOP_RNGSHA1:
2714 		break;
2715 	case UBS_CTXOP_MODEXP:
2716 	{
2717 		struct ubsec_ctx_modexp *cx = (void *)c;
2718 		int i, len;
2719 
2720 		printf(" Elen %u, Nlen %u\n",
2721 		    letoh16(cx->me_E_len), letoh16(cx->me_N_len));
2722 		len = (cx->me_N_len + 7)/8;
2723 		for (i = 0; i < len; i++)
2724 			printf("%s%02x", (i == 0) ? " N: " : ":", cx->me_N[i]);
2725 		printf("\n");
2726 		break;
2727 	}
2728 	default:
2729 		printf("unknown context: %x\n", c->ctx_op);
2730 	}
2731 	printf("END CTX\n");
2732 }
2733 
2734 static void
2735 ubsec_dump_mcr(struct ubsec_mcr *mcr)
2736 {
2737 	volatile struct ubsec_mcr_add *ma;
2738 	int i;
2739 
2740 	printf("MCR:\n");
2741 	printf(" pkts: %u, flags 0x%x\n",
2742 	    letoh16(mcr->mcr_pkts), letoh16(mcr->mcr_flags));
2743 	ma = (volatile struct ubsec_mcr_add *)&mcr->mcr_cmdctxp;
2744 	for (i = 0; i < letoh16(mcr->mcr_pkts); i++) {
2745 		printf(" %d: ctx 0x%x len 0x%x rsvd 0x%x\n", i,
2746 		    letoh32(ma->mcr_cmdctxp), letoh16(ma->mcr_pktlen),
2747 		    letoh16(ma->mcr_reserved));
2748 		printf(" %d: ipkt ", i);
2749 		ubsec_dump_pb(&ma->mcr_ipktbuf);
2750 		printf(" %d: opkt ", i);
2751 		ubsec_dump_pb(&ma->mcr_opktbuf);
2752 		ma++;
2753 	}
2754 	printf("END MCR\n");
2755 }
2756 #endif /* UBSEC_DEBUG */
2757 
2758 /*
2759  * Return the number of significant bits of a big number.
2760  */
2761 static int
2762 ubsec_ksigbits(struct crparam *cr)
2763 {
2764 	u_int plen = (cr->crp_nbits + 7) / 8;
2765 	int i, sig = plen * 8;
2766 	u_int8_t c, *p = cr->crp_p;
2767 
2768 	for (i = plen - 1; i >= 0; i--) {
2769 		c = p[i];
2770 		if (c != 0) {
2771 			while ((c & 0x80) == 0) {
2772 				sig--;
2773 				c <<= 1;
2774 			}
2775 			break;
2776 		}
2777 		sig -= 8;
2778 	}
2779 	return (sig);
2780 }
2781 
2782 static void
2783 ubsec_kshift_r(
2784 	u_int shiftbits,
2785 	u_int8_t *src, u_int srcbits,
2786 	u_int8_t *dst, u_int dstbits)
2787 {
2788 	u_int slen, dlen;
2789 	int i, si, di, n;
2790 
2791 	slen = (srcbits + 7) / 8;
2792 	dlen = (dstbits + 7) / 8;
2793 
2794 	for (i = 0; i < slen; i++)
2795 		dst[i] = src[i];
2796 	for (i = 0; i < dlen - slen; i++)
2797 		dst[slen + i] = 0;
2798 
2799 	n = shiftbits / 8;
2800 	if (n != 0) {
2801 		si = dlen - n - 1;
2802 		di = dlen - 1;
2803 		while (si >= 0)
2804 			dst[di--] = dst[si--];
2805 		while (di >= 0)
2806 			dst[di--] = 0;
2807 	}
2808 
2809 	n = shiftbits % 8;
2810 	if (n != 0) {
2811 		for (i = dlen - 1; i > 0; i--)
2812 			dst[i] = (dst[i] << n) |
2813 			    (dst[i - 1] >> (8 - n));
2814 		dst[0] = dst[0] << n;
2815 	}
2816 }
2817 
2818 static void
2819 ubsec_kshift_l(
2820 	u_int shiftbits,
2821 	u_int8_t *src, u_int srcbits,
2822 	u_int8_t *dst, u_int dstbits)
2823 {
2824 	int slen, dlen, i, n;
2825 
2826 	slen = (srcbits + 7) / 8;
2827 	dlen = (dstbits + 7) / 8;
2828 
2829 	n = shiftbits / 8;
2830 	for (i = 0; i < slen; i++)
2831 		dst[i] = src[i + n];
2832 	for (i = 0; i < dlen - slen; i++)
2833 		dst[slen + i] = 0;
2834 
2835 	n = shiftbits % 8;
2836 	if (n != 0) {
2837 		for (i = 0; i < (dlen - 1); i++)
2838 			dst[i] = (dst[i] >> n) | (dst[i + 1] << (8 - n));
2839 		dst[dlen - 1] = dst[dlen - 1] >> n;
2840 	}
2841 }
2842