/*
 * QEMU NVM Express End-to-End Data Protection support
 *
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Klaus Jensen <k.jensen@samsung.com>
 *   Gollu Appalanaidu <anaidu.gollu@samsung.com>
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/block-backend.h"

#include "nvme.h"
#include "trace.h"

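/*
 * Verify that the protection information in the command is usable for this
 * namespace. For Type 1 protection, the specification requires that, when
 * reference tag checking is enabled, the initial reference tag equals the
 * lower 32 bits of the starting LBA.
 */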
uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
                           uint32_t reftag)
{
    if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
        (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
        return NVME_INVALID_PROT_INFO | NVME_DNR;
    }

    return NVME_SUCCESS;
}

/* from Linux kernel (crypto/crct10dif_common.c) */
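/*
 * Byte-wise, table-driven CRC16 using the T10 DIF polynomial (0x8bb7);
 * the t10_dif_crc_table lookup table is assumed to be defined elsewhere.
 */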
static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
                           size_t len)
{
    unsigned int i;

    for (i = 0; i < len; i++) {
        crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
    }

    return crc;
}

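/*
 * Generate a protection information tuple for each logical block in the data
 * buffer. The guard CRC covers the data block and, when the tuple occupies
 * the last eight bytes of the metadata, the metadata bytes preceding it. For
 * Type 1 and Type 2 protection, the reference tag is incremented per block.
 */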
void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
                                 uint32_t *reftag)
{
    uint8_t *end = buf + len;
    int16_t pil = 0;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
    }

    trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
                                          apptag, *reftag);

    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
        uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        dif->guard = cpu_to_be16(crc);
        dif->apptag = cpu_to_be16(apptag);
        dif->reftag = cpu_to_be32(*reftag);

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            (*reftag)++;
        }
    }
}

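/*
 * Check the protection information tuple of a single logical block. Per the
 * specification, checking is skipped entirely when the application tag is
 * 0xffff (and, for Type 3 protection, the reference tag is also 0xffffffff).
 */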
static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
                               uint8_t *buf, uint8_t *mbuf, size_t pil,
                               uint8_t prinfo, uint16_t apptag,
                               uint16_t appmask, uint32_t reftag)
{
    switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
    case NVME_ID_NS_DPS_TYPE_3:
        if (be32_to_cpu(dif->reftag) != 0xffffffff) {
            break;
        }

        /* fallthrough */
    case NVME_ID_NS_DPS_TYPE_1:
    case NVME_ID_NS_DPS_TYPE_2:
        if (be16_to_cpu(dif->apptag) != 0xffff) {
            break;
        }

        trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag),
                                          be32_to_cpu(dif->reftag));

        return NVME_SUCCESS;
    }

    if (prinfo & NVME_PRINFO_PRCHK_GUARD) {
        uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc);

        if (be16_to_cpu(dif->guard) != crc) {
            return NVME_E2E_GUARD_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_APP) {
        trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
                                        appmask);

        if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
            return NVME_E2E_APP_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_REF) {
        trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);

        if (be32_to_cpu(dif->reftag) != reftag) {
            return NVME_E2E_REF_ERROR;
        }
    }

    return NVME_SUCCESS;
}

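/*
 * Walk the data and metadata buffers block by block, checking the protection
 * information tuple of each logical block against the tags given in the
 * command.
 */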
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
                        uint8_t *mbuf, size_t mlen, uint8_t prinfo,
                        uint64_t slba, uint16_t apptag,
                        uint16_t appmask, uint32_t *reftag)
{
    uint8_t *end = buf + len;
    int16_t pil = 0;
    uint16_t status;

    status = nvme_check_prinfo(ns, prinfo, slba, *reftag);
    if (status) {
        return status;
    }

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
    }

    trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil);

    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

        status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag,
                                appmask, *reftag);
        if (status) {
            return status;
        }

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            (*reftag)++;
        }
    }

    return NVME_SUCCESS;
}

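/*
 * Blocks that are deallocated or unwritten in the backing device read back
 * as zeroes and would fail the guard check. For every such block, set the
 * protection information tuple to all ones so that the per-block checks
 * treat it as check-disabled.
 */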
uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
                               uint64_t slba)
{
    BlockBackend *blk = ns->blkconf.blk;
    BlockDriverState *bs = blk_bs(blk);

    int64_t moffset = 0, offset = nvme_l2b(ns, slba);
    uint8_t *mbufp, *end;
    bool zeroed;
    int16_t pil = 0;
    int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds;
    int64_t pnum = 0;

    Error *err = NULL;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
    }

    do {
        int ret;

        bytes -= pnum;

        ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
        if (ret < 0) {
            error_setg_errno(&err, -ret, "unable to get block status");
            error_report_err(err);

            return NVME_INTERNAL_DEV_ERROR;
        }

        zeroed = !!(ret & BDRV_BLOCK_ZERO);

        trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed);

        if (zeroed) {
            mbufp = mbuf + moffset;
            mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
            end = mbufp + mlen;

            for (; mbufp < end; mbufp += ns->lbaf.ms) {
                memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
            }
        }

        moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
        offset += pnum;
    } while (pnum != bytes);

    return NVME_SUCCESS;
}

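/*
 * Final completion callback for protected read/write requests; tears down
 * the bounce buffers and completes the request.
 */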
static void nvme_dif_rw_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk));

    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    nvme_rw_complete_cb(req, ret);
}

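/*
 * Read path: invoked once the metadata has been read into its bounce buffer.
 * Mangles metadata for zeroed blocks, checks the protection information and
 * copies data and metadata back to the host buffers (metadata is not copied
 * when PRACT is set and the metadata consists solely of the 8 byte tuple).
 */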
static void nvme_dif_rw_check_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeCtrl *n = nvme_ctrl(req);
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    uint16_t status;

    trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask,
                                   reftag);

    if (ret) {
        goto out;
    }

    status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   slba);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                            ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
                            slba, apptag, appmask, &reftag);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
        goto out;
    }

    if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) {
        goto out;
    }

    status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                               NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
    }

out:
    nvme_dif_rw_cb(ctx, ret);
}

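/*
 * Read path: invoked once the data has been read into its bounce buffer;
 * issues the read of the associated metadata.
 */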
static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    size_t mlen = nvme_m2b(ns, nlb);
    uint64_t offset = nvme_moff(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_reset(&ctx->mdata.iov);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
                                nvme_dif_rw_check_cb, ctx);
    return;

out:
    nvme_dif_rw_cb(ctx, ret);
}

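/*
 * Write path: invoked once the data (or zeroes) have been written; issues
 * the write of the associated metadata bounce buffer.
 */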
static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint64_t offset = nvme_moff(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0,
                                 nvme_dif_rw_cb, ctx);
    return;

out:
    nvme_dif_rw_cb(ctx, ret);
}

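/*
 * Entry point for reads and writes on namespaces with end-to-end data
 * protection enabled. Data and metadata are bounced through temporary
 * buffers so that protection information can be generated (PRACT set on
 * write), checked, or stripped (PRACT set with an 8 byte tuple on read).
 */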
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES;
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    uint64_t slba = le64_to_cpu(rw->slba);
    size_t len = nvme_l2b(ns, nlb);
    size_t mlen = nvme_m2b(ns, nlb);
    size_t mapped_len = len;
    int64_t offset = nvme_l2b(ns, slba);
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    bool pract = !!(prinfo & NVME_PRINFO_PRACT);
    NvmeBounceContext *ctx;
    uint16_t status;

    trace_pci_nvme_dif_rw(pract, prinfo);

    ctx = g_new0(NvmeBounceContext, 1);
    ctx->req = req;

    if (wrz) {
        BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;

        if (prinfo & NVME_PRINFO_PRCHK_MASK) {
            status = NVME_INVALID_PROT_INFO | NVME_DNR;
            goto err;
        }

        if (pract) {
            uint8_t *mbuf, *end;
            int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);

            status = nvme_check_prinfo(ns, prinfo, slba, reftag);
            if (status) {
                goto err;
            }

            /* do not deallocate; protection information is written below */
            flags = 0;

            ctx->mdata.bounce = g_malloc0(mlen);

            qemu_iovec_init(&ctx->mdata.iov, 1);
            qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

            mbuf = ctx->mdata.bounce;
            end = mbuf + mlen;

            if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) {
                pil = 0;
            }

            for (; mbuf < end; mbuf += ns->lbaf.ms) {
                NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

                dif->apptag = cpu_to_be16(apptag);
                dif->reftag = cpu_to_be32(reftag);

                switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
                case NVME_ID_NS_DPS_TYPE_1:
                case NVME_ID_NS_DPS_TYPE_2:
                    reftag++;
                }
            }
        }

        req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags,
                                           nvme_dif_rw_mdata_out_cb, ctx);
        return NVME_NO_COMPLETE;
    }

    /*
     * For extended LBAs, metadata is transferred inline with the data,
     * unless the controller generates or strips it (PRACT with an 8 byte
     * tuple).
     */
    if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) {
        mapped_len += mlen;
    }

    status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd);
    if (status) {
        goto err;
    }

    ctx->data.bounce = g_malloc(len);

    qemu_iovec_init(&ctx->data.iov, 1);
    qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);

    if (req->cmd.opcode == NVME_CMD_READ) {
        block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                         BLOCK_ACCT_READ);

        req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                    nvme_dif_rw_mdata_in_cb, ctx);
        return NVME_NO_COMPLETE;
    }

    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_TO_DEVICE, req);
    if (status) {
        goto err;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_init(&ctx->mdata.iov, 1);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    /* with PRACT and an 8 byte tuple, the host transfers no metadata */
    if (!(pract && ns->lbaf.ms == 8)) {
        status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   NVME_TX_DIRECTION_TO_DEVICE, req);
        if (status) {
            goto err;
        }
    }

    status = nvme_check_prinfo(ns, prinfo, slba, reftag);
    if (status) {
        goto err;
    }

    if (pract) {
        /* splice generated protection information into the buffer */
        nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
                                    ctx->mdata.bounce, ctx->mdata.iov.size,
                                    apptag, &reftag);
    } else {
        status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                                ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
                                slba, apptag, appmask, &reftag);
        if (status) {
            goto err;
        }
    }

    block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                     BLOCK_ACCT_WRITE);

    req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                 nvme_dif_rw_mdata_out_cb, ctx);

    return NVME_NO_COMPLETE;

err:
    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    return status;
}