xref: /qemu/hw/i386/intel_iommu.c (revision 69b205bb)
/*
 * QEMU emulation of an Intel IOMMU (VT-d)
 *   (DMA Remapping device)
 *
 * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com>
 * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "hw/sysbus.h"
#include "exec/address-spaces.h"
#include "intel_iommu_internal.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/i386/pc.h"

/*#define DEBUG_INTEL_IOMMU*/
#ifdef DEBUG_INTEL_IOMMU
enum {
    DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
    DEBUG_CACHE,
};
#define VTD_DBGBIT(x)   (1 << DEBUG_##x)
static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);

#define VTD_DPRINTF(what, fmt, ...) do { \
    if (vtd_dbgflags & VTD_DBGBIT(what)) { \
        fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
                ## __VA_ARGS__); } \
    } while (0)
#else
#define VTD_DPRINTF(what, fmt, ...) do {} while (0)
#endif
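
/*
 * Usage sketch (illustrative): VTD_DPRINTF(CSR, "value 0x%"PRIx32, val)
 * prints to stderr only when DEBUG_INTEL_IOMMU is defined and the CSR
 * bit is set in vtd_dbgflags; otherwise the macro compiles to nothing.
 */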
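/*
 * Register-file model used below: s->csr holds the raw register
 * contents, s->wmask marks the bits software may write, s->w1cmask
 * marks write-1-to-clear bits, and s->womask marks write-only bits
 * that read back as zero.
 */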
static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                            uint64_t wmask, uint64_t w1cmask)
{
    stq_le_p(&s->csr[addr], val);
    stq_le_p(&s->wmask[addr], wmask);
    stq_le_p(&s->w1cmask[addr], w1cmask);
}

static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask)
{
    stq_le_p(&s->womask[addr], mask);
}

static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val,
                            uint32_t wmask, uint32_t w1cmask)
{
    stl_le_p(&s->csr[addr], val);
    stl_le_p(&s->wmask[addr], wmask);
    stl_le_p(&s->w1cmask[addr], w1cmask);
}

static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask)
{
    stl_le_p(&s->womask[addr], mask);
}

/* "External" get/set operations */
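/*
 * Worked example of the update rule below (illustrative values): with
 * wmask 0xfff0, w1cmask 0x000f, oldval 0xabcd and val 0x1234, the new
 * value is ((0xabcd & ~0xfff0) | (0x1234 & 0xfff0)) & ~(0x000f & 0x1234)
 * = 0x123d & ~0x0004 = 0x1239: read-only bits are preserved, writable
 * bits take the written value, and RW1C bits are cleared by writing 1.
 */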
static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
{
    uint64_t oldval = ldq_le_p(&s->csr[addr]);
    uint64_t wmask = ldq_le_p(&s->wmask[addr]);
    uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
    stq_le_p(&s->csr[addr],
             ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
}

static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
{
    uint32_t oldval = ldl_le_p(&s->csr[addr]);
    uint32_t wmask = ldl_le_p(&s->wmask[addr]);
    uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
    stl_le_p(&s->csr[addr],
             ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
}

static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr)
{
    uint64_t val = ldq_le_p(&s->csr[addr]);
    uint64_t womask = ldq_le_p(&s->womask[addr]);
    return val & ~womask;
}

static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr)
{
    uint32_t val = ldl_le_p(&s->csr[addr]);
    uint32_t womask = ldl_le_p(&s->womask[addr]);
    return val & ~womask;
}

/* "Internal" get/set operations */
static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr)
{
    return ldq_le_p(&s->csr[addr]);
}

static uint32_t vtd_get_long_raw(IntelIOMMUState *s, hwaddr addr)
{
    return ldl_le_p(&s->csr[addr]);
}

static void vtd_set_quad_raw(IntelIOMMUState *s, hwaddr addr, uint64_t val)
{
    stq_le_p(&s->csr[addr], val);
}

static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
                                        uint32_t clear, uint32_t mask)
{
    uint32_t new_val = (ldl_le_p(&s->csr[addr]) & ~clear) | mask;
    stl_le_p(&s->csr[addr], new_val);
    return new_val;
}

static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
                                        uint64_t clear, uint64_t mask)
{
    uint64_t new_val = (ldq_le_p(&s->csr[addr]) & ~clear) | mask;
    stq_le_p(&s->csr[addr], new_val);
    return new_val;
}

/* GHashTable functions */
static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
{
    return *((const uint64_t *)v1) == *((const uint64_t *)v2);
}

static guint vtd_uint64_hash(gconstpointer v)
{
    return (guint)*(const uint64_t *)v;
}

static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
                                          gpointer user_data)
{
    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
    uint16_t domain_id = *(uint16_t *)user_data;
    return entry->domain_id == domain_id;
}

/* The shift of an address for a certain level of the paging structure */
static inline uint32_t vtd_slpt_level_shift(uint32_t level)
{
    return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
}

static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
{
    return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
}
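
/*
 * Illustration (assuming VTD_PAGE_SHIFT_4K == 12 and VTD_SL_LEVEL_BITS == 9):
 * level 1 gives shift 12 (4 KiB pages), level 2 gives 21 (2 MiB), and
 * level 3 gives 30 (1 GiB); the page mask clears that many low bits.
 */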

static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
                                        gpointer user_data)
{
    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
    uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
    uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
    return (entry->domain_id == info->domain_id) &&
            (((entry->gfn & info->mask) == gfn) ||
             (entry->gfn == gfn_tlb));
}

/* Reset the context-cache generation of each VTDAddressSpace to zero and
 * set the generation of IntelIOMMUState to 1.
 */
static void vtd_reset_context_cache(IntelIOMMUState *s)
{
    VTDAddressSpace *vtd_as;
    VTDBus *vtd_bus;
    GHashTableIter bus_it;
    uint32_t devfn_it;

    g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);

    VTD_DPRINTF(CACHE, "global context_cache_gen=1");
    while (g_hash_table_iter_next(&bus_it, NULL, (void **)&vtd_bus)) {
        for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
            vtd_as = vtd_bus->dev_as[devfn_it];
            if (!vtd_as) {
                continue;
            }
            vtd_as->context_cache_entry.context_cache_gen = 0;
        }
    }
    s->context_cache_gen = 1;
}

static void vtd_reset_iotlb(IntelIOMMUState *s)
{
    assert(s->iotlb);
    g_hash_table_remove_all(s->iotlb);
}

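/*
 * IOTLB keys pack the page frame number in the low bits with the
 * source-id and paging level above it (see VTD_IOTLB_SID_SHIFT and
 * VTD_IOTLB_LVL_SHIFT), so a single hash table can serve all levels
 * and requesters.
 */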
static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
                                  uint32_t level)
{
    return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
           ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT);
}

static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
{
    return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
}

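/*
 * Look the address up at every paging level: an entry cached for a
 * large page is keyed with the level it was resolved at, so the gfn
 * must be recomputed per level before hashing.
 */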
static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
                                       hwaddr addr)
{
    VTDIOTLBEntry *entry = NULL;
    uint64_t key;
    int level;

    for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
        key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
                                source_id, level);
        entry = g_hash_table_lookup(s->iotlb, &key);
        if (entry) {
            goto out;
        }
    }

out:
    return entry;
}

static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
                             uint16_t domain_id, hwaddr addr, uint64_t slpte,
                             bool read_flags, bool write_flags,
                             uint32_t level)
{
    VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
    uint64_t *key = g_malloc(sizeof(*key));
    uint64_t gfn = vtd_get_iotlb_gfn(addr, level);

    VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
                " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte,
                domain_id);
    if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
        VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset");
        vtd_reset_iotlb(s);
    }

    entry->gfn = gfn;
    entry->domain_id = domain_id;
    entry->slpte = slpte;
    entry->read_flags = read_flags;
    entry->write_flags = write_flags;
    entry->mask = vtd_slpt_level_page_mask(level);
    *key = vtd_get_iotlb_key(gfn, source_id, level);
    g_hash_table_replace(s->iotlb, key, entry);
}
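
/*
 * Note the crude replacement policy above: once VTD_IOTLB_MAX_SIZE
 * entries accumulate, the whole IOTLB is dropped instead of evicting
 * individual entries.
 */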

/* Given the registers holding the message address and message data,
 * generate an interrupt via MSI.
 */
static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
                                   hwaddr mesg_data_reg)
{
    hwaddr addr;
    uint32_t data;

    assert(mesg_data_reg < DMAR_REG_SIZE);
    assert(mesg_addr_reg < DMAR_REG_SIZE);

    addr = vtd_get_long_raw(s, mesg_addr_reg);
    data = vtd_get_long_raw(s, mesg_data_reg);

    VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32, addr, data);
    address_space_stl_le(&address_space_memory, addr, data,
                         MEMTXATTRS_UNSPECIFIED, NULL);
}

/* Generate a fault event to software via MSI if conditions are met.
 * Notice that the value of FSTS_REG being passed should be the one
 * before any update.
 */
static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts)
{
    if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO ||
        pre_fsts & VTD_FSTS_IQE) {
        VTD_DPRINTF(FLOG, "there are previous interrupt conditions "
                    "to be serviced by software, fault event is not generated "
                    "(FSTS_REG 0x%"PRIx32 ")", pre_fsts);
        return;
    }
    vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP);
    if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) {
        VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not generated");
    } else {
        vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
    }
}

/* Check if the Fault (F) field of the Fault Recording Register referenced by
 * @index is Set.
 */
static bool vtd_is_frcd_set(IntelIOMMUState *s, uint16_t index)
{
    /* Each reg is 128-bit */
    hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
    addr += 8; /* Access the high 64-bit half */

    assert(index < DMAR_FRCD_REG_NR);

    return vtd_get_quad_raw(s, addr) & VTD_FRCD_F;
}

/* Update the PPF field of the Fault Status Register.
 * Should be called whenever the F field of any fault recording register
 * is changed.
 */
static void vtd_update_fsts_ppf(IntelIOMMUState *s)
{
    uint32_t i;
    uint32_t ppf_mask = 0;

    for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
        if (vtd_is_frcd_set(s, i)) {
            ppf_mask = VTD_FSTS_PPF;
            break;
        }
    }
    vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask);
    VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ? 1 : 0);
}

static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
{
    /* Each reg is 128-bit */
    hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
    addr += 8; /* Access the high 64-bit half */

    assert(index < DMAR_FRCD_REG_NR);

    vtd_set_clear_mask_quad(s, addr, 0, VTD_FRCD_F);
    vtd_update_fsts_ppf(s);
}

/* Must not update F field now, should be done later */
static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
                            uint16_t source_id, hwaddr addr,
                            VTDFaultReason fault, bool is_write)
{
    uint64_t hi = 0, lo;
    hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);

    assert(index < DMAR_FRCD_REG_NR);

    lo = VTD_FRCD_FI(addr);
    hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
    if (!is_write) {
        hi |= VTD_FRCD_T;
    }
    vtd_set_quad_raw(s, frcd_reg_addr, lo);
    vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
    VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64
                ", lo 0x%"PRIx64, index, hi, lo);
}

/* Try to collapse multiple pending faults from the same requester */
static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
{
    uint32_t i;
    uint64_t frcd_reg;
    hwaddr addr = DMAR_FRCD_REG_OFFSET + 8; /* The high 64-bit half */

    for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
        frcd_reg = vtd_get_quad_raw(s, addr);
        VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg);
        if ((frcd_reg & VTD_FRCD_F) &&
            ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) {
            return true;
        }
        addr += 16; /* 128-bit for each */
    }
    return false;
}

/* Log and report a DMAR (address translation) fault to software */
static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
                                  hwaddr addr, VTDFaultReason fault,
                                  bool is_write)
{
    uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);

    assert(fault < VTD_FR_MAX);

    if (fault == VTD_FR_RESERVED_ERR) {
        /* This is not a normal fault reason case. Drop it. */
        return;
    }
    VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
                ", is_write %d", source_id, fault, addr, is_write);
    if (fsts_reg & VTD_FSTS_PFO) {
        VTD_DPRINTF(FLOG, "new fault is not recorded due to "
                    "Primary Fault Overflow");
        return;
    }
    if (vtd_try_collapse_fault(s, source_id)) {
        VTD_DPRINTF(FLOG, "new fault is not recorded due to "
                    "compression of faults");
        return;
    }
    if (vtd_is_frcd_set(s, s->next_frcd_reg)) {
        VTD_DPRINTF(FLOG, "Primary Fault Overflow and "
                    "new fault is not recorded, set PFO field");
        vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO);
        return;
    }

    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);

    if (fsts_reg & VTD_FSTS_PPF) {
        VTD_DPRINTF(FLOG, "there are pending faults already, "
                    "fault event is not generated");
        vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg);
        s->next_frcd_reg++;
        if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
            s->next_frcd_reg = 0;
        }
    } else {
        vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_FRI_MASK,
                                VTD_FSTS_FRI(s->next_frcd_reg));
        vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); /* Will set PPF */
        s->next_frcd_reg++;
        if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
            s->next_frcd_reg = 0;
        }
        /* This case actually causes the PPF to be Set.
         * So generate a fault event (interrupt).
         */
        vtd_generate_fault_event(s, fsts_reg);
    }
}

/* Handle error conditions of the queued invalidation interface
 * (Invalidation Queue Errors).
 */
static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
{
    uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);

    vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE);
    vtd_generate_fault_event(s, fsts_reg);
}

/* Set the IWC field and try to generate an invalidation completion interrupt */
static void vtd_generate_completion_event(IntelIOMMUState *s)
{
    VTD_DPRINTF(INV, "completes an invalidation wait command with "
                "Interrupt Flag");
    if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
        VTD_DPRINTF(INV, "there is a previous interrupt condition to be "
                    "serviced by software, "
                    "new invalidation event is not generated");
        return;
    }
    vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
    vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
    if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
        VTD_DPRINTF(INV, "IM field in IECTL_REG is set, new invalidation "
                    "event is not generated");
        return;
    } else {
        /* Generate the interrupt event */
        vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
    }
}

static inline bool vtd_root_entry_present(VTDRootEntry *root)
{
    return root->val & VTD_ROOT_ENTRY_P;
}

static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
                              VTDRootEntry *re)
{
    dma_addr_t addr;

    addr = s->root + index * sizeof(*re);
    if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
        VTD_DPRINTF(GENERAL, "error: fail to access root-entry at 0x%"PRIx64
                    " + %"PRIu8, s->root, index);
        re->val = 0;
        return -VTD_FR_ROOT_TABLE_INV;
    }
    re->val = le64_to_cpu(re->val);
    return 0;
}

static inline bool vtd_context_entry_present(VTDContextEntry *context)
{
    return context->lo & VTD_CONTEXT_ENTRY_P;
}

static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
                                           VTDContextEntry *ce)
{
    dma_addr_t addr;

    if (!vtd_root_entry_present(root)) {
        VTD_DPRINTF(GENERAL, "error: root-entry is not present");
        return -VTD_FR_ROOT_ENTRY_P;
    }
    addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
    if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
        VTD_DPRINTF(GENERAL, "error: fail to access context-entry at 0x%"PRIx64
                    " + %"PRIu8,
                    (uint64_t)(root->val & VTD_ROOT_ENTRY_CTP), index);
        return -VTD_FR_CONTEXT_TABLE_INV;
    }
    ce->lo = le64_to_cpu(ce->lo);
    ce->hi = le64_to_cpu(ce->hi);
    return 0;
}

static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce)
{
    return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
}

static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
{
    return slpte & VTD_SL_PT_BASE_ADDR_MASK;
}

/* Whether the pte indicates the address of the page frame */
static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level)
{
    return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK);
}

/* Get the content of an slpte located in @base_addr[@index] */
static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
{
    uint64_t slpte;

    assert(index < VTD_SL_PT_ENTRY_NR);

    if (dma_memory_read(&address_space_memory,
                        base_addr + index * sizeof(slpte), &slpte,
                        sizeof(slpte))) {
        slpte = (uint64_t)-1;
        return slpte;
    }
    slpte = le64_to_cpu(slpte);
    return slpte;
}

/* Given a gpa and the level of paging structure, return the offset at the
 * current level.
 */
static inline uint32_t vtd_gpa_level_offset(uint64_t gpa, uint32_t level)
{
    return (gpa >> vtd_slpt_level_shift(level)) &
            ((1ULL << VTD_SL_LEVEL_BITS) - 1);
}

/* Check Capability Register to see if the @level of page-table is supported */
static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level)
{
    return VTD_CAP_SAGAW_MASK & s->cap &
           (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT));
}
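
/*
 * Per the VT-d spec, SAGAW bit 0 corresponds to a 2-level (30-bit AGAW)
 * table, bit 1 to 3-level (39-bit) and bit 2 to 4-level (48-bit), hence
 * the "level - 2" bit position above.
 */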

/* Get the page-table level that hardware should use for the second-level
 * page-table walk from the Address Width field of context-entry.
 */
static inline uint32_t vtd_get_level_from_context_entry(VTDContextEntry *ce)
{
    return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW);
}

static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce)
{
    return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9;
}
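
/*
 * Example: an AW field of 1 selects a 3-level table and a 39-bit AGAW
 * (30 + 1 * 9); AW 2 selects 4 levels and 48 bits.
 */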

static const uint64_t vtd_paging_entry_rsvd_field[] = {
    [0] = ~0ULL,
    /* For non-large pages */
    [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
    [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
    [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
    [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
    /* For large pages */
    [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
    [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
    [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
    [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
};
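
/*
 * Indices 1-4 hold the reserved-bit masks for ordinary entries at paging
 * levels 1-4; indices 5-8 hold the large-page variants, looked up at
 * level + 4 by vtd_slpte_nonzero_rsvd() below.
 */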

static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
{
    if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) {
        /* Maybe large page */
        return slpte & vtd_paging_entry_rsvd_field[level + 4];
    } else {
        return slpte & vtd_paging_entry_rsvd_field[level];
    }
}

/* Given the @gpa, get the relevant @slptep. @slpte_level will be the last
 * level of the translation and can be used to decide the size of a large
 * page.
 */
static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
                            uint64_t *slptep, uint32_t *slpte_level,
                            bool *reads, bool *writes)
{
    dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
    uint32_t level = vtd_get_level_from_context_entry(ce);
    uint32_t offset;
    uint64_t slpte;
    uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
    uint64_t access_right_check;

    /* Check if @gpa is above 2^X-1, where X is the minimum of MGAW in CAP_REG
     * and AW in context-entry.
     */
    if (gpa & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
        VTD_DPRINTF(GENERAL, "error: gpa 0x%"PRIx64 " exceeds limits", gpa);
        return -VTD_FR_ADDR_BEYOND_MGAW;
    }

    /* FIXME: what is the Atomics request here? */
    access_right_check = is_write ? VTD_SL_W : VTD_SL_R;

    while (true) {
        offset = vtd_gpa_level_offset(gpa, level);
        slpte = vtd_get_slpte(addr, offset);

        if (slpte == (uint64_t)-1) {
            VTD_DPRINTF(GENERAL, "error: fail to access second-level paging "
                        "entry at level %"PRIu32 " for gpa 0x%"PRIx64,
                        level, gpa);
            if (level == vtd_get_level_from_context_entry(ce)) {
                /* Invalid programming of context-entry */
                return -VTD_FR_CONTEXT_ENTRY_INV;
            } else {
                return -VTD_FR_PAGING_ENTRY_INV;
            }
        }
        *reads = (*reads) && (slpte & VTD_SL_R);
        *writes = (*writes) && (slpte & VTD_SL_W);
        if (!(slpte & access_right_check)) {
            VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
                        "gpa 0x%"PRIx64 " slpte 0x%"PRIx64,
                        (is_write ? "write" : "read"), gpa, slpte);
            return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
        }
        if (vtd_slpte_nonzero_rsvd(slpte, level)) {
            VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second "
                        "level paging entry level %"PRIu32 " slpte 0x%"PRIx64,
                        level, slpte);
            return -VTD_FR_PAGING_ENTRY_RSVD;
        }

        if (vtd_is_last_slpte(slpte, level)) {
            *slptep = slpte;
            *slpte_level = level;
            return 0;
        }
        addr = vtd_get_slpte_addr(slpte);
        level--;
    }
}

/* Map a device to its corresponding domain (context-entry) */
static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
                                    uint8_t devfn, VTDContextEntry *ce)
{
    VTDRootEntry re;
    int ret_fr;

    ret_fr = vtd_get_root_entry(s, bus_num, &re);
    if (ret_fr) {
        return ret_fr;
    }

    if (!vtd_root_entry_present(&re)) {
        VTD_DPRINTF(GENERAL, "error: root-entry #%"PRIu8 " is not present",
                    bus_num);
        return -VTD_FR_ROOT_ENTRY_P;
    } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) {
        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in root-entry "
                    "hi 0x%"PRIx64 " lo 0x%"PRIx64, re.rsvd, re.val);
        return -VTD_FR_ROOT_ENTRY_RSVD;
    }

    ret_fr = vtd_get_context_entry_from_root(&re, devfn, ce);
    if (ret_fr) {
        return ret_fr;
    }

    if (!vtd_context_entry_present(ce)) {
        VTD_DPRINTF(GENERAL,
                    "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
                    "is not present", devfn, bus_num);
        return -VTD_FR_CONTEXT_ENTRY_P;
    } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
               (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
        VTD_DPRINTF(GENERAL,
                    "error: non-zero reserved field in context-entry "
                    "hi 0x%"PRIx64 " lo 0x%"PRIx64, ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_RSVD;
    }
    /* Check if the programming of context-entry is valid */
    if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) {
        VTD_DPRINTF(GENERAL, "error: unsupported Address Width value in "
                    "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
                    ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_INV;
    } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) {
        VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in "
                    "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64,
                    ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_INV;
    }
    return 0;
}

static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
{
    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
}
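
/*
 * Example: bus 0x12, devfn 0x34 (device 6, function 4) yields source-id
 * 0x1234; the bus number occupies bits 15:8 and devfn bits 7:0.
 */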

static const bool vtd_qualified_faults[] = {
    [VTD_FR_RESERVED] = false,
    [VTD_FR_ROOT_ENTRY_P] = false,
    [VTD_FR_CONTEXT_ENTRY_P] = true,
    [VTD_FR_CONTEXT_ENTRY_INV] = true,
    [VTD_FR_ADDR_BEYOND_MGAW] = true,
    [VTD_FR_WRITE] = true,
    [VTD_FR_READ] = true,
    [VTD_FR_PAGING_ENTRY_INV] = true,
    [VTD_FR_ROOT_TABLE_INV] = false,
    [VTD_FR_CONTEXT_TABLE_INV] = false,
    [VTD_FR_ROOT_ENTRY_RSVD] = false,
    [VTD_FR_PAGING_ENTRY_RSVD] = true,
    [VTD_FR_CONTEXT_ENTRY_TT] = true,
    [VTD_FR_RESERVED_ERR] = false,
    [VTD_FR_MAX] = false,
};

/* A fault condition is "qualified" if it is reported to software only when
 * the FPD field in the context-entry used to process the faulting request
 * is 0.
 */
static inline bool vtd_is_qualified_fault(VTDFaultReason fault)
{
    return vtd_qualified_faults[fault];
}

static inline bool vtd_is_interrupt_addr(hwaddr addr)
{
    return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
}

/* Map dev to context-entry then do a paging-structures walk to do an iommu
 * translation.
 *
 * Called from RCU critical section.
 *
 * @bus_num: The bus number
 * @devfn: The devfn, which is the combined device and function number
 * @is_write: The access is a write operation
 * @entry: IOMMUTLBEntry that contains the addr to be translated and result
 */
static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
                                   uint8_t devfn, hwaddr addr, bool is_write,
                                   IOMMUTLBEntry *entry)
{
    IntelIOMMUState *s = vtd_as->iommu_state;
    VTDContextEntry ce;
    uint8_t bus_num = pci_bus_num(bus);
    VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
    uint64_t slpte, page_mask;
    uint32_t level;
    uint16_t source_id = vtd_make_source_id(bus_num, devfn);
    int ret_fr;
    bool is_fpd_set = false;
    bool reads = true;
    bool writes = true;
    VTDIOTLBEntry *iotlb_entry;

    /* Check if the request is in interrupt address range */
    if (vtd_is_interrupt_addr(addr)) {
        if (is_write) {
            /* FIXME: since we don't know the length of the access here, we
             * treat Non-DWORD length write requests without PASID as
             * interrupt requests, too. Without interrupt remapping support,
             * we just use 1:1 mapping.
             */
            VTD_DPRINTF(MMU, "write request to interrupt address "
                        "gpa 0x%"PRIx64, addr);
            entry->iova = addr & VTD_PAGE_MASK_4K;
            entry->translated_addr = addr & VTD_PAGE_MASK_4K;
            entry->addr_mask = ~VTD_PAGE_MASK_4K;
            entry->perm = IOMMU_WO;
            return;
        } else {
            VTD_DPRINTF(GENERAL, "error: read request from interrupt address "
                        "gpa 0x%"PRIx64, addr);
            vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write);
            return;
        }
    }
    /* Try to fetch slpte from the IOTLB */
    iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
    if (iotlb_entry) {
        VTD_DPRINTF(CACHE, "hit iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
                    " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr,
                    iotlb_entry->slpte, iotlb_entry->domain_id);
        slpte = iotlb_entry->slpte;
        reads = iotlb_entry->read_flags;
        writes = iotlb_entry->write_flags;
        page_mask = iotlb_entry->mask;
        goto out;
    }
    /* Try to fetch context-entry from cache first */
    if (cc_entry->context_cache_gen == s->context_cache_gen) {
        VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d "
                    "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 ")",
                    bus_num, devfn, cc_entry->context_entry.hi,
                    cc_entry->context_entry.lo, cc_entry->context_cache_gen);
        ce = cc_entry->context_entry;
        is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
    } else {
        ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
        is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
        if (ret_fr) {
            ret_fr = -ret_fr;
            if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
                VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
                            "requests through this context-entry "
                            "(with FPD Set)");
            } else {
                vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
            }
            return;
        }
        /* Update context-cache */
        VTD_DPRINTF(CACHE, "update context-cache bus %d devfn %d "
                    "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 "->%"PRIu32 ")",
                    bus_num, devfn, ce.hi, ce.lo,
                    cc_entry->context_cache_gen, s->context_cache_gen);
        cc_entry->context_entry = ce;
        cc_entry->context_cache_gen = s->context_cache_gen;
    }

    ret_fr = vtd_gpa_to_slpte(&ce, addr, is_write, &slpte, &level,
                              &reads, &writes);
    if (ret_fr) {
        ret_fr = -ret_fr;
        if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
            VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests "
                        "through this context-entry (with FPD Set)");
        } else {
            vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
        }
        return;
    }

    page_mask = vtd_slpt_level_page_mask(level);
    vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
                     reads, writes, level);
out:
    entry->iova = addr & page_mask;
    entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
    entry->addr_mask = ~page_mask;
    entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0);
}
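
/*
 * Note on the perm encoding above: bit 0 set means reads allowed, bit 1
 * set means writes allowed, matching QEMU's IOMMUAccessFlags (IOMMU_RO,
 * IOMMU_WO, IOMMU_RW).
 */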

static void vtd_root_table_setup(IntelIOMMUState *s)
{
    s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
    s->root_extended = s->root & VTD_RTADDR_RTT;
    s->root &= VTD_RTADDR_ADDR_MASK;

    VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root,
                (s->root_extended ? "(extended)" : ""));
}

static void vtd_context_global_invalidate(IntelIOMMUState *s)
{
    s->context_cache_gen++;
    if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
        vtd_reset_context_cache(s);
    }
}

/* Find the VTDBus currently associated with a given bus number */
static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
{
    VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
    if (!vtd_bus) {
        /* Iterate over the registered buses to find the one which currently
         * holds this bus number, and update the bus_num lookup table.
         */
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
            if (pci_bus_num(vtd_bus->bus) == bus_num) {
                s->vtd_as_by_bus_num[bus_num] = vtd_bus;
                return vtd_bus;
            }
        }
    }
    return vtd_bus;
}

/* Do a context-cache device-selective invalidation.
 * @func_mask: FM field after shifting
 */
static void vtd_context_device_invalidate(IntelIOMMUState *s,
                                          uint16_t source_id,
                                          uint16_t func_mask)
{
    uint16_t mask;
    VTDBus *vtd_bus;
    VTDAddressSpace *vtd_as;
    uint16_t devfn;
    uint16_t devfn_it;

    switch (func_mask & 3) {
    case 0:
        mask = 0;   /* No bits in the SID field masked */
        break;
    case 1:
        mask = 4;   /* Mask bit 2 in the SID field */
        break;
    case 2:
        mask = 6;   /* Mask bits 2:1 in the SID field */
        break;
    case 3:
        mask = 7;   /* Mask bits 2:0 in the SID field */
        break;
    }
    /* Masked bits are ignored in the match, so compare the others only */
    mask = ~mask;
    VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16
                    " mask %"PRIu16, source_id, mask);
    vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
    if (vtd_bus) {
        devfn = VTD_SID_TO_DEVFN(source_id);
        for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
            vtd_as = vtd_bus->dev_as[devfn_it];
            if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
                VTD_DPRINTF(INV, "invalidate context-cache of devfn 0x%"PRIx16,
                            devfn_it);
                vtd_as->context_cache_entry.context_cache_gen = 0;
            }
        }
    }
}
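
/*
 * Example: with func_mask 3 the low three SID bits are ignored, so a
 * source-id for any function of a device invalidates the cached
 * context entries of all eight functions of that device.
 */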

/* Context-cache invalidation
 * Returns the Context Actual Invalidation Granularity.
 * @val: the content of the CCMD_REG
 */
static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
{
    uint64_t caig;
    uint64_t type = val & VTD_CCMD_CIRG_MASK;

    switch (type) {
    case VTD_CCMD_DOMAIN_INVL:
        VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
                    (uint16_t)VTD_CCMD_DID(val));
        /* Fall through */
    case VTD_CCMD_GLOBAL_INVL:
        VTD_DPRINTF(INV, "global invalidation");
        caig = VTD_CCMD_GLOBAL_INVL_A;
        vtd_context_global_invalidate(s);
        break;

    case VTD_CCMD_DEVICE_INVL:
        caig = VTD_CCMD_DEVICE_INVL_A;
        vtd_context_device_invalidate(s, VTD_CCMD_SID(val), VTD_CCMD_FM(val));
        break;

    default:
        VTD_DPRINTF(GENERAL, "error: invalid granularity");
        caig = 0;
    }
    return caig;
}

static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
{
    vtd_reset_iotlb(s);
}

static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
{
    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
                                &domain_id);
}

static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
                                      hwaddr addr, uint8_t am)
{
    VTDIOTLBPageInvInfo info;

    assert(am <= VTD_MAMV);
    info.domain_id = domain_id;
    info.addr = addr;
    info.mask = ~((1 << am) - 1);
    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
}

/* Flush IOTLB
 * Returns the IOTLB Actual Invalidation Granularity.
 * @val: the content of the IOTLB_REG
 */
static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
{
    uint64_t iaig;
    uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK;
    uint16_t domain_id;
    hwaddr addr;
    uint8_t am;

    switch (type) {
    case VTD_TLB_GLOBAL_FLUSH:
        VTD_DPRINTF(INV, "global invalidation");
        iaig = VTD_TLB_GLOBAL_FLUSH_A;
        vtd_iotlb_global_invalidate(s);
        break;

    case VTD_TLB_DSI_FLUSH:
        domain_id = VTD_TLB_DID(val);
        VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
                    domain_id);
        iaig = VTD_TLB_DSI_FLUSH_A;
        vtd_iotlb_domain_invalidate(s, domain_id);
        break;

    case VTD_TLB_PSI_FLUSH:
        domain_id = VTD_TLB_DID(val);
        addr = vtd_get_quad_raw(s, DMAR_IVA_REG);
        am = VTD_IVA_AM(addr);
        addr = VTD_IVA_ADDR(addr);
        VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
                    " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
        if (am > VTD_MAMV) {
            VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
                        "%"PRIu8, (uint8_t)VTD_MAMV);
            iaig = 0;
            break;
        }
        iaig = VTD_TLB_PSI_FLUSH_A;
        vtd_iotlb_page_invalidate(s, domain_id, addr, am);
        break;

    default:
        VTD_DPRINTF(GENERAL, "error: invalid granularity");
        iaig = 0;
    }
    return iaig;
}

static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s)
{
    return s->iq_tail == 0;
}

static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s)
{
    return s->qi_enabled && (s->iq_tail == s->iq_head) &&
           (s->iq_last_desc_type == VTD_INV_DESC_WAIT);
}
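
/*
 * Disabling QI is only honoured when the queue has drained and the last
 * descriptor processed was an Invalidation Wait Descriptor, which is the
 * sequence drivers are expected to use before clearing QIE.
 */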

static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
{
    uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG);

    VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off"));
    if (en) {
        if (vtd_queued_inv_enable_check(s)) {
            s->iq = iqa_val & VTD_IQA_IQA_MASK;
            /* 2^(x+8) entries */
            s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
            s->qi_enabled = true;
            VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val);
            VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d",
                        s->iq, s->iq_size);
            /* Ok - report back to driver */
            vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
        } else {
            VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: "
                        "tail %"PRIu16, s->iq_tail);
        }
    } else {
        if (vtd_queued_inv_disable_check(s)) {
            /* disable Queued Invalidation */
            vtd_set_quad_raw(s, DMAR_IQH_REG, 0);
            s->iq_head = 0;
            s->qi_enabled = false;
            /* Ok - report back to driver */
            vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0);
        } else {
            VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: "
                        "head %"PRIu16 ", tail %"PRIu16
                        ", last_descriptor %"PRIu8,
                        s->iq_head, s->iq_tail, s->iq_last_desc_type);
        }
    }
}
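
/*
 * Example: a QS field of 0 gives 2^8 = 256 descriptors; each descriptor
 * is 128 bits, so the smallest queue occupies one 4 KiB page.
 */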

/* Set Root Table Pointer */
static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
{
    VTD_DPRINTF(CSR, "set Root Table Pointer");

    vtd_root_table_setup(s);
    /* Ok - report back to driver */
    vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
}

/* Handle Translation Enable/Disable */
static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
{
    VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off"));

    if (en) {
        s->dmar_enabled = true;
        /* Ok - report back to driver */
        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_TES);
    } else {
        s->dmar_enabled = false;

        /* Clear the index of Fault Recording Register */
        s->next_frcd_reg = 0;
        /* Ok - report back to driver */
        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
    }
}

/* Handle write to Global Command Register */
static void vtd_handle_gcmd_write(IntelIOMMUState *s)
{
    uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG);
    uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
    uint32_t changed = status ^ val;

    VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status);
    if (changed & VTD_GCMD_TE) {
        /* Translation enable/disable */
        vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
    }
    if (val & VTD_GCMD_SRTP) {
        /* Set/update the root-table pointer */
        vtd_handle_gcmd_srtp(s);
    }
    if (changed & VTD_GCMD_QIE) {
        /* Queued Invalidation Enable */
        vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE);
    }
}

/* Handle write to Context Command Register */
static void vtd_handle_ccmd_write(IntelIOMMUState *s)
{
    uint64_t ret;
    uint64_t val = vtd_get_quad_raw(s, DMAR_CCMD_REG);

    /* Context-cache invalidation request */
    if (val & VTD_CCMD_ICC) {
        if (s->qi_enabled) {
            VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
                        "should not use register-based invalidation");
            return;
        }
        ret = vtd_context_cache_invalidate(s, val);
        /* Invalidation completed. Change something to show */
        vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL);
        ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK,
                                      ret);
        VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret);
    }
}

/* Handle write to IOTLB Invalidation Register */
static void vtd_handle_iotlb_write(IntelIOMMUState *s)
{
    uint64_t ret;
    uint64_t val = vtd_get_quad_raw(s, DMAR_IOTLB_REG);

    /* IOTLB invalidation request */
    if (val & VTD_TLB_IVT) {
        if (s->qi_enabled) {
            VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
                        "should not use register-based invalidation");
            return;
        }
        ret = vtd_iotlb_flush(s, val);
        /* Invalidation completed. Change something to show */
        vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL);
        ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG,
                                      VTD_TLB_FLUSH_GRANU_MASK_A, ret);
        VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret);
    }
}

/* Fetch an Invalidation Descriptor from the Invalidation Queue */
static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset,
                             VTDInvDesc *inv_desc)
{
    dma_addr_t addr = base_addr + offset * sizeof(*inv_desc);
    if (dma_memory_read(&address_space_memory, addr, inv_desc,
        sizeof(*inv_desc))) {
        VTD_DPRINTF(GENERAL, "error: fail to fetch Invalidation Descriptor "
                    "base_addr 0x%"PRIx64 " offset %"PRIu32, base_addr, offset);
        inv_desc->lo = 0;
        inv_desc->hi = 0;
        return false;
    }
    inv_desc->lo = le64_to_cpu(inv_desc->lo);
    inv_desc->hi = le64_to_cpu(inv_desc->hi);
    return true;
}

static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
    if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
        (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Invalidation "
                    "Wait Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
                    inv_desc->hi, inv_desc->lo);
        return false;
    }
    if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
        /* Status Write */
        uint32_t status_data = (uint32_t)(inv_desc->lo >>
                               VTD_INV_DESC_WAIT_DATA_SHIFT);

        assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF));

        /* FIXME: need to be masked with HAW? */
        dma_addr_t status_addr = inv_desc->hi;
        VTD_DPRINTF(INV, "status data 0x%x, status addr 0x%"PRIx64,
                    status_data, status_addr);
        status_data = cpu_to_le32(status_data);
        if (dma_memory_write(&address_space_memory, status_addr, &status_data,
                             sizeof(status_data))) {
            VTD_DPRINTF(GENERAL, "error: fail to perform a coherent write");
            return false;
        }
    } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
        /* Interrupt flag */
        VTD_DPRINTF(INV, "Invalidation Wait Descriptor interrupt completion");
        vtd_generate_completion_event(s);
    } else {
        VTD_DPRINTF(GENERAL, "error: invalid Invalidation Wait Descriptor: "
                    "hi 0x%"PRIx64 " lo 0x%"PRIx64, inv_desc->hi, inv_desc->lo);
        return false;
    }
    return true;
}

static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
                                           VTDInvDesc *inv_desc)
{
    if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Context-cache "
                    "Invalidate Descriptor");
        return false;
    }
    switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
    case VTD_INV_DESC_CC_DOMAIN:
        VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
                    (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
        /* Fall through */
    case VTD_INV_DESC_CC_GLOBAL:
        VTD_DPRINTF(INV, "global invalidation");
        vtd_context_global_invalidate(s);
        break;

    case VTD_INV_DESC_CC_DEVICE:
        vtd_context_device_invalidate(s, VTD_INV_DESC_CC_SID(inv_desc->lo),
                                      VTD_INV_DESC_CC_FM(inv_desc->lo));
        break;

    default:
        VTD_DPRINTF(GENERAL, "error: invalid granularity in Context-cache "
                    "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
                    inv_desc->hi, inv_desc->lo);
        return false;
    }
    return true;
}

static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
    uint16_t domain_id;
    uint8_t am;
    hwaddr addr;

    if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
        (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in IOTLB "
                    "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
                    inv_desc->hi, inv_desc->lo);
        return false;
    }

    switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
    case VTD_INV_DESC_IOTLB_GLOBAL:
        VTD_DPRINTF(INV, "global invalidation");
        vtd_iotlb_global_invalidate(s);
        break;

    case VTD_INV_DESC_IOTLB_DOMAIN:
        domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
        VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
                    domain_id);
        vtd_iotlb_domain_invalidate(s, domain_id);
        break;

    case VTD_INV_DESC_IOTLB_PAGE:
        domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
        addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
        am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
        VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
                    " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
        if (am > VTD_MAMV) {
            VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
                        "%"PRIu8, (uint8_t)VTD_MAMV);
            return false;
        }
        vtd_iotlb_page_invalidate(s, domain_id, addr, am);
        break;

    default:
        VTD_DPRINTF(GENERAL, "error: invalid granularity in IOTLB Invalidate "
                    "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
                    inv_desc->hi, inv_desc->lo);
        return false;
    }
    return true;
}

static bool vtd_process_inv_desc(IntelIOMMUState *s)
{
    VTDInvDesc inv_desc;
    uint8_t desc_type;

    VTD_DPRINTF(INV, "iq head %"PRIu16, s->iq_head);
    if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) {
        s->iq_last_desc_type = VTD_INV_DESC_NONE;
        return false;
    }
    desc_type = inv_desc.lo & VTD_INV_DESC_TYPE;
    /* FIXME: should update at first or at last? */
    s->iq_last_desc_type = desc_type;

    switch (desc_type) {
    case VTD_INV_DESC_CC:
        VTD_DPRINTF(INV, "Context-cache Invalidate Descriptor hi 0x%"PRIx64
                    " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
        if (!vtd_process_context_cache_desc(s, &inv_desc)) {
            return false;
        }
        break;

    case VTD_INV_DESC_IOTLB:
        VTD_DPRINTF(INV, "IOTLB Invalidate Descriptor hi 0x%"PRIx64
                    " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
        if (!vtd_process_iotlb_desc(s, &inv_desc)) {
            return false;
        }
        break;

    case VTD_INV_DESC_WAIT:
        VTD_DPRINTF(INV, "Invalidation Wait Descriptor hi 0x%"PRIx64
                    " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
        if (!vtd_process_wait_desc(s, &inv_desc)) {
            return false;
        }
        break;

    default:
        VTD_DPRINTF(GENERAL, "error: unknown Invalidation Descriptor type "
                    "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
                    inv_desc.hi, inv_desc.lo, desc_type);
        return false;
    }
    s->iq_head++;
    if (s->iq_head == s->iq_size) {
        s->iq_head = 0;
    }
    return true;
}

/* Try to fetch and process more Invalidation Descriptors */
static void vtd_fetch_inv_desc(IntelIOMMUState *s)
{
    VTD_DPRINTF(INV, "fetch Invalidation Descriptors");
    if (s->iq_tail >= s->iq_size) {
        /* Detected an invalid Tail pointer */
        VTD_DPRINTF(GENERAL, "error: iq_tail is %"PRIu16
                    " while iq_size is %"PRIu16, s->iq_tail, s->iq_size);
        vtd_handle_inv_queue_error(s);
        return;
    }
    while (s->iq_head != s->iq_tail) {
        if (!vtd_process_inv_desc(s)) {
            /* Invalidation Queue Errors */
            vtd_handle_inv_queue_error(s);
            break;
        }
        /* Must update the IQH_REG in time */
        vtd_set_quad_raw(s, DMAR_IQH_REG,
                         (((uint64_t)(s->iq_head)) << VTD_IQH_QH_SHIFT) &
                         VTD_IQH_QH_MASK);
    }
}
1441 
1442 /* Handle write to Invalidation Queue Tail Register */
1443 static void vtd_handle_iqt_write(IntelIOMMUState *s)
1444 {
1445     uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG);
1446 
1447     s->iq_tail = VTD_IQT_QT(val);
1448     VTD_DPRINTF(INV, "set iq tail %"PRIu16, s->iq_tail);
1449     if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
1450         /* Process Invalidation Queue here */
1451         vtd_fetch_inv_desc(s);
1452     }
1453 }
1454 
1455 static void vtd_handle_fsts_write(IntelIOMMUState *s)
1456 {
1457     uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
1458     uint32_t fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);
1459     uint32_t status_fields = VTD_FSTS_PFO | VTD_FSTS_PPF | VTD_FSTS_IQE;
1460 
1461     if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) {
1462         vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
1463         VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear "
1464                     "IP field of FECTL_REG");
1465     }
1466     /* FIXME: when IQE is cleared and Queued Invalidation is enabled, should
1467      * we try to fetch any pending Invalidation Descriptors?
1468      */
1469 }
1470 
1471 static void vtd_handle_fectl_write(IntelIOMMUState *s)
1472 {
1473     uint32_t fectl_reg;
1474     /* FIXME: when software clears the IM field we must check the IP field.
1475      * Is it necessary to compare the old and new values to detect the
1476      * clearing of IM, or is it enough to check that IM is now zero?
1477      */
1478     fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);
1479     if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) {
1480         vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
1481         vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
1482         VTD_DPRINTF(FLOG, "IM field is cleared, generate "
1483                     "fault event interrupt");
1484     }
1485 }
1486 
1487 static void vtd_handle_ics_write(IntelIOMMUState *s)
1488 {
1489     uint32_t ics_reg = vtd_get_long_raw(s, DMAR_ICS_REG);
1490     uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);
1491 
1492     if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) {
1493         vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
1494         VTD_DPRINTF(INV, "pending completion interrupt condition serviced, "
1495                     "clear IP field of IECTL_REG");
1496     }
1497 }
1498 
1499 static void vtd_handle_iectl_write(IntelIOMMUState *s)
1500 {
1501     uint32_t iectl_reg;
1502     /* FIXME: when software clears the IM field we must check the IP field.
1503      * Is it necessary to compare the old and new values to detect the
1504      * clearing of IM, or is it enough to check that IM is now zero?
1505      */
1506     iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);
1507     if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) {
1508         vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
1509         vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
1510         VTD_DPRINTF(INV, "IM field is cleared, generate "
1511                     "invalidation event interrupt");
1512     }
1513 }
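
/*
 * vtd_handle_fectl_write() and vtd_handle_iectl_write() above share one
 * shape: an interrupt condition latched in the IP bit is delivered as
 * soon as software clears the IM mask.  The condensed sketch below uses
 * a hypothetical helper, vtd_example_deliver_on_unmask(), shown purely
 * for illustration; it is not part of the device model, hence #if 0.
 */
#if 0
static void vtd_example_deliver_on_unmask(IntelIOMMUState *s,
                                          hwaddr ctl_reg, hwaddr addr_reg,
                                          hwaddr data_reg,
                                          uint32_t ip_bit, uint32_t im_bit)
{
    uint32_t ctl = vtd_get_long_raw(s, ctl_reg);

    /* condition pending (IP set) and interrupt unmasked (IM clear) */
    if ((ctl & ip_bit) && !(ctl & im_bit)) {
        vtd_generate_interrupt(s, addr_reg, data_reg);
        vtd_set_clear_mask_long(s, ctl_reg, ip_bit, 0);   /* clear IP */
    }
}
#endif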
1514 
1515 static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
1516 {
1517     IntelIOMMUState *s = opaque;
1518     uint64_t val;
1519 
1520     if (addr + size > DMAR_REG_SIZE) {
1521         VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64
1522                     ", got 0x%"PRIx64 " %d",
1523                     (uint64_t)DMAR_REG_SIZE, addr, size);
1524         return (uint64_t)-1;
1525     }
1526 
1527     switch (addr) {
1528     /* Root Table Address Register, 64-bit */
1529     case DMAR_RTADDR_REG:
1530         if (size == 4) {
1531             val = s->root & ((1ULL << 32) - 1);
1532         } else {
1533             val = s->root;
1534         }
1535         break;
1536 
1537     case DMAR_RTADDR_REG_HI:
1538         assert(size == 4);
1539         val = s->root >> 32;
1540         break;
1541 
1542     /* Invalidation Queue Address Register, 64-bit */
1543     case DMAR_IQA_REG:
1544         val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS);
1545         if (size == 4) {
1546             val = val & ((1ULL << 32) - 1);
1547         }
1548         break;
1549 
1550     case DMAR_IQA_REG_HI:
1551         assert(size == 4);
1552         val = s->iq >> 32;
1553         break;
1554 
1555     default:
1556         if (size == 4) {
1557             val = vtd_get_long(s, addr);
1558         } else {
1559             val = vtd_get_quad(s, addr);
1560         }
1561     }
1562     VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64,
1563                 addr, size, val);
1564     return val;
1565 }
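
/*
 * Worked example of the split reads above (illustrative root value):
 * with s->root == 0x123456789abcd000, a 4-byte read at DMAR_RTADDR_REG
 * returns the low dword and DMAR_RTADDR_REG_HI returns the high dword:
 *
 *     val = s->root & ((1ULL << 32) - 1);   // 0x9abcd000
 *     val = s->root >> 32;                  // 0x12345678
 */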
1566 
1567 static void vtd_mem_write(void *opaque, hwaddr addr,
1568                           uint64_t val, unsigned size)
1569 {
1570     IntelIOMMUState *s = opaque;
1571 
1572     if (addr + size > DMAR_REG_SIZE) {
1573         VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64
1574                     ", got 0x%"PRIx64 " %d",
1575                     (uint64_t)DMAR_REG_SIZE, addr, size);
1576         return;
1577     }
1578 
1579     switch (addr) {
1580     /* Global Command Register, 32-bit */
1581     case DMAR_GCMD_REG:
1582         VTD_DPRINTF(CSR, "DMAR_GCMD_REG write addr 0x%"PRIx64
1583                     ", size %d, val 0x%"PRIx64, addr, size, val);
1584         vtd_set_long(s, addr, val);
1585         vtd_handle_gcmd_write(s);
1586         break;
1587 
1588     /* Context Command Register, 64-bit */
1589     case DMAR_CCMD_REG:
1590         VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64
1591                     ", size %d, val 0x%"PRIx64, addr, size, val);
1592         if (size == 4) {
1593             vtd_set_long(s, addr, val);
1594         } else {
1595             vtd_set_quad(s, addr, val);
1596             vtd_handle_ccmd_write(s);
1597         }
1598         break;
1599 
1600     case DMAR_CCMD_REG_HI:
1601         VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64
1602                     ", size %d, val 0x%"PRIx64, addr, size, val);
1603         assert(size == 4);
1604         vtd_set_long(s, addr, val);
1605         vtd_handle_ccmd_write(s);
1606         break;
1607 
1608     /* IOTLB Invalidation Register, 64-bit */
1609     case DMAR_IOTLB_REG:
1610         VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64
1611                     ", size %d, val 0x%"PRIx64, addr, size, val);
1612         if (size == 4) {
1613             vtd_set_long(s, addr, val);
1614         } else {
1615             vtd_set_quad(s, addr, val);
1616             vtd_handle_iotlb_write(s);
1617         }
1618         break;
1619 
1620     case DMAR_IOTLB_REG_HI:
1621         VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64
1622                     ", size %d, val 0x%"PRIx64, addr, size, val);
1623         assert(size == 4);
1624         vtd_set_long(s, addr, val);
1625         vtd_handle_iotlb_write(s);
1626         break;
1627 
1628     /* Invalidate Address Register, 64-bit */
1629     case DMAR_IVA_REG:
1630         VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64
1631                     ", size %d, val 0x%"PRIx64, addr, size, val);
1632         if (size == 4) {
1633             vtd_set_long(s, addr, val);
1634         } else {
1635             vtd_set_quad(s, addr, val);
1636         }
1637         break;
1638 
1639     case DMAR_IVA_REG_HI:
1640         VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64
1641                     ", size %d, val 0x%"PRIx64, addr, size, val);
1642         assert(size == 4);
1643         vtd_set_long(s, addr, val);
1644         break;
1645 
1646     /* Fault Status Register, 32-bit */
1647     case DMAR_FSTS_REG:
1648         VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64
1649                     ", size %d, val 0x%"PRIx64, addr, size, val);
1650         assert(size == 4);
1651         vtd_set_long(s, addr, val);
1652         vtd_handle_fsts_write(s);
1653         break;
1654 
1655     /* Fault Event Control Register, 32-bit */
1656     case DMAR_FECTL_REG:
1657         VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64
1658                     ", size %d, val 0x%"PRIx64, addr, size, val);
1659         assert(size == 4);
1660         vtd_set_long(s, addr, val);
1661         vtd_handle_fectl_write(s);
1662         break;
1663 
1664     /* Fault Event Data Register, 32-bit */
1665     case DMAR_FEDATA_REG:
1666         VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64
1667                     ", size %d, val 0x%"PRIx64, addr, size, val);
1668         assert(size == 4);
1669         vtd_set_long(s, addr, val);
1670         break;
1671 
1672     /* Fault Event Address Register, 32-bit */
1673     case DMAR_FEADDR_REG:
1674         VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64
1675                     ", size %d, val 0x%"PRIx64, addr, size, val);
1676         assert(size == 4);
1677         vtd_set_long(s, addr, val);
1678         break;
1679 
1680     /* Fault Event Upper Address Register, 32-bit */
1681     case DMAR_FEUADDR_REG:
1682         VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64
1683                     ", size %d, val 0x%"PRIx64, addr, size, val);
1684         assert(size == 4);
1685         vtd_set_long(s, addr, val);
1686         break;
1687 
1688     /* Protected Memory Enable Register, 32-bit */
1689     case DMAR_PMEN_REG:
1690         VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64
1691                     ", size %d, val 0x%"PRIx64, addr, size, val);
1692         assert(size == 4);
1693         vtd_set_long(s, addr, val);
1694         break;
1695 
1696     /* Root Table Address Register, 64-bit */
1697     case DMAR_RTADDR_REG:
1698         VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64
1699                     ", size %d, val 0x%"PRIx64, addr, size, val);
1700         if (size == 4) {
1701             vtd_set_long(s, addr, val);
1702         } else {
1703             vtd_set_quad(s, addr, val);
1704         }
1705         break;
1706 
1707     case DMAR_RTADDR_REG_HI:
1708         VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 0x%"PRIx64
1709                     ", size %d, val 0x%"PRIx64, addr, size, val);
1710         assert(size == 4);
1711         vtd_set_long(s, addr, val);
1712         break;
1713 
1714     /* Invalidation Queue Tail Register, 64-bit */
1715     case DMAR_IQT_REG:
1716         VTD_DPRINTF(INV, "DMAR_IQT_REG write addr 0x%"PRIx64
1717                     ", size %d, val 0x%"PRIx64, addr, size, val);
1718         if (size == 4) {
1719             vtd_set_long(s, addr, val);
1720         } else {
1721             vtd_set_quad(s, addr, val);
1722         }
1723         vtd_handle_iqt_write(s);
1724         break;
1725 
1726     case DMAR_IQT_REG_HI:
1727         VTD_DPRINTF(INV, "DMAR_IQT_REG_HI write addr 0x%"PRIx64
1728                     ", size %d, val 0x%"PRIx64, addr, size, val);
1729         assert(size == 4);
1730         vtd_set_long(s, addr, val);
1731         /* Bits 63:19 of IQT_REG are RsvdZ, so there is nothing to do here */
1732         break;
1733 
1734     /* Invalidation Queue Address Register, 64-bit */
1735     case DMAR_IQA_REG:
1736         VTD_DPRINTF(INV, "DMAR_IQA_REG write addr 0x%"PRIx64
1737                     ", size %d, val 0x%"PRIx64, addr, size, val);
1738         if (size == 4) {
1739             vtd_set_long(s, addr, val);
1740         } else {
1741             vtd_set_quad(s, addr, val);
1742         }
1743         break;
1744 
1745     case DMAR_IQA_REG_HI:
1746         VTD_DPRINTF(INV, "DMAR_IQA_REG_HI write addr 0x%"PRIx64
1747                     ", size %d, val 0x%"PRIx64, addr, size, val);
1748         assert(size == 4);
1749         vtd_set_long(s, addr, val);
1750         break;
1751 
1752     /* Invalidation Completion Status Register, 32-bit */
1753     case DMAR_ICS_REG:
1754         VTD_DPRINTF(INV, "DMAR_ICS_REG write addr 0x%"PRIx64
1755                     ", size %d, val 0x%"PRIx64, addr, size, val);
1756         assert(size == 4);
1757         vtd_set_long(s, addr, val);
1758         vtd_handle_ics_write(s);
1759         break;
1760 
1761     /* Invalidation Event Control Register, 32-bit */
1762     case DMAR_IECTL_REG:
1763         VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64
1764                     ", size %d, val 0x%"PRIx64, addr, size, val);
1765         assert(size == 4);
1766         vtd_set_long(s, addr, val);
1767         vtd_handle_iectl_write(s);
1768         break;
1769 
1770     /* Invalidation Event Data Register, 32-bit */
1771     case DMAR_IEDATA_REG:
1772         VTD_DPRINTF(INV, "DMAR_IEDATA_REG write addr 0x%"PRIx64
1773                     ", size %d, val 0x%"PRIx64, addr, size, val);
1774         assert(size == 4);
1775         vtd_set_long(s, addr, val);
1776         break;
1777 
1778     /* Invalidation Event Address Register, 32-bit */
1779     case DMAR_IEADDR_REG:
1780         VTD_DPRINTF(INV, "DMAR_IEADDR_REG write addr 0x%"PRIx64
1781                     ", size %d, val 0x%"PRIx64, addr, size, val);
1782         assert(size == 4);
1783         vtd_set_long(s, addr, val);
1784         break;
1785 
1786     /* Invalidation Event Upper Address Register, 32-bit */
1787     case DMAR_IEUADDR_REG:
1788         VTD_DPRINTF(INV, "DMAR_IEUADDR_REG write addr 0x%"PRIx64
1789                     ", size %d, val 0x%"PRIx64, addr, size, val);
1790         assert(size == 4);
1791         vtd_set_long(s, addr, val);
1792         break;
1793 
1794     /* Fault Recording Registers, 128-bit */
1795     case DMAR_FRCD_REG_0_0:
1796         VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64
1797                     ", size %d, val 0x%"PRIx64, addr, size, val);
1798         if (size == 4) {
1799             vtd_set_long(s, addr, val);
1800         } else {
1801             vtd_set_quad(s, addr, val);
1802         }
1803         break;
1804 
1805     case DMAR_FRCD_REG_0_1:
1806         VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64
1807                     ", size %d, val 0x%"PRIx64, addr, size, val);
1808         assert(size == 4);
1809         vtd_set_long(s, addr, val);
1810         break;
1811 
1812     case DMAR_FRCD_REG_0_2:
1813         VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64
1814                     ", size %d, val 0x%"PRIx64, addr, size, val);
1815         if (size == 4) {
1816             vtd_set_long(s, addr, val);
1817         } else {
1818             vtd_set_quad(s, addr, val);
1819             /* May clear bit 127 (Fault), update PPF */
1820             vtd_update_fsts_ppf(s);
1821         }
1822         break;
1823 
1824     case DMAR_FRCD_REG_0_3:
1825         VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64
1826                     ", size %d, val 0x%"PRIx64, addr, size, val);
1827         assert(size == 4);
1828         vtd_set_long(s, addr, val);
1829         /* May clear bit 127 (Fault), update PPF */
1830         vtd_update_fsts_ppf(s);
1831         break;
1832 
1833     default:
1834         VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64
1835                     ", size %d, val 0x%"PRIx64, addr, size, val);
1836         if (size == 4) {
1837             vtd_set_long(s, addr, val);
1838         } else {
1839             vtd_set_quad(s, addr, val);
1840         }
1841     }
1842 }
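
/*
 * Note on the split writes above: for 64-bit registers written as two
 * dwords, the side effect only fires once the dword carrying the
 * trigger bit arrives.  Illustrative sequence for the Context Command
 * Register, whose ICC trigger sits in bit 63 (the high dword):
 *
 *     vtd_mem_write(s, DMAR_CCMD_REG,    lo, 4);   // latches bits only
 *     vtd_mem_write(s, DMAR_CCMD_REG_HI, hi, 4);   // runs ccmd handler
 */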
1843 
1844 static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
1845                                          bool is_write)
1846 {
1847     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
1848     IntelIOMMUState *s = vtd_as->iommu_state;
1849     IOMMUTLBEntry ret = {
1850         .target_as = &address_space_memory,
1851         .iova = addr,
1852         .translated_addr = 0,
1853         .addr_mask = ~(hwaddr)0,
1854         .perm = IOMMU_NONE,
1855     };
1856 
1857     if (!s->dmar_enabled) {
1858         /* DMAR disabled, passthrough, use 4k page */
1859         ret.iova = addr & VTD_PAGE_MASK_4K;
1860         ret.translated_addr = addr & VTD_PAGE_MASK_4K;
1861         ret.addr_mask = ~VTD_PAGE_MASK_4K;
1862         ret.perm = IOMMU_RW;
1863         return ret;
1864     }
1865 
1866     vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr,
1867                            is_write, &ret);
1868     VTD_DPRINTF(MMU,
1869                 "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
1870                 " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
1871                 VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn),
1872                 vtd_as->devfn, addr, ret.translated_addr);
1873     return ret;
1874 }
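
/*
 * Passthrough example for the DMAR-disabled branch above (illustrative
 * address): an access to 0x12345678 comes back identity-mapped at 4K
 * granularity:
 *
 *     ret.iova            = 0x12345678 & VTD_PAGE_MASK_4K;   // 0x12345000
 *     ret.translated_addr = 0x12345000;                      // same page
 *     ret.addr_mask       = ~VTD_PAGE_MASK_4K;               // 0xfff
 *     ret.perm            = IOMMU_RW;
 */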
1875 
1876 static void vtd_iommu_notify_started(MemoryRegion *iommu)
1877 {
1878     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
1879 
1880     hw_error("Device at bus %s addr %02x.%d requires iommu notifier which "
1881              "is currently not supported by intel-iommu emulation",
1882              vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
1883              PCI_FUNC(vtd_as->devfn));
1884 }
1885 
1886 static const VMStateDescription vtd_vmstate = {
1887     .name = "iommu-intel",
1888     .unmigratable = 1,
1889 };
1890 
1891 static const MemoryRegionOps vtd_mem_ops = {
1892     .read = vtd_mem_read,
1893     .write = vtd_mem_write,
1894     .endianness = DEVICE_LITTLE_ENDIAN,
1895     .impl = {
1896         .min_access_size = 4,
1897         .max_access_size = 8,
1898     },
1899     .valid = {
1900         .min_access_size = 4,
1901         .max_access_size = 8,
1902     },
1903 };
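
/*
 * With .valid restricted to 4..8 bytes, the memory core rejects 1- and
 * 2-byte guest accesses to the CSR window, so vtd_mem_read() and
 * vtd_mem_write() only ever see size == 4 or size == 8.
 */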
1904 
1905 static Property vtd_properties[] = {
1906     DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0),
1907     DEFINE_PROP_END_OF_LIST(),
1908 };
1909 
1910 
1911 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
1912 {
1913     uint64_t key = (uintptr_t)bus;    /* hashed as a full 64-bit key */
1914     VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
1915     VTDAddressSpace *vtd_dev_as;
1916 
1917     if (!vtd_bus) {
1918         /* No corresponding free(); insert a heap copy of the key, not &key */
1919         uint64_t *new_key = g_memdup(&key, sizeof(key));
1920         vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX);
1921         vtd_bus->bus = bus;
1922         g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
1923     }
1924 
1925     vtd_dev_as = vtd_bus->dev_as[devfn];
1926 
1927     if (!vtd_dev_as) {
1928         vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
1929 
1930         vtd_dev_as->bus = bus;
1931         vtd_dev_as->devfn = (uint8_t)devfn;
1932         vtd_dev_as->iommu_state = s;
1933         vtd_dev_as->context_cache_entry.context_cache_gen = 0;
1934         memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
1935                                  &s->iommu_ops, "intel_iommu", UINT64_MAX);
1936         address_space_init(&vtd_dev_as->as,
1937                            &vtd_dev_as->iommu, "intel_iommu");
1938     }
1939     return vtd_dev_as;
1940 }
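
/*
 * Hypothetical usage sketch (nothing in this file calls it this way):
 * lookups are idempotent, so a device's address space is created once
 * and the cached object is returned afterwards.
 *
 *     VTDAddressSpace *a = vtd_find_add_as(s, bus, PCI_DEVFN(2, 0));
 *     VTDAddressSpace *b = vtd_find_add_as(s, bus, PCI_DEVFN(2, 0));
 *     assert(a == b);   // same cached VTDAddressSpace
 */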
1941 
1942 /* Do the initialization. This is also called on reset, so take care when
1943  * adding new initialization code.
1944  */
1945 static void vtd_init(IntelIOMMUState *s)
1946 {
1947     memset(s->csr, 0, DMAR_REG_SIZE);
1948     memset(s->wmask, 0, DMAR_REG_SIZE);
1949     memset(s->w1cmask, 0, DMAR_REG_SIZE);
1950     memset(s->womask, 0, DMAR_REG_SIZE);
1951 
1952     s->iommu_ops.translate = vtd_iommu_translate;
1953     s->iommu_ops.notify_started = vtd_iommu_notify_started;
1954     s->root = 0;
1955     s->root_extended = false;
1956     s->dmar_enabled = false;
1957     s->iq_head = 0;
1958     s->iq_tail = 0;
1959     s->iq = 0;
1960     s->iq_size = 0;
1961     s->qi_enabled = false;
1962     s->iq_last_desc_type = VTD_INV_DESC_NONE;
1963     s->next_frcd_reg = 0;
1964     s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
1965              VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
1966     s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
1967 
1968     vtd_reset_context_cache(s);
1969     vtd_reset_iotlb(s);
1970 
1971     /* Define registers with default values and bit semantics */
1972     vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
1973     vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0);
1974     vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0);
1975     vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0);
1976     vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL);
1977     vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0);
1978     vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0);
1979     vtd_define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0);
1980     vtd_define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL);
1981 
1982     /* Advanced Fault Logging not supported */
1983     vtd_define_long(s, DMAR_FSTS_REG, 0, 0, 0x11UL);
1984     vtd_define_long(s, DMAR_FECTL_REG, 0x80000000UL, 0x80000000UL, 0);
1985     vtd_define_long(s, DMAR_FEDATA_REG, 0, 0x0000ffffUL, 0);
1986     vtd_define_long(s, DMAR_FEADDR_REG, 0, 0xfffffffcUL, 0);
1987 
1988     /* Treated as RsvdZ when EIM in ECAP_REG is not supported
1989      * vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0xffffffffUL, 0);
1990      */
1991     vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0, 0);
1992 
1993     /* Treated as RO for implementations that report the PLMR and PHMR
1994      * fields as Clear in the CAP_REG.
1995      * vtd_define_long(s, DMAR_PMEN_REG, 0, 0x80000000UL, 0);
1996      */
1997     vtd_define_long(s, DMAR_PMEN_REG, 0, 0, 0);
1998 
1999     vtd_define_quad(s, DMAR_IQH_REG, 0, 0, 0);
2000     vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0);
2001     vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0);
2002     vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL);
2003     vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0);
2004     vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0);
2005     vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0);
2006     /* Treated as RsvdZ when EIM in ECAP_REG is not supported */
2007     vtd_define_long(s, DMAR_IEUADDR_REG, 0, 0, 0);
2008 
2009     /* IOTLB registers */
2010     vtd_define_quad(s, DMAR_IOTLB_REG, 0, 0xb003ffff00000000ULL, 0);
2011     vtd_define_quad(s, DMAR_IVA_REG, 0, 0xfffffffffffff07fULL, 0);
2012     vtd_define_quad_wo(s, DMAR_IVA_REG, 0xfffffffffffff07fULL);
2013 
2014     /* Fault Recording Registers, 128-bit */
2015     vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0);
2016     vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL);
2017 }
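
/*
 * Example of the write-mask semantics configured above (illustrative
 * value): DMAR_IQT_REG has wmask 0x7fff0, so only bits 18:4 of a guest
 * write survive.  Writing 0xdeadbeef into the freshly reset register
 * stores 0x5bee0, which vtd_handle_iqt_write() decodes as tail 0x5bee:
 *
 *     vtd_set_quad(s, DMAR_IQT_REG, 0xdeadbeef);
 *     // vtd_get_quad_raw(s, DMAR_IQT_REG) == 0x5bee0
 */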
2018 
2019 /* The address spaces must not be reset here: devices keep using the address
2020  * space they were handed at first (they will not ask the bus again).
2021  */
2022 static void vtd_reset(DeviceState *dev)
2023 {
2024     IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
2025 
2026     VTD_DPRINTF(GENERAL, "");
2027     vtd_init(s);
2028 }
2029 
2030 static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
2031 {
2032     IntelIOMMUState *s = opaque;
2033     VTDAddressSpace *vtd_as;
2034 
2035     assert(0 <= devfn && devfn < VTD_PCI_DEVFN_MAX); /* dev_as[] bound */
2036 
2037     vtd_as = vtd_find_add_as(s, bus, devfn);
2038     return &vtd_as->as;
2039 }
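
/*
 * The devfn handed to vtd_host_dma_iommu() packs the PCI slot and
 * function numbers as (slot << 3) | func, so e.g. device 00:02.0
 * arrives as PCI_DEVFN(2, 0) == 0x10 and gets its own VTDAddressSpace.
 */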
2040 
2041 static void vtd_realize(DeviceState *dev, Error **errp)
2042 {
2043     PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
2044     IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
2045 
2046     VTD_DPRINTF(GENERAL, "");
2047     memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
2048     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
2049                           "intel_iommu", DMAR_REG_SIZE);
2050     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
2051     /* No corresponding destroy */
2052     s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
2053                                      g_free, g_free);
2054     s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
2055                                                 g_free, g_free);
2056     vtd_init(s);
2057     sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
2058     pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
2059 }
2060 
2061 static void vtd_class_init(ObjectClass *klass, void *data)
2062 {
2063     DeviceClass *dc = DEVICE_CLASS(klass);
2064 
2065     dc->reset = vtd_reset;
2066     dc->realize = vtd_realize;
2067     dc->vmsd = &vtd_vmstate;
2068     dc->props = vtd_properties;
2069     dc->hotpluggable = false;
2070 }
2071 
2072 static const TypeInfo vtd_info = {
2073     .name          = TYPE_INTEL_IOMMU_DEVICE,
2074     .parent        = TYPE_SYS_BUS_DEVICE,
2075     .instance_size = sizeof(IntelIOMMUState),
2076     .class_init    = vtd_class_init,
2077 };
2078 
2079 static void vtd_register_types(void)
2080 {
2081     VTD_DPRINTF(GENERAL, "");
2082     type_register_static(&vtd_info);
2083 }
2084 
2085 type_init(vtd_register_types)
2086