		Device-mapper Locking architecture

Overview

There are two kinds of users of the device-mapper driver:
	a) Users who use disk drives
	b) Users who use the ioctl management interface

Management is done by the dm_dev_*_ioctl and dm_table_*_ioctl routines. There
are two major structures used in these routines and in device-mapper.

Table entry:

typedef struct dm_table_entry {
	struct dm_dev *dm_dev; /* backlink */
	uint64_t start;
	uint64_t length;

	struct dm_target *target; /* Link to table target. */
	void *target_config; /* Target specific data. */
	SLIST_ENTRY(dm_table_entry) next;
} dm_table_entry_t;

This structure stores every target part of a dm device. Every device can have
more than one target mapping entry stored in a list. This structure describes
the mapping between logical and physical blocks in a dm device.

start   length  target  block device  offset
0       102400  linear  /dev/wd1a     384
102400  204800  linear  /dev/wd2a     384
204800  409600  linear  /dev/wd3a     384

Every device has at least two tables, ACTIVE and INACTIVE. Only the ACTIVE
table is used during IO. Every IO operation on a dm device has to walk through
the dm_table_entries list.

Device entry:

typedef struct dm_dev {
	char name[DM_NAME_LEN];
	char uuid[DM_UUID_LEN];

	int minor;
	uint32_t flags; /* store communication protocol flags */

	kmutex_t dev_mtx; /* mutex for general device lock */
	kcondvar_t dev_cv; /* cv for ioctl synchronisation */

	uint32_t event_nr;
	uint32_t ref_cnt;

	dm_table_head_t table_head;

	struct dm_dev_head upcalls;

	struct disklabel *dk_label; /* Disklabel for this table. */

	TAILQ_ENTRY(dm_dev) next_upcall; /* LIST of mirrored, snapshoted devices. */

	TAILQ_ENTRY(dm_dev) next_devlist; /* Major device list. */
} dm_dev_t;

Every device created in device-mapper is represented by this structure.
All devices are stored in a list. Every ioctl routine has to work with this
structure.

	Locking in dm driver

Locking must provide two kinds of synchronisation: between the ioctl routines
themselves, and between IO operations and ioctl routines. Table entries are
read during IO and during some ioctl routines. There are only a few routines
which manipulate table lists.

Read access to table list:

dmsize
dmstrategy
dm_dev_status_ioctl
dm_table_info_ioctl
dm_table_deps_ioctl
dm_disk_ioctl -> DIOCCACHESYNC ioctl

Write access to table list:
dm_dev_remove_ioctl  -> Remove device from list; this routine has to
                        remove all tables.
dm_dev_resume_ioctl  -> Switch tables on suspended device, switch INACTIVE
                        and ACTIVE tables.
dm_table_clear_ioctl -> Remove INACTIVE table from table list.


Synchronisation between readers and writers in table list

I moved everything needed for table synchronisation to struct dm_table_head.

typedef struct dm_table_head {
	/* Current active table is selected with this. */
	int cur_active_table;
	struct dm_table tables[2];

	kmutex_t table_mtx;
	kcondvar_t table_cv; /*IO waiting cv */

	uint32_t io_cnt;
} dm_table_head_t;

dm_table_head_t is used as the entry point for every dm_table synchronisation
routine.

Because every table user has to get access to the table list head, I have
implemented these routines to manage access to table lists.

/*
 * Destroy all table data. This function can run when there are no
 * readers on table lists.
 */
int dm_table_destroy(dm_table_head_t *, uint8_t);

/*
 * Return length of active table in device.
 */
uint64_t dm_table_size(dm_table_head_t *);

/*
 * Return current active table to caller, increment io_cnt reference counter.
 */
struct dm_table *dm_table_get_entry(dm_table_head_t *, uint8_t);

/*
 * Return > 0 if table is at least one table entry (returns number of entries)
 * and return 0 if there is not.
 * Target count returned from this function
 * doesn't need to be true when userspace user receives it (after return
 * there can be dm_dev_resume_ioctl), therefore this is only informative.
 */
int dm_table_get_target_count(dm_table_head_t *, uint8_t);

/*
 * Decrement io reference counter and wake up all callers, with table_head cv.
 */
void dm_table_release(dm_table_head_t *, uint8_t s);

/*
 * Switch table from inactive to active mode. Have to wait until io_cnt is 0.
 */
void dm_table_switch_tables(dm_table_head_t *);

/*
 * Initialize table_head structures, I'm trying to keep this structure as
 * opaque as possible.
 */
void dm_table_head_init(dm_table_head_t *);

/*
 * Destroy all variables in table_head
 */
void dm_table_head_destroy(dm_table_head_t *);

Internal table synchronisation protocol

Readers:
dm_table_size
dm_table_get_target_count
dm_table_get_target_count

Readers which hold the reference counter:
dm_table_get_entry
dm_table_release

Writers:
dm_table_destroy
dm_table_switch_tables

I use the following routines to manage synchronisation of the table lists.
Every reader uses the dm_table_busy routine to hold the reference counter
during its work and dm_table_unbusy to release the reference counter.
Every writer has to wait until the reference counter is 0, and only then can
it work with the device. It will sleep on head->table_cv while there are other
readers. dm_table_get_entry is specific in that it returns the table with the
reference counter held. After dm_table_get_entry every caller must call
dm_table_release when it no longer wants to work with the table.

/*
 * Function to increment table user reference counter. Return id
 * of table_id table.
 * DM_TABLE_ACTIVE will return active table id.
 * DM_TABLE_INACTIVE will return inactive table id.
 */
static int
dm_table_busy(dm_table_head_t *head, uint8_t table_id)
{
	uint8_t id;

	id = 0;

	mutex_enter(&head->table_mtx);

	if (table_id == DM_TABLE_ACTIVE)
		id = head->cur_active_table;
	else
		id = 1 - head->cur_active_table;

	head->io_cnt++;

	mutex_exit(&head->table_mtx);
	return id;
}

/*
 * Function release table lock and eventually wakeup all waiters.
 */
static void
dm_table_unbusy(dm_table_head_t *head)
{
	KASSERT(head->io_cnt != 0);

	mutex_enter(&head->table_mtx);

	if (--head->io_cnt == 0)
		cv_broadcast(&head->table_cv);

	mutex_exit(&head->table_mtx);
}

Device-mapper device synchronisation between ioctl routines


Every ioctl user has to find the dm device by name, uuid or minor number.
For this dm_dev_lookup is used. This routine returns the device with its
reference counter held.

void
dm_dev_busy(dm_dev_t *dmv)
{
	mutex_enter(&dmv->dev_mtx);
	dmv->ref_cnt++;
	mutex_exit(&dmv->dev_mtx);
}

void
dm_dev_unbusy(dm_dev_t *dmv)
{
	KASSERT(dmv->ref_cnt != 0);

	mutex_enter(&dmv->dev_mtx);
	if (--dmv->ref_cnt == 0)
		cv_broadcast(&dmv->dev_cv);
	mutex_exit(&dmv->dev_mtx);
}

Before returning, an ioctl routine must release the reference counter with
dm_dev_unbusy.

The dm_dev_remove_ioctl routine has to remove the dm_dev from the global
device list and wait until all ioctl users of that dm_dev are gone.