xref: /netbsd/sys/dev/dm/doc/locking.txt (revision c6207fc3)
11a571ae4Shaad
21a571ae4Shaad				Device-mapper Locking architecture
31a571ae4Shaad
41a571ae4ShaadOverview
51a571ae4Shaad
There are two kinds of users of the device-mapper driver:
      a) Users who use the disk drives
      b) Users who use the ioctl management interface
91a571ae4Shaad
101a571ae4ShaadManagement is done by dm_dev_*_ioctl and dm_table_*_ioctl routines. There are
111a571ae4Shaadtwo major structures used in these routines/device-mapper.
121a571ae4Shaad
131a571ae4ShaadTable entry:
141a571ae4Shaad
151a571ae4Shaadtypedef struct dm_table_entry {
161a571ae4Shaad        struct dm_dev *dm_dev;          /* backlink */
171a571ae4Shaad        uint64_t start;
181a571ae4Shaad        uint64_t length;
191a571ae4Shaad
201a571ae4Shaad        struct dm_target *target;      /* Link to table target. */
211a571ae4Shaad        void *target_config;           /* Target specific data. */
221a571ae4Shaad        SLIST_ENTRY(dm_table_entry) next;
231a571ae4Shaad} dm_table_entry_t;
241a571ae4Shaad
This structure stores one target part of a dm device. Every device can have
more than one target mapping entry, and the entries are stored in a list. Each
entry describes a mapping between logical and physical blocks in the dm device.
281a571ae4Shaad
291a571ae4Shaadstart  length target block device offset
301a571ae4Shaad0 	   102400 linear /dev/wd1a     384
311a571ae4Shaad102400 204800 linear /dev/wd2a     384
321a571ae4Shaad204800 409600 linear /dev/wd3a     384
331a571ae4Shaad
Every device has at least two tables, ACTIVE and INACTIVE. Only the ACTIVE table
is used during IO. Every IO operation on a dm device has to walk through the
dm_table_entries list.
361a571ae4Shaad
371a571ae4ShaadDevice entry:
381a571ae4Shaad
391a571ae4Shaadtypedef struct dm_dev {
401a571ae4Shaad        char name[DM_NAME_LEN];
411a571ae4Shaad        char uuid[DM_UUID_LEN];
421a571ae4Shaad
431a571ae4Shaad        int minor;
441a571ae4Shaad        uint32_t flags; /* store communication protocol flags */
451a571ae4Shaad
46*c6207fc3Sandvar        kmutex_t dev_mtx; /* mutex for general device lock */
471a571ae4Shaad        kcondvar_t dev_cv; /* cv for ioctl synchronisation */
481a571ae4Shaad
491a571ae4Shaad        uint32_t event_nr;
501a571ae4Shaad        uint32_t ref_cnt;
511a571ae4Shaad
521a571ae4Shaad        dm_table_head_t table_head;
531a571ae4Shaad
541a571ae4Shaad        struct dm_dev_head upcalls;
551a571ae4Shaad
561a571ae4Shaad        struct disklabel *dk_label;    /* Disklabel for this table. */
571a571ae4Shaad
581a571ae4Shaad        TAILQ_ENTRY(dm_dev) next_upcall; /* LIST of mirrored, snapshoted devices. */
591a571ae4Shaad
601a571ae4Shaad        TAILQ_ENTRY(dm_dev) next_devlist; /* Major device list. */
611a571ae4Shaad} dm_dev_t;
621a571ae4Shaad
Every device created in the dm device-mapper is represented by this structure.
All devices are stored in a list. Every ioctl routine has to work with this
structure.
661a571ae4Shaad
671a571ae4Shaad	Locking in dm driver
681a571ae4Shaad
Locking must be done in two ways: synchronisation between ioctl routines, and
synchronisation between IO operations and ioctl routines. Table entries are
read during IO and during some ioctl routines. There are only a few routines
which manipulate table lists.
711a571ae4Shaad
721a571ae4ShaadRead access to table list:
731a571ae4Shaad
741a571ae4Shaaddmsize
751a571ae4Shaaddmstrategy
761a571ae4Shaaddm_dev_status_ioctl
771a571ae4Shaaddm_table_info_ioctl
781a571ae4Shaaddm_table_deps_ioctl
791a571ae4Shaaddm_disk_ioctl 		-> DIOCCACHESYNC ioctl
801a571ae4Shaad
811a571ae4ShaadWrite access to table list:
dm_dev_remove_ioctl        -> remove device from list, this routine has to
							  remove all tables.
841a571ae4Shaaddm_dev_resume_ioctl		   -> Switch tables on suspended device, switch INACTIVE
851a571ae4Shaad							  and ACTIVE tables.
861a571ae4Shaaddm_table_clear_ioctl  	   -> Remove INACTIVE table from table list.
871a571ae4Shaad
881a571ae4Shaad
891a571ae4ShaadSynchronisation between readers and writers in table list
901a571ae4Shaad
911a571ae4ShaadI moved everything needed for table synchronisation to struct dm_table_head.
921a571ae4Shaad
931a571ae4Shaadtypedef struct dm_table_head {
941a571ae4Shaad        /* Current active table is selected with this. */
951a571ae4Shaad        int cur_active_table;
961a571ae4Shaad        struct dm_table tables[2];
971a571ae4Shaad
981a571ae4Shaad        kmutex_t   table_mtx;
991a571ae4Shaad        kcondvar_t table_cv; /*IO waiting cv */
1001a571ae4Shaad
1011a571ae4Shaad        uint32_t io_cnt;
1021a571ae4Shaad} dm_table_head_t;
1031a571ae4Shaad
dm_table_head_t is used as the entry point for every dm_table synchronisation
routine.

Because every table user has to go through the table list head to get to a
table list, I have implemented these routines to manage access to table lists.
1081a571ae4Shaad
1091a571ae4Shaad/*
1101a571ae4Shaad * Destroy all table data. This function can run when there are no
1111a571ae4Shaad * readers on table lists.
1121a571ae4Shaad */
1131a571ae4Shaadint dm_table_destroy(dm_table_head_t *, uint8_t);
1141a571ae4Shaad
1151a571ae4Shaad/*
1161a571ae4Shaad * Return length of active table in device.
1171a571ae4Shaad */
1181a571ae4Shaaduint64_t dm_table_size(dm_table_head_t *);
1191a571ae4Shaad
1201a571ae4Shaad/*
1211a571ae4Shaad * Return current active table to caller, increment io_cnt reference counter.
1221a571ae4Shaad */
1231a571ae4Shaadstruct dm_table *dm_table_get_entry(dm_table_head_t *, uint8_t);
1241a571ae4Shaad
1251a571ae4Shaad/*
 * Return > 0 if the table has at least one table entry (returns the number of
 * entries) and return 0 if it has none. The target count returned from this
 * function doesn't need to be true when the userspace user receives it (after
 * return there can be a dm_dev_resume_ioctl), therefore it is only informative.
1301a571ae4Shaad */
1311a571ae4Shaadint dm_table_get_target_count(dm_table_head_t *, uint8_t);
1321a571ae4Shaad
1331a571ae4Shaad/*
1341a571ae4Shaad * Decrement io reference counter and wake up all callers, with table_head cv.
1351a571ae4Shaad */
1361a571ae4Shaadvoid dm_table_release(dm_table_head_t *, uint8_t s);
1371a571ae4Shaad
1381a571ae4Shaad/*
1391a571ae4Shaad * Switch table from inactive to active mode. Have to wait until io_cnt is 0.
1401a571ae4Shaad */
1411a571ae4Shaadvoid dm_table_switch_tables(dm_table_head_t *);
1421a571ae4Shaad
1431a571ae4Shaad/*
1441a571ae4Shaad * Initialize table_head structures, I'm trying to keep this structure as
1451a571ae4Shaad * opaque as possible.
1461a571ae4Shaad */
1471a571ae4Shaadvoid dm_table_head_init(dm_table_head_t *);
1481a571ae4Shaad
1491a571ae4Shaad/*
1501a571ae4Shaad * Destroy all variables in table_head
1511a571ae4Shaad */
1521a571ae4Shaadvoid dm_table_head_destroy(dm_table_head_t *);
1531a571ae4Shaad
1541a571ae4ShaadInternal table synchronisation protocol
1551a571ae4Shaad
1561a571ae4ShaadReaders:
1571a571ae4Shaaddm_table_size
1581a571ae4Shaaddm_table_get_target_count
1591a571ae4Shaaddm_table_get_target_count
1601a571ae4Shaad
Readers which hold a reference counter:
1621a571ae4Shaaddm_table_get_entry
1631a571ae4Shaaddm_table_release
1641a571ae4Shaad
1651a571ae4ShaadWriter:
1661a571ae4Shaaddm_table_destroy
1671a571ae4Shaaddm_table_switch_tables
1681a571ae4Shaad
For managing synchronisation to table lists I use these routines. Every reader
uses the dm_table_busy routine to hold a reference counter during its work and
dm_table_unbusy to release the reference counter. Every writer has to wait
until the reference counter is 0, and only then can it work with the device. It
will sleep on head->table_cv while there are other readers. dm_table_get_entry
is special in that it returns the table with the reference counter held. After
dm_table_get_entry every caller must call dm_table_release when it no longer
wants to work with the table.
1741a571ae4Shaad
1751a571ae4Shaad/*
1761a571ae4Shaad * Function to increment table user reference counter. Return id
1771a571ae4Shaad * of table_id table.
1781a571ae4Shaad * DM_TABLE_ACTIVE will return active table id.
1791a571ae4Shaad * DM_TABLE_INACTIVE will return inactive table id.
1801a571ae4Shaad */
1811a571ae4Shaadstatic int
1821a571ae4Shaaddm_table_busy(dm_table_head_t *head, uint8_t table_id)
1831a571ae4Shaad{
1841a571ae4Shaad        uint8_t id;
1851a571ae4Shaad
1861a571ae4Shaad        id = 0;
1871a571ae4Shaad
1881a571ae4Shaad        mutex_enter(&head->table_mtx);
1891a571ae4Shaad
1901a571ae4Shaad        if (table_id == DM_TABLE_ACTIVE)
1911a571ae4Shaad                id = head->cur_active_table;
1921a571ae4Shaad        else
1931a571ae4Shaad                id = 1 - head->cur_active_table;
1941a571ae4Shaad
1951a571ae4Shaad        head->io_cnt++;
1961a571ae4Shaad
1971a571ae4Shaad        mutex_exit(&head->table_mtx);
1981a571ae4Shaad        return id;
1991a571ae4Shaad}
2001a571ae4Shaad
2011a571ae4Shaad/*
2021a571ae4Shaad * Function release table lock and eventually wakeup all waiters.
2031a571ae4Shaad */
2041a571ae4Shaadstatic void
2051a571ae4Shaaddm_table_unbusy(dm_table_head_t *head)
2061a571ae4Shaad{
2071a571ae4Shaad        KASSERT(head->io_cnt != 0);
2081a571ae4Shaad
2091a571ae4Shaad        mutex_enter(&head->table_mtx);
2101a571ae4Shaad
2111a571ae4Shaad        if (--head->io_cnt == 0)
2121a571ae4Shaad                cv_broadcast(&head->table_cv);
2131a571ae4Shaad
2141a571ae4Shaad        mutex_exit(&head->table_mtx);
2151a571ae4Shaad}
2161a571ae4Shaad
Device-mapper device synchronisation between ioctl routines
2181a571ae4Shaad
2191a571ae4Shaad
Every ioctl user has to find the dm_device by name, uuid or minor number.
For this dm_dev_lookup is used. This routine returns the device with its
reference counter held.
2231a571ae4Shaad
2241a571ae4Shaadvoid
2251a571ae4Shaaddm_dev_busy(dm_dev_t *dmv)
2261a571ae4Shaad{
2271a571ae4Shaad        mutex_enter(&dmv->dev_mtx);
2281a571ae4Shaad        dmv->ref_cnt++;
2291a571ae4Shaad        mutex_exit(&dmv->dev_mtx);
2301a571ae4Shaad}
2311a571ae4Shaad
2321a571ae4Shaadvoid
2331a571ae4Shaaddm_dev_unbusy(dm_dev_t *dmv)
2341a571ae4Shaad{
2351a571ae4Shaad        KASSERT(dmv->ref_cnt != 0);
2361a571ae4Shaad
2371a571ae4Shaad        mutex_enter(&dmv->dev_mtx);
2381a571ae4Shaad        if (--dmv->ref_cnt == 0)
2391a571ae4Shaad                cv_broadcast(&dmv->dev_cv);
2401a571ae4Shaad        mutex_exit(&dmv->dev_mtx);
2411a571ae4Shaad}
2421a571ae4Shaad
Before returning, an ioctl routine must release the reference counter with
dm_dev_unbusy.
2451a571ae4Shaad
The dm_dev_remove_ioctl routine has to remove the dm_dev from the global device
list, and wait until all ioctl users of the dm_dev are gone.
2481a571ae4Shaad
249