1 /*- 2 * Copyright (c) 2010 Isilon Systems, Inc. 3 * Copyright (c) 2010 iX Systems, Inc. 4 * Copyright (c) 2010 Panasas, Inc. 5 * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice unmodified, this list of conditions, and the following 13 * disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 #include <sys/kernel.h> 37 #include <sys/sysctl.h> 38 #include <sys/proc.h> 39 #include <sys/sglist.h> 40 #include <sys/sleepqueue.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/bus.h> 44 #include <sys/fcntl.h> 45 #include <sys/file.h> 46 #include <sys/filio.h> 47 #include <sys/rwlock.h> 48 49 #include <vm/vm.h> 50 #include <vm/pmap.h> 51 52 #include <machine/stdarg.h> 53 54 #if defined(__i386__) || defined(__amd64__) 55 #include <machine/md_var.h> 56 #endif 57 58 #include <linux/kobject.h> 59 #include <linux/device.h> 60 #include <linux/slab.h> 61 #include <linux/module.h> 62 #include <linux/moduleparam.h> 63 #include <linux/cdev.h> 64 #include <linux/file.h> 65 #include <linux/sysfs.h> 66 #include <linux/mm.h> 67 #include <linux/io.h> 68 #include <linux/vmalloc.h> 69 #include <linux/netdevice.h> 70 #include <linux/timer.h> 71 #include <linux/workqueue.h> 72 #include <linux/interrupt.h> 73 #include <linux/uaccess.h> 74 #include <linux/kernel.h> 75 #include <linux/list.h> 76 #include <linux/compat.h> 77 78 #include <vm/vm_pager.h> 79 80 SYSCTL_NODE(_compat, OID_AUTO, linuxkpi, CTLFLAG_RW, 0, "LinuxKPI parameters"); 81 82 MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); 83 84 #include <linux/rbtree.h> 85 /* Undo Linux compat changes. 
 */
#undef RB_ROOT
#undef file
#undef cdev
#define	RB_ROOT(head)	(head)->rbh_root

/* Roots of the LinuxKPI object hierarchy and global driver/device lists. */
struct kobject linux_class_root;
struct device linux_root_device;
struct class linux_class_misc;
struct list_head pci_drivers;
struct list_head pci_devices;
struct net init_net;
spinlock_t pci_lock;

/* Power-of-two-minus-one mask used to round jiffies; set by linux_timer_init(). */
unsigned long linux_timer_hz_mask;

/*
 * Comparison callback for the generated red-black tree code.  The LinuxKPI
 * rbtree consumers supply their own ordering, so this must never be called.
 */
int
panic_cmp(struct rb_node *one, struct rb_node *two)
{
	panic("no cmp");
}

RB_GENERATE(linux_root, rb_node, __entry, panic_cmp);

/*
 * Format and install a new name on a kobject, replacing (and freeing) any
 * previous name.  '/' characters are rewritten to '!' since the name becomes
 * a sysfs directory component.  Returns 0 or a negative Linux errno.
 */
int
kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list args)
{
	va_list tmp_va;
	int len;
	char *old;
	char *name;
	char dummy;

	old = kobj->name;

	/* Keep the existing name when no format is given. */
	if (old && fmt == NULL)
		return (0);
	/*
	 * NOTE(review): if both old name and fmt are NULL, fmt is passed to
	 * vsnprintf() below — presumably callers never do that; verify.
	 */

	/* compute length of string */
	va_copy(tmp_va, args);
	len = vsnprintf(&dummy, 0, fmt, tmp_va);
	va_end(tmp_va);

	/* account for zero termination */
	len++;

	/* check for error */
	if (len < 1)
		return (-EINVAL);

	/* allocate memory for string */
	name = kzalloc(len, GFP_KERNEL);
	if (name == NULL)
		return (-ENOMEM);
	vsnprintf(name, len, fmt, args);
	kobj->name = name;

	/* free old string */
	kfree(old);

	/* filter new string: '/' is not allowed in a sysfs node name */
	for (; *name != '\0'; name++)
		if (*name == '/')
			*name = '!';
	return (0);
}

/* Varargs wrapper around kobject_set_name_vargs(). */
int
kobject_set_name(struct kobject *kobj, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);

	return (error);
}

/*
 * Second half of kobject_add(): link the object under its parent, create
 * its sysfs directory and populate the ktype's default attributes.  On any
 * attribute failure the directory is removed again.  Returns 0 or a
 * negative Linux errno from the sysfs calls.
 */
static int
kobject_add_complete(struct kobject *kobj, struct kobject *parent)
{
	const struct kobj_type *t;
	int error;

	kobj->parent = parent;
	error = sysfs_create_dir(kobj);
	if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) {
		struct attribute **attr;
		t = kobj->ktype;

		for (attr = t->default_attrs; *attr != NULL; attr++) {
			error = sysfs_create_file(kobj, *attr);
			if (error)
				break;
		}
		/* Undo the directory if any default attribute failed. */
		if (error)
			sysfs_remove_dir(kobj);

	}
	return (error);
}

/* Name the kobject from the format string, then register it under parent. */
int
kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...)
{
	va_list args;
	int error;

	va_start(args, fmt);
	error = kobject_set_name_vargs(kobj, fmt, args);
	va_end(args);
	if (error)
		return (error);

	return kobject_add_complete(kobj, parent);
}

/*
 * Final-release callback invoked when a kobject's refcount hits zero:
 * tear down its sysfs directory, run the ktype release hook (which may
 * free the object itself), then free the saved name.
 */
void
linux_kobject_release(struct kref *kref)
{
	struct kobject *kobj;
	char *name;

	kobj = container_of(kref, struct kobject, kref);
	sysfs_remove_dir(kobj);
	/* Stash the name: the release hook below may free the kobject. */
	name = kobj->name;
	if (kobj->ktype && kobj->ktype->release)
		kobj->ktype->release(kobj);
	kfree(name);
}

/* ktype release hook for kobjects that were allocated with kmalloc(). */
static void
linux_kobject_kfree(struct kobject *kobj)
{
	kfree(kobj);
}

/* Free only the kobject's name string (object itself is owned elsewhere). */
static void
linux_kobject_kfree_name(struct kobject *kobj)
{
	if (kobj) {
		kfree(kobj->name);
	}
}

const struct kobj_type linux_kfree_type = {
	.release = linux_kobject_kfree
};

/* Default release for devices created by device_create(). */
static void
linux_device_release(struct device *dev)
{
	pr_debug("linux_device_release: %s\n", dev_name(dev));
	kfree(dev);
}

/*
 * sysfs "show" dispatcher for class attributes: forward to the
 * class_attribute's show method, or report -EIO when absent.
 */
static ssize_t
linux_class_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr =
	    container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct class, kobj),
		    dattr, buf);
	return (error);
}

/*
 * sysfs "store" dispatcher for class attributes: forward to the
 * class_attribute's store method, or report -EIO when absent.
 */
static ssize_t
linux_class_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct class_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct class_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct class, kobj),
		    dattr, buf, count);
	return (error);
}

/* kobject release for a class: delegate to the class' own release hook. */
static void
linux_class_release(struct kobject *kobj)
{
	struct class *class;

	class = container_of(kobj, struct class, kobj);
	if (class->class_release)
		class->class_release(class);
}

static const struct sysfs_ops linux_class_sysfs = {
	.show  = linux_class_show,
	.store = linux_class_store,
};

const struct kobj_type linux_class_ktype = {
	.release = linux_class_release,
	.sysfs_ops = &linux_class_sysfs
};

/* kobject release for a device. */
static void
linux_dev_release(struct kobject *kobj)
{
	struct device *dev;

	dev = container_of(kobj, struct device, kobj);
	/* This is the precedence defined by linux.
	 * The device's own release hook wins over the class-level one. */
	if (dev->release)
		dev->release(dev);
	else if (dev->class && dev->class->dev_release)
		dev->class->dev_release(dev);
}

/* sysfs "show" dispatcher for device attributes (-EIO when no method). */
static ssize_t
linux_dev_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->show)
		error = dattr->show(container_of(kobj, struct device, kobj),
		    dattr, buf);
	return (error);
}

/* sysfs "store" dispatcher for device attributes (-EIO when no method). */
static ssize_t
linux_dev_store(struct kobject *kobj, struct attribute *attr, const char *buf,
    size_t count)
{
	struct device_attribute *dattr;
	ssize_t error;

	dattr = container_of(attr, struct device_attribute, attr);
	error = -EIO;
	if (dattr->store)
		error = dattr->store(container_of(kobj, struct device, kobj),
		    dattr, buf, count);
	return (error);
}

static const struct sysfs_ops linux_dev_sysfs = {
	.show  = linux_dev_show,
	.store = linux_dev_store,
};

const struct kobj_type linux_dev_ktype = {
	.release = linux_dev_release,
	.sysfs_ops = &linux_dev_sysfs
};

/*
 * Allocate, name and register a new device under the given class/parent.
 * The returned device is freed by linux_device_release() when its last
 * reference goes away.  Allocation sleeps (M_WAITOK) and cannot fail.
 */
struct device *
device_create(struct class *class, struct device *parent, dev_t devt,
    void *drvdata, const char *fmt, ...)
{
	struct device *dev;
	va_list args;

	dev = kzalloc(sizeof(*dev), M_WAITOK);
	dev->parent = parent;
	dev->class = class;
	dev->devt = devt;
	dev->driver_data = drvdata;
	dev->release = linux_device_release;
	va_start(args, fmt);
	kobject_set_name_vargs(&dev->kobj, fmt, args);
	va_end(args);
	device_register(dev);

	return (dev);
}

/* Initialize a kobject, set its formatted name, and add it under parent. */
int
kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype,
    struct kobject *parent, const char *fmt, ...)
368 { 369 va_list args; 370 int error; 371 372 kobject_init(kobj, ktype); 373 kobj->ktype = ktype; 374 kobj->parent = parent; 375 kobj->name = NULL; 376 377 va_start(args, fmt); 378 error = kobject_set_name_vargs(kobj, fmt, args); 379 va_end(args); 380 if (error) 381 return (error); 382 return kobject_add_complete(kobj, parent); 383 } 384 385 static void 386 linux_file_dtor(void *cdp) 387 { 388 struct linux_file *filp; 389 390 linux_set_current(curthread); 391 filp = cdp; 392 filp->f_op->release(filp->f_vnode, filp); 393 vdrop(filp->f_vnode); 394 kfree(filp); 395 } 396 397 static int 398 linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 399 { 400 struct linux_cdev *ldev; 401 struct linux_file *filp; 402 struct file *file; 403 int error; 404 405 file = td->td_fpop; 406 ldev = dev->si_drv1; 407 if (ldev == NULL) 408 return (ENODEV); 409 filp = kzalloc(sizeof(*filp), GFP_KERNEL); 410 filp->f_dentry = &filp->f_dentry_store; 411 filp->f_op = ldev->ops; 412 filp->f_flags = file->f_flag; 413 vhold(file->f_vnode); 414 filp->f_vnode = file->f_vnode; 415 linux_set_current(td); 416 if (filp->f_op->open) { 417 error = -filp->f_op->open(file->f_vnode, filp); 418 if (error) { 419 kfree(filp); 420 goto done; 421 } 422 } 423 error = devfs_set_cdevpriv(filp, linux_file_dtor); 424 if (error) { 425 filp->f_op->release(file->f_vnode, filp); 426 kfree(filp); 427 } 428 done: 429 return (error); 430 } 431 432 static int 433 linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 434 { 435 struct linux_cdev *ldev; 436 struct linux_file *filp; 437 struct file *file; 438 int error; 439 440 file = td->td_fpop; 441 ldev = dev->si_drv1; 442 if (ldev == NULL) 443 return (0); 444 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 445 return (error); 446 filp->f_flags = file->f_flag; 447 devfs_clear_cdevpriv(); 448 449 450 return (0); 451 } 452 453 #define LINUX_IOCTL_MIN_PTR 0x10000UL 454 #define LINUX_IOCTL_MAX_PTR (LINUX_IOCTL_MIN_PTR + 
	IOCPARM_MAX)

/*
 * If *uaddr lies in the fake ioctl window, rewrite it to point into the
 * kernel buffer saved in the current task and return 1 (setting *uaddr
 * to NULL on any bounds violation).  Return 0 for genuine user addresses.
 */
static inline int
linux_remap_address(void **uaddr, size_t len)
{
	uintptr_t uaddr_val = (uintptr_t)(*uaddr);

	if (unlikely(uaddr_val >= LINUX_IOCTL_MIN_PTR &&
	    uaddr_val < LINUX_IOCTL_MAX_PTR)) {
		struct task_struct *pts = current;
		if (pts == NULL) {
			*uaddr = NULL;
			return (1);
		}

		/* compute data offset */
		uaddr_val -= LINUX_IOCTL_MIN_PTR;

		/* check that length is within bounds */
		if ((len > IOCPARM_MAX) ||
		    (uaddr_val + len) > pts->bsd_ioctl_len) {
			*uaddr = NULL;
			return (1);
		}

		/* re-add kernel buffer address */
		uaddr_val += (uintptr_t)pts->bsd_ioctl_data;

		/* update address location */
		*uaddr = (void *)uaddr_val;
		return (1);
	}
	return (0);
}

/*
 * copyin() replacement that understands remapped ioctl addresses.
 * Returns 0 or a negative Linux errno.
 */
int
linux_copyin(const void *uaddr, void *kaddr, size_t len)
{
	if (linux_remap_address(__DECONST(void **, &uaddr), len)) {
		if (uaddr == NULL)
			return (-EFAULT);
		/* already a kernel address, plain copy */
		memcpy(kaddr, uaddr, len);
		return (0);
	}
	return (-copyin(uaddr, kaddr, len));
}

/*
 * copyout() replacement that understands remapped ioctl addresses.
 * Returns 0 or a negative Linux errno.
 */
int
linux_copyout(const void *kaddr, void *uaddr, size_t len)
{
	if (linux_remap_address(&uaddr, len)) {
		if (uaddr == NULL)
			return (-EFAULT);
		memcpy(uaddr, kaddr, len);
		return (0);
	}
	return (-copyout(kaddr, uaddr, len));
}

/*
 * cdevsw ioctl: bridge the FreeBSD kernel-buffer ioctl convention to
 * Linux drivers, which expect a user-space pointer argument.
 */
static int
linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	unsigned size;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;

	linux_set_current(td);
	size = IOCPARM_LEN(cmd);
	/* refer to logic in sys_ioctl() */
	if (size > 0) {
		/*
		 * Setup hint for linux_copyin() and linux_copyout().
		 *
		 * Background: Linux code expects a user-space address
		 * while FreeBSD supplies a kernel-space address.
		 */
		current->bsd_ioctl_data = data;
		current->bsd_ioctl_len = size;
		/* hand the driver a fake address inside the remap window */
		data = (void *)LINUX_IOCTL_MIN_PTR;
	} else {
		/* fetch user-space pointer */
		data = *(void **)data;
	}
	if (filp->f_op->unlocked_ioctl)
		/* negate the driver's Linux errno into a FreeBSD errno */
		error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data);
	else
		error = ENOTTY;
	if (size > 0) {
		/* clear the remap hint again */
		current->bsd_ioctl_data = NULL;
		current->bsd_ioctl_len = 0;
	}

	return (error);
}

/*
 * cdevsw read: forward a single-iovec uio to the driver's read hook and
 * advance the uio by however many bytes the driver consumed.
 */
static int
linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	ssize_t bytes;
	int error;

	td = curthread;
	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* consume the bytes the driver transferred */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}

/* cdevsw write: mirror image of linux_dev_read() for the write hook. */
static int
linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	ssize_t bytes;
	int error;

	td = curthread;
	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->write) {
		bytes = filp->f_op->write(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* consume the bytes the driver transferred */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}

/*
 * cdevsw poll: forward to the driver's poll hook and mask the returned
 * events against the ones being polled for.
 */
static int
linux_dev_poll(struct cdev *dev, int events, struct thread *td)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct file *file;
	int revents;
	int error;

	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (0);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	if (filp->f_op->poll)
		revents = filp->f_op->poll(filp, NULL) & events;
	else
		revents = 0;

	return (revents);
}

/*
 * cdevsw mmap_single: run the driver's mmap hook against a scratch VMA,
 * then wrap the physical range it filled in (vm_pfn/vm_len) into an
 * OBJT_SG VM object, propagating any non-default memory attribute.
 */
static int
linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot)
{
	struct linux_cdev *ldev;
	struct linux_file *filp;
	struct thread *td;
	struct file *file;
	struct vm_area_struct vma;
	int error;

	td = curthread;
	file = td->td_fpop;
	ldev = dev->si_drv1;
	if (ldev == NULL)
		return (ENODEV);
	if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
		return (error);
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	/* fabricate a minimal VMA describing the requested mapping */
	vma.vm_start = 0;
	vma.vm_end = size;
	vma.vm_pgoff = *offset / PAGE_SIZE;
	vma.vm_pfn = 0;
	vma.vm_page_prot = VM_MEMATTR_DEFAULT;
	if (filp->f_op->mmap) {
		error = -filp->f_op->mmap(filp, &vma);
		if (error == 0) {
			struct sglist *sg;

			sg = sglist_alloc(1, M_WAITOK);
			sglist_append_phys(sg,
			    (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT, vma.vm_len);
			*object = vm_pager_allocate(OBJT_SG, sg, vma.vm_len,
			    nprot, 0, td->td_ucred);
			if (*object == NULL) {
				sglist_free(sg);
				error = EINVAL;
				goto done;
			}
			*offset = 0;
			/* honor a driver-requested memory attribute */
			if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) {
				VM_OBJECT_WLOCK(*object);
				vm_object_set_memattr(*object,
				    vma.vm_page_prot);
				VM_OBJECT_WUNLOCK(*object);
			}
		}
	} else
		error = ENODEV;
done:
	return (error);
}

/* Character-device switch shared by all LinuxKPI character devices. */
struct cdevsw linuxcdevsw = {
	.d_version = D_VERSION,
	.d_flags = D_TRACKCLOSE,
	.d_open = linux_dev_open,
	.d_close = linux_dev_close,
	.d_read = linux_dev_read,
	.d_write = linux_dev_write,
	.d_ioctl = linux_dev_ioctl,
	.d_mmap_single = linux_dev_mmap_single,
	.d_poll = linux_dev_poll,
};

/*
 * fileops read for Linux files exposed as FreeBSD file descriptors
 * (non-devfs path); same single-iovec forwarding as linux_dev_read().
 */
static int
linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct linux_file *filp;
	ssize_t bytes;
	int error;

	error = 0;
	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	/* XXX no support for I/O vectors currently */
	if (uio->uio_iovcnt != 1)
		return (EOPNOTSUPP);
	linux_set_current(td);
	if (filp->f_op->read) {
		bytes = filp->f_op->read(filp, uio->uio_iov->iov_base,
		    uio->uio_iov->iov_len, &uio->uio_offset);
		if (bytes >= 0) {
			/* consume the bytes the driver transferred */
			uio->uio_iov->iov_base =
			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
			uio->uio_iov->iov_len -= bytes;
			uio->uio_resid -= bytes;
		} else
			error = -bytes;
	} else
		error = ENXIO;

	return (error);
}

/* fileops poll: forward to the driver's poll hook, mask with events. */
static int
linux_file_poll(struct file *file, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct linux_file *filp;
	int revents;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	if (filp->f_op->poll)
		revents = filp->f_op->poll(filp,
		    NULL) & events;
	else
		revents = 0;

	return (revents);
}

/*
 * fileops close: run the driver release hook, detach SIGIO state and
 * free the shadow linux_file.
 */
static int
linux_file_close(struct file *file, struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)file->f_data;
	filp->f_flags = file->f_flag;
	linux_set_current(td);
	error = -filp->f_op->release(NULL, filp);
	funsetown(&filp->f_sigio);
	kfree(filp);

	return (error);
}

/*
 * fileops ioctl: handle the generic fcntl-style requests locally
 * (async/ownership) and reject everything else with ENOTTY.
 */
static int
linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred,
    struct thread *td)
{
	struct linux_file *filp;
	int error;

	filp = (struct linux_file *)fp->f_data;
	filp->f_flags = fp->f_flag;
	error = 0;

	linux_set_current(td);
	switch (cmd) {
	case FIONBIO:
		break;
	case FIOASYNC:
		if (filp->f_op->fasync == NULL)
			break;
		error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC);
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &filp->f_sigio);
		if (error == 0)
			error = filp->f_op->fasync(0, filp,
			    fp->f_flag & FASYNC);
		break;
	case FIOGETOWN:
		*(int *)data = fgetown(&filp->f_sigio);
		break;
	default:
		error = ENOTTY;
		break;
	}
	return (error);
}

/* fstat(2) is not supported for these files. */
static int
linux_file_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

	return (EOPNOTSUPP);
}

/* No kinfo to export; report success with an empty record. */
static int
linux_file_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{

	return (0);
}

/* fileops used when a Linux file is installed as a FreeBSD descriptor. */
struct fileops linuxfileops = {
	.fo_read = linux_file_read,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_kqfilter = invfo_kqfilter,
	.fo_stat = linux_file_stat,
	.fo_fill_kinfo = linux_file_fill_kinfo,
	.fo_poll = linux_file_poll,
	.fo_close = linux_file_close,
	.fo_ioctl = linux_file_ioctl,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
};

/*
 * Hash of vmmap addresses.  This is infrequently accessed and does not
 * need to be particularly large.  This is done because we must store the
 * caller's idea of the map size to properly unmap.
 */
struct vmmap {
	LIST_ENTRY(vmmap) vm_next;
	void 		*vm_addr;	/* mapped KVA, the hash key */
	unsigned long	 vm_size;	/* size recorded at map time */
};

struct vmmaphd {
	struct vmmap *lh_first;
};
#define	VMMAP_HASH_SIZE	64
#define	VMMAP_HASH_MASK	(VMMAP_HASH_SIZE - 1)
#define	VM_HASH(addr)	((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK
static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE];
static struct mtx vmmaplock;

/* Record an address -> size association for a mapping just created. */
static void
vmmap_add(void *addr, unsigned long size)
{
	struct vmmap *vmmap;

	/* allocate outside the lock; GFP_KERNEL may sleep */
	vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL);
	mtx_lock(&vmmaplock);
	vmmap->vm_size = size;
	vmmap->vm_addr = addr;
	LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next);
	mtx_unlock(&vmmaplock);
}

/*
 * Look up and unlink the record for addr; caller owns (and must kfree)
 * the returned entry.  NULL when the address was never registered.
 */
static struct vmmap *
vmmap_remove(void *addr)
{
	struct vmmap *vmmap;

	mtx_lock(&vmmaplock);
	LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next)
		if (vmmap->vm_addr == addr)
			break;
	if (vmmap)
		LIST_REMOVE(vmmap, vm_next);
	mtx_unlock(&vmmaplock);

	return (vmmap);
}

#if defined(__i386__) || defined(__amd64__)
/* ioremap() backend: map a physical range with the given memory attr. */
void *
_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr)
{
	void *addr;

	addr = pmap_mapdev_attr(phys_addr, size, attr);
	if (addr == NULL)
		return (NULL);
	/* remember the size so iounmap() can undo the mapping */
	vmmap_add(addr, size);

	return (addr);
}
#endif

/* Undo an _ioremap_attr() mapping; silently ignores unknown addresses. */
void
iounmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
#if defined(__i386__) || defined(__amd64__)
	pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size);
#endif
	kfree(vmmap);
}

/* Map an array of pages into a fresh contiguous KVA range. */
void *
vmap(struct page **pages, unsigned int count, unsigned long flags, int prot)
{
	vm_offset_t off;
	size_t size;

	size = count * PAGE_SIZE;
	off = kva_alloc(size);
	if (off == 0)
		return (NULL);
	/* remember the size so vunmap() can undo the mapping */
	vmmap_add((void *)off, size);
	pmap_qenter(off, pages, count);

	return ((void *)off);
}

/* Undo a vmap(); silently ignores addresses not created by vmap(). */
void
vunmap(void *addr)
{
	struct vmmap *vmmap;

	vmmap = vmmap_remove(addr);
	if (vmmap == NULL)
		return;
	pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE);
	kva_free((vm_offset_t)addr, vmmap->vm_size);
	kfree(vmmap);
}

/*
 * Allocate and format a string (va_list flavor).  Returns NULL when the
 * allocation fails.  Caller frees with kfree().
 */
char *
kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
{
	unsigned int len;
	char *p;
	va_list aq;

	/* first pass: measure */
	va_copy(aq, ap);
	len = vsnprintf(NULL, 0, fmt, aq);
	va_end(aq);

	p = kmalloc(len + 1, gfp);
	if (p != NULL)
		vsnprintf(p, len + 1, fmt, ap);

	return (p);
}

/* Allocate and format a string (varargs flavor). */
char *
kasprintf(gfp_t gfp, const char *fmt, ...)
{
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(gfp, fmt, ap);
	va_end(ap);

	return (p);
}

/* callout glue: invoke the Linux timer function with its saved argument */
static void
linux_timer_callback_wrapper(void *context)
{
	struct timer_list *timer;

	timer = context;
	timer->function(timer->data);
}

/* (Re)arm a timer to fire at the given absolute jiffies value. */
void
mod_timer(struct timer_list *timer, unsigned long expires)
{

	timer->expires = expires;
	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(expires),
	    &linux_timer_callback_wrapper, timer);
}

/* Arm a timer using the expires value already stored in it. */
void
add_timer(struct timer_list *timer)
{

	callout_reset(&timer->timer_callout,
	    linux_timer_jiffies_until(timer->expires),
	    &linux_timer_callback_wrapper, timer);
}

static void
linux_timer_init(void *arg)
{

	/*
	 * Compute an internal HZ value which can divide 2**32 to
	 * avoid timer rounding problems when the tick value wraps
	 * around 2**32:
	 */
	linux_timer_hz_mask = 1;
	while (linux_timer_hz_mask < (unsigned long)hz)
		linux_timer_hz_mask *= 2;
	/* round hz up to a power of two, minus one => usable as a mask */
	linux_timer_hz_mask--;
}
SYSINIT(linux_timer, SI_SUB_DRIVERS, SI_ORDER_FIRST, linux_timer_init, NULL);

/*
 * complete()/complete_all() backend: bump the done count and wake one
 * (or all) sleepers parked on the completion's sleepqueue channel.
 */
void
linux_complete_common(struct completion *c, int all)
{
	int wakeup_swapper;

	sleepq_lock(c);
	c->done++;
	if (all)
		wakeup_swapper = sleepq_broadcast(c, SLEEPQ_SLEEP, 0, 0);
	else
		wakeup_swapper = sleepq_signal(c, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(c);
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * Indefinite wait for done != 0 with or without signals.
 */
long
linux_wait_for_common(struct completion *c, int flags)
{
	if (SCHEDULER_STOPPED())
		return (0);

	/* non-zero flags selects an interruptible sleep */
	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	for (;;) {
		sleepq_lock(c);
		if (c->done)
			break;		/* leaves with sleepq lock held */
		sleepq_add(c, NULL, "completion", flags, 0);
		if (flags & SLEEPQ_INTERRUPTIBLE) {
			/* sleepq_wait_sig() drops the sleepqueue lock */
			if (sleepq_wait_sig(c, 0) != 0)
				return (-ERESTARTSYS);
		} else
			sleepq_wait(c, 0);
	}
	/* consume one completion while still holding the sleepq lock */
	c->done--;
	sleepq_release(c);

	return (0);
}

/*
 * Time limited wait for done != 0 with or without signals.
 */
long
linux_wait_for_timeout_common(struct completion *c, long timeout, int flags)
{
	long end = jiffies + timeout;

	if (SCHEDULER_STOPPED())
		return (0);

	/* non-zero flags selects an interruptible sleep */
	if (flags != 0)
		flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP;
	else
		flags = SLEEPQ_SLEEP;
	for (;;) {
		int ret;

		sleepq_lock(c);
		if (c->done)
			break;		/* leaves with sleepq lock held */
		sleepq_add(c, NULL, "completion", flags, 0);
		sleepq_set_timeout(c, linux_timer_jiffies_until(end));
		if (flags & SLEEPQ_INTERRUPTIBLE)
			ret = sleepq_timedwait_sig(c, 0);
		else
			ret = sleepq_timedwait(c, 0);
		if (ret != 0) {
			/* check for timeout or signal */
			if (ret == EWOULDBLOCK)
				return (0);
			else
				return (-ERESTARTSYS);
		}
	}
	/* consume one completion while still holding the sleepq lock */
	c->done--;
	sleepq_release(c);

	/* return how many jiffies are left */
	return (linux_timer_jiffies_until(end));
}

/* Non-blocking completion check: consume one if available, report result. */
int
linux_try_wait_for_completion(struct completion *c)
{
	int isdone;

	isdone = 1;
	sleepq_lock(c);
	if (c->done)
		c->done--;
	else
		isdone = 0;
	sleepq_release(c);
	return (isdone);
}

/* Peek at completion state without consuming it. */
int
linux_completion_done(struct completion *c)
{
	int isdone;

	isdone = 1;
	sleepq_lock(c);
	if (c->done == 0)
		isdone = 0;
	sleepq_release(c);
	return (isdone);
}

/* Callout handler: a delayed work item's delay expired; queue the work. */
void
linux_delayed_work_fn(void *arg)
{
	struct delayed_work *work;

	work = arg;
	taskqueue_enqueue(work->work.taskqueue, &work->work.work_task);
}

/* Taskqueue glue: run a queued work item's function. */
void
linux_work_fn(void *context, int pending)
{
	struct work_struct *work;

	work = context;
	work->fn(work);
}

/* No-op task used as a barrier by flush_workqueue()-style draining. */
void
linux_flush_fn(void *context, int pending)
{
}

/*
 * Create a workqueue backed by a FreeBSD taskqueue with the requested
 * number of worker threads.
 */
struct workqueue_struct *
linux_create_workqueue_common(const char *name, int cpus)
{
	struct workqueue_struct *wq;

	wq = kmalloc(sizeof(*wq),
	    M_WAITOK);
	wq->taskqueue = taskqueue_create(name, M_WAITOK,
	    taskqueue_thread_enqueue, &wq->taskqueue);
	atomic_set(&wq->draining, 0);
	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);

	return (wq);
}

/* Tear down a workqueue created by linux_create_workqueue_common(). */
void
destroy_workqueue(struct workqueue_struct *wq)
{
	taskqueue_free(wq->taskqueue);
	kfree(wq);
}

/*
 * kobject release for a dynamically allocated linux_cdev: destroy the
 * devfs node, free the cdev and drop the reference held on the parent.
 */
static void
linux_cdev_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kfree(cdev);
	kobject_put(parent);
}

/*
 * Same as linux_cdev_release() but for statically allocated linux_cdevs:
 * everything except the kfree() of the object itself.
 */
static void
linux_cdev_static_release(struct kobject *kobj)
{
	struct linux_cdev *cdev;
	struct kobject *parent;

	cdev = container_of(kobj, struct linux_cdev, kobj);
	parent = kobj->parent;
	if (cdev->cdev)
		destroy_dev(cdev->cdev);
	kobject_put(parent);
}

const struct kobj_type linux_cdev_ktype = {
	.release = linux_cdev_release,
};

const struct kobj_type linux_cdev_static_ktype = {
	.release = linux_cdev_static_release,
};

/* Translate a FreeBSD link-state event into NETDEV_UP/NETDEV_DOWN. */
static void
linux_handle_ifnet_link_event(void *arg, struct ifnet *ifp, int linkstate)
{
	struct notifier_block *nb;

	nb = arg;
	if (linkstate == LINK_STATE_UP)
		nb->notifier_call(nb, NETDEV_UP, ifp);
	else
		nb->notifier_call(nb, NETDEV_DOWN, ifp);
}

/* Interface arrival -> NETDEV_REGISTER. */
static void
linux_handle_ifnet_arrival_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_REGISTER, ifp);
}

/* Interface departure -> NETDEV_UNREGISTER. */
static void
linux_handle_ifnet_departure_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_UNREGISTER, ifp);
}

static void
linux_handle_iflladdr_event(void *arg, struct ifnet *ifp)
{
	/* Link-level address change -> NETDEV_CHANGEADDR. */
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEADDR, ifp);
}

/* Protocol address change -> NETDEV_CHANGEIFADDR. */
static void
linux_handle_ifaddr_event(void *arg, struct ifnet *ifp)
{
	struct notifier_block *nb;

	nb = arg;
	nb->notifier_call(nb, NETDEV_CHANGEIFADDR, ifp);
}

/*
 * Hook the notifier block into the FreeBSD ifnet event handlers; the
 * eventhandler tags are stored per-event in nb->tags for deregistration.
 */
int
register_netdevice_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_UP] = EVENTHANDLER_REGISTER(
	    ifnet_link_event, linux_handle_ifnet_link_event, nb, 0);
	nb->tags[NETDEV_REGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, linux_handle_ifnet_arrival_event, nb, 0);
	nb->tags[NETDEV_UNREGISTER] = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, linux_handle_ifnet_departure_event, nb, 0);
	nb->tags[NETDEV_CHANGEADDR] = EVENTHANDLER_REGISTER(
	    iflladdr_event, linux_handle_iflladdr_event, nb, 0);

	return (0);
}

/* Hook the notifier block into address-change events. */
int
register_inetaddr_notifier(struct notifier_block *nb)
{

	nb->tags[NETDEV_CHANGEIFADDR] = EVENTHANDLER_REGISTER(
	    ifaddr_event, linux_handle_ifaddr_event, nb, 0);
	return (0);
}

/* Undo register_netdevice_notifier(). */
int
unregister_netdevice_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifnet_link_event,
	    nb->tags[NETDEV_UP]);
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
	    nb->tags[NETDEV_REGISTER]);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    nb->tags[NETDEV_UNREGISTER]);
	EVENTHANDLER_DEREGISTER(iflladdr_event,
	    nb->tags[NETDEV_CHANGEADDR]);

	return (0);
}

/* Undo register_inetaddr_notifier(). */
int
unregister_inetaddr_notifier(struct notifier_block *nb)
{

	EVENTHANDLER_DEREGISTER(ifaddr_event,
	    nb->tags[NETDEV_CHANGEIFADDR]);

	return (0);
}

/* Carries the user comparator + private data through qsort_r(). */
struct list_sort_thunk {
	int (*cmp)(void *, struct list_head *, struct list_head *);
	void *priv;
};

static inline int
linux_le_cmp(void *priv, const void *d1, const void *d2)
{
	struct list_head *le1, *le2;
	struct list_sort_thunk *thunk;

	/*
	 * qsort_r() hands us pointers into the array of list_head
	 * pointers; unwrap one level of indirection and invoke the
	 * caller's comparator with its original private argument.
	 */
	thunk = priv;
	le1 = *(__DECONST(struct list_head **, d1));
	le2 = *(__DECONST(struct list_head **, d2));
	return ((thunk->cmp)(thunk->priv, le1, le2));
}

/*
 * Emulate Linux list_sort(): sort a doubly linked list using "cmp".
 * The entries are copied into a temporary array, sorted with
 * qsort_r(), and the list is relinked in sorted order.  Allocates with
 * M_WAITOK, so this may sleep and must not be called from contexts
 * that cannot.
 */
void
list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv,
    struct list_head *a, struct list_head *b))
{
	struct list_sort_thunk thunk;
	struct list_head **ar, *le;
	size_t count, i;

	/* Count the entries so the scratch array can be sized. */
	count = 0;
	list_for_each(le, head)
		count++;
	ar = malloc(sizeof(struct list_head *) * count, M_KMALLOC, M_WAITOK);
	i = 0;
	list_for_each(le, head)
		ar[i++] = le;
	thunk.cmp = cmp;
	thunk.priv = priv;
	qsort_r(ar, count, sizeof(struct list_head *), &thunk, linux_le_cmp);
	/* Rebuild the list in sorted order. */
	INIT_LIST_HEAD(head);
	for (i = 0; i < count; i++)
		list_add_tail(ar[i], head);
	free(ar, M_KMALLOC);
}

/*
 * Trampoline from the FreeBSD interrupt framework to a Linux-style
 * IRQ handler recorded in the irq_ent structure.
 */
void
linux_irq_handler(void *ent)
{
	struct irq_ent *irqe;

	irqe = ent;
	irqe->handler(irqe->irq, irqe->arg);
}

/*
 * Look up a LinuxKPI character device by name and major/minor number.
 * Walks the devices registered under linuxcdevsw while holding
 * dev_lock() and returns the matching linux_cdev, or NULL if no device
 * matches.
 */
struct linux_cdev *
linux_find_cdev(const char *name, unsigned major, unsigned minor)
{
	int unit = MKDEV(major, minor);
	struct cdev *cdev;

	dev_lock();
	LIST_FOREACH(cdev, &linuxcdevsw.d_devs, si_list) {
		struct linux_cdev *ldev = cdev->si_drv1;
		if (dev2unit(cdev) == unit &&
		    strcmp(kobject_name(&ldev->kobj), name) == 0) {
			break;
		}
	}
	dev_unlock();

	return (cdev != NULL ?
	    cdev->si_drv1 : NULL);
}

/*
 * Emulate Linux __register_chrdev(): create "count" character devices
 * named "name" with minors in [baseminor, baseminor + count).
 * Returns 0 on success or the error from the first failing cdev_add().
 * NOTE: devices created by earlier loop iterations are not removed
 * when a later cdev_add() fails.
 */
int
__register_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add(cdev, makedev(major, i), 1);
		if (ret != 0)
			break;
	}
	return (ret);
}

/*
 * Like __register_chrdev() but additionally sets the owner and
 * permissions of the created device nodes via cdev_add_ext().
 * NOTE: as above, no rollback of already-created devices on failure.
 */
int
__register_chrdev_p(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name,
    const struct file_operations *fops, uid_t uid,
    gid_t gid, int mode)
{
	struct linux_cdev *cdev;
	int ret = 0;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdev = cdev_alloc();
		cdev_init(cdev, fops);
		kobject_set_name(&cdev->kobj, name);

		ret = cdev_add_ext(cdev, makedev(major, i), uid, gid, mode);
		if (ret != 0)
			break;
	}
	return (ret);
}

/*
 * Emulate Linux __unregister_chrdev(): look up and delete each device
 * in the minor range that was created by the register calls above.
 * Minors that are not found are silently skipped.
 */
void
__unregister_chrdev(unsigned int major, unsigned int baseminor,
    unsigned int count, const char *name)
{
	struct linux_cdev *cdevp;
	int i;

	for (i = baseminor; i < baseminor + count; i++) {
		cdevp = linux_find_cdev(name, major, i);
		if (cdevp != NULL)
			cdev_del(cdevp);
	}
}

#if defined(__i386__) || defined(__amd64__)
/* True when the CPU advertises CLFLUSH; set once at init below. */
bool linux_cpu_has_clflush;
#endif

/*
 * One-time LinuxKPI initialization, run at SI_SUB_DRIVERS time: build
 * the sysctl tree backing the sysfs-style "sys/class" and "sys/device"
 * hierarchies, register the misc class and set up PCI and VM-mapping
 * bookkeeping (continues past this view).
 */
static void
linux_compat_init(void *arg)
{
	struct sysctl_oid *rootoid;
	int i;

#if defined(__i386__) || defined(__amd64__)
	linux_cpu_has_clflush = (cpu_feature & CPUID_CLFSH);
#endif

	rootoid = SYSCTL_ADD_ROOT_NODE(NULL,
	    OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys");
	kobject_init(&linux_class_root, &linux_class_ktype);
	kobject_set_name(&linux_class_root, "class");
	/* Publish the "class" and "device" sysctl nodes under "sys". */
	linux_class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid),
	    OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class");
	kobject_init(&linux_root_device.kobj, &linux_dev_ktype);
	kobject_set_name(&linux_root_device.kobj, "device");
	linux_root_device.kobj.oidp = SYSCTL_ADD_NODE(NULL,
	    SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL,
	    "device");
	/* Anchor the LinuxKPI root device on the newbus root. */
	linux_root_device.bsddev = root_bus;
	linux_class_misc.name = "misc";
	class_register(&linux_class_misc);
	INIT_LIST_HEAD(&pci_drivers);
	INIT_LIST_HEAD(&pci_devices);
	spin_lock_init(&pci_lock);
	/* Initialize the vmmap hash table and the lock protecting it. */
	mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF);
	for (i = 0; i < VMMAP_HASH_SIZE; i++)
		LIST_INIT(&vmmaphead[i]);
}
SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL);

/*
 * Module teardown: free the kobject names allocated for the root
 * class, root device and misc class during initialization.
 */
static void
linux_compat_uninit(void *arg)
{
	linux_kobject_kfree_name(&linux_class_root);
	linux_kobject_kfree_name(&linux_root_device.kobj);
	linux_kobject_kfree_name(&linux_class_misc.kobj);
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);

/*
 * NOTE: Linux frequently uses "unsigned long" for pointer to integer
 * conversion and vice versa, where in FreeBSD "uintptr_t" would be
 * used. Assert these types have the same size, else some parts of the
 * LinuxKPI may not work like expected:
 */
CTASSERT(sizeof(unsigned long) == sizeof(uintptr_t));