|  | /* | 
|  | * Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved. | 
|  | * | 
|  | * This copyrighted material is made available to anyone wishing to use, | 
|  | * modify, copy, or redistribute it subject to the terms and conditions | 
|  | * of the GNU General Public License version 2. | 
|  | */ | 
|  |  | 
|  | #include <linux/fs.h> | 
|  | #include <linux/miscdevice.h> | 
|  | #include <linux/poll.h> | 
|  | #include <linux/dlm.h> | 
|  | #include <linux/dlm_plock.h> | 
|  | #include <linux/slab.h> | 
|  |  | 
|  | #include "dlm_internal.h" | 
|  | #include "lockspace.h" | 
|  |  | 
|  | static spinlock_t ops_lock; | 
|  | static struct list_head send_list; | 
|  | static struct list_head recv_list; | 
|  | static wait_queue_head_t send_wq; | 
|  | static wait_queue_head_t recv_wq; | 
|  |  | 
/*
 * One plock request exchanged with userspace through the misc device.
 * Also embedded as the first member of struct plock_xop, which lets the
 * code in this file cast between the two pointer types.
 */
struct plock_op {
	struct list_head list;		/* link on send_list or recv_list */
	int done;			/* set by dev_write() once the reply is stored */
	struct dlm_plock_info info;	/* request sent out; overwritten by the reply */
};
|  |  | 
/*
 * Extended op allocated by dlm_posix_lock().  The embedded plock_op must
 * stay the first member: dlm_plock_callback() and dev_write() cast a
 * struct plock_op pointer back to a struct plock_xop pointer.
 */
struct plock_xop {
	struct plock_op xop;
	/* fl->fl_lmops->lm_grant for asynchronous requests, NULL otherwise */
	int (*callback)(struct file_lock *fl, int result);
	void *fl;		/* caller's file_lock, handed back to callback */
	void *file;		/* file used for the local posix_lock_file() call */
	struct file_lock flc;	/* private copy of fl made with locks_copy_lock() */
};
|  |  | 
|  |  | 
|  | static inline void set_version(struct dlm_plock_info *info) | 
|  | { | 
|  | info->version[0] = DLM_PLOCK_VERSION_MAJOR; | 
|  | info->version[1] = DLM_PLOCK_VERSION_MINOR; | 
|  | info->version[2] = DLM_PLOCK_VERSION_PATCH; | 
|  | } | 
|  |  | 
|  | static int check_version(struct dlm_plock_info *info) | 
|  | { | 
|  | if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || | 
|  | (DLM_PLOCK_VERSION_MINOR < info->version[1])) { | 
|  | log_print("plock device version mismatch: " | 
|  | "kernel (%u.%u.%u), user (%u.%u.%u)", | 
|  | DLM_PLOCK_VERSION_MAJOR, | 
|  | DLM_PLOCK_VERSION_MINOR, | 
|  | DLM_PLOCK_VERSION_PATCH, | 
|  | info->version[0], | 
|  | info->version[1], | 
|  | info->version[2]); | 
|  | return -EINVAL; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void send_op(struct plock_op *op) | 
|  | { | 
|  | set_version(&op->info); | 
|  | INIT_LIST_HEAD(&op->list); | 
|  | spin_lock(&ops_lock); | 
|  | list_add_tail(&op->list, &send_list); | 
|  | spin_unlock(&ops_lock); | 
|  | wake_up(&send_wq); | 
|  | } | 
|  |  | 
|  | /* If a process was killed while waiting for the only plock on a file, | 
|  | locks_remove_posix will not see any lock on the file so it won't | 
|  | send an unlock-close to us to pass on to userspace to clean up the | 
|  | abandoned waiter.  So, we have to insert the unlock-close when the | 
|  | lock call is interrupted. */ | 
|  |  | 
|  | static void do_unlock_close(struct dlm_ls *ls, u64 number, | 
|  | struct file *file, struct file_lock *fl) | 
|  | { | 
|  | struct plock_op *op; | 
|  |  | 
|  | op = kzalloc(sizeof(*op), GFP_NOFS); | 
|  | if (!op) | 
|  | return; | 
|  |  | 
|  | op->info.optype		= DLM_PLOCK_OP_UNLOCK; | 
|  | op->info.pid		= fl->fl_pid; | 
|  | op->info.fsid		= ls->ls_global_id; | 
|  | op->info.number		= number; | 
|  | op->info.start		= 0; | 
|  | op->info.end		= OFFSET_MAX; | 
|  | if (fl->fl_lmops && fl->fl_lmops->lm_grant) | 
|  | op->info.owner	= (__u64) fl->fl_pid; | 
|  | else | 
|  | op->info.owner	= (__u64)(long) fl->fl_owner; | 
|  |  | 
|  | op->info.flags |= DLM_PLOCK_FL_CLOSE; | 
|  | send_op(op); | 
|  | } | 
|  |  | 
|  | int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, | 
|  | int cmd, struct file_lock *fl) | 
|  | { | 
|  | struct dlm_ls *ls; | 
|  | struct plock_op *op; | 
|  | struct plock_xop *xop; | 
|  | int rv; | 
|  |  | 
|  | ls = dlm_find_lockspace_local(lockspace); | 
|  | if (!ls) | 
|  | return -EINVAL; | 
|  |  | 
|  | xop = kzalloc(sizeof(*xop), GFP_NOFS); | 
|  | if (!xop) { | 
|  | rv = -ENOMEM; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | op = &xop->xop; | 
|  | op->info.optype		= DLM_PLOCK_OP_LOCK; | 
|  | op->info.pid		= fl->fl_pid; | 
|  | op->info.ex		= (fl->fl_type == F_WRLCK); | 
|  | op->info.wait		= IS_SETLKW(cmd); | 
|  | op->info.fsid		= ls->ls_global_id; | 
|  | op->info.number		= number; | 
|  | op->info.start		= fl->fl_start; | 
|  | op->info.end		= fl->fl_end; | 
|  | if (fl->fl_lmops && fl->fl_lmops->lm_grant) { | 
|  | /* fl_owner is lockd which doesn't distinguish | 
|  | processes on the nfs client */ | 
|  | op->info.owner	= (__u64) fl->fl_pid; | 
|  | xop->callback	= fl->fl_lmops->lm_grant; | 
|  | locks_init_lock(&xop->flc); | 
|  | locks_copy_lock(&xop->flc, fl); | 
|  | xop->fl		= fl; | 
|  | xop->file	= file; | 
|  | } else { | 
|  | op->info.owner	= (__u64)(long) fl->fl_owner; | 
|  | xop->callback	= NULL; | 
|  | } | 
|  |  | 
|  | send_op(op); | 
|  |  | 
|  | if (xop->callback == NULL) { | 
|  | rv = wait_event_killable(recv_wq, (op->done != 0)); | 
|  | if (rv == -ERESTARTSYS) { | 
|  | log_debug(ls, "dlm_posix_lock: wait killed %llx", | 
|  | (unsigned long long)number); | 
|  | spin_lock(&ops_lock); | 
|  | list_del(&op->list); | 
|  | spin_unlock(&ops_lock); | 
|  | kfree(xop); | 
|  | do_unlock_close(ls, number, file, fl); | 
|  | goto out; | 
|  | } | 
|  | } else { | 
|  | rv = FILE_LOCK_DEFERRED; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | spin_lock(&ops_lock); | 
|  | if (!list_empty(&op->list)) { | 
|  | log_error(ls, "dlm_posix_lock: op on list %llx", | 
|  | (unsigned long long)number); | 
|  | list_del(&op->list); | 
|  | } | 
|  | spin_unlock(&ops_lock); | 
|  |  | 
|  | rv = op->info.rv; | 
|  |  | 
|  | if (!rv) { | 
|  | if (posix_lock_file_wait(file, fl) < 0) | 
|  | log_error(ls, "dlm_posix_lock: vfs lock error %llx", | 
|  | (unsigned long long)number); | 
|  | } | 
|  |  | 
|  | kfree(xop); | 
|  | out: | 
|  | dlm_put_lockspace(ls); | 
|  | return rv; | 
|  | } | 
|  | EXPORT_SYMBOL_GPL(dlm_posix_lock); | 
|  |  | 
|  | /* Returns failure iff a successful lock operation should be canceled */ | 
|  | static int dlm_plock_callback(struct plock_op *op) | 
|  | { | 
|  | struct file *file; | 
|  | struct file_lock *fl; | 
|  | struct file_lock *flc; | 
|  | int (*notify)(struct file_lock *fl, int result) = NULL; | 
|  | struct plock_xop *xop = (struct plock_xop *)op; | 
|  | int rv = 0; | 
|  |  | 
|  | spin_lock(&ops_lock); | 
|  | if (!list_empty(&op->list)) { | 
|  | log_print("dlm_plock_callback: op on list %llx", | 
|  | (unsigned long long)op->info.number); | 
|  | list_del(&op->list); | 
|  | } | 
|  | spin_unlock(&ops_lock); | 
|  |  | 
|  | /* check if the following 2 are still valid or make a copy */ | 
|  | file = xop->file; | 
|  | flc = &xop->flc; | 
|  | fl = xop->fl; | 
|  | notify = xop->callback; | 
|  |  | 
|  | if (op->info.rv) { | 
|  | notify(fl, op->info.rv); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* got fs lock; bookkeep locally as well: */ | 
|  | flc->fl_flags &= ~FL_SLEEP; | 
|  | if (posix_lock_file(file, flc, NULL)) { | 
|  | /* | 
|  | * This can only happen in the case of kmalloc() failure. | 
|  | * The filesystem's own lock is the authoritative lock, | 
|  | * so a failure to get the lock locally is not a disaster. | 
|  | * As long as the fs cannot reliably cancel locks (especially | 
|  | * in a low-memory situation), we're better off ignoring | 
|  | * this failure than trying to recover. | 
|  | */ | 
|  | log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", | 
|  | (unsigned long long)op->info.number, file, fl); | 
|  | } | 
|  |  | 
|  | rv = notify(fl, 0); | 
|  | if (rv) { | 
|  | /* XXX: We need to cancel the fs lock here: */ | 
|  | log_print("dlm_plock_callback: lock granted after lock request " | 
|  | "failed; dangling lock!\n"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | out: | 
|  | kfree(xop); | 
|  | return rv; | 
|  | } | 
|  |  | 
|  | int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, | 
|  | struct file_lock *fl) | 
|  | { | 
|  | struct dlm_ls *ls; | 
|  | struct plock_op *op; | 
|  | int rv; | 
|  | unsigned char fl_flags = fl->fl_flags; | 
|  |  | 
|  | ls = dlm_find_lockspace_local(lockspace); | 
|  | if (!ls) | 
|  | return -EINVAL; | 
|  |  | 
|  | op = kzalloc(sizeof(*op), GFP_NOFS); | 
|  | if (!op) { | 
|  | rv = -ENOMEM; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* cause the vfs unlock to return ENOENT if lock is not found */ | 
|  | fl->fl_flags |= FL_EXISTS; | 
|  |  | 
|  | rv = posix_lock_file_wait(file, fl); | 
|  | if (rv == -ENOENT) { | 
|  | rv = 0; | 
|  | goto out_free; | 
|  | } | 
|  | if (rv < 0) { | 
|  | log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx", | 
|  | rv, (unsigned long long)number); | 
|  | } | 
|  |  | 
|  | op->info.optype		= DLM_PLOCK_OP_UNLOCK; | 
|  | op->info.pid		= fl->fl_pid; | 
|  | op->info.fsid		= ls->ls_global_id; | 
|  | op->info.number		= number; | 
|  | op->info.start		= fl->fl_start; | 
|  | op->info.end		= fl->fl_end; | 
|  | if (fl->fl_lmops && fl->fl_lmops->lm_grant) | 
|  | op->info.owner	= (__u64) fl->fl_pid; | 
|  | else | 
|  | op->info.owner	= (__u64)(long) fl->fl_owner; | 
|  |  | 
|  | if (fl->fl_flags & FL_CLOSE) { | 
|  | op->info.flags |= DLM_PLOCK_FL_CLOSE; | 
|  | send_op(op); | 
|  | rv = 0; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | send_op(op); | 
|  | wait_event(recv_wq, (op->done != 0)); | 
|  |  | 
|  | spin_lock(&ops_lock); | 
|  | if (!list_empty(&op->list)) { | 
|  | log_error(ls, "dlm_posix_unlock: op on list %llx", | 
|  | (unsigned long long)number); | 
|  | list_del(&op->list); | 
|  | } | 
|  | spin_unlock(&ops_lock); | 
|  |  | 
|  | rv = op->info.rv; | 
|  |  | 
|  | if (rv == -ENOENT) | 
|  | rv = 0; | 
|  |  | 
|  | out_free: | 
|  | kfree(op); | 
|  | out: | 
|  | dlm_put_lockspace(ls); | 
|  | fl->fl_flags = fl_flags; | 
|  | return rv; | 
|  | } | 
|  | EXPORT_SYMBOL_GPL(dlm_posix_unlock); | 
|  |  | 
|  | int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, | 
|  | struct file_lock *fl) | 
|  | { | 
|  | struct dlm_ls *ls; | 
|  | struct plock_op *op; | 
|  | int rv; | 
|  |  | 
|  | ls = dlm_find_lockspace_local(lockspace); | 
|  | if (!ls) | 
|  | return -EINVAL; | 
|  |  | 
|  | op = kzalloc(sizeof(*op), GFP_NOFS); | 
|  | if (!op) { | 
|  | rv = -ENOMEM; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | op->info.optype		= DLM_PLOCK_OP_GET; | 
|  | op->info.pid		= fl->fl_pid; | 
|  | op->info.ex		= (fl->fl_type == F_WRLCK); | 
|  | op->info.fsid		= ls->ls_global_id; | 
|  | op->info.number		= number; | 
|  | op->info.start		= fl->fl_start; | 
|  | op->info.end		= fl->fl_end; | 
|  | if (fl->fl_lmops && fl->fl_lmops->lm_grant) | 
|  | op->info.owner	= (__u64) fl->fl_pid; | 
|  | else | 
|  | op->info.owner	= (__u64)(long) fl->fl_owner; | 
|  |  | 
|  | send_op(op); | 
|  | wait_event(recv_wq, (op->done != 0)); | 
|  |  | 
|  | spin_lock(&ops_lock); | 
|  | if (!list_empty(&op->list)) { | 
|  | log_error(ls, "dlm_posix_get: op on list %llx", | 
|  | (unsigned long long)number); | 
|  | list_del(&op->list); | 
|  | } | 
|  | spin_unlock(&ops_lock); | 
|  |  | 
|  | /* info.rv from userspace is 1 for conflict, 0 for no-conflict, | 
|  | -ENOENT if there are no locks on the file */ | 
|  |  | 
|  | rv = op->info.rv; | 
|  |  | 
|  | fl->fl_type = F_UNLCK; | 
|  | if (rv == -ENOENT) | 
|  | rv = 0; | 
|  | else if (rv > 0) { | 
|  | locks_init_lock(fl); | 
|  | fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; | 
|  | fl->fl_flags = FL_POSIX; | 
|  | fl->fl_pid = op->info.pid; | 
|  | fl->fl_start = op->info.start; | 
|  | fl->fl_end = op->info.end; | 
|  | rv = 0; | 
|  | } | 
|  |  | 
|  | kfree(op); | 
|  | out: | 
|  | dlm_put_lockspace(ls); | 
|  | return rv; | 
|  | } | 
|  | EXPORT_SYMBOL_GPL(dlm_posix_get); | 
|  |  | 
|  | /* a read copies out one plock request from the send list */ | 
|  | static ssize_t dev_read(struct file *file, char __user *u, size_t count, | 
|  | loff_t *ppos) | 
|  | { | 
|  | struct dlm_plock_info info; | 
|  | struct plock_op *op = NULL; | 
|  |  | 
|  | if (count < sizeof(info)) | 
|  | return -EINVAL; | 
|  |  | 
|  | spin_lock(&ops_lock); | 
|  | if (!list_empty(&send_list)) { | 
|  | op = list_entry(send_list.next, struct plock_op, list); | 
|  | if (op->info.flags & DLM_PLOCK_FL_CLOSE) | 
|  | list_del(&op->list); | 
|  | else | 
|  | list_move(&op->list, &recv_list); | 
|  | memcpy(&info, &op->info, sizeof(info)); | 
|  | } | 
|  | spin_unlock(&ops_lock); | 
|  |  | 
|  | if (!op) | 
|  | return -EAGAIN; | 
|  |  | 
|  | /* there is no need to get a reply from userspace for unlocks | 
|  | that were generated by the vfs cleaning up for a close | 
|  | (the process did not make an unlock call). */ | 
|  |  | 
|  | if (op->info.flags & DLM_PLOCK_FL_CLOSE) | 
|  | kfree(op); | 
|  |  | 
|  | if (copy_to_user(u, &info, sizeof(info))) | 
|  | return -EFAULT; | 
|  | return sizeof(info); | 
|  | } | 
|  |  | 
|  | /* a write copies in one plock result that should match a plock_op | 
|  | on the recv list */ | 
|  | static ssize_t dev_write(struct file *file, const char __user *u, size_t count, | 
|  | loff_t *ppos) | 
|  | { | 
|  | struct dlm_plock_info info; | 
|  | struct plock_op *op; | 
|  | int found = 0, do_callback = 0; | 
|  |  | 
|  | if (count != sizeof(info)) | 
|  | return -EINVAL; | 
|  |  | 
|  | if (copy_from_user(&info, u, sizeof(info))) | 
|  | return -EFAULT; | 
|  |  | 
|  | if (check_version(&info)) | 
|  | return -EINVAL; | 
|  |  | 
|  | spin_lock(&ops_lock); | 
|  | list_for_each_entry(op, &recv_list, list) { | 
|  | if (op->info.fsid == info.fsid && | 
|  | op->info.number == info.number && | 
|  | op->info.owner == info.owner) { | 
|  | struct plock_xop *xop = (struct plock_xop *)op; | 
|  | list_del_init(&op->list); | 
|  | memcpy(&op->info, &info, sizeof(info)); | 
|  | if (xop->callback) | 
|  | do_callback = 1; | 
|  | else | 
|  | op->done = 1; | 
|  | found = 1; | 
|  | break; | 
|  | } | 
|  | } | 
|  | spin_unlock(&ops_lock); | 
|  |  | 
|  | if (found) { | 
|  | if (do_callback) | 
|  | dlm_plock_callback(op); | 
|  | else | 
|  | wake_up(&recv_wq); | 
|  | } else | 
|  | log_print("dev_write no op %x %llx", info.fsid, | 
|  | (unsigned long long)info.number); | 
|  | return count; | 
|  | } | 
|  |  | 
|  | static unsigned int dev_poll(struct file *file, poll_table *wait) | 
|  | { | 
|  | unsigned int mask = 0; | 
|  |  | 
|  | poll_wait(file, &send_wq, wait); | 
|  |  | 
|  | spin_lock(&ops_lock); | 
|  | if (!list_empty(&send_list)) | 
|  | mask = POLLIN | POLLRDNORM; | 
|  | spin_unlock(&ops_lock); | 
|  |  | 
|  | return mask; | 
|  | } | 
|  |  | 
|  | static const struct file_operations dev_fops = { | 
|  | .read    = dev_read, | 
|  | .write   = dev_write, | 
|  | .poll    = dev_poll, | 
|  | .owner   = THIS_MODULE, | 
|  | .llseek  = noop_llseek, | 
|  | }; | 
|  |  | 
|  | static struct miscdevice plock_dev_misc = { | 
|  | .minor = MISC_DYNAMIC_MINOR, | 
|  | .name = DLM_PLOCK_MISC_NAME, | 
|  | .fops = &dev_fops | 
|  | }; | 
|  |  | 
|  | int dlm_plock_init(void) | 
|  | { | 
|  | int rv; | 
|  |  | 
|  | spin_lock_init(&ops_lock); | 
|  | INIT_LIST_HEAD(&send_list); | 
|  | INIT_LIST_HEAD(&recv_list); | 
|  | init_waitqueue_head(&send_wq); | 
|  | init_waitqueue_head(&recv_wq); | 
|  |  | 
|  | rv = misc_register(&plock_dev_misc); | 
|  | if (rv) | 
|  | log_print("dlm_plock_init: misc_register failed %d", rv); | 
|  | return rv; | 
|  | } | 
|  |  | 
|  | void dlm_plock_exit(void) | 
|  | { | 
|  | if (misc_deregister(&plock_dev_misc) < 0) | 
|  | log_print("dlm_plock_exit: misc_deregister failed"); | 
|  | } | 
|  |  |