| // SPDX-License-Identifier: GPL-2.0+ |
| /* |
| * inode.c - NILFS inode operations. |
| * |
| * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. |
| * |
| * Written by Ryusuke Konishi. |
| * |
| */ |
| |
| #include <linux/buffer_head.h> |
| #include <linux/gfp.h> |
| #include <linux/mpage.h> |
| #include <linux/pagemap.h> |
| #include <linux/writeback.h> |
| #include <linux/uio.h> |
| #include "nilfs.h" |
| #include "btnode.h" |
| #include "segment.h" |
| #include "page.h" |
| #include "mdt.h" |
| #include "cpfile.h" |
| #include "ifile.h" |
| |
| /** |
| * struct nilfs_iget_args - arguments used during comparison between inodes |
| * @ino: inode number |
| * @cno: checkpoint number |
| * @root: pointer on NILFS root object (mounted checkpoint) |
| * @for_gc: inode for GC flag |
| * @for_btnc: inode for B-tree node cache flag |
| * @for_shadow: inode for shadowed page cache flag |
| */ |
| struct nilfs_iget_args { |
| u64 ino; |
| __u64 cno; |
| struct nilfs_root *root; |
| bool for_gc; |
| bool for_btnc; |
| bool for_shadow; |
| }; |
| |
| static int nilfs_iget_test(struct inode *inode, void *opaque); |
| |
| void nilfs_inode_add_blocks(struct inode *inode, int n) |
| { |
| struct nilfs_root *root = NILFS_I(inode)->i_root; |
| |
| inode_add_bytes(inode, i_blocksize(inode) * n); |
| if (root) |
| atomic64_add(n, &root->blocks_count); |
| } |
| |
| void nilfs_inode_sub_blocks(struct inode *inode, int n) |
| { |
| struct nilfs_root *root = NILFS_I(inode)->i_root; |
| |
| inode_sub_bytes(inode, i_blocksize(inode) * n); |
| if (root) |
| atomic64_sub(n, &root->blocks_count); |
| } |
| |
| /** |
| * nilfs_get_block() - get a file block on the filesystem (callback function) |
| * @inode - inode struct of the target file |
| * @blkoff - file block number |
| * @bh_result - buffer head to be mapped on |
| * @create - indicate whether allocating the block or not when it has not |
| * been allocated yet. |
| * |
| * This function does not issue actual read request of the specified data |
| * block. It is done by VFS. |
| */ |
| int nilfs_get_block(struct inode *inode, sector_t blkoff, |
| struct buffer_head *bh_result, int create) |
| { |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| struct the_nilfs *nilfs = inode->i_sb->s_fs_info; |
| __u64 blknum = 0; |
| int err = 0, ret; |
| unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits; |
| |
| down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); |
| ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); |
| up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); |
| if (ret >= 0) { /* found */ |
| map_bh(bh_result, inode->i_sb, blknum); |
| if (ret > 0) |
| bh_result->b_size = (ret << inode->i_blkbits); |
| goto out; |
| } |
| /* data block was not found */ |
| if (ret == -ENOENT && create) { |
| struct nilfs_transaction_info ti; |
| |
| bh_result->b_blocknr = 0; |
| err = nilfs_transaction_begin(inode->i_sb, &ti, 1); |
| if (unlikely(err)) |
| goto out; |
| err = nilfs_bmap_insert(ii->i_bmap, blkoff, |
| (unsigned long)bh_result); |
| if (unlikely(err != 0)) { |
| if (err == -EEXIST) { |
| /* |
| * The get_block() function could be called |
| * from multiple callers for an inode. |
| * However, the page having this block must |
| * be locked in this case. |
| */ |
| nilfs_warn(inode->i_sb, |
| "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", |
| __func__, inode->i_ino, |
| (unsigned long long)blkoff); |
| err = -EAGAIN; |
| } |
| nilfs_transaction_abort(inode->i_sb); |
| goto out; |
| } |
| nilfs_mark_inode_dirty_sync(inode); |
| nilfs_transaction_commit(inode->i_sb); /* never fails */ |
| /* Error handling should be detailed */ |
| set_buffer_new(bh_result); |
| set_buffer_delay(bh_result); |
| map_bh(bh_result, inode->i_sb, 0); |
| /* Disk block number must be changed to proper value */ |
| |
| } else if (ret == -ENOENT) { |
| /* |
| * not found is not error (e.g. hole); must return without |
| * the mapped state flag. |
| */ |
| ; |
| } else { |
| err = ret; |
| } |
| |
| out: |
| return err; |
| } |
| |
| /** |
| * nilfs_readpage() - implement readpage() method of nilfs_aops {} |
| * address_space_operations. |
| * @file - file struct of the file to be read |
| * @page - the page to be read |
| */ |
| static int nilfs_readpage(struct file *file, struct page *page) |
| { |
| return mpage_readpage(page, nilfs_get_block); |
| } |
| |
| /** |
| * nilfs_readpages() - implement readpages() method of nilfs_aops {} |
| * address_space_operations. |
| * @file - file struct of the file to be read |
| * @mapping - address_space struct used for reading multiple pages |
| * @pages - the pages to be read |
| * @nr_pages - number of pages to be read |
| */ |
| static int nilfs_readpages(struct file *file, struct address_space *mapping, |
| struct list_head *pages, unsigned int nr_pages) |
| { |
| return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block); |
| } |
| |
| static int nilfs_writepages(struct address_space *mapping, |
| struct writeback_control *wbc) |
| { |
| struct inode *inode = mapping->host; |
| int err = 0; |
| |
| if (sb_rdonly(inode->i_sb)) { |
| nilfs_clear_dirty_pages(mapping, false); |
| return -EROFS; |
| } |
| |
| if (wbc->sync_mode == WB_SYNC_ALL) |
| err = nilfs_construct_dsync_segment(inode->i_sb, inode, |
| wbc->range_start, |
| wbc->range_end); |
| return err; |
| } |
| |
| static int nilfs_writepage(struct page *page, struct writeback_control *wbc) |
| { |
| struct inode *inode = page->mapping->host; |
| int err; |
| |
| if (sb_rdonly(inode->i_sb)) { |
| /* |
| * It means that filesystem was remounted in read-only |
| * mode because of error or metadata corruption. But we |
| * have dirty pages that try to be flushed in background. |
| * So, here we simply discard this dirty page. |
| */ |
| nilfs_clear_dirty_page(page, false); |
| unlock_page(page); |
| return -EROFS; |
| } |
| |
| redirty_page_for_writepage(wbc, page); |
| unlock_page(page); |
| |
| if (wbc->sync_mode == WB_SYNC_ALL) { |
| err = nilfs_construct_segment(inode->i_sb); |
| if (unlikely(err)) |
| return err; |
| } else if (wbc->for_reclaim) |
| nilfs_flush_segment(inode->i_sb, inode->i_ino); |
| |
| return 0; |
| } |
| |
| static int nilfs_set_page_dirty(struct page *page) |
| { |
| struct inode *inode = page->mapping->host; |
| int ret = __set_page_dirty_nobuffers(page); |
| |
| if (page_has_buffers(page)) { |
| unsigned int nr_dirty = 0; |
| struct buffer_head *bh, *head; |
| |
| /* |
| * This page is locked by callers, and no other thread |
| * concurrently marks its buffers dirty since they are |
| * only dirtied through routines in fs/buffer.c in |
| * which call sites of mark_buffer_dirty are protected |
| * by page lock. |
| */ |
| bh = head = page_buffers(page); |
| do { |
| /* Do not mark hole blocks dirty */ |
| if (buffer_dirty(bh) || !buffer_mapped(bh)) |
| continue; |
| |
| set_buffer_dirty(bh); |
| nr_dirty++; |
| } while (bh = bh->b_this_page, bh != head); |
| |
| if (nr_dirty) |
| nilfs_set_file_dirty(inode, nr_dirty); |
| } else if (ret) { |
| unsigned int nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); |
| |
| nilfs_set_file_dirty(inode, nr_dirty); |
| } |
| return ret; |
| } |
| |
| void nilfs_write_failed(struct address_space *mapping, loff_t to) |
| { |
| struct inode *inode = mapping->host; |
| |
| if (to > inode->i_size) { |
| truncate_pagecache(inode, inode->i_size); |
| nilfs_truncate(inode); |
| } |
| } |
| |
| static int nilfs_write_begin(struct file *file, struct address_space *mapping, |
| loff_t pos, unsigned len, unsigned flags, |
| struct page **pagep, void **fsdata) |
| |
| { |
| struct inode *inode = mapping->host; |
| int err = nilfs_transaction_begin(inode->i_sb, NULL, 1); |
| |
| if (unlikely(err)) |
| return err; |
| |
| err = block_write_begin(mapping, pos, len, flags, pagep, |
| nilfs_get_block); |
| if (unlikely(err)) { |
| nilfs_write_failed(mapping, pos + len); |
| nilfs_transaction_abort(inode->i_sb); |
| } |
| return err; |
| } |
| |
| static int nilfs_write_end(struct file *file, struct address_space *mapping, |
| loff_t pos, unsigned len, unsigned copied, |
| struct page *page, void *fsdata) |
| { |
| struct inode *inode = mapping->host; |
| unsigned int start = pos & (PAGE_SIZE - 1); |
| unsigned int nr_dirty; |
| int err; |
| |
| nr_dirty = nilfs_page_count_clean_buffers(page, start, |
| start + copied); |
| copied = generic_write_end(file, mapping, pos, len, copied, page, |
| fsdata); |
| nilfs_set_file_dirty(inode, nr_dirty); |
| err = nilfs_transaction_commit(inode->i_sb); |
| return err ? : copied; |
| } |
| |
| static ssize_t |
| nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) |
| { |
| struct inode *inode = file_inode(iocb->ki_filp); |
| |
| if (iov_iter_rw(iter) == WRITE) |
| return 0; |
| |
| /* Needs synchronization with the cleaner */ |
| return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block); |
| } |
| |
| const struct address_space_operations nilfs_aops = { |
| .writepage = nilfs_writepage, |
| .readpage = nilfs_readpage, |
| .writepages = nilfs_writepages, |
| .set_page_dirty = nilfs_set_page_dirty, |
| .readpages = nilfs_readpages, |
| .write_begin = nilfs_write_begin, |
| .write_end = nilfs_write_end, |
| /* .releasepage = nilfs_releasepage, */ |
| .invalidatepage = block_invalidatepage, |
| .direct_IO = nilfs_direct_IO, |
| .is_partially_uptodate = block_is_partially_uptodate, |
| }; |
| |
| static int nilfs_insert_inode_locked(struct inode *inode, |
| struct nilfs_root *root, |
| unsigned long ino) |
| { |
| struct nilfs_iget_args args = { |
| .ino = ino, .root = root, .cno = 0, .for_gc = false, |
| .for_btnc = false, .for_shadow = false |
| }; |
| |
| return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); |
| } |
| |
| struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) |
| { |
| struct super_block *sb = dir->i_sb; |
| struct the_nilfs *nilfs = sb->s_fs_info; |
| struct inode *inode; |
| struct nilfs_inode_info *ii; |
| struct nilfs_root *root; |
| struct buffer_head *bh; |
| int err = -ENOMEM; |
| ino_t ino; |
| |
| inode = new_inode(sb); |
| if (unlikely(!inode)) |
| goto failed; |
| |
| mapping_set_gfp_mask(inode->i_mapping, |
| mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); |
| |
| root = NILFS_I(dir)->i_root; |
| ii = NILFS_I(inode); |
| ii->i_state = BIT(NILFS_I_NEW); |
| ii->i_root = root; |
| |
| err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); |
| if (unlikely(err)) |
| goto failed_ifile_create_inode; |
| /* reference count of i_bh inherits from nilfs_mdt_read_block() */ |
| |
| if (unlikely(ino < NILFS_USER_INO)) { |
| nilfs_msg(sb, KERN_WARNING, |
| "inode bitmap is inconsistent for reserved inodes"); |
| do { |
| brelse(bh); |
| err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); |
| if (unlikely(err)) |
| goto failed_ifile_create_inode; |
| } while (ino < NILFS_USER_INO); |
| |
| nilfs_msg(sb, KERN_INFO, |
| "repaired inode bitmap for reserved inodes"); |
| } |
| ii->i_bh = bh; |
| |
| atomic64_inc(&root->inodes_count); |
| inode_init_owner(inode, dir, mode); |
| inode->i_ino = ino; |
| inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); |
| |
| if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { |
| err = nilfs_bmap_read(ii->i_bmap, NULL); |
| if (err < 0) |
| goto failed_after_creation; |
| |
| set_bit(NILFS_I_BMAP, &ii->i_state); |
| /* No lock is needed; iget() ensures it. */ |
| } |
| |
| ii->i_flags = nilfs_mask_flags( |
| mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED); |
| |
| /* ii->i_file_acl = 0; */ |
| /* ii->i_dir_acl = 0; */ |
| ii->i_dir_start_lookup = 0; |
| nilfs_set_inode_flags(inode); |
| spin_lock(&nilfs->ns_next_gen_lock); |
| inode->i_generation = nilfs->ns_next_generation++; |
| spin_unlock(&nilfs->ns_next_gen_lock); |
| if (nilfs_insert_inode_locked(inode, root, ino) < 0) { |
| err = -EIO; |
| goto failed_after_creation; |
| } |
| |
| err = nilfs_init_acl(inode, dir); |
| if (unlikely(err)) |
| /* |
| * Never occur. When supporting nilfs_init_acl(), |
| * proper cancellation of above jobs should be considered. |
| */ |
| goto failed_after_creation; |
| |
| return inode; |
| |
| failed_after_creation: |
| clear_nlink(inode); |
| unlock_new_inode(inode); |
| iput(inode); /* |
| * raw_inode will be deleted through |
| * nilfs_evict_inode(). |
| */ |
| goto failed; |
| |
| failed_ifile_create_inode: |
| make_bad_inode(inode); |
| iput(inode); |
| failed: |
| return ERR_PTR(err); |
| } |
| |
| void nilfs_set_inode_flags(struct inode *inode) |
| { |
| unsigned int flags = NILFS_I(inode)->i_flags; |
| unsigned int new_fl = 0; |
| |
| if (flags & FS_SYNC_FL) |
| new_fl |= S_SYNC; |
| if (flags & FS_APPEND_FL) |
| new_fl |= S_APPEND; |
| if (flags & FS_IMMUTABLE_FL) |
| new_fl |= S_IMMUTABLE; |
| if (flags & FS_NOATIME_FL) |
| new_fl |= S_NOATIME; |
| if (flags & FS_DIRSYNC_FL) |
| new_fl |= S_DIRSYNC; |
| inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE | |
| S_NOATIME | S_DIRSYNC); |
| } |
| |
| int nilfs_read_inode_common(struct inode *inode, |
| struct nilfs_inode *raw_inode) |
| { |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| int err; |
| |
| inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
| i_uid_write(inode, le32_to_cpu(raw_inode->i_uid)); |
| i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); |
| set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); |
| inode->i_size = le64_to_cpu(raw_inode->i_size); |
| inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); |
| inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); |
| inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); |
| inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); |
| inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); |
| inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); |
| if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode)) |
| return -EIO; /* this inode is for metadata and corrupted */ |
| if (inode->i_nlink == 0) |
| return -ESTALE; /* this inode is deleted */ |
| |
| inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); |
| ii->i_flags = le32_to_cpu(raw_inode->i_flags); |
| #if 0 |
| ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); |
| ii->i_dir_acl = S_ISREG(inode->i_mode) ? |
| 0 : le32_to_cpu(raw_inode->i_dir_acl); |
| #endif |
| ii->i_dir_start_lookup = 0; |
| inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
| |
| if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
| S_ISLNK(inode->i_mode)) { |
| err = nilfs_bmap_read(ii->i_bmap, raw_inode); |
| if (err < 0) |
| return err; |
| set_bit(NILFS_I_BMAP, &ii->i_state); |
| /* No lock is needed; iget() ensures it. */ |
| } |
| return 0; |
| } |
| |
| static int __nilfs_read_inode(struct super_block *sb, |
| struct nilfs_root *root, unsigned long ino, |
| struct inode *inode) |
| { |
| struct the_nilfs *nilfs = sb->s_fs_info; |
| struct buffer_head *bh; |
| struct nilfs_inode *raw_inode; |
| int err; |
| |
| down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); |
| err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); |
| if (unlikely(err)) |
| goto bad_inode; |
| |
| raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh); |
| |
| err = nilfs_read_inode_common(inode, raw_inode); |
| if (err) |
| goto failed_unmap; |
| |
| if (S_ISREG(inode->i_mode)) { |
| inode->i_op = &nilfs_file_inode_operations; |
| inode->i_fop = &nilfs_file_operations; |
| inode->i_mapping->a_ops = &nilfs_aops; |
| } else if (S_ISDIR(inode->i_mode)) { |
| inode->i_op = &nilfs_dir_inode_operations; |
| inode->i_fop = &nilfs_dir_operations; |
| inode->i_mapping->a_ops = &nilfs_aops; |
| } else if (S_ISLNK(inode->i_mode)) { |
| inode->i_op = &nilfs_symlink_inode_operations; |
| inode_nohighmem(inode); |
| inode->i_mapping->a_ops = &nilfs_aops; |
| } else { |
| inode->i_op = &nilfs_special_inode_operations; |
| init_special_inode( |
| inode, inode->i_mode, |
| huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); |
| } |
| nilfs_ifile_unmap_inode(root->ifile, ino, bh); |
| brelse(bh); |
| up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); |
| nilfs_set_inode_flags(inode); |
| mapping_set_gfp_mask(inode->i_mapping, |
| mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); |
| return 0; |
| |
| failed_unmap: |
| nilfs_ifile_unmap_inode(root->ifile, ino, bh); |
| brelse(bh); |
| |
| bad_inode: |
| up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); |
| return err; |
| } |
| |
| static int nilfs_iget_test(struct inode *inode, void *opaque) |
| { |
| struct nilfs_iget_args *args = opaque; |
| struct nilfs_inode_info *ii; |
| |
| if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root) |
| return 0; |
| |
| ii = NILFS_I(inode); |
| if (test_bit(NILFS_I_BTNC, &ii->i_state)) { |
| if (!args->for_btnc) |
| return 0; |
| } else if (args->for_btnc) { |
| return 0; |
| } |
| if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { |
| if (!args->for_shadow) |
| return 0; |
| } else if (args->for_shadow) { |
| return 0; |
| } |
| |
| if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) |
| return !args->for_gc; |
| |
| return args->for_gc && args->cno == ii->i_cno; |
| } |
| |
| static int nilfs_iget_set(struct inode *inode, void *opaque) |
| { |
| struct nilfs_iget_args *args = opaque; |
| |
| inode->i_ino = args->ino; |
| NILFS_I(inode)->i_cno = args->cno; |
| NILFS_I(inode)->i_root = args->root; |
| if (args->root && args->ino == NILFS_ROOT_INO) |
| nilfs_get_root(args->root); |
| |
| if (args->for_gc) |
| NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); |
| if (args->for_btnc) |
| NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); |
| if (args->for_shadow) |
| NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); |
| return 0; |
| } |
| |
| struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, |
| unsigned long ino) |
| { |
| struct nilfs_iget_args args = { |
| .ino = ino, .root = root, .cno = 0, .for_gc = false, |
| .for_btnc = false, .for_shadow = false |
| }; |
| |
| return ilookup5(sb, ino, nilfs_iget_test, &args); |
| } |
| |
| struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, |
| unsigned long ino) |
| { |
| struct nilfs_iget_args args = { |
| .ino = ino, .root = root, .cno = 0, .for_gc = false, |
| .for_btnc = false, .for_shadow = false |
| }; |
| |
| return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); |
| } |
| |
| struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, |
| unsigned long ino) |
| { |
| struct inode *inode; |
| int err; |
| |
| inode = nilfs_iget_locked(sb, root, ino); |
| if (unlikely(!inode)) |
| return ERR_PTR(-ENOMEM); |
| if (!(inode->i_state & I_NEW)) |
| return inode; |
| |
| err = __nilfs_read_inode(sb, root, ino, inode); |
| if (unlikely(err)) { |
| iget_failed(inode); |
| return ERR_PTR(err); |
| } |
| unlock_new_inode(inode); |
| return inode; |
| } |
| |
| struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, |
| __u64 cno) |
| { |
| struct nilfs_iget_args args = { |
| .ino = ino, .root = NULL, .cno = cno, .for_gc = true, |
| .for_btnc = false, .for_shadow = false |
| }; |
| struct inode *inode; |
| int err; |
| |
| inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); |
| if (unlikely(!inode)) |
| return ERR_PTR(-ENOMEM); |
| if (!(inode->i_state & I_NEW)) |
| return inode; |
| |
| err = nilfs_init_gcinode(inode); |
| if (unlikely(err)) { |
| iget_failed(inode); |
| return ERR_PTR(err); |
| } |
| unlock_new_inode(inode); |
| return inode; |
| } |
| |
| /** |
| * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode |
| * @inode: inode object |
| * |
| * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode, |
| * or does nothing if the inode already has it. This function allocates |
| * an additional inode to maintain page cache of B-tree nodes one-on-one. |
| * |
| * Return Value: On success, 0 is returned. On errors, one of the following |
| * negative error code is returned. |
| * |
| * %-ENOMEM - Insufficient memory available. |
| */ |
| int nilfs_attach_btree_node_cache(struct inode *inode) |
| { |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| struct inode *btnc_inode; |
| struct nilfs_iget_args args; |
| |
| if (ii->i_assoc_inode) |
| return 0; |
| |
| args.ino = inode->i_ino; |
| args.root = ii->i_root; |
| args.cno = ii->i_cno; |
| args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; |
| args.for_btnc = true; |
| args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; |
| |
| btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, |
| nilfs_iget_set, &args); |
| if (unlikely(!btnc_inode)) |
| return -ENOMEM; |
| if (btnc_inode->i_state & I_NEW) { |
| nilfs_init_btnc_inode(btnc_inode); |
| unlock_new_inode(btnc_inode); |
| } |
| NILFS_I(btnc_inode)->i_assoc_inode = inode; |
| NILFS_I(btnc_inode)->i_bmap = ii->i_bmap; |
| ii->i_assoc_inode = btnc_inode; |
| |
| return 0; |
| } |
| |
| /** |
| * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode |
| * @inode: inode object |
| * |
| * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its |
| * holder inode bound to @inode, or does nothing if @inode doesn't have it. |
| */ |
| void nilfs_detach_btree_node_cache(struct inode *inode) |
| { |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| struct inode *btnc_inode = ii->i_assoc_inode; |
| |
| if (btnc_inode) { |
| NILFS_I(btnc_inode)->i_assoc_inode = NULL; |
| ii->i_assoc_inode = NULL; |
| iput(btnc_inode); |
| } |
| } |
| |
| /** |
| * nilfs_iget_for_shadow - obtain inode for shadow mapping |
| * @inode: inode object that uses shadow mapping |
| * |
| * nilfs_iget_for_shadow() allocates a pair of inodes that holds page |
| * caches for shadow mapping. The page cache for data pages is set up |
| * in one inode and the one for b-tree node pages is set up in the |
| * other inode, which is attached to the former inode. |
| * |
| * Return Value: On success, a pointer to the inode for data pages is |
| * returned. On errors, one of the following negative error code is returned |
| * in a pointer type. |
| * |
| * %-ENOMEM - Insufficient memory available. |
| */ |
| struct inode *nilfs_iget_for_shadow(struct inode *inode) |
| { |
| struct nilfs_iget_args args = { |
| .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, |
| .for_btnc = false, .for_shadow = true |
| }; |
| struct inode *s_inode; |
| int err; |
| |
| s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, |
| nilfs_iget_set, &args); |
| if (unlikely(!s_inode)) |
| return ERR_PTR(-ENOMEM); |
| if (!(s_inode->i_state & I_NEW)) |
| return inode; |
| |
| NILFS_I(s_inode)->i_flags = 0; |
| memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); |
| mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); |
| |
| err = nilfs_attach_btree_node_cache(s_inode); |
| if (unlikely(err)) { |
| iget_failed(s_inode); |
| return ERR_PTR(err); |
| } |
| unlock_new_inode(s_inode); |
| return s_inode; |
| } |
| |
| void nilfs_write_inode_common(struct inode *inode, |
| struct nilfs_inode *raw_inode, int has_bmap) |
| { |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| |
| raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
| raw_inode->i_uid = cpu_to_le32(i_uid_read(inode)); |
| raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); |
| raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); |
| raw_inode->i_size = cpu_to_le64(inode->i_size); |
| raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); |
| raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); |
| raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
| raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
| raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); |
| |
| raw_inode->i_flags = cpu_to_le32(ii->i_flags); |
| raw_inode->i_generation = cpu_to_le32(inode->i_generation); |
| |
| if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) { |
| struct the_nilfs *nilfs = inode->i_sb->s_fs_info; |
| |
| /* zero-fill unused portion in the case of super root block */ |
| raw_inode->i_xattr = 0; |
| raw_inode->i_pad = 0; |
| memset((void *)raw_inode + sizeof(*raw_inode), 0, |
| nilfs->ns_inode_size - sizeof(*raw_inode)); |
| } |
| |
| if (has_bmap) |
| nilfs_bmap_write(ii->i_bmap, raw_inode); |
| else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) |
| raw_inode->i_device_code = |
| cpu_to_le64(huge_encode_dev(inode->i_rdev)); |
| /* |
| * When extending inode, nilfs->ns_inode_size should be checked |
| * for substitutions of appended fields. |
| */ |
| } |
| |
| void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags) |
| { |
| ino_t ino = inode->i_ino; |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| struct inode *ifile = ii->i_root->ifile; |
| struct nilfs_inode *raw_inode; |
| |
| raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); |
| |
| if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) |
| memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); |
| if (flags & I_DIRTY_DATASYNC) |
| set_bit(NILFS_I_INODE_SYNC, &ii->i_state); |
| |
| nilfs_write_inode_common(inode, raw_inode, 0); |
| /* |
| * XXX: call with has_bmap = 0 is a workaround to avoid |
| * deadlock of bmap. This delays update of i_bmap to just |
| * before writing. |
| */ |
| |
| nilfs_ifile_unmap_inode(ifile, ino, ibh); |
| } |
| |
| #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ |
| |
| static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, |
| unsigned long from) |
| { |
| __u64 b; |
| int ret; |
| |
| if (!test_bit(NILFS_I_BMAP, &ii->i_state)) |
| return; |
| repeat: |
| ret = nilfs_bmap_last_key(ii->i_bmap, &b); |
| if (ret == -ENOENT) |
| return; |
| else if (ret < 0) |
| goto failed; |
| |
| if (b < from) |
| return; |
| |
| b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from); |
| ret = nilfs_bmap_truncate(ii->i_bmap, b); |
| nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb); |
| if (!ret || (ret == -ENOMEM && |
| nilfs_bmap_truncate(ii->i_bmap, b) == 0)) |
| goto repeat; |
| |
| failed: |
| nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)", |
| ret, ii->vfs_inode.i_ino); |
| } |
| |
| void nilfs_truncate(struct inode *inode) |
| { |
| unsigned long blkoff; |
| unsigned int blocksize; |
| struct nilfs_transaction_info ti; |
| struct super_block *sb = inode->i_sb; |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| |
| if (!test_bit(NILFS_I_BMAP, &ii->i_state)) |
| return; |
| if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
| return; |
| |
| blocksize = sb->s_blocksize; |
| blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits; |
| nilfs_transaction_begin(sb, &ti, 0); /* never fails */ |
| |
| block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block); |
| |
| nilfs_truncate_bmap(ii, blkoff); |
| |
| inode->i_mtime = inode->i_ctime = current_time(inode); |
| if (IS_SYNC(inode)) |
| nilfs_set_transaction_flag(NILFS_TI_SYNC); |
| |
| nilfs_mark_inode_dirty(inode); |
| nilfs_set_file_dirty(inode, 0); |
| nilfs_transaction_commit(sb); |
| /* |
| * May construct a logical segment and may fail in sync mode. |
| * But truncate has no return value. |
| */ |
| } |
| |
| static void nilfs_clear_inode(struct inode *inode) |
| { |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| |
| /* |
| * Free resources allocated in nilfs_read_inode(), here. |
| */ |
| BUG_ON(!list_empty(&ii->i_dirty)); |
| brelse(ii->i_bh); |
| ii->i_bh = NULL; |
| |
| if (nilfs_is_metadata_file_inode(inode)) |
| nilfs_mdt_clear(inode); |
| |
| if (test_bit(NILFS_I_BMAP, &ii->i_state)) |
| nilfs_bmap_clear(ii->i_bmap); |
| |
| if (!test_bit(NILFS_I_BTNC, &ii->i_state)) |
| nilfs_detach_btree_node_cache(inode); |
| |
| if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) |
| nilfs_put_root(ii->i_root); |
| } |
| |
| void nilfs_evict_inode(struct inode *inode) |
| { |
| struct nilfs_transaction_info ti; |
| struct super_block *sb = inode->i_sb; |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| struct the_nilfs *nilfs; |
| int ret; |
| |
| if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { |
| truncate_inode_pages_final(&inode->i_data); |
| clear_inode(inode); |
| nilfs_clear_inode(inode); |
| return; |
| } |
| nilfs_transaction_begin(sb, &ti, 0); /* never fails */ |
| |
| truncate_inode_pages_final(&inode->i_data); |
| |
| nilfs = sb->s_fs_info; |
| if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) { |
| /* |
| * If this inode is about to be disposed after the file system |
| * has been degraded to read-only due to file system corruption |
| * or after the writer has been detached, do not make any |
| * changes that cause writes, just clear it. |
| * Do this check after read-locking ns_segctor_sem by |
| * nilfs_transaction_begin() in order to avoid a race with |
| * the writer detach operation. |
| */ |
| clear_inode(inode); |
| nilfs_clear_inode(inode); |
| nilfs_transaction_abort(sb); |
| return; |
| } |
| |
| /* TODO: some of the following operations may fail. */ |
| nilfs_truncate_bmap(ii, 0); |
| nilfs_mark_inode_dirty(inode); |
| clear_inode(inode); |
| |
| ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); |
| if (!ret) |
| atomic64_dec(&ii->i_root->inodes_count); |
| |
| nilfs_clear_inode(inode); |
| |
| if (IS_SYNC(inode)) |
| nilfs_set_transaction_flag(NILFS_TI_SYNC); |
| nilfs_transaction_commit(sb); |
| /* |
| * May construct a logical segment and may fail in sync mode. |
| * But delete_inode has no return value. |
| */ |
| } |
| |
| int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) |
| { |
| struct nilfs_transaction_info ti; |
| struct inode *inode = d_inode(dentry); |
| struct super_block *sb = inode->i_sb; |
| int err; |
| |
| err = setattr_prepare(dentry, iattr); |
| if (err) |
| return err; |
| |
| err = nilfs_transaction_begin(sb, &ti, 0); |
| if (unlikely(err)) |
| return err; |
| |
| if ((iattr->ia_valid & ATTR_SIZE) && |
| iattr->ia_size != i_size_read(inode)) { |
| inode_dio_wait(inode); |
| truncate_setsize(inode, iattr->ia_size); |
| nilfs_truncate(inode); |
| } |
| |
| setattr_copy(inode, iattr); |
| mark_inode_dirty(inode); |
| |
| if (iattr->ia_valid & ATTR_MODE) { |
| err = nilfs_acl_chmod(inode); |
| if (unlikely(err)) |
| goto out_err; |
| } |
| |
| return nilfs_transaction_commit(sb); |
| |
| out_err: |
| nilfs_transaction_abort(sb); |
| return err; |
| } |
| |
| int nilfs_permission(struct inode *inode, int mask) |
| { |
| struct nilfs_root *root = NILFS_I(inode)->i_root; |
| |
| if ((mask & MAY_WRITE) && root && |
| root->cno != NILFS_CPTREE_CURRENT_CNO) |
| return -EROFS; /* snapshot is not writable */ |
| |
| return generic_permission(inode, mask); |
| } |
| |
| int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) |
| { |
| struct the_nilfs *nilfs = inode->i_sb->s_fs_info; |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| int err; |
| |
| spin_lock(&nilfs->ns_inode_lock); |
| if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) { |
| spin_unlock(&nilfs->ns_inode_lock); |
| err = nilfs_ifile_get_inode_block(ii->i_root->ifile, |
| inode->i_ino, pbh); |
| if (unlikely(err)) |
| return err; |
| spin_lock(&nilfs->ns_inode_lock); |
| if (ii->i_bh == NULL) |
| ii->i_bh = *pbh; |
| else if (unlikely(!buffer_uptodate(ii->i_bh))) { |
| __brelse(ii->i_bh); |
| ii->i_bh = *pbh; |
| } else { |
| brelse(*pbh); |
| *pbh = ii->i_bh; |
| } |
| } else |
| *pbh = ii->i_bh; |
| |
| get_bh(*pbh); |
| spin_unlock(&nilfs->ns_inode_lock); |
| return 0; |
| } |
| |
| int nilfs_inode_dirty(struct inode *inode) |
| { |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| struct the_nilfs *nilfs = inode->i_sb->s_fs_info; |
| int ret = 0; |
| |
| if (!list_empty(&ii->i_dirty)) { |
| spin_lock(&nilfs->ns_inode_lock); |
| ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || |
| test_bit(NILFS_I_BUSY, &ii->i_state); |
| spin_unlock(&nilfs->ns_inode_lock); |
| } |
| return ret; |
| } |
| |
| int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty) |
| { |
| struct nilfs_inode_info *ii = NILFS_I(inode); |
| struct the_nilfs *nilfs = inode->i_sb->s_fs_info; |
| |
| atomic_add(nr_dirty, &nilfs->ns_ndirtyblks); |
| |
| if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state)) |
| return 0; |
| |
| spin_lock(&nilfs->ns_inode_lock); |
| if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && |
| !test_bit(NILFS_I_BUSY, &ii->i_state)) { |
| /* |
| * Because this routine may race with nilfs_dispose_list(), |
| * we have to check NILFS_I_QUEUED here, too. |
| */ |
| if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) { |
| /* |
| * This will happen when somebody is freeing |
| * this inode. |
| */ |
| nilfs_warn(inode->i_sb, |
| "cannot set file dirty (ino=%lu): the file is being freed", |
| inode->i_ino); |
| spin_unlock(&nilfs->ns_inode_lock); |
| return -EINVAL; /* |
| * NILFS_I_DIRTY may remain for |
| * freeing inode. |
| */ |
| } |
| list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files); |
| set_bit(NILFS_I_QUEUED, &ii->i_state); |
| } |
| spin_unlock(&nilfs->ns_inode_lock); |
| return 0; |
| } |
| |
| int __nilfs_mark_inode_dirty(struct inode *inode, int flags) |
| { |
| struct the_nilfs *nilfs = inode->i_sb->s_fs_info; |
| struct buffer_head *ibh; |
| int err; |
| |
| /* |
| * Do not dirty inodes after the log writer has been detached |
| * and its nilfs_root struct has been freed. |
| */ |
| if (unlikely(nilfs_purging(nilfs))) |
| return 0; |
| |
| err = nilfs_load_inode_block(inode, &ibh); |
| if (unlikely(err)) { |
| nilfs_warn(inode->i_sb, |
| "cannot mark inode dirty (ino=%lu): error %d loading inode block", |
| inode->i_ino, err); |
| return err; |
| } |
| nilfs_update_inode(inode, ibh, flags); |
| mark_buffer_dirty(ibh); |
| nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); |
| brelse(ibh); |
| return 0; |
| } |
| |
| /** |
| * nilfs_dirty_inode - reflect changes on given inode to an inode block. |
| * @inode: inode of the file to be registered. |
| * |
| * nilfs_dirty_inode() loads a inode block containing the specified |
| * @inode and copies data from a nilfs_inode to a corresponding inode |
| * entry in the inode block. This operation is excluded from the segment |
| * construction. This function can be called both as a single operation |
| * and as a part of indivisible file operations. |
| */ |
| void nilfs_dirty_inode(struct inode *inode, int flags) |
| { |
| struct nilfs_transaction_info ti; |
| struct nilfs_mdt_info *mdi = NILFS_MDT(inode); |
| |
| if (is_bad_inode(inode)) { |
| nilfs_warn(inode->i_sb, |
| "tried to mark bad_inode dirty. ignored."); |
| dump_stack(); |
| return; |
| } |
| if (mdi) { |
| nilfs_mdt_mark_dirty(inode); |
| return; |
| } |
| nilfs_transaction_begin(inode->i_sb, &ti, 0); |
| __nilfs_mark_inode_dirty(inode, flags); |
| nilfs_transaction_commit(inode->i_sb); /* never fails */ |
| } |
| |
| int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| __u64 start, __u64 len) |
| { |
| struct the_nilfs *nilfs = inode->i_sb->s_fs_info; |
| __u64 logical = 0, phys = 0, size = 0; |
| __u32 flags = 0; |
| loff_t isize; |
| sector_t blkoff, end_blkoff; |
| sector_t delalloc_blkoff; |
| unsigned long delalloc_blklen; |
| unsigned int blkbits = inode->i_blkbits; |
| int ret, n; |
| |
| ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); |
| if (ret) |
| return ret; |
| |
| inode_lock(inode); |
| |
| isize = i_size_read(inode); |
| |
| blkoff = start >> blkbits; |
| end_blkoff = (start + len - 1) >> blkbits; |
| |
| delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff, |
| &delalloc_blkoff); |
| |
| do { |
| __u64 blkphy; |
| unsigned int maxblocks; |
| |
| if (delalloc_blklen && blkoff == delalloc_blkoff) { |
| if (size) { |
| /* End of the current extent */ |
| ret = fiemap_fill_next_extent( |
| fieinfo, logical, phys, size, flags); |
| if (ret) |
| break; |
| } |
| if (blkoff > end_blkoff) |
| break; |
| |
| flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC; |
| logical = blkoff << blkbits; |
| phys = 0; |
| size = delalloc_blklen << blkbits; |
| |
| blkoff = delalloc_blkoff + delalloc_blklen; |
| delalloc_blklen = nilfs_find_uncommitted_extent( |
| inode, blkoff, &delalloc_blkoff); |
| continue; |
| } |
| |
| /* |
| * Limit the number of blocks that we look up so as |
| * not to get into the next delayed allocation extent. |
| */ |
| maxblocks = INT_MAX; |
| if (delalloc_blklen) |
| maxblocks = min_t(sector_t, delalloc_blkoff - blkoff, |
| maxblocks); |
| blkphy = 0; |
| |
| down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); |
| n = nilfs_bmap_lookup_contig( |
| NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks); |
| up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); |
| |
| if (n < 0) { |
| int past_eof; |
| |
| if (unlikely(n != -ENOENT)) |
| break; /* error */ |
| |
| /* HOLE */ |
| blkoff++; |
| past_eof = ((blkoff << blkbits) >= isize); |
| |
| if (size) { |
| /* End of the current extent */ |
| |
| if (past_eof) |
| flags |= FIEMAP_EXTENT_LAST; |
| |
| ret = fiemap_fill_next_extent( |
| fieinfo, logical, phys, size, flags); |
| if (ret) |
| break; |
| size = 0; |
| } |
| if (blkoff > end_blkoff || past_eof) |
| break; |
| } else { |
| if (size) { |
| if (phys && blkphy << blkbits == phys + size) { |
| /* The current extent goes on */ |
| size += n << blkbits; |
| } else { |
| /* Terminate the current extent */ |
| ret = fiemap_fill_next_extent( |
| fieinfo, logical, phys, size, |
| flags); |
| if (ret || blkoff > end_blkoff) |
| break; |
| |
| /* Start another extent */ |
| flags = FIEMAP_EXTENT_MERGED; |
| logical = blkoff << blkbits; |
| phys = blkphy << blkbits; |
| size = n << blkbits; |
| } |
| } else { |
| /* Start a new extent */ |
| flags = FIEMAP_EXTENT_MERGED; |
| logical = blkoff << blkbits; |
| phys = blkphy << blkbits; |
| size = n << blkbits; |
| } |
| blkoff += n; |
| } |
| cond_resched(); |
| } while (true); |
| |
| /* If ret is 1 then we just hit the end of the extent array */ |
| if (ret == 1) |
| ret = 0; |
| |
| inode_unlock(inode); |
| return ret; |
| } |