From 94ea5c51dccab8244ff0f64a4dc9de5dcc428cda Mon Sep 17 00:00:00 2001 From: Cristoforo Cataldo Date: Sat, 31 Jan 2015 14:13:33 +0100 Subject: [PATCH] f2fs: Sync F2FS to Jaegeuk's linux-3.4 branch This commit updates F2FS with latest commits applied on: https://kernel.googlesource.com/pub/scm/linux/kernel/git/jaegeuk/f2fs/+/linux-3.4 It's synched up to the following commit: 521e57c f2fs: support 3.4 by Jaegeuk Kim --- fs/f2fs/checkpoint.c | 18 ++--- fs/f2fs/data.c | 54 +++++++-------- fs/f2fs/debug.c | 49 +++++++++---- fs/f2fs/f2fs.h | 24 ++++--- fs/f2fs/file.c | 44 +++++++++++- fs/f2fs/gc.c | 20 +----- fs/f2fs/gc.h | 7 +- fs/f2fs/inline.c | 2 +- fs/f2fs/inode.c | 35 ++++------ fs/f2fs/node.c | 26 ++++++- fs/f2fs/node.h | 1 + fs/f2fs/recovery.c | 3 +- fs/f2fs/segment.c | 28 +++++--- fs/f2fs/super.c | 11 ++- fs/f2fs/trace.c | 55 ++++++++++++++- fs/f2fs/trace.h | 4 ++ fs/xfs/xfs_fs.h | 8 +-- include/linux/fs.h | 8 +++ include/trace/events/f2fs.h | 134 +++++++++++++++++------------------- 19 files changed, 323 insertions(+), 208 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9dc08547f..28c87e10c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -24,7 +24,7 @@ #include static struct kmem_cache *ino_entry_slab; -static struct kmem_cache *inode_entry_slab; +struct kmem_cache *inode_entry_slab; /* * We guarantee no failure on the returned page. 
@@ -674,7 +674,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) return -EINVAL; } -static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) +static int __add_dirty_inode(struct inode *inode, struct inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -691,7 +691,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *new; + struct inode_entry *new; int ret = 0; if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) @@ -721,7 +721,7 @@ void update_dirty_page(struct inode *inode, struct page *page) void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *new = + struct inode_entry *new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); int ret = 0; @@ -739,7 +739,7 @@ void add_dirty_dir_inode(struct inode *inode) void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *entry; + struct inode_entry *entry; if (!S_ISDIR(inode->i_mode)) return; @@ -769,7 +769,7 @@ void remove_dirty_dir_inode(struct inode *inode) void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { struct list_head *head; - struct dir_inode_entry *entry; + struct inode_entry *entry; struct inode *inode; retry: if (unlikely(f2fs_cp_error(sbi))) @@ -782,7 +782,7 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) spin_unlock(&sbi->dir_inode_lock); return; } - entry = list_entry(head->next, struct dir_inode_entry, list); + entry = list_entry(head->next, struct inode_entry, list); inode = igrab(entry->inode); spin_unlock(&sbi->dir_inode_lock); if (inode) { @@ -1108,8 +1108,8 @@ int __init create_checkpoint_caches(void) sizeof(struct ino_entry)); if (!ino_entry_slab) return -ENOMEM; - inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", - sizeof(struct 
dir_inode_entry)); + inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", + sizeof(struct inode_entry)); if (!inode_entry_slab) { kmem_cache_destroy(ino_entry_slab); return -ENOMEM; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1c10ca4ce..20f480d98 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -96,11 +96,9 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) return; if (is_read_io(fio->rw)) - trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, - fio->type, io->bio); + trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); else - trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, - fio->type, io->bio); + trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); submit_bio(fio->rw, io->bio); io->bio = NULL; @@ -137,7 +135,7 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, { struct bio *bio; - trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw); + trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(page, fio, 0); /* Allocate a new bio */ @@ -190,7 +188,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, f2fs_trace_ios(page, fio, 0); up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, fio->blk_addr); + trace_f2fs_submit_page_mbio(page, fio); } /* @@ -199,7 +197,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, * ->node_page * update block addresses in the node page */ -static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) +static void __set_data_blkaddr(struct dnode_of_data *dn) { struct f2fs_node *rn; __le32 *addr_array; @@ -212,7 +210,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); - addr_array[ofs_in_node] = cpu_to_le32(new_addr); + addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); set_page_dirty(node_page); } @@ -227,8 +225,8 @@ int reserve_new_block(struct dnode_of_data *dn) 
trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); - __set_data_blkaddr(dn, NEW_ADDR); dn->data_blkaddr = NEW_ADDR; + __set_data_blkaddr(dn); mark_inode_dirty(dn->inode); sync_inode_page(dn); return 0; @@ -293,23 +291,24 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, return 0; } -void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) +void update_extent_cache(struct dnode_of_data *dn) { struct f2fs_inode_info *fi = F2FS_I(dn->inode); pgoff_t fofs, start_fofs, end_fofs; block_t start_blkaddr, end_blkaddr; int need_update = true; - f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR); - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + - dn->ofs_in_node; + f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); /* Update the page address in the parent node */ - __set_data_blkaddr(dn, blk_addr); + __set_data_blkaddr(dn); if (is_inode_flag_set(fi, FI_NO_EXTENT)) return; + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + write_lock(&fi->ext.ext_lock); start_fofs = fi->ext.fofs; @@ -323,16 +322,16 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) /* Initial extent */ if (fi->ext.len == 0) { - if (blk_addr != NULL_ADDR) { + if (dn->data_blkaddr != NULL_ADDR) { fi->ext.fofs = fofs; - fi->ext.blk_addr = blk_addr; + fi->ext.blk_addr = dn->data_blkaddr; fi->ext.len = 1; } goto end_update; } /* Front merge */ - if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { + if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { fi->ext.fofs--; fi->ext.blk_addr--; fi->ext.len++; @@ -340,7 +339,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) } /* Back merge */ - if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { + if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { fi->ext.len++; goto end_update; } @@ -564,30 +563,25 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct 
f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_inode_info *fi = F2FS_I(dn->inode); struct f2fs_summary sum; - block_t new_blkaddr; struct node_info ni; + int seg = CURSEG_WARM_DATA; pgoff_t fofs; - int type; if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; - __set_data_blkaddr(dn, NEW_ADDR); - dn->data_blkaddr = NEW_ADDR; - get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - type = CURSEG_WARM_DATA; + if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) + seg = CURSEG_DIRECT_IO; - allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); + allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); /* direct IO doesn't use extent cache to maximize the performance */ - set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); - update_extent_cache(new_blkaddr, dn); - clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); + __set_data_blkaddr(dn); /* update i_size */ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + @@ -595,7 +589,6 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); - dn->data_blkaddr = new_blkaddr; return 0; } @@ -786,7 +779,7 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); } else { write_data_page(page, &dn, fio); - update_extent_cache(fio->blk_addr, &dn); + update_extent_cache(&dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); } out_writepage: @@ -844,7 +837,6 @@ static int f2fs_write_data_page(struct page *page, /* we should bypass data pages to proceed the kworkder jobs */ if (unlikely(f2fs_cp_error(sbi))) { SetPageError(page); - unlock_page(page); goto out; } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 2b6422156..0f721f6a1 100644 --- a/fs/f2fs/debug.c +++ 
b/fs/f2fs/debug.c @@ -57,7 +57,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->node_pages = NODE_MAPPING(sbi)->nrpages; si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; - si->sits = SIT_I(sbi)->dirty_sentries; + si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; + si->sits = MAIN_SEGS(sbi); + si->dirty_sits = SIT_I(sbi)->dirty_sentries; si->fnids = NM_I(sbi)->fcnt; si->bg_gc = sbi->bg_gc; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) @@ -161,20 +163,32 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); +get_cache: + si->cache_mem = 0; + /* build gc */ - si->base_mem += sizeof(struct f2fs_gc_kthread); + if (sbi->gc_thread) + si->cache_mem += sizeof(struct f2fs_gc_kthread); + + /* build merge flush thread */ + if (SM_I(sbi)->cmd_control_info) + si->cache_mem += sizeof(struct flush_cmd_control); -get_cache: /* free nids */ - si->cache_mem = NM_I(sbi)->fcnt; - si->cache_mem += NM_I(sbi)->nat_cnt; - npages = NODE_MAPPING(sbi)->nrpages; - si->cache_mem += npages << PAGE_CACHE_SHIFT; - npages = META_MAPPING(sbi)->nrpages; - si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); + si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid); + si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); + si->cache_mem += NM_I(sbi)->dirty_nat_cnt * + sizeof(struct nat_entry_set); + si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); + si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); + + si->page_mem = 0; + npages = NODE_MAPPING(sbi)->nrpages; + si->page_mem += npages << PAGE_CACHE_SHIFT; + npages = META_MAPPING(sbi)->nrpages; + si->page_mem += npages << PAGE_CACHE_SHIFT; } static int stat_show(struct seq_file *s, void *v) @@ 
-260,8 +274,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_dent, si->ndirty_dirs); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); - seq_printf(s, " - NATs: %9d\n - SITs: %9d\n", - si->nats, si->sits); + seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", + si->dirty_nats, si->nats, si->dirty_sits, si->sits); seq_printf(s, " - free_nids: %9d\n", si->fnids); seq_puts(s, "\nDistribution of User Blocks:"); @@ -292,9 +306,14 @@ static int stat_show(struct seq_file *s, void *v) /* memory footprint */ update_mem_info(si->sbi); - seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", - (si->base_mem + si->cache_mem) >> 10, - si->base_mem >> 10, si->cache_mem >> 10); + seq_printf(s, "\nMemory: %u KB\n", + (si->base_mem + si->cache_mem + si->page_mem) >> 10); + seq_printf(s, " - static: %u KB\n", + si->base_mem >> 10); + seq_printf(s, " - cached: %u KB\n", + si->cache_mem >> 10); + seq_printf(s, " - paged : %u KB\n", + si->page_mem >> 10); } mutex_unlock(&f2fs_stat_mutex); return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 539963fe6..4381f5c3e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -137,8 +137,14 @@ struct ino_entry { nid_t ino; /* inode number */ }; -/* for the list of directory inodes */ -struct dir_inode_entry { +/* + * for the list of directory inodes or gc inodes. + * NOTE: there are two slab users for this structure, if we add/modify/delete + * fields in structure for one of slab users, it may affect fields or size of + * other one, in this condition, it's better to split both of slab and related + * data structure. 
+ */ +struct inode_entry { struct list_head list; /* list head */ struct inode *inode; /* vfs inode pointer */ }; @@ -298,7 +304,7 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ - struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct inode_entry *dirty_dir; /* the pointer of dirty dir */ struct radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ @@ -401,7 +407,8 @@ enum { CURSEG_HOT_NODE, /* direct node blocks of directory files */ CURSEG_WARM_NODE, /* direct node blocks of normal files */ CURSEG_COLD_NODE, /* indirect node blocks */ - NO_CHECK_TYPE + NO_CHECK_TYPE, + CURSEG_DIRECT_IO, /* to use for the direct IO path */ }; struct flush_cmd { @@ -1478,7 +1485,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); -void update_extent_cache(block_t, struct dnode_of_data *); +void update_extent_cache(struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); @@ -1493,8 +1500,6 @@ void stop_gc_thread(struct f2fs_sb_info *); block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); int f2fs_gc(struct f2fs_sb_info *); void build_gc_manager(struct f2fs_sb_info *); -int __init create_gc_caches(void); -void destroy_gc_caches(void); /* * recovery.c @@ -1513,7 +1518,7 @@ struct f2fs_stat_info { int main_area_segs, main_area_sections, main_area_zones; int hit_ext, total_ext; int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; - int nats, sits, fnids; + int nats, dirty_nats, sits, dirty_sits, fnids; int 
total_count, utilization; int bg_gc, inline_inode, inline_dir, inmem_pages; unsigned int valid_count, valid_node_count, valid_inode_count; @@ -1531,7 +1536,7 @@ struct f2fs_stat_info { unsigned int segment_count[2]; unsigned int block_count[2]; unsigned int inplace_count; - unsigned base_mem, cache_mem; + unsigned base_mem, cache_mem, page_mem; }; static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) @@ -1617,6 +1622,7 @@ void f2fs_destroy_root_stats(void); #define stat_dec_inline_dir(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) +#define stat_inc_inplace_blocks(sbi) #define stat_inc_seg_count(si, type) #define stat_inc_tot_blk_count(si, blks) #define stat_inc_data_blk_count(si, blks) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0ea17a956..faf22291b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -244,6 +244,10 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) sync_nodes: sync_node_pages(sbi, ino, &wbc); + /* if cp_error was enabled, we should avoid infinite loop */ + if (unlikely(f2fs_cp_error(sbi))) + goto out; + if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); @@ -445,7 +449,8 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) if (blkaddr == NULL_ADDR) continue; - update_extent_cache(NULL_ADDR, dn); + dn->data_blkaddr = NULL_ADDR; + update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); nr_free++; } @@ -1034,6 +1039,41 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) return ret; } +static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct super_block *sb = sbi->sb; + __u32 in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__u32 __user *)arg)) + return -EFAULT; + + switch (in) { + case FS_GOING_DOWN_FULLSYNC: + sb = freeze_bdev(sb->s_bdev); 
+ if (sb && !IS_ERR(sb)) { + f2fs_stop_checkpoint(sbi); + thaw_bdev(sb->s_bdev, sb); + } + break; + case FS_GOING_DOWN_METASYNC: + /* do checkpoint only */ + f2fs_sync_fs(sb, 1); + f2fs_stop_checkpoint(sbi); + break; + case FS_GOING_DOWN_NOSYNC: + f2fs_stop_checkpoint(sbi); + break; + default: + return -EINVAL; + } + return 0; +} + static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1081,6 +1121,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_release_volatile_write(filp); case F2FS_IOC_ABORT_VOLATILE_WRITE: return f2fs_ioc_abort_volatile_write(filp); + case FS_IOC_SHUTDOWN: + return f2fs_ioc_shutdown(filp, arg); case FITRIM: return f2fs_ioc_fitrim(filp, arg); default: diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5cd405b17..60215e1f4 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -24,8 +24,6 @@ #include "gc.h" #include -static struct kmem_cache *winode_slab; - static int gc_thread_func(void *data) { struct f2fs_sb_info *sbi = data; @@ -356,7 +354,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) iput(inode); return; } - new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); + new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); new_ie->inode = inode; retry: if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { @@ -373,7 +371,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list) radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); - kmem_cache_free(winode_slab, ie); + kmem_cache_free(inode_entry_slab, ie); } } @@ -750,17 +748,3 @@ void build_gc_manager(struct f2fs_sb_info *sbi) { DIRTY_I(sbi)->v_ops = &default_v_ops; } - -int __init create_gc_caches(void) -{ - winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", - sizeof(struct inode_entry)); - if (!winode_slab) - return -ENOMEM; - return 0; -} - -void destroy_gc_caches(void) -{ - kmem_cache_destroy(winode_slab); -} 
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 8eef228ba..3498b0a91 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -35,16 +35,13 @@ struct f2fs_gc_kthread { unsigned int gc_idle; }; -struct inode_entry { - struct list_head list; - struct inode *inode; -}; - struct gc_inode_list { struct list_head ilist; struct radix_tree_root iroot; }; +extern struct kmem_cache *inode_entry_slab; + /* * inline functions */ diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 3ec7d500b..d46eccd63 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -116,7 +116,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_page_writeback(page); fio.blk_addr = dn->data_blkaddr; write_data_page(page, dn, &fio); - update_extent_cache(fio.blk_addr, dn); + update_extent_cache(dn); f2fs_wait_on_page_writeback(page, DATA); if (dirty) inode_dec_dirty_pages(dn->inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index faeee58d5..8a75669c7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -66,29 +66,23 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static int __recover_inline_status(struct inode *inode, struct page *ipage) +static void __recover_inline_status(struct inode *inode, struct page *ipage) { void *inline_data = inline_data_addr(ipage); - struct f2fs_inode *ri; - void *zbuf; + __le32 *start = inline_data; + __le32 *end = start + MAX_INLINE_DATA / sizeof(__le32); - zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS); - if (!zbuf) - return -ENOMEM; + while (start < end) { + if (*start++) { + f2fs_wait_on_page_writeback(ipage, NODE); - if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) { - kfree(zbuf); - return 0; + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage)); + set_page_dirty(ipage); + return; + } } - kfree(zbuf); - - f2fs_wait_on_page_writeback(ipage, NODE); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); - - ri = F2FS_INODE(ipage); - set_raw_inline(F2FS_I(inode), ri); - 
set_page_dirty(ipage); - return 0; + return; } static int do_read_inode(struct inode *inode) @@ -97,7 +91,6 @@ static int do_read_inode(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; - int err = 0; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { @@ -141,7 +134,7 @@ static int do_read_inode(struct inode *inode) /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) - err = __recover_inline_status(inode, node_page); + __recover_inline_status(inode, node_page); /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); @@ -151,7 +144,7 @@ static int do_read_inode(struct inode *inode) stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); - return err; + return 0; } struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 905d918fa..1888242dc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -58,6 +58,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) } else if (type == INO_ENTRIES) { int i; + if (sbi->sb->s_bdi->dirty_exceeded) + return false; for (i = 0; i <= UPDATE_INO; i++) mem_size += (sbi->im[i].ino_num * sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; @@ -348,7 +350,6 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) struct nat_entry *e; int i; - memset(&ne, 0, sizeof(struct f2fs_nat_entry)); ni->nid = nid; /* Check nat cache */ @@ -363,6 +364,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) if (e) return; + memset(&ne, 0, sizeof(struct f2fs_nat_entry)); + /* Check current segment summary */ mutex_lock(&curseg->curseg_mutex); i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); @@ -1894,7 +1897,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_summary_block *sum = 
curseg->sum_blk; - struct nat_entry_set *setvec[NATVEC_SIZE]; + struct nat_entry_set *setvec[SETVEC_SIZE]; struct nat_entry_set *set, *tmp; unsigned int found; nid_t set_idx = 0; @@ -1911,7 +1914,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, - set_idx, NATVEC_SIZE, setvec))) { + set_idx, SETVEC_SIZE, setvec))) { unsigned idx; set_idx = setvec[found - 1]->set + 1; for (idx = 0; idx < found; idx++) @@ -1991,6 +1994,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *next_i; struct nat_entry *natvec[NATVEC_SIZE]; + struct nat_entry_set *setvec[SETVEC_SIZE]; nid_t nid = 0; unsigned int found; @@ -2015,11 +2019,27 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; + nid = nat_get_nid(natvec[found - 1]) + 1; for (idx = 0; idx < found; idx++) __del_from_nat_cache(nm_i, natvec[idx]); } f2fs_bug_on(sbi, nm_i->nat_cnt); + + /* destroy nat set cache */ + nid = 0; + while ((found = __gang_lookup_nat_set(nm_i, + nid, SETVEC_SIZE, setvec))) { + unsigned idx; + + nid = setvec[found - 1]->set + 1; + for (idx = 0; idx < found; idx++) { + /* entry_cnt is not zero, when cp_error was occurred */ + f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list)); + radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set); + kmem_cache_free(nat_entry_set_slab, setvec[idx]); + } + } up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index cac8a3d9a..f405bbf24 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -25,6 +25,7 @@ /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 +#define SETVEC_SIZE 32 /* return value for read_node_page */ #define LOCKED_PAGE 1 diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b12c13d11..a9482cd65 100644 --- 
a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -396,7 +396,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* write dummy data page */ recover_data_page(sbi, NULL, &sum, src, dest); - update_extent_cache(dest, &dn); + dn.data_blkaddr = dest; + update_extent_cache(&dn); recovered++; } dn.ofs_in_node++; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 827215880..50c97dbce 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1110,18 +1110,22 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } +static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int old_segno; + + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + locate_dirty_segment(sbi, old_segno); +} + void allocate_new_segments(struct f2fs_sb_info *sbi) { - struct curseg_info *curseg; - unsigned int old_curseg; int i; - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - curseg = CURSEG_I(sbi, i); - old_curseg = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_curseg); - } + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) + __allocate_new_segments(sbi, i); } static const struct segment_allocation default_salloc_ops = { @@ -1234,11 +1238,18 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; + bool direct_io = (type == CURSEG_DIRECT_IO); + + type = direct_io ? 
CURSEG_WARM_DATA : type; curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); + /* direct_io'ed data is aligned to the segment for better performance */ + if (direct_io && curseg->next_blkoff) + __allocate_new_segments(sbi, type); + *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); /* @@ -1312,6 +1323,7 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); do_write_page(sbi, page, &sum, fio); + dn->data_blkaddr = fio->blk_addr; } void rewrite_data_page(struct page *page, struct f2fs_io_info *fio) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 87a521945..a320b41d2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1221,6 +1221,8 @@ static int __init init_f2fs_fs(void) { int err; + f2fs_build_trace_ios(); + err = init_inodecache(); if (err) goto fail; @@ -1230,12 +1232,9 @@ static int __init init_f2fs_fs(void) err = create_segment_manager_caches(); if (err) goto free_node_manager_caches; - err = create_gc_caches(); - if (err) - goto free_segment_manager_caches; err = create_checkpoint_caches(); if (err) - goto free_gc_caches; + goto free_segment_manager_caches; f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); if (!f2fs_kset) { err = -ENOMEM; @@ -1252,8 +1251,6 @@ static int __init init_f2fs_fs(void) kset_unregister(f2fs_kset); free_checkpoint_caches: destroy_checkpoint_caches(); -free_gc_caches: - destroy_gc_caches(); free_segment_manager_caches: destroy_segment_manager_caches(); free_node_manager_caches: @@ -1270,11 +1267,11 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); destroy_checkpoint_caches(); - destroy_gc_caches(); destroy_segment_manager_caches(); destroy_node_manager_caches(); destroy_inodecache(); kset_unregister(f2fs_kset); + f2fs_destroy_trace_ios(); } module_init(init_f2fs_fs) diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 19f5216b9..ce01a2c90 100644 --- 
a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -11,11 +11,13 @@ #include #include #include +#include #include "f2fs.h" #include "trace.h" -RADIX_TREE(pids, GFP_NOIO); +RADIX_TREE(pids, GFP_ATOMIC); +static spinlock_t pids_lock; struct last_io_info last_io; static inline void __print_last_io(void) @@ -58,9 +60,13 @@ void f2fs_trace_pid(struct page *page) page->private = pid; + if (radix_tree_preload(GFP_NOFS)) + return; + + spin_lock(&pids_lock); p = radix_tree_lookup(&pids, pid); if (p == current) - return; + goto out; if (p) radix_tree_delete(&pids, pid); @@ -69,7 +75,9 @@ void f2fs_trace_pid(struct page *page) trace_printk("%3x:%3x %4x %-16s\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), pid, current->comm); - +out: + spin_unlock(&pids_lock); + radix_tree_preload_end(); } void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) @@ -108,3 +116,44 @@ void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) last_io.len = 1; return; } + +void f2fs_build_trace_ios(void) +{ + spin_lock_init(&pids_lock); +} + +#define PIDVEC_SIZE 128 +static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index, + unsigned int max_items) +{ + struct radix_tree_iter iter; + void **slot; + unsigned int ret = 0; + + if (unlikely(!max_items)) + return 0; + + radix_tree_for_each_slot(slot, &pids, &iter, first_index) { + results[ret] = iter.index; + if (++ret == PIDVEC_SIZE) + break; + } + return ret; +} + +void f2fs_destroy_trace_ios(void) +{ + pid_t pid[PIDVEC_SIZE]; + pid_t next_pid = 0; + unsigned int found; + + spin_lock(&pids_lock); + while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) { + unsigned idx; + + next_pid = pid[found - 1] + 1; + for (idx = 0; idx < found; idx++) + radix_tree_delete(&pids, pid[idx]); + } + spin_unlock(&pids_lock); +} diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index aa6663be5..1041dbeb5 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -34,9 +34,13 @@ struct last_io_info { extern 
void f2fs_trace_pid(struct page *); extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); +extern void f2fs_build_trace_ios(void); +extern void f2fs_destroy_trace_ios(void); #else #define f2fs_trace_pid(p) #define f2fs_trace_ios(p, i, n) +#define f2fs_build_trace_ios() +#define f2fs_destroy_trace_ios() #endif #endif /* __F2FS_TRACE_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index c13fed8c3..a3ed6cd39 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -421,9 +421,9 @@ typedef struct xfs_handle { /* * Flags for going down operation */ -#define XFS_FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ -#define XFS_FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ -#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ +#define XFS_FSOP_GOING_FLAGS_DEFAULT FS_GOING_DOWN_FULLSYNC +#define XFS_FSOP_GOING_FLAGS_LOGFLUSH FS_GOING_DOWN_METASYNC +#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH FS_GOING_DOWN_NOSYNC /* * ioctl commands that are used by Linux filesystems @@ -485,7 +485,7 @@ typedef struct xfs_handle { #define XFS_IOC_ATTRLIST_BY_HANDLE _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq) #define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq) #define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom) -#define XFS_IOC_GOINGDOWN _IOR ('X', 125, __uint32_t) +#define XFS_IOC_GOINGDOWN FS_IOC_SHUTDOWN /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 736a55bb2..9663ce1f1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -332,6 +332,7 @@ struct inodes_stat_t { #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ #define FITHAW _IOWR('X', 120, int) /* Thaw */ #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ +#define FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */ #define FIDTRIM _IOWR('f', 128, struct fstrim_range) /* Deep discard trim */ @@ -382,6 +383,13 @@ struct inodes_stat_t { #define 
SYNC_FILE_RANGE_WRITE 2 #define SYNC_FILE_RANGE_WAIT_AFTER 4 +/* + * Flags for going down operation used by FS_IOC_SHUTDOWN + */ +#define FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ +#define FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ +#define FS_GOING_DOWN_NOSYNC 0x2 /* going down */ + #ifdef __KERNEL__ #include diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index e86c1eafc..a3e6dd4b5 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -440,38 +440,6 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, __entry->err) ); -TRACE_EVENT_CONDITION(f2fs_submit_page_bio, - - TP_PROTO(struct page *page, sector_t blkaddr, int type), - - TP_ARGS(page, blkaddr, type), - - TP_CONDITION(page->mapping), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(pgoff_t, index) - __field(sector_t, blkaddr) - __field(int, type) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; - __entry->index = page->index; - __entry->blkaddr = blkaddr; - __entry->type = type; - ), - - TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "blkaddr = 0x%llx, bio_type = %s%s", - show_dev_ino(__entry), - (unsigned long)__entry->index, - (unsigned long long)__entry->blkaddr, - show_bio_type(__entry->type)) -); - TRACE_EVENT(f2fs_get_data_block, TP_PROTO(struct inode *inode, sector_t iblock, struct buffer_head *bh, int ret), @@ -680,11 +648,63 @@ TRACE_EVENT(f2fs_reserve_new_block, __entry->ofs_in_node) ); +DECLARE_EVENT_CLASS(f2fs__submit_page_bio, + + TP_PROTO(struct page *page, struct f2fs_io_info *fio), + + TP_ARGS(page, fio), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, index) + __field(block_t, blkaddr) + __field(int, rw) + __field(int, type) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->index = page->index; 
+ __entry->blkaddr = fio->blk_addr; + __entry->rw = fio->rw; + __entry->type = fio->type; + ), + + TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " + "blkaddr = 0x%llx, rw = %s%s, type = %s", + show_dev_ino(__entry), + (unsigned long)__entry->index, + (unsigned long long)__entry->blkaddr, + show_bio_type(__entry->rw), + show_block_type(__entry->type)) +); + +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, + + TP_PROTO(struct page *page, struct f2fs_io_info *fio), + + TP_ARGS(page, fio), + + TP_CONDITION(page->mapping) +); + +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, + + TP_PROTO(struct page *page, struct f2fs_io_info *fio), + + TP_ARGS(page, fio), + + TP_CONDITION(page->mapping) +); + DECLARE_EVENT_CLASS(f2fs__submit_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_STRUCT__entry( __field(dev_t, dev) @@ -696,8 +716,8 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->rw = rw; - __entry->type = type; + __entry->rw = fio->rw; + __entry->type = fio->type; __entry->sector = bio->bi_sector; __entry->size = bio->bi_size; ), @@ -712,18 +732,20 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_CONDITION(bio) ); DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_CONDITION(bio) ); @@ -913,38 +935,6 @@ TRACE_EVENT(f2fs_writepages, 
__entry->range_cyclic) ); -TRACE_EVENT(f2fs_submit_page_mbio, - - TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), - - TP_ARGS(page, rw, type, blk_addr), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(int, rw) - __field(int, type) - __field(pgoff_t, index) - __field(block_t, block) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; - __entry->rw = rw; - __entry->type = type; - __entry->index = page->index; - __entry->block = blk_addr; - ), - - TP_printk("dev = (%d,%d), ino = %lu, %s%s, %s, index = %lu, blkaddr = 0x%llx", - show_dev_ino(__entry), - show_bio_type(__entry->rw), - show_block_type(__entry->type), - (unsigned long)__entry->index, - (unsigned long long)__entry->block) -); - TRACE_EVENT(f2fs_write_checkpoint, TP_PROTO(struct super_block *sb, int reason, char *msg),