[PATCH, RFC] Ext4 patches planned for submission upstream - Kernel

This is a discussion on [PATCH, RFC] Ext4 patches planned for submission upstream - Kernel ; The following ext4 patches are planned for submission to Linus once the merge window for 2.6.24-rc1 is opened. - Ted - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More ...

+ Reply to Thread
Page 1 of 2 1 2 LastLast
Results 1 to 20 of 31

Thread: [PATCH, RFC] Ext4 patches planned for submission upstream

  1. [PATCH, RFC] Ext4 patches planned for submission upstream

    The following ext4 patches are planned for submission to Linus once
    the merge window for 2.6.24-rc1 is opened.

    - Ted


    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext4

    From: Takashi Sato

    This patch set supports large block size(>4k, <=64k) in ext4,
    just enlarging the block size limit. But it is NOT possible to have 64kB
    blocksize on ext4 without some changes to the directory handling
    code. The reason is that an empty 64kB directory block would have a
    rec_len == (__u16)2^16 == 0, and this would cause an error to be hit in
    the filesystem. The proposed solution is to encode a 64k rec_len
    as an impossible value like rec_len = 0xffff to handle this.

    The Patch-set consists of the following 2 patches.
    [1/2] ext4: enlarge blocksize
    - Allow blocksize up to pagesize

    [2/2] ext4: fix rec_len overflow
    - prevent rec_len from overflow with 64KB blocksize

    With this patch set applied on a ppc64 box with 64k pages, we can create
    a 64k block size ext4dev filesystem and handle empty directory blocks.

    Signed-off-by: Takashi Sato
    Signed-off-by: Mingming Cao
    ---
    fs/ext4/super.c | 5 +++++
    include/linux/ext4_fs.h | 4 ++--
    2 files changed, 7 insertions(+), 2 deletions(-)

    diff --git a/fs/ext4/super.c b/fs/ext4/super.c
    index 619db84..d8bb279 100644
    --- a/fs/ext4/super.c
    +++ b/fs/ext4/super.c
    @@ -1548,6 +1548,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
    goto out_fail;
    }

    + if (!sb_set_blocksize(sb, blocksize)) {
    + printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize);
    + goto out_fail;
    + }
    +
    /*
    * The ext4 superblock will not be buffer aligned for other than 1kB
    * block sizes. We need to calculate the offset from buffer start.
    diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
    index 46b304d..5d90616 100644
    --- a/include/linux/ext4_fs.h
    +++ b/include/linux/ext4_fs.h
    @@ -73,8 +73,8 @@
    * Macro-instructions used to manage several block sizes
    */
    #define EXT4_MIN_BLOCK_SIZE 1024
    -#define EXT4_MAX_BLOCK_SIZE 4096
    -#define EXT4_MIN_BLOCK_LOG_SIZE 10
    +#define EXT4_MAX_BLOCK_SIZE 65536
    +#define EXT4_MIN_BLOCK_LOG_SIZE 10
    #ifdef __KERNEL__
    # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
    #else
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. [PATCH] ext4: Avoid rec_len overflow with 64KB block size

    From: Jan Kara

    With 64KB blocksize, a directory entry can have size 64KB, which does not fit
    into the 16 bits we have for the entry length. So we store 0xffff instead and
    convert the value when it is read from / written to disk. The patch also
    converts some places to use ext4_next_entry() when we are changing them anyway.

    Signed-off-by: Jan Kara
    Signed-off-by: Mingming Cao
    ---
    fs/ext4/dir.c | 12 ++++----
    fs/ext4/namei.c | 76 ++++++++++++++++++++++------------------------
    include/linux/ext4_fs.h | 20 ++++++++++++
    3 files changed, 62 insertions(+), 46 deletions(-)

    diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
    index 2ba49ff..8236352 100644
    --- a/fs/ext4/dir.c
    +++ b/fs/ext4/dir.c
    @@ -67,7 +67,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
    unsigned long offset)
    {
    const char * error_msg = NULL;
    - const int rlen = le16_to_cpu(de->rec_len);
    + const int rlen = ext4_rec_len_from_disk(de->rec_len);

    if (rlen < EXT4_DIR_REC_LEN(1))
    error_msg = "rec_len is smaller than minimal";
    @@ -172,10 +172,10 @@ revalidate:
    * least that it is non-zero. A
    * failure will be detected in the
    * dirent test below. */
    - if (le16_to_cpu(de->rec_len) <
    - EXT4_DIR_REC_LEN(1))
    + if (ext4_rec_len_from_disk(de->rec_len)
    + < EXT4_DIR_REC_LEN(1))
    break;
    - i += le16_to_cpu(de->rec_len);
    + i += ext4_rec_len_from_disk(de->rec_len);
    }
    offset = i;
    filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
    @@ -197,7 +197,7 @@ revalidate:
    ret = stored;
    goto out;
    }
    - offset += le16_to_cpu(de->rec_len);
    + offset += ext4_rec_len_from_disk(de->rec_len);
    if (le32_to_cpu(de->inode)) {
    /* We might block in the next section
    * if the data destination is
    @@ -219,7 +219,7 @@ revalidate:
    goto revalidate;
    stored ++;
    }
    - filp->f_pos += le16_to_cpu(de->rec_len);
    + filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
    }
    offset = 0;
    brelse (bh);
    diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
    index 94ee6f3..bc3825d 100644
    --- a/fs/ext4/namei.c
    +++ b/fs/ext4/namei.c
    @@ -280,7 +280,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
    space += EXT4_DIR_REC_LEN(de->name_len);
    names++;
    }
    - de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
    + de = ext4_next_entry(de);
    }
    printk("(%i)\n", names);
    return (struct stats) { names, space, 1 };
    @@ -551,7 +551,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
    */
    static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p)
    {
    - return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
    + return (struct ext4_dir_entry_2 *)((char*)p +
    + ext4_rec_len_from_disk(p->rec_len));
    }

    /*
    @@ -720,7 +721,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
    cond_resched();
    }
    /* XXX: do we need to check rec_len == 0 case? -Chris */
    - de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
    + de = ext4_next_entry(de);
    }
    return count;
    }
    @@ -820,7 +821,7 @@ static inline int search_dirblock(struct buffer_head * bh,
    return 1;
    }
    /* prevent looping on a bad block */
    - de_len = le16_to_cpu(de->rec_len);
    + de_len = ext4_rec_len_from_disk(de->rec_len);
    if (de_len <= 0)
    return -1;
    offset += de_len;
    @@ -1128,7 +1129,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
    rec_len = EXT4_DIR_REC_LEN(de->name_len);
    memcpy (to, de, rec_len);
    ((struct ext4_dir_entry_2 *) to)->rec_len =
    - cpu_to_le16(rec_len);
    + ext4_rec_len_to_disk(rec_len);
    de->inode = 0;
    map++;
    to += rec_len;
    @@ -1147,13 +1148,12 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)

    prev = to = de;
    while ((char*)de < base + size) {
    - next = (struct ext4_dir_entry_2 *) ((char *) de +
    - le16_to_cpu(de->rec_len));
    + next = ext4_next_entry(de);
    if (de->inode && de->name_len) {
    rec_len = EXT4_DIR_REC_LEN(de->name_len);
    if (de > to)
    memmove(to, de, rec_len);
    - to->rec_len = cpu_to_le16(rec_len);
    + to->rec_len = ext4_rec_len_to_disk(rec_len);
    prev = to;
    to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
    }
    @@ -1227,8 +1227,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
    /* Fancy dance to stay within two buffers */
    de2 = dx_move_dirents(data1, data2, map + split, count - split);
    de = dx_pack_dirents(data1,blocksize);
    - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
    - de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
    + de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
    + de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
    dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
    dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));

    @@ -1297,7 +1297,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
    return -EEXIST;
    }
    nlen = EXT4_DIR_REC_LEN(de->name_len);
    - rlen = le16_to_cpu(de->rec_len);
    + rlen = ext4_rec_len_from_disk(de->rec_len);
    if ((de->inode? rlen - nlen: rlen) >= reclen)
    break;
    de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
    @@ -1316,11 +1316,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,

    /* By now the buffer is marked for journaling */
    nlen = EXT4_DIR_REC_LEN(de->name_len);
    - rlen = le16_to_cpu(de->rec_len);
    + rlen = ext4_rec_len_from_disk(de->rec_len);
    if (de->inode) {
    struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
    - de1->rec_len = cpu_to_le16(rlen - nlen);
    - de->rec_len = cpu_to_le16(nlen);
    + de1->rec_len = ext4_rec_len_to_disk(rlen - nlen);
    + de->rec_len = ext4_rec_len_to_disk(nlen);
    de = de1;
    }
    de->file_type = EXT4_FT_UNKNOWN;
    @@ -1397,17 +1397,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,

    /* The 0th block becomes the root, move the dirents out */
    fde = &root->dotdot;
    - de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len));
    + de = (struct ext4_dir_entry_2 *)((char *)fde +
    + ext4_rec_len_from_disk(fde->rec_len));
    len = ((char *) root) + blocksize - (char *) de;
    memcpy (data1, de, len);
    de = (struct ext4_dir_entry_2 *) data1;
    top = data1 + len;
    - while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top)
    + while ((char *)(de2 = ext4_next_entry(de)) < top)
    de = de2;
    - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
    + de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
    /* Initialize the root; the dot dirents already exist */
    de = (struct ext4_dir_entry_2 *) (&root->dotdot);
    - de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2));
    + de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2));
    memset (&root->info, 0, sizeof(root->info));
    root->info.info_length = sizeof(root->info);
    root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
    @@ -1487,7 +1488,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
    return retval;
    de = (struct ext4_dir_entry_2 *) bh->b_data;
    de->inode = 0;
    - de->rec_len = cpu_to_le16(blocksize);
    + de->rec_len = ext4_rec_len_to_disk(blocksize);
    return add_dirent_to_buf(handle, dentry, inode, de, bh);
    }

    @@ -1550,7 +1551,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
    goto cleanup;
    node2 = (struct dx_node *)(bh2->b_data);
    entries2 = node2->entries;
    - node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
    + node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize);
    node2->fake.inode = 0;
    BUFFER_TRACE(frame->bh, "get_write_access");
    err = ext4_journal_get_write_access(handle, frame->bh);
    @@ -1648,9 +1649,9 @@ static int ext4_delete_entry (handle_t *handle,
    BUFFER_TRACE(bh, "get_write_access");
    ext4_journal_get_write_access(handle, bh);
    if (pde)
    - pde->rec_len =
    - cpu_to_le16(le16_to_cpu(pde->rec_len) +
    - le16_to_cpu(de->rec_len));
    + pde->rec_len = ext4_rec_len_to_disk(
    + ext4_rec_len_from_disk(pde->rec_len) +
    + ext4_rec_len_from_disk(de->rec_len));
    else
    de->inode = 0;
    dir->i_version++;
    @@ -1658,10 +1659,9 @@ static int ext4_delete_entry (handle_t *handle,
    ext4_journal_dirty_metadata(handle, bh);
    return 0;
    }
    - i += le16_to_cpu(de->rec_len);
    + i += ext4_rec_len_from_disk(de->rec_len);
    pde = de;
    - de = (struct ext4_dir_entry_2 *)
    - ((char *) de + le16_to_cpu(de->rec_len));
    + de = ext4_next_entry(de);
    }
    return -ENOENT;
    }
    @@ -1824,13 +1824,12 @@ retry:
    de = (struct ext4_dir_entry_2 *) dir_block->b_data;
    de->inode = cpu_to_le32(inode->i_ino);
    de->name_len = 1;
    - de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len));
    + de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
    strcpy (de->name, ".");
    ext4_set_de_type(dir->i_sb, de, S_IFDIR);
    - de = (struct ext4_dir_entry_2 *)
    - ((char *) de + le16_to_cpu(de->rec_len));
    + de = ext4_next_entry(de);
    de->inode = cpu_to_le32(dir->i_ino);
    - de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1));
    + de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1));
    de->name_len = 2;
    strcpy (de->name, "..");
    ext4_set_de_type(dir->i_sb, de, S_IFDIR);
    @@ -1882,8 +1881,7 @@ static int empty_dir (struct inode * inode)
    return 1;
    }
    de = (struct ext4_dir_entry_2 *) bh->b_data;
    - de1 = (struct ext4_dir_entry_2 *)
    - ((char *) de + le16_to_cpu(de->rec_len));
    + de1 = ext4_next_entry(de);
    if (le32_to_cpu(de->inode) != inode->i_ino ||
    !le32_to_cpu(de1->inode) ||
    strcmp (".", de->name) ||
    @@ -1894,9 +1892,9 @@ static int empty_dir (struct inode * inode)
    brelse (bh);
    return 1;
    }
    - offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
    - de = (struct ext4_dir_entry_2 *)
    - ((char *) de1 + le16_to_cpu(de1->rec_len));
    + offset = ext4_rec_len_from_disk(de->rec_len) +
    + ext4_rec_len_from_disk(de1->rec_len);
    + de = ext4_next_entry(de1);
    while (offset < inode->i_size ) {
    if (!bh ||
    (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
    @@ -1925,9 +1923,8 @@ static int empty_dir (struct inode * inode)
    brelse (bh);
    return 0;
    }
    - offset += le16_to_cpu(de->rec_len);
    - de = (struct ext4_dir_entry_2 *)
    - ((char *) de + le16_to_cpu(de->rec_len));
    + offset += ext4_rec_len_from_disk(de->rec_len);
    + de = ext4_next_entry(de);
    }
    brelse (bh);
    return 1;
    @@ -2282,8 +2279,7 @@ retry:
    }

    #define PARENT_INO(buffer) \
    - ((struct ext4_dir_entry_2 *) ((char *) buffer + \
    - le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode
    + (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode)

    /*
    * Anybody can rename anything with this: the permission checks are left to the
    diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
    index 5d90616..ab608e8 100644
    --- a/include/linux/ext4_fs.h
    +++ b/include/linux/ext4_fs.h
    @@ -767,6 +767,26 @@ struct ext4_dir_entry_2 {
    #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
    #define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
    ~EXT4_DIR_ROUND)
    +#define EXT4_MAX_REC_LEN ((1<<16)-1)
    +
    +static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
    +{
    + unsigned len = le16_to_cpu(dlen);
    +
    + if (len == EXT4_MAX_REC_LEN)
    + return 1 << 16;
    + return len;
    +}
    +
    +static inline __le16 ext4_rec_len_to_disk(unsigned len)
    +{
    + if (len == (1 << 16))
    + return cpu_to_le16(EXT4_MAX_REC_LEN);
    + else if (len > (1 << 16))
    + BUG();
    + return cpu_to_le16(len);
    +}
    +
    /*
    * Hash Tree Directory indexing
    * (c) Daniel Phillips, 2001
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. [PATCH] jbd2: fix commit code to properly abort journal

    From: Jan Kara

    We should really call journal_abort() and not __journal_abort_hard() in
    case of errors. The latter call does not record the error in the journal
    superblock, and thus the filesystem won't be marked as having errors later
    (and the user could happily mount it without any warning).

    Signed-off-by: Jan Kara
    Cc:
    Signed-off-by: Andrew Morton
    ---
    fs/jbd2/commit.c | 8 ++++----
    1 files changed, 4 insertions(+), 4 deletions(-)

    diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
    index b898ee4..6986f33 100644
    --- a/fs/jbd2/commit.c
    +++ b/fs/jbd2/commit.c
    @@ -475,7 +475,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
    spin_unlock(&journal->j_list_lock);

    if (err)
    - __jbd2_journal_abort_hard(journal);
    + jbd2_journal_abort(journal, err);

    jbd2_journal_write_revoke_records(journal, commit_transaction);

    @@ -533,7 +533,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)

    descriptor = jbd2_journal_get_descriptor_buffer(journal);
    if (!descriptor) {
    - __jbd2_journal_abort_hard(journal);
    + jbd2_journal_abort(journal, -EIO);
    continue;
    }

    @@ -566,7 +566,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
    and repeat this loop: we'll fall into the
    refile-on-abort condition above. */
    if (err) {
    - __jbd2_journal_abort_hard(journal);
    + jbd2_journal_abort(journal, err);
    continue;
    }

    @@ -757,7 +757,7 @@ wait_for_iobuf:
    err = -EIO;

    if (err)
    - __jbd2_journal_abort_hard(journal);
    + jbd2_journal_abort(journal, err);

    /* End of a transaction! Finally, we can do checkpoint
    processing: any buffers committed as a result of this
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. [PATCH] JBD2/Ext4: Convert kmalloc to kzalloc in jbd2/ext4

    From: Mingming Cao

    Convert kmalloc to kzalloc() and get rid of the memset().

    Signed-off-by: Mingming Cao
    ---
    fs/ext4/xattr.c | 3 +--
    fs/jbd2/journal.c | 3 +--
    fs/jbd2/transaction.c | 3 +--
    3 files changed, 3 insertions(+), 6 deletions(-)

    diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
    index b10d68f..12c7d65 100644
    --- a/fs/ext4/xattr.c
    +++ b/fs/ext4/xattr.c
    @@ -750,12 +750,11 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
    }
    } else {
    /* Allocate a buffer where we construct the new block. */
    - s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
    + s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
    /* assert(header == s->base) */
    error = -ENOMEM;
    if (s->base == NULL)
    goto cleanup;
    - memset(s->base, 0, sb->s_blocksize);
    header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
    header(s->base)->h_blocks = cpu_to_le32(1);
    header(s->base)->h_refcount = cpu_to_le32(1);
    diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
    index 0e329a3..f12c65b 100644
    --- a/fs/jbd2/journal.c
    +++ b/fs/jbd2/journal.c
    @@ -654,10 +654,9 @@ static journal_t * journal_init_common (void)
    journal_t *journal;
    int err;

    - journal = kmalloc(sizeof(*journal), GFP_KERNEL);
    + journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
    if (!journal)
    goto fail;
    - memset(journal, 0, sizeof(*journal));

    init_waitqueue_head(&journal->j_wait_transaction_locked);
    init_waitqueue_head(&journal->j_wait_logspace);
    diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
    index a5fb70f..b1fcf2b 100644
    --- a/fs/jbd2/transaction.c
    +++ b/fs/jbd2/transaction.c
    @@ -96,13 +96,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle)

    alloc_transaction:
    if (!journal->j_running_transaction) {
    - new_transaction = kmalloc(sizeof(*new_transaction),
    + new_transaction = kzalloc(sizeof(*new_transaction),
    GFP_NOFS|__GFP_NOFAIL);
    if (!new_transaction) {
    ret = -ENOMEM;
    goto out;
    }
    - memset(new_transaction, 0, sizeof(*new_transaction));
    }

    jbd_debug(3, "New handle %p going live.\n", handle);
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. [PATCH] jbd/jbd2: Journal initialization doesn't need __GFP_NOFAIL

    From: Aneesh Kumar K.V

    Signed-off-by: Mingming Cao
    Signed-off-by: "Theodore Ts'o"
    ---
    fs/jbd/journal.c | 2 +-
    fs/jbd2/journal.c | 2 +-
    2 files changed, 2 insertions(+), 2 deletions(-)

    diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
    index ae2c25d..8d6d475 100644
    --- a/fs/jbd/journal.c
    +++ b/fs/jbd/journal.c
    @@ -653,7 +653,7 @@ static journal_t * journal_init_common (void)
    journal_t *journal;
    int err;

    - journal = kmalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
    + journal = kmalloc(sizeof(*journal), GFP_KERNEL);
    if (!journal)
    goto fail;
    memset(journal, 0, sizeof(*journal));
    diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
    index 4281244..0e329a3 100644
    --- a/fs/jbd2/journal.c
    +++ b/fs/jbd2/journal.c
    @@ -654,7 +654,7 @@ static journal_t * journal_init_common (void)
    journal_t *journal;
    int err;

    - journal = kmalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
    + journal = kmalloc(sizeof(*journal), GFP_KERNEL);
    if (!journal)
    goto fail;
    memset(journal, 0, sizeof(*journal));
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  7. [PATCH] ext4: Convert ext4_extent.ee_start to ext4_extent.ee_start_lo

    From: Aneesh Kumar K.V

    Convert ext4_extent.ee_start to ext4_extent.ee_start_lo.
    This helps in finding bugs caused by direct partial access of
    these split 48-bit values.

    Also fix direct partial accesses in the ext4 code.

    Signed-off-by: Aneesh Kumar K.V
    ---
    fs/ext4/extents.c | 8 +++-----
    include/linux/ext4_fs_extents.h | 2 +-
    2 files changed, 4 insertions(+), 6 deletions(-)

    diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
    index 2be404f..c03056a 100644
    --- a/fs/ext4/extents.c
    +++ b/fs/ext4/extents.c
    @@ -52,7 +52,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
    {
    ext4_fsblk_t block;

    - block = le32_to_cpu(ex->ee_start);
    + block = le32_to_cpu(ex->ee_start_lo);
    block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
    return block;
    }
    @@ -77,7 +77,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
    */
    static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
    {
    - ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
    + ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
    ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
    }

    @@ -1409,8 +1409,7 @@ has_space:
    eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
    nearex = path[depth].p_ext;
    nearex->ee_block = newext->ee_block;
    - nearex->ee_start = newext->ee_start;
    - nearex->ee_start_hi = newext->ee_start_hi;
    + ext4_ext_store_pblock(nearex, ext_pblock(newext));
    nearex->ee_len = newext->ee_len;

    merge:
    @@ -2177,7 +2176,6 @@ int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
    }
    /* ex2: iblock to iblock + maxblocks-1 : initialised */
    ex2->ee_block = cpu_to_le32(iblock);
    - ex2->ee_start = cpu_to_le32(newblock);
    ext4_ext_store_pblock(ex2, newblock);
    ex2->ee_len = cpu_to_le16(allocated);
    if (ex2 != ex)
    diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
    index 81406f3..cb2dfbb 100644
    --- a/include/linux/ext4_fs_extents.h
    +++ b/include/linux/ext4_fs_extents.h
    @@ -74,7 +74,7 @@ struct ext4_extent {
    __le32 ee_block; /* first logical block extent covers */
    __le16 ee_len; /* number of blocks covered by extent */
    __le16 ee_start_hi; /* high 16 bits of physical block */
    - __le32 ee_start; /* low 32 bits of physical block */
    + __le32 ee_start_lo; /* low 32 bits of physical block */
    };

    /*
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  8. [PATCH] ext4: Fix sparse warnings

    From: Aneesh Kumar K.V

    Signed-off-by: Aneesh Kumar K.V
    Signed-off-by: Andrew Morton
    ---
    fs/ext4/inode.c | 6 ++++--
    include/linux/ext4_fs.h | 14 +++++++-------
    2 files changed, 11 insertions(+), 9 deletions(-)

    diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
    index f283522..a2e1ea4 100644
    --- a/fs/ext4/inode.c
    +++ b/fs/ext4/inode.c
    @@ -3167,12 +3167,14 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
    iloc, handle);
    if (ret) {
    EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
    - if (mnt_count != sbi->s_es->s_mnt_count) {
    + if (mnt_count !=
    + le16_to_cpu(sbi->s_es->s_mnt_count)) {
    ext4_warning(inode->i_sb, __FUNCTION__,
    "Unable to expand inode %lu. Delete"
    " some EAs or run e2fsck.",
    inode->i_ino);
    - mnt_count = sbi->s_es->s_mnt_count;
    + mnt_count =
    + le16_to_cpu(sbi->s_es->s_mnt_count);
    }
    }
    }
    diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
    index b77b59f..722d4ef 100644
    --- a/include/linux/ext4_fs.h
    +++ b/include/linux/ext4_fs.h
    @@ -574,13 +574,13 @@ struct ext4_super_block {
    /*150*/ __le32 s_blocks_count_hi; /* Blocks count */
    __le32 s_r_blocks_count_hi; /* Reserved blocks count */
    __le32 s_free_blocks_count_hi; /* Free blocks count */
    - __u16 s_min_extra_isize; /* All inodes have at least # bytes */
    - __u16 s_want_extra_isize; /* New inodes should reserve # bytes */
    - __u32 s_flags; /* Miscellaneous flags */
    - __u16 s_raid_stride; /* RAID stride */
    - __u16 s_mmp_interval; /* # seconds to wait in MMP checking */
    - __u64 s_mmp_block; /* Block for multi-mount protection */
    - __u32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
    + __le16 s_min_extra_isize; /* All inodes have at least # bytes */
    + __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
    + __le32 s_flags; /* Miscellaneous flags */
    + __le16 s_raid_stride; /* RAID stride */
    + __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
    + __le64 s_mmp_block; /* Block for multi-mount protection */
    + __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
    __u32 s_reserved[163]; /* Padding to the end of the block */
    };

    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  9. [PATCH] ext4: Convert s_blocks_count to s_blocks_count_lo

    From: Aneesh Kumar K.V

    Convert s_blocks_count to s_blocks_count_lo.
    This helps in finding bugs caused by direct partial access of
    these split 64-bit values.

    Also fix direct partial accesses in the ext4 code.

    Signed-off-by: Aneesh Kumar K.V
    ---
    fs/ext4/super.c | 4 ++--
    include/linux/ext4_fs.h | 6 +++---
    2 files changed, 5 insertions(+), 5 deletions(-)

    diff --git a/fs/ext4/super.c b/fs/ext4/super.c
    index 7548408..dc7fe4c 100644
    --- a/fs/ext4/super.c
    +++ b/fs/ext4/super.c
    @@ -2593,7 +2593,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)

    if (test_opt(sb, MINIX_DF)) {
    sbi->s_overhead_last = 0;
    - } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
    + } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
    unsigned long ngroups = sbi->s_groups_count, i;
    ext4_fsblk_t overhead = 0;
    smp_rmb();
    @@ -2628,7 +2628,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
    overhead += ngroups * (2 + sbi->s_itb_per_group);
    sbi->s_overhead_last = overhead;
    smp_wmb();
    - sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
    + sbi->s_blocks_last = ext4_blocks_count(es);
    }

    buf->f_type = EXT4_SUPER_MAGIC;
    diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
    index 5eb4953..339412d 100644
    --- a/include/linux/ext4_fs.h
    +++ b/include/linux/ext4_fs.h
    @@ -501,7 +501,7 @@ do { \
    */
    struct ext4_super_block {
    /*00*/ __le32 s_inodes_count; /* Inodes count */
    - __le32 s_blocks_count; /* Blocks count */
    + __le32 s_blocks_count_lo; /* Blocks count */
    __le32 s_r_blocks_count; /* Reserved blocks count */
    __le32 s_free_blocks_count; /* Free blocks count */
    /*10*/ __le32 s_free_inodes_count; /* Free inodes count */
    @@ -1001,7 +1001,7 @@ extern void ext4_inode_table_set(struct super_block *sb,
    static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
    {
    return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
    - le32_to_cpu(es->s_blocks_count);
    + le32_to_cpu(es->s_blocks_count_lo);
    }

    static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
    @@ -1019,7 +1019,7 @@ static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
    static inline void ext4_blocks_count_set(struct ext4_super_block *es,
    ext4_fsblk_t blk)
    {
    - es->s_blocks_count = cpu_to_le32((u32)blk);
    + es->s_blocks_count_lo = cpu_to_le32((u32)blk);
    es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
    }

    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  10. [PATCH] jbd/jbd2: JBD memory allocation cleanups

    From: Mingming Cao

    JBD: Replace slab allocations with page cache allocations

    JBD allocates memory for committed_data and frozen_data from slab. However,
    JBD should not pass slab pages down to the block layer. Use page allocator
    pages instead. This will also prepare JBD for the large blocksize patchset.


    Also, this patch cleans up jbd_kmalloc and replaces it with kmalloc directly.

    Signed-off-by: Christoph Lameter
    Signed-off-by: Mingming Cao
    ---
    fs/jbd/commit.c | 6 +-
    fs/jbd/journal.c | 99 ++----------------------------------------------
    fs/jbd/transaction.c | 12 +++---
    fs/jbd2/commit.c | 6 +-
    fs/jbd2/journal.c | 99 ++----------------------------------------------
    fs/jbd2/transaction.c | 18 ++++----
    include/linux/jbd.h | 18 +++++----
    include/linux/jbd2.h | 19 +++++----
    8 files changed, 51 insertions(+), 226 deletions(-)

    diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
    index a003d50..a263d82 100644
    --- a/fs/jbd/commit.c
    +++ b/fs/jbd/commit.c
    @@ -375,7 +375,7 @@ void journal_commit_transaction(journal_t *journal)
    struct buffer_head *bh = jh2bh(jh);

    jbd_lock_bh_state(bh);
    - jbd_slab_free(jh->b_committed_data, bh->b_size);
    + jbd_free(jh->b_committed_data, bh->b_size);
    jh->b_committed_data = NULL;
    jbd_unlock_bh_state(bh);
    }
    @@ -792,14 +792,14 @@ restart_loop:
    * Otherwise, we can just throw away the frozen data now.
    */
    if (jh->b_committed_data) {
    - jbd_slab_free(jh->b_committed_data, bh->b_size);
    + jbd_free(jh->b_committed_data, bh->b_size);
    jh->b_committed_data = NULL;
    if (jh->b_frozen_data) {
    jh->b_committed_data = jh->b_frozen_data;
    jh->b_frozen_data = NULL;
    }
    } else if (jh->b_frozen_data) {
    - jbd_slab_free(jh->b_frozen_data, bh->b_size);
    + jbd_free(jh->b_frozen_data, bh->b_size);
    jh->b_frozen_data = NULL;
    }

    diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
    index 06ab3c1..ae2c25d 100644
    --- a/fs/jbd/journal.c
    +++ b/fs/jbd/journal.c
    @@ -83,7 +83,6 @@ EXPORT_SYMBOL(journal_force_commit);

    static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
    static void __journal_abort_soft (journal_t *journal, int errno);
    -static int journal_create_jbd_slab(size_t slab_size);

    /*
    * Helper function used to manage commit timeouts
    @@ -334,10 +333,10 @@ repeat:
    char *tmp;

    jbd_unlock_bh_state(bh_in);
    - tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
    + tmp = jbd_alloc(bh_in->b_size, GFP_NOFS);
    jbd_lock_bh_state(bh_in);
    if (jh_in->b_frozen_data) {
    - jbd_slab_free(tmp, bh_in->b_size);
    + jbd_free(tmp, bh_in->b_size);
    goto repeat;
    }

    @@ -654,7 +653,7 @@ static journal_t * journal_init_common (void)
    journal_t *journal;
    int err;

    - journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
    + journal = kmalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
    if (!journal)
    goto fail;
    memset(journal, 0, sizeof(*journal));
    @@ -1095,13 +1094,6 @@ int journal_load(journal_t *journal)
    }
    }

    - /*
    - * Create a slab for this blocksize
    - */
    - err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
    - if (err)
    - return err;
    -
    /* Let the recovery code check whether it needs to recover any
    * data from the journal. */
    if (journal_recover(journal))
    @@ -1615,86 +1607,6 @@ int journal_blocks_per_page(struct inode *inode)
    }

    /*
    - * Simple support for retrying memory allocations. Introduced to help to
    - * debug different VM deadlock avoidance strategies.
    - */
    -void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
    -{
    - return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
    -}
    -
    -/*
    - * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
    - * and allocate frozen and commit buffers from these slabs.
    - *
    - * Reason for doing this is to avoid, SLAB_DEBUG - since it could
    - * cause bh to cross page boundary.
    - */
    -
    -#define JBD_MAX_SLABS 5
    -#define JBD_SLAB_INDEX(size) (size >> 11)
    -
    -static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
    -static const char *jbd_slab_names[JBD_MAX_SLABS] = {
    - "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
    -};
    -
    -static void journal_destroy_jbd_slabs(void)
    -{
    - int i;
    -
    - for (i = 0; i < JBD_MAX_SLABS; i++) {
    - if (jbd_slab[i])
    - kmem_cache_destroy(jbd_slab[i]);
    - jbd_slab[i] = NULL;
    - }
    -}
    -
    -static int journal_create_jbd_slab(size_t slab_size)
    -{
    - int i = JBD_SLAB_INDEX(slab_size);
    -
    - BUG_ON(i >= JBD_MAX_SLABS);
    -
    - /*
    - * Check if we already have a slab created for this size
    - */
    - if (jbd_slab[i])
    - return 0;
    -
    - /*
    - * Create a slab and force alignment to be same as slabsize -
    - * this will make sure that allocations won't cross the page
    - * boundary.
    - */
    - jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
    - slab_size, slab_size, 0, NULL);
    - if (!jbd_slab[i]) {
    - printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
    - return -ENOMEM;
    - }
    - return 0;
    -}
    -
    -void * jbd_slab_alloc(size_t size, gfp_t flags)
    -{
    - int idx;
    -
    - idx = JBD_SLAB_INDEX(size);
    - BUG_ON(jbd_slab[idx] == NULL);
    - return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
    -}
    -
    -void jbd_slab_free(void *ptr, size_t size)
    -{
    - int idx;
    -
    - idx = JBD_SLAB_INDEX(size);
    - BUG_ON(jbd_slab[idx] == NULL);
    - kmem_cache_free(jbd_slab[idx], ptr);
    -}
    -
    -/*
    * Journal_head storage management
    */
    static struct kmem_cache *journal_head_cache;
    @@ -1881,13 +1793,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
    printk(KERN_WARNING "%s: freeing "
    "b_frozen_data\n",
    __FUNCTION__);
    - jbd_slab_free(jh->b_frozen_data, bh->b_size);
    + jbd_free(jh->b_frozen_data, bh->b_size);
    }
    if (jh->b_committed_data) {
    printk(KERN_WARNING "%s: freeing "
    "b_committed_data\n",
    __FUNCTION__);
    - jbd_slab_free(jh->b_committed_data, bh->b_size);
    + jbd_free(jh->b_committed_data, bh->b_size);
    }
    bh->b_private = NULL;
    jh->b_bh = NULL; /* debug, really */
    @@ -2042,7 +1954,6 @@ static void journal_destroy_caches(void)
    journal_destroy_revoke_caches();
    journal_destroy_journal_head_cache();
    journal_destroy_handle_cache();
    - journal_destroy_jbd_slabs();
    }

    static int __init journal_init(void)
    diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
    index 772b653..11d846c 100644
    --- a/fs/jbd/transaction.c
    +++ b/fs/jbd/transaction.c
    @@ -96,8 +96,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle)

    alloc_transaction:
    if (!journal->j_running_transaction) {
    - new_transaction = jbd_kmalloc(sizeof(*new_transaction),
    - GFP_NOFS);
    + new_transaction = kmalloc(sizeof(*new_transaction),
    + GFP_NOFS|__GFP_NOFAIL);
    if (!new_transaction) {
    ret = -ENOMEM;
    goto out;
    @@ -668,7 +668,7 @@ repeat:
    JBUFFER_TRACE(jh, "allocate memory for buffer");
    jbd_unlock_bh_state(bh);
    frozen_buffer =
    - jbd_slab_alloc(jh2bh(jh)->b_size,
    + jbd_alloc(jh2bh(jh)->b_size,
    GFP_NOFS);
    if (!frozen_buffer) {
    printk(KERN_EMERG
    @@ -728,7 +728,7 @@ done:

    out:
    if (unlikely(frozen_buffer)) /* It's usually NULL */
    - jbd_slab_free(frozen_buffer, bh->b_size);
    + jbd_free(frozen_buffer, bh->b_size);

    JBUFFER_TRACE(jh, "exit");
    return error;
    @@ -881,7 +881,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)

    repeat:
    if (!jh->b_committed_data) {
    - committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
    + committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
    if (!committed_data) {
    printk(KERN_EMERG "%s: No memory for committed data\n",
    __FUNCTION__);
    @@ -908,7 +908,7 @@ repeat:
    out:
    journal_put_journal_head(jh);
    if (unlikely(committed_data))
    - jbd_slab_free(committed_data, bh->b_size);
    + jbd_free(committed_data, bh->b_size);
    return err;
    }

    diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
    index c0f59d1..2cac34a 100644
    --- a/fs/jbd2/commit.c
    +++ b/fs/jbd2/commit.c
    @@ -384,7 +384,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
    struct buffer_head *bh = jh2bh(jh);

    jbd_lock_bh_state(bh);
    - jbd2_slab_free(jh->b_committed_data, bh->b_size);
    + jbd2_free(jh->b_committed_data, bh->b_size);
    jh->b_committed_data = NULL;
    jbd_unlock_bh_state(bh);
    }
    @@ -801,14 +801,14 @@ restart_loop:
    * Otherwise, we can just throw away the frozen data now.
    */
    if (jh->b_committed_data) {
    - jbd2_slab_free(jh->b_committed_data, bh->b_size);
    + jbd2_free(jh->b_committed_data, bh->b_size);
    jh->b_committed_data = NULL;
    if (jh->b_frozen_data) {
    jh->b_committed_data = jh->b_frozen_data;
    jh->b_frozen_data = NULL;
    }
    } else if (jh->b_frozen_data) {
    - jbd2_slab_free(jh->b_frozen_data, bh->b_size);
    + jbd2_free(jh->b_frozen_data, bh->b_size);
    jh->b_frozen_data = NULL;
    }

    diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
    index f37324a..4281244 100644
    --- a/fs/jbd2/journal.c
    +++ b/fs/jbd2/journal.c
    @@ -84,7 +84,6 @@ EXPORT_SYMBOL(jbd2_journal_force_commit);

    static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
    static void __journal_abort_soft (journal_t *journal, int errno);
    -static int jbd2_journal_create_jbd_slab(size_t slab_size);

    /*
    * Helper function used to manage commit timeouts
    @@ -335,10 +334,10 @@ repeat:
    char *tmp;

    jbd_unlock_bh_state(bh_in);
    - tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS);
    + tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
    jbd_lock_bh_state(bh_in);
    if (jh_in->b_frozen_data) {
    - jbd2_slab_free(tmp, bh_in->b_size);
    + jbd2_free(tmp, bh_in->b_size);
    goto repeat;
    }

    @@ -655,7 +654,7 @@ static journal_t * journal_init_common (void)
    journal_t *journal;
    int err;

    - journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
    + journal = kmalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
    if (!journal)
    goto fail;
    memset(journal, 0, sizeof(*journal));
    @@ -1096,13 +1095,6 @@ int jbd2_journal_load(journal_t *journal)
    }
    }

    - /*
    - * Create a slab for this blocksize
    - */
    - err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
    - if (err)
    - return err;
    -
    /* Let the recovery code check whether it needs to recover any
    * data from the journal. */
    if (jbd2_journal_recover(journal))
    @@ -1627,86 +1619,6 @@ size_t journal_tag_bytes(journal_t *journal)
    }

    /*
    - * Simple support for retrying memory allocations. Introduced to help to
    - * debug different VM deadlock avoidance strategies.
    - */
    -void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
    -{
    - return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
    -}
    -
    -/*
    - * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
    - * and allocate frozen and commit buffers from these slabs.
    - *
    - * Reason for doing this is to avoid, SLAB_DEBUG - since it could
    - * cause bh to cross page boundary.
    - */
    -
    -#define JBD_MAX_SLABS 5
    -#define JBD_SLAB_INDEX(size) (size >> 11)
    -
    -static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
    -static const char *jbd_slab_names[JBD_MAX_SLABS] = {
    - "jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k"
    -};
    -
    -static void jbd2_journal_destroy_jbd_slabs(void)
    -{
    - int i;
    -
    - for (i = 0; i < JBD_MAX_SLABS; i++) {
    - if (jbd_slab[i])
    - kmem_cache_destroy(jbd_slab[i]);
    - jbd_slab[i] = NULL;
    - }
    -}
    -
    -static int jbd2_journal_create_jbd_slab(size_t slab_size)
    -{
    - int i = JBD_SLAB_INDEX(slab_size);
    -
    - BUG_ON(i >= JBD_MAX_SLABS);
    -
    - /*
    - * Check if we already have a slab created for this size
    - */
    - if (jbd_slab[i])
    - return 0;
    -
    - /*
    - * Create a slab and force alignment to be same as slabsize -
    - * this will make sure that allocations won't cross the page
    - * boundary.
    - */
    - jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
    - slab_size, slab_size, 0, NULL);
    - if (!jbd_slab[i]) {
    - printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
    - return -ENOMEM;
    - }
    - return 0;
    -}
    -
    -void * jbd2_slab_alloc(size_t size, gfp_t flags)
    -{
    - int idx;
    -
    - idx = JBD_SLAB_INDEX(size);
    - BUG_ON(jbd_slab[idx] == NULL);
    - return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
    -}
    -
    -void jbd2_slab_free(void *ptr, size_t size)
    -{
    - int idx;
    -
    - idx = JBD_SLAB_INDEX(size);
    - BUG_ON(jbd_slab[idx] == NULL);
    - kmem_cache_free(jbd_slab[idx], ptr);
    -}
    -
    -/*
    * Journal_head storage management
    */
    static struct kmem_cache *jbd2_journal_head_cache;
    @@ -1893,13 +1805,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
    printk(KERN_WARNING "%s: freeing "
    "b_frozen_data\n",
    __FUNCTION__);
    - jbd2_slab_free(jh->b_frozen_data, bh->b_size);
    + jbd2_free(jh->b_frozen_data, bh->b_size);
    }
    if (jh->b_committed_data) {
    printk(KERN_WARNING "%s: freeing "
    "b_committed_data\n",
    __FUNCTION__);
    - jbd2_slab_free(jh->b_committed_data, bh->b_size);
    + jbd2_free(jh->b_committed_data, bh->b_size);
    }
    bh->b_private = NULL;
    jh->b_bh = NULL; /* debug, really */
    @@ -2040,7 +1952,6 @@ static void jbd2_journal_destroy_caches(void)
    jbd2_journal_destroy_revoke_caches();
    jbd2_journal_destroy_jbd2_journal_head_cache();
    jbd2_journal_destroy_handle_cache();
    - jbd2_journal_destroy_jbd_slabs();
    }

    static int __init journal_init(void)
    diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
    index 7946ff4..a5fb70f 100644
    --- a/fs/jbd2/transaction.c
    +++ b/fs/jbd2/transaction.c
    @@ -96,8 +96,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle)

    alloc_transaction:
    if (!journal->j_running_transaction) {
    - new_transaction = jbd_kmalloc(sizeof(*new_transaction),
    - GFP_NOFS);
    + new_transaction = kmalloc(sizeof(*new_transaction),
    + GFP_NOFS|__GFP_NOFAIL);
    if (!new_transaction) {
    ret = -ENOMEM;
    goto out;
    @@ -236,7 +236,7 @@ out:
    /* Allocate a new handle. This should probably be in a slab... */
    static handle_t *new_handle(int nblocks)
    {
    - handle_t *handle = jbd_alloc_handle(GFP_NOFS);
    + handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
    if (!handle)
    return NULL;
    memset(handle, 0, sizeof(*handle));
    @@ -282,7 +282,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)

    err = start_this_handle(journal, handle);
    if (err < 0) {
    - jbd_free_handle(handle);
    + jbd2_free_handle(handle);
    current->journal_info = NULL;
    handle = ERR_PTR(err);
    }
    @@ -668,7 +668,7 @@ repeat:
    JBUFFER_TRACE(jh, "allocate memory for buffer");
    jbd_unlock_bh_state(bh);
    frozen_buffer =
    - jbd2_slab_alloc(jh2bh(jh)->b_size,
    + jbd2_alloc(jh2bh(jh)->b_size,
    GFP_NOFS);
    if (!frozen_buffer) {
    printk(KERN_EMERG
    @@ -728,7 +728,7 @@ done:

    out:
    if (unlikely(frozen_buffer)) /* It's usually NULL */
    - jbd2_slab_free(frozen_buffer, bh->b_size);
    + jbd2_free(frozen_buffer, bh->b_size);

    JBUFFER_TRACE(jh, "exit");
    return error;
    @@ -881,7 +881,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)

    repeat:
    if (!jh->b_committed_data) {
    - committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
    + committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
    if (!committed_data) {
    printk(KERN_EMERG "%s: No memory for committed data\n",
    __FUNCTION__);
    @@ -908,7 +908,7 @@ repeat:
    out:
    jbd2_journal_put_journal_head(jh);
    if (unlikely(committed_data))
    - jbd2_slab_free(committed_data, bh->b_size);
    + jbd2_free(committed_data, bh->b_size);
    return err;
    }

    @@ -1411,7 +1411,7 @@ int jbd2_journal_stop(handle_t *handle)
    spin_unlock(&journal->j_state_lock);
    }

    - jbd_free_handle(handle);
    + jbd2_free_handle(handle);
    return err;
    }

    diff --git a/include/linux/jbd.h b/include/linux/jbd.h
    index 4527375..26216c1 100644
    --- a/include/linux/jbd.h
    +++ b/include/linux/jbd.h
    @@ -71,14 +71,16 @@ extern int journal_enable_debug;
    #define jbd_debug(f, a...) /**/
    #endif

    -extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
    -extern void * jbd_slab_alloc(size_t size, gfp_t flags);
    -extern void jbd_slab_free(void *ptr, size_t size);
    -
    -#define jbd_kmalloc(size, flags) \
    - __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
    -#define jbd_rep_kmalloc(size, flags) \
    - __jbd_kmalloc(__FUNCTION__, (size), (flags), 1)
    +
    +static inline void *jbd_alloc(size_t size, gfp_t flags)
    +{
    + return (void *)__get_free_pages(flags, get_order(size));
    +}
    +
    +static inline void jbd_free(void *ptr, size_t size)
    +{
    + free_pages((unsigned long)ptr, get_order(size));
    +};

    #define JFS_MIN_JOURNAL_BLOCKS 1024

    diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
    index 260d6d7..5f8b876 100644
    --- a/include/linux/jbd2.h
    +++ b/include/linux/jbd2.h
    @@ -71,14 +71,15 @@ extern u8 jbd2_journal_enable_debug;
    #define jbd_debug(f, a...) /**/
    #endif

    -extern void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
    -extern void * jbd2_slab_alloc(size_t size, gfp_t flags);
    -extern void jbd2_slab_free(void *ptr, size_t size);
    +static inline void *jbd2_alloc(size_t size, gfp_t flags)
    +{
    + return (void *)__get_free_pages(flags, get_order(size));
    +}

    -#define jbd_kmalloc(size, flags) \
    - __jbd2_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
    -#define jbd_rep_kmalloc(size, flags) \
    - __jbd2_kmalloc(__FUNCTION__, (size), (flags), 1)
    +static inline void jbd2_free(void *ptr, size_t size)
    +{
    + free_pages((unsigned long)ptr, get_order(size));
    +};

    #define JBD2_MIN_JOURNAL_BLOCKS 1024

    @@ -959,12 +960,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh);
    */
    extern struct kmem_cache *jbd2_handle_cache;

    -static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags)
    +static inline handle_t *jbd2_alloc_handle(gfp_t gfp_flags)
    {
    return kmem_cache_alloc(jbd2_handle_cache, gfp_flags);
    }

    -static inline void jbd_free_handle(handle_t *handle)
    +static inline void jbd2_free_handle(handle_t *handle)
    {
    kmem_cache_free(jbd2_handle_cache, handle);
    }
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  11. [PATCH] ext4: Convert s_r_blocks_count and s_free_blocks_count

    From: Aneesh Kumar K.V

    Convert s_r_blocks_count and s_free_blocks_count to
    s_r_blocks_count_lo and s_free_blocks_count_lo

    This helps in finding BUGs due to direct partial access of
    these split 64 bit values

    Also fix direct partial access in ext4 code

    Signed-off-by: Aneesh Kumar K.V
    ---
    fs/ext4/super.c | 2 +-
    include/linux/ext4_fs.h | 12 ++++++------
    2 files changed, 7 insertions(+), 7 deletions(-)

    diff --git a/fs/ext4/super.c b/fs/ext4/super.c
    index dc7fe4c..dbd114c 100644
    --- a/fs/ext4/super.c
    +++ b/fs/ext4/super.c
    @@ -2635,7 +2635,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
    buf->f_bsize = sb->s_blocksize;
    buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
    buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
    - es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
    + ext4_free_blocks_count_set(es, buf->f_bfree);
    buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
    if (buf->f_bfree < ext4_r_blocks_count(es))
    buf->f_bavail = 0;
    diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
    index 339412d..fb31c1a 100644
    --- a/include/linux/ext4_fs.h
    +++ b/include/linux/ext4_fs.h
    @@ -502,8 +502,8 @@ do { \
    struct ext4_super_block {
    /*00*/ __le32 s_inodes_count; /* Inodes count */
    __le32 s_blocks_count_lo; /* Blocks count */
    - __le32 s_r_blocks_count; /* Reserved blocks count */
    - __le32 s_free_blocks_count; /* Free blocks count */
    + __le32 s_r_blocks_count_lo; /* Reserved blocks count */
    + __le32 s_free_blocks_count_lo; /* Free blocks count */
    /*10*/ __le32 s_free_inodes_count; /* Free inodes count */
    __le32 s_first_data_block; /* First Data Block */
    __le32 s_log_block_size; /* Block size */
    @@ -1007,13 +1007,13 @@ static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
    static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
    {
    return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
    - le32_to_cpu(es->s_r_blocks_count);
    + le32_to_cpu(es->s_r_blocks_count_lo);
    }

    static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
    {
    return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
    - le32_to_cpu(es->s_free_blocks_count);
    + le32_to_cpu(es->s_free_blocks_count_lo);
    }

    static inline void ext4_blocks_count_set(struct ext4_super_block *es,
    @@ -1026,14 +1026,14 @@ static inline void ext4_blocks_count_set(struct ext4_super_block *es,
    static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
    ext4_fsblk_t blk)
    {
    - es->s_free_blocks_count = cpu_to_le32((u32)blk);
    + es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
    es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
    }

    static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
    ext4_fsblk_t blk)
    {
    - es->s_r_blocks_count = cpu_to_le32((u32)blk);
    + es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
    es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
    }

    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  12. [PATCH] ext4: Convert bg_block_bitmap to bg_block_bitmap_lo

    From: Aneesh Kumar K.V

    Convert bg_block_bitmap to bg_block_bitmap_lo
    This helps in catching some BUGS due to direct
    partial access of these split fields.

    Signed-off-by: Aneesh Kumar K.V
    ---
    fs/ext4/super.c | 6 +++---
    include/linux/ext4_fs.h | 2 +-
    2 files changed, 4 insertions(+), 4 deletions(-)

    diff --git a/fs/ext4/super.c b/fs/ext4/super.c
    index d8bb279..02a2418 100644
    --- a/fs/ext4/super.c
    +++ b/fs/ext4/super.c
    @@ -70,9 +70,9 @@ static void ext4_write_super_lockfs(struct super_block *sb);
    ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
    struct ext4_group_desc *bg)
    {
    - return le32_to_cpu(bg->bg_block_bitmap) |
    + return le32_to_cpu(bg->bg_block_bitmap_lo) |
    (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
    - (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
    + (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
    }

    ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
    @@ -94,7 +94,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
    void ext4_block_bitmap_set(struct super_block *sb,
    struct ext4_group_desc *bg, ext4_fsblk_t blk)
    {
    - bg->bg_block_bitmap = cpu_to_le32((u32)blk);
    + bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
    if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
    bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
    }
    diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
    index ab608e8..c26e30e 100644
    --- a/include/linux/ext4_fs.h
    +++ b/include/linux/ext4_fs.h
    @@ -105,7 +105,7 @@
    */
    struct ext4_group_desc
    {
    - __le32 bg_block_bitmap; /* Blocks bitmap block */
    + __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
    __le32 bg_inode_bitmap; /* Inodes bitmap block */
    __le32 bg_inode_table; /* Inodes table block */
    __le16 bg_free_blocks_count; /* Free blocks count */
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  13. [PATCH] ext4: sparse fixes

    From: Aneesh Kumar K.V

    Signed-off-by: Aneesh Kumar K.V
    ---
    fs/ext4/fsync.c | 2 +-
    fs/ext4/inode.c | 2 +-
    fs/ext4/xattr.c | 4 ++--
    3 files changed, 4 insertions(+), 4 deletions(-)

    diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
    index 2a167d7..8d50879 100644
    --- a/fs/ext4/fsync.c
    +++ b/fs/ext4/fsync.c
    @@ -47,7 +47,7 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
    struct inode *inode = dentry->d_inode;
    int ret = 0;

    - J_ASSERT(ext4_journal_current_handle() == 0);
    + J_ASSERT(ext4_journal_current_handle() == NULL);

    /*
    * data=writeback:
    diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
    index a2e1ea4..07afd4a 100644
    --- a/fs/ext4/inode.c
    +++ b/fs/ext4/inode.c
    @@ -1027,7 +1027,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
    }
    if (buffer_new(&dummy)) {
    J_ASSERT(create != 0);
    - J_ASSERT(handle != 0);
    + J_ASSERT(handle != NULL);

    /*
    * Now that we do not always journal data, we should
    diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
    index 12c7d65..8638730 100644
    --- a/fs/ext4/xattr.c
    +++ b/fs/ext4/xattr.c
    @@ -1120,7 +1120,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
    int total_ino, total_blk;
    void *base, *start, *end;
    int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
    - int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize;
    + int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);

    down_write(&EXT4_I(inode)->xattr_sem);
    retry:
    @@ -1292,7 +1292,7 @@ retry:

    i.name = b_entry_name;
    i.value = buffer;
    - i.value_len = cpu_to_le32(size);
    + i.value_len = size;
    error = ext4_xattr_block_find(inode, &i, bs);
    if (error)
    goto cleanup;
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  14. [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext3

    From: Takashi Sato

    This patch set supports large block sizes (>4k, <=64k) in ext3 by
    just enlarging the block size limit. But it is NOT possible to have 64kB
    blocksize on ext3 without some changes to the directory handling
    code. The reason is that an empty 64kB directory block would have a
    rec_len == (__u16)2^16 == 0, and this would cause an error to be hit in
    the filesystem. The proposed solution is to represent a 64k rec_len
    with an otherwise impossible value like rec_len = 0xffff to handle this.

    The Patch-set consists of the following 2 patches.
    [1/2] ext3: enlarge blocksize
    - Allow blocksize up to pagesize

    [2/2] ext3: fix rec_len overflow
    - prevent rec_len from overflow with 64KB blocksize

    On a ppc64 box with 64k pages running with this patch set, we can now create
    a 64k block size ext3 filesystem and handle an empty directory block.

    Signed-off-by: Takashi Sato
    Signed-off-by: Mingming Cao
    ---
    fs/ext3/super.c | 6 +++++-
    include/linux/ext3_fs.h | 4 ++--
    2 files changed, 7 insertions(+), 3 deletions(-)

    diff --git a/fs/ext3/super.c b/fs/ext3/super.c
    index 9537316..b4bfd36 100644
    --- a/fs/ext3/super.c
    +++ b/fs/ext3/super.c
    @@ -1549,7 +1549,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
    }

    brelse (bh);
    - sb_set_blocksize(sb, blocksize);
    + if (!sb_set_blocksize(sb, blocksize)) {
    + printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n",
    + blocksize);
    + goto out_fail;
    + }
    logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
    offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
    bh = sb_bread(sb, logic_sb_block);
    diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
    index ece49a8..7aa5556 100644
    --- a/include/linux/ext3_fs.h
    +++ b/include/linux/ext3_fs.h
    @@ -76,8 +76,8 @@
    * Macro-instructions used to manage several block sizes
    */
    #define EXT3_MIN_BLOCK_SIZE 1024
    -#define EXT3_MAX_BLOCK_SIZE 4096
    -#define EXT3_MIN_BLOCK_LOG_SIZE 10
    +#define EXT3_MAX_BLOCK_SIZE 65536
    +#define EXT3_MIN_BLOCK_LOG_SIZE 10
    #ifdef __KERNEL__
    # define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize)
    #else
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  15. [PATCH] ext2: Avoid rec_len overflow with 64KB block size

    From: Jan Kara

    With a 64KB blocksize, a directory entry can have a size of 64KB, which does not fit
    into the 16 bits we have for the entry length. So we store 0xffff instead and convert
    the value when it is read from / written to disk.

    Signed-off-by: Jan Kara
    Signed-off-by: Mingming Cao
    ---
    fs/ext2/dir.c | 43 +++++++++++++++++++++++++++++++------------
    include/linux/ext2_fs.h | 1 +
    2 files changed, 32 insertions(+), 12 deletions(-)

    diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
    index 2bf49d7..1329bdb 100644
    --- a/fs/ext2/dir.c
    +++ b/fs/ext2/dir.c
    @@ -26,6 +26,24 @@

    typedef struct ext2_dir_entry_2 ext2_dirent;

    +static inline unsigned ext2_rec_len_from_disk(__le16 dlen)
    +{
    + unsigned len = le16_to_cpu(dlen);
    +
    + if (len == EXT2_MAX_REC_LEN)
    + return 1 << 16;
    + return len;
    +}
    +
    +static inline __le16 ext2_rec_len_to_disk(unsigned len)
    +{
    + if (len == (1 << 16))
    + return cpu_to_le16(EXT2_MAX_REC_LEN);
    + else if (len > (1 << 16))
    + BUG();
    + return cpu_to_le16(len);
    +}
    +
    /*
    * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
    * more robust, but we have what we have
    @@ -95,7 +113,7 @@ static void ext2_check_page(struct page *page)
    }
    for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
    p = (ext2_dirent *)(kaddr + offs);
    - rec_len = le16_to_cpu(p->rec_len);
    + rec_len = ext2_rec_len_from_disk(p->rec_len);

    if (rec_len < EXT2_DIR_REC_LEN(1))
    goto Eshort;
    @@ -193,7 +211,8 @@ static inline int ext2_match (int len, const char * const name,
    */
    static inline ext2_dirent *ext2_next_entry(ext2_dirent *p)
    {
    - return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len));
    + return (ext2_dirent *)((char*)p +
    + ext2_rec_len_from_disk(p->rec_len));
    }

    static inline unsigned
    @@ -305,7 +324,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
    return 0;
    }
    }
    - filp->f_pos += le16_to_cpu(de->rec_len);
    + filp->f_pos += ext2_rec_len_from_disk(de->rec_len);
    }
    ext2_put_page(page);
    }
    @@ -413,7 +432,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
    struct page *page, struct inode *inode)
    {
    unsigned from = (char *) de - (char *) page_address(page);
    - unsigned to = from + le16_to_cpu(de->rec_len);
    + unsigned to = from + ext2_rec_len_from_disk(de->rec_len);
    int err;

    lock_page(page);
    @@ -469,7 +488,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
    /* We hit i_size */
    name_len = 0;
    rec_len = chunk_size;
    - de->rec_len = cpu_to_le16(chunk_size);
    + de->rec_len = ext2_rec_len_to_disk(chunk_size);
    de->inode = 0;
    goto got_it;
    }
    @@ -483,7 +502,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
    if (ext2_match (namelen, name, de))
    goto out_unlock;
    name_len = EXT2_DIR_REC_LEN(de->name_len);
    - rec_len = le16_to_cpu(de->rec_len);
    + rec_len = ext2_rec_len_from_disk(de->rec_len);
    if (!de->inode && rec_len >= reclen)
    goto got_it;
    if (rec_len >= name_len + reclen)
    @@ -504,8 +523,8 @@ got_it:
    goto out_unlock;
    if (de->inode) {
    ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
    - de1->rec_len = cpu_to_le16(rec_len - name_len);
    - de->rec_len = cpu_to_le16(name_len);
    + de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
    + de->rec_len = ext2_rec_len_to_disk(name_len);
    de = de1;
    }
    de->name_len = namelen;
    @@ -536,7 +555,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
    struct inode *inode = mapping->host;
    char *kaddr = page_address(page);
    unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
    - unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
    + unsigned to = ((char*)dir - kaddr) + ext2_rec_len_from_disk(dir->rec_len);
    ext2_dirent * pde = NULL;
    ext2_dirent * de = (ext2_dirent *) (kaddr + from);
    int err;
    @@ -557,7 +576,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
    err = mapping->a_ops->prepare_write(NULL, page, from, to);
    BUG_ON(err);
    if (pde)
    - pde->rec_len = cpu_to_le16(to-from);
    + pde->rec_len = ext2_rec_len_to_disk(to-from);
    dir->inode = 0;
    err = ext2_commit_chunk(page, from, to);
    inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
    @@ -591,14 +610,14 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
    memset(kaddr, 0, chunk_size);
    de = (struct ext2_dir_entry_2 *)kaddr;
    de->name_len = 1;
    - de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
    + de->rec_len = ext2_rec_len_to_disk(EXT2_DIR_REC_LEN(1));
    memcpy (de->name, ".\0\0", 4);
    de->inode = cpu_to_le32(inode->i_ino);
    ext2_set_de_type (de, inode);

    de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1));
    de->name_len = 2;
    - de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));
    + de->rec_len = ext2_rec_len_to_disk(chunk_size - EXT2_DIR_REC_LEN(1));
    de->inode = cpu_to_le32(parent->i_ino);
    memcpy (de->name, "..\0", 4);
    ext2_set_de_type (de, inode);
    diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
    index 910a705..41063d5 100644
    --- a/include/linux/ext2_fs.h
    +++ b/include/linux/ext2_fs.h
    @@ -557,5 +557,6 @@ enum {
    #define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1)
    #define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \
    ~EXT2_DIR_ROUND)
    +#define EXT2_MAX_REC_LEN ((1<<16)-1)

    #endif /* _LINUX_EXT2_FS_H */
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  16. [PATCH] This feature relaxes check restrictions on where each block group's meta

    From: Jose R. Santos

    data is located within the storage media. This allows for the allocation
    of bitmaps or inode tables outside the block group boundaries in cases
    where bad blocks force us to look for new blocks which the owning block
    group cannot satisfy. This will also allow for new meta-data allocation
    schemes to improve performance and scalability.

    Signed-off-by: Jose R. Santos
    Cc:
    Signed-off-by: Andrew Morton
    ---
    fs/ext4/super.c | 9 +++++++--
    include/linux/ext4_fs.h | 4 +++-
    2 files changed, 10 insertions(+), 3 deletions(-)

    diff --git a/fs/ext4/super.c b/fs/ext4/super.c
    index b59610d..619db84 100644
    --- a/fs/ext4/super.c
    +++ b/fs/ext4/super.c
    @@ -1287,13 +1287,17 @@ static int ext4_check_descriptors (struct super_block * sb)
    ext4_fsblk_t inode_table;
    struct ext4_group_desc * gdp = NULL;
    int desc_block = 0;
    + int flexbg_flag = 0;
    int i;

    + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
    + flexbg_flag = 1;
    +
    ext4_debug ("Checking group descriptors");

    for (i = 0; i < sbi->s_groups_count; i++)
    {
    - if (i == sbi->s_groups_count - 1)
    + if (i == sbi->s_groups_count - 1 || flexbg_flag)
    last_block = ext4_blocks_count(sbi->s_es) - 1;
    else
    last_block = first_block +
    @@ -1338,7 +1342,8 @@ static int ext4_check_descriptors (struct super_block * sb)
    le16_to_cpu(gdp->bg_checksum));
    return 0;
    }
    - first_block += EXT4_BLOCKS_PER_GROUP(sb);
    + if (!flexbg_flag)
    + first_block += EXT4_BLOCKS_PER_GROUP(sb);
    gdp = (struct ext4_group_desc *)
    ((__u8 *)gdp + EXT4_DESC_SIZE(sb));
    }
    diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
    index 722d4ef..46b304d 100644
    --- a/include/linux/ext4_fs.h
    +++ b/include/linux/ext4_fs.h
    @@ -682,13 +682,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
    #define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
    #define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
    #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
    +#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200

    #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
    #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
    EXT4_FEATURE_INCOMPAT_RECOVER| \
    EXT4_FEATURE_INCOMPAT_META_BG| \
    EXT4_FEATURE_INCOMPAT_EXTENTS| \
    - EXT4_FEATURE_INCOMPAT_64BIT)
    + EXT4_FEATURE_INCOMPAT_64BIT| \
    + EXT4_FEATURE_INCOMPAT_FLEX_BG)
    #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
    EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
    EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  17. [PATCH] ext3: Avoid rec_len overflow with 64KB block size

    From: Jan Kara

    With 64KB blocksize, a directory entry can have size 64KB which does not fit
    into the 16 bits we have for the entry length. So we store 0xffff instead and
    convert the value when read from / written to disk. The patch also converts
    some places to use ext3_next_entry() when we are changing them anyway.

    Signed-off-by: Jan Kara
    Signed-off-by: Mingming Cao
    ---
    fs/ext3/dir.c | 10 +++---
    fs/ext3/namei.c | 90 ++++++++++++++++++++++------------------------
    include/linux/ext3_fs.h | 20 ++++++++++
    3 files changed, 68 insertions(+), 52 deletions(-)

    diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
    index c00723a..3c4c43a 100644
    --- a/fs/ext3/dir.c
    +++ b/fs/ext3/dir.c
    @@ -69,7 +69,7 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
    unsigned long offset)
    {
    const char * error_msg = NULL;
    - const int rlen = le16_to_cpu(de->rec_len);
    + const int rlen = ext3_rec_len_from_disk(de->rec_len);

    if (rlen < EXT3_DIR_REC_LEN(1))
    error_msg = "rec_len is smaller than minimal";
    @@ -177,10 +177,10 @@ revalidate:
    * least that it is non-zero. A
    * failure will be detected in the
    * dirent test below. */
    - if (le16_to_cpu(de->rec_len) <
    + if (ext3_rec_len_from_disk(de->rec_len) <
    EXT3_DIR_REC_LEN(1))
    break;
    - i += le16_to_cpu(de->rec_len);
    + i += ext3_rec_len_from_disk(de->rec_len);
    }
    offset = i;
    filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
    @@ -201,7 +201,7 @@ revalidate:
    ret = stored;
    goto out;
    }
    - offset += le16_to_cpu(de->rec_len);
    + offset += ext3_rec_len_from_disk(de->rec_len);
    if (le32_to_cpu(de->inode)) {
    /* We might block in the next section
    * if the data destination is
    @@ -223,7 +223,7 @@ revalidate:
    goto revalidate;
    stored ++;
    }
    - filp->f_pos += le16_to_cpu(de->rec_len);
    + filp->f_pos += ext3_rec_len_from_disk(de->rec_len);
    }
    offset = 0;
    brelse (bh);
    diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
    index c1fa190..2c38eb6 100644
    --- a/fs/ext3/namei.c
    +++ b/fs/ext3/namei.c
    @@ -144,6 +144,15 @@ struct dx_map_entry
    u16 size;
    };

    +/*
    + * p is at least 6 bytes before the end of page
    + */
    +static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
    +{
    + return (struct ext3_dir_entry_2 *)((char*)p +
    + ext3_rec_len_from_disk(p->rec_len));
    +}
    +
    #ifdef CONFIG_EXT3_INDEX
    static inline unsigned dx_get_block (struct dx_entry *entry);
    static void dx_set_block (struct dx_entry *entry, unsigned value);
    @@ -281,7 +290,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
    space += EXT3_DIR_REC_LEN(de->name_len);
    names++;
    }
    - de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
    + de = ext3_next_entry(de);
    }
    printk("(%i)\n", names);
    return (struct stats) { names, space, 1 };
    @@ -548,14 +557,6 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,


    /*
    - * p is at least 6 bytes before the end of page
    - */
    -static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
    -{
    - return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
    -}
    -
    -/*
    * This function fills a red-black tree with information from a
    * directory block. It returns the number directory entries loaded
    * into the tree. If there is an error it is returned in err.
    @@ -721,7 +722,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
    cond_resched();
    }
    /* XXX: do we need to check rec_len == 0 case? -Chris */
    - de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
    + de = ext3_next_entry(de);
    }
    return count;
    }
    @@ -825,7 +826,7 @@ static inline int search_dirblock(struct buffer_head * bh,
    return 1;
    }
    /* prevent looping on a bad block */
    - de_len = le16_to_cpu(de->rec_len);
    + de_len = ext3_rec_len_from_disk(de->rec_len);
    if (de_len <= 0)
    return -1;
    offset += de_len;
    @@ -1138,7 +1139,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
    rec_len = EXT3_DIR_REC_LEN(de->name_len);
    memcpy (to, de, rec_len);
    ((struct ext3_dir_entry_2 *) to)->rec_len =
    - cpu_to_le16(rec_len);
    + ext3_rec_len_to_disk(rec_len);
    de->inode = 0;
    map++;
    to += rec_len;
    @@ -1157,13 +1158,12 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)

    prev = to = de;
    while ((char*)de < base + size) {
    - next = (struct ext3_dir_entry_2 *) ((char *) de +
    - le16_to_cpu(de->rec_len));
    + next = ext3_next_entry(de);
    if (de->inode && de->name_len) {
    rec_len = EXT3_DIR_REC_LEN(de->name_len);
    if (de > to)
    memmove(to, de, rec_len);
    - to->rec_len = cpu_to_le16(rec_len);
    + to->rec_len = ext3_rec_len_to_disk(rec_len);
    prev = to;
    to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
    }
    @@ -1237,8 +1237,8 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
    /* Fancy dance to stay within two buffers */
    de2 = dx_move_dirents(data1, data2, map + split, count - split);
    de = dx_pack_dirents(data1,blocksize);
    - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
    - de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
    + de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
    + de2->rec_len = ext3_rec_len_to_disk(data2 + blocksize - (char *) de2);
    dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
    dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));

    @@ -1309,7 +1309,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
    return -EEXIST;
    }
    nlen = EXT3_DIR_REC_LEN(de->name_len);
    - rlen = le16_to_cpu(de->rec_len);
    + rlen = ext3_rec_len_from_disk(de->rec_len);
    if ((de->inode? rlen - nlen: rlen) >= reclen)
    break;
    de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
    @@ -1328,11 +1328,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,

    /* By now the buffer is marked for journaling */
    nlen = EXT3_DIR_REC_LEN(de->name_len);
    - rlen = le16_to_cpu(de->rec_len);
    + rlen = ext3_rec_len_from_disk(de->rec_len);
    if (de->inode) {
    struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
    - de1->rec_len = cpu_to_le16(rlen - nlen);
    - de->rec_len = cpu_to_le16(nlen);
    + de1->rec_len = ext3_rec_len_to_disk(rlen - nlen);
    + de->rec_len = ext3_rec_len_to_disk(nlen);
    de = de1;
    }
    de->file_type = EXT3_FT_UNKNOWN;
    @@ -1410,17 +1410,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,

    /* The 0th block becomes the root, move the dirents out */
    fde = &root->dotdot;
    - de = (struct ext3_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len));
    + de = (struct ext3_dir_entry_2 *)((char *)fde +
    + ext3_rec_len_from_disk(fde->rec_len));
    len = ((char *) root) + blocksize - (char *) de;
    memcpy (data1, de, len);
    de = (struct ext3_dir_entry_2 *) data1;
    top = data1 + len;
    - while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top)
    + while ((char *)(de2 = ext3_next_entry(de)) < top)
    de = de2;
    - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
    + de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
    /* Initialize the root; the dot dirents already exist */
    de = (struct ext3_dir_entry_2 *) (&root->dotdot);
    - de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
    + de->rec_len = ext3_rec_len_to_disk(blocksize - EXT3_DIR_REC_LEN(2));
    memset (&root->info, 0, sizeof(root->info));
    root->info.info_length = sizeof(root->info);
    root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
    @@ -1507,7 +1508,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
    return retval;
    de = (struct ext3_dir_entry_2 *) bh->b_data;
    de->inode = 0;
    - de->rec_len = cpu_to_le16(blocksize);
    + de->rec_len = ext3_rec_len_to_disk(blocksize);
    return add_dirent_to_buf(handle, dentry, inode, de, bh);
    }

    @@ -1571,7 +1572,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
    goto cleanup;
    node2 = (struct dx_node *)(bh2->b_data);
    entries2 = node2->entries;
    - node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
    + node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize);
    node2->fake.inode = 0;
    BUFFER_TRACE(frame->bh, "get_write_access");
    err = ext3_journal_get_write_access(handle, frame->bh);
    @@ -1670,9 +1671,9 @@ static int ext3_delete_entry (handle_t *handle,
    BUFFER_TRACE(bh, "get_write_access");
    ext3_journal_get_write_access(handle, bh);
    if (pde)
    - pde->rec_len =
    - cpu_to_le16(le16_to_cpu(pde->rec_len) +
    - le16_to_cpu(de->rec_len));
    + pde->rec_len = ext3_rec_len_to_disk(
    + ext3_rec_len_from_disk(pde->rec_len) +
    + ext3_rec_len_from_disk(de->rec_len));
    else
    de->inode = 0;
    dir->i_version++;
    @@ -1680,10 +1681,9 @@ static int ext3_delete_entry (handle_t *handle,
    ext3_journal_dirty_metadata(handle, bh);
    return 0;
    }
    - i += le16_to_cpu(de->rec_len);
    + i += ext3_rec_len_from_disk(de->rec_len);
    pde = de;
    - de = (struct ext3_dir_entry_2 *)
    - ((char *) de + le16_to_cpu(de->rec_len));
    + de = ext3_next_entry(de);
    }
    return -ENOENT;
    }
    @@ -1817,13 +1817,12 @@ retry:
    de = (struct ext3_dir_entry_2 *) dir_block->b_data;
    de->inode = cpu_to_le32(inode->i_ino);
    de->name_len = 1;
    - de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len));
    + de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
    strcpy (de->name, ".");
    ext3_set_de_type(dir->i_sb, de, S_IFDIR);
    - de = (struct ext3_dir_entry_2 *)
    - ((char *) de + le16_to_cpu(de->rec_len));
    + de = ext3_next_entry(de);
    de->inode = cpu_to_le32(dir->i_ino);
    - de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
    + de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
    de->name_len = 2;
    strcpy (de->name, "..");
    ext3_set_de_type(dir->i_sb, de, S_IFDIR);
    @@ -1875,8 +1874,7 @@ static int empty_dir (struct inode * inode)
    return 1;
    }
    de = (struct ext3_dir_entry_2 *) bh->b_data;
    - de1 = (struct ext3_dir_entry_2 *)
    - ((char *) de + le16_to_cpu(de->rec_len));
    + de1 = ext3_next_entry(de);
    if (le32_to_cpu(de->inode) != inode->i_ino ||
    !le32_to_cpu(de1->inode) ||
    strcmp (".", de->name) ||
    @@ -1887,9 +1885,9 @@ static int empty_dir (struct inode * inode)
    brelse (bh);
    return 1;
    }
    - offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
    - de = (struct ext3_dir_entry_2 *)
    - ((char *) de1 + le16_to_cpu(de1->rec_len));
    + offset = ext3_rec_len_from_disk(de->rec_len) +
    + ext3_rec_len_from_disk(de1->rec_len);
    + de = ext3_next_entry(de1);
    while (offset < inode->i_size ) {
    if (!bh ||
    (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
    @@ -1918,9 +1916,8 @@ static int empty_dir (struct inode * inode)
    brelse (bh);
    return 0;
    }
    - offset += le16_to_cpu(de->rec_len);
    - de = (struct ext3_dir_entry_2 *)
    - ((char *) de + le16_to_cpu(de->rec_len));
    + offset += ext3_rec_len_from_disk(de->rec_len);
    + de = ext3_next_entry(de);
    }
    brelse (bh);
    return 1;
    @@ -2274,8 +2271,7 @@ retry:
    }

    #define PARENT_INO(buffer) \
    - ((struct ext3_dir_entry_2 *) ((char *) buffer + \
    - le16_to_cpu(((struct ext3_dir_entry_2 *) buffer)->rec_len)))->inode
    + (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)

    /*
    * Anybody can rename anything with this: the permission checks are left to the
    diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
    index 7aa5556..d9e378d 100644
    --- a/include/linux/ext3_fs.h
    +++ b/include/linux/ext3_fs.h
    @@ -660,6 +660,26 @@ struct ext3_dir_entry_2 {
    #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
    #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
    ~EXT3_DIR_ROUND)
    +#define EXT3_MAX_REC_LEN ((1<<16)-1)
    +
    +static inline unsigned ext3_rec_len_from_disk(__le16 dlen)
    +{
    + unsigned len = le16_to_cpu(dlen);
    +
    + if (len == EXT3_MAX_REC_LEN)
    + return 1 << 16;
    + return len;
    +}
    +
    +static inline __le16 ext3_rec_len_to_disk(unsigned len)
    +{
    + if (len == (1 << 16))
    + return cpu_to_le16(EXT3_MAX_REC_LEN);
    + else if (len > (1 << 16))
    + BUG();
    + return cpu_to_le16(len);
    +}
    +
    /*
    * Hash Tree Directory indexing
    * (c) Daniel Phillips, 2001
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  18. [PATCH] Ext4: Uninitialized Block Groups

    From: Andreas Dilger

    In pass1 of e2fsck, every inode table in the filesystem is scanned and checked,
    regardless of whether it is in use. This is the most time consuming part
    of the filesystem check. The uninitialized block group feature can greatly
    reduce e2fsck time by eliminating checking of uninitialized inodes.

    With this feature, there is a high water mark of used inodes for each block
    group. Block and inode bitmaps can be uninitialized on disk via a flag in the
    group descriptor to avoid reading or scanning them at e2fsck time. A checksum
    of each group descriptor is used to ensure that corruption in the group
    descriptor's bit flags does not cause incorrect operation.

    The feature is enabled through a mkfs option

    mke2fs /dev/ -O uninit_groups

    A patch adding support for uninitialized block groups to e2fsprogs tools has
    been posted to the linux-ext4 mailing list.

    The patches have been stress tested with fsstress and fsx. In performance
    tests testing e2fsck time, we have seen that e2fsck time on ext3 grows
    linearly with the total number of inodes in the filesystem. In ext4 with the
    uninitialized block groups feature, the e2fsck time is constant, based
    solely on the number of used inodes rather than the total inode count.
    Since typical ext4 filesystems only use 1-10% of their inodes, this feature can
    greatly reduce e2fsck time for users, with a performance improvement of 2-20
    times, depending on how full the filesystem is.

    The attached graph shows the major improvements in e2fsck times in filesystems
    with a large total inode count, but few inodes in use.

    In each group descriptor if we have

    EXT4_BG_INODE_UNINIT set in bg_flags:
    Inode table is not initialized/used in this group. So we can skip
    the consistency check during fsck.
    EXT4_BG_BLOCK_UNINIT set in bg_flags:
    No block in the group is used. So we can skip the block bitmap
    verification for this group.

    We also add two new fields to group descriptor as a part of
    uninitialized group patch.

    __le16 bg_itable_unused; /* Unused inodes count */
    __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */


    bg_itable_unused:

    If EXT4_BG_INODE_UNINIT is not set in bg_flags,
    then bg_itable_unused gives the offset within
    the inode table up to which the inodes are used. This can be
    used by fsck to skip the inodes that are marked unused.


    bg_checksum:
    Now that we depend on bg_flags and bg_itable_unused to determine
    the block and inode usage, we need to make sure group descriptor
    is not corrupt. We add checksum to group descriptor to
    detect corruption. If the descriptor is found to be corrupt, we
    mark all the blocks and inodes in the group used.


    Signed-off-by: Avantika Mathur
    Signed-off-by: Andreas Dilger
    Signed-off-by: Mingming Cao
    Signed-off-by: Aneesh Kumar K.V
    ---
    fs/Kconfig | 1 +
    fs/ext4/balloc.c | 92 ++++++++++++++++++++++++++++-
    fs/ext4/group.h | 29 +++++++++
    fs/ext4/ialloc.c | 146 ++++++++++++++++++++++++++++++++++++++++++++---
    fs/ext4/resize.c | 2 +
    fs/ext4/super.c | 47 +++++++++++++++
    include/linux/ext4_fs.h | 16 ++++-
    7 files changed, 317 insertions(+), 16 deletions(-)
    create mode 100644 fs/ext4/group.h

    diff --git a/fs/Kconfig b/fs/Kconfig
    index f9eed6d..97eef97 100644
    --- a/fs/Kconfig
    +++ b/fs/Kconfig
    @@ -140,6 +140,7 @@ config EXT4DEV_FS
    tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
    depends on EXPERIMENTAL
    select JBD2
    + select CRC16
    help
    Ext4dev is a predecessor filesystem of the next generation
    extended fs ext4, based on ext3 filesystem code. It will be
    diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
    index e53b4af..d1a8882 100644
    --- a/fs/ext4/balloc.c
    +++ b/fs/ext4/balloc.c
    @@ -20,6 +20,7 @@
    #include
    #include

    +#include "group.h"
    /*
    * balloc.c contains the blocks allocation and deallocation routines
    */
    @@ -42,6 +43,74 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,

    }

    +/* Initializes an uninitialized block bitmap if given, and returns the
    + * number of blocks free in the group. */
    +unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
    + int block_group, struct ext4_group_desc *gdp)
    +{
    + unsigned long start;
    + int bit, bit_max;
    + unsigned free_blocks;
    + struct ext4_sb_info *sbi = EXT4_SB(sb);
    +
    + if (bh) {
    + J_ASSERT_BH(bh, buffer_locked(bh));
    +
    + /* If checksum is bad mark all blocks used to prevent allocation
    + * essentially implementing a per-group read-only flag. */
    + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
    + ext4_error(sb, __FUNCTION__,
    + "Checksum bad for group %u\n", block_group);
    + gdp->bg_free_blocks_count = 0;
    + gdp->bg_free_inodes_count = 0;
    + gdp->bg_itable_unused = 0;
    + memset(bh->b_data, 0xff, sb->s_blocksize);
    + return 0;
    + }
    + memset(bh->b_data, 0, sb->s_blocksize);
    + }
    +
    + /* Check for superblock and gdt backups in this group */
    + bit_max = ext4_bg_has_super(sb, block_group);
    +
    + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
    + block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
    + sbi->s_desc_per_block) {
    + if (bit_max) {
    + bit_max += ext4_bg_num_gdb(sb, block_group);
    + bit_max +=
    + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
    + }
    + } else { /* For META_BG_BLOCK_GROUPS */
    + int group_rel = (block_group -
    + le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
    + EXT4_DESC_PER_BLOCK(sb);
    + if (group_rel == 0 || group_rel == 1 ||
    + (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
    + bit_max += 1;
    + }
    +
    + /* Last and first groups are always initialized */
    + free_blocks = EXT4_BLOCKS_PER_GROUP(sb) - bit_max;
    +
    + if (bh) {
    + for (bit = 0; bit < bit_max; bit++)
    + ext4_set_bit(bit, bh->b_data);
    +
    + start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
    + le32_to_cpu(sbi->s_es->s_first_data_block);
    +
    + /* Set bits for block and inode bitmaps, and inode table */
    + ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
    + ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
    + for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
    + bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
    + ext4_set_bit(bit, bh->b_data);
    + }
    +
    + return free_blocks - sbi->s_itb_per_group - 2;
    +}
    +
    /*
    * The free blocks are managed by bitmaps. A file system contains several
    * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
    @@ -110,16 +179,29 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
    *
    * Return buffer_head on success or NULL in case of failure.
    */
    -static struct buffer_head *
    +struct buffer_head *
    read_block_bitmap(struct super_block *sb, unsigned int block_group)
    {
    struct ext4_group_desc * desc;
    struct buffer_head * bh = NULL;

    - desc = ext4_get_group_desc (sb, block_group, NULL);
    + desc = ext4_get_group_desc(sb, block_group, NULL);
    if (!desc)
    goto error_out;
    - bh = sb_bread(sb, ext4_block_bitmap(sb, desc));
    + if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
    + bh = sb_getblk(sb, ext4_block_bitmap(sb, desc));
    + if (!buffer_uptodate(bh)) {
    + lock_buffer(bh);
    + if (!buffer_uptodate(bh)) {
    + ext4_init_block_bitmap(sb, bh, block_group,
    + desc);
    + set_buffer_uptodate(bh);
    + }
    + unlock_buffer(bh);
    + }
    + } else {
    + bh = sb_bread(sb, ext4_block_bitmap(sb,desc));
    + }
    if (!bh)
    ext4_error (sb, "read_block_bitmap",
    "Cannot read block bitmap - "
    @@ -586,6 +668,7 @@ do_more:
    desc->bg_free_blocks_count =
    cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
    group_freed);
    + desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
    spin_unlock(sb_bgl_lock(sbi, block_group));
    percpu_counter_mod(&sbi->s_freeblocks_counter, count);

    @@ -1644,8 +1727,11 @@ allocated:
    ret_block, goal_hits, goal_attempts);

    spin_lock(sb_bgl_lock(sbi, group_no));
    + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
    + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
    gdp->bg_free_blocks_count =
    cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
    + gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
    spin_unlock(sb_bgl_lock(sbi, group_no));
    percpu_counter_mod(&sbi->s_freeblocks_counter, -num);

    diff --git a/fs/ext4/group.h b/fs/ext4/group.h
    new file mode 100644
    index 0000000..9310979
    --- /dev/null
    +++ b/fs/ext4/group.h
    @@ -0,0 +1,29 @@
    +/*
    + * linux/fs/ext4/group.h
    + *
    + * Copyright (C) 2007 Cluster File Systems, Inc
    + *
    + * Author: Andreas Dilger
    + */
    +
    +#ifndef _LINUX_EXT4_GROUP_H
    +#define _LINUX_EXT4_GROUP_H
    +#if defined(CONFIG_CRC16)
    +#include
    +#endif
    +
    +extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
    + struct ext4_group_desc *gdp);
    +extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
    + struct ext4_group_desc *gdp);
    +struct buffer_head *read_block_bitmap(struct super_block *sb,
    + unsigned int block_group);
    +extern unsigned ext4_init_block_bitmap(struct super_block *sb,
    + struct buffer_head *bh, int group,
    + struct ext4_group_desc *desc);
    +#define ext4_free_blocks_after_init(sb, group, desc) \
    + ext4_init_block_bitmap(sb, NULL, group, desc)
    +extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
    + struct buffer_head *bh, int group,
    + struct ext4_group_desc *desc);
    +#endif /* _LINUX_EXT4_GROUP_H */
    diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
    index b8b538d..1fa418c 100644
    --- a/fs/ext4/ialloc.c
    +++ b/fs/ext4/ialloc.c
    @@ -28,6 +28,7 @@

    #include "xattr.h"
    #include "acl.h"
    +#include "group.h"

    /*
    * ialloc.c contains the inodes allocation and deallocation routines
    @@ -43,6 +44,52 @@
    * the free blocks count in the block.
    */

    +/*
    + * To avoid calling the atomic setbit hundreds or thousands of times, we only
    + * need to use it within a single byte (to ensure we get endianness right).
    + * We can use memset for the rest of the bitmap as there are no other users.
    + */
    +static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
    +{
    + int i;
    +
    + if (start_bit >= end_bit)
    + return;
    +
    + ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
    + for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
    + ext4_set_bit(i, bitmap);
    + if (i < end_bit)
    + memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
    +}
    +
    +/* Initializes an uninitialized inode bitmap */
    +unsigned ext4_init_inode_bitmap(struct super_block *sb,
    + struct buffer_head *bh, int block_group,
    + struct ext4_group_desc *gdp)
    +{
    + struct ext4_sb_info *sbi = EXT4_SB(sb);
    +
    + J_ASSERT_BH(bh, buffer_locked(bh));
    +
    + /* If checksum is bad mark all blocks and inodes use to prevent
    + * allocation, essentially implementing a per-group read-only flag. */
    + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
    + ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
    + block_group);
    + gdp->bg_free_blocks_count = 0;
    + gdp->bg_free_inodes_count = 0;
    + gdp->bg_itable_unused = 0;
    + memset(bh->b_data, 0xff, sb->s_blocksize);
    + return 0;
    + }
    +
    + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
    + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
    + bh->b_data);
    +
    + return EXT4_INODES_PER_GROUP(sb);
    +}

    /*
    * Read the inode allocation bitmap for a given block_group, reading
    @@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group)
    desc = ext4_get_group_desc(sb, block_group, NULL);
    if (!desc)
    goto error_out;
    -
    - bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
    + if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
    + bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
    + if (!buffer_uptodate(bh)) {
    + lock_buffer(bh);
    + if (!buffer_uptodate(bh)) {
    + ext4_init_inode_bitmap(sb, bh, block_group,
    + desc);
    + set_buffer_uptodate(bh);
    + }
    + unlock_buffer(bh);
    + }
    + } else {
    + bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
    + }
    if (!bh)
    ext4_error(sb, "read_inode_bitmap",
    "Cannot read inode bitmap - "
    @@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
    if (is_directory)
    gdp->bg_used_dirs_count = cpu_to_le16(
    le16_to_cpu(gdp->bg_used_dirs_count) - 1);
    + gdp->bg_checksum = ext4_group_desc_csum(sbi,
    + block_group, gdp);
    spin_unlock(sb_bgl_lock(sbi, block_group));
    percpu_counter_inc(&sbi->s_freeinodes_counter);
    if (is_directory)
    @@ -438,7 +499,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
    struct ext4_sb_info *sbi;
    int err = 0;
    struct inode *ret;
    - int i;
    + int i, free = 0;

    /* Cannot create files in a deleted directory */
    if (!dir || !dir->i_nlink)
    @@ -520,11 +581,13 @@ repeat_in_this_group:
    goto out;

    got:
    - ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
    - if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
    - ext4_error (sb, "ext4_new_inode",
    - "reserved inode or inode > inodes count - "
    - "block_group = %d, inode=%lu", group, ino);
    + ino++;
    + if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
    + ino > EXT4_INODES_PER_GROUP(sb)) {
    + ext4_error(sb, __FUNCTION__,
    + "reserved inode or inode > inodes count - "
    + "block_group = %d, inode=%lu", group,
    + ino + group * EXT4_INODES_PER_GROUP(sb));
    err = -EIO;
    goto fail;
    }
    @@ -532,13 +595,78 @@ got:
    BUFFER_TRACE(bh2, "get_write_access");
    err = ext4_journal_get_write_access(handle, bh2);
    if (err) goto fail;
    +
    + /* We may have to initialize the block bitmap if it isn't already */
    + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
    + gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
    + struct buffer_head *block_bh = read_block_bitmap(sb, group);
    +
    + BUFFER_TRACE(block_bh, "get block bitmap access");
    + err = ext4_journal_get_write_access(handle, block_bh);
    + if (err) {
    + brelse(block_bh);
    + goto fail;
    + }
    +
    + free = 0;
    + spin_lock(sb_bgl_lock(sbi, group));
    + /* recheck and clear flag under lock if we still need to */
    + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
    + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
    + free = ext4_free_blocks_after_init(sb, group, gdp);
    + gdp->bg_free_blocks_count = cpu_to_le16(free);
    + }
    + spin_unlock(sb_bgl_lock(sbi, group));
    +
    + /* Don't need to dirty bitmap block if we didn't change it */
    + if (free) {
    + BUFFER_TRACE(block_bh, "dirty block bitmap");
    + err = ext4_journal_dirty_metadata(handle, block_bh);
    + }
    +
    + brelse(block_bh);
    + if (err)
    + goto fail;
    + }
    +
    spin_lock(sb_bgl_lock(sbi, group));
    + /* If we didn't allocate from within the initialized part of the inode
    + * table then we need to initialize up to this inode. */
    + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
    + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
    + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
    +
    + /* When marking the block group with
    + * ~EXT4_BG_INODE_UNINIT we don't want to depend
    + * on the value of bg_itable_unsed even though
    + * mke2fs could have initialized the same for us.
    + * Instead we calculated the value below
    + */
    +
    + free = 0;
    + } else {
    + free = EXT4_INODES_PER_GROUP(sb) -
    + le16_to_cpu(gdp->bg_itable_unused);
    + }
    +
    + /*
    + * Check the relative inode number against the last used
    + * relative inode number in this group. if it is greater
    + * we need to update the bg_itable_unused count
    + *
    + */
    + if (ino > free)
    + gdp->bg_itable_unused =
    + cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
    + }
    +
    gdp->bg_free_inodes_count =
    cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
    if (S_ISDIR(mode)) {
    gdp->bg_used_dirs_count =
    cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
    }
    + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
    spin_unlock(sb_bgl_lock(sbi, group));
    BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
    err = ext4_journal_dirty_metadata(handle, bh2);
    @@ -560,7 +688,7 @@ got:
    inode->i_gid = current->fsgid;
    inode->i_mode = mode;

    - inode->i_ino = ino;
    + inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
    /* This is the optimal IO size (for stat), not the fs block size */
    inode->i_blocks = 0;
    inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
    diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
    index aa11d7d..3359450 100644
    --- a/fs/ext4/resize.c
    +++ b/fs/ext4/resize.c
    @@ -16,6 +16,7 @@
    #include
    #include

    +#include "group.h"

    #define outside(b, first, last) ((b) < (first) || (b) >= (last))
    #define inside(b, first, last) ((b) >= (first) && (b) < (last))
    @@ -842,6 +843,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
    ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
    gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
    gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
    + gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);

    /*
    * Make the new blocks and inodes valid next. We do this before
    diff --git a/fs/ext4/super.c b/fs/ext4/super.c
    index 420d39d..b59610d 100644
    --- a/fs/ext4/super.c
    +++ b/fs/ext4/super.c
    @@ -37,12 +37,14 @@
    #include
    #include
    #include
    +#include

    #include

    #include "xattr.h"
    #include "acl.h"
    #include "namei.h"
    +#include "group.h"

    static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
    unsigned long journal_devnum);
    @@ -1237,6 +1239,43 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
    return res;
    }

    +__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
    + struct ext4_group_desc *gdp)
    +{
    + __u16 crc = 0;
    +
    + if (sbi->s_es->s_feature_ro_compat &
    + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
    + int offset = offsetof(struct ext4_group_desc, bg_checksum);
    + __le32 le_group = cpu_to_le32(block_group);
    +
    + crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
    + crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
    + crc = crc16(crc, (__u8 *)gdp, offset);
    + offset += sizeof(gdp->bg_checksum); /* skip checksum */
    + /* for checksum of struct ext4_group_desc do the rest...*/
    + if ((sbi->s_es->s_feature_incompat &
    + cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
    + offset < le16_to_cpu(sbi->s_es->s_desc_size))
    + crc = crc16(crc, (__u8 *)gdp + offset,
    + le16_to_cpu(sbi->s_es->s_desc_size) -
    + offset);
    + }
    +
    + return cpu_to_le16(crc);
    +}
    +
    +int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
    + struct ext4_group_desc *gdp)
    +{
    + if ((sbi->s_es->s_feature_ro_compat &
    + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
    + (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
    + return 0;
    +
    + return 1;
    +}
    +
    /* Called at mount-time, super-block is locked */
    static int ext4_check_descriptors (struct super_block * sb)
    {
    @@ -1291,6 +1330,14 @@ static int ext4_check_descriptors (struct super_block * sb)
    i, inode_table);
    return 0;
    }
    + if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
    + ext4_error(sb, __FUNCTION__,
    + "Checksum for group %d failed (%u!=%u)\n", i,
    + le16_to_cpu(ext4_group_desc_csum(sbi, i,
    + gdp)),
    + le16_to_cpu(gdp->bg_checksum));
    + return 0;
    + }
    first_block += EXT4_BLOCKS_PER_GROUP(sb);
    gdp = (struct ext4_group_desc *)
    ((__u8 *)gdp + EXT4_DESC_SIZE(sb));
    diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
    index 151738a..b77b59f 100644
    --- a/include/linux/ext4_fs.h
    +++ b/include/linux/ext4_fs.h
    @@ -105,19 +105,25 @@
    */
    struct ext4_group_desc
    {
    - __le32 bg_block_bitmap; /* Blocks bitmap block */
    - __le32 bg_inode_bitmap; /* Inodes bitmap block */
    + __le32 bg_block_bitmap; /* Blocks bitmap block */
    + __le32 bg_inode_bitmap; /* Inodes bitmap block */
    __le32 bg_inode_table; /* Inodes table block */
    __le16 bg_free_blocks_count; /* Free blocks count */
    __le16 bg_free_inodes_count; /* Free inodes count */
    __le16 bg_used_dirs_count; /* Directories count */
    - __u16 bg_flags;
    - __u32 bg_reserved[3];
    + __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
    + __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
    + __le16 bg_itable_unused; /* Unused inodes count */
    + __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
    __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
    __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
    __le32 bg_inode_table_hi; /* Inodes table block MSB */
    };

    +#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
    +#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
    +#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
    +
    #ifdef __KERNEL__
    #include
    #include
    @@ -665,6 +671,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
    #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
    #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
    #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
    +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
    #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
    #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040

    @@ -684,6 +691,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
    EXT4_FEATURE_INCOMPAT_64BIT)
    #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
    EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
    + EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
    EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
    EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
    EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  19. [PATCH] ext4: remove #ifdef CONFIG_EXT4_INDEX

    From: Eric Sandeen

    CONFIG_EXT4_INDEX is not an exposed config option in the kernel, and it is
    unconditionally defined in ext4_fs.h. tune2fs is already able to turn off
    dir indexing, so at this point it's just cluttering up the code. Remove
    it.

    Signed-off-by: Eric Sandeen
    Signed-off-by: Mingming Cao
    Signed-off-by: "Theodore Ts'o"
    Signed-off-by: Andrew Morton
    ---
    fs/ext4/dir.c | 7 -------
    fs/ext4/namei.c | 20 --------------------
    include/linux/ext4_fs.h | 14 ++------------
    3 files changed, 2 insertions(+), 39 deletions(-)

    diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
    index 3ab01c0..2ba49ff 100644
    --- a/fs/ext4/dir.c
    +++ b/fs/ext4/dir.c
    @@ -47,9 +47,7 @@ const struct file_operations ext4_dir_operations = {
    .compat_ioctl = ext4_compat_ioctl,
    #endif
    .fsync = ext4_sync_file, /* BKL held */
    -#ifdef CONFIG_EXT4_INDEX
    .release = ext4_release_dir,
    -#endif
    };


    @@ -107,7 +105,6 @@ static int ext4_readdir(struct file * filp,

    sb = inode->i_sb;

    -#ifdef CONFIG_EXT4_INDEX
    if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
    EXT4_FEATURE_COMPAT_DIR_INDEX) &&
    ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
    @@ -123,7 +120,6 @@ static int ext4_readdir(struct file * filp,
    */
    EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
    }
    -#endif
    stored = 0;
    offset = filp->f_pos & (sb->s_blocksize - 1);

    @@ -232,7 +228,6 @@ out:
    return ret;
    }

    -#ifdef CONFIG_EXT4_INDEX
    /*
    * These functions convert from the major/minor hash to an f_pos
    * value.
    @@ -518,5 +513,3 @@ static int ext4_release_dir (struct inode * inode, struct file * filp)

    return 0;
    }
    -
    -#endif
    diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
    index 5fdb862..94ee6f3 100644
    --- a/fs/ext4/namei.c
    +++ b/fs/ext4/namei.c
    @@ -144,7 +144,6 @@ struct dx_map_entry
    u16 size;
    };

    -#ifdef CONFIG_EXT4_INDEX
    static inline unsigned dx_get_block (struct dx_entry *entry);
    static void dx_set_block (struct dx_entry *entry, unsigned value);
    static inline unsigned dx_get_hash (struct dx_entry *entry);
    @@ -766,8 +765,6 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
    dx_set_block(new, block);
    dx_set_count(entries, count + 1);
    }
    -#endif
    -

    static void ext4_update_dx_flag(struct inode *inode)
    {
    @@ -869,7 +866,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
    name = dentry->d_name.name;
    if (namelen > EXT4_NAME_LEN)
    return NULL;
    -#ifdef CONFIG_EXT4_INDEX
    if (is_dx(dir)) {
    bh = ext4_dx_find_entry(dentry, res_dir, &err);
    /*
    @@ -881,7 +877,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
    return bh;
    dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
    }
    -#endif
    nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
    start = EXT4_I(dir)->i_dir_start_lookup;
    if (start >= nblocks)
    @@ -957,7 +952,6 @@ cleanup_and_exit:
    return ret;
    }

    -#ifdef CONFIG_EXT4_INDEX
    static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
    struct ext4_dir_entry_2 **res_dir, int *err)
    {
    @@ -1025,7 +1019,6 @@ errout:
    dx_release (frames);
    return NULL;
    }
    -#endif

    static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
    {
    @@ -1121,7 +1114,6 @@ static inline void ext4_set_de_type(struct super_block *sb,
    de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
    }

    -#ifdef CONFIG_EXT4_INDEX
    /*
    * Move count entries from end of map between two memory locations.
    * Returns pointer to last entry moved.
    @@ -1266,8 +1258,6 @@ errout:
    *error = err;
    return NULL;
    }
    -#endif
    -

    /*
    * Add a new entry into a directory (leaf) block. If de is non-NULL,
    @@ -1364,7 +1354,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
    return 0;
    }

    -#ifdef CONFIG_EXT4_INDEX
    /*
    * This converts a one block unindexed directory to a 3 block indexed
    * directory, and adds the dentry to the indexed directory.
    @@ -1443,7 +1432,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,

    return add_dirent_to_buf(handle, dentry, inode, de, bh);
    }
    -#endif

    /*
    * ext4_add_entry()
    @@ -1464,9 +1452,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
    struct ext4_dir_entry_2 *de;
    struct super_block * sb;
    int retval;
    -#ifdef CONFIG_EXT4_INDEX
    int dx_fallback=0;
    -#endif
    unsigned blocksize;
    u32 block, blocks;

    @@ -1474,7 +1460,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
    blocksize = sb->s_blocksize;
    if (!dentry->d_name.len)
    return -EINVAL;
    -#ifdef CONFIG_EXT4_INDEX
    if (is_dx(dir)) {
    retval = ext4_dx_add_entry(handle, dentry, inode);
    if (!retval || (retval != ERR_BAD_DX_DIR))
    @@ -1483,7 +1468,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
    dx_fallback++;
    ext4_mark_inode_dirty(handle, dir);
    }
    -#endif
    blocks = dir->i_size >> sb->s_blocksize_bits;
    for (block = 0, offset = 0; block < blocks; block++) {
    bh = ext4_bread(handle, dir, block, 0, &retval);
    @@ -1493,11 +1477,9 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
    if (retval != -ENOSPC)
    return retval;

    -#ifdef CONFIG_EXT4_INDEX
    if (blocks == 1 && !dx_fallback &&
    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
    return make_indexed_dir(handle, dentry, inode, bh);
    -#endif
    brelse(bh);
    }
    bh = ext4_append(handle, dir, &block, &retval);
    @@ -1509,7 +1491,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
    return add_dirent_to_buf(handle, dentry, inode, de, bh);
    }

    -#ifdef CONFIG_EXT4_INDEX
    /*
    * Returns 0 for success, or a negative error value
    */
    @@ -1644,7 +1625,6 @@ cleanup:
    dx_release(frames);
    return err;
    }
    -#endif

    /*
    * ext4_delete_entry deletes a directory entry by merging it with the
    diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
    index 3baeb99..151738a 100644
    --- a/include/linux/ext4_fs.h
    +++ b/include/linux/ext4_fs.h
    @@ -36,10 +36,6 @@
    /*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
    #define EXT4_MAX_RESERVE_BLOCKS 1027
    #define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
    -/*
    - * Always enable hashed directories
    - */
    -#define CONFIG_EXT4_INDEX

    /*
    * Debug code
    @@ -766,17 +762,11 @@ struct ext4_dir_entry_2 {
    * (c) Daniel Phillips, 2001
    */

    -#ifdef CONFIG_EXT4_INDEX
    - #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
    - EXT4_FEATURE_COMPAT_DIR_INDEX) && \
    +#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
    + EXT4_FEATURE_COMPAT_DIR_INDEX) && \
    (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
    #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
    #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
    -#else
    - #define is_dx(dir) 0
    -#define EXT4_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT4_LINK_MAX)
    -#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
    -#endif

    /* Legal values for the dx_root hash_version field: */

    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  20. [PATCH] Once ext4 will not implement fragment, it is believed it will never be

    From: Coly Li

    implemented in the future. Therefore fragment-related source code in ext4
    should be obsoleted -- no one will use it.

    This patch obsoletes fragments in ext4. Another patch, posted on linux-ext4,
    removes fragment support from e2fsprogs.

    Signed-off-by: Coly Li
    Acked-by: Andreas Dilger
    Signed-off-by: Andrew Morton
    ---
    fs/ext4/ialloc.c | 5 -----
    fs/ext4/inode.c | 10 ----------
    fs/ext4/super.c | 15 ---------------
    include/linux/ext4_fs.h | 35 ++++++-----------------------------
    include/linux/ext4_fs_i.h | 5 -----
    include/linux/ext4_fs_sb.h | 3 ---
    6 files changed, 6 insertions(+), 67 deletions(-)

    diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
    index 427f830..b8b538d 100644
    --- a/fs/ext4/ialloc.c
    +++ b/fs/ext4/ialloc.c
    @@ -576,11 +576,6 @@ got:
    /* dirsync only applies to directories */
    if (!S_ISDIR(mode))
    ei->i_flags &= ~EXT4_DIRSYNC_FL;
    -#ifdef EXT4_FRAGMENTS
    - ei->i_faddr = 0;
    - ei->i_frag_no = 0;
    - ei->i_frag_size = 0;
    -#endif
    ei->i_file_acl = 0;
    ei->i_dir_acl = 0;
    ei->i_dtime = 0;
    diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
    index a4848e0..f283522 100644
    --- a/fs/ext4/inode.c
    +++ b/fs/ext4/inode.c
    @@ -2645,11 +2645,6 @@ void ext4_read_inode(struct inode * inode)
    }
    inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
    ei->i_flags = le32_to_cpu(raw_inode->i_flags);
    -#ifdef EXT4_FRAGMENTS
    - ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
    - ei->i_frag_no = raw_inode->i_frag;
    - ei->i_frag_size = raw_inode->i_fsize;
    -#endif
    ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
    if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
    cpu_to_le32(EXT4_OS_HURD))
    @@ -2794,11 +2789,6 @@ static int ext4_do_update_inode(handle_t *handle,
    raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
    raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
    raw_inode->i_flags = cpu_to_le32(ei->i_flags);
    -#ifdef EXT4_FRAGMENTS
    - raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
    - raw_inode->i_frag = ei->i_frag_no;
    - raw_inode->i_fsize = ei->i_frag_size;
    -#endif
    if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
    cpu_to_le32(EXT4_OS_HURD))
    raw_inode->i_file_acl_high =
    diff --git a/fs/ext4/super.c b/fs/ext4/super.c
    index 42cbdb5..420d39d 100644
    --- a/fs/ext4/super.c
    +++ b/fs/ext4/super.c
    @@ -1655,14 +1655,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
    if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
    sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
    }
    - sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
    - le32_to_cpu(es->s_log_frag_size);
    - if (blocksize != sbi->s_frag_size) {
    - printk(KERN_ERR
    - "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n",
    - sbi->s_frag_size, blocksize);
    - goto failed_mount;
    - }
    sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
    if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
    if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
    @@ -1676,7 +1668,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
    } else
    sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
    sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
    - sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
    sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
    if (EXT4_INODE_SIZE(sb) == 0)
    goto cantfind_ext4;
    @@ -1700,12 +1691,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
    sbi->s_blocks_per_group);
    goto failed_mount;
    }
    - if (sbi->s_frags_per_group > blocksize * 8) {
    - printk (KERN_ERR
    - "EXT4-fs: #fragments per group too big: %lu\n",
    - sbi->s_frags_per_group);
    - goto failed_mount;
    - }
    if (sbi->s_inodes_per_group > blocksize * 8) {
    printk (KERN_ERR
    "EXT4-fs: #inodes per group too big: %lu\n",
    diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
    index cdee7aa..3baeb99 100644
    --- a/include/linux/ext4_fs.h
    +++ b/include/linux/ext4_fs.h
    @@ -105,20 +105,6 @@
    #define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))

    /*
    - * Macro-instructions used to manage fragments
    - */
    -#define EXT4_MIN_FRAG_SIZE 1024
    -#define EXT4_MAX_FRAG_SIZE 4096
    -#define EXT4_MIN_FRAG_LOG_SIZE 10
    -#ifdef __KERNEL__
    -# define EXT4_FRAG_SIZE(s) (EXT4_SB(s)->s_frag_size)
    -# define EXT4_FRAGS_PER_BLOCK(s) (EXT4_SB(s)->s_frags_per_block)
    -#else
    -# define EXT4_FRAG_SIZE(s) (EXT4_MIN_FRAG_SIZE << (s)->s_log_frag_size)
    -# define EXT4_FRAGS_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_FRAG_SIZE(s))
    -#endif
    -
    -/*
    * Structure of a blocks group descriptor
    */
    struct ext4_group_desc
    @@ -311,27 +297,24 @@ struct ext4_inode {
    __le32 i_generation; /* File version (for NFS) */
    __le32 i_file_acl; /* File ACL */
    __le32 i_dir_acl; /* Directory ACL */
    - __le32 i_faddr; /* Fragment address */
    + __le32 i_obso_faddr; /* Obsoleted fragment address */
    union {
    struct {
    - __u8 l_i_frag; /* Fragment number */
    - __u8 l_i_fsize; /* Fragment size */
    + __le16 l_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
    __le16 l_i_file_acl_high;
    __le16 l_i_uid_high; /* these 2 fields */
    __le16 l_i_gid_high; /* were reserved2[0] */
    __u32 l_i_reserved2;
    } linux2;
    struct {
    - __u8 h_i_frag; /* Fragment number */
    - __u8 h_i_fsize; /* Fragment size */
    + __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
    __u16 h_i_mode_high;
    __u16 h_i_uid_high;
    __u16 h_i_gid_high;
    __u32 h_i_author;
    } hurd2;
    struct {
    - __u8 m_i_frag; /* Fragment number */
    - __u8 m_i_fsize; /* Fragment size */
    + __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
    __le16 m_i_file_acl_high;
    __u32 m_i_reserved2[2];
    } masix2;
    @@ -419,8 +402,6 @@ do { \

    #if defined(__KERNEL__) || defined(__linux__)
    #define i_reserved1 osd1.linux1.l_i_reserved1
    -#define i_frag osd2.linux2.l_i_frag
    -#define i_fsize osd2.linux2.l_i_fsize
    #define i_file_acl_high osd2.linux2.l_i_file_acl_high
    #define i_uid_low i_uid
    #define i_gid_low i_gid
    @@ -431,8 +412,6 @@ do { \
    #elif defined(__GNU__)

    #define i_translator osd1.hurd1.h_i_translator
    -#define i_frag osd2.hurd2.h_i_frag;
    -#define i_fsize osd2.hurd2.h_i_fsize;
    #define i_uid_high osd2.hurd2.h_i_uid_high
    #define i_gid_high osd2.hurd2.h_i_gid_high
    #define i_author osd2.hurd2.h_i_author
    @@ -440,8 +419,6 @@ do { \
    #elif defined(__masix__)

    #define i_reserved1 osd1.masix1.m_i_reserved1
    -#define i_frag osd2.masix2.m_i_frag
    -#define i_fsize osd2.masix2.m_i_fsize
    #define i_file_acl_high osd2.masix2.m_i_file_acl_high
    #define i_reserved2 osd2.masix2.m_i_reserved2

    @@ -528,9 +505,9 @@ struct ext4_super_block {
    /*10*/ __le32 s_free_inodes_count; /* Free inodes count */
    __le32 s_first_data_block; /* First Data Block */
    __le32 s_log_block_size; /* Block size */
    - __le32 s_log_frag_size; /* Fragment size */
    + __le32 s_obso_log_frag_size; /* Obsoleted fragment size */
    /*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
    - __le32 s_frags_per_group; /* # Fragments per group */
    + __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */
    __le32 s_inodes_per_group; /* # Inodes per group */
    __le32 s_mtime; /* Mount time */
    /*30*/ __le32 s_wtime; /* Write time */
    diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h
    index 1a511e9..86ddfe2 100644
    --- a/include/linux/ext4_fs_i.h
    +++ b/include/linux/ext4_fs_i.h
    @@ -78,11 +78,6 @@ struct ext4_ext_cache {
    struct ext4_inode_info {
    __le32 i_data[15]; /* unconverted */
    __u32 i_flags;
    -#ifdef EXT4_FRAGMENTS
    - __u32 i_faddr;
    - __u8 i_frag_no;
    - __u8 i_frag_size;
    -#endif
    ext4_fsblk_t i_file_acl;
    __u32 i_dir_acl;
    __u32 i_dtime;
    diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
    index 1b2ffee..a978fba 100644
    --- a/include/linux/ext4_fs_sb.h
    +++ b/include/linux/ext4_fs_sb.h
    @@ -28,11 +28,8 @@
    * third extended-fs super-block data in memory
    */
    struct ext4_sb_info {
    - unsigned long s_frag_size; /* Size of a fragment in bytes */
    unsigned long s_desc_size; /* Size of a group descriptor in bytes */
    - unsigned long s_frags_per_block;/* Number of fragments per block */
    unsigned long s_inodes_per_block;/* Number of inodes per block */
    - unsigned long s_frags_per_group;/* Number of fragments in a group */
    unsigned long s_blocks_per_group;/* Number of blocks in a group */
    unsigned long s_inodes_per_group;/* Number of inodes in a group */
    unsigned long s_itb_per_group; /* Number of inode table blocks per group */
    --
    1.5.3.2.81.g17ed

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread
Page 1 of 2 1 2 LastLast