[code] Unlimited partitions, a try - Kernel

This is a discussion on [code] Unlimited partitions, a try - Kernel ; 15 partitions (at least for sd_mod devices) are too few. So I tried the following: after scanning the disk (sda), when we know the number of partitions P on a disk, create a new block device /dev/gd0 that is a ...

+ Reply to Thread
Results 1 to 9 of 9

Thread: [code] Unlimited partitions, a try

  1. [code] Unlimited partitions, a try


    15 partitions (at least for sd_mod devices) are too few.

    So I tried the following: after scanning the disk (sda), when we know
    the number of partitions P on a disk, create a new block device
    /dev/gd0 that is a copy of sda (in terms of disk->queue, etc.). This
    is done using alloc_disk(P).

    However, read() on gd0 will just return 0. It takes a `blockdev --rereadpt
    /dev/gd0` before the disk is accessible. And if I add a call to
    rescan inside gpdisk_new() it oopses (probably rightfully so). I do not
    know all the block layer magic, so expect some horrible code.

    Ideas, hints, anything is welcome.

    ---
    block/Makefile | 1
    block/genhd.c | 5 +
    block/gpdisk.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++
    drivers/scsi/sd.c | 1
    fs/partitions/check.c | 19 ++++++
    include/linux/genhd.h | 16 +++++
    include/scsi/sd.h | 3 +
    7 files changed, 182 insertions(+), 2 deletions(-)

    Index: linux-2.6.23/block/Makefile
    ===================================================================
    --- linux-2.6.23.orig/block/Makefile
    +++ linux-2.6.23/block/Makefile
    @@ -3,6 +3,7 @@
    #

    obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
    +obj-${CONFIG_BLOCK} += gpdisk.o

    obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
    obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
    Index: linux-2.6.23/block/genhd.c
    ===================================================================
    --- linux-2.6.23.orig/block/genhd.c
    +++ linux-2.6.23/block/genhd.c
    @@ -744,6 +744,9 @@ struct gendisk *alloc_disk_node(int mino
    rand_initialize_disk(disk);
    INIT_WORK(&disk->async_notify,
    media_change_notify_thread);
    + atomic_set(&disk->gpdisk_enabled, false);
    + disk->gpdisk = NULL;
    + disk->gpdisk_parent = NULL;
    }
    return disk;
    }
    @@ -793,6 +796,8 @@ EXPORT_SYMBOL(set_device_ro);
    void set_disk_ro(struct gendisk *disk, int flag)
    {
    int i;
    + if (gpdisk_online(disk))
    + set_disk_ro(disk->gpdisk, flag);
    disk->policy = flag;
    for (i = 0; i < disk->minors - 1; i++)
    if (disk->part[i]) disk->part[i]->policy = flag;
    Index: linux-2.6.23/block/gpdisk.c
    ===================================================================
    --- /dev/null
    +++ linux-2.6.23/block/gpdisk.c
    @@ -0,0 +1,139 @@
    +#include
    +#include
    +#include
    +#include
    +#include
    +#include
    +#define MINOR_IS_ALLOCATED ((void *)0xB4C4)
    +
    +static unsigned int gpdisk_major = 0;
    +module_param(gpdisk_major, uint, S_IRUGO);
    +
    +static LIST_HEAD(gpdisk_regions);
    +static DEFINE_SPINLOCK(gpdisk_region_lock);
    +static DEFINE_SPINLOCK(gpdisk_id_lock);
    +static DEFINE_IDR(gpdisk_id_idr);
    +
    +static unsigned int gpdisk_first_minor = 0;
    +
    +static int gpdisk_get_region(unsigned int size)
    +{
    + /* still to be done */
    + unsigned int old;
    +
    + old = gpdisk_first_minor;
    + gpdisk_first_minor += size;
    + return old;
    +}
    +
    +static void gpdisk_put_region(unsigned int start, unsigned int n)
    +{
    + /* same */
    +}
    +
    +static int gpdisk_get_diskid(void)
    +{
    + int new_id, ret;
    +
    + again:
    + ret = idr_pre_get(&gpdisk_id_idr, GFP_KERNEL);
    + if (ret == 0)
    + return -ENOMEM;
    +
    + spin_lock(&gpdisk_id_lock);
    + ret = idr_get_new(&gpdisk_id_idr, MINOR_IS_ALLOCATED, &new_id);
    + if (ret == -EAGAIN) {
    + spin_unlock(&gpdisk_id_lock);
    + cond_resched();
    + goto again;
    + }
    + if (ret != 0)
    + goto out;
    +
    + ret = new_id;
    + out:
    + spin_unlock(&gpdisk_id_lock);
    + return ret;
    +}
    +
    +static void gpdisk_put_diskid(unsigned int id)
    +{
    + idr_remove(&gpdisk_id_idr, id);
    +}
    +
    +void gpdisk_new(struct gendisk *disk, unsigned int minors)
    +{
    + struct block_device *bdev;
    + struct gendisk *gpdisk;
    + int disk_id, minor_start;
    +
    + gpdisk = alloc_disk(minors);
    + if (gpdisk == NULL)
    + return;
    +
    + disk_id = gpdisk_get_diskid();
    + if (disk_id < 0)
    + goto out;
    +
    + minor_start = gpdisk_get_region(minors);
    + if (minor_start < 0)
    + goto out2;
    +
    + disk->gpdisk = gpdisk;
    + disk->gpdisk_id = disk_id;
    +
    + sprintf(gpdisk->disk_name, "gd%u", disk_id);
    + gpdisk->gpdisk_parent = disk;
    + gpdisk->major = gpdisk_major;
    + gpdisk->first_minor = minor_start;
    + gpdisk->minors = minors;
    + gpdisk->fops = disk->fops;
    +
    + /* safe? */
    + gpdisk->private_data = disk->private_data;
    + gpdisk->queue = disk->queue;
    + gpdisk->driverfs_dev = disk->driverfs_dev;
    + gpdisk->flags = disk->flags;
    +
    + add_disk(gpdisk);
    + return;
    +
    + out2:
    + gpdisk_put_diskid(disk_id);
    + out:
    + put_disk(gpdisk);
    + return;
    +}
    +
    +void gpdisk_release(struct gendisk *disk)
    +{
    + gpdisk_put_region(disk->gpdisk->first_minor, disk->gpdisk->minors);
    + gpdisk_put_diskid(disk->gpdisk_id);
    +}
    +
    +static int __init gpdisk_init(void)
    +{
    + int i;
    +
    + i = register_blkdev(gpdisk_major, "gpdisk");
    + if (gpdisk_major == 0) {
    + /* dynamic major */
    + if (i == 0)
    + return -ENODEV;
    + gpdisk_major = i;
    + } else {
    + /* fixed major */
    + if (i != 0)
    + return -ENODEV;
    + }
    +
    + return 0;
    +}
    +
    +static void __exit gpdisk_exit(void)
    +{
    + unregister_blkdev(gpdisk_major, "gpdisk");
    +}
    +
    +module_init(gpdisk_init);
    +module_exit(gpdisk_exit);
    Index: linux-2.6.23/drivers/scsi/sd.c
    ===================================================================
    --- linux-2.6.23.orig/drivers/scsi/sd.c
    +++ linux-2.6.23/drivers/scsi/sd.c
    @@ -1610,6 +1610,7 @@ static int sd_probe(struct device *dev)
    gd = alloc_disk(16);
    if (!gd)
    goto out_free;
    + atomic_set(&gd->gpdisk_enabled, true);

    if (!idr_pre_get(&sd_index_idr, GFP_KERNEL))
    goto out_put;
    Index: linux-2.6.23/fs/partitions/check.c
    ===================================================================
    --- linux-2.6.23.orig/fs/partitions/check.c
    +++ linux-2.6.23/fs/partitions/check.c
    @@ -168,7 +168,7 @@ check_partition(struct gendisk *hd, stru
    if (isdigit(state->name[strlen(state->name)-1]))
    sprintf(state->name, "p");

    - state->limit = hd->minors;
    + state->limit = MAX_PART;
    i = res = err = 0;
    while (!res && check_part[i]) {
    memset(&state->parts, 0, sizeof(state->parts));
    @@ -528,6 +528,7 @@ exit:
    int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
    {
    struct parsed_partitions *state;
    + unsigned int max;
    int p, res;

    if (bdev->bd_part_count)
    @@ -536,6 +537,11 @@ int rescan_partitions(struct gendisk *di
    if (res)
    return res;
    bdev->bd_invalidated = 0;
    + if (gpdisk_online(disk)) {
    + gpdisk_release(disk);
    + del_gendisk(disk->gpdisk);
    + disk->gpdisk = NULL;
    + }
    for (p = 1; p < disk->minors; p++)
    delete_partition(disk, p);
    if (disk->fops->revalidate_disk)
    @@ -544,7 +550,8 @@ int rescan_partitions(struct gendisk *di
    return 0;
    if (IS_ERR(state)) /* I/O error reading the partition table */
    return -EIO;
    - for (p = 1; p < state->limit; p++) {
    + max = min(disk->minors, state->next);
    + for (p = 1; p < max; p++) {
    sector_t size = state->parts[p].size;
    sector_t from = state->parts[p].from;
    if (!size)
    @@ -560,6 +567,8 @@ int rescan_partitions(struct gendisk *di
    #endif
    }
    kfree(state);
    + if (atomic_read(&disk->gpdisk_enabled) > 0)
    + gpdisk_new(disk, state->next);
    return 0;
    }

    @@ -588,6 +597,12 @@ void del_gendisk(struct gendisk *disk)
    {
    int p;

    + if (gpdisk_online(disk)) {
    + gpdisk_release(disk);
    + del_gendisk(disk->gpdisk);
    + disk->gpdisk = NULL;
    + }
    +
    /* invalidate stuff */
    for (p = disk->minors - 1; p > 0; p--) {
    invalidate_partition(disk, p);
    Index: linux-2.6.23/include/linux/genhd.h
    ===================================================================
    --- linux-2.6.23.orig/include/linux/genhd.h
    +++ linux-2.6.23/include/linux/genhd.h
    @@ -67,6 +67,7 @@ struct partition {
    #include
    #include
    #include
    +#include

    struct partition {
    unsigned char boot_ind; /* 0x80 - active */
    @@ -141,6 +142,10 @@ struct gendisk {
    struct disk_stats dkstats;
    #endif
    struct work_struct async_notify;
    +
    + struct gendisk *gpdisk, *gpdisk_parent;
    + unsigned int gpdisk_id;
    + atomic_t gpdisk_enabled;
    };

    /* Structure for sysfs attributes on block devices */
    @@ -255,11 +260,22 @@ static inline sector_t get_capacity(stru
    {
    return disk->capacity;
    }
    +
    +static inline bool gpdisk_online(const struct gendisk *disk)
    +{
    + return atomic_read(&disk->gpdisk_enabled) > 0 && disk->gpdisk != NULL;
    +}
    +
    static inline void set_capacity(struct gendisk *disk, sector_t size)
    {
    disk->capacity = size;
    + if (gpdisk_online(disk))
    + disk->gpdisk->capacity = size;
    }

    +extern void gpdisk_new(struct gendisk *, unsigned int);
    +extern void gpdisk_release(struct gendisk *);
    +
    #endif /* __KERNEL__ */

    #ifdef CONFIG_SOLARIS_X86_PARTITION
    Index: linux-2.6.23/include/scsi/sd.h
    ===================================================================
    --- linux-2.6.23.orig/include/scsi/sd.h
    +++ linux-2.6.23/include/scsi/sd.h
    @@ -62,6 +62,9 @@ static void sd_print_sense_hdr(struct sc
    static void sd_print_result(struct scsi_disk *, int);

    #define sd_printk(prefix, sdsk, fmt, a...) \
    + gpdisk_online((sdsk)->disk) ? \
    + sdev_printk(prefix, (sdsk)->device, "[%s] " fmt, \
    + (sdsk)->disk->gpdisk->disk_name, ##a) : \
    (sdsk)->disk ? \
    sdev_printk(prefix, (sdsk)->device, "[%s] " fmt, \
    (sdsk)->disk->disk_name, ##a) : \
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  2. Re: [code] Unlimited partitions, a try

    Jan Engelhardt wrote:
    > 15 partitions (at least for sd_mod devices) are too few.


    Now when we have 20-bit minors, can't we simply recycle some of the
    higher bits for additional partitions, across the board? 63 partitions
    seem to have been sufficient; at least I haven't heard anyone complain
    about that for 15 years.

    -hpa
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  3. Re: [code] Unlimited partitions, a try


    On Oct 5 2007 15:11, H. Peter Anvin wrote:
    > Jan Engelhardt wrote:
    >> 15 partitions (at least for sd_mod devices) are too few.

    >
    > Now when we have 20-bit minors, can't we simply recycle some of the
    > higher bits for additional partitions, across the board? 63
    > partitions seem to have been sufficient; at least I haven't heard
    > anyone complain about that for 15 years.


    GPT allows up to 128 partitions, and the linux partition code currently
    allows for up to MAX_PART (256). Assuming 1048576/128, that would give
    8192 disks. With dynamic minor allocation and reuse, all that goes away
    and the limit becomes a bit less than 1048576 _partitions_.
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  4. Re: [code] Unlimited partitions, a try

    Jan Engelhardt wrote:
    > On Oct 5 2007 15:11, H. Peter Anvin wrote:
    >> Jan Engelhardt wrote:
    >>> 15 partitions (at least for sd_mod devices) are too few.

    >> Now when we have 20-bit minors, can't we simply recycle some of the
    >> higher bits for additional partitions, across the board? 63
    >> partitions seem to have been sufficient; at least I haven't heard
    >> anyone complain about that for 15 years.

    >
    > GPT allows up to 128 partitions, and the linux partition code currently
    > allows for up to MAX_PART (256). Assuming 1048576/128, that would give
    > 8192 disks. With dynamic minor allocation and reuse, all that goes away
    > and the limit becomes a bit less than 1048576 _partitions_.


    Yes, but you're proposing something with substantially higher switch
    threshold...

    -hpa
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  5. Re: [code] Unlimited partitions, a try

    Alan Cox wrote:
    > On Fri, 05 Oct 2007 15:11:52 -0700
    > "H. Peter Anvin" wrote:
    >
    >> Jan Engelhardt wrote:
    >>> 15 partitions (at least for sd_mod devices) are too few.

    >> Now when we have 20-bit minors, can't we simply recycle some of the
    >> higher bits for additional partitions, across the board? 63 partitions
    >> seem to have been sufficient; at least I haven't heard anyone complain
    >> about that for 15 years.

    >
    > This was proposed ages ago. Al Viro vetoed sparse minors and it has been
    > stuck this way ever since. If you have > 15 partitions use device mapper
    > for it. I'd prefer it fixed but it's arguable that device mapper is the
    > right way to punt all our partitioning to userspace


    Sure. However, that takes having that bit of userspace in even the most
    trivial configurations, and not just on bootup, but continuously.

    -hpa
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  6. Re: [code] Unlimited partitions, a try

    On Fri, 05 Oct 2007 15:11:52 -0700
    "H. Peter Anvin" wrote:

    > Jan Engelhardt wrote:
    > > 15 partitions (at least for sd_mod devices) are too few.

    >
    > Now when we have 20-bit minors, can't we simply recycle some of the
    > higher bits for additional partitions, across the board? 63 partitions
    > seem to have been sufficient; at least I haven't heard anyone complain
    > about that for 15 years.


    This was proposed ages ago. Al Viro vetoed sparse minors and it has been
    stuck this way ever since. If you have > 15 partitions use device mapper
    for it. I'd prefer it fixed but it's arguable that device mapper is the
    right way to punt all our partitioning to userspace


    Alan
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  7. Re: [code] Unlimited partitions, a try

    Alan Cox wrote:
    > This was proposed ages ago. Al Viro vetoed sparse minors and it has been
    > stuck this way ever since. If you have > 15 partitions use device mapper
    > for it. I'd prefer it fixed but it's arguable that device mapper is the
    > right way to punt all our partitioning to userspace.
    >


    Then please fix support for extended partitions in kpartx (part of
    multipath-tools). Debian has an incomplete patch that does the right
    thing on activation, but not on deactivation of partitions, and has an
    obvious off-by-one in the "kpartx -l /dev/sda" output.

    Signed-off-by: Hannes Reinecke
    Edited by Alexander E. Patrakov to fix incorrect output of "kpartx -l"
    Signed-off-by: Alexander E. Patrakov

    --- a/kpartx/kpartx.c
    +++ b/kpartx/kpartx.c
    @@ -387,10 +387,10 @@ main(int argc, char **argv){
    slices[j].minor = m++;

    start = slices[j].start - slices[k].start;
    - printf("%s%s%d : 0 %lu /dev/dm-%d %lu\n",
    + printf("%s%s%d : 0 %lu %s%s%d %lu\n",
    mapname, delim, j+1,
    (unsigned long) slices[j].size,
    - slices[k].minor, start);
    + mapname, delim, k+1, start);
    c--;
    }
    /* Terminate loop if nothing more to resolve */
    @@ -431,7 +431,7 @@ main(int argc, char **argv){
    break;

    case ADD:
    - for (j=0, c = 0; j<n; j++) {
    + for (j = 0, c = 0; j < n; j++) {
    if (slices[j].size == 0)
    continue;

    @@ -477,6 +477,7 @@ main(int argc, char **argv){
    d = c;
    while (c) {
    for (j = 0; j < n; j++) {
    + unsigned long start;
    int k = slices[j].container - 1;

    if (slices[j].size == 0)
    @@ -487,7 +488,7 @@ main(int argc, char **argv){
    continue;

    /* Skip all simple slices */
    - if (k < 0)
    + if (slices[j].container == 0)
    continue;

    /* Check container slice */
    @@ -502,10 +503,11 @@ main(int argc, char **argv){
    }
    strip_slash(partname);

    + start = slices[j].start - slices[k].start;
    if (safe_sprintf(params, "%d:%d %lu",
    slices[k].major,
    slices[k].minor,
    - (unsigned long)slices[j].start)) {
    + start)) {
    fprintf(stderr, "params too small\n");
    exit(1);
    }
    @@ -524,9 +526,12 @@ main(int argc, char **argv){
    &slices[j].minor);

    if (verbose)
    - printf("add map %s : 0 %lu %s %s\n",
    - partname, slices[j].size,
    - DM_TARGET, params);
    + printf("add map %s (%d:%d): 0 %lu %s\n",
    + partname,
    + slices[j].major,
    + slices[j].minor,
    + slices[j].size,
    + params);
    c--;
    }
    /* Terminate loop */


    --
    Alexander E. Patrakov
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  8. Re: [code] Unlimited partitions, a try

    H. Peter Anvin wrote:
    > Alan Cox wrote:
    >> On Fri, 05 Oct 2007 15:11:52 -0700
    >> "H. Peter Anvin" wrote:
    >>
    >>> Jan Engelhardt wrote:
    >>>> 15 partitions (at least for sd_mod devices) are too few.
    >>> Now when we have 20-bit minors, can't we simply recycle some of the
    >>> higher bits for additional partitions, across the board? 63
    >>> partitions seem to have been sufficient; at least I haven't heard
    >>> anyone complain about that for 15 years.

    >>
    >> This was proposed ages ago. Al Viro vetoed sparse minors and it has been
    >> stuck this way ever since. If you have > 15 partitions use device mapper
    >> for it. I'd prefer it fixed but it's arguable that device mapper is the
    >> right way to punt all our partitioning to userspace

    >
    > Sure. However, that takes having that bit of userspace in even the most
    > trivial configurations, and not just on bootup, but continuously.
    >

    I'm not sure that configurations requiring more than 15 partitions are
    properly described as "trivial." Which is not to disagree with your
    point about required user tools, but most systems needing such tools
    will be large and complex enough that a userspace solution will be
    acceptable.


    --
    Bill Davidsen
    "We have more to fear from the bungling of the incompetent than from
    the machinations of the wicked." - from Slashdot
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

  9. Re: [code] Unlimited partitions, a try

    On Sat, Oct 06, 2007 at 03:33:59PM -0400, Bill Davidsen wrote:
    > I'm not sure that configurations requiring more than 15 partitions are
    > properly described as "trivial." Which is not to disagree with your
    > point about required user tools, but most systems needing such tools
    > will be large and complex enough that a userspace solution will be
    > acceptable.


    And there is LVM too, which seems better than partitions in many cases.

    --
    Len Sorensen
    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/

+ Reply to Thread