BIO bi_sector submit_bio make_request_fn
BIO结构中有一个很重要的字段叫做bi_sector,在高版本中这个字段已经叫bi_iter.bi_sector了,这个不是重点,重点是下面要说的。
当读写一个block device的时候,会提交一个bio数据结构给make_request_fn,那么这个bio结构中的bi_sector到底表示什么意思呢?
在bio.h中有这么一行注释:
sector_t		bi_sector;	/* device address in 512 byte sectors */
大意是说bi_sector是设备的地址,什么地址?以sector(512字节)为单位,也就是说以这个sector为起始地址,去block设备请求数据。
一般硬盘都是以扇区(sector)为单位的,而且一般也只有硬盘有扇区,Linux中的分区比如/dev/sda1是建立在硬盘/dev/sda的基础上的,对于每一个分区来讲,我们通过fdisk来查看分区的细节信息,如下:
Device     Boot   Start      End  Sectors  Size Id Type
/dev/sda1  *       2048   999423   997376  487M 83 Linux
/dev/sda2       1001470 41940991 40939522 19.5G  5 Extended
硬盘sda有两个分区,分别是/dev/sda1和/dev/sda2,值得注意的是,分区/dev/sda1的起始扇区是2048,/dev/sda2的起始扇区是1001470,这个起始扇区在本文中非常重要。
对于硬盘来讲,在make_request_fn中,bio的bi_sector代表什么呢?
代表的就是硬盘的扇区,比如bi_sector为0,则表示从0扇区开始读取或者写入数据。
对于分区来讲,在make_request_fn中,bio的bi_sector又代表什么呢?
同样,比如bio的bi_sector为0,还是表示从硬盘的0扇区开始读取或者写入数据。只是,make_request_fn中很难收到这样的请求了,除非这个分区的起始扇区为0。为什么呢?这就是起始扇区的原因,对于分区来讲,收到的bio请求中,这个bi_sector总是大于等于起始扇区的,比如对于/dev/sda2来讲,收到的请求中的bi_sector总是大于等于1001470的。
总结一下,无论是硬盘还是分区,在make_request_fn中,收到的bio请求中的bi_sector已经是真实对应硬盘的物理扇区位置了。
再说一下submit_bio。
我们可以自己构建一个bio,然后调用submit_bio去直接对block设备读取或者写入数据,特别要注意的是,通过submit_bio出去的bio中的bi_sector是有可能会被改变的,如果操作的是分区,在真正提交到make_request_fn之前,会被加上该分区对应的起始扇区的,特别需要注意。
看看源代码,以2.6为例,注意黑体部分(即对blk_partition_remap的调用及其实现):
void submit_bio(int rw, struct bio *bio)
{
?? ?int count = bio_sectors(bio);
?? ?bio->bi_rw |= rw;
?? ?/*
?? ? * If it's a regular read/write or a barrier with data attached,
?? ? * go through the normal accounting stuff before submission.
?? ? */
?? ?if (bio_has_data(bio)) {
?? ??? ?if (rw & WRITE) {
?? ??? ??? ?count_vm_events(PGPGOUT, count);
?? ??? ?} else {
?? ??? ??? ?task_io_account_read(bio->bi_size);
?? ??? ??? ?count_vm_events(PGPGIN, count);
?? ??? ?}
?? ??? ?if (unlikely(block_dump)) {
?? ??? ??? ?char b[BDEVNAME_SIZE];
?? ??? ??? ?printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
?? ??? ??? ?current->comm, task_pid_nr(current),
?? ??? ??? ??? ?(rw & WRITE) ? "WRITE" : "READ",
?? ??? ??? ??? ?(unsigned long long)bio->bi_sector,
?? ??? ??? ??? ?bdevname(bio->bi_bdev, b));
?? ??? ?}
?? ?}
?? ?generic_make_request(bio);
}
void generic_make_request(struct bio *bio)
{
?? ?if (current->bio_tail) {
?? ??? ?/* make_request is active */
?? ??? ?*(current->bio_tail) = bio;
?? ??? ?bio->bi_next = NULL;
?? ??? ?current->bio_tail = &bio->bi_next;
?? ??? ?return;
?? ?}
?? ?/* following loop may be a bit non-obvious, and so deserves some
?? ? * explanation.
?? ? * Before entering the loop, bio->bi_next is NULL (as all callers
?? ? * ensure that) so we have a list with a single bio.
?? ? * We pretend that we have just taken it off a longer list, so
?? ? * we assign bio_list to the next (which is NULL) and bio_tail
?? ? * to &bio_list, thus initialising the bio_list of new bios to be
?? ? * added. ?__generic_make_request may indeed add some more bios
?? ? * through a recursive call to generic_make_request. ?If it
?? ? * did, we find a non-NULL value in bio_list and re-enter the loop
?? ? * from the top. ?In this case we really did just take the bio
?? ? * of the top of the list (no pretending) and so fixup bio_list and
?? ? * bio_tail or bi_next, and call into __generic_make_request again.
?? ? *
?? ? * The loop was structured like this to make only one call to
?? ? * __generic_make_request (which is important as it is large and
?? ? * inlined) and to keep the structure simple.
?? ? */
?? ?BUG_ON(bio->bi_next);
?? ?do {
?? ??? ?current->bio_list = bio->bi_next;
?? ??? ?if (bio->bi_next == NULL)
?? ??? ??? ?current->bio_tail = ¤t->bio_list;
?? ??? ?else
?? ??? ??? ?bio->bi_next = NULL;
?? ??? ?__generic_make_request(bio);
?? ??? ?bio = current->bio_list;
?? ?} while (bio);
?? ?current->bio_tail = NULL; /* deactivate */
}
static inline void __generic_make_request(struct bio *bio)
{
?? ?struct request_queue *q;
?? ?sector_t old_sector;
?? ?int ret, nr_sectors = bio_sectors(bio);
?? ?dev_t old_dev;
?? ?int err = -EIO;
?? ?might_sleep();
?? ?if (bio_check_eod(bio, nr_sectors))
?? ??? ?goto end_io;
?? ?/*
?? ? * Resolve the mapping until finished. (drivers are
?? ? * still free to implement/resolve their own stacking
?? ? * by explicitly returning 0)
?? ? *
?? ? * NOTE: we don't repeat the blk_size check for each new device.
?? ? * Stacking drivers are expected to know what they are doing.
?? ? */
?? ?old_sector = -1;
?? ?old_dev = 0;
?? ?do {
?? ??? ?char b[BDEVNAME_SIZE];
?? ??? ?q = bdev_get_queue(bio->bi_bdev);
?? ??? ?if (unlikely(!q)) {
?? ??? ??? ?printk(KERN_ERR
?? ??? ??? ? ? ? ? "generic_make_request: Trying to access "
?? ??? ??? ??? ?"nonexistent block-device %s (%Lu)\n",
?? ??? ??? ??? ?bdevname(bio->bi_bdev, b),
?? ??? ??? ??? ?(long long) bio->bi_sector);
?? ??? ??? ?goto end_io;
?? ??? ?}
?? ??? ?if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
?? ??? ??? ? ? ? nr_sectors > queue_max_hw_sectors(q))) {
?? ??? ??? ?printk(KERN_ERR "bio too big device %s (%u > %u)\n",
?? ??? ??? ? ? ? ? bdevname(bio->bi_bdev, b),
?? ??? ??? ? ? ? ? bio_sectors(bio),
?? ??? ??? ? ? ? ? queue_max_hw_sectors(q));
?? ??? ??? ?goto end_io;
?? ??? ?}
?? ??? ?if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
?? ??? ??? ?goto end_io;
?? ??? ?if (should_fail_request(bio))
?? ??? ??? ?goto end_io;
?? ??? ?/*
?? ??? ? * If this device has partitions, remap block n
?? ??? ? * of partition p to block n+start(p) of the disk.
?? ??? ? */
?? ??? ?blk_partition_remap(bio);
?? ??? ?if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
?? ??? ??? ?goto end_io;
?? ??? ?if (old_sector != -1)
?? ??? ??? ?trace_block_remap(q, bio, old_dev, old_sector);
?? ??? ?old_sector = bio->bi_sector;
?? ??? ?old_dev = bio->bi_bdev->bd_dev;
?? ??? ?if (bio_check_eod(bio, nr_sectors))
?? ??? ??? ?goto end_io;
?? ??? ?if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
?? ??? ? ? ?!blk_queue_discard(q)) {
?? ??? ??? ?err = -EOPNOTSUPP;
?? ??? ??? ?goto end_io;
?? ??? ?}
?? ??? ?trace_block_bio_queue(q, bio);
?? ??? ?ret = q->make_request_fn(q, bio);
?? ?} while (ret);
?? ?return;
end_io:
?? ?bio_endio(bio, err);
}
?
*
?* If bio->bi_dev is a partition, remap the location
?*/
static inline void blk_partition_remap(struct bio *bio)
{
?? ?struct block_device *bdev = bio->bi_bdev;
?? ?if (bio_sectors(bio) && bdev != bdev->bd_contains) {
?? ??? ?struct hd_struct *p = bdev->bd_part;
?? ??? ?bio->bi_sector += p->start_sect;
?? ??? ?bio->bi_bdev = bdev->bd_contains;
?? ??? ?trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
?? ??? ??? ??? ? ? ?bdev->bd_dev,
?? ??? ??? ??? ? ? ?bio->bi_sector - p->start_sect);
?? ?}
}
?
总结
以上是生活随笔为你收集整理的BIO bi_sector submit_bio make_request_fn的全部内容,希望文章能够帮你解决所遇到的问题。
- 上一篇: Linux 如何获取PAGE size的
- 下一篇: git提交指定文件