vmalloc 实现
生活随笔
收集整理的這篇文章主要介紹了
vmalloc 实现
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
內核版本:2.6.28
linux如何利用伙伴系統,slab分配器分配內存,用這些方法得到的內存在物理地址上都是連續的, 然而,有些時候,每次請求內存時,系統都分配物理地址連續的內存塊是不合適的,可以利用小塊內存“連接”成大塊可使用的內存.這在操作系統設計中也被稱為 “內存拼接”,顯然,內存拼接在需要較大內存,而內存訪問相比之下不是很頻繁的情況下是比較有效的.
在linux內核中用來管理內存拼接的接口是vmalloc/vfree.用vmalloc分配得到的內存在線性(虛擬)地址上是連續的,但是在物理地址上不一定連續.
函數__vmalloc列出如下:(mm/vmalloc.c)
size: 分配的虛擬空間的大小.
gfp_mask: 頁級分配器的標志.
prot: 已分配的保護掩碼.
/*
 * __vmalloc - allocate virtually contiguous memory.
 * @size:     size of the virtual space to allocate
 * @gfp_mask: flags for the page-level allocator
 * @prot:     protection mask for the allocated pages
 *
 * Thin wrapper: forwards to __vmalloc_node() with node == -1 and passes the
 * caller's return address along. Returns whatever __vmalloc_node() returns.
 */
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
    return __vmalloc_node(size, gfp_mask, prot, -1,
                          __builtin_return_address(0));
}
函數__vmalloc分配足夠的頁數與size相配,把它們映射進連續的內核虛擬空間,但分配到的物理內存塊不一定連續.在函數中第一步是取得一個大小合適的虛擬內存塊(__get_vm_area_node(...)).
/*
 * __vmalloc_node - validate the request, reserve a virtual area, then back it.
 * @size:     requested size in bytes; rounded up to a page multiple here
 * @gfp_mask: allocation flags passed down to the helpers
 * @prot:     protection passed down to __vmalloc_area_node()
 * @node:     node argument passed down to the helpers
 * @caller:   caller address passed down to the helpers
 *
 * Returns NULL when the rounded size is zero, when it spans more pages than
 * num_physpages, or when no area could be obtained; otherwise returns the
 * result of __vmalloc_area_node().
 */
static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
                            int node, void *caller)
{
    struct vm_struct *area;

    /* Round the request up to whole pages. */
    size = PAGE_ALIGN(size);

    /* Sanity check: reject empty requests and ones exceeding num_physpages. */
    if (!size || (size >> PAGE_SHIFT) > num_physpages)
        return NULL;

    /* Obtain an area between VMALLOC_START and VMALLOC_END. */
    area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
                              node, gfp_mask, caller);

    /* No area available: report failure. */
    if (!area)
        return NULL;

    return __vmalloc_area_node(area, gfp_mask, prot, node, caller);
}
第二步為這個虛擬塊逐頁分配物理頁面,並把這些頁面映射到該虛擬區間:
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
???????????????? pgprot_t prot, int node, void *caller)
{
??? struct page **pages;
??? unsigned int nr_pages, array_size, i;
?? //所要映射的頁面總數
??? nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
??? //計算數組大小,其中sizeof(struct page *)是計算頁描述符號所占的空間
??? array_size = (nr_pages * sizeof(struct page *));
??? area->nr_pages = nr_pages;
??? /* Please note that the recursion is strictly bounded. */
//如果數組大小大于1 個頁面,在非連續區進行分配,否則在連續區進行分 配
??? if (array_size > PAGE_SIZE) {
??????? //非連續區分配
??????? pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO,
??????????????? PAGE_KERNEL, node, caller);
??????? area->flags |= VM_VPAGES;
??? } else {
??????? //使用kmalloc_node在連續中進行分配
??????? pages = kmalloc_node(array_size,
??????????????? (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
??????????????? node);
??? }
??? area->pages = pages;
??? area->caller = caller;
??? //如果area->pages所指向的是無效的地址,即空間分配失敗
??? if (!area->pages) {
??????? remove_vm_area(area->addr);? //用來將相應的vm從vmlist中斷開,使其表示的空間可以被利用
??????? kfree(area);???? //釋放空間
??????? return NULL;
??? }
? // 從伙伴系統中進行物理內存頁面的分配,注意是為每一個頁面分配空間.
??? for (i = 0; i < area->nr_pages; i++) {
??????? struct page *page;
??????? if (node < 0)
??????????? page = alloc_page(gfp_mask);???? // 針對 UMA
??????? else
??????????? page = alloc_pages_node(node, gfp_mask, 0);?? // 針對 NUMA
??????? if (unlikely(!page)) {
??????????? /* Successfully allocated i pages, free them in __vunmap() 已經分配了i頁,不能夠完成的分配成功*/
??????????? area->nr_pages = i;
??????????? goto fail;
??????? }
??????? area->pages[i] = page;
??? }
//將剛申請的頁面映射到頁表中。
??? if (map_vm_area(area, prot, &pages))
??????? goto fail;
??? return area->addr;
fail:
??? vfree(area->addr);???? //釋放掉這個虛擬塊(vfree)
??? return NULL;
}
__get_vm_area_node函數先通過alloc_vmap_area在start到end之間分配一段虛擬地址空間,然後遍歷vmlist鏈表,將申請到的vm_struct結構按地址順序插入到vmlist鏈表中.函數如下:
/*
 * __get_vm_area_node - reserve a virtual range and describe it with a vm_struct.
 * @size:     requested size in bytes; page-aligned here, then grown by one
 *            PAGE_SIZE guard page
 * @flags:    stored in area->flags; VM_IOREMAP additionally selects a larger
 *            alignment
 * @start:    lower bound of the search range
 * @end:      upper bound of the search range
 * @node:     node argument for the allocations
 * @gfp_mask: allocation flags (masked with GFP_RECLAIM_MASK for kmalloc_node)
 * @caller:   stored in area->caller
 *
 * Must not be called from interrupt context (BUG_ON). Returns the new
 * vm_struct, already linked into vmlist in address order, or NULL on failure.
 */
static struct vm_struct *__get_vm_area_node(unsigned long size,
        unsigned long flags, unsigned long start, unsigned long end,
        int node, gfp_t gfp_mask, void *caller)
{
    /*
     * Deliberately an automatic variable: a static pointer here would be
     * shared by all concurrent callers and could be overwritten between
     * alloc_vmap_area() and the uses below.
     */
    struct vmap_area *va;
    struct vm_struct *area;
    struct vm_struct *tmp, **p;
    unsigned long align = 1;

    BUG_ON(in_interrupt());

    /* For VM_IOREMAP, derive the alignment from the size, clamped to
     * the range [PAGE_SHIFT, IOREMAP_MAX_ORDER]. */
    if (flags & VM_IOREMAP) {
        int bit = fls(size);

        if (bit > IOREMAP_MAX_ORDER)
            bit = IOREMAP_MAX_ORDER;
        else if (bit < PAGE_SHIFT)
            bit = PAGE_SHIFT;

        align = 1ul << bit;
    }

    size = PAGE_ALIGN(size);
    if (unlikely(!size))
        return NULL;

    /* Allocate the vm_struct descriptor with kmalloc_node(). */
    area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
    if (unlikely(!area))
        return NULL;

    /*
     * We always allocate a guard page.
     */
    size += PAGE_SIZE;

    /* Reserve size bytes of kernel virtual space between start and end. */
    va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
    if (IS_ERR(va)) {
        kfree(area);
        return NULL;
    }

    /* Fill in the descriptor and cross-link it with the vmap_area. */
    area->flags = flags;
    area->addr = (void *)va->va_start;
    area->size = size;
    area->pages = NULL;
    area->nr_pages = 0;
    area->phys_addr = 0;
    area->caller = caller;
    va->private = area;
    va->flags |= VM_VM_AREA;

    /* Insert into vmlist before the first entry whose address is >= ours. */
    write_lock(&vmlist_lock);
    for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
        if (tmp->addr >= area->addr)
            break;
    }
    area->next = *p;
    *p = area;
    write_unlock(&vmlist_lock);

    return area;
}
/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend.
 *
 * @size:     region size; must be a multiple of the page size (BUG_ON)
 * @align:    alignment applied to every candidate start address
 * @vstart:   lowest acceptable start address
 * @vend:     the region must end at or below this address
 * @node:     node argument for kmalloc_node()
 * @gfp_mask: allocation flags (masked with GFP_RECLAIM_MASK)
 *
 * Returns the new vmap_area, already inserted via __insert_vmap_area(), or
 * ERR_PTR(-ENOMEM) if the descriptor cannot be allocated, or ERR_PTR(-EBUSY)
 * if no hole is found even after one purge_vmap_area_lazy() retry.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
                unsigned long align,
                unsigned long vstart, unsigned long vend,
                int node, gfp_t gfp_mask)
{
    struct vmap_area *va;
    struct rb_node *n;
    unsigned long addr;
    int purged = 0;

    BUG_ON(size & ~PAGE_MASK);

    va = kmalloc_node(sizeof(struct vmap_area),
            gfp_mask & GFP_RECLAIM_MASK, node);
    if (unlikely(!va))
        return ERR_PTR(-ENOMEM);

retry:
    /* Align the starting address to the alignment factor. */
    addr = ALIGN(vstart, align);

    spin_lock(&vmap_area_lock);
    /* XXX: could have a last_hole cache */
    n = vmap_area_root.rb_node;
    if (n) {
        struct vmap_area *first = NULL;

        /* Walk down the tree to pick the area the linear scan starts from. */
        do {
            struct vmap_area *tmp;
            tmp = rb_entry(n, struct vmap_area, rb_node);
            if (tmp->va_end >= addr) {
                /* tmp ends at or after addr: remember it (once) if it
                 * also starts below addr + size, then go left. */
                if (!first && tmp->va_start < addr + size)
                    first = tmp;
                n = n->rb_left;
            } else {
                /* tmp ends below addr: remember it and go right. */
                first = tmp;
                n = n->rb_right;
            }
        } while (n);

        if (!first)
            goto found;

        /* Candidate ends below addr: start from its successor instead. */
        if (first->va_end < addr) {
            n = rb_next(&first->rb_node);
            if (n)
                first = rb_entry(n, struct vmap_area, rb_node);
            else
                goto found;
        }

        /* While the candidate range runs into 'first', move addr past
         * first's end (plus one page) and advance to the next area. */
        while (addr + size > first->va_start && addr + size <= vend) {
            addr = ALIGN(first->va_end + PAGE_SIZE, align);

            n = rb_next(&first->rb_node);
            if (n)
                first = rb_entry(n, struct vmap_area, rb_node);
            else
                goto found;
        }
    }
found:
    if (addr + size > vend) {
        spin_unlock(&vmap_area_lock);
        /* Out of space: purge lazily-freed areas once and retry. */
        if (!purged) {
            purge_vmap_area_lazy();
            purged = 1;
            goto retry;
        }
        if (printk_ratelimit())
            printk(KERN_WARNING "vmap allocation failed: "
                 "use vmalloc=<size> to increase size.\n");
        /* The descriptor was never inserted; free it so it is not leaked. */
        kfree(va);
        return ERR_PTR(-EBUSY);
    }

    BUG_ON(addr & (align-1));

    va->va_start = addr;
    va->va_end = addr + size;
    va->flags = 0;
    __insert_vmap_area(va);
    spin_unlock(&vmap_area_lock);

    return va;
}
linux如何利用伙伴系統,slab分配器分配內存,用這些方法得到的內存在物理地址上都是連續的, 然而,有些時候,每次請求內存時,系統都分配物理地址連續的內存塊是不合適的,可以利用小塊內存“連接”成大塊可使用的內存.這在操作系統設計中也被稱為 “內存拼接”,顯然,內存拼接在需要較大內存,而內存訪問相比之下不是很頻繁的情況下是比較有效的.
在linux內核中用來管理內存拼接的接口是vmalloc/vfree.用vmalloc分配得到的內存在線性(虛擬)地址上是連續的,但是在物理地址上不一定連續.
函數__vmalloc列出如下:(mm/vmalloc.c)
size: 分配的虛擬空間的大小.
gfp_mask: 頁級分配器的標志.
prot: 已分配的保護掩碼.
/*
 * __vmalloc - allocate virtually contiguous memory.
 * @size:     size of the virtual space to allocate
 * @gfp_mask: flags for the page-level allocator
 * @prot:     protection mask for the allocated pages
 *
 * Thin wrapper: forwards to __vmalloc_node() with node == -1 and passes the
 * caller's return address along. Returns whatever __vmalloc_node() returns.
 */
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
    return __vmalloc_node(size, gfp_mask, prot, -1,
                          __builtin_return_address(0));
}
函數__vmalloc分配足夠的頁數與size相配,把它們映射進連續的內核虛擬空間,但分配到的物理內存塊不一定連續.在函數中第一步是取得一個大小合適的虛擬內存塊(__get_vm_area_node(...)).
/*
 * __vmalloc_node - validate the request, reserve a virtual area, then back it.
 * @size:     requested size in bytes; rounded up to a page multiple here
 * @gfp_mask: allocation flags passed down to the helpers
 * @prot:     protection passed down to __vmalloc_area_node()
 * @node:     node argument passed down to the helpers
 * @caller:   caller address passed down to the helpers
 *
 * Returns NULL when the rounded size is zero, when it spans more pages than
 * num_physpages, or when no area could be obtained; otherwise returns the
 * result of __vmalloc_area_node().
 */
static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
                            int node, void *caller)
{
    struct vm_struct *area;

    /* Round the request up to whole pages. */
    size = PAGE_ALIGN(size);

    /* Sanity check: reject empty requests and ones exceeding num_physpages. */
    if (!size || (size >> PAGE_SHIFT) > num_physpages)
        return NULL;

    /* Obtain an area between VMALLOC_START and VMALLOC_END. */
    area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
                              node, gfp_mask, caller);

    /* No area available: report failure. */
    if (!area)
        return NULL;

    return __vmalloc_area_node(area, gfp_mask, prot, node, caller);
}
第二步為這個虛擬塊逐頁分配物理頁面,並把這些頁面映射到該虛擬區間:
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
???????????????? pgprot_t prot, int node, void *caller)
{
??? struct page **pages;
??? unsigned int nr_pages, array_size, i;
?? //所要映射的頁面總數
??? nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
??? //計算數組大小,其中sizeof(struct page *)是計算頁描述符號所占的空間
??? array_size = (nr_pages * sizeof(struct page *));
??? area->nr_pages = nr_pages;
??? /* Please note that the recursion is strictly bounded. */
//如果數組大小大于1 個頁面,在非連續區進行分配,否則在連續區進行分 配
??? if (array_size > PAGE_SIZE) {
??????? //非連續區分配
??????? pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO,
??????????????? PAGE_KERNEL, node, caller);
??????? area->flags |= VM_VPAGES;
??? } else {
??????? //使用kmalloc_node在連續中進行分配
??????? pages = kmalloc_node(array_size,
??????????????? (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
??????????????? node);
??? }
??? area->pages = pages;
??? area->caller = caller;
??? //如果area->pages所指向的是無效的地址,即空間分配失敗
??? if (!area->pages) {
??????? remove_vm_area(area->addr);? //用來將相應的vm從vmlist中斷開,使其表示的空間可以被利用
??????? kfree(area);???? //釋放空間
??????? return NULL;
??? }
? // 從伙伴系統中進行物理內存頁面的分配,注意是為每一個頁面分配空間.
??? for (i = 0; i < area->nr_pages; i++) {
??????? struct page *page;
??????? if (node < 0)
??????????? page = alloc_page(gfp_mask);???? // 針對 UMA
??????? else
??????????? page = alloc_pages_node(node, gfp_mask, 0);?? // 針對 NUMA
??????? if (unlikely(!page)) {
??????????? /* Successfully allocated i pages, free them in __vunmap() 已經分配了i頁,不能夠完成的分配成功*/
??????????? area->nr_pages = i;
??????????? goto fail;
??????? }
??????? area->pages[i] = page;
??? }
//將剛申請的頁面映射到頁表中。
??? if (map_vm_area(area, prot, &pages))
??????? goto fail;
??? return area->addr;
fail:
??? vfree(area->addr);???? //釋放掉這個虛擬塊(vfree)
??? return NULL;
}
__get_vm_area_node函數先通過alloc_vmap_area在start到end之間分配一段虛擬地址空間,然後遍歷vmlist鏈表,將申請到的vm_struct結構按地址順序插入到vmlist鏈表中.函數如下:
/*
 * __get_vm_area_node - reserve a virtual range and describe it with a vm_struct.
 * @size:     requested size in bytes; page-aligned here, then grown by one
 *            PAGE_SIZE guard page
 * @flags:    stored in area->flags; VM_IOREMAP additionally selects a larger
 *            alignment
 * @start:    lower bound of the search range
 * @end:      upper bound of the search range
 * @node:     node argument for the allocations
 * @gfp_mask: allocation flags (masked with GFP_RECLAIM_MASK for kmalloc_node)
 * @caller:   stored in area->caller
 *
 * Must not be called from interrupt context (BUG_ON). Returns the new
 * vm_struct, already linked into vmlist in address order, or NULL on failure.
 */
static struct vm_struct *__get_vm_area_node(unsigned long size,
        unsigned long flags, unsigned long start, unsigned long end,
        int node, gfp_t gfp_mask, void *caller)
{
    /*
     * Deliberately an automatic variable: a static pointer here would be
     * shared by all concurrent callers and could be overwritten between
     * alloc_vmap_area() and the uses below.
     */
    struct vmap_area *va;
    struct vm_struct *area;
    struct vm_struct *tmp, **p;
    unsigned long align = 1;

    BUG_ON(in_interrupt());

    /* For VM_IOREMAP, derive the alignment from the size, clamped to
     * the range [PAGE_SHIFT, IOREMAP_MAX_ORDER]. */
    if (flags & VM_IOREMAP) {
        int bit = fls(size);

        if (bit > IOREMAP_MAX_ORDER)
            bit = IOREMAP_MAX_ORDER;
        else if (bit < PAGE_SHIFT)
            bit = PAGE_SHIFT;

        align = 1ul << bit;
    }

    size = PAGE_ALIGN(size);
    if (unlikely(!size))
        return NULL;

    /* Allocate the vm_struct descriptor with kmalloc_node(). */
    area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
    if (unlikely(!area))
        return NULL;

    /*
     * We always allocate a guard page.
     */
    size += PAGE_SIZE;

    /* Reserve size bytes of kernel virtual space between start and end. */
    va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
    if (IS_ERR(va)) {
        kfree(area);
        return NULL;
    }

    /* Fill in the descriptor and cross-link it with the vmap_area. */
    area->flags = flags;
    area->addr = (void *)va->va_start;
    area->size = size;
    area->pages = NULL;
    area->nr_pages = 0;
    area->phys_addr = 0;
    area->caller = caller;
    va->private = area;
    va->flags |= VM_VM_AREA;

    /* Insert into vmlist before the first entry whose address is >= ours. */
    write_lock(&vmlist_lock);
    for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
        if (tmp->addr >= area->addr)
            break;
    }
    area->next = *p;
    *p = area;
    write_unlock(&vmlist_lock);

    return area;
}
/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend.
 *
 * @size:     region size; must be a multiple of the page size (BUG_ON)
 * @align:    alignment applied to every candidate start address
 * @vstart:   lowest acceptable start address
 * @vend:     the region must end at or below this address
 * @node:     node argument for kmalloc_node()
 * @gfp_mask: allocation flags (masked with GFP_RECLAIM_MASK)
 *
 * Returns the new vmap_area, already inserted via __insert_vmap_area(), or
 * ERR_PTR(-ENOMEM) if the descriptor cannot be allocated, or ERR_PTR(-EBUSY)
 * if no hole is found even after one purge_vmap_area_lazy() retry.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
                unsigned long align,
                unsigned long vstart, unsigned long vend,
                int node, gfp_t gfp_mask)
{
    struct vmap_area *va;
    struct rb_node *n;
    unsigned long addr;
    int purged = 0;

    BUG_ON(size & ~PAGE_MASK);

    va = kmalloc_node(sizeof(struct vmap_area),
            gfp_mask & GFP_RECLAIM_MASK, node);
    if (unlikely(!va))
        return ERR_PTR(-ENOMEM);

retry:
    /* Align the starting address to the alignment factor. */
    addr = ALIGN(vstart, align);

    spin_lock(&vmap_area_lock);
    /* XXX: could have a last_hole cache */
    n = vmap_area_root.rb_node;
    if (n) {
        struct vmap_area *first = NULL;

        /* Walk down the tree to pick the area the linear scan starts from. */
        do {
            struct vmap_area *tmp;
            tmp = rb_entry(n, struct vmap_area, rb_node);
            if (tmp->va_end >= addr) {
                /* tmp ends at or after addr: remember it (once) if it
                 * also starts below addr + size, then go left. */
                if (!first && tmp->va_start < addr + size)
                    first = tmp;
                n = n->rb_left;
            } else {
                /* tmp ends below addr: remember it and go right. */
                first = tmp;
                n = n->rb_right;
            }
        } while (n);

        if (!first)
            goto found;

        /* Candidate ends below addr: start from its successor instead. */
        if (first->va_end < addr) {
            n = rb_next(&first->rb_node);
            if (n)
                first = rb_entry(n, struct vmap_area, rb_node);
            else
                goto found;
        }

        /* While the candidate range runs into 'first', move addr past
         * first's end (plus one page) and advance to the next area. */
        while (addr + size > first->va_start && addr + size <= vend) {
            addr = ALIGN(first->va_end + PAGE_SIZE, align);

            n = rb_next(&first->rb_node);
            if (n)
                first = rb_entry(n, struct vmap_area, rb_node);
            else
                goto found;
        }
    }
found:
    if (addr + size > vend) {
        spin_unlock(&vmap_area_lock);
        /* Out of space: purge lazily-freed areas once and retry. */
        if (!purged) {
            purge_vmap_area_lazy();
            purged = 1;
            goto retry;
        }
        if (printk_ratelimit())
            printk(KERN_WARNING "vmap allocation failed: "
                 "use vmalloc=<size> to increase size.\n");
        /* The descriptor was never inserted; free it so it is not leaked. */
        kfree(va);
        return ERR_PTR(-EBUSY);
    }

    BUG_ON(addr & (align-1));

    va->va_start = addr;
    va->va_end = addr + size;
    va->flags = 0;
    __insert_vmap_area(va);
    spin_unlock(&vmap_area_lock);

    return va;
}
總結
以上是生活随笔為你收集整理的vmalloc 实现的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: kmalloc/kfree,vmallo
- 下一篇: linux copy_from/to_u