轉自:https://blog.csdn.net/21cnbao/article/details/7309757
在我們使用ARM等嵌入式Linux系統的時候,一個頭疼的問題是GPU,Camera,HDMI等都需要預留大量連續內存,這部分內存平時不用,但是一般的做法又必須先預留著。目前,Marek Szyprowski和Michal Nazarewicz實現了一套全新的Contiguous Memory Allocator。通過這套機制,我們可以做到不預留內存,這些內存平時是可用的,只有當需要的時候才被分配給Camera,HDMI等設備。下面分析它的基本代碼流程。
聲明連續內存
內核啟動過程中arch/arm/mm/init.c中的arm_memblock_init()會調用dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));
該函數位於:drivers/base/dma-contiguous.c
/*
 * dma_contiguous_reserve() - choose the size of the global contiguous area
 * and hand it to dma_declare_contiguous().
 * @limit: forwarded unchanged as the limit argument of
 *         dma_declare_contiguous().
 *
 * Size selection:
 *   - if size_cmdline is not -1 it is used as-is (presumably a value parsed
 *     from the kernel command line; its definition is not shown here);
 *   - otherwise exactly one CONFIG_CMA_SIZE_SEL_* branch below decides,
 *     combining size_bytes and cma_early_percent_memory().
 * If none of the CONFIG_CMA_SIZE_SEL_* options is defined, selected_size
 * stays 0 and nothing is reserved.
 */
void __init dma_contiguous_reserve(phys_addr_t limit)
{
	unsigned long selected_size = 0;

	pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);

	if (size_cmdline != -1) {
		/* Explicit override wins over every Kconfig policy. */
		selected_size = size_cmdline;
	} else {
#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
		/* Fixed size. */
		selected_size = size_bytes;
#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
		/* Size returned by cma_early_percent_memory(). */
		selected_size = cma_early_percent_memory();
#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
		/* Smaller of the two candidates. */
		selected_size = min(size_bytes, cma_early_percent_memory());
#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
		/* Larger of the two candidates. */
		selected_size = max(size_bytes, cma_early_percent_memory());
#endif
	}

	/* A zero size means "no global area": skip the reservation entirely. */
	if (selected_size) {
		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
			 selected_size / SZ_1M);

		/* dev == NULL and base == 0; the return value is not checked. */
		dma_declare_contiguous(NULL, selected_size, 0, limit);
	}
};
其中的size_bytes定義為:
static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M;
默認情況下,CMA_SIZE_MBYTES會被定義為16(即16MB),來源於CONFIG_CMA_SIZE_MBYTES=16
->
int __init dma_declare_contiguous(struct device *dev, unsigned long size, phys_addr_t base, phys_addr_t limit) { ... if (base) { if (memblock_is_region_reserved(base, size) || memblock_reserve(base, size) < 0) { base = -EBUSY; goto err; } } else { phys_addr_t addr = __memblock_alloc_base(size, alignment, limit); if (!addr) { base = -ENOMEM; goto err; } else if (addr + size > ~(unsigned long)0) { memblock_free(addr, size); base = -EINVAL; base = -EINVAL; goto err; } else { base = addr; } } r->start = base; r->size = size; r->dev = dev; cma_reserved_count++; pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M, (unsigned long)base); dma_contiguous_early_fixup(base, size); return 0; err: pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M); return base; } 由此可見,連續(xù)內(nèi)存區(qū)域也是在內(nèi)核啟動(dòng)的早期,通過__memblock_alloc_base()拿到的。
另外:
drivers/base/dma-contiguous.c裡面的core_initcall()會導致cma_init_reserved_areas()被調用:
/*
 * cma_init_reserved_areas() - turn every recorded reservation into a CMA area.
 *
 * Walks cma_reserved_count entries starting at cma_reserved.  For each entry
 * it calls cma_create_area() with the start converted to a page frame number
 * (PFN_DOWN) and the size shifted right by PAGE_SHIFT, then attaches the
 * resulting area to the entry's device with dev_set_cma_area().
 *
 * Entries for which cma_create_area() returns an error pointer are skipped
 * silently; the function itself always returns 0.
 */
static int __init cma_init_reserved_areas(void)
{
	struct cma_reserved *r = cma_reserved;
	unsigned i = cma_reserved_count;

	pr_debug("%s()\n", __func__);

	/* Count down the remaining entries while advancing through the table. */
	for (; i; --i, ++r) {
		struct cma *cma;
		cma = cma_create_area(PFN_DOWN(r->start),
				      r->size >> PAGE_SHIFT);
		/* Only successfully created areas are bound to a device. */
		if (!IS_ERR(cma))
			dev_set_cma_area(r->dev, cma);
	}
	return 0;
}
/* Registers the function above through the core_initcall() mechanism. */
core_initcall(cma_init_reserved_areas);
cma_create_area()會調用cma_activate_area(),cma_activate_area()函數則會針對每個pageblock調用:
init_cma_reserved_pageblock(pfn_to_page(base_pfn));
這個函數則會通過set_pageblock_migratetype(page, MIGRATE_CMA)將頁設置為MIGRATE_CMA類型的:
#ifdef CONFIG_CMA
/*
 * init_cma_reserved_pageblock() - release one pageblock to the page
 * allocator, marked as MIGRATE_CMA.
 * @page: first page of the pageblock.
 *
 * Steps, in order:
 *   1. for each of the pageblock_nr_pages pages starting at @page, clear the
 *      Reserved flag and set the page count to 0;
 *   2. call set_page_refcounted() on the first page only;
 *   3. set the pageblock's migrate type to MIGRATE_CMA;
 *   4. free the block as a single pageblock_order allocation;
 *   5. add pageblock_nr_pages to totalram_pages.
 */
void __init init_cma_reserved_pageblock(struct page *page)
{
	unsigned i = pageblock_nr_pages;
	struct page *p = page;

	/*
	 * do/while: the body runs before the counter is tested, so this relies
	 * on pageblock_nr_pages being non-zero.
	 */
	do {
		__ClearPageReserved(p);
		set_page_count(p, 0);
	} while (++p, --i);

	set_page_refcounted(page);
	/* The migrate type is set before the pages are freed. */
	set_pageblock_migratetype(page, MIGRATE_CMA);
	__free_pages(page, pageblock_order);
	totalram_pages += pageblock_nr_pages;
}
#endif
同時(shí)其中調(diào)用的__free_pages(page, pageblock_order);最終會(huì)調(diào)用到__free_one_page(page, zone, order, migratetype);
相關的page會被加到MIGRATE_CMA的free_list上面去:
list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);
?
申請連續內存
申請連續內存仍然使用標準的arch/arm/mm/dma-mapping.c中定義的dma_alloc_coherent()和dma_alloc_writecombine(),這二者會間接調用drivers/base/dma-contiguous.c中的
struct page *dma_alloc_from_contiguous(struct device *dev, int count, unsigned int align) ?
->
?
struct page *dma_alloc_from_contiguous(struct device *dev, int count, unsigned int align) { ... for (;;) { pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count, start, count, mask); if (pageno >= cma->count) { ret = -ENOMEM; goto error; } pfn = cma->base_pfn + pageno; ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); if (ret == 0) { bitmap_set(cma->bitmap, pageno, count); break; } else if (ret != -EBUSY) { goto error; } pr_debug("%s(): memory range at %p is busy, retrying\n", __func__, pfn_to_page(pfn)); start = pageno + mask + 1; } ... } ->
int alloc_contig_range(unsigned long start, unsigned long end,
                       unsigned migratetype)
需要隔離page,隔離page的作用通過代碼的注釋可以體現:
ret = start_isolate_page_range(pfn_align_to_maxpage_down(start), pfn_align_to_maxpage_up(end), migratetype);
簡單地說,就是把相關的page標記為MIGRATE_ISOLATE,這樣buddy系統就不會再使用它們。
?
/*
 * start_isolate_page_range() - isolate every pageblock in [start_pfn, end_pfn).
 * @start_pfn:   first page frame number of the range.
 * @end_pfn:     page frame number just past the range.
 * @migratetype: migrate type handed to unset_migratetype_isolate() when a
 *               partial isolation has to be rolled back.
 *
 * Both bounds are checked with BUG_ON() against the mask
 * (pageblock_nr_pages - 1), i.e. they must have none of those low bits set.
 *
 * Return: 0 when every pageblock was isolated, -EBUSY when
 * set_migratetype_isolate() reported failure for one of them; in that case
 * the pageblocks handled before the failing one are un-isolated again.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			     unsigned migratetype)
{
	unsigned long pfn;
	unsigned long undo_pfn;
	struct page *page;

	BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
	BUG_ON((end_pfn) & (pageblock_nr_pages - 1));

	/* Step through the range one pageblock at a time. */
	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += pageblock_nr_pages) {
		/* A pageblock for which no page is returned is skipped. */
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page && set_migratetype_isolate(page)) {
			/* Remember where isolation stopped for the rollback. */
			undo_pfn = pfn;
			goto undo;
		}
	}
	return 0;
undo:
	/* Roll back [start_pfn, undo_pfn); the failing block is not touched. */
	for (pfn = start_pfn;
	     pfn < undo_pfn;
	     pfn += pageblock_nr_pages)
		unset_migratetype_isolate(pfn_to_page(pfn), migratetype);

	return -EBUSY;
}
?
接下來調用__alloc_contig_migrate_range()進行頁面隔離和遷移:
/*
 * __alloc_contig_migrate_range() - isolate and migrate the pages found in
 * [start, end).
 * @start: first page frame number of the range.
 * @end:   page frame number just past the range.
 *
 * The loop alternates between two steps until the scan position reaches
 * @end and the pending list is empty:
 *   - when the list is empty, isolate_migratepages_range() refills it and
 *     returns the next scan position (0 is treated as failure);
 *   - migrate_pages() then tries to move the listed pages, obtaining
 *     destination pages through __alloc_contig_migrate_alloc().
 *
 * Return: 0 on success (a positive migrate_pages() result is also mapped
 * to 0), -EINTR when a fatal signal is pending or isolation returned 0,
 * -EBUSY (or an earlier negative migrate_pages() result) when the same batch
 * is still not empty on the fifth consecutive retry.
 */
static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
{
	unsigned long pfn = start;
	unsigned int tries = 0;
	int ret = 0;

	struct compact_control cc = {
		.nr_migratepages = 0,
		.order = -1,
		.zone = page_zone(pfn_to_page(start)),
		.sync = true,
	};
	INIT_LIST_HEAD(&cc.migratepages);

	migrate_prep_local();

	while (pfn < end || !list_empty(&cc.migratepages)) {
		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		if (list_empty(&cc.migratepages)) {
			/* Previous batch is done: isolate the next one. */
			cc.nr_migratepages = 0;
			pfn = isolate_migratepages_range(cc.zone, &cc,
							 pfn, end);
			if (!pfn) {
				ret = -EINTR;
				break;
			}
			/* The retry budget applies per batch. */
			tries = 0;
		} else if (++tries == 5) {
			/* Keep a real error code if there is one, else -EBUSY. */
			ret = ret < 0 ? ret : -EBUSY;
			break;
		}

		ret = migrate_pages(&cc.migratepages,
				    __alloc_contig_migrate_alloc,
				    0, false, true);
	}

	/* Whatever is still on the list goes back via putback_lru_pages(). */
	putback_lru_pages(&cc.migratepages);
	return ret > 0 ? 0 : ret;
}
其中的函數(shù)migrate_pages()會(huì)完成頁面的遷移,遷移過程中通過傳入的__alloc_contig_migrate_alloc()申請新的page,并將老的page付給新的page:
int migrate_pages(struct list_head *from, new_page_t get_new_page, unsigned long private, bool offlining, bool sync) { int retry = 1; int nr_failed = 0; int pass = 0; struct page *page; struct page *page2; int swapwrite = current->flags & PF_SWAPWRITE; int rc; if (!swapwrite) current->flags |= PF_SWAPWRITE; for(pass = 0; pass < 10 && retry; pass++) { retry = 0; list_for_each_entry_safe(page, page2, from, lru) { cond_resched(); rc = unmap_and_move(get_new_page, private, page, pass > 2, offlining, sync); switch(rc) { case -ENOMEM: goto out; case -EAGAIN: retry++; break; case 0: break; default: nr_failed++; break; } } } rc = 0; ... } 其中的unmap_and_move()函數(shù)較為關(guān)鍵,它定義在mm/migrate.c中
static int unmap_and_move(new_page_t get_new_page, unsigned long private, struct page *page, int force, bool offlining, bool sync) { int rc = 0; int *result = NULL; struct page *newpage = get_new_page(page, private, &result); int remap_swapcache = 1; int charge = 0; struct mem_cgroup *mem = NULL; struct anon_vma *anon_vma = NULL; ... charge = mem_cgroup_prepare_migration(page, newpage, &mem); ... if (PageWriteback(page)) { if (!force || !sync) goto uncharge; wait_on_page_writeback(page); } if (PageAnon(page)) { anon_vma = page_lock_anon_vma(page); if (anon_vma) { get_anon_vma(anon_vma); page_unlock_anon_vma(anon_vma); } else if (PageSwapCache(page)) { remap_swapcache = 0; } else { goto uncharge; } } ... try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); skip_unmap: if (!page_mapped(page)) rc = move_to_new_page(newpage, page, remap_swapcache); if (rc && remap_swapcache) remove_migration_ptes(page, page); if (anon_vma) drop_anon_vma(anon_vma); uncharge: if (!charge) mem_cgroup_end_migration(mem, page, newpage, rc == 0); unlock: unlock_page(page); move_newpage: ... } 通過unmap_and_move(),老的page就被遷移過去新的page。
接下來要回收page,回收page的作用是,不至於因為拿了連續的內存後,系統變得內存飢餓:
->
__reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
->
/*
 * __reclaim_pages() - reclaim until @zone is above its low watermark plus
 * @count pages.
 * @zone:     zone whose watermark is checked.
 * @gfp_mask: allocation flags; used to derive the zone index and zonelist
 *            and passed on to the reclaim and OOM helpers.
 * @count:    number of pages added on top of the low watermark.
 *
 * __update_cma_watermarks() is called with +@count before the loop and with
 * -@count after it, so the adjustment is undone on exit.
 *
 * The loop has no retry limit: it runs until zone_watermark_ok() succeeds.
 *
 * Return: always @count.
 */
static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
{
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	/* The zonelist is always taken from node 0. */
	struct zonelist *zonelist = node_zonelist(0, gfp_mask);
	int did_some_progress = 0;
	int order = 1;
	unsigned long watermark;

	__update_cma_watermarks(zone, count);

	/* Target: the zone's low watermark plus the pages about to be taken. */
	watermark = low_wmark_pages(zone) + count;

	while (!zone_watermark_ok(zone, 0, watermark, 0, 0)) {
		wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));

		did_some_progress = __perform_reclaim(gfp_mask, order,
						      zonelist, NULL);
		if (!did_some_progress) {
			/* Reclaim made no progress: fall back to out_of_memory(). */
			out_of_memory(zonelist, gfp_mask, order, NULL);
		}
	}

	__update_cma_watermarks(zone, -count);

	return count;
}
?
釋放連續內存
內存釋放的時候也比較簡單,直接就是:
arch/arm/mm/dma-mapping.c:
void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) ->
arch/arm/mm/dma-mapping.c:
/*
 * __free_from_contiguous() - return a buffer to the contiguous allocator.
 * @dev:  device passed on to dma_release_from_contiguous().
 * @page: first page of the buffer.
 * @size: buffer size; shifted right by PAGE_SHIFT to obtain the page count.
 *
 * The range is first remapped with pgprot_kernel via __dma_remap() and only
 * then released.  The result of dma_release_from_contiguous() is ignored.
 */
static void __free_from_contiguous(struct device *dev, struct page *page,
				   size_t size)
{
	__dma_remap(page, size, pgprot_kernel);
	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
}
->
bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count) { ... free_contig_range(pfn, count); .. }
->
/*
 * free_contig_range() - free @nr_pages consecutive pages starting at @pfn.
 * @pfn:      page frame number of the first page.
 * @nr_pages: number of pages to free.
 *
 * Each page is freed individually with __free_page().
 */
void free_contig_range(unsigned long pfn, unsigned nr_pages)
{
	/* nr_pages is post-decremented, so the body runs exactly nr_pages times. */
	for (; nr_pages--; ++pfn)
		__free_page(pfn_to_page(pfn));
}
將page交還給buddy。
?
內核內存分配的migratetype
內核內存分配的時候,帶的標誌是GFP_,但是GFP_可以轉化為migratetype:
/*
 * allocflags_to_migratetype() - derive a migrate type from GFP flags.
 * @gfp_flags: allocation flags.
 *
 * Warns (WARN_ON) when every bit of GFP_MOVABLE_MASK is set at once.
 *
 * Return: MIGRATE_UNMOVABLE when page_group_by_mobility_disabled is set;
 * otherwise a value in 0..3 whose bit 1 is "__GFP_MOVABLE is set" and whose
 * bit 0 is "__GFP_RECLAIMABLE is set".  The MIGRATE_* names those numbers
 * correspond to are defined outside this listing.
 */
static inline int allocflags_to_migratetype(gfp_t gfp_flags)
{
	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);

	if (unlikely(page_group_by_mobility_disabled))
		return MIGRATE_UNMOVABLE;

	/* Each "!= 0" yields 0 or 1; the movable bit is shifted into bit 1. */
	return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
		((gfp_flags & __GFP_RECLAIMABLE) != 0);
}
之后申請內(nèi)存的時(shí)候,會(huì)對比遷移類型匹配的free_list:
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET, preferred_zone, migratetype);
另外,筆者也編寫了一個測試程序,透過它隨時測試CMA的功能:
#include <linux/module.h> #include <linux/device.h> #include <linux/fs.h> #include <linux/miscdevice.h> #include <linux/dma-mapping.h> #define CMA_NUM 10 static struct device *cma_dev; static dma_addr_t dma_phys[CMA_NUM]; static void *dma_virt[CMA_NUM]; static ssize_t cma_test_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { int i; for (i = 0; i < CMA_NUM; i++) { if (dma_virt[i]) { dma_free_coherent(cma_dev, (i + 1) * SZ_1M, dma_virt[i], dma_phys[i]); _dev_info(cma_dev, "free virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]); dma_virt[i] = NULL; break; } } return 0; } static ssize_t cma_test_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { int i; int ret; for (i = 0; i < CMA_NUM; i++) { if (!dma_virt[i]) { dma_virt[i] = dma_alloc_coherent(cma_dev, (i + 1) * SZ_1M, &dma_phys[i], GFP_KERNEL); if (dma_virt[i]) { void *p; for (p = dma_virt[i]; p < dma_virt[i] + (i + 1) * SZ_1M; p += PAGE_SIZE) *(u32 *)p = 0; _dev_info(cma_dev, "alloc virt: %p phys: %p\n", dma_virt[i], (void *)dma_phys[i]); } else { dev_err(cma_dev, "no mem in CMA area\n"); ret = -ENOMEM; } break; } } return count; } static const struct file_operations cma_test_fops = { .owner = THIS_MODULE, .read = cma_test_read, .write = cma_test_write, }; static struct miscdevice cma_test_misc = { .name = "cma_test", .fops = &cma_test_fops, }; static int __init cma_test_init(void) { int ret = 0; ret = misc_register(&cma_test_misc); if (unlikely(ret)) { pr_err("failed to register cma test misc device!\n"); return ret; } cma_dev = cma_test_misc.this_device; cma_dev->coherent_dma_mask = ~0; _dev_info(cma_dev, "registered.\n"); return ret; } module_init(cma_test_init); static void __exit cma_test_exit(void) { misc_deregister(&cma_test_misc); } module_exit(cma_test_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>"); MODULE_DESCRIPTION("kernel module to help the test of CMA"); MODULE_ALIAS("CMA test");
申請內存:向設備節點寫入任意內容即可觸發一次分配,例如 echo 0 > /dev/cma_test
釋放內存:讀取設備節點即可釋放一塊已分配的內存,例如 cat /dev/cma_test
參考鏈接:
[1] http://www.spinics.net/lists/arm-kernel/msg160854.html
[2] http://www.spinics.net/lists/arm-kernel/msg162063.html
[3] http://lwn.net/Articles/447405/
轉載於:https://www.cnblogs.com/sky-heaven/p/9549482.html
總結
以上是生活随笔為你收集整理的Linux内核最新的连续内存分配器(CMA)——避免预留大块内存【转】的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。