elf文件结构分析
elf簡介
ELF(Executable and Linkable Format)是可執行與可鏈接格式的縮寫,是linux下最重要的一種可執行文件格式。如想深入了解linux下的程序運行原理,對elf文件組織格式的了解是必不可少的。
網上有很多elf結構分析相關的文章,這里推薦一個pdf《ELF文件格式分析.pdf》,細節講的比較系統,大家可以參考,可以到https://download.csdn.net/download/t3swing/10302876下載。
elf格式介紹
linux可以通過readelf命令來查看elf文件格式,elf格式是一個多級索引結構,基本結構如下圖所示:
有幾個概念:
- Section:一般翻譯為節,很多文章都與段等同了,但一般說的段指的是segment。每個節都定義了具體獨立的功能,elf鏈接的時候,多個目標(.o)文件的節會合到一起。elf通過節表來索引具體的節,每個節的解析方式都可能不一樣,可以對照著文檔解析與理解不同節的含義。
- segment:段是節的集合,一個段包含多個同屬性(讀、寫、執行屬性)的節,一般用在elf加載運行過程中,segment會指定地址范圍,落在該地址范圍的section,程序執行時,對應的section會加載到該段里面。至于節和段的地址是怎么生成的,這都是ld鏈接程序確定的,ld命令參數非常多,甚至可以自定義segment中包含的節,同一份代碼,不同平臺生成的elf段和節的對應關系都可能不一樣。
- Program:程序表,elf中主要用來管理segment。
注意:上述圖只是elf的結構示意圖,不代表具體位置,實際上,elf只規定了總頭位置,其他的位置都是根據offset與size來確定的,圖中也沒有體現出segment與section的關系。
elf文件格式解析
可以嘗試自己解析elf來加深對elf文件結構的理解,解析elf格式,結構體定義可以直接使用linux下的/usr/include/elf.h頭文件,該頭文件包含32位和64位的elf解析相關結構體。
- linux下解析elf直接引用/usr/include/elf.h即可。
- windows下,可以把/usr/include/elf.h復制過去,并替換基礎變量名稱定義即可。如下:
改成
typedef signed char int8_t;
typedef short int int16_t;
typedef int int32_t;
typedef long long int int64_t;
/* Unsigned. */
typedef unsigned char uint8_t;
typedef unsigned short int uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;

解析elf之前,需先了解一下elf頭文件中的各種縮寫,如下:
- hdr、h:header的縮寫,如Elf32_Ehdr、Elf32_Shdr、sh_name等;
- e:elf縮寫,如Ehdr,e_type中的e等;
- s:section節的縮寫,如Shdr、sh_size等;
- p:Program程序的縮寫,如Phdr、p_type等;
- t :table表的縮寫,elf是多級索引結構,通過表來完成;
- sym:Symbol符號的縮寫;
- INTERP : interpreter解釋器縮寫;
elf頭文件的命名習慣用單個字母組合完成,如sht表示section header table即節頭表,st表示symbol table即符號表,縮寫有很多,elf.h的注釋一般都解釋了縮寫代表什么意思。
對照著上節的結構體的結構圖和elf.h很容易解析elf文件結構,有一個細節需注意,解析節時必須先找到.shstrtab這個包含節頭名稱字符串表的節,只有知道了各節頭的名稱,才能繼續解析各個節的內容。字符串表(string table)有多個(如.shstrtab、.strtab、.dynstr等),不同情況下用不同的字符串解析,如節頭名稱不能用.strtab或者.dynstr字符串表去解析。
自己嘗試解析elf文件,可以很快的掌握elf結構,比看文檔快,熟悉結構前,不要對著二進制去看,效率太低。細節方面可以查看文檔,下面是一個簡單的elf解析程序,仿照readelf輸出格式,主要解析了節頭、段頭及符號表部分信息,大家可以參考,源碼如下:
/*
 * Minimal readelf-like dumper for 32-bit ELF files.
 *
 * The whole file is read into memory and then decoded in place: ELF header,
 * section headers, the .symtab/.dynsym symbol tables, program headers and the
 * section-to-segment mapping.
 *
 * Limitations: only ELFCLASS32 images whose byte order matches the host are
 * decoded, because the structures are read directly from the file image
 * without any byte swapping.
 */
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "elf.h"

#define INVALID (-1)

#define SECTION_NAME_SHSTRTAB ".shstrtab"
#define SECTION_NAME_SYMTAB ".symtab"
#define SECTION_NAME_STRTAB ".strtab"
#define SECTION_NAME_DYNSYM ".dynsym"
#define SECTION_NAME_DYNSTR ".dynstr"

/* Printed in place of a name that is missing from a lookup or string table. */
#define UNKNOWN_STR "<unknown>"

/**
 * One entry of a value <-> name lookup table.
 * A table is an array of entries terminated by one whose enum_str is NULL.
 */
typedef struct
{
    int enum_value;
    char *enum_str;
} ENUM_S;

/* Silent table lookup: returns the name for enum_value, or NULL if absent. */
static char *findEnumStr(const ENUM_S *specs_enum, int enum_value)
{
    if (specs_enum == NULL)
    {
        return NULL;
    }
    for (; specs_enum->enum_str != NULL; specs_enum++)
    {
        if (specs_enum->enum_value == enum_value)
        {
            return specs_enum->enum_str;
        }
    }
    return NULL;
}

/* Table lookup that never returns NULL, so the result is safe for "%s". */
static const char *enumName(const ENUM_S *specs_enum, int enum_value)
{
    const char *name = findEnumStr(specs_enum, enum_value);

    return (name != NULL) ? name : UNKNOWN_STR;
}

/* Maps a NULL string to a printable placeholder. */
static const char *strOrUnknown(const char *str)
{
    return (str != NULL) ? str : UNKNOWN_STR;
}

/* Returns the ELFDATA2LSB/ELFDATA2MSB code matching the host byte order. */
static int hostDataEncoding(void)
{
    const unsigned short probe = 1;

    return (*(const unsigned char *)&probe == 1) ? ELFDATA2LSB : ELFDATA2MSB;
}

/* Overflow-safe test that the byte range [off, off + len) lies inside a file of `size` bytes. */
static int rangeInFile(size_t off, size_t len, size_t size)
{
    return (off <= size) && (len <= size - off);
}

/**
 * Converts an enum value to its name.
 *
 * @param specs_enum NULL-terminated lookup table.
 * @param enum_value value to look up.
 * @return the matching name, or NULL (after printing a diagnostic) when the
 *         table is NULL or holds no such value. Callers must check for NULL
 *         before printing the result.
 */
char *enum2str(const ENUM_S *specs_enum, int enum_value)
{
    char *str = NULL;

    if (NULL == specs_enum)
    {
        /* %p instead of an (int) cast: a pointer does not fit in an int on 64-bit hosts. */
        printf("param error! enum:%p\n", (const void *)specs_enum);
        return NULL;
    }

    str = findEnumStr(specs_enum, enum_value);
    if (str == NULL)
    {
        printf("enum_value not match:%d \n", enum_value);
    }
    return str;
}

/**
 * Converts a name to its enum value.
 *
 * @param specs_enum NULL-terminated lookup table.
 * @param enum_str   name to look up (exact strcmp match).
 * @return the matching value, or -1 (after printing a diagnostic) on bad
 *         parameters or when no entry matches.
 */
int str2enum(const ENUM_S *specs_enum, const char *enum_str)
{
    if ((NULL == specs_enum) || (NULL == enum_str))
    {
        printf("param error! specs_enum:%p enum_str:%p\n",
               (const void *)specs_enum, (const void *)enum_str);
        return -1;
    }

    for (; specs_enum->enum_str != NULL; specs_enum++)
    {
        if (0 == strcmp(specs_enum->enum_str, enum_str))
        {
            return specs_enum->enum_value;
        }
    }

    printf("enum_str not match:%s \n", enum_str);
    return -1;
}

/**
 * Reads a whole file into a freshly allocated buffer.
 *
 * @param file path of the file to read.
 * @param size receives the number of bytes read (0 on failure).
 * @return the buffer, owned by the caller (release with free()), or NULL on
 *         any error (open/seek/read failure, empty file, file larger than
 *         INT_MAX bytes, out of memory).
 */
char *readFile(char *file, int *size)
{
    long fsize = 0;
    char *mem = NULL;
    FILE *fp = NULL;

    if ((file == NULL) || (size == NULL))
    {
        printf("readFile param error!\n");
        return NULL;
    }
    *size = 0;

    fp = fopen(file, "rb");
    if (fp == NULL)
    {
        /* Return directly: there is no stream to close on this path. */
        perror("fopen error!");
        return NULL;
    }

    if (fseek(fp, 0, SEEK_END) != 0)
    {
        perror("fseek error!");
        goto RELEASE;
    }
    fsize = ftell(fp);
    if (fsize < 0)
    {
        perror("ftell error!");
        goto RELEASE;
    }
    if ((fsize == 0) || (fsize > INT_MAX))
    {
        printf("readFile unsupported file size:%ld\n", fsize);
        goto RELEASE;
    }
    if (fseek(fp, 0, SEEK_SET) != 0)
    {
        perror("fseek error!");
        goto RELEASE;
    }

    mem = malloc((size_t)fsize);
    if (mem == NULL)
    {
        perror("malloc error!");
        goto RELEASE;
    }

    if (fread(mem, 1, (size_t)fsize, fp) != (size_t)fsize)
    {
        printf("readFile short read\n");
        free(mem);
        mem = NULL;
        goto RELEASE;
    }

    printf("readFile size:%ld\n", fsize);
    *size = (int)fsize;

RELEASE:
    fclose(fp);
    return mem;
}

/**
 * Validates and prints the ELF file header.
 *
 * @param header start of the file image; at least sizeof(Elf32_Ehdr) bytes
 *               must be readable.
 * @return 0 on success, -1 if the image is not an ELF file or is an ELF file
 *         this program cannot decode (not ELF32, or foreign byte order).
 */
int parseHeader(Elf32_Ehdr *header)
{
    /* e_ident fields */
    static const ENUM_S e_class[] =
    {
        {ELFCLASS32, "ELF32"},
        {ELFCLASS64, "ELF64"},
        {INVALID, NULL}
    };
    static const ENUM_S e_data[] =
    {
        {ELFDATA2LSB, "little endian"},
        {ELFDATA2MSB, "big endian"},
        {INVALID, NULL}
    };
    static const ENUM_S e_osabi[] =
    {
        {ELFOSABI_NONE, "UNIX - System V"},
        {ELFOSABI_ARM_AEABI, "ARM EABI"},
        {ELFOSABI_ARM, "ARM"},
        {INVALID, NULL}
    };
    /* remaining header fields */
    static const ENUM_S e_type[] =
    {
        {ET_REL, "Relocatable file"},
        {ET_EXEC, "Executable file"},
        {ET_DYN, "Shared object file"},
        {ET_CORE, "Core file"},
        {INVALID, NULL}
    };
    static const ENUM_S e_machine[] =
    {
        {EM_386, "Intel 80386"},
        {EM_ARM, "ARM"},
        {INVALID, NULL}
    };
    int i = 0;
    const unsigned char *e_ident = NULL;

    if (header == NULL)
    {
        printf("parseHeader param error!\n");
        return -1;
    }
    e_ident = header->e_ident;

    /* All four magic bytes are checked, including EI_MAG3. */
    if ((e_ident[EI_MAG0] != ELFMAG0) || (e_ident[EI_MAG1] != ELFMAG1) ||
        (e_ident[EI_MAG2] != ELFMAG2) || (e_ident[EI_MAG3] != ELFMAG3))
    {
        printf("Not elf format!\n");
        return -1;
    }

    printf("ELF Header:\n\tMagic:");
    for (i = 0; i < EI_NIDENT; i++)
    {
        printf(" %02x", e_ident[i]);
    }
    printf("\n");
    printf("\tClass: %s\n", enumName(e_class, e_ident[EI_CLASS]));
    printf("\tData: 2's complement, %s\n", enumName(e_data, e_ident[EI_DATA]));
    printf("\tVersion: %d (current)\n", e_ident[EI_VERSION]);
    printf("\tOS/ABI: %s\n", enumName(e_osabi, e_ident[EI_OSABI]));
    printf("\tABI Version: %d\n", e_ident[EI_ABIVERSION]);

    /*
     * Everything past e_ident is read through Elf32_* structures in host byte
     * order, so stop here for images that would be decoded incorrectly.
     */
    if (e_ident[EI_CLASS] != ELFCLASS32)
    {
        printf("Unsupported ELF class: only ELF32 files can be parsed\n");
        return -1;
    }
    if (e_ident[EI_DATA] != hostDataEncoding())
    {
        printf("Unsupported byte order: file encoding differs from the host\n");
        return -1;
    }

    printf("\tType: %s\n", enumName(e_type, header->e_type));
    printf("\tMachine: %s\n", enumName(e_machine, header->e_machine));
    printf("\tVersion: 0x%lx\n", (unsigned long)header->e_version);
    printf("\tEntry point address: 0x%lx\n", (unsigned long)header->e_entry);
    printf("\tStart of program headers: %lu (bytes into file)\n", (unsigned long)header->e_phoff);
    printf("\tStart of section headers: %lu (bytes into file)\n", (unsigned long)header->e_shoff);
    printf("\tFlags: 0x%lx\n", (unsigned long)header->e_flags);
    printf("\tSize of this header: %d (bytes)\n", header->e_ehsize);
    printf("\tSize of program headers: %d (bytes)\n", header->e_phentsize);
    printf("\tNumber of program headers: %d\n", header->e_phnum);
    printf("\tSize of section headers: %d (bytes)\n", header->e_shentsize);
    printf("\tNumber of section headers: %d\n", header->e_shnum);
    printf("\tSection header string table index: %d\n", header->e_shstrndx);

    return 0;
}

/**
 * Returns the printable name of a section type.
 *
 * @param sh_type value of Elf32_Shdr.sh_type.
 * @return the name, or NULL when the type is not in the table. Several
 *         constants share a numeric value; the first table entry wins.
 */
char *getSectionTypeName(Elf32_Word sh_type)
{
    static const ENUM_S sh_type_list[] =
    {
        {SHT_NULL, "NULL"},
        {SHT_PROGBITS, "PROGBITS"},
        {SHT_SYMTAB, "SYMTAB"},
        {SHT_STRTAB, "STRTAB"},
        {SHT_RELA, "RELA"},
        {SHT_HASH, "HASH"},
        {SHT_DYNAMIC, "DYNAMIC"},
        {SHT_NOTE, "NOTE"},
        {SHT_NOBITS, "NOBITS"},
        {SHT_REL, "REL"},
        {SHT_SHLIB, "SHLIB"},
        {SHT_DYNSYM, "DYNSYM"},
        {SHT_INIT_ARRAY, "INIT_ARRAY"},
        {SHT_FINI_ARRAY, "FINI_ARRAY"},
        {SHT_PREINIT_ARRAY, "PREINIT_ARRAY"},
        {SHT_GROUP, "GROUP"},
        {SHT_SYMTAB_SHNDX, "SYMTAB_SHNDX"},
        {SHT_NUM, "NUM"},
        {SHT_LOOS, "LOOS"},
        {SHT_GNU_ATTRIBUTES, "GNU_ATTRIBUTES"},
        {SHT_GNU_HASH, "GNU_HASH"},
        {SHT_GNU_LIBLIST, "GNU_LIBLIST"},
        {SHT_CHECKSUM, "CHECKSUM"},
        {SHT_LOSUNW, "LOSUNW"},
        {SHT_SUNW_move, "SUNW_move"},
        {SHT_SUNW_COMDAT, "SUNW_COMDAT"},
        {SHT_SUNW_syminfo, "SUNW_syminfo"},
        {SHT_GNU_verdef, "GNU_verdef"},
        {SHT_GNU_verneed, "GNU_verneed"},
        {SHT_GNU_versym, "GNU_versym"},
        {SHT_HISUNW, "HISUNW"},
        {SHT_HIOS, "HIOS"},
        {SHT_LOPROC, "LOPROC"},
        {SHT_HIPROC, "HIPROC"},
        {SHT_LOUSER, "LOUSER"},
        {SHT_HIUSER, "HIUSER"},
        {INVALID, NULL}
    };

    /* Silent lookup: an unknown type must not print a diagnostic mid-table. */
    return findEnumStr(sh_type_list, (int)sh_type);
}

/**
 * Returns the string stored at byte offset idx of a string table section.
 *
 * @param ehdr      start of the file image.
 * @param strtabhdr header of the string table section; its data must lie
 *                  inside the image (parseFile verifies this).
 * @param idx       byte offset into the table.
 * @return pointer to the NUL-terminated string inside the image, or NULL when
 *         idx is out of range or the table is not NUL-terminated.
 */
char *getStrTabStr(Elf32_Ehdr *ehdr, Elf32_Shdr *strtabhdr, int idx)
{
    char *table = NULL;

    if ((ehdr == NULL) || (strtabhdr == NULL) || (idx < 0) ||
        (strtabhdr->sh_size == 0) || ((Elf32_Word)idx >= strtabhdr->sh_size))
    {
        return NULL;
    }

    table = (char *)ehdr + strtabhdr->sh_offset;
    /* Without a final NUL a string could run past the end of the section. */
    if (table[strtabhdr->sh_size - 1] != '\0')
    {
        return NULL;
    }
    return table + idx;
}

/**
 * Locates the section-name string table.
 *
 * The table is found through e_shstrndx rather than by searching for a
 * section called ".shstrtab", so it also works when the section names are
 * kept in a string table with a different name.
 *
 * @param ehdr start of the file image (section header table must be valid).
 * @return the section header, or NULL when the file has none.
 */
Elf32_Shdr *getSHStrTab(Elf32_Ehdr *ehdr)
{
    Elf32_Shdr *shdr = NULL;

    if ((ehdr == NULL) || (ehdr->e_shstrndx == SHN_UNDEF) ||
        (ehdr->e_shstrndx >= ehdr->e_shnum))
    {
        return NULL;
    }

    shdr = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff) + ehdr->e_shstrndx;
    if (shdr->sh_type != SHT_STRTAB)
    {
        return NULL;
    }
    return shdr;
}

/**
 * Finds a section header by name and type.
 *
 * @param ehdr    start of the file image.
 * @param name    section name, e.g. ".symtab".
 * @param sh_type required section type.
 * @return the first matching section header, or NULL (after printing a
 *         diagnostic) when there is none.
 */
Elf32_Shdr *getSHdrByName(Elf32_Ehdr *ehdr, const char *name, Elf32_Word sh_type)
{
    int i = 0;
    int shnum = 0;
    char *nameStr = NULL;
    Elf32_Shdr *shstrtabhdr = NULL;
    Elf32_Shdr *shdr = NULL;

    if ((ehdr == NULL) || (name == NULL))
    {
        printf("getSHdrByName param error!\n");
        return NULL;
    }

    shstrtabhdr = getSHStrTab(ehdr);
    if (shstrtabhdr == NULL)
    {
        printf("getSHdrByName Get shstrtabhdr failed name:%s sh_type:%lu\n",
               name, (unsigned long)sh_type);
        return NULL;
    }

    shnum = ehdr->e_shnum;
    shdr = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff);
    for (i = 0; i < shnum; i++, shdr++)
    {
        if (shdr->sh_type != sh_type)
        {
            continue;
        }
        nameStr = getStrTabStr(ehdr, shstrtabhdr, (int)shdr->sh_name);
        if ((nameStr != NULL) && (strcmp(nameStr, name) == 0))
        {
            return shdr;
        }
    }

    printf("getSHdrByName Get section header failed name:%s sh_type:%lu\n",
           name, (unsigned long)sh_type);
    return NULL;
}

/**
 * Prints the section header table.
 *
 * @param ehdr start of the file image.
 * @return 0 on success, -1 when the section-name string table is missing.
 */
int parseSectionHeader(Elf32_Ehdr *ehdr)
{
    int i = 0;
    int shnum = 0;
    Elf32_Shdr *shdr = NULL;
    Elf32_Shdr *shstrtabhdr = NULL;

    shstrtabhdr = getSHStrTab(ehdr);
    if (shstrtabhdr == NULL)
    {
        printf("parseSectionHeader Get shstrtabhdr failed\n");
        return -1;
    }

    shnum = ehdr->e_shnum;
    shdr = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff);

    /* The offset is printed in hex to match its "0x" prefix. */
    printf("\nThere are %d section headers, starting at offset 0x%lx:\n",
           shnum, (unsigned long)ehdr->e_shoff);
    printf("Section Headers:\n");
    printf(" [Nr] Name Type Addr Off Size ES Flg Lk Inf Al\n");
    for (i = 0; i < shnum; i++, shdr++)
    {
        printf(" [%2d] ", i);
        printf("%-24s ", strOrUnknown(getStrTabStr(ehdr, shstrtabhdr, (int)shdr->sh_name)));
        printf("%-15s ", strOrUnknown(getSectionTypeName(shdr->sh_type)));
        printf("%08lx ", (unsigned long)shdr->sh_addr);
        printf("%06lx ", (unsigned long)shdr->sh_offset);
        printf("%06lx ", (unsigned long)shdr->sh_size);
        printf("%02lx ", (unsigned long)shdr->sh_entsize);
        printf("%03lx ", (unsigned long)shdr->sh_flags);
        printf("%02lx ", (unsigned long)shdr->sh_link);
        printf("%03lx ", (unsigned long)shdr->sh_info);
        printf("%02lx ", (unsigned long)shdr->sh_addralign);
        printf("\n");
    }

    return 0;
}

/**
 * Prints a symbol table.
 *
 * The caller names the symbol section, its type, and the string table section
 * that holds the symbol names (".symtab"/".strtab" for the static table,
 * ".dynsym"/".dynstr" for the dynamic one).
 *
 * @param ehdr    start of the file image.
 * @param symtab  name of the symbol table section.
 * @param strtab  name of the string table section used for symbol names.
 * @param sh_type section type of the symbol table (SHT_SYMTAB or SHT_DYNSYM).
 * @return 0 on success, -1 when either section is missing or misaligned.
 */
int parseSymTab(Elf32_Ehdr *ehdr, char *symtab, char *strtab, Elf32_Word sh_type)
{
    /* Elf32_Sym st_info binding */
    static const ENUM_S st_info_bind[] =
    {
        {STB_LOCAL, "LOCAL"},
        {STB_GLOBAL, "GLOBAL"},
        {STB_WEAK, "WEAK"},
        {STB_NUM, "NUM"},
        {STB_HIOS, "HIOS"},
        {STB_LOPROC, "LOPROC"},
        {STB_HIPROC, "HIPROC"},
        {INVALID, NULL}
    };
    /* Elf32_Sym st_info type */
    static const ENUM_S st_info_type[] =
    {
        {STT_NOTYPE, "NOTYPE"},
        {STT_OBJECT, "OBJECT"},
        {STT_FUNC, "FUNC"},
        {STT_SECTION, "SECTION"},
        {STT_FILE, "FILE"},
        {STT_COMMON, "COMMON"},
        {STT_TLS, "TLS"},
        {STT_NUM, "NUM"},
        {INVALID, NULL}
    };
    /* Elf32_Sym st_other visibility */
    static const ENUM_S st_other_visibility[] =
    {
        {STV_DEFAULT, "DEFAULT"},
        {STV_INTERNAL, "INTERNAL"},
        {STV_HIDDEN, "HIDDEN"},
        {STV_PROTECTED, "PROTECTED"},
        {INVALID, NULL}
    };
    int i = 0;
    int sym_cnt = 0;
    Elf32_Shdr *shdrStrTab = NULL;
    Elf32_Shdr *shdrSymTab = NULL;
    Elf32_Sym *sym = NULL;

    shdrSymTab = getSHdrByName(ehdr, symtab, sh_type);
    if (shdrSymTab == NULL)
    {
        printf("parseSymTab getSymTab faild\n");
        return -1;
    }

    shdrStrTab = getSHdrByName(ehdr, strtab, SHT_STRTAB);
    if (shdrStrTab == NULL)
    {
        printf("parseSymTab getStrTab faild\n");
        return -1;
    }

    /* Entries are read in place, so they must be aligned for Elf32_Sym. */
    if (shdrSymTab->sh_offset % sizeof(Elf32_Word) != 0)
    {
        printf("parseSymTab misaligned symbol table\n");
        return -1;
    }

    sym_cnt = (int)(shdrSymTab->sh_size / sizeof(Elf32_Sym));
    printf("\nSymbol table '%s' contains %d entries:\n", symtab, sym_cnt);
    printf(" Num: Value Size Type Bind Vis Ndx Name\n");

    sym = (Elf32_Sym *)((char *)ehdr + shdrSymTab->sh_offset);
    for (i = 0; i < sym_cnt; i++, sym++)
    {
        printf("%6d: ", i);
        printf("%08lx ", (unsigned long)sym->st_value);
        printf("%5lu ", (unsigned long)sym->st_size);
        printf("%-7s ", enumName(st_info_type, ELF32_ST_TYPE(sym->st_info)));
        printf("%-6s ", enumName(st_info_bind, ELF32_ST_BIND(sym->st_info)));
        printf("%-8s ", enumName(st_other_visibility, ELF32_ST_VISIBILITY(sym->st_other)));
        printf("%5d ", sym->st_shndx);
        printf("%-32s", strOrUnknown(getStrTabStr(ehdr, shdrStrTab, (int)sym->st_name)));
        printf("\n");
    }

    return 0;
}

/**
 * Prints the program header table.
 *
 * @param ehdr start of the file image; PT_INTERP segments must lie inside
 *             the image (parseFile verifies this).
 * @return always 0.
 */
int parseProgramHeader(Elf32_Ehdr *ehdr)
{
    static const ENUM_S p_type[] =
    {
        {PT_NULL, "NULL"},
        {PT_LOAD, "LOAD"},
        {PT_DYNAMIC, "DYNAMIC"},
        {PT_INTERP, "INTERP"},
        {PT_NOTE, "NOTE"},
        {PT_SHLIB, "SHLIB"},
        {PT_PHDR, "PHDR"},
        {PT_TLS, "TLS"},
        {PT_GNU_EH_FRAME, "GNU_EH_FRAME"},
        {PT_GNU_STACK, "GNU_STACK"},
        {PT_GNU_RELRO, "GNU_RELRO"},
        {INVALID, NULL}
    };
    static const ENUM_S p_flags[] =
    {
        {PF_X, "X"},
        {PF_W, "W"},
        {PF_R, "R"},
        {PF_X | PF_W, "WE"},
        {PF_X | PF_R, "RE"},
        {PF_W | PF_R, "RW"},
        {PF_X | PF_W | PF_R, "RWE"},
        {INVALID, NULL}
    };
    int i = 0;
    int phnum = ehdr->e_phnum;
    Elf32_Phdr *phdr = (Elf32_Phdr *)((char *)ehdr + ehdr->e_phoff);

    printf("\nProgram Headers:\n");
    printf(" Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align\n");
    for (i = 0; i < phnum; i++, phdr++)
    {
        printf(" %-12s ", enumName(p_type, (int)phdr->p_type));
        printf("0x%08lx ", (unsigned long)phdr->p_offset);
        printf("0x%08lx ", (unsigned long)phdr->p_vaddr);
        printf("0x%08lx ", (unsigned long)phdr->p_paddr);
        printf("0x%08lx ", (unsigned long)phdr->p_filesz);
        printf("0x%08lx ", (unsigned long)phdr->p_memsz);
        printf("%-3s ", enumName(p_flags, (int)phdr->p_flags));
        printf("%#04lx ", (unsigned long)phdr->p_align);
        printf("\n");

        /* For an interpreter segment, also print the interpreter path. */
        if (phdr->p_type == PT_INTERP)
        {
            /* The precision bounds the read in case the path lacks a NUL. */
            printf(" [Requesting program interpreter:%.*s]\n",
                   (int)phdr->p_filesz, (char *)ehdr + phdr->p_offset);
        }
    }

    return 0;
}

/**
 * Prints, for every segment, the names of the sections it contains.
 *
 * A section with SHF_ALLOC belongs to a segment when its address range lies
 * inside the segment's memory range; any other section belongs when its file
 * range lies inside the segment's file range. Empty sections are skipped.
 *
 * @param ehdr start of the file image.
 * @return 0 on success, -1 when the section-name string table is missing.
 */
int parseSegment(Elf32_Ehdr *ehdr)
{
    int i = 0;
    int j = 0;
    Elf32_Shdr *sections = NULL;
    Elf32_Shdr *shdr = NULL;
    Elf32_Phdr *phdr = NULL;
    Elf32_Shdr *shstrtabhdr = NULL;

    shstrtabhdr = getSHStrTab(ehdr);
    if (shstrtabhdr == NULL)
    {
        printf("parseSegment Get shstrtabhdr failed\n");
        return -1;
    }

    sections = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff);
    phdr = (Elf32_Phdr *)((char *)ehdr + ehdr->e_phoff);

    printf("\nSection to Segment mapping:\n");
    printf(" Segment Sections...\n");
    for (i = 0; i < ehdr->e_phnum; i++, phdr++)
    {
        printf(" %2.2d ", i);

        /*
         * Index 0 is the reserved null section. Indexing by j keeps the
         * counter and the header in step, so the last section is visited too.
         */
        for (j = 1; j < ehdr->e_shnum; j++)
        {
            shdr = &sections[j];
            if (shdr->sh_size == 0)
            {
                continue;
            }

            /* Comparisons are written as subtractions so the sums cannot wrap. */
            if (shdr->sh_flags & SHF_ALLOC)
            {
                if ((shdr->sh_addr < phdr->p_vaddr) ||
                    (shdr->sh_size > phdr->p_memsz) ||
                    (shdr->sh_addr - phdr->p_vaddr > phdr->p_memsz - shdr->sh_size))
                {
                    continue;
                }
            }
            else
            {
                if ((shdr->sh_offset < phdr->p_offset) ||
                    (shdr->sh_size > phdr->p_filesz) ||
                    (shdr->sh_offset - phdr->p_offset > phdr->p_filesz - shdr->sh_size))
                {
                    continue;
                }
            }

            printf("%s ", strOrUnknown(getStrTabStr(ehdr, shstrtabhdr, (int)shdr->sh_name)));
        }
        printf("\n");
    }

    return 0;
}

/*
 * Checks that the header tables, every section with file contents, and every
 * PT_INTERP segment lie inside the file, and that the tables are laid out the
 * way the in-place decoding expects. Returns 0 when safe to parse, else -1.
 */
static int validateLayout(const Elf32_Ehdr *ehdr, size_t size)
{
    int i = 0;
    const Elf32_Shdr *shdr = NULL;
    const Elf32_Phdr *phdr = NULL;

    if (ehdr->e_shnum > 0)
    {
        if ((ehdr->e_shentsize != sizeof(Elf32_Shdr)) ||
            (ehdr->e_shoff % sizeof(Elf32_Word) != 0) ||
            !rangeInFile(ehdr->e_shoff, (size_t)ehdr->e_shnum * sizeof(Elf32_Shdr), size))
        {
            printf("Invalid section header table\n");
            return -1;
        }

        shdr = (const Elf32_Shdr *)((const char *)ehdr + ehdr->e_shoff);
        for (i = 0; i < ehdr->e_shnum; i++, shdr++)
        {
            /* SHT_NULL and SHT_NOBITS sections occupy no bytes in the file. */
            if ((shdr->sh_type == SHT_NULL) || (shdr->sh_type == SHT_NOBITS))
            {
                continue;
            }
            if (!rangeInFile(shdr->sh_offset, shdr->sh_size, size))
            {
                printf("Section %d lies outside the file\n", i);
                return -1;
            }
        }
    }

    if (ehdr->e_phnum > 0)
    {
        if ((ehdr->e_phentsize != sizeof(Elf32_Phdr)) ||
            (ehdr->e_phoff % sizeof(Elf32_Word) != 0) ||
            !rangeInFile(ehdr->e_phoff, (size_t)ehdr->e_phnum * sizeof(Elf32_Phdr), size))
        {
            printf("Invalid program header table\n");
            return -1;
        }

        phdr = (const Elf32_Phdr *)((const char *)ehdr + ehdr->e_phoff);
        for (i = 0; i < ehdr->e_phnum; i++, phdr++)
        {
            /* Only interpreter segments have their contents read. */
            if ((phdr->p_type == PT_INTERP) &&
                !rangeInFile(phdr->p_offset, phdr->p_filesz, size))
            {
                printf("Segment %d lies outside the file\n", i);
                return -1;
            }
        }
    }

    return 0;
}

/**
 * Decodes and prints an ELF32 file image.
 *
 * @param mem  file contents; NULL is rejected.
 * @param size number of valid bytes in mem.
 * @return 0 when the image was accepted and dumped, -1 when it was rejected.
 *         Missing symbol tables (e.g. a stripped binary) are reported but do
 *         not count as failure.
 */
int parseFile(char *mem, int size)
{
    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)mem;

    if ((mem == NULL) || (size < 0) || ((size_t)size < sizeof(Elf32_Ehdr)))
    {
        printf("parseFile: file too small to hold an ELF header\n");
        return -1;
    }

    /* ELF file header */
    if (parseHeader(ehdr) != 0)
    {
        return -1;
    }

    /* Reject images whose offsets point outside the buffer before decoding them. */
    if (validateLayout(ehdr, (size_t)size) != 0)
    {
        return -1;
    }

    /* section header table */
    parseSectionHeader(ehdr);

    /* .symtab and .dynsym */
    parseSymTab(ehdr, SECTION_NAME_SYMTAB, SECTION_NAME_STRTAB, SHT_SYMTAB);
    parseSymTab(ehdr, SECTION_NAME_DYNSYM, SECTION_NAME_DYNSTR, SHT_DYNSYM);

    /* program headers and segment contents */
    parseProgramHeader(ehdr);
    parseSegment(ehdr);

    return 0;
}

/* Usage: ./readelf <elf-file>. Returns 0 on success, -1 on any failure. */
int main(int argc, char *argv[])
{
    int size = 0;
    int ret = 0;
    char *mem = NULL;

    if (argc < 2)
    {
        printf("./readelf elf\n");
        return -1;
    }

    mem = readFile(argv[1], &size);
    if (mem == NULL)
    {
        return -1;
    }

    ret = parseFile(mem, size);
    free(mem);

    return (ret == 0) ? 0 : -1;
}

/* Article heading that followed this listing in the original text: "總結" (Summary). */
- 上一篇: 什么可以作为gcroot_面包果既能当水
- 下一篇: 计算机专业联考335分,联考专业分、文化