浅谈c语言中的字符串
寫在前面:最近MM問了我一個問題——字符串在內存中位于哪里?我想當然是位于數據段(data segment)。她又問,那怎么保證它只讀呢?我答不上了。有些問題,看似簡單,背后卻隱藏著天機,后來查了一些資料,對C語言的字符串有了一個全新的認識。但這還不是終極認識,最近實在是太忙了,以后再重新認識它吧。進入正文:
注:segment和section兩個單詞經常混用,在這里我對運行時用segment,對編譯時用section。
(1)表示形式
字符數(shù)組形式:char str[]="hustcat";
字符指針形式:char *str="hustcat";
(2)sizeof與strlen
sizeof("hustcat")=8; sizeof("")=1,而不是0;
strlen("hustcat")=7; strlen("")=0
(3)單字符字符串常量與字符常量的值大不相同
int x=(int)"A";
得到的結果是將x初始化成指向包含'A'和'\0'兩個字符的內存塊的指針。
int y=(int)'A';
得到的結果是y=0x41
(4)讀寫性
char p1[]="always writable"; //p1[0]='1' ,OK
char *p2="possibly not writable"; //p2[0]='1',在vc++ 6.0和gcc下都會出現運行時錯誤(段錯誤)
const char p3[]="never writable";//p3[0]='1',總是會造成編譯時錯誤
(5)標準c允許實現對包含相同字符串的兩個字符串常量使用同一存儲空間。
char *str1,*str2;
int main()
{
    str1="hustcat";
    str2="hustcat";
    if(str1==str2)
        printf("strings are shared.\n");
    else
        printf("strings are not shared.\n");
    return 0;
}
在vc++ 6.0和gcc下輸出:
strings are shared.
(6)編譯時與運行時
字符串常量編譯之后位于哪里?文本區(text section),數據區(data section)還是bss區?
下面來看幾個實際的程序:
以下基于gcc 1.40,輸出的可執行文件格式為a.out格式:
第一個程序:
#include <stdio.h>
int main()
{
        char *ptr="1111";
        int a=12;
        return 0;
}
---------匯編輸出-----------------
        .file   "hello.c"
gcc_compiled.:
.text
LC0:
        .ascii "1111\0"
        .align 2
.globl _main
_main:
        pushl %ebp
        movl %esp,%ebp
        subl $8,%esp
        movl $LC0,-4(%ebp)
        movl $12,-8(%ebp)
        xorl %eax,%eax
        jmp L1
        .align 2
L1:
        leave
        ret
---------相應信息----------------------
[/usr/root]# objdump -h hello.o
hello.o:
magic: 0x107 (407) machine type: 0 flags: 0x0 text 0x24 data 0x0 bss 0x0
nsyms 2 entry 0x0 trsize 0x8 drsize 0x0
[/usr/root]# objdump -h hello
hello:
magic: 0x10b (413) machine type: 0 flags: 0x0 text 0x1000 data 0x1000 bss 0x0
nsyms 20 entry 0x0 trsize 0x0 drsize 0x0
可以看出,在a.out格式目標文件中,字符串常量位于代碼區。
第二個程序:
----------c程序---------------------
#include <stdio.h>
char *p="2222";
int main()
{
        char *ptr="1111";
        int a=12;
        return 0;
}
---------匯編輸出------------------
        .file   "hello.c"
gcc_compiled.:
.globl _p
.text
LC0:
        .ascii "2222\0"
.data
        .align 2
_p:
        .long LC0
.text
LC1:
        .ascii "1111\0"
        .align 2
.globl _main
_main:
        pushl %ebp
        movl %esp,%ebp
        subl $8,%esp
        movl $LC1,-4(%ebp)
        movl $12,-8(%ebp)
        xorl %eax,%eax
        jmp L1
        .align 2
L1:
        leave
        ret
-----------相應信息-------------------
[/usr/root]# objdump -h hello.o
hello.o:
magic: 0x107 (407) machine type: 0 flags: 0x0 text 0x28 data 0x4 bss 0x0
nsyms 3 entry 0x0 trsize 0x8 drsize 0x8
[/usr/root]# objdump -h hello
hello:
magic: 0x10b (413) machine type: 0 flags: 0x0 text 0x1000 data 0x1000 bss 0x0
nsyms 20 entry 0x0 trsize 0x0 drsize 0x0
可以知道,全局變量p位于data section,而字符串仍然位于text section。

第三個程序:
----------c程序-----------------
#include <stdio.h>
char *p1="2222";
char *p2="2222";
char str1[]="3333";
int main()
{
        char *ptr="1111";
        char str2[]="4444";
        int a=12;
        return 0;
}
-----------匯編輸出---------------
        .file   "hello.c"
gcc_compiled.:
.globl _p1
.text
LC0:
        .ascii "2222\0"
.data
        .align 2
_p1:
        .long LC0   //p1和p2使用同一存儲空間
.globl _p2
        .align 2
_p2:
        .long LC0
.globl _str1
_str1:
        .ascii "3333\0"  //str1所包含的字符串位于data區
.text
LC1:
        .ascii "1111\0"
LC2:
        .ascii "4444\0"
        .align 2
.globl _main
_main:
        pushl %ebp
        movl %esp,%ebp
        subl $16,%esp
        movl $LC1,-4(%ebp)
        leal -12(%ebp),%eax
        movl %eax,%eax
        movl LC2,%edx
        movl %edx,-12(%ebp)
        movb LC2+4,%dl
        movb %dl,-8(%ebp)
        movl $12,-16(%ebp)
        xorl %eax,%eax
        jmp L1
        .align 2
L1:
        leave
        ret
可以知道,p1和p2使用同一存儲空間,str1以及它的字符串都位于data section。
但是對于gcc 3.2.2生成的ELF(Executable and Linking Format)目標文件,結果又不同。
來看一個例子:

------------C程序----------------------------------
#include <stdio.h>
char *str1,*str2;
int main()
{
    str1="abcd";
    str2="abcd";
    if (str1==str2)
        printf("string are shared.\n");
    else
        printf("not shared.\n");
    str1[0]='1';
    if (*str1=='1')
        printf("writable.\n");
    else
        printf("not writable.\n");
    return 0;
}
-----------匯編輸出--------------------------------
    .file    "test.c"
    .section    .rodata
.LC0:
    .string    "abcd"   //字符串位于rodata section
.LC1:
    .string    "string are shared.\n"
.LC2:
    .string    "not shared.\n"
.LC3:
    .string    "writable.\n"
.LC4:
    .string    "not writable.\n"
    .text
.globl main
    .type    main,@function
main:
    pushl    %ebp
    movl    %esp, %ebp
    subl    $8, %esp
    andl    $-16, %esp
    movl    $0, %eax
    subl    %eax, %esp
    movl    $.LC0, str1
    movl    $.LC0, str2
    movl    str1, %eax
    cmpl    str2, %eax
    jne    .L2
    subl    $12, %esp
    pushl    $.LC1
    call    printf
    addl    $16, %esp
    jmp    .L3
.L2:
    subl    $12, %esp
    pushl    $.LC2
    call    printf
    addl    $16, %esp
.L3:
    movl    str1, %eax
    movb    $49, (%eax)
    movl    str1, %eax
    cmpb    $49, (%eax)
    jne    .L4
    subl    $12, %esp
    pushl    $.LC3
    call    printf
    addl    $16, %esp
    jmp    .L5
.L4:
    subl    $12, %esp
    pushl    $.LC4
    call    printf
    addl    $16, %esp
.L5:
    movl    $0, %eax
    leave
    ret
.Lfe1:
    .size    main,.Lfe1-main
    .comm    str1,4,4
    .comm    str2,4,4
    .ident    "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
----------------相應信息------------------------------------
[root@localhost devlop]# objdump -h test.o

test.o:     file format elf32-i386

Sections:
Idx Name          Size      VMA       LMA       File off  Algn
  0 .text         0000008e  00000000  00000000  00000034  2**2
                  CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
  1 .data         00000000  00000000  00000000  000000c4  2**2
                  CONTENTS, ALLOC, LOAD, DATA
  2 .bss          00000000  00000000  00000000  000000c4  2**2
                  ALLOC
  3 .rodata       00000040  00000000  00000000  000000c4  2**0
                  CONTENTS, ALLOC, LOAD, READONLY, DATA
  4 .comment      00000033  00000000  00000000  00000104  2**0
                  CONTENTS, READONLY
從上可知,在ELF格式中,字符串常量位于rodata section,該區是只讀的。
下面來對比一下a.out格式和ELF格式:
a.out頭部
int a_magic;  // 幻數
int a_text;   // 文本段大小
int a_data;   // 初始化的數據段大小
int a_bss;    // 未初始化的數據段大小
int a_syms;   // 符號表大小
int a_entry;  // 入口點
int a_trsize; // 文本重定位段大小
int a_drsize; // 數據重定位段大小
ELF文件中常見的節(section)
.text
.data
.rodata
.bss
.sym
.rel.text
.rel.data
.rel.rodata
.line
.debug
.strtab
總的來說,在編譯時,字符串編譯后所處的位置與具體的編譯器(目標文件格式)相關;
運行時,則與具體的操作系統(可執行文件格式)和加載器的實現相關。
轉載于:https://www.cnblogs.com/hustcat/archive/2009/05/09/1453338.html
總結
以上是生活随笔為你收集整理的浅谈c语言中的字符串的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 360tray.exe指的是什么程序
- 下一篇: 电脑总是弹出广告如何解决