TLS variable in Linux
TLS就是thread local storage,即线程局部存储,其含义很直白,每个线程都能访问该变量,但实际访问的却是不同的地址(有点类似于内核里面的PERCPU variable)。Glibc中的errno就是一个TLS变量,gcc也提供了__thread关键字扩展,可以将一个全局变量声明为TLS。
写一段很简单的程序来验证下:
/* Thread-local counter: gcc's __thread extension gives each thread its own copy. */
__thread int test;

/* Ordinary global, shared by every thread of the process. */
int test2;

/*
 * Bump both counters once so the generated code for a TLS access can be
 * compared side by side with the code for a plain global access.
 */
int main(int argc, char **argv)
{
    test += 1;
    test2 += 1;

    return 0;
}
反汇编后如下:
0x00000000004004ec <+0>: push %rbp
0x00000000004004ed <+1>: mov %rsp,%rbp
0x00000000004004f0 <+4>: mov %edi,-0x4(%rbp)
0x00000000004004f3 <+7>: mov %rsi,-0x10(%rbp)
0x00000000004004f7 <+11>: mov %fs:0xfffffffffffffffc,%eax
0x00000000004004ff <+19>: add $0x1,%eax
0x0000000000400502 <+22>: mov %eax,%fs:0xfffffffffffffffc
0x000000000040050a <+30>: mov 0x2003c4(%rip),%eax # 0x6008d4 <test2>
0x0000000000400510 <+36>: add $0x1,%eax
0x0000000000400513 <+39>: mov %eax,0x2003bb(%rip) # 0x6008d4 <test2>
0x0000000000400519 <+45>: mov $0x0,%eax
0x000000000040051e <+50>: pop %rbp
0x000000000040051f <+51>: retq
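可以看到,一个普通的全局变量是通过%rip加上一个偏移来访问的,而一个TLS变量则是通过%fs寄存器去访问。而且,这里有一处非常奇怪的地方,%fs寄存器的偏移竟然是负数(0xfffffffffffffffc即-4)?虽然x86_64当中的segment是不做limit checking的,但还是想不通这么做是什么缘由。难道这个用作TLS的segment是expand-down的?其实并不是:x86_64的ELF TLS ABI采用的是variant II布局,%fs的基地址指向线程控制块(TCB),而可执行文件的静态TLS块紧挨着放在TCB之前(低地址一侧),所以__thread变量相对于%fs的偏移自然是负数。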
......
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f44eb90e000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f44eb90d000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f44eb90c000
arch_prctl(ARCH_SET_FS, 0x7f44eb90d700) = 0
......
用strace观察该程序的行为,发现glibc runtime在初始化的时候调用了arch_prctl,这是一个系统调用,并且传入了参数ARCH_SET_FS,用来设置segment base,主线程的TLS段就是在这里设置。来看看arch_prctl都干了哪些事情。
/*
 * do_arch_prctl - apply an arch_prctl request to @task.
 *
 * Only the ARCH_SET_FS case is shown in this excerpt: it records @addr as
 * the FS segment base of @task and, when @task is the running task, also
 * programs the hardware.
 *
 * @task: task whose FS base is being set
 * @code: request code; as shown here, anything other than ARCH_SET_FS
 *        matches no case and the function returns 0
 * @addr: new FS base address
 *
 * Returns -EPERM when @addr >= TASK_SIZE_OF(@task). Otherwise returns 0,
 * except on the large-base path for the current task, where the result of
 * wrmsrl_safe() is returned.
 */
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
int ret = 0;
/* Only touch the live segment register/MSR when acting on ourselves. */
int doit = task == current;
int cpu;
switch (code) {
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
with gs */
if (addr >= TASK_SIZE_OF(task))
return -EPERM;
/*
 * NOTE(review): get_cpu()/put_cpu() are not defined in this excerpt;
 * in the kernel this pair keeps us on one CPU (preemption disabled)
 * while the per-CPU descriptor table and MSR are updated.
 */
cpu = get_cpu();
/* handle small bases via the GDT because that's faster to
switch. */
if (addr <= 0xffffffff) {
/* Base fits in 32 bits: describe it with the FS_TLS descriptor. */
set_32bit_tls(task, FS_TLS, addr);
if (doit) {
load_TLS(&task->thread, cpu);
loadsegment(fs, FS_TLS_SEL);
}
/* Saved state: selector FS_TLS_SEL, no separate 64-bit base. */
task->thread.fsindex = FS_TLS_SEL;
task->thread.fs = 0;
} else {
/* Base above 4 GiB: saved state is selector 0 plus the full base. */
task->thread.fsindex = 0;
task->thread.fs = addr;
if (doit) {
/* set the selector to 0 to not confuse
__switch_to */
loadsegment(fs, 0);
/* The selector is loaded first, then the base is written to the MSR. */
ret = wrmsrl_safe(MSR_FS_BASE, addr);
}
}
put_cpu();
break;
}
return ret;
}
/*
 * arch_prctl entry point: forwards the request to do_arch_prctl() on behalf
 * of the calling task (`current`) and hands its result back unchanged.
 */
long sys_arch_prctl(int code, unsigned long addr)
{
    long result = do_arch_prctl(current, code, addr);

    return result;
}
先检查addr有没有越界,然后关抢占。glibc传进来的地址(例如上面strace中的0x7f44eb90d700)大于0xffffffff,所以在x86_64上应该执行判断语句的else分支:先将%fs的信息写入thread_struct,如果task就是当前线程,再把%fs的selector置0,并将基地址写入MSR_FS_BASE这个segment base MSR。
如果新创建了一个线程,会发生什么情况呢?继续strace,发现如下内容。
mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f12b0acb000
brk(0) = 0x1e2e000
brk(0x1e4f000) = 0x1e4f000
mprotect(0x7f12b0acb000, 4096, PROT_NONE) = 0
clone(Process 6467 attached (waiting for parent)
Process 6467 resumed (parent 6466 ready)
child_stack=0x7f12b12caff0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f12b12cb9d0, tls=0x7f12b12cb700, child_tidptr=0x7f12b12cb9d0) = 6467
父线程在做clone的时候,会把子线程的TLS段相关参数给传进去,随后do_fork -> copy_process -> copy_thread,最后有如下代码:
/*
* Set a new TLS for the child thread?
*
* Runs only when the caller passed CLONE_SETTLS. The new task is `p`;
* the TLS argument is read from the child's saved registers (childregs).
* Any non-zero result jumps to the `out` label.
*/
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
/*
 * With IA32 emulation compiled in, a thread flagged TIF_IA32 passes a
 * user-space struct user_desc pointer in `si` instead of a raw base.
 */
if (test_thread_flag(TIF_IA32))
err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0);
else
#endif
/* Otherwise the FS base for the child is the value found in r8. */
err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
if (err)
goto out;
}
可以看到最后还是会调到do_arch_prctl,并且TLS段的基地址是放在%r8寄存器中的(即x86_64上clone系统调用的第5个参数tls,对应上面strace输出中的tls=0x7f12b12cb700)。最后再来看看线程切换的时候会发生什么。
/*
 * __switch_to - excerpt showing the FS/TLS part of switching from
 * @prev_p to @next_p. Parts of the function are elided ("......").
 *
 * NOTE(review): the declarations of prev, next, cpu, fsindex and gsindex
 * are in the elided part; prev/next presumably point at the thread state
 * of prev_p/next_p -- confirm against the full source.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p) {
......
/*
 * Capture the fs/gs values into fsindex/gsindex before anything is
 * reloaded (presumably the selectors currently in the registers).
 */
savesegment(fs, fsindex);
savesegment(gs, gsindex);
/* Bring in next's TLS state for this CPU before the selector reload below. */
load_TLS(next, cpu);
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
* the GDT and LDT are properly updated, and must be
* done before math_state_restore, so the TS bit is up
* to date.
*/
arch_end_context_switch(next_p);
/*
* Switch FS and GS.
*
* Segment register != 0 always requires a reload. Also
* reload when it has changed. When prev process used 64bit
* base always reload to avoid an information leak.
*/
/* The reload is skipped only if all three values are zero. */
if (unlikely(fsindex | next->fsindex | prev->fs)) {
loadsegment(fs, next->fsindex);
/*
* Check if the user used a selector != 0; if yes
* clear 64bit base, since overloaded base is always
* mapped to the Null selector
*/
if (fsindex)
prev->fs = 0;
}
/* when next process has a 64bit base use it */
if (next->fs)
wrmsrl(MSR_FS_BASE, next->fs);
/* Remember the selector captured above as prev's saved fs selector. */
prev->fsindex = fsindex;
......
}
可以看到,线程切换时在此处做了TLS段的切换,调用关系如下schedule -> __schedule -> context_switch -> switch_to -> __switch_to。
此外,TLS变量还可以动态创建和使用,pthread库提供了pthread_key_create,pthread_key_delete,pthread_setspecific和pthread_getspecific等函数,此处就不再赘述了。
总结一句话,TLS变量的使用需要用户程序,编译器,C runtime和OS多方的配合才能实现。
blog comments powered by Disqus