Uthread: switching between threads 任务:在用户层面实现线程切换机制。
我们需要补充完整 user/uthread.c 中的 thread_create() 和 thread_schedule(),以及 user/uthread_switch.S 中的 thread_switch。两个目标:
当 thread_schedule() 第一次运行某个线程时,该线程在自己的栈上执行传入的函数;
thread_switch 保存切换走的线程的寄存器,恢复要切换的线程的寄存器,并返回到线程上一次切走的位置。
定义要保存的上下文
寄存器被分为两种类型——caller-save 和 callee-save。顾名思义,前者要求调用者保存和恢复相关寄存器,而后者要求被调用者保存和恢复相关寄存器。也就是说,从调用者的角度看,caller-save 寄存器在调用前后可能发生变化,而 callee-save 寄存器不会变化。由于 thread_switch 是以普通函数调用的方式被调用的,编译器已经在调用点处理好了 caller-save 寄存器,因此切换时我们只需要保存和恢复 callee-save 寄存器。除了 callee-save 寄存器以外,栈指针 sp 和返回地址 ra 也需要保存和恢复,它们一起构成了下面 context 结构体的内容。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 struct context { uint64 ra; uint64 sp; uint64 s0; uint64 s1; uint64 s2; uint64 s3; uint64 s4; uint64 s5; uint64 s6; uint64 s7; uint64 s8; uint64 s9; uint64 s10; uint64 s11; }; struct thread { char stack [STACK_SIZE]; int state; struct context context ; };
/*
 * user/uthread_switch.S -- context switch for user-level threads,
 * written with kernel/swtch.S as the reference.
 *
 *   void thread_switch(uint64 old, uint64 new);
 *
 * a0 holds the address of the context to save into and a1 the address
 * of the context to load from. Each context is fourteen 8-byte slots:
 * ra, sp, s0..s11.
 */
	.text

	.globl thread_switch
thread_switch:
	/* Save the outgoing thread's registers into *a0. */
	sd	ra,    0(a0)
	sd	sp,    8(a0)
	sd	s0,   16(a0)
	sd	s1,   24(a0)
	sd	s2,   32(a0)
	sd	s3,   40(a0)
	sd	s4,   48(a0)
	sd	s5,   56(a0)
	sd	s6,   64(a0)
	sd	s7,   72(a0)
	sd	s8,   80(a0)
	sd	s9,   88(a0)
	sd	s10,  96(a0)
	sd	s11, 104(a0)

	/* Load the incoming thread's registers from *a1. */
	ld	ra,    0(a1)
	ld	sp,    8(a1)
	ld	s0,   16(a1)
	ld	s1,   24(a1)
	ld	s2,   32(a1)
	ld	s3,   40(a1)
	ld	s4,   48(a1)
	ld	s5,   56(a1)
	ld	s6,   64(a1)
	ld	s7,   72(a1)
	ld	s8,   80(a1)
	ld	s9,   88(a1)
	ld	s10,  96(a1)
	ld	s11, 104(a1)

	/* ra now belongs to the incoming thread, so this resumes it. */
	ret
在 thread_schedule() 中调用 thread_switch,并把旧线程的上下文 &t->context 和新线程的上下文 &next_thread->context 作为参数传入:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 void thread_schedule (void ) { struct thread *t , *next_thread ; next_thread = 0 ; t = current_thread + 1 ; for (int i = 0 ; i < MAX_THREAD; i++){ if (t >= all_thread + MAX_THREAD) t = all_thread; if (t->state == RUNNABLE) { next_thread = t; break ; } t = t + 1 ; } if (next_thread == 0 ) { printf ("thread_schedule: no runnable threads\n" ); exit (-1 ); } if (current_thread != next_thread) { next_thread->state = RUNNING; t = current_thread; current_thread = next_thread; thread_switch((uint64)&t->context, (uint64)&next_thread->context); } else next_thread = 0 ; }
最后,在 thread_create() 中把传入的函数指针给到线程上下文的 ra——这样当这个线程被调度执行时就可以执行指定函数了;同时把栈指针给到线程上下文的 sp——注意,栈空间是按地址从大到小增长的, 所以我们给的应该是 stack 数组的末尾指针。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 void thread_create (void (*func)()) { struct thread *t ; for (t = all_thread; t < all_thread + MAX_THREAD; t++) { if (t->state == FREE) break ; } t->state = RUNNABLE; t->context.ra = (uint64)func; t->context.sp = (uint64)t->stack + STACK_SIZE - 1 ; }
Using threads 任务:在一台多核 Linux 或 MacOS 上使用 UNIX pthread 线程库写多线程的并行程序。
MIT 提供了一个代码 notxv6/ph.c,它开给定数量个线程,每个线程向 hash 表里面加许多 key(put 操作),然后从 hash 表里取出 key(get 操作),同时记录 put、get 的用时,以及缺失的 key——本来应该在 hash 表里,但是 get 不到。发生缺失的原因是 ph.c 没有在多线程时加锁,我们的任务就是把锁给加上。
相关接口:
pthread_mutex_t lock;
pthread_mutex_init(&lock, NULL);
pthread_mutex_lock(&lock);
pthread_mutex_unlock(&lock);
给每个 hash 桶加一个锁,put 和 get 的时候先加锁,再操作,最后解锁就搞定了。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 pthread_mutex_t lock[NBUCKET];void init_locks () { for (int i = 0 ; i < NBUCKET; i++) assert(pthread_mutex_init(lock+i, NULL ) == 0 ); } static void put (int key, int value) { int i = key % NBUCKET; assert(pthread_mutex_lock(lock+i) == 0 ); struct entry *e = 0 ; for (e = table[i]; e != 0 ; e = e->next) { if (e->key == key) break ; } if (e){ e->value = value; } else { insert(key, value, &table[i], table[i]); } assert(pthread_mutex_unlock(lock+i) == 0 ); } static struct entry*get (int key) { int i = key % NBUCKET; assert(pthread_mutex_lock(lock+i) == 0 ); struct entry *e = 0 ; for (e = table[i]; e != 0 ; e = e->next) { if (e->key == key) break ; } assert(pthread_mutex_unlock(lock+i) == 0 ); return e; } int main (int argc, char *argv[]) { pthread_t *tha; void *value; double t1, t0; if (argc < 2 ) { fprintf (stderr , "Usage: %s nthreads\n" , argv[0 ]); exit (-1 ); } nthread = atoi(argv[1 ]); tha = malloc (sizeof (pthread_t ) * nthread); srandom(0 ); assert(NKEYS % nthread == 0 ); for (int i = 0 ; i < NKEYS; i++) { keys[i] = random(); } init_locks(); ...... }
Barrier 任务:实现一个 barrier——执行到这里的线程必须等待,直到所有线程都执行到了这个地方。 和上一个任务一样,notxv6/barrier.c 已经写了一个不对的 barrier,它开给定数量个线程,每个线程做一个循环,循环的某处调用了 barrier()。我们期望所有线程都调用了 barrier() 之后才能继续执行。
相关接口:
pthread_cond_wait(&cond, &mutex);
pthread_cond_broadcast(&cond);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 static void barrier () { assert(pthread_mutex_lock(&bstate.barrier_mutex) == 0 ); if (++bstate.nthread == nthread){ assert(pthread_cond_broadcast(&bstate.barrier_cond) == 0 ); bstate.round++; bstate.nthread = 0 ; } else assert(pthread_cond_wait(&bstate.barrier_cond, &bstate.barrier_mutex) == 0 ); assert(pthread_mutex_unlock(&bstate.barrier_mutex) == 0 ); }
git diff 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 diff --git a/answers-thread.txt b/answers-thread.txt new file mode 100644 index 0000000..2b0e77f --- /dev/null +++ b/answers-thread.txt @@ -0,0 +1 @@ +blablablablabla diff --git a/notxv6/barrier.c b/notxv6/barrier.c index 12793e8..0588d62 100644 --- a/notxv6/barrier.c +++ b/notxv6/barrier.c @@ -30,7 +30,19 @@ barrier() // Block until all threads have called barrier() and // then increment bstate.round. 
// + assert(pthread_mutex_lock(&bstate.barrier_mutex) == 0); + if (++bstate.nthread == nthread){ + // wake up every thread sleeping on “bstate.barrier_cond” + assert(pthread_cond_broadcast(&bstate.barrier_cond) == 0); + bstate.round++; + bstate.nthread = 0; + } + else + // go to sleep on “bstate.barrier_cond”, releasing lock mutex, acquiring upon wake up + assert(pthread_cond_wait(&bstate.barrier_cond, &bstate.barrier_mutex) == 0); + + assert(pthread_mutex_unlock(&bstate.barrier_mutex) == 0); } static void * diff --git a/notxv6/ph.c b/notxv6/ph.c index 6df1500..a21c1c4 100644 --- a/notxv6/ph.c +++ b/notxv6/ph.c @@ -17,6 +17,15 @@ struct entry *table[NBUCKET]; int keys[NKEYS]; int nthread = 1; +// 给每个 hash 桶初始化一个锁 +pthread_mutex_t lock[NBUCKET]; +void +init_locks() +{ + for (int i = 0; i < NBUCKET; i++) + assert(pthread_mutex_init(lock+i, NULL) == 0); +} + double now () { @@ -40,6 +49,8 @@ void put(int key, int value) { int i = key % NBUCKET; + assert(pthread_mutex_lock(lock+i) == 0); + // is the key already present? struct entry *e = 0; for (e = table[i]; e != 0; e = e->next) { @@ -53,6 +64,8 @@ void put(int key, int value) // the new is new. 
insert(key, value, &table[i], table[i]); } + + assert(pthread_mutex_unlock(lock+i) == 0); } static struct entry* @@ -60,12 +73,15 @@ get(int key) { int i = key % NBUCKET; - + assert(pthread_mutex_lock(lock+i) == 0); + struct entry *e = 0; for (e = table[i]; e != 0; e = e->next) { if (e->key == key) break ; } + assert(pthread_mutex_unlock(lock+i) == 0); + return e; } @@ -115,6 +131,9 @@ main(int argc, char *argv[]) keys[i] = random(); } + // initialize locks + init_locks(); + // // first the puts // diff --git a/time.txt b/time.txt new file mode 100644 index 0000000..f599e28 --- /dev/null +++ b/time.txt @@ -0,0 +1 @@ +10 diff --git a/user/uthread.c b/user/uthread.c index 8e46826..29b3592 100644 --- a/user/uthread.c +++ b/user/uthread.c @@ -10,16 +10,34 @@ +struct context { + uint64 ra; // return addr + uint64 sp; // stack pointer + + // callee-saved + uint64 s0; + uint64 s1; + uint64 s2; + uint64 s3; + uint64 s4; + uint64 s5; + uint64 s6; + uint64 s7; + uint64 s8; + uint64 s9; + uint64 s10; + uint64 s11; +}; struct thread { char stack[STACK_SIZE]; /* the thread's stack */ int state; /* FREE, RUNNING, RUNNABLE */ - + struct context context; // thread context }; struct thread all_thread[MAX_THREAD]; struct thread *current_thread; extern void thread_switch(uint64, uint64); - + void thread_init(void) { @@ -63,6 +81,7 @@ thread_schedule(void) * Invoke thread_switch to switch from t to next_thread: * thread_switch(??, ??); */ + thread_switch((uint64)&t->context, (uint64)&next_thread->context); } else next_thread = 0; } @@ -77,6 +96,9 @@ thread_create(void (*func)()) } t->state = RUNNABLE; // YOUR CODE HERE + t->context.ra = (uint64)func; + // 栈空间是按地址从大到小增长的 + t->context.sp = (uint64)t->stack + STACK_SIZE - 1; } void diff --git a/user/uthread_switch.S b/user/uthread_switch.S index 5defb12..70f2547 100644 --- a/user/uthread_switch.S +++ b/user/uthread_switch.S @@ -8,4 +8,36 @@ .globl thread_switch thread_switch: /* YOUR CODE HERE */ + sd ra, 0(a0) + sd sp, 8(a0) + sd s0, 
16(a0) + sd s1, 24(a0) + sd s2, 32(a0) + sd s3, 40(a0) + sd s4, 48(a0) + sd s5, 56(a0) + sd s6, 64(a0) + sd s7, 72(a0) + sd s8, 80(a0) + sd s9, 88(a0) + sd s10, 96(a0) + sd s11, 104(a0) + + ld ra, 0(a1) + ld sp, 8(a1) + ld s0, 16(a1) + ld s1, 24(a1) + ld s2, 32(a1) + ld s3, 40(a1) + ld s4, 48(a1) + ld s5, 56(a1) + ld s6, 64(a1) + ld s7, 72(a1) + ld s8, 80(a1) + ld s9, 88(a1) + ld s10, 96(a1) + ld s11, 104(a1) + ret /* return to ra */ + +