Uthread: switching between threads

任务:在用户层面实现线程切换机制。

我们需要补充完整 user/uthread.c 中的 thread_create() 和 thread_schedule(),以及 user/uthread_switch.S 中的 thread_switch。两个目标:

  • 当 thread_schedule() 第一次调度到某个线程时,该线程在自己的栈上执行传入的函数;
  • thread_switch 保存切换走的线程的寄存器,恢复要切换的线程的寄存器,并返回到线程上一次切走的位置。

定义要保存的上下文

寄存器被分为两种类型——caller-save 和 callee-save,顾名思义,前者要求调用者保存和恢复相关寄存器,而后者要求被调用者保存和恢复相关寄存器,
也就是说,从调用者的角度看,caller-save 寄存器在调用前后可能发生变化,而 callee-save 寄存器不会变化。因此,切换时我们只需要保存和恢复 callee-save 寄存器。
除了 callee-save 寄存器需要保存和恢复以外,栈指针 sp 和返回地址 ra 也需要保存和恢复,它们共同构成了下面 context 结构体的内容。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// user/uthread.c
/*
 * Register state saved for a user-level thread while it is switched out.
 * thread_switch (user/uthread_switch.S) stores and loads these fields at
 * fixed byte offsets 0, 8, ..., 104 in declaration order, so the field
 * order here must stay in sync with that assembly.
 */
struct context {
uint64 ra; // return address: where thread_switch's ret resumes the thread
uint64 sp; // stack pointer

// callee-saved
uint64 s0;
uint64 s1;
uint64 s2;
uint64 s3;
uint64 s4;
uint64 s5;
uint64 s6;
uint64 s7;
uint64 s8;
uint64 s9;
uint64 s10;
uint64 s11;
};

/*
 * One user-level thread: its private stack, its scheduling state, and the
 * register context that thread_switch saves into / restores from.
 */
struct thread {
char stack[STACK_SIZE]; /* the thread's stack */
int state; /* FREE, RUNNING, RUNNABLE */
struct context context; /* saved registers (see struct context) */
};

参考 kernel/swtch.S,完成uthread_switch.S的上下文切换汇编

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
	.text

/*
* save the old thread's registers,
* restore the new thread's registers.
*/

.globl thread_switch
/*
 * thread_switch(old, new)
 *
 * a0 = address of the outgoing thread's struct context
 * a1 = address of the incoming thread's struct context
 *
 * Stores ra, sp and s0-s11 into the context at a0, loads the same
 * registers from the context at a1, then returns. Because ra has been
 * replaced, ret continues at the address saved in the incoming context
 * rather than at this call's original return address.
 * The offsets follow the field order of struct context (8 bytes each).
 */
thread_switch:
/* YOUR CODE HERE */
/* save the outgoing thread's registers into *a0 */
sd ra, 0(a0)
sd sp, 8(a0)
sd s0, 16(a0)
sd s1, 24(a0)
sd s2, 32(a0)
sd s3, 40(a0)
sd s4, 48(a0)
sd s5, 56(a0)
sd s6, 64(a0)
sd s7, 72(a0)
sd s8, 80(a0)
sd s9, 88(a0)
sd s10, 96(a0)
sd s11, 104(a0)

/* load the incoming thread's registers from *a1 */
ld ra, 0(a1)
ld sp, 8(a1)
ld s0, 16(a1)
ld s1, 24(a1)
ld s2, 32(a1)
ld s3, 40(a1)
ld s4, 48(a1)
ld s5, 56(a1)
ld s6, 64(a1)
ld s7, 72(a1)
ld s8, 80(a1)
ld s9, 88(a1)
ld s10, 96(a1)
ld s11, 104(a1)

ret /* return to ra */

  • 在 thread_schedule() 中调用 thread_switch,并把旧的线程上下文 &t->context 和新的线程上下文 &next_thread->context 作为参数传入;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// user/uthread.c
void
thread_schedule(void)
{
struct thread *t, *next_thread;

/* Find another runnable thread. */
next_thread = 0;
t = current_thread + 1;
for(int i = 0; i < MAX_THREAD; i++){
if(t >= all_thread + MAX_THREAD)
t = all_thread;
if(t->state == RUNNABLE) {
next_thread = t;
break;
}
t = t + 1;
}

if (next_thread == 0) {
printf("thread_schedule: no runnable threads\n");
exit(-1);
}

if (current_thread != next_thread) { /* switch threads? */
next_thread->state = RUNNING;
t = current_thread;
current_thread = next_thread;
/* YOUR CODE HERE
* Invoke thread_switch to switch from t to next_thread:
* thread_switch(??, ??);
*/
thread_switch((uint64)&t->context, (uint64)&next_thread->context);
} else
next_thread = 0;
}
  • 最后,在 thread_create() 中把传入的函数指针给到线程上下文的 ra——这样当这个线程被调度执行时就可以执行指定函数了;同时把栈指针给到线程上下文的 sp——注意,栈空间是按地址从大到小增长的,所以我们给的应该是 stack 数组的末尾指针。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// user/uthread.c
void
thread_create(void (*func)())
{
struct thread *t;

for (t = all_thread; t < all_thread + MAX_THREAD; t++) {
if (t->state == FREE) break;
}
t->state = RUNNABLE;
// YOUR CODE HERE
// xyf
t->context.ra = (uint64)func;
t->context.sp = (uint64)t->stack + STACK_SIZE - 1;
}

Using threads

任务:在一台多核 Linux 或 MacOS 上使用 UNIX pthread 线程库写多线程的并行程序。

MIT 提供了一个代码 notxv6/ph.c,它开给定数量个线程,每个线程向 hash 表里面加许多 key(put 操作),然后从 hash 表里取出 key(get 操作),同时记录 put、get 的用时,以及缺失的 key——本来应该在 hash 表里,但是 get 不到。发生缺失的原因是 ph.c 没有在多线程时加锁,我们的任务就是把锁给加上。

相关接口:

1
2
3
4
pthread_mutex_t lock;            // declare a lock
pthread_mutex_init(&lock, NULL); // initialize the lock
pthread_mutex_lock(&lock); // acquire lock
pthread_mutex_unlock(&lock); // release lock

给每个 hash 桶加一个锁,put 和 get 的时候先加锁,再操作,最后解锁就搞定了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// notxv6/ph.c
/* One mutex per hash bucket: put() and get() hold lock[b] while touching table[b]. */
pthread_mutex_t lock[NBUCKET];

/*
 * Initialize every per-bucket mutex with default attributes.
 * Must run before any thread calls put() or get().
 */
void
init_locks()
{
  pthread_mutex_t *m = lock;
  pthread_mutex_t *const end = lock + NBUCKET;

  while (m < end) {
    assert(pthread_mutex_init(m, NULL) == 0);
    m++;
  }
}

/*
 * Store value under key in the hash table.
 *
 * The bucket is key % NBUCKET. While holding that bucket's mutex, the
 * bucket's chain is searched: an existing entry with the same key has its
 * value overwritten; otherwise the key/value pair is handed to insert()
 * together with the bucket's head slot and its current head entry.
 */
static
void put(int key, int value)
{
  int bucket = key % NBUCKET;
  struct entry *node;

  assert(pthread_mutex_lock(&lock[bucket]) == 0);

  /* Walk the chain until the key is found or the chain ends. */
  node = table[bucket];
  while (node != 0 && node->key != key)
    node = node->next;

  if (node == 0) {
    /* The key is new. */
    insert(key, value, &table[bucket], table[bucket]);
  } else {
    /* The key already exists: update it in place. */
    node->value = value;
  }

  assert(pthread_mutex_unlock(&lock[bucket]) == 0);
}

/*
 * Look up key in the hash table.
 *
 * Searches the chain of bucket key % NBUCKET while holding that bucket's
 * mutex. Returns the matching entry, or 0 if the key is not present.
 * The mutex is released before returning, so the returned entry is not
 * protected by the lock once the caller has it.
 */
static struct entry*
get(int key)
{
  int bucket = key % NBUCKET;
  struct entry *node;

  assert(pthread_mutex_lock(&lock[bucket]) == 0);

  node = table[bucket];
  while (node != 0 && node->key != key)
    node = node->next;

  assert(pthread_mutex_unlock(&lock[bucket]) == 0);

  return node;
}

int
main(int argc, char *argv[])
{
pthread_t *tha;
void *value;
double t1, t0;

if (argc < 2) {
fprintf(stderr, "Usage: %s nthreads\n", argv[0]);
exit(-1);
}
nthread = atoi(argv[1]);
tha = malloc(sizeof(pthread_t) * nthread);
srandom(0);
assert(NKEYS % nthread == 0);
for (int i = 0; i < NKEYS; i++) {
keys[i] = random();
}

// initialize locks
init_locks();
......
}

Barrier

任务:实现一个 barrier——执行到这里的线程必须等待,直到所有线程都执行到了这个地方。
和上一个任务一样,notxv6/barrier.c 已经写了一个不对的 barrier,它开给定数量个线程,每个线程做一个循环,循环的某处调用了 barrier()。我们期望所有线程都调用了 barrier() 之后才能继续执行。

相关接口:

1
2
pthread_cond_wait(&cond, &mutex);  // go to sleep on cond, releasing lock mutex, acquiring upon wake up
pthread_cond_broadcast(&cond); // wake up every thread sleeping on cond

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// notxv6/barrier.c
/*
 * Block until all nthread threads have called barrier(), then let them
 * all continue and advance bstate.round by one.
 *
 * The last thread to arrive resets the arrival counter, increments
 * bstate.round and broadcasts on bstate.barrier_cond. Every other thread
 * records the round it arrived in and sleeps until bstate.round differs
 * from that value. Waiting on the round number (rather than calling
 * pthread_cond_wait once) keeps a thread inside the barrier if it is
 * woken spuriously before the round has actually completed.
 */
static void
barrier()
{
  assert(pthread_mutex_lock(&bstate.barrier_mutex) == 0);

  int arrival_round = bstate.round;

  if (++bstate.nthread == nthread) {
    /* Last arrival: start the next round and release everyone. */
    bstate.nthread = 0;
    bstate.round++;
    assert(pthread_cond_broadcast(&bstate.barrier_cond) == 0);
  } else {
    /* pthread_cond_wait releases the mutex while asleep and
     * re-acquires it before returning; re-check the predicate each time. */
    while (bstate.round == arrival_round)
      assert(pthread_cond_wait(&bstate.barrier_cond, &bstate.barrier_mutex) == 0);
  }

  assert(pthread_mutex_unlock(&bstate.barrier_mutex) == 0);
}

git diff

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
diff --git a/answers-thread.txt b/answers-thread.txt
new file mode 100644
index 0000000..2b0e77f
--- /dev/null
+++ b/answers-thread.txt
@@ -0,0 +1 @@
+blablablablabla
diff --git a/notxv6/barrier.c b/notxv6/barrier.c
index 12793e8..0588d62 100644
--- a/notxv6/barrier.c
+++ b/notxv6/barrier.c
@@ -30,7 +30,19 @@ barrier()
// Block until all threads have called barrier() and
// then increment bstate.round.
//
+ assert(pthread_mutex_lock(&bstate.barrier_mutex) == 0);

+ if(++bstate.nthread == nthread){
+ // wake up every thread sleeping on “bstate.barrier_cond”
+ assert(pthread_cond_broadcast(&bstate.barrier_cond) == 0);
+ bstate.round++;
+ bstate.nthread = 0;
+ }
+ else
+ // go to sleep on “bstate.barrier_cond”, releasing lock mutex, acquiring upon wake up
+ assert(pthread_cond_wait(&bstate.barrier_cond, &bstate.barrier_mutex) == 0);
+
+ assert(pthread_mutex_unlock(&bstate.barrier_mutex) == 0);
}

static void *
diff --git a/notxv6/ph.c b/notxv6/ph.c
index 6df1500..a21c1c4 100644
--- a/notxv6/ph.c
+++ b/notxv6/ph.c
@@ -17,6 +17,15 @@ struct entry *table[NBUCKET];
int keys[NKEYS];
int nthread = 1;

+// 给每个 hash 桶初始化一个锁
+pthread_mutex_t lock[NBUCKET];
+void
+init_locks()
+{
+ for(int i = 0; i < NBUCKET; i++)
+ assert(pthread_mutex_init(lock+i, NULL) == 0);
+}
+
double
now()
{
@@ -40,6 +49,8 @@ void put(int key, int value)
{
int i = key % NBUCKET;

+ assert(pthread_mutex_lock(lock+i) == 0);
+
// is the key already present?
struct entry *e = 0;
for (e = table[i]; e != 0; e = e->next) {
@@ -53,6 +64,8 @@ void put(int key, int value)
// the new is new.
insert(key, value, &table[i], table[i]);
}
+
+ assert(pthread_mutex_unlock(lock+i) == 0);
}

static struct entry*
@@ -60,12 +73,15 @@ get(int key)
{
int i = key % NBUCKET;

-
+ assert(pthread_mutex_lock(lock+i) == 0);
+
struct entry *e = 0;
for (e = table[i]; e != 0; e = e->next) {
if (e->key == key) break;
}

+ assert(pthread_mutex_unlock(lock+i) == 0);
+
return e;
}

@@ -115,6 +131,9 @@ main(int argc, char *argv[])
keys[i] = random();
}

+ // initialize locks
+ init_locks();
+
//
// first the puts
//
diff --git a/time.txt b/time.txt
new file mode 100644
index 0000000..f599e28
--- /dev/null
+++ b/time.txt
@@ -0,0 +1 @@
+10
diff --git a/user/uthread.c b/user/uthread.c
index 8e46826..29b3592 100644
--- a/user/uthread.c
+++ b/user/uthread.c
@@ -10,16 +10,34 @@
#define STACK_SIZE 8192
#define MAX_THREAD 4

+struct context {
+ uint64 ra; // return addr
+ uint64 sp; // stack pointer
+
+ // callee-saved
+ uint64 s0;
+ uint64 s1;
+ uint64 s2;
+ uint64 s3;
+ uint64 s4;
+ uint64 s5;
+ uint64 s6;
+ uint64 s7;
+ uint64 s8;
+ uint64 s9;
+ uint64 s10;
+ uint64 s11;
+};

struct thread {
char stack[STACK_SIZE]; /* the thread's stack */
int state; /* FREE, RUNNING, RUNNABLE */
-
+ struct context context; // thread context
};
struct thread all_thread[MAX_THREAD];
struct thread *current_thread;
extern void thread_switch(uint64, uint64);
-
+
void
thread_init(void)
{
@@ -63,6 +81,7 @@ thread_schedule(void)
* Invoke thread_switch to switch from t to next_thread:
* thread_switch(??, ??);
*/
+ thread_switch((uint64)&t->context, (uint64)&next_thread->context);
} else
next_thread = 0;
}
@@ -77,6 +96,9 @@ thread_create(void (*func)())
}
t->state = RUNNABLE;
// YOUR CODE HERE
+ t->context.ra = (uint64)func;
+ // 栈空间是按地址从大到小增长的
+ t->context.sp = (uint64)t->stack + STACK_SIZE - 1;
}

void
diff --git a/user/uthread_switch.S b/user/uthread_switch.S
index 5defb12..70f2547 100644
--- a/user/uthread_switch.S
+++ b/user/uthread_switch.S
@@ -8,4 +8,36 @@
.globl thread_switch
thread_switch:
/* YOUR CODE HERE */
+ sd ra, 0(a0)
+ sd sp, 8(a0)
+ sd s0, 16(a0)
+ sd s1, 24(a0)
+ sd s2, 32(a0)
+ sd s3, 40(a0)
+ sd s4, 48(a0)
+ sd s5, 56(a0)
+ sd s6, 64(a0)
+ sd s7, 72(a0)
+ sd s8, 80(a0)
+ sd s9, 88(a0)
+ sd s10, 96(a0)
+ sd s11, 104(a0)
+
+ ld ra, 0(a1)
+ ld sp, 8(a1)
+ ld s0, 16(a1)
+ ld s1, 24(a1)
+ ld s2, 32(a1)
+ ld s3, 40(a1)
+ ld s4, 48(a1)
+ ld s5, 56(a1)
+ ld s6, 64(a1)
+ ld s7, 72(a1)
+ ld s8, 80(a1)
+ ld s9, 88(a1)
+ ld s10, 96(a1)
+ ld s11, 104(a1)
+
ret /* return to ra */
+
+