Lab pgtbl: Page tables

本小节参考freewalk的代码,实现RISC-V三级页表翻译,打印页表的内容。

在kernel/defs.h中添加声明:void vmprint(pagetable_t);

在kernel/exec.c中的return argc;之前添加

1
2
3
// Dump the user page table, but only for the process whose pid is 1.
if (p->pid == 1) {
vmprint(p->pagetable);
}

那么,在os启动的时候,会打印页表的内容。

参考kernel/vm.c中freewalk(pagetable_t pagetable)函数访问页表的方式,递归输出页表信息。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Tear down a tree of page-table pages, depth first.
// Every leaf mapping must already be gone: a valid PTE that still
// carries R/W/X permission bits causes a panic.
void
freewalk(pagetable_t pagetable)
{
  // A page-table page holds 2^9 = 512 PTEs.
  for(int idx = 0; idx < 512; idx++){
    pte_t entry = pagetable[idx];

    // Nothing to do for invalid entries.
    if((entry & PTE_V) == 0)
      continue;

    // A valid entry with any of R/W/X set is a leaf mapping.
    if((entry & (PTE_R|PTE_W|PTE_X)) != 0)
      panic("freewalk: leaf");

    // Otherwise the entry points to a lower-level page table:
    // free that subtree first, then clear the entry.
    freewalk((pagetable_t)PTE2PA(entry));
    pagetable[idx] = 0;
  }
  kfree((void*)pagetable);
}

RISC-V的页表
Figure-3.2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// Helper for vmprint: print every valid PTE of one page-table page and
// recurse into the page-table pages those entries point to.
// level is the depth of this page (vmprint passes 1 for the root); it
// sets how many ".." markers precede each printed entry.
static void
_vmprint(pagetable_t pagetable, int level)
{
  // Each page-table page contains 512 PTEs.
  for (int idx = 0; idx < 512; ++idx) {
    pte_t entry = pagetable[idx];

    // Skip entries that are not valid.
    if ((entry & PTE_V) == 0)
      continue;

    // Indentation prefix: ".." for the first level, " .." for each
    // additional level, giving a tree-like layout.
    for (int depth = 0; depth < level; ++depth)
      printf(depth ? " .." : "..");

    // Physical address stored in the PTE (next-level table or page).
    uint64 pa = PTE2PA(entry);
    printf("%d: pte %p pa %p\n", idx, entry, pa);

    // Entries with any of R/W/X set are leaves; only entries without
    // them point to another page-table page.
    if ((entry & (PTE_R | PTE_W | PTE_X)) != 0)
      continue;
    _vmprint((pagetable_t)pa, level + 1);
  }
}

// Print the contents of a page table: first a header line with the
// address of the root page, then every valid entry via _vmprint,
// starting at level 1.
void
vmprint(pagetable_t pagetable)
{
printf("page table %p\n", pagetable);
_vmprint(pagetable, 1);
}

A kernel page table per process

当前,xv6的内核态只有一个直接映射到物理内存的page table,并且这个页表没有用户态进程的地址信息。
因此当内核需要访问用户态传入的指针(虚拟地址)时,必须先在内核中通过软件遍历该进程的用户页表,把虚拟地址翻译成物理地址,然后才能访问对应的内存。

在lab2中,我们在结构体struct proc中添加了一个成员变量int mask,用于实现sys_trace。
在本小节中,我们在结构体struct proc添加一个成员变量kernel_pagetable
为每个进程添加一个内核态的页表,然后在该进程进入到内核前,
将该进程的kernel page table的物理地址写入SATP寄存器(Supervisor Address Translation and Protection Register),即使用该进程的内核态页表,
这样就可以实现在内核态直接翻译虚拟地址的功能


一个内核页表的创建

  • kernel/proc.h中的struct proc添加新成员变量pagetable_t kernel_pagetable
  • 首先查看kernel/vm.c中的kvminit函数。该函数通过kalloc为内核创建一个空的页表,然后通过kvmmap完成直接映射操作。
  • 现在要为每个新进程生成一个内核页表,所以需要参考kvminit重新实现一个为进程生成内核页表的版本
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*
* the kernel's page table.
* Allocated and filled in by kvminit(); kvmmap() adds mappings to it.
*/
pagetable_t kernel_pagetable;
extern char etext[]; // kernel.ld sets this to end of kernel code.
extern char trampoline[]; // trampoline.S

/*
* create a direct-map page table for the kernel.
* Every region below is mapped at a virtual address equal to its
* physical address, except the trampoline page, which is mapped at
* TRAMPOLINE. kvmmap() panics if a mapping cannot be installed.
* Note: the result of kalloc() is not checked here.
*/
void
kvminit()
{
kernel_pagetable = (pagetable_t) kalloc();
memset(kernel_pagetable, 0, PGSIZE);
// uart registers
kvmmap(UART0, UART0, PGSIZE, PTE_R | PTE_W);
// virtio mmio disk interface
kvmmap(VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);
// CLINT
kvmmap(CLINT, CLINT, 0x10000, PTE_R | PTE_W);
// PLIC
kvmmap(PLIC, PLIC, 0x400000, PTE_R | PTE_W);
// map kernel text executable and read-only.
kvmmap(KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);
// map kernel data and the physical RAM we'll make use of.
kvmmap((uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);
// map the trampoline for trap entry/exit to
// the highest virtual address in the kernel.
kvmmap(TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
}

// Install a mapping of sz bytes from va to pa, with permissions perm,
// in the global kernel page table. Panics if the mapping fails.
// only used when booting.
// does not flush TLB or enable paging.
void
kvmmap(uint64 va, uint64 pa, uint64 sz, int perm)
{
  int rc = mappages(kernel_pagetable, va, sz, pa, perm);

  if(rc != 0)
    panic("kvmmap");
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Same as kvmmap, but the mapping is added to the page table passed in
// (a process's own kernel page table) rather than to the global one.
// Panics if the mapping cannot be installed.
void
ukvmmap(pagetable_t pagetable, uint64 va, uint64 pa, uint64 sz, int perm)
{
  int rc = mappages(pagetable, va, sz, pa, perm);

  if(rc != 0)
    panic("ukvmmap");
}

// Build a kernel page table for one process.
// Returns the new page table, or 0 if the root page cannot be
// allocated. A failure while installing a mapping panics inside
// ukvmmap instead of returning.
pagetable_t
proc_kvminit()
{
// Allocate one page for the root of the page table.
pagetable_t proc_kernel_pagetable = (pagetable_t) kalloc();
if (proc_kernel_pagetable == 0)
return 0;
memset(proc_kernel_pagetable, 0, PGSIZE);
// Install the same mappings that kvminit() puts in the global kernel page table.
ukvmmap(proc_kernel_pagetable, UART0, UART0, PGSIZE, PTE_R | PTE_W);
ukvmmap(proc_kernel_pagetable, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);
ukvmmap(proc_kernel_pagetable, CLINT, CLINT, 0x10000, PTE_R | PTE_W);
ukvmmap(proc_kernel_pagetable, PLIC, PLIC, 0x400000, PTE_R | PTE_W);
ukvmmap(proc_kernel_pagetable, KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);
ukvmmap(proc_kernel_pagetable, (uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);
ukvmmap(proc_kernel_pagetable, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
return proc_kernel_pagetable;
}

同时,在kernel/defs.h里面添加这两个函数的声明。

为每个新进程生成一个内核页表

确保每个进程的内核页表都有该进程的内核栈的映射。
在未修改的xv6中,所有内核栈都在procinit中设置。
你需要将部分或全部功能移至allocproc。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
// kernel/proc.c void procinit(void)
// initialize the proc table at boot time.
// Only the locks are initialized here; kernel stacks are no longer
// allocated or mapped at boot.
void
procinit(void)
{
struct proc *p;

initlock(&pid_lock, "nextpid");
for(p = proc; p < &proc[NPROC]; p++) {
initlock(&p->lock, "proc");
// The code below is removed: the kernel stack is now allocated and
// mapped when a process is created (see allocproc).
// // Allocate a page for the process's kernel stack.
// // Map it high in memory, followed by an invalid
// // guard page.
// char *pa = kalloc();
// if(pa == 0)
// panic("kalloc");
// uint64 va = KSTACK((int) (p - proc));
// kvmmap(va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
// p->kstack = va;
}
kvminithart();
}

// Look in the process table for an UNUSED proc.
// If found, initialize state required to run in the kernel
// (trapframe, user page table, per-process kernel page table and
// kernel stack), and return with p->lock held.
// If there are no free procs, or a memory allocation fails, return 0.
static struct proc*
allocproc(void)
{
  struct proc *p;

  for(p = proc; p < &proc[NPROC]; p++) {
    acquire(&p->lock);
    if(p->state == UNUSED) {
      goto found;
    } else {
      release(&p->lock);
    }
  }
  return 0;

found:
  p->pid = allocpid();

  // Allocate a trapframe page.
  if((p->trapframe = (struct trapframe *)kalloc()) == 0){
    release(&p->lock);
    return 0;
  }

  // An empty user page table.
  p->pagetable = proc_pagetable(p);
  if(p->pagetable == 0){
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // Give the new process its own kernel page table.
  p->kernel_pagetable = proc_kvminit();
  if (p->kernel_pagetable == 0) {
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // Allocate the page that backs this process's kernel stack.
  // Running out of memory is reported to the caller, like the other
  // allocation failures above, instead of panicking the kernel.
  // p->kstack is still 0 here, so freeproc skips the kernel-stack
  // lookup and only releases what has been set up so far.
  char *pa = kalloc();
  if (pa == 0) {
    freeproc(p);
    release(&p->lock);
    return 0;
  }
  // Every process has a private kernel page table, so the kernel stack
  // can live at the same fixed virtual address in each of them.
  uint64 va = KSTACK((int)0);
  // Map the kernel stack in the process's own kernel page table.
  ukvmmap(p->kernel_pagetable, va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
  p->kstack = va;

  // Set up new context to start executing at forkret,
  // which returns to user space.
  memset(&p->context, 0, sizeof(p->context));
  p->context.ra = (uint64)forkret;
  p->context.sp = p->kstack + PGSIZE;

  return p;
}

切换进程时能够切换内核页表

修改scheduler()以便将进程的内核页表加载到内核的satp寄存器中(请参阅kvminithart以获得灵感)。
在调用w_satp()之后不要忘记调用sfence_vma()。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// Scheduler loop: repeatedly scan the process table for a RUNNABLE
// process, switch to it, and regain control when it switches back.
// While a process runs, satp holds that process's kernel page table;
// it is set back to the global kernel page table afterwards.
void
scheduler(void)
{
struct proc *p;
struct cpu *c = mycpu();

c->proc = 0;
for(;;){
// Avoid deadlock by ensuring that devices can interrupt.
intr_on();

int found = 0;
for(p = proc; p < &proc[NPROC]; p++) {
acquire(&p->lock);
if(p->state == RUNNABLE) {
// Switch to chosen process. It is the process's job
// to release its lock and then reacquire it
// before jumping back to us.
p->state = RUNNING;
c->proc = p;

// Before switching to the process, load its kernel page table into satp.
w_satp(MAKE_SATP(p->kernel_pagetable));
// Flush the TLB so stale translations are not used.
sfence_vma();
// Switch to the process and let it run.
swtch(&c->context, &p->context);

// Process is done running for now.
// It should have changed its p->state before coming back.
// Back in the scheduler: point satp at the global kernel page table again.
kvminithart();
c->proc = 0;

found = 1;
}
release(&p->lock);
}
#if !defined (LAB_FS)
if(found == 0) {
intr_on();
asm volatile("wfi");
}
#else
;
#endif
}
}

销毁进程时释放内核页表

在freeproc中释放进程的内核页表。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
// free a proc structure and the data hanging from it,
// including user pages.
// p->lock must be held.
static void
freeproc(struct proc *p)
{
if(p->trapframe)
kfree((void*)p->trapframe);
p->trapframe = 0;

// 删除内核栈
if (p->kstack) {
// 通过页表地址, kstack虚拟地址 找到最后一级的页表项
pte_t* pte = walk(p->kernel_pagetable, p->kstack, 0);
if (pte == 0)
panic("freeproc : kstack");
// 删除页表项对应的物理地址
kfree((void*)PTE2PA(*pte));
}
p->kstack = 0;

if(p->pagetable)
proc_freepagetable(p->pagetable, p->sz);
p->pagetable = 0;

// 删除kernel pagetable
if (p->kernel_pagetable)
proc_freekernelpagetable(p->kernel_pagetable);
p->kernel_pagetable = 0;

p->sz = 0;
p->pid = 0;
p->parent = 0;
p->name[0] = 0;
p->chan = 0;
p->killed = 0;
p->xstate = 0;
p->state = UNUSED;
}

// Free the page-table pages of a process's kernel page table.
// Leaf entries are only cleared; the physical pages they map are not
// freed here. Entries without R/W/X bits are followed recursively.
void
proc_freekernelpagetable(pagetable_t pagetable)
{
  for (int slot = 0; slot < 512; ++slot) {
    pte_t entry = pagetable[slot];

    if ((entry & PTE_V) == 0)
      continue;

    pagetable[slot] = 0;

    // Leaf mapping: nothing below it to free.
    if ((entry & (PTE_R | PTE_W | PTE_X)) != 0)
      continue;

    // Interior entry: free the lower-level page-table page.
    proc_freekernelpagetable((pagetable_t)PTE2PA(entry));
  }
  kfree((void*)pagetable);
}

需要将 walk 函数的声明添加到 kernel/defs.h 中,否则无法在 proc.c 中直接调用。

获取进程内核页表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Translate a kernel virtual address to a physical address.
// The lookup uses the current process's kernel page table (this
// replaces the earlier walk of the global kernel_pagetable).
// Panics if there is no PTE for va or the PTE is not valid.
uint64
kvmpa(uint64 va)
{
  pte_t *entry = walk(myproc()->kernel_pagetable, va, 0);

  if(entry == 0)
    panic("kvmpa");
  if((*entry & PTE_V) == 0)
    panic("kvmpa");

  // Physical page address plus the offset of va within its page.
  return PTE2PA(*entry) + va % PGSIZE;
}

最后,在vm.c中添加头文件,因为使用了结构体proc。

1
2
#include "spinlock.h"
#include "proc.h"

Simplify copyin/copyinstr

这个任务的目的就是在用户进程内核页表中添加用户页表映射的副本。


1.将vm.c中copyin的内容替换为对copyin_new的调用,将vm.c中copyinstr的内容替换为对copyinstr_new的调用。

1
2
3
4
5
6
7
8
9
10
11
// Copy len bytes from user virtual address srcva to dst.
// The work is forwarded unchanged to copyin_new, whose result is returned.
int
copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
{
return copyin_new(pagetable, dst, srcva, len);
}

// Copy a string from user virtual address srcva to dst, at most max bytes.
// The work is forwarded unchanged to copyinstr_new, whose result is returned.
int
copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
{
return copyinstr_new(pagetable, dst, srcva, max);
}

然后,将copyin_new和copyinstr_new的声明添加到kernel/defs.h中。


2.在kernel/vm.c中添加kvmcopy和kvmdealloc两个函数,
用户页表的变化同步到用户进程内核页表中,需要实现映射和缩减两个操作。
注意权限,需要把PTE_U去掉,因为CPU在supervisor模式下不能访问设置了PTE_U的页。
同时,不要释放物理内存,拷贝映射关系即可。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
// kernel/vm.c
// Copy the mappings for the pages in [PGROUNDUP(start), end) from the
// page table src into the page table dst, at the same virtual
// addresses. Only page-table entries are copied; the physical pages
// themselves are shared, not duplicated.
// PTE_U is cleared in the copied entries so that the kernel can access
// the pages through dst.
// Panics if src has no valid mapping for a page in the range.
// Returns 0 on success. On failure returns -1 after removing the
// mappings added by this call; mappings that dst already had below
// start are left untouched.
int
kvmcopy(pagetable_t src, pagetable_t dst, uint64 start, uint64 end)
{
  pte_t *pte;
  uint64 pa, va;
  uint flags;
  // Round start up to a page boundary: a partially used page below it
  // is expected to be mapped in dst already, and mapping it again
  // would be a remap.
  uint64 first = PGROUNDUP(start);

  for(va = first; va < end; va += PGSIZE){
    if((pte = walk(src, va, 0)) == 0)   // last-level PTE for va
      panic("kvmcopy: pte should exist");
    if((*pte & PTE_V) == 0)             // the page must be mapped in src
      panic("kvmcopy: page not present");
    pa = PTE2PA(*pte);                  // start of the physical page
    // Same permissions as in src, minus the user bit.
    flags = PTE_FLAGS(*pte) & ~PTE_U;
    if(mappages(dst, va, PGSIZE, pa, flags) != 0){
      // Undo only the mappings added by this call (without freeing the
      // physical pages); anything mapped below first belongs to the
      // caller and must survive.
      uvmunmap(dst, first, (va - first) / PGSIZE, 0);
      return -1;
    }
  }
  return 0;
}

// Counterpart of uvmdealloc for a process's kernel page table: shrink
// the mapped program memory from oldsz to newsz by unmapping the pages
// that are no longer needed, without freeing the physical memory.
// Used to keep the program-memory mappings in the kernel page table in
// step with those in the user page table.
// Returns newsz, or oldsz when newsz is not smaller than oldsz.
uint64
kvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
  if(newsz >= oldsz)
    return oldsz;

  uint64 new_top = PGROUNDUP(newsz);
  uint64 old_top = PGROUNDUP(oldsz);

  // Only whole pages above the new top are unmapped.
  if(new_top < old_top){
    int npages = (old_top - new_top) / PGSIZE;
    uvmunmap(pagetable, new_top, npages, 0);
  }
  return newsz;
}

然后,将函数声明在defs.h中。


3.修改fork()、exec()和growproc()

把fork出来的子进程的用户pagetable复制给kernel_pagetable。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// kernel/proc.c
int
fork(void)
{
  // ......

  // Copy user memory from parent to child.
  if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }
  // Record the child's size before the next step: freeproc passes
  // np->sz to proc_freepagetable, so it must already describe the
  // user memory just copied if the step below fails.
  np->sz = p->sz;

  // Mirror the child's user mappings in the child's kernel page table.
  if(kvmcopy(np->pagetable, np->kernel_pagetable, 0, np->sz) < 0){
    freeproc(np);
    release(&np->lock);
    return -1;
  }

  // ......
}

在映射之前要先检测程序大小是否超过PLIC,防止remap。
同时,映射前要先清除进程内核页表中[0, PLIC)范围内旧程序的映射,再将要执行的新程序映射到[0, PLIC)中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// kernel/exec.c
int
exec(char *path, char **argv)
{
  // ......
  // Load program into memory.
  for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
    if(readi(ip, 0, (uint64)&ph, off, sizeof(ph)) != sizeof(ph))
      goto bad;
    if(ph.type != ELF_PROG_LOAD)
      continue;
    if(ph.memsz < ph.filesz)
      goto bad;
    if(ph.vaddr + ph.memsz < ph.vaddr)
      goto bad;
    uint64 sz1;
    if((sz1 = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0)
      goto bad;
    // Record the new size before any further check, so that sz always
    // covers everything uvmalloc has allocated when we jump to bad.
    // NOTE(review): the bad path is elided here; it is assumed to free
    // the new page table using sz -- confirm.
    sz = sz1;
    // The program must stay below PLIC: that range of the process's
    // kernel page table is reserved for the copy of the user mappings.
    if(sz >= PLIC) {
      goto bad;
    }
    if(ph.vaddr % PGSIZE != 0)
      goto bad;
    if(loadseg(pagetable, ph.vaddr, ip, ph.off, ph.filesz) < 0)
      goto bad;
  }
  // ......

  // Replace the user mappings in the process's kernel page table:
  // drop the mappings of the old image, then mirror the new image.
  // This is done before p->name is changed so that a failure leaves
  // the process as it was.
  uvmunmap(p->kernel_pagetable, 0, PGROUNDUP(oldsz)/PGSIZE, 0);
  if(kvmcopy(pagetable, p->kernel_pagetable, 0, sz) < 0){
    // kvmcopy has already removed its partial mappings. Put the old
    // image's mappings back so the process can keep running.
    // NOTE(review): assumes uvmunmap with do_free=0 keeps the
    // intermediate page-table pages, so re-mapping [0, oldsz) needs
    // no new allocation -- confirm.
    if(kvmcopy(p->pagetable, p->kernel_pagetable, 0, oldsz) < 0)
      panic("exec: kvmcopy");
    goto bad;
  }

  // Save program name for debugging.
  for(last=s=path; *s; s++)
    if(*s == '/')
      last = s+1;
  safestrcpy(p->name, last, sizeof(p->name));

  // Commit to the user image.
  oldpagetable = p->pagetable;
  p->pagetable = pagetable;
  p->sz = sz;
  p->trapframe->epc = elf.entry; // initial program counter = main
  p->trapframe->sp = sp; // initial stack pointer
  proc_freepagetable(oldpagetable, oldsz);
  // ......
}

从sysproc.c中的sys_sbrk()可以发现,负责调整进程内存大小的函数是growproc(),所以我们对growproc()进行修改。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// kernel/proc.c
int
growproc(int n)
{
uint sz;
struct proc *p = myproc();

sz = p->sz;
if(n > 0){
uint64 newsz;
if((newsz = uvmalloc(p->pagetable, sz, sz + n)) == 0) {
return -1;
}
// 内核页表中的映射同步扩大
if(kvmcopy(p->pagetable, p->kernel_pagetable, sz, n) != 0) {
// uvmdealloc(p->pagetable, newsz, sz);
return -1;
}
sz = newsz;
} else if(n < 0){
uvmdealloc(p->pagetable, sz, sz + n);
// 内核页表中的映射同步缩小
sz = kvmdealloc(p->kernel_pagetable, sz, sz + n);
}
p->sz = sz;
return 0;
}

在userinit的内核页表中包含第一个进程的用户页表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// kernel/proc.c
// Set up first user process.
void
userinit(void)
{
  // ......

  // allocate one user page and copy init's instructions
  // and data into it.
  uvminit(p->pagetable, initcode, sizeof(initcode));
  p->sz = PGSIZE;
  // Mirror the program memory mapping in the process's kernel page
  // table. The first process cannot run without it, so a failure is
  // fatal rather than silently ignored.
  if(kvmcopy(p->pagetable, p->kernel_pagetable, 0, p->sz) < 0)
    panic("userinit: kvmcopy");

  // ......
}

用户页表在进程内核页表中的映射范围为[0, PLIC),但是从xv6 book中可以看到,
全局内核页表在[0, PLIC)之间还映射了CLINT(核心本地中断控制器),
CLINT仅在内核启动时使用,所以进程内核页表中无需再映射CLINT,
因此我们将proc_kvminit()中映射CLINT的部分注释掉,防止在映射用户页表时出现remap

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// vm.c
// Build a kernel page table for one process.
// Returns the new page table, or 0 if the root page cannot be
// allocated. A failure while installing a mapping panics inside
// ukvmmap instead of returning.
pagetable_t
proc_kvminit() {
// Allocate one page for the root of the page table.
pagetable_t proc_kernel_pagetable = (pagetable_t) kalloc();
if (proc_kernel_pagetable == 0)
return 0;
memset(proc_kernel_pagetable, 0, PGSIZE);
// Install the same mappings as kvminit(), except for CLINT.
ukvmmap(proc_kernel_pagetable, UART0, UART0, PGSIZE, PTE_R | PTE_W);
ukvmmap(proc_kernel_pagetable, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);
// CLINT is deliberately not mapped in a process's kernel page table:
// the range is left free for the copies of the user mappings.
// ukvmmap(proc_kernel_pagetable, CLINT, CLINT, 0x10000, PTE_R | PTE_W);
ukvmmap(proc_kernel_pagetable, PLIC, PLIC, 0x400000, PTE_R | PTE_W);
ukvmmap(proc_kernel_pagetable, KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);
ukvmmap(proc_kernel_pagetable, (uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);
ukvmmap(proc_kernel_pagetable, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
return proc_kernel_pagetable;
}

遇到的错误

缺少页表映射可能会导致内核遇到页面错误。
它将打印一个包含sepc=0x00000000XXXXXXXX的错误。
你可以通过在kernel/kernel.asm中搜索XXXXXXXX来找出故障发生的位置。


如果出现== Test usertests == Timeout! (300.2s)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
grand@Lubuntu ~/xv6-labs-2020 (pgtbl)> python3 grade-lab-pgtbl
make: 'kernel/kernel' is up to date.
== Test pte printout == pte printout: OK (3.1s)
== Test answers-pgtbl.txt == answers-pgtbl.txt: FAIL
answers-pgtbl.txt does not seem to contain enough text
== Test count copyin == count copyin: OK (2.2s)
== Test usertests == Timeout! (300.2s)
== Test usertests: copyin ==
usertests: copyin: OK
== Test usertests: copyinstr1 ==
usertests: copyinstr1: OK
== Test usertests: copyinstr2 ==
usertests: copyinstr2: OK
== Test usertests: copyinstr3 ==
usertests: copyinstr3: OK
== Test usertests: sbrkmuch ==
usertests: sbrkmuch: OK
== Test usertests: all tests ==
usertests: all tests: FAIL
...
test bigfile: OK
test dirfile: OK
test iref: OK
test forktest: OK
test bigdir: qemu-system-riscv64: terminating on signal 15 from pid 17227 (make)
MISSING '^ALL TESTS PASSED$'
== Test time ==
time: OK
Score: 41/66

就去修改grade-lab-pgtbl,把timeout往大改,毕竟在虚拟机运行会比较慢
1
2
3
4
5
@test(0, "usertests")
def test_usertests():
r.run_qemu(shell_script([
'usertests'
]), timeout=600) # timeout raised from 300s to 600s

实验评分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
grand@Lubuntu ~/xv6-labs-2020 (pgtbl)> python3 grade-lab-pgtbl

== Test pte printout == pte printout: OK (4.1s)
== Test answers-pgtbl.txt == answers-pgtbl.txt: FAIL
answers-pgtbl.txt does not seem to contain enough text
== Test count copyin == count copyin: OK (2.4s)
== Test usertests == (284.1s)
== Test usertests: copyin ==
usertests: copyin: OK
== Test usertests: copyinstr1 ==
usertests: copyinstr1: OK
== Test usertests: copyinstr2 ==
usertests: copyinstr2: OK
== Test usertests: copyinstr3 ==
usertests: copyinstr3: OK
== Test usertests: sbrkmuch ==
usertests: sbrkmuch: OK
== Test usertests: all tests ==
usertests: all tests: OK
== Test time ==
time: OK
Score: 61/66

修改记录

1
2
3
4
5
6
7
8
9
10
grand@Lubuntu ~/xv6-labs-2020 (pgtbl)> git diff origin/pgtbl pgtbl --stat
answers-pgtbl.txt | 0
grade-lab-pgtbl | 2 +-
kernel/defs.h | 13 ++++++++++++
kernel/exec.c | 14 ++++++++++++-
kernel/proc.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
kernel/proc.h | 1 +
kernel/vm.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
time.txt | 1 +
8 files changed, 232 insertions(+), 7 deletions(-)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
diff --git a/answers-pgtbl.txt b/answers-pgtbl.txt
new file mode 100644
index 0000000..e69de29
diff --git a/grade-lab-pgtbl b/grade-lab-pgtbl
index 2b0b49d..bf5a47e 100755
--- a/grade-lab-pgtbl
+++ b/grade-lab-pgtbl
@@ -62,7 +62,7 @@ def test_count():
def test_usertests():
r.run_qemu(shell_script([
'usertests'
- ]), timeout=300)
+ ]), timeout=600) # 把时间改长,300s->600s

def usertest_check(testcase, nextcase, output):
if not re.search(r'\ntest {}: [\s\S]*OK\ntest {}'.format(testcase, nextcase), output):
diff --git a/kernel/defs.h b/kernel/defs.h
index a73b4f7..4996b44 100644
--- a/kernel/defs.h
+++ b/kernel/defs.h
@@ -108,6 +108,7 @@ void yield(void);
int either_copyout(int user_dst, uint64 dst, void *src, uint64 len);
int either_copyin(void *dst, int user_src, uint64 src, uint64 len);
void procdump(void);
+void proc_freekernelpagetable(pagetable_t);

// swtch.S
void swtch(struct context*, struct context*);
@@ -167,17 +168,25 @@ pagetable_t uvmcreate(void);
void uvminit(pagetable_t, uchar *, uint);
uint64 uvmalloc(pagetable_t, uint64, uint64);
uint64 uvmdealloc(pagetable_t, uint64, uint64);
+
#ifdef SOL_COW
#else
int uvmcopy(pagetable_t, pagetable_t, uint64);
#endif
+
void uvmfree(pagetable_t, uint64);
void uvmunmap(pagetable_t, uint64, uint64, int);
void uvmclear(pagetable_t, uint64);
+pte_t* walk(pagetable_t, uint64, int);
uint64 walkaddr(pagetable_t, uint64);
int copyout(pagetable_t, uint64, char *, uint64);
int copyin(pagetable_t, char *, uint64, uint64);
int copyinstr(pagetable_t, char *, uint64, uint64);
+void vmprint(pagetable_t);
+void ukvmmap(pagetable_t, uint64, uint64, uint64, int);
+pagetable_t proc_kvminit(void);
+int kvmcopy(pagetable_t, pagetable_t, uint64, uint64);
+uint64 kvmdealloc(pagetable_t, uint64, uint64);

// plic.c
void plicinit(void);
@@ -202,6 +211,10 @@ void statsinc(void);
// sprintf.c
int snprintf(char*, int, char*, ...);

+// vmcopyin.c
+int copyin_new(pagetable_t, char*, uint64, uint64);
+int copyinstr_new(pagetable_t, char*, uint64, uint64);
+
#ifdef LAB_NET
// pci.c
void pci_init();
diff --git a/kernel/exec.c b/kernel/exec.c
index 0e8762f..7577797 100644
--- a/kernel/exec.c
+++ b/kernel/exec.c
@@ -51,6 +51,11 @@ exec(char *path, char **argv)
uint64 sz1;
if((sz1 = uvmalloc(pagetable, sz, ph.vaddr + ph.memsz)) == 0)
goto bad;
+ // 添加检测,防止程序大小超过 PLIC
+ if(sz1 >= PLIC) {
+ goto bad;
+ }
+
sz = sz1;
if(ph.vaddr % PGSIZE != 0)
goto bad;
@@ -107,7 +112,11 @@ exec(char *path, char **argv)
if(*s == '/')
last = s+1;
safestrcpy(p->name, last, sizeof(p->name));
-
+
+ // 清除内核页表中对程序内存的旧映射,然后重新建立映射。
+ uvmunmap(p->kernel_pagetable, 0, PGROUNDUP(oldsz)/PGSIZE, 0);
+ kvmcopy(pagetable, p->kernel_pagetable, 0, sz);
+
// Commit to the user image.
oldpagetable = p->pagetable;
p->pagetable = pagetable;
@@ -116,6 +125,9 @@ exec(char *path, char **argv)
p->trapframe->sp = sp; // initial stack pointer
proc_freepagetable(oldpagetable, oldsz);

+ // print a page table
+ if(p->pid == 1) vmprint(p->pagetable);
+
return argc; // this ends up in a0, the first argument to main(argc, argv)

bad:
diff --git a/kernel/proc.c b/kernel/proc.c
index dab1e1d..d8c0333 100644
--- a/kernel/proc.c
+++ b/kernel/proc.c
@@ -30,7 +30,7 @@ procinit(void)
initlock(&pid_lock, "nextpid");
for(p = proc; p < &proc[NPROC]; p++) {
initlock(&p->lock, "proc");
-
+/* // move to allocproc
// Allocate a page for the process's kernel stack.
// Map it high in memory, followed by an invalid
// guard page.
@@ -40,6 +40,7 @@ procinit(void)
uint64 va = KSTACK((int) (p - proc));
kvmmap(va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
p->kstack = va;
+*/
}
kvminithart();
}
@@ -121,6 +122,25 @@ found:
return 0;
}

+ // An empty user kernel page table
+ p->kernel_pagetable = proc_kvminit();
+ if (p->kernel_pagetable == 0) {
+ freeproc(p);
+ release(&p->lock);
+ return 0;
+ }
+
+ // Allocate a page for the process's kernel stack.
+ // Map it high in memory, followed by an invalid
+ // guard page.
+ char *pa = kalloc();
+ if(pa == 0)
+ panic("kalloc");
+ uint64 va = KSTACK((int) (p - proc));
+ // kvmmap(va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
+ ukvmmap(p->kernel_pagetable, va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
+ p->kstack = va;
+
// Set up new context to start executing at forkret,
// which returns to user space.
memset(&p->context, 0, sizeof(p->context));
@@ -139,9 +159,27 @@ freeproc(struct proc *p)
if(p->trapframe)
kfree((void*)p->trapframe);
p->trapframe = 0;
+
+ // 删除内核栈
+ if(p->kstack) {
+ // 通过页表地址, kstack虚拟地址 找到最后一级的页表项
+ pte_t* pte = walk(p->kernel_pagetable, p->kstack, 0);
+ if (pte == 0)
+ panic("freeproc : kstack");
+ // 删除页表项对应的物理地址
+ kfree((void*)PTE2PA(*pte));
+ }
+ p->kstack = 0;
+
if(p->pagetable)
proc_freepagetable(p->pagetable, p->sz);
p->pagetable = 0;
+
+ // 删除kernel pagetable
+ if(p->kernel_pagetable)
+ proc_freekernelpagetable(p->kernel_pagetable);
+ p->kernel_pagetable = 0;
+
p->sz = 0;
p->pid = 0;
p->parent = 0;
@@ -195,6 +233,23 @@ proc_freepagetable(pagetable_t pagetable, uint64 sz)
uvmfree(pagetable, sz);
}

+// Free a process's kernel page tabel
+void
+proc_freekernelpagetable(pagetable_t pagetable)
+{
+ for(int i = 0; i < 512; i++) {
+ pte_t pte = pagetable[i];
+ if(pte & PTE_V) {
+ pagetable[i] = 0;
+ if((pte & (PTE_R | PTE_W | PTE_X)) == 0) {
+ uint64 child = PTE2PA(pte);
+ proc_freekernelpagetable((pagetable_t)child);
+ }
+ }
+ }
+ kfree((void*)pagetable);
+}
+
// a user program that calls exec("/init")
// od -t xC initcode
uchar initcode[] = {
@@ -220,6 +275,8 @@ userinit(void)
// and data into it.
uvminit(p->pagetable, initcode, sizeof(initcode));
p->sz = PGSIZE;
+ // 同步程序内存映射到进程内核页表中
+ kvmcopy(p->pagetable, p->kernel_pagetable, 0, p->sz);

// prepare for the very first "return" from kernel to user.
p->trapframe->epc = 0; // user program counter
@@ -242,12 +299,23 @@ growproc(int n)
struct proc *p = myproc();

sz = p->sz;
- if(n > 0){
+ if(n > 0) {
+ if(PGROUNDDOWN(sz + n) >= PLIC)
+ return -1;
+
if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) {
return -1;
}
- } else if(n < 0){
+
+ // 内核页表中的映射同步扩大
+ if(kvmcopy(p->pagetable, p->kernel_pagetable, p->sz, sz) == -1) {
+ return -1;
+ }
+
+ } else if(n < 0) {
sz = uvmdealloc(p->pagetable, sz, sz + n);
+ // 内核页表中的映射同步缩小
+ kvmdealloc(p->kernel_pagetable, p->sz, p->sz + n);
}
p->sz = sz;
return 0;
@@ -268,7 +336,8 @@ fork(void)
}

// Copy user memory from parent to child.
- if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){
+ if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0 ||
+ kvmcopy(np->pagetable, np->kernel_pagetable, 0, p->sz) < 0) {
freeproc(np);
release(&np->lock);
return -1;
@@ -473,8 +542,17 @@ scheduler(void)
// before jumping back to us.
p->state = RUNNING;
c->proc = p;
+
+ // 在切换任务前,将用户内核页表替换到stap寄存器中
+ w_satp(MAKE_SATP(p->kernel_pagetable));
+ // 清除快表缓存
+ sfence_vma();
+
swtch(&c->context, &p->context);

+ // 该进程执行结束后,将SATP寄存器的值设置为全局内核页表地址
+ kvminithart();
+
// Process is done running for now.
// It should have changed its p->state before coming back.
c->proc = 0;
diff --git a/kernel/proc.h b/kernel/proc.h
index 9c16ea7..2cdd693 100644
--- a/kernel/proc.h
+++ b/kernel/proc.h
@@ -98,6 +98,7 @@ struct proc {
uint64 kstack; // Virtual address of kernel stack
uint64 sz; // Size of process memory (bytes)
pagetable_t pagetable; // User page table
+ pagetable_t kernel_pagetable;// kernel page tabel of each process
struct trapframe *trapframe; // data page for trampoline.S
struct context context; // swtch() here to run process
struct file *ofile[NOFILE]; // Open files
diff --git a/kernel/vm.c b/kernel/vm.c
index bccb405..3e95042 100644
--- a/kernel/vm.c
+++ b/kernel/vm.c
@@ -5,6 +5,8 @@
#include "riscv.h"
#include "defs.h"
#include "fs.h"
+#include "spinlock.h"
+#include "proc.h"

/*
* the kernel's page table.
@@ -132,7 +134,10 @@ kvmpa(uint64 va)
pte_t *pte;
uint64 pa;

- pte = walk(kernel_pagetable, va, 0);
+ // pte = walk(kernel_pagetable, va, 0);
+ // 使用用户进程自己的内核页表地址来翻译虚拟地址
+ pte = walk(myproc()->kernel_pagetable, va, 0);
+
if(pte == 0)
panic("kvmpa");
if((*pte & PTE_V) == 0)
@@ -379,6 +384,7 @@ copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
int
copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
{
+/*
uint64 n, va0, pa0;

while(len > 0){
@@ -396,6 +402,8 @@ copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
srcva = va0 + PGSIZE;
}
return 0;
+*/
+ return copyin_new(pagetable, dst, srcva, len);
}

// Copy a null-terminated string from user to kernel.
@@ -405,6 +413,7 @@ copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
int
copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
{
+/*
uint64 n, va0, pa0;
int got_null = 0;

@@ -439,4 +448,115 @@ copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
} else {
return -1;
}
+*/
+ return copyinstr_new(pagetable, dst, srcva, max);
+}
+
+// vmprint辅助函数
+static void
+_vmprint(pagetable_t pagetable, int level)
+{
+ for (int i = 0; i < 512; ++i) {
+ pte_t pte = pagetable[i];
+
+ if ((pte & PTE_V)) {
+ for (int j = 0; j < level; ++j) {
+ if (j == 0) printf("..");
+ else printf(" ..");
+ }
+
+ uint64 child = PTE2PA(pte); // 通过pte映射下一级页表的物理地址
+ printf("%d: pte %p pa %p\n", i, pte, child);
+ // 查看flag是否被设置,若被设置,则为最低一层
+ // 只有在页表的最后一级,才可进行读、写、执行
+ if ((pte & (PTE_R | PTE_W | PTE_X)) == 0)
+ _vmprint((pagetable_t)child, level + 1);
+ }
+ }
+}
+
+// Print a page table
+void
+vmprint(pagetable_t pagetable)
+{
+ printf("page table %p\n", pagetable);
+ _vmprint(pagetable, 1);
+}
+
+// map to the process's kernel page table
+void
+ukvmmap(pagetable_t pagetable, uint64 va, uint64 pa, uint64 sz, int perm)
+{
+ if(mappages(pagetable, va, sz, pa, perm) != 0)
+ panic("ukvmmap");
+}
+
+// init the kernel page tabel of user process
+pagetable_t
+proc_kvminit()
+{
+ // 申请一个页表空间
+ pagetable_t proc_kernel_pagetable = (pagetable_t) kalloc();
+ if (proc_kernel_pagetable == 0)
+ return 0;
+ memset(proc_kernel_pagetable, 0, PGSIZE);
+ // 与vminint内容上保持一致
+ ukvmmap(proc_kernel_pagetable, UART0, UART0, PGSIZE, PTE_R | PTE_W);
+ ukvmmap(proc_kernel_pagetable, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);
+ // 用户进程内核页表无需在映射CLINT,将空间留出映射用户页表
+ // ukvmmap(proc_kernel_pagetable, CLINT, CLINT, 0x10000, PTE_R | PTE_W);
+ ukvmmap(proc_kernel_pagetable, PLIC, PLIC, 0x400000, PTE_R | PTE_W);
+ ukvmmap(proc_kernel_pagetable, KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);
+ ukvmmap(proc_kernel_pagetable, (uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);
+ ukvmmap(proc_kernel_pagetable, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
+ return proc_kernel_pagetable;
}
+
+// 将 src 页表的一部分页映射关系拷贝到 dst 页表中。
+// 只拷贝页表项,不拷贝实际的物理页内存。
+// 成功返回0,失败返回 -1
+int
+kvmcopy(pagetable_t src, pagetable_t dst, uint64 start, uint64 end)
+{
+ pte_t *pte;
+
+ // PGROUNDUP: 对齐页边界,防止 remap
+ for(uint64 i = PGROUNDUP(start); i < end; i += PGSIZE) {
+ if((pte = walk(src, i, 0)) == 0) // 找到虚拟地址的最后一级页表项
+ panic("kvmcopy: pte should exist");
+ if((*pte & PTE_V) == 0) // 判断页表项是否有效
+ panic("kvmcopy: page not present");
+
+ // 将页表项转换为物理地址页起始位置
+ uint64 pa = PTE2PA(*pte);
+ // `& ~PTE_U` 表示将该页的权限设置为非用户页
+ // 必须设置该权限,RISC-V 中内核是无法直接访问用户页的。
+ uint flags = PTE_FLAGS(*pte) & ~PTE_U;
+ // 将pa这一页的PTEs映射到dst上同样的虚拟地址
+ if(mappages(dst, i, PGSIZE, pa, flags) != 0) {
+ // 清除已经映射的部分,但不释放内存
+ uvmunmap(dst, 0, i / PGSIZE, 0);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+// 与 uvmdealloc 功能类似,将程序内存从 oldsz 缩减到 newsz。但区别在于不释放实际内存
+// 用于内核页表内程序内存映射与用户页表程序内存映射之间的同步
+uint64
+kvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
+{
+ if(newsz >= oldsz)
+ return oldsz;
+
+ if(PGROUNDUP(newsz) < PGROUNDUP(oldsz)){
+ // 如果存在多余的页需要释放
+ int npages = (PGROUNDUP(oldsz) - PGROUNDUP(newsz)) / PGSIZE;
+ uvmunmap(pagetable, PGROUNDUP(newsz), npages, 0);
+ }
+
+ return newsz;
+}
+
+
diff --git a/time.txt b/time.txt
new file mode 100644
index 0000000..a45fd52
--- /dev/null
+++ b/time.txt
@@ -0,0 +1 @@
+24