XV6 운영체제 개선해 보기 <CFS scheduler 구현>

Home Post About

Choice

2023.09.04.

해당 포스트는 MIT의 교육용 운영체제 XV6를 활용하며, 코드 저작권은 MIT에 있음을 밝힙니다.

목표

CFS scheduler 구현

과정

0. Make file cpu 수정

...
ifndef CPUS
CPUS := 1
endif
...

원활한 테스트를 위해 사용 cpu 수를 1개로 제한하였다.

1. proc 구조체에 runtime, vruntime, time_slice, run_d_w(runtime/weight) 추가 및 초기화

proc.h

...
struct inode *cwd;           // Current directory
  char name[16];               // Process name (debugging)
  int nice;
  int runtime;
  int vruntime;
  int time_slice;
  int run_d_w;
};
...

proc.c

...
found:
  p->state = EMBRYO;
  p->pid = nextpid++;
  p->nice = 20;
  p->runtime = 0;
  p->vruntime = 0;
  p->time_slice = 0;
  p->run_d_w = 0;
...

기본 nice 값은 20으로 초기화한다.

2. Nice weight 하드 코드

proc.c, trap.c

int weight_table[40] = {
/*  0*/ 88761, 71755, 56483, 46273, 36291,
/*  5*/ 29154, 23254, 18705, 14949, 11916,
/* 10*/  9548,  7620,  6100,  4904,  3906,
/* 15*/  3121,  2501,  1991,  1586,  1277,
/* 20*/  1024,   820,   655,   526,   423,
/* 25*/   335,   272,   215,   172,   137,
/* 30*/   110,    87,    70,    56,    45,
/* 35*/    36,    29,    23,    18,    15
};

hard-code를 사전에 작성해 놓는다.

3. 스케줄러 함수 구현

void
scheduler(void)
{
  struct proc *p;
  struct proc *p1;
  struct proc *p2;
  struct proc *most_p;
  int total_weight;
  struct cpu *c = mycpu();
  c->proc = 0;
  
  for(;;){
    // Enable interrupts on this processor.
    sti();

    acquire(&ptable.lock);
    for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
      if(p->state != RUNNABLE)
        continue;

      total_weight = 0;
      for(p1 = ptable.proc; p1 < &ptable.proc[NPROC]; p1++){
        if(p1->state != RUNNABLE) continue;
        total_weight += weight_table[p1->nice];
      }

      most_p = p;
      for(p2 = ptable.proc; p2 < &ptable.proc[NPROC]; p2++){
        if(p2->state != RUNNABLE) continue;
        if(most_p->vruntime > p2->vruntime) {
          most_p = p2;
          }
      }
      
      most_p->time_slice = ((10 * (weight_table[most_p->nice])) / total_weight) + ( (10 * (weight_table[most_p->nice])) % total_weight !=0 );

      c->proc = most_p;
      switchuvm(most_p);
      most_p->state = RUNNING;

      swtch(&(c->scheduler), most_p->context);
      switchkvm();
      c->proc = 0;
    }
    release(&ptable.lock);
  }
}

우선 p1 proc 포인터로 ptable을 순회하며 time_slice 계산에 필요한 total_weight 를 누적한다. 다음으로 p2 proc 포인터로 ptable을 다시 순회하며 가장 작은 vruntime을 가진 프로세스를 찾고, 이를 most_p proc 포인터에 저장한다.

time_slice를 계산하는데, 해당 프로세서의 weight 값에 10 tick 을 곱해주고 앞서 구한 total_weight 값으로 나누어 준다. + ((10 * (weight_table[most_p->nice])) % total_weight !=0 ) 부분은 틱의 올림을 위해서 이다.

마지막으로는 c->proc 에 해당 프로세서 most_p를 할당해주고, swtch(&(c->scheduler), most_p->context) 을 통해 trap 부분으로 넘어가게 된다. 이후 해당 스케줄러는 해당 작업을 계속 반복하게 된다.

4. trap.c 타임인터럽트 추가구현

if(myproc() && myproc()->state == RUNNING && tf->trapno == T_IRQ0+IRQ_TIMER){
 
    myproc()->runtime = myproc()->runtime + 1000;
    myproc()->vruntime = myproc()->vruntime + ((1000*1024)/(weight_table2[myproc()->nice]));
    myproc()->run_d_w = myproc()->runtime / weight_table2[myproc()->nice];
    myproc()->time_slice--;
    if(myproc()->time_slice<=0) {
      myproc()->time_slice = 0;
      yield();
      }
}

매 틱마다 runtime 및 vruntime을 증가시켜 준다. 이때 단위는 밀리틱이므로 1000씩 더해준다. vruntime의 경우 (1000밀리틱 x 1024(nice 20의 weight)) / 해당 프로세스의 weight 로 구현하였다. run_d_w의 경우 추후 print의 편의성을 위해 같이 계산해주었다.

5. fork함수 수정

...
np->vruntime = curproc->vruntime; 
np->sz = curproc->sz;
...

6. wakeup 함수 수정

static void
wakeup1(void *chan)
{
  struct proc *p;

  int min_vrun = 0;
  int is_run = 0;
  int vrun_1tick = 0;

  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++)
    if(p->state == RUNNABLE){
      is_run = 1;
      min_vrun = p->vruntime;
    }

  if (is_run == 1){
    for(p = ptable.proc; p < &ptable.proc[NPROC]; p++)
      if(p->state == RUNNABLE){
        if (min_vrun > p->vruntime) min_vrun = p->vruntime;
      }
  }


  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++)
    if(p->state == SLEEPING && p->chan == chan){
      vrun_1tick = ((1000*1024)/(weight_table[p->nice]));
      if(min_vrun < vrun_1tick){
        p->vruntime = 0;
      }
      else{
        p->vruntime = min_vrun - vrun_1tick;
        //cprintf("set %d-> vrun %d (%d - %d)\n",p->pid, p->vruntime, min_vrun, vrun_1tick);
      }
      p->state = RUNNABLE;
    }
      
}

우선 첫번째 for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) 문을 통해 RUNNABLE 프로세스가 있는지 확인하고 있다면 임의로 하나를 할당한다. 없다면 min_vrun 은 0으로 초기화 된다. 이후 RUNNABLE 프로세스가 있다면 두번째 for(p = ptable.proc; p < &ptable.proc[NPROC]; p++) 문을 통해 min_vrun에 가장 작은 값을 할당한다. 마지막으로 if(p->state == SLEEPING && p->chan == chan) 문에서는 우선 아래 공식을 적용하여 vrun_1tick 값을 구한다.

다음으로 (minimum vruntime of processes in the ready queue – vruntime(1tick)) 에 해당하는 코드 p->vruntime = min_vrun - vrun_1tick; 를 작성했다. 여기서 if(min_vrun < vrun_1tick) p->vruntime = 0; 를 넣은 이유는 뺄셈 공식으로 인해 vruntime이 음수가 되는 경우를 피하기 위해서이다. sched()는 추가하지 않았다.

7. ps() 수정

void
ps(int pid)
{
  static char* states[] = {
    "UNUSED  ", 
    "EMBRYO  ", 
    "SLEEPING", 
    "RUNNABLE", 
    "RUNNING ", 
    "ZOMBIE  "
  };

  struct proc *p;
  acquire(&ptable.lock);
  int valid_pid = 0;

  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++)
    if(p->pid == pid) valid_pid = 1;
  if (pid == 0) valid_pid = 1;
  
  if (valid_pid==0) {
    release(&ptable.lock);
    return;
  }

  int getlens(char* s){
    int len = 0;
    while(s[len]) len++;
    return len;
  }

  int getleni(int n){
    int len = 0;
    if (n==0) return 1;
    while(n != 0){
      n = n/10;
      ++len;
    }
    return len;
  }

  int maxl_name = 0;
  int maxl_runtime = 0;
  int maxl_vruntime = 0;

  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
    if((pid == 0) || (p->pid == pid)){
      if (p->state != 0){
          int name_len = getlens(p->name);
          if(maxl_name<name_len) maxl_name = name_len;

          int runtime_len = getleni(p->runtime);
          if(maxl_runtime<runtime_len) maxl_runtime = runtime_len;

          int vruntime_len = getleni(p->vruntime);
          if(maxl_vruntime<vruntime_len) maxl_vruntime = vruntime_len;
      }
    }
  }

  if (maxl_runtime<6) maxl_runtime = 6;
  if (maxl_vruntime<6) maxl_vruntime = 6;
  int name_range = ((maxl_name/6)+1)*6;
  int runtime_range = ((maxl_runtime/6)+1)*6;
  int vruntime_range = ((maxl_vruntime/6)+1)*6;

  cprintf("name");
  for(int i=0;i<(name_range-4);i++) cprintf(" ");
  cprintf("pid      ");
  cprintf("state       ");
  cprintf("priority    ");
  cprintf("runtime/weight    ");
  cprintf("runtime");
  for(int i=0;i<(runtime_range-7);i++) cprintf(" ");
  cprintf("vruntime");
  for(int i=0;i<(vruntime_range-8);i++) cprintf(" ");
  cprintf("tick %d",ticks*1000);
  cprintf("\n");

  for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){
    if((pid == 0) || (p->pid == pid)){
      if (p->state != 0){

        cprintf("%s",p->name);
        int l1 = getlens(p->name);
        for(int i=0;i<(name_range-l1);i++) cprintf(" ");
        
        cprintf("%d",p->pid);
        int l2 = getleni(p->pid);
        for(int i=0;i<(9-l2);i++) cprintf(" ");

        cprintf("%s",states[p->state]);
        int l3 = getlens(states[p->state]);
        for(int i=0;i<(12-l3);i++) cprintf(" ");

        cprintf("%d",p->nice);
        int l4 = getleni(p->nice);
        for(int i=0;i<(12-l4);i++) cprintf(" ");

        cprintf("%d",p->run_d_w);
        int l5 = getleni(p->run_d_w);
        for(int i=0;i<(18-l5);i++) cprintf(" ");

        cprintf("%d",p->runtime);
        int l6 = getleni(p->runtime);
        for(int i=0;i<(runtime_range-l6);i++) cprintf(" ");

        cprintf("%d",p->vruntime);
        int l7 = getleni(p->vruntime);
        for(int i=0;i<(vruntime_range-l7);i++) cprintf(" ");  
        cprintf("\n");
      }
    }
  }
  release(&ptable.lock);
  return;
}

다음과 같은 과정으로 기능을 추가했다.

getlens(), getleni() 함수. gcc가 지원하는 중첩함수를 통해 각각 string과 int의 길이를 반환한다.
반복문을 통해 각 column 별 최대 길이를 구하고, 이를 전부 담을수 있는 전체 길이를 선정한다.
특정 string이나 int를 출력하고, 전체길이 - getlens() or getleni() 길이만큼 공백을 추가한다.

8. 테스트 코드

우선 틱 마다 변화를 보기위해 임시적으로 trap.c에 다음 코드를 추가하였다. 테스트 후에는 삭제했다.

if((ticks%20==0)&&(ticks>=200)&&myproc()) {
    cprintf("\n\n");
    ps(0);
    cprintf("\n\n");
}

mytestlonglong.c – 8.1, 8.2 테스트에 활용

int main ()
{

    int pid1 = fork();
    int a = 1;
    int range = 10000;


    if(pid1>0){

        setnice(getpid(),0);
        for(int i = 0; i< range; i++){
            for(int j = 0; j< range; j++){
            a = 9.9+9.9*a;
            }
        }
        printf(1,"ans: %d\n",a);

    }
    else if(pid1 == 0){
        setnice(getpid(),10);
        for(int i = 0; i< range; i++){
            for(int j = 0; j< range; j++){
            a = 9.9+9.9*a;
            }
        }
        printf(1,"ans: %d\n",a);
    }

    exit();
}

mytest.c – 8.3 테스트에 활용

int main ()
{
    setnice(getpid(),5);
    int pid1 = fork();
    int a = 1;
    int range = 2000;

    if(pid1>0){
        int pid2 = fork();
        if(pid2>0){
            int pid3 = fork();

            if(pid3>0){
                wait();
                for(int i = 0; i< range; i++){
                    for(int j = 0; j< range; j++){
                        a = 9.9+9.9*a;
                    }
                }
                printf(1,"%d - ans: %d\n",getpid(),a);
            }
            else if (pid3==0){
                setnice(getpid(),15);
                for(int i = 0; i< range; i++){
                    for(int j = 0; j< range; j++){
                        a = 9.9+9.9*a;
                    }
                }
                printf(1,"%d - ans: %d\n",getpid(),a);
            }
        }
        else if(pid2 == 0){
            setnice(getpid(),10);
            for(int i = 0; i< range; i++){
                for(int j = 0; j< range; j++){
                a = 9.9+9.9*a;
                }
            }
            printf(1,"%d - ans: %d\n",getpid(),a);
        }
    }
    else if(pid1 == 0){
        setnice(getpid(),5);
        for(int i = 0; i< range; i++){
            for(int j = 0; j< range; j++){
            a = 9.9+9.9*a;
            }
        }
        printf(1,"%d - ans: %d\n",getpid(),a);
    }
    
    exit();
}

8.1 vruntime 유지

weight가 서로 다른 두개의 프로세스가 스케줄링 될 때, 서로 vurntime이 비슷하게 비율이 유지되며 증가한다. runtime은 확실히 차이가 나고, runtime/weight는 서로 비슷하게 비율이 유지되며 증가한다.

8.2 fork() 부모 복사

자식 프로세스가 생기는 순간 부모의 vruntime 8000을 잘 복제하는 모습을 보여준다.

8.3 sleeping 에서 복귀

프로세스 3이 wait()으로 자식을 기다리며 sleeping을 하다가 4번 자식이 끝나는 순간 wakeup을 하며, 이때 vruntime 은 가장 낮은 6번의 3003에서 nice 5 의 가중치를 고려한 vruntime 1 tick 값 35 를 뺀 2968 이 된다. 만약 RUNNABLE 이 없을 경우 0으로 잘 초기화 된다. 11인 이유는 초기화 되고나서 1틱동안 실행되었기 때문이다.

Source

mit-pdos/xv6-public
https://github.com/mit-pdos/xv6-public

목표
- CFS scheduler 구현
과정
Source