Lua5.3 GC源码阅读(3)

接上篇, 在真正阅读luaC_step源码之前,先来看一下Lua的垃圾回收器对外提供了哪些可调参数, 以及这些参数是如何控制垃圾回收器的.

Lua提供了一个函数collectgarbage([opt[,arg]])用于控制GC的一些行为.

collectgarbage通过第一个参数opt来执行不同的功能:

“collect”: 执行一次完整的垃圾回收循环,这是默认选项.
“stop”: 停止垃圾回收器的运行, 在调用”restart”前,回收器只有被显式调用时才运行.
“restart”: 重新开始垃圾回收器的自动运行.
“count”: 以Kbytes为单位显示Lua使用的内存.
“isrunning”: 表示垃圾回收器是否在工作.
“step”: 单步运行垃圾回收器, 步长”大小”由arg控制.
“setpause”: 将arg设置为回收器的’间歇率’.
“setstepmul”: 将arg设置为回收器的’步进倍率’.

“collect/stop/restart/count/isrunning” 都是很直观, 重点应该是”step/setpause/setstepmul”是如何影响luaC_step的执行流程的.

先来看一下”step/setpause/setstepmul”都做了哪些操作.

//lapi.c
case LUA_GCCOUNT: {
  /* GC values are expressed in Kbytes: #bytes/2^10 */
  res = cast_int(gettotalbytes(g) >> 10);
 	break;
}
case LUA_GCSTEP: {	//"step"
	l_mem debt = 1;  /* =1 to signal that it did an actual step */
	lu_byte oldrunning = g->gcrunning;
	g->gcrunning = 1;  /* allow GC to run */
	if (data == 0) {
		luaE_setdebt(g, -GCSTEPSIZE);  /* to do a "small" step */
		luaC_step(L);
	}
	else {  /* add 'data' to total debt */
		debt = cast(l_mem, data) * 1024 + g->GCdebt;
		luaE_setdebt(g, debt);
		luaC_checkGC(L);
	}
	g->gcrunning = oldrunning;  /* restore previous state */
	if (debt > 0 && g->gcstate == GCSpause)  /* end of cycle? */
		res = 1;  /* signal it */
	break;
}
case LUA_GCSETPAUSE: {	//"setpause"
	res = g->gcpause;
	g->gcpause = data;
	break;
}
case LUA_GCSETSTEPMUL: {	//"setstepmul"
	res = g->gcstepmul;
	if (data < 40) data = 40;  /* avoid ridiculous low values (and 0) */
	g->gcstepmul = data;
	break;
}

//lstate.h
/*
** actual number of total bytes allocated: 'totalbytes' is stored net of
** 'GCdebt', so the real total is the sum of the two fields
*/
#define gettotalbytes(g)        cast(lu_mem, (g)->totalbytes + (g)->GCdebt)

/*
** Excerpt of the global state: only the fields used for GC memory
** accounting and tuning are shown; the '...' lines stand for elided members.
*/
typedef struct global_State {
  ...
  l_mem totalbytes;  /* number of bytes currently allocated - GCdebt */
  l_mem GCdebt;  /* bytes allocated not yet compensated by the collector */
  lu_mem GCestimate;  /* an estimate of the non-garbage memory in use */
  int gcpause;  /* size of pause between successive GCs */
  int gcstepmul;  /* GC 'granularity' */
  ...
};

//lstate.c
/*
** Default GC tuning values; each can be overridden by defining the macro
** before this point. lua_newstate stores them into 'gcpause' and
** 'gcstepmul' of a new state.
*/
#if !defined(LUAI_GCPAUSE)
#define LUAI_GCPAUSE    200  /* 200% */
#endif

#if !defined(LUAI_GCMUL)
#define LUAI_GCMUL      200 /* GC runs 'twice the speed' of memory allocation */
#endif

/*
** Excerpt of state creation ('...' marks elided code). Allocates one LG
** block through the user allocator 'f' and returns NULL if that allocation
** fails. The visible part initializes the GC accounting fields: the
** collector is disabled, the estimate and debt start at zero, 'totalbytes'
** starts at sizeof(LG), and the pause / step multiplier take their
** compile-time defaults.
*/
LUA_API lua_State *lua_newstate (lua_Alloc f, void *ud) {
  int i;
  lua_State *L;
  global_State *g;
  LG *l = cast(LG *, (*f)(ud, NULL, LUA_TTHREAD, sizeof(LG)));
  if (l == NULL) return NULL;
  L = &l->l.l;
  g = &l->g;
  ...
  g->gcrunning = 0;  /* no GC while building state */
  g->GCestimate = 0;
  g->totalbytes = sizeof(LG);  /* the LG block itself is the first allocation counted */
  g->GCdebt = 0;
  g->gcpause = LUAI_GCPAUSE;
  g->gcstepmul = LUAI_GCMUL;
  ...
}

/*
** Set the GC debt to 'debt' without changing the total reported by
** gettotalbytes: 'totalbytes' is recomputed as (total - debt), so
** totalbytes + GCdebt stays equal to the total read on entry.
** A 'debt' lower than (total - MAX_LMEM) is raised to that bound first.
*/
void luaE_setdebt (global_State *g, l_mem debt) {
  l_mem tb = gettotalbytes(g);  /* current real total (totalbytes + GCdebt) */
  lua_assert(tb > 0);
  if (debt < tb - MAX_LMEM)
    debt = tb - MAX_LMEM;  /* will make 'totalbytes == MAX_LMEM' */
  g->totalbytes = tb - debt;  /* rebalance so the sum is still 'tb' */
  g->GCdebt = debt;
}

从上面代码可以得出以下几点结论:
1. global_State.totalbytes + global_State.GCdebt 就是整个LuaVM使用的全部内存总量
2. luaE_setdebt函数会同时改写global_State.totalbytes和global_State.GCdebt(totalbytes = tb - debt, GCdebt = debt), 两者之和保持不变, 因此不会改变整个LuaVM的使用内存总量
3. 步进倍率是靠变量global_State.gcstepmul控制的
4. 间歇率是靠变量global_State.gcpause来控制的
5. 根据注释可知GCestimate代表在使用的非垃圾内存

下面的代码就是GC如何通过global_State.gcstepmul和global_State.gcpause来精巧地控制步进倍率和间歇率

//lgc.c
/*
** Return the amount of work the current step should pay for: the GC debt
** scaled as (GCdebt / STEPMULADJ + 1) * gcstepmul, saturated at MAX_LMEM.
** Returns 0 when the debt is not positive.
*/
static l_mem getdebt (global_State *g) {
  l_mem debt = g->GCdebt;
  int stepmul = g->gcstepmul;
  if (debt <= 0) return 0;  /* minimal debt */
  else {
    debt = (debt / STEPMULADJ) + 1;  /* divide first; the +1 keeps the result at least 1 */
    /* compare against MAX_LMEM / stepmul so the multiplication cannot overflow */
    debt = (debt < MAX_LMEM / stepmul) ? debt * stepmul : MAX_LMEM;
    return debt;
  }
}

/*
** Set the debt that applies after a cycle has finished. Computes
** threshold = (GCestimate / PAUSEADJ) * gcpause, saturated at MAX_LMEM,
** and sets the debt to (total bytes - threshold) through luaE_setdebt.
** The resulting debt is negative while the total is below the threshold.
*/
static void setpause (global_State *g) {
  l_mem threshold, debt;
  l_mem estimate = g->GCestimate / PAUSEADJ;  /* adjust 'estimate' */
  lua_assert(estimate > 0);
  /* division-based check: detects overflow without performing the product */
  threshold = (g->gcpause < MAX_LMEM / estimate)  /* overflow? */
            ? estimate * g->gcpause  /* no overflow */
            : MAX_LMEM;  /* overflow; truncate to maximum */
  debt = gettotalbytes(g) - threshold;
  luaE_setdebt(g, debt);
}


/*
** Perform one incremental GC step.
** If the collector is not running, only sets the debt to
** -GCSTEPSIZE * 10 and returns. Otherwise calls 'singlestep' repeatedly,
** subtracting the work each call reports from the scaled debt obtained
** from 'getdebt', until the debt is at or below -GCSTEPSIZE or the state
** is GCSpause. If the state is GCSpause, 'setpause' computes the next
** debt; otherwise the remaining debt is scaled back (divided by gcstepmul,
** multiplied by STEPMULADJ), stored with luaE_setdebt, and
** 'runafewfinalizers' is called.
*/
void luaC_step (lua_State *L) {
  global_State *g = G(L);
  l_mem debt = getdebt(g);  /* GC deficit (be paid now) */
  if (!g->gcrunning) {  /* not running? */
    luaE_setdebt(g, -GCSTEPSIZE * 10);  /* avoid being called too often */
    return;
  }
  do {  /* repeat until pause or enough "credit" (negative debt) */
    lu_mem work = singlestep(L);  /* perform one single step */
    debt -= work;
  } while (debt > -GCSTEPSIZE && g->gcstate != GCSpause);
  if (g->gcstate == GCSpause)
    setpause(g);  /* pause until next cycle */
  else {
    /* inverse of the scaling applied in 'getdebt' */
    debt = (debt / g->gcstepmul) * STEPMULADJ;  /* convert 'work units' to Kb */
    luaE_setdebt(g, debt);
    runafewfinalizers(L);
  }
}

回忆一下上篇内容:

1. global_State.GCdebt 代表着内存分配所产生的债务
2. checkGC函数保证只有global_State.GCdebt > 0 才会执行luaC_step.

getdebt会将当前债务(global_State.GCdebt)根据变量global_State.gcstepmul进行放大

luaC_step中的do-while循环会反复执行singlestep, 直到getdebt返回的债务值被全部偿还并且额外积累了至少GCSTEPSIZE的"信用"(即debt <= -GCSTEPSIZE), 或者执行完了整个GC流程(gcstate回到GCSpause)才会退出

这样即可通过设置更高的global_State.gcstepmul值来放大每次需要偿还的债务, 从而达到增大步长的目的

再来看setpause函数, 在不考虑边界情况下,下面代码可以看的更直观一些:

l_mem threshold = g->gcpause * g->GCestimate / PAUSEADJ;
luaE_setdebt(g, gettotalbytes(g) - threshold);

当gcpause=2*PAUSEADJ时, threshold = 2 * g->GCestimate.

在luaC_fullgc代码中可以确认当GC循环执行到GCScallfin状态以前,g->GCestimate与gettotalbytes(g)必然相等.

暂时不考虑GCScallfin状态对内存的影响, 在gcpause=2*PAUSEADJ的情况下, setpause函数会将g->GCdebt设置为负的g->GCestimate(即gettotalbytes(g) - 2 * g->GCestimate), 这将会导致只有当新分配的内存超过g->GCestimate之后才会再次开始垃圾回收循环, 与g->gcpause的含义一致.

/*
** Abridged excerpt of the full-collection entry point: the declaration of
** 'g' and any code before the first call shown are omitted here.
** The visible part runs the collector up to the GCScallfin state, asserts
** that GCestimate equals the total bytes at that point, runs on to
** GCSpause, resets the collection kind to KGC_NORMAL and calls setpause.
*/
void luaC_fullgc (lua_State *L, int isemergency) {

luaC_runtilstate(L, bitmask(GCScallfin)); /* run up to finalizers */
/* estimate must be correct after a full GC cycle */
lua_assert(g->GCestimate == gettotalbytes(g));
luaC_runtilstate(L, bitmask(GCSpause)); /* finish collection */
g->gckind = KGC_NORMAL;
setpause(g);  /* set the debt for the next cycle */
}

下篇接着看singlestep是如何执行标记清除的.

发表评论

− two = two