AFL源码分析计划2 -- afl-as.c

从main函数开始

1
u8* inst_ratio_str = getenv("AFL_INST_RATIO");

先是从环境变量中读取AFL_INST_RATIO,它表示插入指令(插桩)的密度,是一个0到100之间的百分比

1
gettimeofday(&tv, &tz);

这里是根据时间和pid来随机化seed

1
2
3
4
5
6
if (inst_ratio_str) {

if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");

}

将inst_ratio_str解析为数字inst_ratio;如果解析失败或者数值大于100,就报错退出

1
2
3
4
if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
sanitizer = 1;
inst_ratio /= 3;
}

如果使用ASAN或者MSAN的话,就会把插入指令的密度降低为原来的1/3以加快速度。ASAN(AddressSanitizer)和MSAN(MemorySanitizer)是编译器提供的运行时内存错误检测工具。

1
2
3
4
5
6
if (!(pid = fork())) {

execvp(as_params[0], (char**)as_params);
FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);

}

插入完指令之后,会fork出一个子进程来执行as,把汇编代码汇编成二进制目标文件

再来看关键的函数add_instrumentation

add_instrumentation

1
2
3
4
5
6
if (input_file) {

inf = fopen(input_file, "r");
if (!inf) PFATAL("Unable to read '%s'", input_file);

} else inf = stdin;

首先打开汇编代码文件

1
2
3
4
5
6
7
outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);

if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);

outf = fdopen(outfd, "w");

if (!outf) PFATAL("fdopen() failed");

再打开一个新的文件

1
while (fgets(line, MAX_LINE, inf)) {

循环读取每一行,并逐行进行判断

1
2
3
4
5
6
7
8
9
10
if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
instrument_next && line[0] == '\t' && isalpha(line[1])) {

fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));

instrument_next = 0;
ins_lines++;

}

进行一系列的判断,如果满足这些条件的话,就插入指令

1
fputs(line, outf);

输出原来那一行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
if (line[0] == '\t' && line[1] == '.') {

/* OpenBSD puts jump tables directly inline with the code, which is
a bit annoying. They use a specific format of p2align directives
around them, so we use that as a signal. */

if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;

if (!strncmp(line + 2, "text\n", 5) ||
!strncmp(line + 2, "section\t.text", 13) ||
!strncmp(line + 2, "section\t__TEXT,__text", 21) ||
!strncmp(line + 2, "section __TEXT,__text", 21)) {
instr_ok = 1;
continue;
}

if (!strncmp(line + 2, "section\t", 8) ||
!strncmp(line + 2, "section ", 8) ||
!strncmp(line + 2, "bss\n", 4) ||
!strncmp(line + 2, "data\n", 5)) {
instr_ok = 0;
continue;
}

}

寻找.text段

1
2
3
4
5
6
if (strstr(line, ".code")) {

if (strstr(line, ".code32")) skip_csect = use_64bit;
if (strstr(line, ".code64")) skip_csect = !use_64bit;

}

判断32位还是64位

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// 这段注释大概意思是:在函数入口(如main)、GCC branch label、clang branch label、conditional branches处插入指令
/* If we're in the right mood for instrumenting, check for function
names or conditional labels. This is a bit messy, but in essence,
we want to catch:

^main: - function entry point (always instrumented)
^.L0: - GCC branch label
^.LBB0_0: - clang branch label (but only in clang mode)
^\tjnz foo - conditional branches

...but not:

^# BB#0: - clang comments
^ # BB#0: - ditto
^.Ltmp0: - clang non-branch labels
^.LC0 - GCC non-branch labels
^.LBB0_0: - ditto (when in GCC mode)
^\tjmp foo - non-conditional jumps

Additionally, clang and GCC on MacOS X follow a different convention
with no leading dots on labels, hence the weird maze of #ifdefs
later on.

*/
1
2
3
4
5
6
7
8
9
10
11
12
13
14
if (line[0] == '\t') {

if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {

fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));

ins_lines++;

}

continue;

}

找到以j开头、但第二个字母不是m的指令(也就是排除jmp),比如jne、jbe;后面的R(100) < inst_ratio是根据概率来选择插入或者不插入。

1
2
if (ins_lines)
fputs(use_64bit ? main_payload_64 : main_payload_32, outf);

再往后看到这里,如果ins_lines不为0(即前面至少插入过一次指令),就会插入main_payload(main_payload定义在头文件afl-as.h中)

0%