Implement "#include".

This commit is contained in:
Rui Ueyama 2018-08-25 03:40:38 +00:00
parent e188ffd586
commit a382606b97
8 changed files with 142 additions and 62 deletions

12
9cc.h
View File

@ -131,12 +131,18 @@ typedef struct {
char len;
// For error reporting
char *buf;
char *filename;
char *start;
} Token;
Vector *tokenize(char *p);
Vector *tokenize(char *path, bool add_eof);
noreturn void bad_token(Token *t, char *msg);
/// preprocess.c
Vector *preprocess(Vector *tokens);
/// parse.c
enum {
@ -353,7 +359,3 @@ extern char *regs32[];
extern int num_regs;
void gen_x86(Vector *globals, Vector *fns);
/// main.c
char *filename;

View File

@ -11,10 +11,13 @@ test: 9cc test/test.c
./9cc -test
@gcc -E -P test/test.c | ./9cc - > tmp-test1.s
@./9cc test/token.c > tmp-test2.s
@gcc -c -o tmp-test2.o test/gcc.c
@gcc -static -o tmp-test tmp-test1.s tmp-test2.s tmp-test2.o
@./tmp-test
@gcc -static -o tmp-test1 tmp-test1.s tmp-test2.o
@./tmp-test1
@./9cc test/token.c > tmp-test2.s
@gcc -static -o tmp-test2 tmp-test2.s
@./tmp-test2
clean:
rm -f 9cc *.o *~ tmp* a.out test/*~

36
main.c
View File

@ -1,31 +1,5 @@
#include "9cc.h"
char *filename;
// Read the entire contents of `filename` into a newly allocated,
// newline-terminated buffer. "-" means standard input.
// Exits the process if the file cannot be opened.
static char *read_file(char *filename) {
  FILE *fp = stdin;
  if (strcmp(filename, "-")) {
    fp = fopen(filename, "r");
    if (!fp) {
      perror(filename);
      exit(1);
    }
  }

  StringBuilder *sb = new_sb();
  char buf[4096];
  for (;;) {
    int nread = fread(buf, 1, sizeof(buf), fp);
    if (nread == 0)
      break;
    sb_append_n(sb, buf, nread);
  }

  // Don't leak the stream when we opened it ourselves.
  if (fp != stdin)
    fclose(fp);

  // Ensure the buffer ends with a newline. Fix: the last byte lives at
  // index len-1, not len (the original read one past the logical end);
  // also handle an empty input (len == 0).
  if (sb->len == 0 || sb->data[sb->len - 1] != '\n')
    sb_add(sb, '\n');
  return sb_get(sb);
}
void usage() { error("Usage: 9cc [-test] [-dump-ir1] [-dump-ir2] <file>"); }
int main(int argc, char **argv) {
@ -37,24 +11,24 @@ int main(int argc, char **argv) {
return 0;
}
char *path;
bool dump_ir1 = false;
bool dump_ir2 = false;
if (argc == 3 && !strcmp(argv[1], "-dump-ir1")) {
dump_ir1 = true;
filename = argv[2];
path = argv[2];
} else if (argc == 3 && !strcmp(argv[1], "-dump-ir2")) {
dump_ir2 = true;
filename = argv[2];
path = argv[2];
} else {
if (argc != 2)
usage();
filename = argv[1];
path = argv[1];
}
// Tokenize and parse.
char *input = read_file(filename);
Vector *tokens = tokenize(input);
Vector *tokens = tokenize(path, true);
Vector *nodes = parse(tokens);
Vector *globals = sema(nodes);
Vector *fns = gen_ir(nodes);

35
preprocess.c Normal file
View File

@ -0,0 +1,35 @@
// C preprocessor
#include "9cc.h"
// C preprocessor: expands "#include" directives.
//
// Walks the token stream; any token other than '#' is copied through
// verbatim. A '#' must be followed by the identifier "include", a
// string literal naming a file, and a newline token — otherwise
// bad_token() reports an error and does not return. The named file is
// tokenized (without a trailing EOF token) and its tokens are spliced
// into the output in place of the directive. The trailing '\n' token
// is left in the stream; the caller strips newline tokens afterwards.
Vector *preprocess(Vector *tokens) {
  Vector *v = new_vec();

  for (int i = 0; i < tokens->len;) {
    Token *t = tokens->data[i];

    if (t->ty != '#') {
      i++;
      vec_push(v, t);
      continue;
    }

    t = tokens->data[++i];
    if (t->ty != TK_IDENT || strcmp(t->name, "include"))
      bad_token(t, "'include' expected");

    t = tokens->data[++i];
    if (t->ty != TK_STR)
      bad_token(t, "string expected");
    char *path = t->str;

    t = tokens->data[++i];
    if (t->ty != '\n')
      bad_token(t, "newline expected");

    // Splice in the included file's tokens. Fix: use a distinct index
    // variable — the original inner loop shadowed the outer `i`.
    Vector *nv = tokenize(path, false);
    for (int j = 0; j < nv->len; j++)
      vec_push(v, nv->data[j]);
  }
  return v;
}

7
test/test1.inc Normal file
View File

@ -0,0 +1,7 @@
int printf();
int main() {
#include "test/test2.inc"
1; 2;
return 0;
}

1
test/test2.inc Normal file
View File

@ -0,0 +1 @@
printf("OK\n");

View File

@ -1,8 +1,4 @@
// This file contains tests for the tokenizer.
//
// Note that we don't actually use the function defined by this file
// because we are interested only in knowing whether the tokenizer can
// tokenize this file or not.
// This file contains tests for the tokenizer and the preprocessor.
// a line comment \
continues\
@ -12,3 +8,5 @@ to this line
/* block comment
**
*/
#include "test/test1.inc"

96
token.c
View File

@ -2,16 +2,16 @@
// Error reporting
static char *input_file;
static char *buf;
static char *filename;
// Finds a line pointed by a given pointer from the input file
// to print it out.
static void print_line(char *pos) {
char *start = input_file;
static void print_line(char *start, char *path, char *pos) {
int line = 0;
int col = 0;
for (char *p = input_file; p; p++) {
for (char *p = start; p; p++) {
if (*p == '\n') {
start = p + 1;
line++;
@ -24,7 +24,7 @@ static void print_line(char *pos) {
continue;
}
fprintf(stderr, "error at %s:%d:%d\n\n", filename, line + 1, col + 1);
fprintf(stderr, "error at %s:%d:%d\n\n", path, line + 1, col + 1);
int linelen = strchr(p, '\n') - start;
fprintf(stderr, "%.*s\n", linelen, start);
@ -37,7 +37,7 @@ static void print_line(char *pos) {
}
noreturn void bad_token(Token *t, char *msg) {
print_line(t->start);
print_line(t->buf, t->filename, t->start);
error(msg);
}
@ -53,6 +53,8 @@ static Token *add(int ty, char *start) {
Token *t = calloc(1, sizeof(Token));
t->ty = ty;
t->start = start;
t->filename = filename;
t->buf = buf;
vec_push(tokens, t);
return t;
}
@ -80,6 +82,30 @@ static char escaped[256] = {
['v'] = '\v', ['e'] = '\033', ['E'] = '\033',
};
// Read the entire contents of `path` into a newly allocated,
// newline-terminated buffer. "-" means standard input.
// Exits the process if the file cannot be opened.
static char *read_file(char *path) {
  FILE *fp = stdin;
  if (strcmp(path, "-")) {
    fp = fopen(path, "r");
    if (!fp) {
      perror(path);
      exit(1);
    }
  }

  StringBuilder *sb = new_sb();
  // Named `tmp` rather than `buf` to avoid shadowing the file-static
  // `char *buf` that holds the current input.
  char tmp[4096];
  for (;;) {
    int nread = fread(tmp, 1, sizeof(tmp), fp);
    if (nread == 0)
      break;
    sb_append_n(sb, tmp, nread);
  }

  // Don't leak the stream when we opened it ourselves.
  if (fp != stdin)
    fclose(fp);

  // Ensure the buffer ends with a newline. Fix: the last byte lives at
  // index len-1, not len (the original read one past the logical end);
  // also handle an empty input (len == 0).
  if (sb->len == 0 || sb->data[sb->len - 1] != '\n')
    sb_add(sb, '\n');
  return sb_get(sb);
}
static Map *keyword_map() {
Map *map = new_map();
map_puti(map, "_Alignof", TK_ALIGNOF);
@ -104,7 +130,7 @@ static char *block_comment(char *pos) {
for (char *p = pos + 2; *p; p++)
if (!strncmp(p, "*/", 2))
return p + 2;
print_line(pos);
print_line(buf, filename, pos);
error("unclosed comment");
}
@ -215,10 +241,18 @@ static char *number(char *p) {
// Tokenized input is stored to this array.
static void scan() {
char *p = input_file;
char *p = buf;
loop:
while (*p) {
// New line (preprocessor-only token)
if (*p == '\n') {
add(*p, p);
p++;
continue;
}
// Whitespace
if (isspace(*p)) {
p++;
continue;
@ -262,7 +296,7 @@ loop:
}
// Single-letter symbol
if (strchr("+-*/;=(),{}<>[]&.!?:|^%~", *p)) {
if (strchr("+-*/;=(),{}<>[]&.!?:|^%~#", *p)) {
add(*p, p);
p++;
continue;
@ -280,15 +314,13 @@ loop:
continue;
}
print_line(p);
print_line(buf, filename, p);
error("cannot tokenize");
}
add(TK_EOF, p);
}
static void canonicalize_newline() {
char *p = input_file;
char *p = buf;
for (char *q = p; *q;) {
if (q[0] == '\r' && q[1] == '\n')
q++;
@ -298,7 +330,7 @@ static void canonicalize_newline() {
}
static void remove_backslash_newline() {
char *p = input_file;
char *p = buf;
for (char *q = p; *q;) {
if (q[0] == '\\' && q[1] == '\n')
q += 2;
@ -308,6 +340,16 @@ static void remove_backslash_newline() {
*p = '\0';
}
// Remove every '\n' token from the global token stream. Newline tokens
// exist only for the preprocessor's benefit; later passes never want
// to see them.
static void strip_newlines() {
  Vector *keep = new_vec();
  for (int i = 0; i < tokens->len; i++) {
    Token *t = tokens->data[i];
    if (t->ty == '\n')
      continue;
    vec_push(keep, t);
  }
  tokens = keep;
}
static void append(Token *x, Token *y) {
StringBuilder *sb = new_sb();
sb_append_n(sb, x->str, x->len - 1);
@ -333,14 +375,32 @@ static void join_string_literals() {
tokens = v;
}
Vector *tokenize(char *p) {
Vector *tokenize(char *path, bool add_eof) {
if (!keywords)
keywords = keyword_map();
Vector *tokens_ = tokens;
char *filename_ = filename;
char *buf_ = buf;
tokens = new_vec();
keywords = keyword_map();
input_file = p;
filename = path;
buf = read_file(path);
canonicalize_newline();
remove_backslash_newline();
scan();
if (add_eof)
add(TK_EOF, buf);
tokens = preprocess(tokens);
strip_newlines();
join_string_literals();
return tokens;
Vector *ret = tokens;
buf = buf_;
tokens = tokens_;
filename = filename_;
return ret;
}