Implement "#include".

This commit is contained in:
Rui Ueyama 2018-08-25 03:40:38 +00:00
parent e188ffd586
commit a382606b97
8 changed files with 142 additions and 62 deletions

12
9cc.h
View File

@ -131,12 +131,18 @@ typedef struct {
char len;
// For error reporting
char *buf;
char *filename;
char *start;
} Token;
Vector *tokenize(char *p);
Vector *tokenize(char *path, bool add_eof);
noreturn void bad_token(Token *t, char *msg);
/// preprocess.c
Vector *preprocess(Vector *tokens);
/// parse.c
enum {
@ -353,7 +359,3 @@ extern char *regs32[];
extern int num_regs;
void gen_x86(Vector *globals, Vector *fns);
/// main.c
char *filename;

View File

@ -11,10 +11,13 @@ test: 9cc test/test.c
./9cc -test
@gcc -E -P test/test.c | ./9cc - > tmp-test1.s
@./9cc test/token.c > tmp-test2.s
@gcc -c -o tmp-test2.o test/gcc.c
@gcc -static -o tmp-test tmp-test1.s tmp-test2.s tmp-test2.o
@./tmp-test
@gcc -static -o tmp-test1 tmp-test1.s tmp-test2.o
@./tmp-test1
@./9cc test/token.c > tmp-test2.s
@gcc -static -o tmp-test2 tmp-test2.s
@./tmp-test2
clean:
rm -f 9cc *.o *~ tmp* a.out test/*~

36
main.c
View File

@ -1,31 +1,5 @@
#include "9cc.h"
char *filename;
// Read the entire contents of `filename` into a newly allocated,
// newline-terminated buffer. "-" means standard input.
// Exits the process if the file cannot be opened.
static char *read_file(char *filename) {
  FILE *fp = stdin;
  if (strcmp(filename, "-")) {
    fp = fopen(filename, "r");
    if (!fp) {
      perror(filename);
      exit(1);
    }
  }

  StringBuilder *sb = new_sb();
  char buf[4096];
  for (;;) {
    int nread = fread(buf, 1, sizeof(buf), fp);
    if (nread == 0)
      break;
    sb_append_n(sb, buf, nread);
  }

  // Don't leak the stream when we opened it ourselves.
  if (fp != stdin)
    fclose(fp);

  // Ensure the buffer ends with a newline. Fix: the last byte lives at
  // index len-1, not len (the original read one past the logical end);
  // also handle an empty input (len == 0).
  if (sb->len == 0 || sb->data[sb->len - 1] != '\n')
    sb_add(sb, '\n');
  return sb_get(sb);
}
void usage() { error("Usage: 9cc [-test] [-dump-ir1] [-dump-ir2] <file>"); }
int main(int argc, char **argv) {
@ -37,24 +11,24 @@ int main(int argc, char **argv) {
return 0;
}
char *path;
bool dump_ir1 = false;
bool dump_ir2 = false;
if (argc == 3 && !strcmp(argv[1], "-dump-ir1")) {
dump_ir1 = true;
filename = argv[2];
path = argv[2];
} else if (argc == 3 && !strcmp(argv[1], "-dump-ir2")) {
dump_ir2 = true;
filename = argv[2];
path = argv[2];
} else {
if (argc != 2)
usage();
filename = argv[1];
path = argv[1];
}
// Tokenize and parse.
char *input = read_file(filename);
Vector *tokens = tokenize(input);
Vector *tokens = tokenize(path, true);
Vector *nodes = parse(tokens);
Vector *globals = sema(nodes);
Vector *fns = gen_ir(nodes);

35
preprocess.c Normal file
View File

@ -0,0 +1,35 @@
// C preprocessor
#include "9cc.h"
// C preprocessor: expands "#include" directives.
//
// Walks the token stream; any token other than '#' is copied through
// verbatim. A '#' must be followed by the identifier "include", a
// string literal naming a file, and a newline token — otherwise
// bad_token() reports an error and does not return. The named file is
// tokenized (without a trailing EOF token) and its tokens are spliced
// into the output in place of the directive. The trailing '\n' token
// is left in the stream; the caller strips newline tokens afterwards.
Vector *preprocess(Vector *tokens) {
  Vector *v = new_vec();

  for (int i = 0; i < tokens->len;) {
    Token *t = tokens->data[i];

    if (t->ty != '#') {
      i++;
      vec_push(v, t);
      continue;
    }

    t = tokens->data[++i];
    if (t->ty != TK_IDENT || strcmp(t->name, "include"))
      bad_token(t, "'include' expected");

    t = tokens->data[++i];
    if (t->ty != TK_STR)
      bad_token(t, "string expected");
    char *path = t->str;

    t = tokens->data[++i];
    if (t->ty != '\n')
      bad_token(t, "newline expected");

    // Splice in the included file's tokens. Fix: use a distinct index
    // variable — the original inner loop shadowed the outer `i`.
    Vector *nv = tokenize(path, false);
    for (int j = 0; j < nv->len; j++)
      vec_push(v, nv->data[j]);
  }
  return v;
}

7
test/test1.inc Normal file
View File

@ -0,0 +1,7 @@
int printf();
int main() {
#include "test/test2.inc"
1; 2;
return 0;
}

1
test/test2.inc Normal file
View File

@ -0,0 +1 @@
printf("OK\n");

View File

@ -1,8 +1,4 @@
// This file contains tests for the tokenizer.
//
// Note that we don't actually use the function defined by this file
// because we are interested only in knowing whether the tokenizer can
// tokenize this file or not.
// This file contains tests for the tokenizer and the preprocessor.
// a line comment \
continues\
@ -12,3 +8,5 @@ to this line
/* block comment
**
*/
#include "test/test1.inc"

96
token.c
View File

@ -2,16 +2,16 @@
// Error reporting
static char *input_file;
static char *buf;
static char *filename;
// Finds a line pointed by a given pointer from the input file
// to print it out.
static void print_line(char *pos) {
char *start = input_file;
static void print_line(char *start, char *path, char *pos) {
int line = 0;
int col = 0;
for (char *p = input_file; p; p++) {
for (char *p = start; p; p++) {
if (*p == '\n') {
start = p + 1;
line++;
@ -24,7 +24,7 @@ static void print_line(char *pos) {
continue;
}
fprintf(stderr, "error at %s:%d:%d\n\n", filename, line + 1, col + 1);
fprintf(stderr, "error at %s:%d:%d\n\n", path, line + 1, col + 1);
int linelen = strchr(p, '\n') - start;
fprintf(stderr, "%.*s\n", linelen, start);
@ -37,7 +37,7 @@ static void print_line(char *pos) {
}
noreturn void bad_token(Token *t, char *msg) {
print_line(t->start);
print_line(t->buf, t->filename, t->start);
error(msg);
}
@ -53,6 +53,8 @@ static Token *add(int ty, char *start) {
Token *t = calloc(1, sizeof(Token));
t->ty = ty;
t->start = start;
t->filename = filename;
t->buf = buf;
vec_push(tokens, t);
return t;
}
@ -80,6 +82,30 @@ static char escaped[256] = {
['v'] = '\v', ['e'] = '\033', ['E'] = '\033',
};
// Read the entire contents of `path` into a newly allocated,
// newline-terminated buffer. "-" means standard input.
// Exits the process if the file cannot be opened.
static char *read_file(char *path) {
  FILE *fp = stdin;
  if (strcmp(path, "-")) {
    fp = fopen(path, "r");
    if (!fp) {
      perror(path);
      exit(1);
    }
  }

  StringBuilder *sb = new_sb();
  // Named `tmp` rather than `buf` to avoid shadowing the file-static
  // `char *buf` that holds the current input.
  char tmp[4096];
  for (;;) {
    int nread = fread(tmp, 1, sizeof(tmp), fp);
    if (nread == 0)
      break;
    sb_append_n(sb, tmp, nread);
  }

  // Don't leak the stream when we opened it ourselves.
  if (fp != stdin)
    fclose(fp);

  // Ensure the buffer ends with a newline. Fix: the last byte lives at
  // index len-1, not len (the original read one past the logical end);
  // also handle an empty input (len == 0).
  if (sb->len == 0 || sb->data[sb->len - 1] != '\n')
    sb_add(sb, '\n');
  return sb_get(sb);
}
static Map *keyword_map() {
Map *map = new_map();
map_puti(map, "_Alignof", TK_ALIGNOF);
@ -104,7 +130,7 @@ static char *block_comment(char *pos) {
for (char *p = pos + 2; *p; p++)
if (!strncmp(p, "*/", 2))
return p + 2;
print_line(pos);
print_line(buf, filename, pos);
error("unclosed comment");
}
@ -215,10 +241,18 @@ static char *number(char *p) {
// Tokenized input is stored to this array.
static void scan() {
char *p = input_file;
char *p = buf;
loop:
while (*p) {
// New line (preprocessor-only token)
if (*p == '\n') {
add(*p, p);
p++;
continue;
}
// Whitespace
if (isspace(*p)) {
p++;
continue;
@ -262,7 +296,7 @@ loop:
}
// Single-letter symbol
if (strchr("+-*/;=(),{}<>[]&.!?:|^%~", *p)) {
if (strchr("+-*/;=(),{}<>[]&.!?:|^%~#", *p)) {
add(*p, p);
p++;
continue;
@ -280,15 +314,13 @@ loop:
continue;
}
print_line(p);
print_line(buf, filename, p);
error("cannot tokenize");
}
add(TK_EOF, p);
}
static void canonicalize_newline() {
char *p = input_file;
char *p = buf;
for (char *q = p; *q;) {
if (q[0] == '\r' && q[1] == '\n')
q++;
@ -298,7 +330,7 @@ static void canonicalize_newline() {
}
static void remove_backslash_newline() {
char *p = input_file;
char *p = buf;
for (char *q = p; *q;) {
if (q[0] == '\\' && q[1] == '\n')
q += 2;
@ -308,6 +340,16 @@ static void remove_backslash_newline() {
*p = '\0';
}
// Remove every '\n' token from the global token stream. Newline tokens
// exist only for the preprocessor's benefit; later passes never want
// to see them.
static void strip_newlines() {
  Vector *keep = new_vec();
  for (int i = 0; i < tokens->len; i++) {
    Token *t = tokens->data[i];
    if (t->ty == '\n')
      continue;
    vec_push(keep, t);
  }
  tokens = keep;
}
static void append(Token *x, Token *y) {
StringBuilder *sb = new_sb();
sb_append_n(sb, x->str, x->len - 1);
@ -333,14 +375,32 @@ static void join_string_literals() {
tokens = v;
}
Vector *tokenize(char *p) {
Vector *tokenize(char *path, bool add_eof) {
if (!keywords)
keywords = keyword_map();
Vector *tokens_ = tokens;
char *filename_ = filename;
char *buf_ = buf;
tokens = new_vec();
keywords = keyword_map();
input_file = p;
filename = path;
buf = read_file(path);
canonicalize_newline();
remove_backslash_newline();
scan();
if (add_eof)
add(TK_EOF, buf);
tokens = preprocess(tokens);
strip_newlines();
join_string_literals();
return tokens;
Vector *ret = tokens;
buf = buf_;
tokens = tokens_;
filename = filename_;
return ret;
}