commit 762330d6fa626e57a0b350cd89680d59b3158ba6
parent bab23d801eb12767f12ae8a04c24fdb29ece5aa8
Author: Quentin Carbonneaux <quentin.carbonneaux@yale.edu>
Date: Mon, 29 Jun 2015 06:56:11 -0400
try writing a parser, painful
Diffstat:
| M | lisc/lisc.h | | | 74 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------- |
| M | lisc/lo.c | | | 37 | +++++++++++++++++++++++++++---------- |
| A | lisc/parse.c | | | 296 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 383 insertions(+), 24 deletions(-)
diff --git a/lisc/lisc.h b/lisc/lisc.h
@@ -1,33 +1,42 @@
+#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
+/*
+ References have to be able to encode:
+ - Results of other operations (temporaries).
+ - Machine registers (for lowering).
+ - Spill locations.
+ - Constants/pointers to program data.
+*/
+
enum {
- R = -1, /* Invalid reference. */
- Temp0 = 32, /* First temporary, below are machine registers. */
- MaxPreds = 16, /* Maximum number of predecessors for a block. */
+ R = 0, /* Invalid reference. */
+ Temp0 = 33, /* First temporary ref. */
+ Const0 = 20000, /* First constant ref. */
+ MaxIdnt = 32,
+ MaxPreds = 16,
MaxBlks = 128,
MaxInss = 128,
MaxPhis = 128,
};
-typedef int Ref;
+typedef unsigned Ref;
typedef struct Ins Ins;
typedef struct Phi Phi;
typedef struct Blk Blk;
+typedef struct Fn Fn;
typedef enum Op Op;
typedef enum Jmp Jmp;
enum Op {
- ONop,
- OAdd,
+ OAdd = 1,
OSub,
- OSDiv,
+ ODiv,
OMod,
- OParam,
- OCall,
- /* x86 instructions (reserved) */
- XDiv,
+ /* Reserved instructions. */
+ X86Div,
};
enum Jmp {
@@ -49,14 +58,51 @@ struct Phi {
};
struct Blk {
- int np;
- int ni;
Phi *ps;
Ins *is;
+ int np;
+ int ni;
struct {
Jmp ty;
Ref arg;
} jmp;
int suc0, suc1;
- int dpth;
+ int lvl;
+};
+
+struct Sym {
+ enum {
+ SUndef,
+ SReg,
+ SNum,
+ STemp,
+ } ty;
+ union {
+ char sreg[MaxIdnt];
+ long long snum;
+ struct {
+ char id[MaxIdnt];
+ int blk;
+ enum {
+ TPhi,
+ TIns,
+ } ty;
+ int loc;
+ } stemp;
+ } u;
+};
+
+#define sreg u.sreg
+#define snum u.snum
+#define stemp u.stemp
+
+struct Fn {
+ Sym *sym;
+ Blk *blk;
+ int nblk;
+ Ref nref;
};
+
+
+/* parse.c */
+Fn *parsefn(FILE *);
diff --git a/lisc/lo.c b/lisc/lo.c
@@ -1,8 +1,5 @@
-/*% clang -Wall -o # %
- */
#include "lisc.h"
-
static void
rporec(int *rpo, int *cnt, Blk *bs, int b)
{
@@ -16,7 +13,7 @@ rporec(int *rpo, int *cnt, Blk *bs, int b)
}
int
-dorpo(Blk *bs, int nb)
+rposched(Blk *bs, int nb)
{
static int rpo[MaxBlks];
Blk t;
@@ -55,6 +52,23 @@ dorpo(Blk *bs, int nb)
}
+void
+cfdump(FILE *f, char *pname, Blk *bs, int nb)
+{
+ Blk *b;
+ int i;
+
+ fprintf(f, "digraph %s {\n", pname);
+ for (b=bs, i=0; i<nb; i++, b++) {
+ if (b->suc0 >= 0)
+ fprintf(f, "b%d -> b%d;\n", i, b->suc0);
+ if (b->suc1 >= 0)
+ fprintf(f, "b%d -> b%d;\n", i, b->suc1);
+ fprintf(f, "b%d [shape=box]\n", i);
+ }
+ fprintf(f, "}\n");
+}
+
#define LEN(a) sizeof a / sizeof a[0]
Blk rpocond[] = {
@@ -69,14 +83,17 @@ int
main()
{
Blk *bs;
- int i, nb;
+ int nb;
+ FILE *f;
bs = rpocond;
nb = LEN(rpocond);
- nb = dorpo(bs, nb);
- for (i=0; i<nb; i++) {
- printf("%02d -> [%02d, %02d]\n", i,
- bs[i].suc0, bs[i].suc1);
- }
+
+ f = fopen("cf.dot", "w");
+ cfdump(f, "bef", bs, nb);
+ nb = rposched(bs, nb);
+ cfdump(f, "aft", bs, nb);
+ fclose(f);
+
return 0;
}
diff --git a/lisc/parse.c b/lisc/parse.c
@@ -0,0 +1,296 @@
+/* A really crude parser. */
+#include <ctype.h>
+#include <string.h>
+
+#include "lisc.h"
+
+enum {
+ MaxRefs = 256,
+};
+
+typedef enum Token Token;
+typedef enum PState PState;
+
+enum PState { /* Parsing state in a block. */
+ PErr,
+ PPhi,
+ PIns,
+ PEnd,
+};
+
+enum Token {
+ TAdd,
+ TSub,
+ TDiv,
+ TMod,
+ TPhi,
+ TJmp,
+ TCnd,
+ TRet,
+
+ TNum,
+ TVar,
+ TEq,
+ TComma,
+ TLParen,
+ TRParen,
+ TNL,
+ TEOF,
+};
+
+
+static FILE *inf;
+static char *errstr;
+
+static Sym sym[MaxRef];
+static Ref nref;
+static Blk blk[MaxBlks], *curblk;
+static Phi phi[MaxPhis], *curphi;
+static Ins ins[MaxInss], *curins;
+
+static struct {
+ long long num;
+ char *var;
+} tokval;
+static int lnum;
+
+
+static void
+err(char *s)
+{
+ if (!s)
+ s = errstr;
+ assert(s);
+ fprintf(stderr, "parse error: %s (line %d)\n", s, lnum);
+ exit(1);
+}
+
+static Token
+token()
+{
+ static struct {
+ char *str;
+ Token tok;
+ } tmap[] = {
+ { "add", TAdd },
+ { "sub", TSub },
+ { "div", TDiv },
+ { "mod", TMod },
+ { "phi", TPhi },
+ { "jmp", TJmp },
+ { "cnd", TCnd },
+ { "ret", TRet },
+ { 0 },
+ };
+ static char tok[MaxIdnt];
+ int c, i, var, sgn;
+
+ do
+ c = fgetc(inf);
+ while (isblank(c));
+ switch (c) {
+ case EOF:
+ return TEOF;
+ case ',':
+ return TComma;
+ case '(':
+ return TLParen;
+ case ')':
+ return TRParen;
+ case '=':
+ return TEq;
+ case '\n':
+ lnum++;
+ return TNL;
+ }
+ if (isdigit(c) || c == '-') {
+ if (c == '-') {
+ tokval.num = 0;
+ sgn = -1;
+ } else {
+ tokval.num = c - '0';
+ sgn = 1;
+ }
+ for (;;) {
+ c = fgetc(inf);
+ if (!isdigit(c))
+ break;
+ tokval.num *= 10;
+ tokval.num += c - '0';
+ }
+ ungetc(c, f);
+ tokval.num *= sgn;
+ return TNum;
+ }
+ if (c == '%') {
+ var = 1;
+ c = fgetc(inf);
+ } else
+ var = 0;
+ if (!isalpha(c))
+ err("lexing failure");
+ i = 0;
+ do {
+ if (i >= MaxIdnt-1)
+ err("identifier too long");
+ tok[i++] = c;
+ c = fgetc(inf);
+ } while (isalpha(c) || isdigit(c));
+ tok[i] = 0;
+ ungetc(c, f);
+ if (var) {
+ tokval.var = tok;
+ return TVar;
+ }
+ for (i=0; tmap[i].str; i++)
+ if (strcmp(tok, tmap[i].str) == 0)
+ return tmap[i].tok;
+ err("unknown keyword");
+ return -1;
+}
+
+static Ref
+varref(char *v)
+{
+ Ref r;
+
+ for (r=Temp0; r<nref; r++)
+ if (sym[r].ty == STemp)
+ if (strcmp(v, sym[r].stemp.id) == 0)
+ return r;
+ if (r >= MaxRefs)
+ err("too many references");
+ sym[r].ty = STemp;
+ strcpy(sym[r].stemp.id, v);
+ sym[r].stemp.blk = -1;
+ return nref++;
+}
+
+static Ref
+parseref()
+{
+ Ref r;
+
+ switch (token()) {
+ case TVar:
+ return varref(tokval.var);
+ case TNum:
+ /* Add the constant to the symbol table. */
+ for (r=Temp0; r<nref; r++)
+ if (sym[r].ty == SNum)
+ if (sym[r].snum == tokval.num)
+ return r;
+ if (r >= MaxRefs)
+ err("too many references");
+ sym[r].ty = SNum;
+ sym[r].snum = tokval.num;
+ retunr nref++;
+ default:
+ errstr = "number or variable expected";
+ return R;
+ }
+}
+
+static PState
+parseline(PState ps)
+{
+ Ref args[MaxPreds];
+ Ref r;
+ Token t;
+ int na;
+
+ t = token();
+ switch (t) {
+ default:
+ errstr = "variable or jump expected";
+ return PErr;
+ case TVar:
+ break;
+ case TRet:
+ curblk->jmp.ty = JRet;
+ goto Jump;
+ case TJmp:
+ curblk->jmp.ty = JJmp;
+ goto Jump;
+ case TCnd:
+ curblk->jmp.ty = JCnd;
+ r = parseref();
+ if (r == R) {
+ errstr = "invalid argument for cnd";
+ return PErr;
+ }
+ if (token() != TComma) {
+ errstr = "missing comma";
+ return PErr;
+ }
+ curblk->jmp.arg = r;
+ Jump:
+ if (
+ return PEnd;
+ }
+ r = varref(tokval.var);
+ if (token() != TEq) {
+ errstr = "= expected after variable";
+ return PErr;
+ }
+ switch (token()) {
+ case TAdd:
+ curins->op = OAdd;
+ break;
+ case TSub:
+ curins->op = OSub;
+ break;
+ case TDiv:
+ curins->op = ODiv;
+ break;
+ case TMod:
+ curins->op = OMod;
+ break;
+ case TPhi:
+ return PIns;
+ default:
+ errstr = "invalid instruction";
+ return PErr;
+ }
+ na = 0;
+ for (;;) {
+ x
+ }
+}
+
+
+
+
+
+
+// in main parsing routine, reset lnum to 1
+// also reset curxxx
+// also nsym to Temp0
+// also sym[0..nsym-1].ty = SReg
+
+#if 0
+int main()
+{
+ char *toknames[] = {
+ "TAdd",
+ "TSub",
+ "TDiv",
+ "TMod",
+ "TPhi",
+ "TJmp",
+ "TCnd",
+ "TRet",
+ "TNum",
+ "TVar",
+ "TEq",
+ "TComma",
+ "TLParen",
+ "TRParen",
+ "TNL",
+ "TEOF",
+ };
+ inf = stdin;
+ for (;;)
+ printf("%s\n", toknames[token()]);
+}
+#endif