parser.c (6502B)
1 #include <assert.h> 2 #include <ctype.h> 3 #include <errno.h> 4 #include <limits.h> 5 #include <setjmp.h> 6 #include <stdarg.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 11 #include <scc/cstd.h> 12 #include <scc/scc.h> 13 #include "as.h" 14 15 #define NARGS 20 16 #define NR_INPUTS 10 17 #define MAXLINE 100 18 19 struct input { 20 char *fname; 21 unsigned lineno; 22 FILE *fp; 23 }; 24 25 int nerrors; 26 jmp_buf recover; 27 char yytext[INTIDENTSIZ+1]; 28 int yytoken; 29 size_t yylen; 30 union yylval yylval; 31 32 static char *textp, *endp; 33 static int regmode; 34 static unsigned lineno; 35 static struct input inputs[NR_INPUTS], *isp = inputs; 36 37 static int 38 follow(int expect1, int expect2, int ifyes1, int ifyes2, int ifno) 39 { 40 int c; 41 42 if ((c = *++textp) == expect1) 43 return ifyes1; 44 if (c == expect2) 45 return ifyes2; 46 --textp; 47 return ifno; 48 } 49 50 static void 51 tok2str(void) 52 { 53 if ((yylen = endp - textp) > INTIDENTSIZ) { 54 error("token too big"); 55 yylen = INTIDENTSIZ; 56 } 57 memcpy(yytext, textp, yylen); 58 yytext[yylen] = '\0'; 59 textp = endp; 60 } 61 62 static int 63 iden(void) 64 { 65 int c; 66 char *p; 67 68 for ( ; c = *endp; ++endp) { 69 if (isalnum(c)) 70 continue; 71 switch (c) { 72 case '\'': 73 case '_': 74 case '-': 75 case '.': 76 case '$': 77 continue; 78 default: 79 goto out_loop; 80 } 81 } 82 83 out_loop: 84 tok2str(); 85 yylval.sym = lookup(yytext); 86 87 return (yylval.sym->flags & FREG) ? REG : IDEN; 88 } 89 90 static int 91 number(void) 92 { 93 int c, base = 10; 94 char *p; 95 TUINT n; 96 97 if (*endp == '0') { 98 base = 8; 99 ++endp; 100 if (*endp == 'x') { 101 base = 16; 102 ++endp; 103 } 104 } 105 for (n = 0; (c = *endp) && isxdigit(c); n += c) { 106 n *= base; 107 c -= '0'; 108 if (n >= TUINT_MAX - c*base) 109 error("overflow in number"); 110 endp++; 111 } 112 tok2str(); 113 yylval.sym = tmpsym(n); 114 115 return NUMBER; 116 } 117 118 static int 119 character(void) 120 { 121 int c; 122 char *p; 123 124 while (*endp != '\'') 125 ++endp; 126 return NUMBER; 127 } 128 129 static int 130 string(void) 131 { 132 int c; 133 size_t l; 134 char *s; 135 Symbol *sym = tmpsym(0); 136 137 for (++endp; *endp != '"'; ++endp) 138 ; 139 ++endp; 140 tok2str(); 141 yylval.sym = sym; 142 /* FIXME: this memory is not freed ever */ 143 l = yylen-2; 144 s = memcpy(xmalloc(l+1), yytext+1, l); 145 s[l] = '\0'; 146 sym->name.buf = s; 147 148 return STRING; 149 } 150 151 static int 152 operator(void) 153 { 154 int c; 155 156 ++endp; 157 if ((c = *textp) == '>') 158 c = follow('=', '>', LE, SHL, '>'); 159 else if (c == '<') 160 c = follow('=', '<', GE, SHR, '>'); 161 tok2str(); 162 163 return c; 164 } 165 166 int 167 next(void) 168 { 169 int c; 170 171 while (isspace(*textp)) 172 ++textp; 173 174 endp = textp; 175 176 switch (c = *textp) { 177 case '\0': 178 strcpy(yytext, "EOS"); 179 yylen = 3; 180 c = EOS; 181 break; 182 case '"': 183 c = string(); 184 break; 185 case '\'': 186 c = character(); 187 break; 188 case '%': 189 c = (regmode ? iden : operator)(); 190 break; 191 case '_': 192 c = iden(); 193 break; 194 default: 195 if (isdigit(c)) 196 c = number(); 197 else if (isalpha(c)) 198 c = iden(); 199 else 200 c = operator(); 201 break; 202 } 203 return yytoken = c; 204 } 205 206 void 207 expect(int token) 208 { 209 if (yytoken != token) 210 unexpected(); 211 next(); 212 } 213 214 void 215 unexpected(void) 216 { 217 error("unexpected '%s'", yytext); 218 } 219 220 void 221 error(char *msg, ...) 222 { 223 va_list va; 224 struct input *ip; 225 226 assert(isp > inputs); 227 ip = &isp[-1]; 228 229 va_start(va, msg); 230 fprintf(stderr, "as:%s:%u: ", ip->fname, ip->lineno); 231 vfprintf(stderr, msg, va); 232 putc('\n', stderr); 233 nerrors++; 234 va_end(va); 235 236 if (nerrors == 10) 237 die("as: too many errors"); 238 longjmp(recover, 1); 239 } 240 241 Node * 242 getreg(void) 243 { 244 Node *np; 245 246 np = node(REG, NULL, NULL); 247 np->sym = yylval.sym; 248 np->addr = AREG; 249 expect(REG); 250 return np; 251 } 252 253 void 254 regctx(int mode) 255 { 256 regmode = mode; 257 } 258 259 Node * 260 operand(char **strp) 261 { 262 int imm = 0; 263 Node *np; 264 265 textp = *strp; 266 regctx(1); 267 switch (next()) { 268 case EOS: 269 np = NULL; 270 break; 271 case REG: 272 np = getreg(); 273 break; 274 case STRING: 275 np = node(yytoken, NULL, NULL); 276 np->sym = yylval.sym; 277 np->addr = ASTR; 278 next(); 279 break; 280 case '$': 281 next(); 282 imm = 1; 283 default: 284 if (!imm) { 285 np = moperand(); 286 } else { 287 np = expr(); 288 np->addr = AIMM; 289 } 290 } 291 if (yytoken != ',' && yytoken != EOS) 292 error("trailing characters in expression '%s'", textp); 293 *strp = endp; 294 295 return np; 296 } 297 298 Node ** 299 getargs(char *s) 300 { 301 Node **ap; 302 static Node *args[NARGS]; 303 304 if (!s) 305 return NULL; 306 307 for (ap = args; ap < &args[NARGS-1]; ++ap) { 308 if ((*ap = operand(&s)) == NULL) 309 return args; 310 } 311 error("too many arguments in one instruction"); 312 } 313 314 static char * 315 field(char **oldp, size_t *siz) 316 { 317 char *s, *t, *begin; 318 size_t n; 319 320 if ((begin = *oldp) == NULL) 321 return NULL; 322 323 for (s = begin; isspace(*s) && *s != '\t'; ++s) 324 ; 325 if (*s == '\0' || *s == '#') { 326 *s = '\0'; 327 return *oldp = NULL; 328 } 329 330 for (t = s; *t && *t != '\t'; ++t) 331 ; 332 if (*t == '\t') 333 *t++ = '\0'; 334 *siz -= begin - t; 335 *oldp = t; 336 337 while (t >= s && isspace(*t)) 338 *t-- = '\0'; 339 return (*s != '\0') ? s : NULL; 340 } 341 342 static int 343 validlabel(char *name) 344 { 345 int c; 346 347 while ((c = *name++) != '\0') { 348 if (isalnum(c)) 349 continue; 350 switch (c) { 351 case '_': 352 case '-': 353 case '.': 354 case '$': 355 continue; 356 case ':': 357 if (*name != '\0') 358 return 0; 359 *--name = '\0'; 360 continue; 361 default: 362 return 0; 363 } 364 } 365 return 1; 366 } 367 368 static int 369 extract(char *s, size_t len, struct line *lp) 370 { 371 int r = 0; 372 373 if (lp->label = field(&s, &len)) 374 r++; 375 if (lp->op = field(&s, &len)) 376 r++; 377 if (lp->args = field(&s, &len)) 378 r++; 379 380 if (s && *s && *s != '#') 381 error("trailing characters at the end of the line"); 382 if (lp->label && !validlabel(lp->label)) 383 error("incorrect label name '%s'", lp->label); 384 385 return r; 386 } 387 388 static size_t 389 getln(FILE *fp, char buff[MAXLINE]) 390 { 391 int c; 392 char *bp; 393 394 for (bp = buff; (c = getc(fp)) != EOF; *bp++ = c) { 395 if (c == '\n') 396 break; 397 398 if (c > UCHAR_MAX) 399 error("invalid character '%x'", c); 400 401 if (bp == &buff[MAXLINE-1]) 402 error("line too long"); 403 } 404 *bp = '\0'; 405 406 return bp - buff; 407 } 408 409 int 410 nextline(struct line *lp) 411 { 412 struct input *ip; 413 size_t n; 414 static char buff[MAXLINE]; 415 416 assert(isp > inputs); 417 repeat: 418 if (isp == inputs) 419 return 0; 420 ip = &isp[-1]; 421 if (feof(ip->fp)) { 422 delinput(); 423 goto repeat; 424 } 425 n = getln(ip->fp, buff); 426 if (++ip->lineno == 0) 427 die("as: %s: file too long", infile); 428 if (n == 0) 429 goto repeat; 430 if (extract(buff, n, lp) == 0) 431 goto repeat; 432 return 1; 433 } 434 435 void 436 addinput(char *fname) 437 { 438 FILE *fp; 439 440 if (isp == &inputs[NR_INPUTS]) 441 die("as: too many included files"); 442 if ((fp = fopen(fname, "r")) == NULL) 443 die("as: %s: %s", fname, strerror(errno)); 444 isp->fname = xstrdup(fname); 445 isp->fp = fp; 446 isp->lineno = 0; 447 ++isp; 448 } 449 450 int 451 delinput(void) 452 { 453 if (isp == inputs) 454 return EOF; 455 --isp; 456 if (fclose(isp->fp) == EOF) 457 die("as: %s: %s", isp->fname, strerror(errno)); 458 free(isp->fname); 459 return 0; 460 }