parser.c (6917B)
1 #include <assert.h> 2 #include <ctype.h> 3 #include <errno.h> 4 #include <limits.h> 5 #include <setjmp.h> 6 #include <stdarg.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 11 #include <scc/cstd.h> 12 #include <scc/mach.h> 13 #include <scc/scc.h> 14 15 #include "as.h" 16 17 #define NARGS 20 18 #define NR_INPUTS 10 19 #define MAXLINE 100 20 21 struct input { 22 char *fname; 23 unsigned lineno; 24 FILE *fp; 25 }; 26 27 int nerrors; 28 jmp_buf recover; 29 char yytext[INTIDENTSIZ+1]; 30 int yytoken; 31 size_t yylen; 32 union yylval yylval; 33 34 static char *textp, *endp; 35 static int regmode; 36 static struct input inputs[NR_INPUTS], *isp = inputs; 37 38 static int 39 follow(int expect1, int expect2, int ifyes1, int ifyes2, int ifno) 40 { 41 int c; 42 43 if ((c = *++textp) == expect1) 44 return ifyes1; 45 if (c == expect2) 46 return ifyes2; 47 --textp; 48 return ifno; 49 } 50 51 static void 52 tok2str(void) 53 { 54 if ((yylen = endp - textp) > INTIDENTSIZ) { 55 error("token too big"); 56 yylen = INTIDENTSIZ; 57 } 58 memcpy(yytext, textp, yylen); 59 yytext[yylen] = '\0'; 60 textp = endp; 61 } 62 63 static int 64 iden(void) 65 { 66 int c; 67 char *p; 68 69 for ( ; c = *endp; ++endp) { 70 if (isalnum(c)) 71 continue; 72 switch (c) { 73 case '\'': 74 case '_': 75 case '.': 76 case '$': 77 continue; 78 default: 79 goto out_loop; 80 } 81 } 82 83 out_loop: 84 tok2str(); 85 yylval.sym = lookup(yytext); 86 87 return (yylval.sym->flags & FREG) ? REG : IDEN; 88 } 89 90 static int 91 number(void) 92 { 93 int c, base = 10; 94 char *p; 95 TUINT n; 96 static char digits[] = "0123456789ABCDEF"; 97 98 if (*endp == '0') { 99 base = 8; 100 ++endp; 101 if (*endp == 'x') { 102 base = 16; 103 ++endp; 104 } 105 } 106 for (n = 0; (c = *endp) && isxdigit(c); n += c) { 107 p = strchr(digits, toupper(c)); 108 c = p - digits; 109 if (c > base) 110 error("invalid digit in number"); 111 if (n >= TUINT_MAX/base - c) 112 error("overflow in number"); 113 n *= base; 114 endp++; 115 } 116 tok2str(); 117 yylval.sym = tmpsym(n); 118 119 return NUMBER; 120 } 121 122 static int 123 character(void) 124 { 125 int c; 126 char *p; 127 128 while (*endp != '\'') 129 ++endp; 130 return NUMBER; 131 } 132 133 static int 134 string(void) 135 { 136 int c; 137 size_t l; 138 char *s; 139 Symbol *sym = tmpsym(0); 140 141 for (++endp; *endp != '"'; ++endp) 142 ; 143 ++endp; 144 tok2str(); 145 yylval.sym = sym; 146 /* FIXME: this memory is not freed ever */ 147 l = yylen-2; 148 s = memcpy(xmalloc(l+1), yytext+1, l); 149 s[l] = '\0'; 150 sym->name = s; 151 152 return STRING; 153 } 154 155 static int 156 operator(void) 157 { 158 int c; 159 160 ++endp; 161 if ((c = *textp) == '>') 162 c = follow('=', '>', LE, SHL, '>'); 163 else if (c == '<') 164 c = follow('=', '<', GE, SHR, '>'); 165 tok2str(); 166 167 return c; 168 } 169 170 int 171 ahead(void) 172 { 173 while (isspace(*textp)) 174 ++textp; 175 176 if (*textp != '\0') 177 return *textp; 178 return EOS; 179 } 180 181 int 182 next(void) 183 { 184 int c; 185 186 while (isspace(*textp)) 187 ++textp; 188 189 endp = textp; 190 191 switch (c = *textp) { 192 case '\0': 193 strcpy(yytext, "EOS"); 194 yylen = 3; 195 c = EOS; 196 break; 197 case '"': 198 c = string(); 199 break; 200 case '\'': 201 c = character(); 202 break; 203 case '%': 204 c = (regmode ? iden : operator)(); 205 break; 206 case '.': 207 case '_': 208 c = iden(); 209 break; 210 default: 211 if (isdigit(c)) 212 c = number(); 213 else if (isalpha(c)) 214 c = iden(); 215 else 216 c = operator(); 217 break; 218 } 219 return yytoken = c; 220 } 221 222 void 223 expect(int token) 224 { 225 if (yytoken != token) 226 unexpected(); 227 next(); 228 } 229 230 void 231 unexpected(void) 232 { 233 error("unexpected '%s'", yytext); 234 } 235 236 void 237 error(char *msg, ...) 238 { 239 va_list va; 240 struct input *ip; 241 242 assert(isp > inputs); 243 ip = &isp[-1]; 244 245 va_start(va, msg); 246 fprintf(stderr, "as:%s:%u: ", ip->fname, ip->lineno); 247 vfprintf(stderr, msg, va); 248 putc('\n', stderr); 249 nerrors++; 250 va_end(va); 251 252 if (nerrors == 10) 253 die("as: too many errors"); 254 longjmp(recover, 1); 255 } 256 257 Node * 258 getreg(void) 259 { 260 Node *np; 261 262 np = node(REG, NULL, NULL); 263 np->sym = yylval.sym; 264 np->addr = AREG; 265 expect(REG); 266 return np; 267 } 268 269 void 270 regctx(int mode) 271 { 272 regmode = mode; 273 } 274 275 Node * 276 operand(char **strp) 277 { 278 int c, imm = 0; 279 Node *np; 280 281 textp = *strp; 282 regctx(1); 283 switch (next()) { 284 case EOS: 285 np = NULL; 286 break; 287 case REG: 288 np = getreg(); 289 break; 290 case STRING: 291 np = node(yytoken, NULL, NULL); 292 np->sym = yylval.sym; 293 np->addr = ASTR; 294 next(); 295 break; 296 case IDEN: 297 c = ahead(); 298 if (c != EOS && c != ',') 299 goto expression; 300 np = node(IDEN, NULL, NULL); 301 np->sym = yylval.sym; 302 np->addr = ANUMBER; 303 next(); 304 break; 305 case '$': 306 next(); 307 imm = 1; 308 default: 309 expression: 310 if (!imm) { 311 np = moperand(); 312 } else { 313 np = expr(); 314 np->addr = AIMM; 315 } 316 } 317 if (yytoken != ',' && yytoken != EOS) 318 error("trailing characters in expression '%s'", textp); 319 *strp = endp; 320 321 return np; 322 } 323 324 Node ** 325 getargs(char *s) 326 { 327 Node **ap; 328 static Node *args[NARGS]; 329 330 if (!s) 331 return NULL; 332 333 for (ap = args; ap < &args[NARGS-1]; ++ap) { 334 if ((*ap = operand(&s)) == NULL) 335 return args; 336 } 337 error("too many arguments in one instruction"); 338 } 339 340 static char * 341 field(char **oldp, size_t *siz) 342 { 343 char *s, *t, *begin; 344 size_t n; 345 346 if ((begin = *oldp) == NULL) 347 return NULL; 348 349 for (s = begin; isspace(*s) && *s != '\t'; ++s) 350 ; 351 if (*s == '\0' || *s == '#') { 352 *s = '\0'; 353 return *oldp = NULL; 354 } 355 356 for (t = s; *t && *t != '\t'; ++t) 357 ; 358 if (*t == '\t') 359 *t++ = '\0'; 360 *siz -= begin - t; 361 *oldp = t; 362 363 while (t >= s && isspace(*t)) 364 *t-- = '\0'; 365 return (*s != '\0') ? s : NULL; 366 } 367 368 static int 369 validlabel(char *name) 370 { 371 int c; 372 373 while ((c = *name++) != '\0') { 374 if (isalnum(c)) 375 continue; 376 switch (c) { 377 case '_': 378 case '.': 379 case '$': 380 continue; 381 case ':': 382 if (*name != '\0') 383 return 0; 384 *--name = '\0'; 385 continue; 386 default: 387 return 0; 388 } 389 } 390 return 1; 391 } 392 393 static int 394 extract(char *s, size_t len, struct line *lp) 395 { 396 int r = 0; 397 398 if (lp->label = field(&s, &len)) 399 r++; 400 if (lp->op = field(&s, &len)) 401 r++; 402 if (lp->args = field(&s, &len)) 403 r++; 404 405 if (s && *s && *s != '#') 406 error("trailing characters at the end of the line"); 407 if (lp->label && !validlabel(lp->label)) 408 error("incorrect label name '%s'", lp->label); 409 410 return r; 411 } 412 413 static size_t 414 getln(FILE *fp, char buff[MAXLINE]) 415 { 416 int c; 417 char *bp; 418 419 for (bp = buff; (c = getc(fp)) != EOF; *bp++ = c) { 420 if (c == '\n') 421 break; 422 423 if (c > UCHAR_MAX) 424 error("invalid character '%x'", c); 425 426 if (bp == &buff[MAXLINE-1]) 427 error("line too long"); 428 } 429 *bp = '\0'; 430 431 return bp - buff; 432 } 433 434 int 435 nextline(struct line *lp) 436 { 437 struct input *ip; 438 size_t n; 439 static char buff[MAXLINE]; 440 441 assert(isp > inputs); 442 repeat: 443 if (isp == inputs) 444 return 0; 445 ip = &isp[-1]; 446 if (feof(ip->fp)) { 447 delinput(); 448 goto repeat; 449 } 450 n = getln(ip->fp, buff); 451 if (++ip->lineno == 0) 452 die("as: %s: file too long", infile); 453 if (n == 0) 454 goto repeat; 455 if (extract(buff, n, lp) == 0) 456 goto repeat; 457 return 1; 458 } 459 460 void 461 addinput(char *fname) 462 { 463 FILE *fp; 464 465 if (isp == &inputs[NR_INPUTS]) 466 die("as: too many included files"); 467 if ((fp = fopen(fname, "r")) == NULL) 468 die("as: %s: %s", fname, strerror(errno)); 469 isp->fname = xstrdup(fname); 470 isp->fp = fp; 471 isp->lineno = 0; 472 ++isp; 473 } 474 475 int 476 delinput(void) 477 { 478 if (isp == inputs) 479 return EOF; 480 --isp; 481 if (fclose(isp->fp) == EOF) 482 die("as: %s: %s", isp->fname, strerror(errno)); 483 free(isp->fname); 484 return 0; 485 }