symbol.c (8242B)
1 #include <assert.h> 2 #include <limits.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 7 #include <scc/cstd.h> 8 #include <scc/scc.h> 9 #include "cc1.h" 10 11 #define NR_SYM_HASH 64 12 #define NR_CPP_HASH 32 13 #define NR_LBL_HASH 16 14 15 struct keyword { 16 char *str; 17 unsigned char token, value; 18 }; 19 20 unsigned curctx; 21 static unsigned short counterid; 22 23 static Symbol *head, *labels; 24 static Symbol *htab[NR_SYM_HASH]; 25 static Symbol *htabcpp[NR_CPP_HASH]; 26 static Symbol *htablbl[NR_LBL_HASH]; 27 28 #ifndef NDEBUG 29 void 30 dumpstab(Symbol **tbl, char *msg) 31 { 32 Symbol **bp, *sym; 33 unsigned size; 34 35 fprintf(stderr, "Symbol Table dump at ctx=%u\n%s\n", curctx, msg); 36 if (tbl == htab) 37 size = NR_SYM_HASH; 38 else if (tbl == htabcpp) 39 size = NR_CPP_HASH; 40 else if (tbl == htablbl) 41 size = NR_LBL_HASH; 42 else 43 abort(); 44 45 for (bp = tbl; bp < &tbl[size]; ++bp) { 46 if (*bp == NULL) 47 continue; 48 fprintf(stderr, "%d", (int) (bp - htab)); 49 for (sym = *bp; sym; sym = sym->hash) 50 fprintf(stderr, "->[%d,%d:'%s'=%p]", 51 sym->ns, sym->ctx, sym->name, (void *) sym); 52 putc('\n', stderr); 53 } 54 fputs("head:", stderr); 55 for (sym = head; sym; sym = sym->next) { 56 fprintf(stderr, "->[%d,%d:'%s'=%p]", 57 sym->ns, sym->ctx, 58 (sym->name) ? sym->name : "", (void *) sym); 59 } 60 fputs("\nlabels:", stderr); 61 for (sym = labels; sym; sym = sym->next) { 62 fprintf(stderr, "->[%d,%d:'%s'=%p]", 63 sym->ns, sym->ctx, 64 (sym->name) ? sym->name : "", (void *) sym); 65 } 66 putc('\n', stderr); 67 } 68 #endif 69 70 static Symbol ** 71 hash(char *s, int ns) 72 { 73 unsigned h, size; 74 Symbol **tab; 75 76 h = genhash(s); 77 78 switch (ns) { 79 case NS_CPP: 80 tab = htabcpp; 81 size = NR_CPP_HASH-1; 82 break; 83 case NS_LABEL: 84 tab = htablbl; 85 size = NR_LBL_HASH-1; 86 break; 87 default: 88 tab = htab; 89 size = NR_SYM_HASH-1; 90 break; 91 } 92 return &tab[h & size]; 93 } 94 95 static void 96 unlinkhash(Symbol *sym) 97 { 98 Symbol **h; 99 100 if ((sym->flags & SDECLARED) == 0) 101 return; 102 h = hash(sym->name, sym->ns); 103 assert(sym->ns == NS_CPP || *h == sym); 104 while (*h != sym) 105 h = &(*h)->hash; 106 *h = sym->hash; 107 } 108 109 void 110 pushctx(void) 111 { 112 DBG("SYM: pushed context %d", curctx+1); 113 if (++curctx == NR_BLOCK+1) 114 error("too many nested blocks"); 115 } 116 117 void 118 killsym(Symbol *sym) 119 { 120 short f; 121 char *name; 122 123 if (!sym) 124 return; 125 f = sym->flags; 126 if (f & SSTRING) 127 free(sym->u.s); 128 if (sym->ns == NS_TAG) 129 sym->type->prop &= ~TDEFINED; 130 unlinkhash(sym); 131 if ((name = sym->name) != NULL) { 132 switch (sym->ns) { 133 case NS_LABEL: 134 if ((f & SDEFINED) == 0) 135 errorp("label '%s' is not defined", name); 136 case NS_IDEN: 137 if ((f & (SUSED|SGLOBAL|SDECLARED)) == SDECLARED) 138 warn("'%s' defined but not used", name); 139 break; 140 } 141 } 142 free(name); 143 free(sym); 144 } 145 146 void 147 popctx(void) 148 { 149 Symbol *next, *sym; 150 int ns, dangling = 0; 151 152 DBG("SYM: popped context %d", curctx); 153 /* 154 * we have to be careful before popping the current 155 * context, because since the parser is one token 156 * ahead it may already have read an identifier at 157 * this point, and yylval.sym is a pointer to 158 * the symbol associated to such token. If that 159 * symbol is from the context that we are popping 160 * then we are going to generate a dangling pointer. 161 * We can detect this situation and call again to 162 * lookup. 163 */ 164 if ((yytoken == IDEN || yytoken == TYPEIDEN) && 165 yylval.sym->ctx == curctx) { 166 ns = yylval.sym->ns; 167 dangling = 1; 168 } 169 170 for (sym = head; sym && sym->ctx == curctx; sym = next) { 171 /* 172 * Since we are unlinking them in the inverse order 173 * we do know that sym is always the head of the 174 * collision list 175 */ 176 next = sym->next; 177 killsym(sym); 178 } 179 head = sym; 180 181 if (--curctx == GLOBALCTX) { 182 for (sym = labels; sym; sym = next) { 183 next = sym->next; 184 killsym(sym); 185 } 186 labels = NULL; 187 } 188 189 if (dangling) { 190 yylval.sym = lookup(ns, yytext, ALLOC); 191 yytoken = yylval.sym->token; 192 } 193 } 194 195 unsigned 196 newid(void) 197 { 198 unsigned short id; 199 200 if (lexmode == CPPMODE) 201 return 0; 202 id = ++counterid; 203 if (id == 0) { 204 die("cc1: overflow in %s identifiers", 205 (curctx) ? "internal" : "external"); 206 } 207 return id; 208 } 209 210 Symbol * 211 newsym(int ns, char *name) 212 { 213 Symbol *sym; 214 215 sym = xmalloc(sizeof(*sym)); 216 if (name) 217 name = xstrdup(name); 218 sym->name = name; 219 sym->id = 0; 220 sym->hide = 0; 221 sym->ns = ns; 222 sym->ctx = curctx; 223 sym->token = IDEN; 224 sym->flags = 0; 225 sym->u.s = NULL; 226 sym->type = NULL; 227 sym->hash = NULL; 228 229 if (ns == NS_LABEL) { 230 sym->next = labels; 231 labels = sym; 232 } else if (ns != NS_CPP) { 233 sym->next = head; 234 head = sym; 235 } 236 return sym; 237 } 238 239 static Symbol * 240 linkhash(Symbol *sym) 241 { 242 Symbol **h; 243 244 h = hash(sym->name, sym->ns); 245 sym->hash = *h; 246 *h = sym; 247 248 if (sym->ns != NS_CPP) 249 sym->id = newid(); 250 sym->flags |= SDECLARED; 251 return sym; 252 } 253 254 Symbol * 255 newstring(char *s, size_t len) 256 { 257 Symbol *sym = newsym(NS_IDEN, NULL); 258 259 if (lexmode != CPPMODE) 260 sym->type = mktype(chartype, ARY, len, NULL); 261 sym->id = newid(); 262 sym->flags |= SSTRING | SCONSTANT | SPRIVATE; 263 sym->u.s = xmalloc(len); 264 if (s) 265 memcpy(sym->u.s, s, len); 266 267 return sym; 268 } 269 270 Symbol * 271 newlabel(void) 272 { 273 Symbol *sym = newsym(NS_LABEL, NULL); 274 sym->id = newid(); 275 return sym; 276 } 277 278 Symbol * 279 lookup(int ns, char *name, int alloc) 280 { 281 Symbol *sym; 282 int sns, c; 283 char *t; 284 285 c = *name; 286 for (sym = *hash(name, ns); sym; sym = sym->hash) { 287 t = sym->name; 288 if (*t != c || strcmp(t, name)) 289 continue; 290 sns = sym->ns; 291 if (sns == ns) 292 return sym; 293 /* 294 * When a lookup is done in a namespace associated 295 * to a struct we also want symbols of NS_IDEN which 296 * are typedef, because in other case we cannot declare 297 * fields of such types. 298 * TODO: Remove this trick 299 */ 300 if (sns == NS_KEYWORD || 301 (sym->flags & STYPEDEF) && ns >= NS_STRUCTS) { 302 return sym; 303 } 304 } 305 return (alloc == ALLOC) ? newsym(ns, name) : NULL; 306 } 307 308 Symbol * 309 install(int ns, Symbol *sym) 310 { 311 if (sym->flags & SDECLARED || sym->ctx != curctx) { 312 if (sym->ctx == curctx && ns == sym->ns) 313 return NULL; 314 sym = newsym(ns, sym->name); 315 } 316 return linkhash(sym); 317 } 318 319 void 320 keywords(struct keyword *key, int ns) 321 { 322 Symbol *sym; 323 324 for ( ; key->str; ++key) { 325 sym = linkhash(newsym(ns, key->str)); 326 sym->token = key->token; 327 sym->u.token = key->value; 328 } 329 } 330 331 void 332 builtins(struct builtin *built) 333 { 334 Symbol *sym; 335 struct builtin *bp; 336 337 for (bp = built; bp->str; ++bp) { 338 sym = linkhash(newsym(NS_KEYWORD, bp->str)); 339 sym->token = BUILTIN; 340 sym->u.fun = bp->fun; 341 } 342 } 343 344 void 345 isyms(void) 346 { 347 static struct keyword cppoper[] = { 348 {"defined", DEFINED, DEFINED}, 349 {NULL, 0, 0} 350 }; 351 static struct keyword cppkeys[] = { 352 {"define", DEFINE, DEFINE}, 353 {"include", INCLUDE, INCLUDE}, 354 {"line", LINE, LINE}, 355 {"ifdef", IFDEF, IFDEF}, 356 {"if", IF, IF}, 357 {"elif", ELIF, ELIF}, 358 {"else", ELSE, ELSE}, 359 {"ifndef", IFNDEF, IFNDEF}, 360 {"endif", ENDIF, ENDIF}, 361 {"undef", UNDEF, UNDEF}, 362 {"pragma", PRAGMA, PRAGMA}, 363 {"error", ERROR, ERROR}, 364 {NULL, 0, 0} 365 }; 366 static struct keyword lexkeys[] = { 367 {"auto", SCLASS, AUTO}, 368 {"break", BREAK, BREAK}, 369 {"_Bool", TYPE, BOOL}, 370 {"__builtin_va_list", TYPE, VA_LIST}, 371 {"case", CASE, CASE}, 372 {"char", TYPE, CHAR}, 373 {"const", TQUALIFIER, CONST}, 374 {"continue", CONTINUE, CONTINUE}, 375 {"default", DEFAULT, DEFAULT}, 376 {"do", DO, DO}, 377 {"double", TYPE, DOUBLE}, 378 {"else", ELSE, ELSE}, 379 {"enum", TYPE, ENUM}, 380 {"extern", SCLASS, EXTERN}, 381 {"float", TYPE, FLOAT}, 382 {"for", FOR, FOR}, 383 {"goto", GOTO, GOTO}, 384 {"if", IF, IF}, 385 {"inline", TQUALIFIER, INLINE}, 386 {"int", TYPE, INT}, 387 {"long", TYPE, LONG}, 388 {"register", SCLASS, REGISTER}, 389 {"restrict", TQUALIFIER, RESTRICT}, 390 {"return", RETURN, RETURN}, 391 {"short", TYPE, SHORT}, 392 {"signed", TYPE, SIGNED}, 393 {"sizeof", SIZEOF, SIZEOF}, 394 {"static", SCLASS, STATIC}, 395 {"struct", TYPE, STRUCT}, 396 {"switch", SWITCH, SWITCH}, 397 {"typedef", SCLASS, TYPEDEF}, 398 {"union", TYPE, UNION}, 399 {"unsigned", TYPE, UNSIGNED}, 400 {"void", TYPE, VOID}, 401 {"volatile", TQUALIFIER, VOLATILE}, 402 {"while", WHILE, WHILE}, 403 {NULL, 0, 0}, 404 }; 405 406 keywords(lexkeys, NS_KEYWORD); 407 keywords(cppkeys, NS_CPPCLAUSES); 408 keywords(cppoper, NS_CPP); 409 ibuilts(); 410 411 /* 412 * Remove all the predefined symbols from * the symbol list. It 413 * will make faster some operations. There is no problem of memory 414 * leakeage because this memory is not ever freed 415 */ 416 counterid = 0; 417 head = NULL; 418 }