scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

commit 2de303245c939d0d2b96a4c5423286879584eaff
parent 9e878a06e2febcd985c35b619d5054ec63e37095
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Tue, 29 May 2018 15:19:37 +0100

Merge branch 'master' of ssh://simple-cc.org:/var/gitrepos/scc

Diffstat:
MREADME | 2+-
Mas/target/x86/ins.c | 50++++++++++++++++++++++++++++++++++++++++++--------
Mas/target/x86/proc.h | 6++++--
Mas/target/x86/x86.dat | 1+
Mld/Makefile | 1+
Mld/coff32.c | 193+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mld/formats.c | 1+
Mld/ld.h | 19++++++++++++++++++-
Mld/main.c | 1+
Mld/obj.c | 23++++++++++++++++-------
Mlib/c/calloc.c | 1+
Mlib/c/ctype.c | 1-
Mlib/c/fclose.c | 3++-
Mlib/c/fputs.c | 1+
Mlib/c/isascii.c | 1-
Mlib/c/isblank.c | 3+++
Mlib/c/puts.c | 1+
Mlib/c/realloc.c | 1+
18 files changed, 269 insertions(+), 40 deletions(-)

diff --git a/README b/README @@ -1,6 +1,6 @@ Derivations from standard C =========================== -This compiler is aimed to be being fully compatible with the C99 standard, but +This compiler aims to be fully compatible with the C99 standard, but it will have some differences: - Type qualifiers are accepted but ignored. diff --git a/as/target/x86/ins.c b/as/target/x86/ins.c @@ -32,6 +32,16 @@ getclass(Node *np) case AREG_DH: return R8CLASS; + case AREG_AX: + case AREG_BX: + case AREG_CX: + case AREG_DX: + case AREG_DI: + case AREG_SI: + case AREG_SP: + case AREG_BP: + return R16CLASS; + case AREG_CS: case AREG_DS: case AREG_SS: @@ -58,37 +68,29 @@ getclass(Node *np) case AREG_VIP: case AREG_ID: - case AREG_AX: case AREG_EAX: case AREG_RAX: - case AREG_BX: case AREG_EBX: case AREG_RBX: - case AREG_CX: case AREG_ECX: case AREG_RCX: - case AREG_DX: case AREG_EDX: case AREG_RDX: - case AREG_SI: case AREG_SIL: case AREG_ESI: case AREG_RSI: - case AREG_DI: case AREG_DIL: case AREG_EDI: case AREG_RDI: - case AREG_SP: case AREG_SPL: case AREG_ESP: case AREG_RSP: - case AREG_BP: case AREG_BPL: case AREG_EBP: case AREG_RBP: @@ -202,6 +204,9 @@ match(Op *op, Node **args) break; case AREG_R8CLASS: class = R8CLASS; + goto check_class; + case AREG_R16CLASS: + class = R16CLASS; check_class: if ((getclass(np) & class) == 0) return 0; @@ -265,3 +270,32 @@ reg8_reg8(Op *op, Node **args) buf[1] = addrbyte(REG_MODE, src, dst); emit(buf, 2); } + +static int +reg16toint(Node *np) +{ + switch (np->sym->value) { + case AREG_AX: return 0; + case AREG_CX: return 1; + case AREG_DX: return 2; + case AREG_BX: return 3; + case AREG_SP: return 4; + case AREG_BP: return 5; + case AREG_SI: return 6; + case AREG_DI: return 7; + default: abort(); + } +} + +void +reg16_reg16(Op *op, Node **args) +{ + int src, dst; + char buf[2]; + + src = reg16toint(args[0]); + dst = reg16toint(args[1]); + buf[0] = op->bytes[0]; + buf[1] = addrbyte(REG_MODE, src, dst); + emit(buf, 2); +} diff --git a/as/target/x86/proc.h b/as/target/x86/proc.h @@ -148,8 +148,10 @@ enum args { AREG_MXCSR, AREG_R8CLASS, /* register class for 8 bit registers in i286 */ + AREG_R16CLASS, /* register class for 16 bit registers in i286 */ }; -enum { - R8CLASS = 1 << 0, +enum class { + R8CLASS = 1 << 0, + R16CLASS = 1 << 1, }; diff --git a/as/target/x86/x86.dat b/as/target/x86/x86.dat @@ -25,3 +25,4 @@ RET none 1 0xc3 noargs I286,I386,AMD64 # 8 bit arithmetic operations ADDB reg8,reg8 2 0x00 reg8_reg8 I286,I386,AMD64 +ADDW reg16,reg16 2 0x01 reg16_reg16 I286,I386,AMD64 diff --git a/ld/Makefile b/ld/Makefile @@ -26,6 +26,7 @@ coff32.o: ./../inc/coff32/scnhdr.h coff32.o: ./../inc/coff32/syms.h coff32.o: ./../inc/scc.h coff32.o: ./ld.h +formats.o: ./../inc/scc.h formats.o: ./ld.h main.o: ./../inc/ar.h main.o: ./../inc/scc.h diff --git a/ld/coff32.c b/ld/coff32.c @@ -1,7 +1,9 @@ static char sccsid[] = "@(#) ./ld/coff32.c"; #include <assert.h> +#include <ctype.h> #include <errno.h> +#include <limits.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -14,6 +16,7 @@ static char sccsid[] = "@(#) ./ld/coff32.c"; #include "ld.h" static int (*unpack)(unsigned char *, char *, ...); +static int align; static FILHDR * getfhdr(unsigned char *buff, FILHDR *hdr) @@ -48,6 +51,13 @@ readstr(Obj *obj, long off) (*unpack)(buff, "l", &siz); + siz -= 4; + if (siz == 0) { + obj->strtbl = NULL; + obj->strsiz = 0; + return 0; + } + if (siz > SIZE_MAX || (str = malloc(siz)) == NULL) outmem(); @@ -55,6 +65,8 @@ readstr(Obj *obj, long off) return -1; obj->strtbl = str; + obj->strsiz = siz; + return 0; } @@ -84,8 +96,9 @@ readsects(Obj *obj, long off) { unsigned nsec, i; unsigned char buff[SCNHSZ]; - SCNHDR *scn; + SCNHDR *scn, *p; FILHDR *hdr; + Symbol *sym; hdr = obj->filhdr; nsec = hdr->f_nscns; @@ -98,10 +111,20 @@ readsects(Obj *obj, long off) if (fseek(obj->fp, off, SEEK_SET) == EOF) return -1; - for (i = 0; i < nsec; i++) { + for (p = scn; p < &scn[nsec]; ++p) { if (fread(buff, SCNHSZ, 1, obj->fp) != 1) return -1; - getscn(buff, &scn[i]); + getscn(buff, p); + sym = lookup(p->s_name); + + sym->size = (sym->size + align-1) & align-1; + if (sym->size > ULLONG_MAX - p->s_size) { + fprintf(stderr, + "ld: %s: overflow in section '%s'\n", + obj->fname, p->s_name); + exit(EXIT_FAILURE); + } + sym->size += p->s_size; } obj->scnhdr = scn; @@ -130,31 +153,156 @@ getsym(unsigned char *buff, SYMENT *ent) (*unpack)(buff, "ll", &ent->n_zeroes, &ent->n_offset); } +static char * +symname(Obj *obj, SYMENT *ent) +{ + long off; + + if (ent->n_zeroes != 0) + return ent->n_name; + + off = ent->n_offset; + if (off >= obj->strsiz) { + fprintf(stderr, + "ld: invalid offset in symbol table: %zd\n", off); + return ""; + } + + return &obj->strtbl[off]; +} + +static char +typeof(Obj *obj, SYMENT *ent) +{ + SCNHDR *sec; + FILHDR *hdr; + int c, n; + long flags; + + switch (ent->n_scnum) { + case N_DEBUG: + c = 'n'; + break; + case N_ABS: + c = 'a'; + break; + case N_UNDEF: + c = (ent->n_value != 0) ? 'C' : 'U'; + break; + default: + sec = obj->scnhdr; + hdr = obj->filhdr; + n = ent->n_scnum; + if (n > hdr->f_nscns) + return '?'; + sec = &sec[n-1]; + flags = sec->s_flags; + if (flags & STYP_TEXT) + c = 't'; + else if (flags & STYP_DATA) + c = 'd'; + else if (flags & STYP_BSS) + c = 'b'; + else + c = '?'; + break; + } + + if (ent->n_sclass == C_EXT) + c = toupper(c); + + return c; +} + +static TUINT +getval(Obj *obj, SYMENT *ent) +{ + FILHDR *hdr = obj->filhdr;; + SCNHDR *scn = obj->scnhdr; + + if (ent->n_scnum > hdr->f_nscns) { + fprintf(stderr, + "ld: %s: incorrect section number\n", + obj->fname, + ent->n_scnum); + exit(EXIT_FAILURE); + } + + scn = &scn[ent->n_scnum-1]; + + /* + * TODO: We have to add the composed size of the segment minus + * the size of the fragment + */ + return ent->n_value - scn->s_size; +} + static int readsyms(Obj *obj, long off) { - unsigned i, nsym; - unsigned char buff[SYMESZ]; - SYMENT *ent; - FILHDR *hdr; + int type; + unsigned i; + FILHDR *hdr = obj->filhdr;; - hdr = obj->filhdr; - nsym = hdr->f_nsyms; - if (nsym > SIZE_MAX / sizeof(*ent)) + if (fseek(obj->fp, off, SEEK_SET) == EOF) return -1; - if ((ent = malloc(nsym * sizeof(*ent))) == NULL) + if (hdr->f_nsyms > SIZE_MAX / sizeof(Symbol *)) { + fprintf(stderr, + "ld: %s: overflow in size of symbol redirection\n", + obj->fname); + exit(EXIT_FAILURE); + } + obj->symbols = malloc(sizeof(Symbol *) * sizeof(Symbol *)); + if (!obj->symbols) outmem(); - if (fseek(obj->fp, off, SEEK_SET) == EOF) - return -1; + hdr = obj->filhdr; + for (i = 0; i < hdr->f_nsyms; i++) { + Symbol *sym; + TUINT value; + SYMENT ent; + unsigned char buff[SYMESZ]; + char *name; - for (i = 0; i < nsym; i++) { if (fread(buff, SYMESZ, 1, obj->fp) != 1) return -1; - getsym(buff, &ent[i]); + getsym(buff, &ent); + name = symname(obj, &ent); + type = typeof(obj, &ent); + sym = lookup(name); + + switch (sym->type) { + case 'U': + sym->type = type; + sym->value = ent.n_value; + if (type == 'C') + sym->size = ent.n_value; + break; + case 'C': + switch (type) { + case 'U': + case 'C': + if (ent.n_value > sym->size) + sym->size = ent.n_value; + break; + default: + sym->type = type; + sym->value = ent.n_value; + break; + } + break; + default: + if (type != 'U') { + fprintf(stderr, + "ld: %s: redifinition of symbol '%s'\n", + obj->fname, sym->name); + } + break; + } + + obj->symbols[i] = sym; } - obj->enthdr = ent; return 0; } @@ -183,10 +331,10 @@ readobj(Obj *obj) if (readstr(obj, stroff) < 0) goto bad_file; - if (readsyms(obj, symoff) < 0) - goto bad_file; if (readsects(obj, secoff) < 0) goto bad_file; + if (readsyms(obj, symoff) < 0) + goto bad_file; return; bad_file: @@ -199,10 +347,17 @@ static void pass1(char *fname, char *member, FILE *fp) { Obj *obj; + SYMENT *ent; + FILHDR *hdr; + unsigned n, nsyms; + int islib = member != NULL; obj = newobj(fname, member); obj->fp = fp; readobj(obj); + + hdr = obj->filhdr; + nsyms = hdr->f_nsyms; } static void @@ -232,6 +387,8 @@ probe(char *fname, char *member, FILE *fp) switch (magic) { case COFF_Z80MAGIC: + unpack = lunpack; + align = 2; return 1; default: return 0; diff --git a/ld/formats.c b/ld/formats.c @@ -2,6 +2,7 @@ static char sccsid[] = "@(#) ./ld/probe.c"; #include <stdio.h> +#include "../inc/scc.h" #include "ld.h" /* TODO: Autogenerate this file */ diff --git a/ld/ld.h b/ld/ld.h @@ -8,13 +8,19 @@ struct obj { FILE *fp; void *filhdr; void *scnhdr; - void *enthdr; + Symbol **symbols; char *strtbl; + size_t strsiz; struct obj *next; }; struct symbol { char *name; + char type; + short flags; + long size; + TUINT base; + TUINT value; struct symbol *hash; }; @@ -30,3 +36,14 @@ extern Symbol *lookup(char *name); /* main.c */ extern void outmem(void); + +/* + * Definition of globals variables + */ +extern int pass; +extern int sflag; +extern int xflag; +extern int Xflag; +extern int rflag; +extern int dflag; +extern int gflag; diff --git a/ld/main.c b/ld/main.c @@ -19,6 +19,7 @@ int xflag; /* discard local symbols */ int Xflag; /* discard locals starting with 'L' */ int rflag; /* preserve relocation bits */ int dflag; /* define common even with rflag */ +int gflag; /* preserve debug symbols */ void outmem(void) diff --git a/ld/obj.c b/ld/obj.c @@ -46,18 +46,25 @@ newobj(char *fname, char *member) return obj; } +static unsigned +hash(char *s) +{ + unsigned h, c; + + for (h = 0; c = *s; ++s) + h = h*33 ^ c; + return h & NR_SYM_HASH-1; +} + Symbol * lookup(char *name) { - unsigned h, c; + unsigned h; char *s; size_t len; Symbol *sym; - for (h = 0; c = *name; ++s) - h = h*33 ^ c; - h &= NR_SYM_HASH-1; - + h = hash(name); for (sym = symtbl[h]; sym; sym = sym->hash) { s = sym->name; if (*name == *s && !strcmp(name, s)) @@ -69,11 +76,13 @@ lookup(char *name) s = malloc(len); if (!sym || !s) outmem(); + memset(sym, 0, sizeof(*sym)); + memcpy(s, name, len); + sym->hash = symtbl[h]; symtbl[h] = sym; sym->name = s; - memset(sym, 0, sizeof(*sym)); - memcpy(sym->name, name, len); + sym->type = 'U'; return sym; } diff --git a/lib/c/calloc.c b/lib/c/calloc.c @@ -1,5 +1,6 @@ #include <stdlib.h> #include <string.h> +#undef calloc void * calloc(size_t nmemb, size_t size) diff --git a/lib/c/ctype.c b/lib/c/ctype.c @@ -1,6 +1,5 @@ #define __USE_MACROS #include <ctype.h> -#undef ctype int __ctmp; diff --git a/lib/c/fclose.c b/lib/c/fclose.c @@ -1,5 +1,6 @@ #include <stdio.h> +#include "syscall.h" #undef fclose extern int _flsbuf(FILE *fp); @@ -14,7 +15,7 @@ fclose(FILE *fp) r = 0; if (_flsbuf(fp) == EOF) r = EOF; - if (close(fp->fd) < 0) + if (_close(fp->fd) < 0) r = EOF; } diff --git a/lib/c/fputs.c b/lib/c/fputs.c @@ -1,5 +1,6 @@ #include <stdio.h> +#undef fputs int fputs(const char * restrict bp, FILE * restrict fp) diff --git a/lib/c/isascii.c b/lib/c/isascii.c @@ -1,4 +1,3 @@ -#define __USE_MACROS #include <ctype.h> #undef isascii diff --git a/lib/c/isblank.c b/lib/c/isblank.c @@ -1,3 +1,6 @@ + +#include <ctype.h> + int isblank(int c) { diff --git a/lib/c/puts.c b/lib/c/puts.c @@ -1,5 +1,6 @@ #include <stdio.h> +#undef puts int puts(const char *str) diff --git a/lib/c/realloc.c b/lib/c/realloc.c @@ -2,6 +2,7 @@ #include <string.h> #include "malloc.h" +#undef realloc void * realloc(void *ptr, size_t nbytes)