scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

commit e60e6dc4ded6d0ff578610e39d7a8c8a6f62f3ed
parent ad3d0dac54f11b7b789b7a8932977ecbadf6380e
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Tue, 26 Oct 2021 21:30:56 +0200

libmach: Add elf64read()

This function loads into memory all the information needed from an
ELF file.

Diffstat:
Ainclude/scc/scc/elf/elfent.h | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/scc/scc/elf/elfhdr.h | 3+++
Ainclude/scc/scc/elf/elfphdr.h | 44++++++++++++++++++++++++++++++++++++++++++++
Ainclude/scc/scc/elf/elfshdr.h | 93+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/libmach/deps.mk | 3+++
Msrc/libmach/elf64/elf64.c | 4++--
Msrc/libmach/elf64/elf64.h | 28++++++++++++++++++++++++++++
Asrc/libmach/elf64/elf64read.c | 381+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/libmach/elf64/rules.mk | 1+
9 files changed, 620 insertions(+), 2 deletions(-)

diff --git a/include/scc/scc/elf/elfent.h b/include/scc/scc/elf/elfent.h
@@ -0,0 +1,65 @@
+/* Symbol table index */
+#define STN_UNDEF	0	/* undefined */
+
+/* Extract symbol info - st_info */
+#define ELF32_ST_BIND(x)	((x) >> 4)
+#define ELF32_ST_TYPE(x)	(((unsigned int) (x)) & 0xf)
+#define ELF32_ST_INFO(b,t)	(((b) << 4) + ((t) & 0xf))
+
+#define ELF64_ST_BIND(x)	((x) >> 4)
+#define ELF64_ST_TYPE(x)	(((unsigned int) (x)) & 0xf)
+#define ELF64_ST_INFO(b,t)	(((b) << 4) + ((t) & 0xf))
+
+/* Symbol Binding - ELF32_ST_BIND - st_info */
+#define STB_LOCAL	0	/* Local symbol */
+#define STB_GLOBAL	1	/* Global symbol */
+#define STB_WEAK	2	/* like global - lower precedence */
+#define STB_NUM	3	/* number of symbol bindings */
+#define STB_LOPROC	13	/* reserved range for processor */
+#define STB_HIPROC	15	/* specific symbol bindings */
+
+/* Symbol type - ELF32_ST_TYPE - st_info */
+#define STT_NOTYPE	0	/* not specified */
+#define STT_OBJECT	1	/* data object */
+#define STT_FUNC	2	/* function */
+#define STT_SECTION	3	/* section */
+#define STT_FILE	4	/* file */
+#define STT_COMMON	5	/* common symbol */
+#define STT_TLS	6	/* thread local storage */
+#define STT_LOPROC	13	/* reserved range for processor */
+#define STT_HIPROC	15	/* specific symbol types */
+
+/* Extract symbol visibility - st_other */
+#define ELF_ST_VISIBILITY(v)	((v) & 0x3)
+#define ELF32_ST_VISIBILITY	ELF_ST_VISIBILITY
+#define ELF64_ST_VISIBILITY	ELF_ST_VISIBILITY
+
+#define STV_DEFAULT	0	/* Visibility set by binding type */
+#define STV_INTERNAL	1	/* OS specific version of STV_HIDDEN */
+#define STV_HIDDEN	2	/* can only be seen inside own .so */
+#define STV_PROTECTED	3	/* HIDDEN inside, DEFAULT outside */
+
+#define ELFE32SZ	16	/* on-disk size of an Elf32_Sym */
+#define ELFE64SZ	24	/* on-disk size of an Elf64_Sym */
+
+typedef struct elf32_sym Elf32_Sym;
+typedef struct elf64_sym Elf64_Sym;
+
+/* Symbol Table Entry */
+struct elf32_sym {
+	Elf32_Word	st_name;	/* name - index into string table */
+	Elf32_Addr	st_value;	/* symbol value */
+	Elf32_Word	st_size;	/* symbol size */
+	unsigned char	st_info;	/* type and binding */
+	unsigned char	st_other;	/* 0 - no defined meaning */
+	Elf32_Half	st_shndx;	/* section header index */
+};
+
+struct elf64_sym {
+	Elf64_Word	st_name;	/* Symbol name index in str table */
+	unsigned char	st_info;	/* type / binding attrs */
+	unsigned char	st_other;	/* unused */
+	Elf64_Half	st_shndx;	/* section index of symbol */
+	Elf64_Addr	st_value;	/* value of symbol */
+	Elf64_Xword	st_size;	/* size of symbol */
+};
diff --git a/include/scc/scc/elf/elfhdr.h b/include/scc/scc/elf/elfhdr.h
@@ -261,6 +261,9 @@
 /* Magic for e_phnum: get real value from sh_info of first section header */
 #define PN_XNUM		0xffff
 
+#define ELFH32SZ	52	/* on-disk Elf32_Ehdr: 16 + 2*2 + 5*4 + 6*2 bytes */
+#define ELFH64SZ	64	/* on-disk Elf64_Ehdr: 16 + 2*2 + 4 + 3*8 + 4 + 6*2 bytes */
+
 typedef struct elfhdr32 Elf32_Ehdr;
 typedef struct elfhdr64 Elf64_Ehdr;
 
diff --git a/include/scc/scc/elf/elfphdr.h b/include/scc/scc/elf/elfphdr.h
@@ -0,0 +1,44 @@
+/* See http://www.sco.com/developers/gabi/latest/contents.html */
+
+/* Segment types - p_type */
+#define PT_NULL	0	/* unused */
+#define PT_LOAD	1	/* loadable segment */
+#define PT_DYNAMIC	2	/* dynamic linking section */
+#define PT_INTERP	3	/* the RTLD */
+#define PT_NOTE	4	/* auxiliary information */
+#define PT_SHLIB	5	/* reserved - purpose undefined */
+#define PT_PHDR	6	/* program header */
+#define PT_TLS	7	/* thread local storage */
+#define PT_LOOS	0x60000000	/* reserved range for OS */
+#define PT_HIOS	0x6fffffff	/* specific segment types */
+#define PT_LOPROC	0x70000000	/* reserved range for processor */
+#define PT_HIPROC	0x7fffffff	/* specific segment types */
+
+#define ELFP32SZ	32	/* on-disk size of an Elf32_Phdr */
+#define ELFP64SZ	56	/* on-disk size of an Elf64_Phdr */
+
+typedef struct elf32_phdr Elf32_Phdr;
+typedef struct elf64_phdr Elf64_Phdr;
+
+/* Program Header */
+struct elf32_phdr {
+	Elf32_Word	p_type;		/* segment type */
+	Elf32_Off	p_offset;	/* segment offset */
+	Elf32_Addr	p_vaddr;	/* virtual address of segment */
+	Elf32_Addr	p_paddr;	/* physical address - ignored? */
+	Elf32_Word	p_filesz;	/* number of bytes in file for seg. */
+	Elf32_Word	p_memsz;	/* number of bytes in mem. for seg. */
+	Elf32_Word	p_flags;	/* flags */
+	Elf32_Word	p_align;	/* memory alignment */
+};
+
+struct elf64_phdr {
+	Elf64_Word	p_type;		/* entry type */
+	Elf64_Word	p_flags;	/* flags */
+	Elf64_Off	p_offset;	/* offset */
+	Elf64_Addr	p_vaddr;	/* virtual address */
+	Elf64_Addr	p_paddr;	/* physical address */
+	Elf64_Xword	p_filesz;	/* file size */
+	Elf64_Xword	p_memsz;	/* memory size */
+	Elf64_Xword	p_align;	/* memory & file alignment */
+};
diff --git a/include/scc/scc/elf/elfshdr.h b/include/scc/scc/elf/elfshdr.h
@@ -0,0 +1,93 @@
+/* Special Section Indexes */
+#define SHN_UNDEF	0	/* undefined */
+#define SHN_LORESERVE	0xff00	/* lower bounds of reserved indexes */
+#define SHN_LOPROC	0xff00	/* reserved range for processor */
+#define SHN_HIPROC	0xff1f	/* specific section indexes */
+#define SHN_ABS	0xfff1	/* absolute value */
+#define SHN_COMMON	0xfff2	/* common symbol */
+#define SHN_XINDEX	0xffff	/* Escape -- index stored elsewhere. */
+#define SHN_HIRESERVE	0xffff	/* upper bounds of reserved indexes */
+
+/* sh_type */
+#define SHT_NULL	0	/* inactive */
+#define SHT_PROGBITS	1	/* program defined information */
+#define SHT_SYMTAB	2	/* symbol table section */
+#define SHT_STRTAB	3	/* string table section */
+#define SHT_RELA	4	/* relocation section with addends*/
+#define SHT_HASH	5	/* symbol hash table section */
+#define SHT_DYNAMIC	6	/* dynamic section */
+#define SHT_NOTE	7	/* note section */
+#define SHT_NOBITS	8	/* no space section */
+#define SHT_REL	9	/* relocation section without addends */
+#define SHT_SHLIB	10	/* reserved - purpose unknown */
+#define SHT_DYNSYM	11	/* dynamic symbol table section */
+#define SHT_NUM	12	/* number of section types */
+#define SHT_INIT_ARRAY	14	/* pointers to init functions */
+#define SHT_FINI_ARRAY	15	/* pointers to termination functions */
+#define SHT_PREINIT_ARRAY	16	/* ptrs to funcs called before init */
+#define SHT_GROUP	17	/* defines a section group */
+#define SHT_SYMTAB_SHNDX	18	/* Section indexes (see SHN_XINDEX). */
+#define SHT_LOOS	0x60000000	/* reserved range for OS specific */
+#define SHT_SUNW_dof	0x6ffffff4	/* used by dtrace */
+#define SHT_GNU_LIBLIST	0x6ffffff7	/* libraries to be prelinked */
+#define SHT_SUNW_move	0x6ffffffa	/* inf for partially init'ed symbols */
+#define SHT_SUNW_syminfo	0x6ffffffc	/* additional symbol information */
+#define SHT_SUNW_verdef	0x6ffffffd	/* symbol versioning inf */
+#define SHT_SUNW_verneed	0x6ffffffe	/* symbol versioning req */
+#define SHT_SUNW_versym	0x6fffffff	/* symbol versioning table */
+#define SHT_HIOS	0x6fffffff	/* section header types */
+#define SHT_LOPROC	0x70000000	/* reserved range for processor */
+#define SHT_HIPROC	0x7fffffff	/* specific section header types */
+#define SHT_LOUSER	0x80000000	/* reserved range for application */
+#define SHT_HIUSER	0xffffffff	/* specific indexes */
+
+#define SHT_GNU_HASH	0x6ffffff6	/* GNU-style hash table section */
+
+/* Section Attribute Flags - sh_flags */
+#define SHF_WRITE	0x1	/* Writable */
+#define SHF_ALLOC	0x2	/* occupies memory */
+#define SHF_EXECINSTR	0x4	/* executable */
+#define SHF_MERGE	0x10	/* may be merged */
+#define SHF_STRINGS	0x20	/* contains strings */
+#define SHF_INFO_LINK	0x40	/* sh_info holds section index */
+#define SHF_LINK_ORDER	0x80	/* ordering requirements */
+#define SHF_OS_NONCONFORMING	0x100	/* OS-specific processing required */
+#define SHF_GROUP	0x200	/* member of section group */
+#define SHF_TLS	0x400	/* thread local storage */
+#define SHF_COMPRESSED	0x800	/* contains compressed data */
+#define SHF_MASKOS	0x0ff00000	/* OS-specific semantics */
+#define SHF_MASKPROC	0xf0000000	/* reserved bits for processor */
+				/* specific section attributes */
+
+#define ELFS32SZ	40	/* on-disk size of an Elf32_Shdr */
+#define ELFS64SZ	64	/* on-disk size of an Elf64_Shdr */
+
+typedef struct elf32_shdr Elf32_Shdr;
+typedef struct elf64_shdr Elf64_Shdr;
+
+/* Section Header */
+struct elf32_shdr {
+	Elf32_Word	sh_name;	/* section name */
+	Elf32_Word	sh_type;	/* type */
+	Elf32_Word	sh_flags;	/* flags */
+	Elf32_Addr	sh_addr;	/* address */
+	Elf32_Off	sh_offset;	/* file offset */
+	Elf32_Word	sh_size;	/* section size */
+	Elf32_Word	sh_link;	/* section header table index link */
+	Elf32_Word	sh_info;	/* extra information */
+	Elf32_Word	sh_addralign;	/* address alignment */
+	Elf32_Word	sh_entsize;	/* section entry size */
+};
+
+struct elf64_shdr {
+	Elf64_Word	sh_name;	/* section name */
+	Elf64_Word	sh_type;	/* section type */
+	Elf64_Xword	sh_flags;	/* section flags */
+	Elf64_Addr	sh_addr;	/* virtual address */
+	Elf64_Off	sh_offset;	/* file offset */
+	Elf64_Xword	sh_size;	/* section size */
+	Elf64_Word	sh_link;	/* link to another */
+	Elf64_Word	sh_info;	/* misc info */
+	Elf64_Xword	sh_addralign;	/* memory alignment */
+	Elf64_Xword	sh_entsize;	/* table entry size */
+};
diff --git a/src/libmach/deps.mk b/src/libmach/deps.mk
@@ -59,6 +59,9 @@ elf64/elf64new.o: elf64/elf64.h
 elf64/elf64probe.o: $(INCDIR)/scc/scc/mach.h
 elf64/elf64probe.o: elf64/../libmach.h
 elf64/elf64probe.o: elf64/elf64.h
+elf64/elf64read.o: $(INCDIR)/scc/scc/mach.h
+elf64/elf64read.o: elf64/../libmach.h
+elf64/elf64read.o: elf64/elf64.h
 findsec.o: $(INCDIR)/scc/scc/mach.h
 findsec.o: libmach.h
 getindex.o: $(INCDIR)/scc/scc/mach.h
diff --git a/src/libmach/elf64/elf64.c b/src/libmach/elf64/elf64.c
@@ -5,10 +5,10 @@
 #include "../libmach.h"
 #include "elf64.h"
 
-struct objops coff32 = {
+struct objops elf64 = {
 	.probe = elf64probe,
 	.new = elf64new,
-	.read = NULL,
+	.read = elf64read,
 	.getidx = NULL,
 	.setidx = NULL,
 	.pc2line = NULL,
diff --git a/src/libmach/elf64/elf64.h b/src/libmach/elf64/elf64.h
@@ -1,10 +1,38 @@
 #include <scc/elf/elftypes.h>
 #include <scc/elf/elfhdr.h>
+#include <scc/elf/elfphdr.h>
+#include <scc/elf/elfshdr.h>
+#include <scc/elf/elfent.h>
 
+#define ELF_ST_BIND ELF64_ST_BIND
+#define ELF_ST_TYPE ELF64_ST_TYPE
 #define Elf_Ehdr Elf64_Ehdr
+#define Elf_Phdr Elf64_Phdr
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define ELFHSZ ELFH64SZ
+#define ELFPSZ ELFP64SZ
+#define ELFSSZ ELFS64SZ
+#define ELFESZ ELFE64SZ
+
+
+#define SEC_STRTBL 0	/* strtbl[]/strsiz[] slot: section name strings */
+#define SYM_STRTBL 1	/* strtbl[]/strsiz[] slot: symbol name strings */
+
+typedef struct elf64 Elf64;
 
 struct elf64 {
 	Elf_Ehdr hdr;
+	Elf_Phdr *phdr;		/* program headers, hdr.e_phnum entries */
+	Elf_Shdr *shdr;		/* section headers, nsec entries */
+	Elf_Shdr *symtab;	/* the SHT_SYMTAB entry inside shdr, if any */
+	Elf_Sym *syms;		/* decoded symbols, nsym entries */
+
+	char *strtbl[2];	/* raw string tables, see SEC_STRTBL/SYM_STRTBL */
+	size_t strsiz[2];	/* size in bytes of each string table */
+
+	size_t nsec;
+	size_t nsym;
 };
 
 extern int elf64new(Obj *);
diff --git a/src/libmach/elf64/elf64read.c b/src/libmach/elf64/elf64read.c
@@ -0,0 +1,381 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <scc/mach.h>
+
+#include "../libmach.h"
+#include "elf64.h"
+/* Decode a file header from buf into hdr; returns unpack()'s result (ELFHSZ). */
+static int
+unpack_hdr(int order, unsigned char *buf, Elf_Ehdr *hdr)
+{
+	int n;
+
+	n = unpack(order,
+	           buf,
+	           "'16sslqqqlssssss",
+	           hdr->e_ident,
+	           &hdr->e_type,
+	           &hdr->e_machine,
+	           &hdr->e_version,
+	           &hdr->e_entry,
+	           &hdr->e_phoff,
+	           &hdr->e_shoff,
+	           &hdr->e_flags,
+	           &hdr->e_ehsize,
+	           &hdr->e_phentsize,
+	           &hdr->e_phnum,
+	           &hdr->e_shentsize,
+	           &hdr->e_shnum,
+	           &hdr->e_shstrndx);
+
+	assert(n == ELFHSZ);
+
+	return n;
+}
+/* Decode a program header from buf into phdr; returns unpack()'s result (ELFPSZ). */
+static int
+unpack_phdr(int order, unsigned char *buf, Elf_Phdr *phdr)
+{
+	int n;
+
+	n = unpack(order,
+	           buf,
+	           "llqqqqqq",
+	           &phdr->p_type,
+	           &phdr->p_flags,
+	           &phdr->p_offset,
+	           &phdr->p_vaddr,
+	           &phdr->p_paddr,
+	           &phdr->p_filesz,
+	           &phdr->p_memsz,
+	           &phdr->p_align);
+
+	assert(n == ELFPSZ);
+
+	return n;
+}
+/* Decode a section header from buf into shdr; returns unpack()'s result (ELFSSZ). */
+static int
+unpack_shdr(int order, unsigned char *buf, Elf_Shdr *shdr)
+{
+	int n;
+
+	n = unpack(order,
+	           buf,
+	           "llqqqqllqq",
+	           &shdr->sh_name,
+	           &shdr->sh_type,
+	           &shdr->sh_flags,
+	           &shdr->sh_addr,
+	           &shdr->sh_offset,
+	           &shdr->sh_size,
+	           &shdr->sh_link,
+	           &shdr->sh_info,
+	           &shdr->sh_addralign,
+	           &shdr->sh_entsize);
+
+	assert(n == ELFSSZ);
+
+	return n;
+}
+/* Decode a symbol table entry from buf into sym; returns unpack()'s result (ELFESZ). */
+static int
+unpack_sym(int order, unsigned char *buf, Elf_Sym *sym)
+{
+	int n;
+
+	n = unpack(order,
+	           buf,
+	           "lccsqq",
+	           &sym->st_name,
+	           &sym->st_info,
+	           &sym->st_other,
+	           &sym->st_shndx,
+	           &sym->st_value,
+	           &sym->st_size);
+	assert(n == ELFESZ);
+
+	return n;
+}
+/* Read the file header from fp; 1 for ET_REL/ET_EXEC/ET_DYN, 0 on error or other types. */
+static int
+readhdr(Obj *obj, FILE *fp)
+{
+	Elf64 *elf;
+	Elf_Ehdr *hdr;
+	unsigned char buf[ELFHSZ];
+
+	elf = obj->data;
+	hdr = &elf->hdr;
+
+	if (fread(buf, ELFHSZ, 1, fp) != 1)
+		return 0;
+	unpack_hdr(ORDER(obj->type), buf, hdr);
+
+	switch (hdr->e_type) {
+	case ET_REL:
+	case ET_EXEC:
+	case ET_DYN:
+		return 1;
+	default:
+		return 0;
+	}
+}
+/* Load the program header table into elf->phdr; 1 on success or if absent, 0 on error. */
+static int
+readphdr(Obj *obj, FILE *fp)
+{
+	long long i;
+	Elf_Ehdr *hdr;
+	Elf_Phdr *phdr;
+	Elf64 *elf;
+	unsigned char buf[ELFPSZ];
+
+	elf = obj->data;
+	hdr = &elf->hdr;
+
+	if (hdr->e_phoff == 0 || hdr->e_phnum == 0)
+		return 1;
+
+	phdr = calloc(hdr->e_phnum, sizeof(*phdr));
+	if (!phdr)
+		return 0;
+	elf->phdr = phdr;
+
+	if (!objpos(obj, fp, hdr->e_phoff))
+		return 0;
+	for (i = 0; i < hdr->e_phnum; i++) {
+		if (fread(buf, ELFPSZ, 1, fp) != 1)
+			return 0;
+		unpack_phdr(ORDER(obj->type), buf, &phdr[i]);
+	}
+
+	return 1;
+}
+/* Load the section header table into elf->shdr/nsec and record the symtab; 1 ok, 0 error. */
+static int
+readshdr(Obj *obj, FILE *fp)
+{
+	long long i, nsec;
+	Elf_Ehdr *hdr;
+	Elf_Shdr *shdr;
+	Elf64 *elf;
+	unsigned char buf[ELFSSZ];
+
+	elf = obj->data;
+	hdr = &elf->hdr;
+
+	if (hdr->e_shoff == 0)
+		return 1;
+
+	if (!objpos(obj, fp, hdr->e_shoff))
+		return 0;
+
+	if (hdr->e_shnum != SHN_UNDEF) {
+		nsec = hdr->e_shnum;
+	} else {
+		Elf_Shdr sec0;	/* e_shnum == 0: real count is in section 0's sh_size */
+		fpos_t pos;
+
+		if (fgetpos(fp, &pos) != 0)
+			return 0;
+		if (fread(buf, ELFSSZ, 1, fp) != 1)	/* peek at section header 0 */
+			return 0;
+		if (fsetpos(fp, &pos) != 0)	/* rewind so the loop below rereads it */
+			return 0;
+
+		unpack_shdr(ORDER(obj->type), buf, &sec0);
+		nsec = sec0.sh_size;
+	}
+
+	if (nsec < 0 || nsec > SIZE_MAX)	/* nsec < 0: sh_size did not fit in long long */
+		return 0;
+
+	shdr = calloc(nsec, sizeof(*shdr));
+	if (!shdr)
+		return 0;
+	elf->shdr = shdr;
+	elf->nsec = nsec;
+
+	for (i = 0; i < nsec; i++) {
+		if (fread(buf, ELFSSZ, 1, fp) != 1)
+			return 0;
+		unpack_shdr(ORDER(obj->type), buf, &shdr[i]);
+		if (shdr[i].sh_type == SHT_SYMTAB) {
+			/*
+			 * elf supports multiple symbol tables, but we don't
+			 * care and we only support one, and we reject elf
+			 * files with more than one symbol table.
+			 */
+			if (elf->symtab)
+				return 0;
+			elf->symtab = &shdr[i];
+		}
+	}
+
+	return 1;
+}
+/* Load the section name string table into strtbl[SEC_STRTBL]; 1 on success, 0 on error. */
+static int
+readsecstr(Obj *obj, FILE *fp)
+{
+	long idx;
+	size_t siz;
+	char *str;
+	Elf_Shdr *shdr;
+	Elf64 *elf;
+	Elf_Ehdr *hdr;
+
+	elf = obj->data;
+	hdr = &elf->hdr;
+	idx = hdr->e_shstrndx;
+	if (idx == SHN_UNDEF)
+		return 0;
+	if (idx == SHN_XINDEX) {
+		if (elf->nsec == 0)	/* no section 0 loaded to take sh_link from */
+			return 0;
+		idx = elf->shdr[0].sh_link;
+	}
+
+	if (idx >= elf->nsec)	/* nsec, not e_shnum: e_shnum is 0 with extended numbering */
+		return 0;
+	shdr = &elf->shdr[idx];
+
+	if (shdr->sh_size > SIZE_MAX)
+		return 0;
+
+	siz = shdr->sh_size;
+	if (siz == 0)
+		return 1;
+	str = malloc(siz);
+	if (!str)
+		return 0;
+
+	elf->strtbl[SEC_STRTBL] = str;
+	elf->strsiz[SEC_STRTBL] = siz;
+
+	if (!objpos(obj, fp, shdr->sh_offset))
+		return 0;
+	if (fread(str, siz, 1, fp) != 1)
+		return 0;
+
+	return 1;
+}
+/* Load the symtab's linked string table into strtbl[SYM_STRTBL]; 1 on success, 0 on error. */
+static int
+readsymstr(Obj *obj, FILE *fp)
+{
+	long idx;
+	size_t siz;
+	char *str;
+	Elf64 *elf;
+	Elf_Shdr *shdr;
+
+	elf = obj->data;
+	if (!elf->symtab)
+		return 1;
+
+	idx = elf->symtab->sh_link;
+	if (idx >= elf->nsec)
+		return 0;
+	shdr = &elf->shdr[idx];
+
+	if (shdr->sh_size > SIZE_MAX)
+		return 0;
+
+	siz = shdr->sh_size;
+	if (siz == 0)
+		return 1;
+	str = malloc(siz);
+	if (!str)
+		return 0;
+
+	elf->strtbl[SYM_STRTBL] = str;
+	elf->strsiz[SYM_STRTBL] = siz;
+
+	if (!objpos(obj, fp, shdr->sh_offset))
+		return 0;
+	if (fread(str, siz, 1, fp) != 1)
+		return 0;
+
+	return 1;
+}
+/* Load and validate the symbol table into elf->syms/nsym; 1 on success or if absent, 0 on error. */
+static int
+readsym(Obj *obj, FILE *fp)
+{
+	long nsym, i;
+	int sec;
+	Elf64 *elf;
+	Elf_Sym *syms;
+	Elf_Shdr *shdr;
+	unsigned char buf[ELFESZ];
+
+	elf = obj->data;
+	if (!elf->symtab)
+		return 1;
+	shdr = elf->symtab;
+	assert(shdr->sh_type == SHT_SYMTAB);
+	if (shdr->sh_entsize == 0)	/* malformed file: would divide by zero below */
+		return 0;
+	nsym = shdr->sh_size / shdr->sh_entsize;
+	if (nsym < 0 || nsym >= SIZE_MAX)	/* nsym < 0: count did not fit in long */
+		return 0;
+
+	syms = calloc(nsym, sizeof(*syms));
+	if (!syms)
+		return 0;
+	elf->syms = syms;
+	elf->nsym = nsym;
+
+	if (!objpos(obj, fp, shdr->sh_offset))
+		return 0;
+
+	for (i = 0; i < nsym; i++) {
+		if (fread(buf, ELFESZ, 1, fp) != 1)
+			return 0;
+		unpack_sym(ORDER(obj->type), buf, &syms[i]);
+
+		sec = syms[i].st_shndx;
+		switch (sec) {
+		case SHN_XINDEX:
+			/*
+			 * Elf supports an extension mechanism to allow section
+			 * indexes that do not fit in the 16 bit st_shndx field.
+			 * We don't care and we reject elf files using it.
+			 */
+			return 0;
+		case SHN_UNDEF:
+		case SHN_ABS:
+		case SHN_COMMON:
+			break;
+		default:
+			if (sec >= elf->nsec)
+				return 0;
+			break;
+		}
+	}
+
+	return 1;
+}
+/* Read a whole ELF64 object from fp into obj->data; returns 0 on success, -1 on failure. */
+int
+elf64read(Obj *obj, FILE *fp)
+{
+	if (!readhdr(obj, fp))
+		return -1;
+	if (!readphdr(obj, fp))
+		return -1;
+	if (!readshdr(obj, fp))
+		return -1;
+	if (!readsym(obj, fp))
+		return -1;
+	if (!readsecstr(obj, fp))
+		return -1;
+	if (!readsymstr(obj, fp))
+		return -1;
+
+	return 0;
+}
diff --git a/src/libmach/elf64/rules.mk b/src/libmach/elf64/rules.mk
@@ -2,3 +2,4 @@ ELF64_OBJS =\
 	elf64/elf64.o \
 	elf64/elf64new.o\
 	elf64/elf64probe.o\
+	elf64/elf64read.o\