qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

commit 367c8215d99054892740ad74c690b106c45ebf60
parent bf153b359e9ce3ebef9bca899eb7ed5bd9045c11
Author: Quentin Carbonneaux <quentin@c9x.me>
Date:   Sun,  5 Dec 2021 21:25:00 +0100

arm64: fix slots with offset >32k

When slots are used with a large offset,
the emitter generates invalid assembly
code. That is caught later on by the
assembler, but it prevents compilation
of programs with large stack frames.

When a slot offset is too large to be
expressed as a constant offset to x29
(the frame pointer), emitins() inserts
a late Oaddr instruction to x16 and
replaces the large slot reference with
x16.

This change also gave me the opportunity
to refactor the save/restore logic for
callee-save registers.

This fixes the following Hare issue:
https://todo.sr.ht/~sircmpwn/hare/387

Diffstat:
M arm64/emit.c | 88 +++++++++++++++++++++++++++++++++++++------------------------------------------
1 file changed, 41 insertions(+), 47 deletions(-)

diff --git a/arm64/emit.c b/arm64/emit.c @@ -228,7 +228,7 @@ emitf(char *s, Ins *i, E *e) fprintf(e->f, "[%s]", rname(r.val, Kl)); break; case RSlot: - fprintf(e->f, "[sp, %"PRIu64"]", slot(r.val, e)); + fprintf(e->f, "[x29, %"PRIu64"]", slot(r.val, e)); break; } break; @@ -276,6 +276,26 @@ loadcon(Con *c, int r, int k, FILE *f) } } +static void emitins(Ins *, E *); + +static void +fixarg(Ref *pr, E *e) +{ + Ins *i; + Ref r; + uint64_t s; + + r = *pr; + if (rtype(r) == RSlot) { + s = slot(r.val, e); + if (s > 32760) { + i = &(Ins){Oaddr, Kl, TMP(IP0), {r}}; + emitins(i, e); + *pr = TMP(IP0); + } + } +} + static void emitins(Ins *i, E *e) { @@ -285,6 +305,10 @@ emitins(Ins *i, E *e) switch (i->op) { default: + if (isload(i->op)) + fixarg(&i->arg[0], e); + if (isstore(i->op)) + fixarg(&i->arg[1], e); Table: /* most instructions are just pulled out of * the table omap[], some special cases are @@ -409,9 +433,9 @@ arm64_emitfn(Fn *fn, FILE *out) #undef X }; static int id0; - int n, c, lbl, *r; + int s, n, c, lbl, *r; uint64_t o; - Blk *b, *s; + Blk *b, *t; Ins *i; E *e; @@ -457,28 +481,13 @@ arm64_emitfn(Fn *fn, FILE *out) e->frame & 0xFFFF, e->frame >> 16 ); fputs("\tadd\tx29, sp, 0\n", e->f); - for (o=e->frame+16, r=arm64_rclob; *r>=0; r++) + s = (e->frame - e->padding) / 4; + for (r=arm64_rclob; *r>=0; r++) if (e->fn->reg & BIT(*r)) { - if (o <= 32760) - fprintf(e->f, - "\tstr\t%s, [sp, %"PRIu64"]\n", - rname(*r, Kx), o -= 8 - ); - else if (o <= 65535) - fprintf(e->f, - "\tmov\tx16, #%"PRIu64"\n" - "\tstr\t%s, [sp, x16]\n", - o -= 8, rname(*r, Kx) - ); - else { - o -= 8; - fprintf(e->f, - "\tmov\tx16, #%"PRIu64"\n" - "\tmovk\tx16, #%"PRIu64", lsl #16\n" - "\tstr\t%s, [sp, x16]\n", - o & 0xFFFF, o >> 16, rname(*r, Kx) - ); - } + s -= 2; + i = &(Ins){.arg = {TMP(*r), SLOT(s)}}; + i->op = *r >= V0 ? 
Ostored : Ostorel; + emitins(i, e); } for (lbl=0, b=e->fn->start; b; b=b->link) { @@ -489,28 +498,13 @@ arm64_emitfn(Fn *fn, FILE *out) lbl = 1; switch (b->jmp.type) { case Jret0: - for (o=e->frame+16, r=arm64_rclob; *r>=0; r++) + s = (e->frame - e->padding) / 4; + for (r=arm64_rclob; *r>=0; r++) if (e->fn->reg & BIT(*r)) { - if (o <= 32760) - fprintf(e->f, - "\tldr\t%s, [sp, %"PRIu64"]\n", - rname(*r, Kx), o -= 8 - ); - else if (o <= 65535) - fprintf(e->f, - "\tmov\tx16, #%"PRIu64"\n" - "\tldr\t%s, [sp, x16]\n", - o -= 8, rname(*r, Kx) - ); - else { - o -= 8; - fprintf(e->f, - "\tmov\tx16, #%"PRIu64"\n" - "\tmovk\tx16, #%"PRIu64", lsl #16\n" - "\tldr\t%s, [sp, x16]\n", - o & 0xFFFF, o >> 16, rname(*r, Kx) - ); - } + s -= 2; + i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}}; + i->cls = *r >= V0 ? Kd : Kl; + emitins(i, e); } o = e->frame + 16; if (e->fn->vararg) @@ -555,9 +549,9 @@ arm64_emitfn(Fn *fn, FILE *out) if (c < 0 || c > NCmp) die("unhandled jump %d", b->jmp.type); if (b->link == b->s2) { - s = b->s1; + t = b->s1; b->s1 = b->s2; - b->s2 = s; + b->s2 = t; } else c = cmpneg(c); fprintf(e->f, "\tb%s\t.L%d\n", ctoa[c], id0+b->s2->id);