qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

commit 8ecae922997c55f70cd9e19cbf947a520f7ecca3
parent 577e93fe6d729b63447faad471fd0f5f2296f667
Author: Quentin Carbonneaux <quentin@c9x.me>
Date:   Wed, 12 Oct 2022 20:59:20 +0200

thread-local storage for amd64_apple

It is quite similar to arm64_apple.
Probably, the call that needs to be
generated also provides extra
invariants on top of the regular
abi, but I have not checked that.

Clang generates code that is a bit
neater than qbe's because, on x86,
a load can be fused into a call
instruction! We do not bother with
supporting these since we expect
only sporadic use of the feature.

For reference, here is what clang
might output for a store to the
second entry of a thread-local
array of ints:

        movq    _x@TLVP(%rip), %rdi
        callq   *(%rdi)
        movl    %ecx, 4(%rax)

Diffstat:
M amd64/emit.c | 12 ++++++++----
M amd64/isel.c | 25 ++++++++++++++++++++++++-
M arm64/isel.c | 2 +-
A test/tls.ssa | 30 ++++++++++++++++++++++++++++++
4 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/amd64/emit.c b/amd64/emit.c @@ -167,9 +167,12 @@ emitcon(Con *con, FILE *f) case CAddr: l = str(con->label); p = l[0] == '"' ? "" : T.assym; - if (con->reloc == RelThr) - fprintf(f, "%%fs:%s%s@tpoff", p, l); - else + if (con->reloc == RelThr) { + if (T.apple) + fprintf(f, "%s%s@TLVP", p, l); + else + fprintf(f, "%%fs:%s%s@tpoff", p, l); + } else fprintf(f, "%s%s", p, l); if (con->bits.i) fprintf(f, "%+"PRId64, con->bits.i); @@ -340,7 +343,8 @@ Next: case RCon: off = fn->con[ref.val]; emitcon(&off, f); - if (off.type == CAddr && off.reloc != RelThr) + if (off.type == CAddr) + if (off.reloc != RelThr || T.apple) fprintf(f, "(%%rip)"); break; case RTmp: diff --git a/amd64/isel.c b/amd64/isel.c @@ -63,7 +63,8 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn) { char buf[32]; Addr a, *m; - Ref r0, r1; + Con cc, *c; + Ref r0, r1, r2, r3; int s, n, op; r1 = r0 = *r; @@ -121,6 +122,28 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn) m->offset.type = CUndef; m->base = r0; } + } else if (T.apple && rtype(r0) == RCon + && (c = &fn->con[r0.val])->type == CAddr + && c->reloc == RelThr) { + r1 = newtmp("isel", Kl, fn); + if (c->bits.i) { + r2 = newtmp("isel", Kl, fn); + cc = (Con){.type = CBits}; + cc.bits.i = c->bits.i; + r3 = newcon(&cc, fn); + emit(Oadd, Kl, r1, r2, r3); + } else + r2 = r1; + emit(Ocopy, Kl, r2, TMP(RAX), R); + r2 = newtmp("isel", Kl, fn); + r3 = newtmp("isel", Kl, fn); + emit(Ocall, 0, R, r3, CALL(17)); + emit(Ocopy, Kl, TMP(RDI), r2, R); + emit(Oload, Kl, r3, r2, R); + cc = *c; + cc.bits.i = 0; + r3 = newcon(&cc, fn); + emit(Oload, Kl, r2, r3, R); } *r = r1; } diff --git a/arm64/isel.c b/arm64/isel.c @@ -70,9 +70,9 @@ static void fixarg(Ref *pr, int k, int phi, Fn *fn) { char buf[32]; + Con *c, cc; Ref r0, r1, r2, r3; int s, n; - Con *c, cc; r0 = *pr; switch (rtype(r0)) { diff --git a/test/tls.ssa b/test/tls.ssa @@ -0,0 +1,30 @@ +thread data $i = align 4 {w 42} +data $fmt = align 1 {b "i%d==%d\n", b 0} + +export +function w $main() { +@start + %pthr =l alloc8 8 
+ %rval =l alloc8 8 + call $pthread_create(l %pthr, l 0, l $thread, l 0) + %t =l load %pthr + call $pthread_join(l %t, l %rval) + %i0 =w loadw thread $i + call $printf(l $fmt, ..., w 0, w %i0) + %i1 =w load %rval + call $printf(l $fmt, ..., w 1, w %i1) + ret 0 +} + +function l $thread(l %arg) { +@start + %i3 =l add thread $i, 3 + storeb 24, %i3 + %ret =l loadsw thread $i + ret %ret +} + +# >>> output +# i0==42 +# i1==402653226 +# <<<