commit 8ecae922997c55f70cd9e19cbf947a520f7ecca3
parent 577e93fe6d729b63447faad471fd0f5f2296f667
Author: Quentin Carbonneaux <quentin@c9x.me>
Date: Wed, 12 Oct 2022 20:59:20 +0200
thread-local storage for amd64_apple
It is quite similar to arm64_apple.
Probably, the call that needs to be
generated also provides extra
invariants on top of the regular
ABI, but I have not checked that.
Clang generates code that is a bit
neater than qbe's because, on x86,
a load can be fused into a call
instruction! We do not bother with
supporting this fusion since we expect
only sporadic use of the feature.
For reference, here is what clang
might output for a store to the
second entry of a thread-local
array of ints:
movq _x@TLVP(%rip), %rdi
callq *(%rdi)
movl %ecx, 4(%rax)
Diffstat:
4 files changed, 63 insertions(+), 6 deletions(-)
diff --git a/amd64/emit.c b/amd64/emit.c
@@ -167,9 +167,12 @@ emitcon(Con *con, FILE *f)
case CAddr:
l = str(con->label);
p = l[0] == '"' ? "" : T.assym;
- if (con->reloc == RelThr)
- fprintf(f, "%%fs:%s%s@tpoff", p, l);
- else
+ if (con->reloc == RelThr) {
+ if (T.apple)
+ fprintf(f, "%s%s@TLVP", p, l);
+ else
+ fprintf(f, "%%fs:%s%s@tpoff", p, l);
+ } else
fprintf(f, "%s%s", p, l);
if (con->bits.i)
fprintf(f, "%+"PRId64, con->bits.i);
@@ -340,7 +343,8 @@ Next:
case RCon:
off = fn->con[ref.val];
emitcon(&off, f);
- if (off.type == CAddr && off.reloc != RelThr)
+ if (off.type == CAddr)
+ if (off.reloc != RelThr || T.apple)
fprintf(f, "(%%rip)");
break;
case RTmp:
diff --git a/amd64/isel.c b/amd64/isel.c
@@ -63,7 +63,8 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
{
char buf[32];
Addr a, *m;
- Ref r0, r1;
+ Con cc, *c;
+ Ref r0, r1, r2, r3;
int s, n, op;
r1 = r0 = *r;
@@ -121,6 +122,28 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
m->offset.type = CUndef;
m->base = r0;
}
+ } else if (T.apple && rtype(r0) == RCon
+ && (c = &fn->con[r0.val])->type == CAddr
+ && c->reloc == RelThr) {
+ r1 = newtmp("isel", Kl, fn);
+ if (c->bits.i) {
+ r2 = newtmp("isel", Kl, fn);
+ cc = (Con){.type = CBits};
+ cc.bits.i = c->bits.i;
+ r3 = newcon(&cc, fn);
+ emit(Oadd, Kl, r1, r2, r3);
+ } else
+ r2 = r1;
+ emit(Ocopy, Kl, r2, TMP(RAX), R);
+ r2 = newtmp("isel", Kl, fn);
+ r3 = newtmp("isel", Kl, fn);
+ emit(Ocall, 0, R, r3, CALL(17));
+ emit(Ocopy, Kl, TMP(RDI), r2, R);
+ emit(Oload, Kl, r3, r2, R);
+ cc = *c;
+ cc.bits.i = 0;
+ r3 = newcon(&cc, fn);
+ emit(Oload, Kl, r2, r3, R);
}
*r = r1;
}
diff --git a/arm64/isel.c b/arm64/isel.c
@@ -70,9 +70,9 @@ static void
fixarg(Ref *pr, int k, int phi, Fn *fn)
{
char buf[32];
+ Con *c, cc;
Ref r0, r1, r2, r3;
int s, n;
- Con *c, cc;
r0 = *pr;
switch (rtype(r0)) {
diff --git a/test/tls.ssa b/test/tls.ssa
@@ -0,0 +1,30 @@
+thread data $i = align 4 {w 42}
+data $fmt = align 1 {b "i%d==%d\n", b 0}
+
+export
+function w $main() {  # runs $thread on a second thread, then prints this thread's $i and the value $thread returned
+@start
+ %pthr =l alloc8 8  # 8-byte slot handed to pthread_create as its first argument
+ %rval =l alloc8 8  # 8-byte slot handed to pthread_join as its second argument
+ call $pthread_create(l %pthr, l 0, l $thread, l 0)  # start $thread; the attribute and argument pointers are both 0
+ %t =l load %pthr  # read back what pthread_create stored in %pthr
+ call $pthread_join(l %t, l %rval)  # wait for the thread; its l result is expected in %rval
+ %i0 =w loadw thread $i  # word load of $i through the thread-local path; expected output says 42, i.e. unchanged here
+ call $printf(l $fmt, ..., w 0, w %i0)  # prints "i0==<%i0>"
+ %i1 =w load %rval  # word load from the 8-byte slot; presumably the low half of the l result (little-endian) -- confirm
+ call $printf(l $fmt, ..., w 1, w %i1)  # prints "i1==<%i1>"
+ ret 0
+}
+
+function l $thread(l %arg) {  # thread body: patches one byte of $i, then returns $i sign-extended to l; %arg is unused
+@start
+ %i3 =l add thread $i, 3  # address of thread-local $i plus 3
+ storeb 24, %i3  # with $i == 42 and little-endian layout: 42 + 24*2^24 = 402653226, the expected i1
+ %ret =l loadsw thread $i  # reload $i as a sign-extended word through the thread-local path
+ ret %ret
+}
+
+# >>> output
+# i0==42
+# i1==402653226
+# <<<