scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | README | LICENSE

commit 02741f466aea87b88a42ec4955132349ae38c8aa
parent 42027ec8bb4d7dcdd943cad220852eaed14aa288
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Tue, 14 Sep 2021 15:11:29 +0200

libc/amd64: Add assembly functions for string.h

AMD64 supports string instructions with repeat prefixes,
and using them can improve the performance of
this kind of function.

Diffstat:
Msrc/libc/arch/amd64/Makefile | 17++++++++++++-----
Asrc/libc/arch/amd64/memchr.s | 23+++++++++++++++++++++++
Asrc/libc/arch/amd64/memcmp.s | 30++++++++++++++++++++++++++++++
Msrc/libc/arch/amd64/memcpy.s | 5+++--
Asrc/libc/arch/amd64/memmove.s | 29+++++++++++++++++++++++++++++
Asrc/libc/arch/amd64/memset.s | 14++++++++++++++
Asrc/libc/arch/amd64/strchr.s | 30++++++++++++++++++++++++++++++
Asrc/libc/arch/amd64/strcmp.s | 39+++++++++++++++++++++++++++++++++++++++
Asrc/libc/arch/amd64/strcpy.s | 22++++++++++++++++++++++
9 files changed, 202 insertions(+), 7 deletions(-)

diff --git a/src/libc/arch/amd64/Makefile b/src/libc/arch/amd64/Makefile
@@ -1,20 +1,27 @@
 .POSIX:
 
+PROJECTDIR = ../../../..
 DIRS =\
-	netbsd\
-	openbsd\
+	darwin\
 	dragonfly\
 	linux\
-	darwin\
+	netbsd\
+	openbsd\
 
-PROJECTDIR =../../../..
 include $(PROJECTDIR)/scripts/rules.mk
 include ../../rules.mk
 
 OBJS =\
 	longjmp.$O\
-	setjmp.$O\
+	memchr.$O\
+	memcmp.$O\
 	memcpy.$O\
+	memmove.$O\
+	memset.$O\
+	setjmp.$O\
+	strchr.$O\
+	strcmp.$O\
+	strcpy.$O\
 
 all: $(OBJS) $(SYS)
 
diff --git a/src/libc/arch/amd64/memchr.s b/src/libc/arch/amd64/memchr.s
@@ -0,0 +1,23 @@
+	.file	"memchr.s"
+/* void *memchr(const void *s, int c, size_t n): rdi = s, esi = c, rdx = n */
+	.text
+	.globl	memchr,_memchr
+_memchr:
+memchr:
+	cmpq	$0,%rdx			/* n == 0: nothing to scan */
+	je	notfound
+
+	movq	%rdx,%rcx		/* rcx = maximum bytes to scan */
+	movb	%sil,%al		/* al = byte searched for */
+	cld
+	repne
+	scasb				/* stops on match or when rcx hits 0 */
+	je	found			/* ZF comes from the last comparison */
+
+notfound:
+	xor	%eax,%eax		/* return NULL */
+	ret
+
+found:
+	leaq	-1(%rdi),%rax		/* scasb left rdi one past the match */
+	ret
diff --git a/src/libc/arch/amd64/memcmp.s b/src/libc/arch/amd64/memcmp.s
@@ -0,0 +1,30 @@
+	.file	"memcmp.s"
+/* int memcmp(const void *s1, const void *s2, size_t n): rdi, rsi, rdx */
+	.text
+	.globl	memcmp,_memcmp
+_memcmp:
+memcmp:
+	cmpq	$0,%rdx			/* n == 0: regions compare equal */
+	je	equa
+	cld
+	movq	%rdx,%rcx		/* rcx = bytes to compare */
+	movq	%rdi,%r8		/* swap so that rsi = s1, rdi = s2: */
+	movq	%rsi,%rdi		/* cmpsb computes (%rsi) - (%rdi), */
+	movq	%r8,%rsi		/* i.e. *s1 - *s2 */
+	rep				/* same prefix byte as repe for cmps */
+	cmpsb
+	je	equa
+	jb	less			/* unsigned byte comparison */
+	ja	grea
+
+equa:
+	movq	$0,%rax
+	ret
+
+less:
+	movq	$-1,%rax
+	ret
+
+grea:
+	movq	$1,%rax
+	ret
diff --git a/src/libc/arch/amd64/memcpy.s b/src/libc/arch/amd64/memcpy.s
@@ -1,12 +1,13 @@
 	.file	"memcpy.s"
+
 	.text
 	.globl	memcpy,_memcpy
 
-memcpy:
 _memcpy:
+memcpy:
+	cld
 	mov	%rdi,%rax
 	mov	%rdx,%rcx
-	cld
 	rep
 	movsb
 	ret
diff --git a/src/libc/arch/amd64/memmove.s b/src/libc/arch/amd64/memmove.s
@@ -0,0 +1,29 @@
+	.file	"memmove.s"
+/* void *memmove(void *dst, const void *src, size_t n): rdi, rsi, rdx */
+	.text
+	.globl	memmove,_memmove
+_memmove:
+memmove:
+	movq	%rdi,%rax		/* return value is dst */
+	movq	%rdx,%rcx		/* rcx = bytes to copy */
+
+	cmpq	%rdi,%rsi		/* flags = src - dst */
+	ja	forward			/* addresses are unsigned: ja/jb */
+	jb	backward
+	ret				/* src == dst: nothing to do */
+
+forward:
+	cld
+	rep
+	movsb
+	ret
+
+backward:
+	std				/* copy from the last byte downwards */
+	movq	%rdx,%r8
+	subq	$1,%r8			/* r8 = n - 1 */
+	addq	%r8,%rdi		/* rdi = dst + n - 1 */
+	addq	%r8,%rsi		/* rsi = src + n - 1 */
+	rep; movsb
+	cld				/* ABI: DF must be clear on return */
+	ret
diff --git a/src/libc/arch/amd64/memset.s b/src/libc/arch/amd64/memset.s
@@ -0,0 +1,14 @@
+	.file	"memset.s"
+/* void *memset(void *s, int c, size_t n): rdi = s, esi = c, rdx = n */
+	.text
+	.globl	memset,_memset
+_memset:
+memset:
+	cld
+	movq	%rdi,%r8		/* keep s: stosb advances rdi */
+	movq	%rdx,%rcx		/* rcx = bytes to fill */
+	movl	%esi,%eax		/* al = fill byte */
+	rep
+	stosb
+	movq	%r8,%rax		/* return s */
+	ret
diff --git a/src/libc/arch/amd64/strchr.s b/src/libc/arch/amd64/strchr.s
@@ -0,0 +1,30 @@
+	.file	"strchr.s"
+/* char *strchr(const char *s, int c): rdi = s, esi = c */
+	.text
+	.globl	strchr,_strchr
+_strchr:
+strchr:
+	movq	%rdi,%r8		/* r8 = s */
+
+	movb	$0,%al
+	movq	$-1,%rcx
+	cld
+	repne
+	scasb				/* leaves rdi one past the NUL */
+
+	movq	%rdi,%rcx
+	subq	%r8,%rcx		/* rcx = strlen(s) + 1: search the NUL too */
+
+	movq	%r8,%rdi
+	movb	%sil,%al
+	repne
+	scasb				/* rcx >= 1, so ZF is from a real compare */
+	je	found
+
+none:
+	xor	%rax,%rax		/* return NULL */
+	ret
+
+found:
+	leaq	-1(%rdi),%rax		/* scasb left rdi one past the match */
+	ret
diff --git a/src/libc/arch/amd64/strcmp.s b/src/libc/arch/amd64/strcmp.s
@@ -0,0 +1,39 @@
+	.file	"strcmp.s"
+/* int strcmp(const char *s1, const char *s2): rdi = s1, rsi = s2 */
+	.text
+	.globl	strcmp,_strcmp
+_strcmp:
+strcmp:
+	movq	%rdi,%r8		/* r8 = s1 */
+	movb	$0,%al
+	movq	$-1,%rcx
+	cld
+	repne
+	scasb				/* leaves rdi one past the NUL of s1 */
+
+	movq	%rdi,%rcx
+	subq	%r8,%rcx		/* rcx = strlen(s1) + 1: compare the NUL too */
+	movq	%r8,%rdi
+
+	xor	%rax,%rax	/* set ZF to 1 */
+
+	movq	%rdi,%r8		/* swap so that rsi = s1, rdi = s2: */
+	movq	%rsi,%rdi		/* cmpsb computes (%rsi) - (%rdi), */
+	movq	%r8,%rsi		/* i.e. *s1 - *s2 */
+	rep				/* same prefix byte as repe for cmps */
+	cmpsb
+	je	equa
+	jb	less			/* unsigned byte comparison */
+	ja	grea
+
+grea:
+	movq	$1,%rax
+	ret
+
+less:
+	movq	$-1,%rax
+	ret
+
+equa:
+	movq	$0,%rax
+	ret
diff --git a/src/libc/arch/amd64/strcpy.s b/src/libc/arch/amd64/strcpy.s
@@ -0,0 +1,22 @@
+	.file	"strcpy.s"
+/* char *strcpy(char *dst, const char *src): rdi = dst, rsi = src */
+	.text
+	.globl	strcpy,_strcpy
+
+strcpy:
+_strcpy:
+	movq	%rdi,%r9		/* r9 = dst */
+
+	movb	$0,%al
+	movq	$-1,%rcx
+	cld
+	movq	%rsi,%rdi		/* scasb scans through rdi */
+	repne
+	scasb				/* leaves rdi one past the NUL of src */
+	movq	%rdi,%rcx
+	subq	%rsi,%rcx		/* rcx = strlen(src) + 1 */
+
+	movq	%r9,%rdi
+	mov	%rdi,%rax		/* return dst */
+	rep; movsb			/* copies the string and its NUL */
+	ret