1140 lines
		
	
	
		
			23 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			1140 lines
		
	
	
		
			23 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
Subject: [PATCH] AVR32-optimized string operations
 | 
						|
 | 
						|
Add hand-optimized AVR32-specific string operations. Some of them
 | 
						|
need a bit more testing, though.
 | 
						|
 | 
						|
---
 | 
						|
 | 
						|
 libc/string/avr32/Makefile      |   40 +++++++++++
 | 
						|
 libc/string/avr32/bcopy.S       |   15 ++++
 | 
						|
 libc/string/avr32/bzero.S       |   12 +++
 | 
						|
 libc/string/avr32/memchr.S      |   62 +++++++++++++++++
 | 
						|
 libc/string/avr32/memcmp.S      |   50 +++++++++++++
 | 
						|
 libc/string/avr32/memcpy.S      |  110 ++++++++++++++++++++++++++++++
 | 
						|
 libc/string/avr32/memmove.S     |  114 +++++++++++++++++++++++++++++++
 | 
						|
 libc/string/avr32/memset.S      |   60 ++++++++++++++++
 | 
						|
 libc/string/avr32/strcat.S      |   95 ++++++++++++++++++++++++++
 | 
						|
 libc/string/avr32/strcmp.S      |   80 ++++++++++++++++++++++
 | 
						|
 libc/string/avr32/strcpy.S      |   63 +++++++++++++++++
 | 
						|
 libc/string/avr32/stringtest.c  |  144 ++++++++++++++++++++++++++++++++++++++++
 | 
						|
 libc/string/avr32/strlen.S      |   52 ++++++++++++++
 | 
						|
 libc/string/avr32/strncpy.S     |   77 +++++++++++++++++++++
 | 
						|
 libc/string/avr32/test_memcpy.c |   66 ++++++++++++++++++
 | 
						|
 15 files changed, 1040 insertions(+)
 | 
						|
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/bcopy.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/bcopy.S	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,15 @@
 | 
						|
+/*
 | 
						|
+ * Copyright (C) 2004 Atmel Norway
 | 
						|
+ */
 | 
						|
+
 | 
						|
+	.text
 | 
						|
+	.global bcopy
 | 
						|
+	.type	bcopy, @function
 | 
						|
+	.align	1
 | 
						|
+bcopy:
 | 
						|
+	/* Swap the first two arguments (r12 and r11) with three XORs, so no scratch register is needed */
 | 
						|
+	eor	r11, r12	/* r11 = a ^ b, with a = old r11 and b = old r12 */
 | 
						|
+	eor	r12, r11	/* r12 = b ^ (a ^ b) = a */
 | 
						|
+	eor	r11, r12	/* r11 = (a ^ b) ^ a = b */
 | 
						|
+	rjmp	__memmove	/* plain jump rather than a call; r10 is passed through untouched */
 | 
						|
+	.size	bcopy, . - bcopy
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/bzero.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/bzero.S	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2004 Atmel Norway
+ */
+
+/*
+ * bzero: clear a block of memory by handing it to __memset.
+ *
+ * r12 is passed through untouched. r11 is moved to r10 and r11 is then
+ * cleared, which yields the register layout memset.S names s (r12),
+ * c (r11) and n (r10), with the fill byte c equal to 0.
+ */
+	.text
+	.global bzero
+	.type	bzero, @function
+	.align	1
+bzero:
+	mov	r10, r11	/* third memset argument = the value that arrived in r11 */
+	mov	r11, 0		/* fill byte = 0 */
+	rjmp	__memset
+	.size	bzero, . - bzero
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/Makefile
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/Makefile	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,40 @@
 | 
						|
+# Makefile for uClibc
 | 
						|
+#
 | 
						|
+# Copyright (C) 2000-2003 Erik Andersen <andersen@uclibc.org>
 | 
						|
+#
 | 
						|
+# This program is free software; you can redistribute it and/or modify it under
 | 
						|
+# the terms of the GNU Library General Public License as published by the Free
 | 
						|
+# Software Foundation; either version 2 of the License, or (at your option) any
 | 
						|
+# later version.
 | 
						|
+#
 | 
						|
+# This program is distributed in the hope that it will be useful, but WITHOUT
 | 
						|
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | 
						|
+# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more
 | 
						|
+# details.
 | 
						|
+#
 | 
						|
+# You should have received a copy of the GNU Library General Public License
 | 
						|
+# along with this program; if not, write to the Free Software Foundation, Inc.,
 | 
						|
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 | 
						|
+
 | 
						|
+TOPDIR=../../../
 | 
						|
+include $(TOPDIR)Rules.mak
 | 
						|
+
 | 
						|
+SSRC	:= bcopy.S bzero.S memcmp.S memcpy.S memmove.S
 | 
						|
+SSRC	+= memset.S strcmp.S strlen.S
 | 
						|
+# memchr.S, strcat.S, strcpy.S and strncpy.S are broken, so they are left out of SSRC
 | 
						|
+SOBJS	:= $(patsubst %.S,%.o, $(SSRC))
 | 
						|
+OBJS	:= $(SOBJS)
 | 
						|
+
 | 
						|
+OBJ_LIST:= ../../obj.string.$(TARGET_ARCH)
 | 
						|
+
 | 
						|
+all: $(OBJ_LIST)
 | 
						|
+
 | 
						|
+$(OBJ_LIST): $(OBJS)
 | 
						|
+	echo $(addprefix string/$(TARGET_ARCH)/, $(OBJS)) > $@
 | 
						|
+
 | 
						|
+$(SOBJS): %.o: %.S
 | 
						|
+	$(CC) $(ASFLAGS) -c $< -o $@
 | 
						|
+	$(STRIPTOOL) -x -R .note -R .comment $@
 | 
						|
+
 | 
						|
+clean:
 | 
						|
+	$(RM) *.[oa] *~ core
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/memchr.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/memchr.S	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2004 Atmel Norway
+ */
+
+/*
+ * void *memchr(const void *str, int chr, size_t len)
+ *
+ * Returns in r12 a pointer to the first byte equal to the low byte of
+ * chr within the first len bytes at str, or 0 if there is none.
+ *
+ * str is first advanced bytewise to a word boundary; after that one word
+ * is loaded per iteration and psub.b/tnbz tell whether any of its four
+ * bytes matches. Words are only loaded while at least four bytes remain;
+ * the last 0..3 bytes are checked one at a time.
+ */
+
+#define str r12
+#define chr r11
+#define len r10
+
+	.text
+	.global memchr
+	.type	memchr, @function
+	.align	1
+memchr:
+	/* Replicate the low byte of chr into all four byte lanes */
+	andl	chr, 0xff, COH
+	or	chr, chr, chr << 8
+	or	chr, chr, chr << 16
+
+	mov	r9, str
+	andl	r9, 3, COH
+	brne	.Lunaligned_str
+
+.Laligned_search:
+	sub	len, 4
+	brlt	.Ltail
+	ld.w	r8, str++
+	psub.b	r9, r8, chr	/* a lane is 0 exactly where the byte matches */
+	tnbz	r9
+	brne	.Laligned_search
+
+	/* r8 holds a match; return the address of the first matching byte */
+	sub	str, 4
+	bfextu	r9, r8, 24, 8
+	cp.b	r9, chr
+	reteq	str
+	sub	str, -1
+	bfextu	r9, r8, 16, 8
+	cp.b	r9, chr
+	reteq	str
+	sub	str, -1
+	bfextu	r9, r8, 8, 8
+	cp.b	r9, chr
+	reteq	str
+	sub	str, -1
+	retal	str
+
+.Ltail:
+	/* Undo the last "sub len, 4": 0..3 bytes are left */
+	sub	len, -4
+	reteq	0
+
+1:	ld.ub	r8, str++
+	cp.b	r8, chr
+	breq	.Lfound_byte
+	sub	len, 1
+	brne	1b
+
+	retal	0
+
+.Lunaligned_str:
+	/* r9 = str & 3, so 4 - r9 bytes remain before the word boundary */
+	rsub	r9, r9, 4
+2:	sub	len, 1
+	retlt	0
+	ld.ub	r8, str++
+	cp.b	r8, chr
+	breq	.Lfound_byte
+	sub	r9, 1
+	brne	2b
+
+	rjmp	.Laligned_search
+
+.Lfound_byte:
+	/* str was post-incremented past the match; step back onto it */
+	sub	str, 1
+	retal	str
+
+	.size	memchr, . - memchr
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/memcmp.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/memcmp.S	2006-10-20 10:42:09.000000000 +0200
 | 
						|
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2004 Atmel Norway.
+ */
+
+/*
+ * int memcmp(const void *s1, const void *s2, size_t len)
+ *
+ * Compares word by word while at least four bytes remain, then byte by
+ * byte. Returns in r12 the difference between the first pair of bytes
+ * that differ (each taken as an unsigned value), or 0 if all len bytes
+ * are equal.
+ */
+
+#define s1 r12
+#define s2 r11
+#define len r10
+
+	.text
+	.global memcmp
+	.type	memcmp, @function
+	.align	1
+memcmp:
+	sub	len, 4
+	brlt	.Lless_than_4
+
+1:	ld.w	r8, s1++
+	ld.w	r9, s2++
+	cp.w	r8, r9
+	brne	.Lfound_word
+	sub	len, 4
+	brge	1b
+
+.Lless_than_4:
+	/* len is (bytes left - 4) here; restore it and finish bytewise */
+	sub	len, -4
+	reteq	0
+
+1:	ld.ub	r8, s1++
+	ld.ub	r9, s2++
+	sub	r8, r9
+	retne	r8
+	sub	len, 1
+	brgt	1b
+
+	retal	0
+
+.Lfound_word:
+	/*
+	 * r8 (loaded via s1) and r9 (loaded via s2) differ. Compare them a
+	 * byte at a time, starting with bits 31..24, and return the signed
+	 * difference of the first unequal pair - the same result the byte
+	 * loop above gives. s1, s2 and len are dead by now, so r12, r11 and
+	 * r10 are reused as scratch.
+	 */
+	mov	len, 4
+2:	bfextu	r12, r8, 24, 8
+	bfextu	r11, r9, 24, 8
+	sub	r12, r11
+	retne	r12
+	lsl	r8, 8
+	lsl	r9, 8
+	sub	len, 1
+	brne	2b
+	retal	0
+
+	.size	memcmp, . - memcmp
+
+	.weak	bcmp
+	bcmp = memcmp
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/memcpy.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/memcpy.S	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2004 Atmel Norway
+ */
+
+/*
+ * memcpy / __memcpy: copy len (r10) bytes from src (r11) to the address
+ * passed in r12, and return r12 unchanged.
+ *
+ * Copies shorter than 32 bytes use a plain byte loop. Longer copies
+ * first bring src to a 32-byte boundary; then, if the destination is
+ * word-aligned, 32-byte and 16-byte bursts are moved through r0-r7 with
+ * ldm/stm, otherwise single words are used. Leftover bytes are copied by
+ * jumping into an unrolled run of byte copies.
+ */
+
+/* r12 must be returned unmodified, so a copy of it serves as the cursor */
+#define dst r9
+#define src r11
+#define len r10
+
+	.text
+	.global	memcpy
+	.type	memcpy, @function
+
+	.global	__memcpy
+	.hidden	__memcpy
+	.type	__memcpy, @function
+memcpy:
+__memcpy:
+	pref	src[0]
+	mov	dst, r12
+
+	/* Only copies of 32 bytes or more take the elaborate path */
+	cp.w	len, 32
+	brge	.Lbig_copy
+
+	/* Short copy: one byte per pass until len - 1 drops below zero */
+	sub	len, 1
+	retlt	r12
+.Lshort_loop:
+	ld.ub	r8, src++
+	st.b	dst++, r8
+	sub	len, 1
+	brge	.Lshort_loop
+	retal	r12
+
+.Lbig_copy:
+	pushm	r0-r7, lr
+
+	/* Is src on a 32-byte boundary and dst on a word boundary? */
+	mov	r8, src
+	andl	r8, 31, COH
+	brne	.Lalign_src
+	mov	r8, dst
+	andl	r8, 3, COH
+	brne	.Lword_copy
+
+.Lburst_copy:
+	sub	len, 32
+	brlt	.Lburst_done
+
+.Lburst_loop:
+	/* Move 32 bytes per pass through r0-r7 */
+	ldm	src, r0-r7
+	sub	src, -32
+	stm	dst, r0-r7
+	sub	dst, -32
+	sub	len, 32
+	brge	.Lburst_loop
+
+.Lburst_done:
+	/* One more 16-byte burst if that many bytes are left */
+	sub	len, -16
+	brlt	.Lbyte_tail
+	ldm	src, r0-r3
+	sub	src, -16
+	sub	len, 16
+	stm	dst, r0-r3
+	sub	dst, -16
+
+.Lbyte_tail:
+	/*
+	 * len is (bytes left - 16) here. Negate it and jump over the byte
+	 * copies that are not needed; the << 2 scaling presumes that each
+	 * ld.ub/st.b pair below occupies 4 bytes.
+	 */
+	neg	len
+	add	pc, pc, len << 2
+	.rept	15
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	.endr
+
+	popm	r0-r7, pc
+
+.Lalign_src:
+	/* r8 = src & 31: copy 32 - r8 bytes to reach the next 32-byte boundary */
+	rsub	r8, r8, 32
+	sub	len, r8
+.Lalign_loop:
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	sub	r8, 1
+	brne	.Lalign_loop
+
+	/* With dst word-aligned as well, the burst path can be used */
+	pref	src[0]
+	mov	r8, 3
+	tst	dst, r8
+	breq	.Lburst_copy
+
+.Lword_copy:
+	/* src is aligned, but dst is not: fall back to one word at a time */
+	sub	len, 4
+	brlt	.Lword_tail
+.Lword_loop:
+	ld.w	r0, src++
+	st.w	dst++, r0
+	sub	len, 4
+	brge	.Lword_loop
+
+.Lword_tail:
+	/* len is (bytes left - 4): same jump trick for the last 0..3 bytes */
+	neg	len
+	add	pc, pc, len << 2
+	.rept	3
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	.endr
+
+	popm	r0-r7, pc
+	.size	memcpy, . - memcpy
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/memmove.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/memmove.S	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2004 Atmel Norway
+ */
+
+/*
+ * memmove / __memmove: copy len (r10) bytes from src (r11) to dst (r12),
+ * allowing the two ranges to overlap.
+ *
+ * When src is not below dst a forward copy cannot overwrite source bytes
+ * that are still to be read, so the job is handed to __memcpy. Otherwise
+ * both pointers are moved to the end of their ranges and the copy runs
+ * from high to low addresses. dst is r12 itself: after len bytes worth of
+ * pre-decrementing stores it is back at its entry value, which is what
+ * gets returned.
+ */
+
+#define dst r12
+#define src r11
+#define len r10
+
+	.text
+	.global memmove
+	.type	memmove, @function
+
+	.global	__memmove
+	.hidden	__memmove
+	.type	__memmove, @function
+memmove:
+__memmove:
+	/* Addresses are unsigned, so test "higher or same" rather than brge */
+	cp.w	src, dst
+	brhs	__memcpy
+
+	add	dst, len
+	add	src, len
+	pref	src[-1]
+
+	/*
+	 * The rest is basically the same as in memcpy.S except that
+	 * the direction is reversed.
+	 */
+	cp.w	len, 32
+	brge	.Lmore_than_31
+
+	sub	len, 1
+	retlt	r12
+1:	ld.ub	r8, --src
+	st.b	--dst, r8
+	sub	len, 1
+	brge	1b
+	retal	r12
+
+.Lmore_than_31:
+	pushm	r0-r7, lr
+
+	/* Check alignment */
+	mov	r8, src
+	andl	r8, 31, COH
+	brne	.Lunaligned_src
+	mov	r8, r12
+	andl	r8, 3, COH
+	brne	.Lunaligned_dst
+
+.Laligned_copy:
+	sub	len, 32
+	brlt	.Lless_than_32
+
+1:	/* Copy 32 bytes at a time */
+	sub	src, 32
+	ldm	src, r0-r7
+	sub	dst, 32
+	sub	len, 32		/* the brge below relies on stm not changing these flags */
+	stm	dst, r0-r7
+	brge	1b
+
+.Lless_than_32:
+	/* Copy 16 more bytes if possible */
+	sub	len, -16
+	brlt	.Lless_than_16
+	sub	src, 16
+	ldm	src, r0-r3
+	sub	dst, 16
+	sub	len, 16
+	stm	dst, r0-r3
+
+.Lless_than_16:
+	/* len is (bytes left - 16); restore it and copy the rest bytewise */
+	sub	len, -16
+	breq	2f
+1:	ld.ub	r0, --src
+	st.b	--dst, r0
+	sub	len, 1
+	brne	1b
+
+2:	popm	r0-r7, pc
+
+.Lunaligned_src:
+	/* Make src cacheline-aligned. r8 = (src & 31) bytes must go first */
+	sub	len, r8
+1:	ld.ub	r0, --src
+	st.b	--dst, r0
+	sub	r8, 1
+	brne	1b
+
+	/* If dst is word-aligned, we're ready to go */
+	pref	src[-4]
+	mov	r8, 3
+	tst	dst, r8
+	breq	.Laligned_copy
+
+.Lunaligned_dst:
+	/* src is aligned, but dst is not. Expect bad performance */
+	sub	len, 4
+	brlt	2f
+1:	ld.w	r0, --src
+	st.w	--dst, r0
+	sub	len, 4
+	brge	1b
+
+2:	neg	len
+	add	pc, pc, len << 2
+	.rept	3
+	ld.ub	r0, --src
+	st.b	--dst, r0
+	.endr
+
+	popm	r0-r7, pc
+	.size	memmove, . - memmove
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/memset.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/memset.S	2006-10-20 10:42:15.000000000 +0200
 | 
						|
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2004 Atmel Norway.
+ */
+
+/*
+ * memset / __memset: store the byte c (r11) into n (r10) bytes at s (r12)
+ * and return the original s.
+ *
+ * Fills of fewer than 32 bytes use a byte loop. Larger fills replicate
+ * the byte through a word, store single bytes until s is word-aligned,
+ * then store 8 bytes per pass and finish by jumping into an unrolled run
+ * of byte stores.
+ */
+
+#define s r12
+#define c r11
+#define n r10
+
+	.text
+	.global memset
+	.type	memset, @function
+
+	.global	__memset
+	.hidden	__memset
+	.type	__memset, @function
+
+	.align	1
+memset:
+__memset:
+	cp.w	n, 32
+	mov	r9, s		/* keep the original pointer for the return value */
+	brge	.Llarge_memset
+
+	sub	n, 1
+	retlt	s
+1:	st.b	s++, c
+	sub	n, 1
+	brge	1b
+
+	retal	r9
+
+.Llarge_memset:
+	/* r8 = fill byte copied into all four bytes; r11 = alignment mask */
+	mov	r8, r11
+	mov	r11, 3
+	bfins	r8, r8, 8, 8
+	bfins	r8, r8, 16, 16
+	tst	s, r11
+	breq	2f
+
+	/* Store single bytes until s is word-aligned */
+1:	st.b	s++, r8
+	sub	n, 1
+	tst	s, r11
+	brne	1b
+
+	/* r11 takes over the return value; r9 gets a second copy of the pattern */
+2:	mov	r11, r9
+	mov	r9, r8
+	sub	n, 8
+
+3:	st.d	s++, r8
+	sub	n, 8
+	brge	3b
+
+	/* If we are done, n == -8 and we'll skip all st.b insns below */
+	neg	n
+	lsl	n, 1
+	add	pc, n
+	.rept	7
+	st.b	s++, r8
+	.endr
+	retal	r11
+	.size	memset, . - memset
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/strcat.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/strcat.S	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2004 Atmel Norway
+ */
+
+/*
+ * strcat: append the NUL-terminated string s2 (r11) to the end of the
+ * string at r12, including the terminating NUL, and return r12 unchanged.
+ *
+ * The end of the destination is found bytewise up to the first word
+ * boundary and a word at a time (tnbz) after that. s2 is then copied one
+ * byte at a time, which makes no assumption about its alignment.
+ */
+
+#define s1 r9
+#define s2 r11
+
+	.text
+	.global strcat
+	.type	strcat, @function
+	.align	1
+strcat:
+	mov	s1, r12		/* work on a copy so that r12 can be returned */
+
+	/* r10 = s1 & 3; s1 is already word-aligned if that is zero */
+	mov	r10, s1
+	andl	r10, 3, COH
+	breq	.Lfind_end
+
+	/* Check the 4 - r10 bytes in front of the next word boundary */
+	rsub	r10, r10, 4
+1:	ld.ub	r8, s1++
+	cp.w	r8, 0
+	breq	.Lend_in_head
+	sub	r10, 1
+	brne	1b
+
+.Lfind_end:
+	/* Find the end of the first string, one aligned word at a time */
+	ld.w	r8, s1++
+	tnbz	r8
+	brne	.Lfind_end
+
+	/* r8 contains the NUL; make s1 point at it */
+	sub	s1, 4
+	bfextu	r10, r8, 24, 8
+	cp.w	r10, 0
+	breq	.Lappend
+	sub	s1, -1
+	bfextu	r10, r8, 16, 8
+	cp.w	r10, 0
+	breq	.Lappend
+	sub	s1, -1
+	bfextu	r10, r8, 8, 8
+	cp.w	r10, 0
+	breq	.Lappend
+	sub	s1, -1
+	rjmp	.Lappend
+
+.Lend_in_head:
+	/* s1 was post-incremented past the NUL; step back onto it */
+	sub	s1, 1
+
+.Lappend:
+	/* Now, append s2, overwriting the old NUL and copying the new one */
+	ld.ub	r8, s2++
+	st.b	s1++, r8
+	cp.w	r8, 0
+	brne	.Lappend
+	retal	r12
+
+	.size	strcat, . - strcat
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/strcmp.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/strcmp.S	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2004 Atmel Norway.
+ */
+
+/*
+ * strcmp: compare the NUL-terminated strings s1 (r12) and s2 (r11).
+ * Returns in r12 zero if they are equal, otherwise the difference between
+ * the first pair of bytes that differ.
+ *
+ * Words are compared only while both pointers are word-aligned; in every
+ * other case the strings are compared one byte at a time.
+ */
+
+#define s1 r12
+#define s2 r11
+
+	.text
+	.global strcmp
+	.type	strcmp, @function
+	.align	1
+strcmp:
+	mov	r8, 3
+	tst	s1, r8
+	brne	.Lunaligned_s1
+	tst	s2, r8
+	brne	.Lunaligned_s2
+
+.Lword_loop:
+	ld.w	r8, s1++
+	ld.w	r9, s2++
+	cp.w	r8, r9
+	brne	.Lwords_differ
+	tnbz	r8
+	brne	.Lword_loop
+	retal	0
+
+.Lwords_differ:
+	/*
+	 * Compare the four bytes of r8 and r9 from bits 31..24 downwards and
+	 * stop at the first difference or at a NUL. r12 and r11 are reused
+	 * as scratch since the pointers are no longer needed.
+	 */
+	bfextu	r12, r8, 24, 8
+	bfextu	r11, r9, 24, 8
+	sub	r12, r11
+	retne	r12
+	cp.w	r11, 0
+	reteq	0
+	bfextu	r12, r8, 16, 8
+	bfextu	r11, r9, 16, 8
+	sub	r12, r11
+	retne	r12
+	cp.w	r11, 0
+	reteq	0
+	bfextu	r12, r8, 8, 8
+	bfextu	r11, r9, 8, 8
+	sub	r12, r11
+	retne	r12
+	cp.w	r11, 0
+	reteq	0
+	bfextu	r12, r8, 0, 8
+	bfextu	r11, r9, 0, 8
+	sub	r12, r11
+	retal	r12
+
+.Lunaligned_s1:
+	/* Compare bytewise until s1 is word-aligned; r8 still holds the mask 3 */
+	tst	s1, r8
+	breq	.Ls1_aligned
+	ld.ub	r10, s1++
+	ld.ub	r9, s2++
+	sub	r10, r9
+	retne	r10
+	cp.w	r9, 0
+	brne	.Lunaligned_s1
+	retal	r10
+
+.Ls1_aligned:
+	tst	s2, r8
+	breq	.Lword_loop
+
+.Lunaligned_s2:
+	/*
+	 * s1 and s2 can't both be aligned, and unaligned word loads
+	 * can trigger spurious exceptions if we cross a page boundary.
+	 * Do it the slow way...
+	 */
+.Lbyte_loop:
+	ld.ub	r8, s1++
+	ld.ub	r9, s2++
+	sub	r8, r9
+	retne	r8
+	cp.w	r9, 0
+	brne	.Lbyte_loop
+	retal	0
+
+	.size	strcmp, . - strcmp
+
+	.weak	strcoll
+	strcoll	= strcmp
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/strcpy.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/strcpy.S	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2004 Atmel Norway
+ *
+ * To reduce the size, this one might simply call strncpy with len = -1.
+ */
+
+/*
+ * strcpy: copy the NUL-terminated string at src (r11), including its
+ * NUL, to the address passed in r12, and return r12 unchanged.
+ *
+ * src is brought to a word boundary bytewise; whole words are then
+ * copied until tnbz reports a word containing a NUL, and the bytes of
+ * that last word are stored singly up to and including the NUL.
+ */
+
+#define dst r9
+#define src r11
+
+	.text
+	.global strcpy
+	.type	strcpy, @function
+	.align	1
+strcpy:
+	mov	dst, r12
+
+	pref	src[0]
+
+	/*
+	 * Check alignment. If src is aligned but dst isn't, we can't
+	 * do much about it...
+	 */
+	mov	r8, src
+	andl	r8, 3, COH
+	brne	.Lunaligned_src
+
+.Laligned_copy:
+1:	ld.w	r8, src++
+	tnbz	r8
+	breq	2f
+	st.w	dst++, r8
+	rjmp	1b
+
+2:	/*
+	 * Ok, r8 now contains the terminating '\0'. Copy the
+	 * remaining bytes individually.
+	 */
+	bfextu	r10, r8, 24, 8
+	st.b	dst++, r10
+	cp.w	r10, 0
+	reteq	r12
+	bfextu	r10, r8, 16, 8
+	st.b	dst++, r10
+	cp.w	r10, 0
+	reteq	r12
+	bfextu	r10, r8, 8, 8
+	st.b	dst++, r10
+	cp.w	r10, 0
+	reteq	r12
+	st.b	dst++, r8
+	retal	r12
+
+.Lunaligned_src:
+	/* r8 = src & 3: copy 4 - r8 bytes so that src becomes word-aligned */
+	rsub	r8, r8, 4
+3:	ld.ub	r10, src++
+	st.b	dst++, r10
+	cp.w	r10, 0
+	reteq	r12
+	sub	r8, 1
+	brne	3b
+
+	rjmp	.Laligned_copy
+
+	.size	strcpy, . - strcpy
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/stringtest.c
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/stringtest.c	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,144 @@
 | 
						|
+
 | 
						|
+#include <stdio.h>
 | 
						|
+#include <string.h>
 | 
						|
+#include <time.h>
 | 
						|
+#include <sys/mman.h>
 | 
						|
+
 | 
						|
+#define BUF_SIZE (8 * 1024)
 | 
						|
+
 | 
						|
+static char *buf1;
 | 
						|
+static char *buf1_ref;
 | 
						|
+static char *buf2;
 | 
						|
+
 | 
						|
+extern void *optimized_memcpy(void *dest, void *src, size_t len);
 | 
						|
+extern void *optimized_memmove(void *dest, void *src, size_t len);
 | 
						|
+extern char *optimized_strcpy(char *dest, char *src);
 | 
						|
+extern char *optimized_strncpy(char *dest, char *src, size_t len);
 | 
						|
+
 | 
						|
+void dump_mismatch(char *buf, char *ref, size_t len)
+{
+	size_t i, j, n;	/* unsigned like len; n = bytes in the current row */
+	/* Print, row by row, every 16-byte row in which buf and ref differ. */
+	for (i = 0; i < len; i += 16) {
+		n = (len - i < 16) ? len - i : 16;	/* the last row may be short */
+		if (memcmp(buf + i, ref + i, n) == 0)
+			continue;
+		printf("%4x buf:", (unsigned int)i);
+		for (j = i; j < (i + n); j++)
+			printf(" %02x", (unsigned char)buf[j]);
+		printf("\n     ref:");
+		for (j = i; j < (i + n); j++)
+			printf(" %02x", (unsigned char)ref[j]);
+		printf("\n");
+	}
+}
 | 
						|
+
 | 
						|
+static void test_memcpy(int src_offset, int dst_offset, int len)
+{
+	clock_t start, old, new;
+	int i;
+	/* Fill destination and reference alike, the source with another pattern. */
+	memset(buf1, 0x55, BUF_SIZE);
+	memset(buf1_ref, 0x55, BUF_SIZE);
+	memset(buf2, 0xaa, BUF_SIZE);
+
+	printf("Testing memcpy with offsets %d => %d and len %d...",
+	       src_offset, dst_offset, len);
+	/* Time 8192 runs of optimized_memcpy, then 8192 runs of memcpy. */
+	start = clock();
+	for (i = 0; i < 8192; i++)
+		optimized_memcpy(buf1 + dst_offset, buf2 + src_offset, len);
+	new = clock() - start;
+	start = clock();
+	for (i = 0; i < 8192; i++)
+		memcpy(buf1_ref + dst_offset, buf2 + src_offset, len);
+	old = clock() - start;
+	/* Both buffers started out identical, so they must still be identical. */
+	if (memcmp(buf1, buf1_ref, BUF_SIZE) == 0)
+		printf("OK\n");
+	else {
+		printf("FAILED\n");
+		dump_mismatch(buf1, buf1_ref, BUF_SIZE);
+	}
+	printf("CPU time used: %ld vs. %ld\n", (long)new, (long)old);
+}
 | 
						|
+
 | 
						|
+static void test_memmove(int src_offset, int dst_offset, int len)
+{
+	clock_t start, old, new;
+	/* Fill destination and reference alike, the source with another pattern. */
+	memset(buf1, 0x55, BUF_SIZE);
+	memset(buf1_ref, 0x55, BUF_SIZE);
+	memset(buf2, 0xaa, BUF_SIZE);
+
+	printf("Testing memmove with offsets %d => %d and len %d...",
+	       src_offset, dst_offset, len);
+	/* Time one call of optimized_memmove, then one call of memmove. */
+	start = clock();
+	optimized_memmove(buf1 + dst_offset, buf2 + src_offset, len);
+	new = clock() - start;
+	start = clock();
+	memmove(buf1_ref + dst_offset, buf2 + src_offset, len);
+	old = clock() - start;
+	/* Both buffers started out identical, so they must still be identical. */
+	if (memcmp(buf1, buf1_ref, BUF_SIZE) == 0)
+		printf("OK\n");
+	else {
+		printf("FAILED\n");
+		dump_mismatch(buf1, buf1_ref, BUF_SIZE);
+	}
+	printf("CPU time used: %ld vs. %ld\n", (long)new, (long)old);
+}
 | 
						|
+
 | 
						|
+int main(int argc, char *argv[])
+{
+	buf2 = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);	/* fd is -1: no file backs an anonymous map */
+	if (buf2 == MAP_FAILED) {
+		perror("Failed to allocate memory for buf2");
+		return 1;
+	}
+	buf1 = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (buf1 == MAP_FAILED) {
+		perror("Failed to allocate memory for buf1");
+		return 1;
+	}
+	buf1_ref = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (buf1_ref == MAP_FAILED) {
+		perror("Failed to allocate memory for buf1_ref");
+		return 1;
+	}
+	printf("\n === MEMCPY ===\n\n");
+	/* Each call below is (src_offset, dst_offset, len). */
+	test_memcpy(0, 0, BUF_SIZE - 32);
+	test_memcpy(0, 0, 1);
+	test_memcpy(0, 0, 31);
+	test_memcpy(0, 0, 32);
+	test_memcpy(0, 0, 127);
+	test_memcpy(0, 0, 128);
+	test_memcpy(4, 4, BUF_SIZE - 32 - 4);
+	test_memcpy(1, 1, BUF_SIZE - 32 - 1);
+	test_memcpy(1, 1, 126);
+	test_memcpy(0, 3, 128);
+	test_memcpy(1, 4, 128);
+	test_memcpy(0, 0, 0);
+
+	printf("\n === MEMMOVE ===\n\n");
+
+	test_memmove(0, 0, BUF_SIZE - 32);
+	test_memmove(0, 0, 1);
+	test_memmove(0, 0, 31);
+	test_memmove(0, 0, 32);
+	test_memmove(0, 0, BUF_SIZE - 33);
+	test_memmove(0, 0, 128);
+	test_memmove(4, 4, BUF_SIZE - 32 - 4);
+	test_memmove(1, 1, BUF_SIZE - 32 - 1);
+	test_memmove(1, 1, BUF_SIZE - 130);
+	test_memmove(0, 3, BUF_SIZE - 128);
+	test_memmove(1, 4, BUF_SIZE - 128);
+	test_memmove(0, 0, 0);
+
+	return 0;
+}
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/strlen.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/strlen.S	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2004 Atmel Norway
+ */
+
+/*
+ * strlen: return in r12 the number of bytes in front of the first NUL
+ * of the string passed in r12.
+ *
+ * Up to three leading bytes are examined singly until the pointer is
+ * word-aligned; from there the string is scanned a word at a time with
+ * tnbz, and the NUL is then located inside the final word.
+ */
+
+#define str r12
+
+	.text
+	.global strlen
+	.type	strlen, @function
+strlen:
+	mov	r11, r12		/* r11 = start of the string */
+
+	mov	r9, str
+	andl	r9, 3, COH
+	brne	.Lunaligned_str
+
+.Lword_scan:
+	ld.w	r8, str++
+	tnbz	r8
+	brne	.Lword_scan
+
+	/*
+	 * r8 contains the NUL and str points just past that word, so
+	 * str - start exceeds the length by 4, 3, 2 or 1 depending on
+	 * which byte of the word (bits 31..24 first) is the NUL.
+	 */
+	sub	r12, r11
+	bfextu	r9, r8, 24, 8
+	cp.w	r9, 0
+	subeq	r12, 4
+	reteq	r12
+	bfextu	r9, r8, 16, 8
+	cp.w	r9, 0
+	subeq	r12, 3
+	reteq	r12
+	bfextu	r9, r8, 8, 8
+	cp.w	r9, 0
+	subeq	r12, 2
+	reteq	r12
+	sub	r12, 1
+	retal	r12
+
+.Lunaligned_str:
+	/*
+	 * r9 = str & 3. Jump into the byte checks below so that only
+	 * 4 - r9 of them run. The << 3 scaling presumes that each
+	 * ld.ub/sub/breq group occupies 8 bytes; the 4-byte nop pads the
+	 * distance from the add to the first group accordingly.
+	 */
+	add	pc, pc, r9 << 3
+	sub	r0, r0, 0	/* 4-byte nop */
+	ld.ub	r8, str++
+	sub	r8, r8, 0
+	breq	.Lnul_in_head
+	ld.ub	r8, str++
+	sub	r8, r8, 0
+	breq	.Lnul_in_head
+	ld.ub	r8, str++
+	sub	r8, r8, 0
+	brne	.Lword_scan
+
+.Lnul_in_head:
+	/* str was post-incremented past the NUL; undo that, then subtract start */
+	sub	r12, 1
+	sub	r12, r11
+	retal	r12
+
+	.size	strlen, . - strlen
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/strncpy.S
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/strncpy.S	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,77 @@
 | 
						|
+/*
 | 
						|
+ * Copyright (C) 2004 Atmel Norway
 | 
						|
+ */
 | 
						|
+
 | 
						|
+#define dst r9
+#define src r11
+#define len r10
+
+/*
+ * char *strncpy(char *dest, const char *src, size_t n)
+ *
+ * In:  r12 = dest, r11 = src, r10 = n.  Out: r12 = dest.
+ * Clobbers r8-r11 and the condition flags.
+ *
+ * Copies at most n bytes, stopping after the terminating NUL, then
+ * pads dest with NULs until exactly n bytes have been written.
+ * Invariant: len > 0 whenever one of the labels below is reached.
+ */
+	.text
+	.global strncpy
+	.type	strncpy, @function
+strncpy:
+	mov	dst, r12
+	cp.w	len, 0
+	reteq	r12			/* n == 0: nothing to write */
+	pref	src[0]
+
+	/* Copy single bytes until src is word-aligned. */
+.Lalign_src:
+	mov	r8, src
+	andl	r8, 3, COH
+	breq	.Laligned_copy
+	ld.ub	r8, src++
+	st.b	dst++, r8
+	sub	len, 1
+	reteq	r12			/* n bytes written */
+	cp.w	r8, 0
+	brne	.Lalign_src
+	rjmp	.Lzero_fill		/* stored the NUL: pad the rest */
+
+	/*
+	 * src is word-aligned and len > 0, so loading the whole word
+	 * is safe even when fewer than four bytes remain. If dst is
+	 * not aligned as well, we can't do much about it...
+	 */
+.Laligned_copy:
+	ld.w	r8, src++
+	tnbz	r8
+	breq	.Lcopy_tail		/* word contains the NUL */
+	cp.w	len, 4
+	brlo	.Lcopy_tail		/* fewer than 4 left (unsigned) */
+	st.w	dst++, r8
+	sub	len, 4
+	brne	.Laligned_copy
+	retal	r12
+
+	/*
+	 * Copy the bytes of r8 one by one, most significant (lowest
+	 * address) first, until the NUL is stored or len runs out.
+	 * r11 is free as scratch because src is no longer needed.
+	 */
+.Lcopy_tail:
+	bfextu	r11, r8, 24, 8
+	lsl	r8, r8, 8
+	st.b	dst++, r11
+	sub	len, 1
+	reteq	r12
+	cp.w	r11, 0
+	brne	.Lcopy_tail
+
+	/* Pad the remaining len (> 0) bytes of dest with NULs. */
+.Lzero_fill:
+	mov	r8, 0
+1:	st.b	dst++, r8
+	sub	len, 1
+	brne	1b
+	retal	r12
 | 
						|
Index: uClibc-0.9.28-avr32/libc/string/avr32/test_memcpy.c
 | 
						|
===================================================================
 | 
						|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
 | 
						|
+++ uClibc-0.9.28-avr32/libc/string/avr32/test_memcpy.c	2006-10-19 15:05:52.000000000 +0200
 | 
						|
@@ -0,0 +1,66 @@
 | 
						|
+
 | 
						|
+#include <stdio.h>
 | 
						|
+#include <string.h>
 | 
						|
+
 | 
						|
+#define BUF_SIZE 32768
 | 
						|
+/* buf1 receives new_memcpy()'s output, buf1_ref receives memcpy()'s, buf2 is the source (see test()). */
 | 
						|
+static char buf1[BUF_SIZE] __attribute__((aligned(32)));
 | 
						|
+static char buf1_ref[BUF_SIZE] __attribute__((aligned(32)));
 | 
						|
+static char buf2[BUF_SIZE] __attribute__((aligned(32)));
 | 
						|
+/* Implementation under test; it is only declared in this file, not defined. */
 | 
						|
+extern void *new_memcpy(void *dest, void *src, size_t len);
 | 
						|
+/* Hex-dump every 16-byte row in which buf and ref differ; the last row may be shorter. */
+void dump_mismatch(char *buf, char *ref, size_t len)
+{
+	size_t i, j, n;
+
+	for (i = 0; i < len; i += n) {
+		n = (len - i < 16) ? len - i : 16;	/* never read past len */
+		if (memcmp(buf + i, ref + i, n) == 0)
+			continue;
+		printf("%4lx buf:", (unsigned long)i);
+		for (j = i; j < i + n; j++)	/* cast: plain char may be signed */
+			printf(" %02x", (unsigned char)buf[j]);
+		printf("\n     ref:");
+		for (j = i; j < i + n; j++)
+			printf(" %02x", (unsigned char)ref[j]);
+		printf("\n");
+	}
+}
 | 
						|
+/* Copy len bytes buf2+src_offset -> buf1+dst_offset with new_memcpy() and check against memcpy(). */
+void test(int src_offset, int dst_offset, int len)
+{
+	memset(buf2, 0xaa, sizeof(buf2));	/* source pattern */
+	memset(buf1_ref, 0x55, sizeof(buf1_ref));
+	memset(buf1, 0x55, sizeof(buf1));
+
+	printf("Testing with offsets %d => %d and len %d...",
+	       src_offset, dst_offset, len);
+
+	new_memcpy(buf1 + dst_offset, buf2 + src_offset, len);
+	memcpy(buf1_ref + dst_offset, buf2 + src_offset, len);
+
+	if (memcmp(buf1, buf1_ref, sizeof(buf1)) != 0) {
+		printf("FAILED\n");
+		dump_mismatch(buf1, buf1_ref, sizeof(buf1));
+		return;
+	}
+	printf("OK\n");
+}
 | 
						|
+
 | 
						|
+/* Run the fixed list of { src_offset, dst_offset, len } cases through test(), in order. */
+int main(int argc, char *argv[])
+{
+	static const int cases[][3] = {
+		{ 0, 0, BUF_SIZE }, { 0, 0, 1 }, { 0, 0, 31 }, { 0, 0, 32 },
+		{ 0, 0, 127 }, { 0, 0, 128 }, { 4, 4, BUF_SIZE - 4 },
+		{ 1, 1, BUF_SIZE - 1 }, { 1, 1, 126 }, { 0, 3, 128 },
+		{ 1, 4, 128 },
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
+		test(cases[i][0], cases[i][1], cases[i][2]);
+
+	return 0;
+}
 |