diff options
131 files changed, 4 insertions, 22027 deletions
diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 57034e7..0000000 --- a/.gitignore +++ /dev/null @@ -1,24 +0,0 @@ -*.a -*.o -*.la -*.lo -*.png -*.pyc -.deps -.dirstamp -.libs -try-* -cache.txt -/Makefile.in -/aclocal.m4 -/autom4te.cache -/compile -/config.guess -/config.h.in -/config.sub -/configure -/depcomp -/install-sh -/ltmain.sh -/missing -/test-driver diff --git a/Makefile.am b/Makefile.am deleted file mode 100644 index f2879d0..0000000 --- a/Makefile.am +++ /dev/null @@ -1,347 +0,0 @@ -# Copyright (c) 2011, Linaro Limited -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Linaro nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# - -# Top level Makefile for cortex-strings - -# Used to record the compiler version in the executables -COMPILER = $(shell $(CC) --version 2>&1 | head -n1) - -# The main library -lib_LTLIBRARIES = \ - libcortex-strings.la - -## Test suite -check_PROGRAMS = \ - tests/test-memchr \ - tests/test-memcmp \ - tests/test-memcpy \ - tests/test-memmove \ - tests/test-memset \ - tests/test-strchr \ - tests/test-strrchr \ - tests/test-strcmp \ - tests/test-strcpy \ - tests/test-strlen \ - tests/test-strncmp \ - tests/test-strnlen - -# Options for the tests -tests_cflags = -D_GNU_SOURCE -I$(srcdir)/tests $(AM_CFLAGS) -tests_ldadd = libcortex-strings.la -tests_test_memchr_LDADD = $(tests_ldadd) -tests_test_memchr_CFLAGS = $(tests_cflags) -tests_test_memcmp_LDADD = $(tests_ldadd) -tests_test_memcmp_CFLAGS = $(tests_cflags) -tests_test_memcpy_LDADD = $(tests_ldadd) -tests_test_memcpy_CFLAGS = $(tests_cflags) -tests_test_memmove_LDADD = $(tests_ldadd) -tests_test_memmove_CFLAGS = $(tests_cflags) -tests_test_memset_LDADD = $(tests_ldadd) -tests_test_memset_CFLAGS = $(tests_cflags) -tests_test_strchr_LDADD = $(tests_ldadd) -tests_test_strchr_CFLAGS = $(tests_cflags) -tests_test_strcmp_LDADD = $(tests_ldadd) -tests_test_strcmp_CFLAGS = $(tests_cflags) -tests_test_strcpy_LDADD = $(tests_ldadd) -tests_test_strcpy_CFLAGS = $(tests_cflags) -tests_test_strlen_LDADD = $(tests_ldadd) -tests_test_strlen_CFLAGS = $(tests_cflags) -tests_test_strncmp_LDADD = $(tests_ldadd) 
-tests_test_strncmp_CFLAGS = $(tests_cflags) -tests_test_strnlen_LDADD = $(tests_ldadd) -tests_test_strnlen_CFLAGS = $(tests_cflags) -tests_test_strrchr_LDADD = $(tests_ldadd) -tests_test_strrchr_CFLAGS = $(tests_cflags) - -TESTS = $(check_PROGRAMS) - -## Benchmarks -noinst_PROGRAMS = \ - dhry \ - dhry-native \ - try-none \ - try-this \ - try-plain \ - try-newlib-c \ - try-bionic-c \ - try-glibc-c - -# Good 'ol Dhrystone -dhry_SOURCES = \ - benchmarks/dhry/dhry_1.c \ - benchmarks/dhry/dhry_2.c \ - benchmarks/dhry/dhry.h - -dhry_CFLAGS = -Dcompiler="\"$(COMPILER)\"" -Doptions="\"$(CFLAGS)\"" -dhry_LDADD = libcortex-strings.la - -dhry_native_SOURCES = $(dhry_SOURCES) -dhry_native_CFLAGS = $(dhry_CFLAGS) - -# Benchmark harness -noinst_LIBRARIES = \ - libmulti.a \ - libbionic-c.a \ - libglibc-c.a \ - libnewlib-c.a \ - libplain.a - -libmulti_a_SOURCES = \ - benchmarks/multi/harness.c - -libmulti_a_CFLAGS = -DVERSION=\"$(VERSION)\" $(AM_CFLAGS) - -## Other architecture independant implementaions -libbionic_c_a_SOURCES = \ - reference/bionic-c/bcopy.c \ - reference/bionic-c/memchr.c \ - reference/bionic-c/memcmp.c \ - reference/bionic-c/memcpy.c \ - reference/bionic-c/memset.c \ - reference/bionic-c/strchr.c \ - reference/bionic-c/strcmp.c \ - reference/bionic-c/strcpy.c \ - reference/bionic-c/strlen.c - -libglibc_c_a_SOURCES = \ - reference/glibc-c/memchr.c \ - reference/glibc-c/memcmp.c \ - reference/glibc-c/memcpy.c \ - reference/glibc-c/memset.c \ - reference/glibc-c/strchr.c \ - reference/glibc-c/strcmp.c \ - reference/glibc-c/strcpy.c \ - reference/glibc-c/strlen.c \ - reference/glibc-c/wordcopy.c \ - reference/glibc-c/memcopy.h \ - reference/glibc-c/pagecopy.h - -libnewlib_c_a_SOURCES = \ - reference/newlib-c/memchr.c \ - reference/newlib-c/memcmp.c \ - reference/newlib-c/memcpy.c \ - reference/newlib-c/memset.c \ - reference/newlib-c/strchr.c \ - reference/newlib-c/strcmp.c \ - reference/newlib-c/strcpy.c \ - reference/newlib-c/strlen.c \ - 
reference/newlib-c/shim.h - -libplain_a_SOURCES = \ - reference/plain/memset.c \ - reference/plain/memcpy.c \ - reference/plain/strcmp.c \ - reference/plain/strcpy.c - -try_none_SOURCES = -try_none_LDADD = libmulti.a -lrt -try_this_SOURCES = -try_this_LDADD = libmulti.a libcortex-strings.la -lrt -try_bionic_c_SOURCES = -try_bionic_c_LDADD = libmulti.a libbionic-c.a -lrt -try_glibc_c_SOURCES = -try_glibc_c_LDADD = libmulti.a libglibc-c.a -lrt -try_newlib_c_SOURCES = -try_newlib_c_LDADD = libmulti.a libnewlib-c.a -lrt -try_plain_SOURCES = -try_plain_LDADD = libmulti.a libplain.a -lrt - -# Architecture specific - -if HOST_AARCH32 - -if WITH_NEON -# Pull in the NEON specific files -neon_bionic_a9_sources = \ - reference/bionic-a9/memcpy.S \ - reference/bionic-a9/memset.S -neon_bionic_a15_sources = \ - reference/bionic-a15/memcpy.S \ - reference/bionic-a15/memset.S -fpu_flags = -mfpu=neon -else -if WITH_VFP -fpu_flags = -mfpu=vfp -else -fpu_flags = -msoft-float -endif -endif - -# Benchmarks and example programs -noinst_PROGRAMS += \ - try-bionic-a9 \ - try-bionic-a15 \ - try-csl \ - try-glibc \ - try-newlib \ - try-newlib-xscale - -# Libraries used in the benchmarks and examples -noinst_LIBRARIES += \ - libbionic-a9.a \ - libbionic-a15.a \ - libcsl.a \ - libglibc.a \ - libnewlib.a \ - libnewlib-xscale.a - -# Main library -libcortex_strings_la_SOURCES = \ - src/thumb-2/strcpy.c \ - src/arm/memchr.S \ - src/arm/strchr.S \ - src/thumb-2/strlen.S \ - src/arm/memset.S \ - src/arm/memcpy.S \ - src/arm/strcmp.S - -# Libraries containing the difference reference versions -libbionic_a9_a_SOURCES = \ - $(neon_bionic_a9_sources) \ - reference/bionic-a9/memcmp.S \ - reference/bionic-a9/strcmp.S \ - reference/bionic-a9/strcpy.S \ - reference/bionic-a9/strlen.c - -libbionic_a9_a_CFLAGS = -Wa,-mimplicit-it=thumb - -libbionic_a15_a_SOURCES = \ - $(neon_bionic_a15_sources) \ - reference/bionic-a15/memcmp.S \ - reference/bionic-a15/strcmp.S \ - reference/bionic-a15/strcpy.S \ - 
reference/bionic-a15/strlen.c - -libbionic_a15_a_CFLAGS = -Wa,-mimplicit-it=thumb - -libcsl_a_SOURCES = \ - reference/csl/memcpy.c \ - reference/csl/memset.c \ - reference/csl/arm_asm.h - -libglibc_a_SOURCES = \ - reference/glibc/memcpy.S \ - reference/glibc/memset.S \ - reference/glibc/strchr.S \ - reference/glibc/strlen.S - -libnewlib_a_SOURCES = \ - reference/newlib/memcpy.S \ - reference/newlib/strcmp.S \ - reference/newlib/strcpy.c \ - reference/newlib/strlen.c \ - reference/newlib/arm_asm.h \ - reference/newlib/shim.h - -libnewlib_xscale_a_SOURCES = \ - reference/newlib-xscale/memchr.c \ - reference/newlib-xscale/memcpy.c \ - reference/newlib-xscale/memset.c \ - reference/newlib-xscale/strchr.c \ - reference/newlib-xscale/strcmp.c \ - reference/newlib-xscale/strcpy.c \ - reference/newlib-xscale/strlen.c \ - reference/newlib-xscale/xscale.h - -# Flags for the benchmark helpers -try_bionic_a9_SOURCES = -try_bionic_a9_LDADD = libmulti.a libbionic-a9.a -lrt -try_bionic_a15_SOURCES = -try_bionic_a15_LDADD = libmulti.a libbionic-a15.a -lrt -try_csl_SOURCES = -try_csl_LDADD = libmulti.a libcsl.a -lrt -try_glibc_SOURCES = -try_glibc_LDADD = libmulti.a libglibc.a -lrt -try_newlib_SOURCES = -try_newlib_LDADD = libmulti.a libnewlib.a -lrt -try_newlib_xscale_SOURCES = -try_newlib_xscale_LDADD = libmulti.a libnewlib-xscale.a -lrt - -AM_CPPFLAGS = $(fpu_flags) -AM_LDFLAGS = $(fpu_flags) - -endif - -# aarch64 specific -if HOST_AARCH64 - -if WITH_SVE -libcortex_strings_la_SOURCES = \ - src/sve/memchr.S \ - src/sve/memcmp.S \ - src/sve/strchr.S \ - src/sve/strchrnul.S \ - src/sve/strcmp.S \ - src/sve/strcpy.S \ - src/sve/strlen.S \ - src/sve/strncmp.S \ - src/sve/strnlen.S \ - src/sve/strrchr.S -else -libcortex_strings_la_SOURCES = \ - src/aarch64/memchr.S \ - src/aarch64/memcmp.S \ - src/aarch64/strchr.S \ - src/aarch64/strchrnul.S \ - src/aarch64/strcmp.S \ - src/aarch64/strcpy.S \ - src/aarch64/strlen.S \ - src/aarch64/strncmp.S \ - src/aarch64/strnlen.S -endif - 
-libcortex_strings_la_SOURCES += \ - src/aarch64/memcpy.S \ - src/aarch64/memmove.S \ - src/aarch64/memset.S -endif - -libcortex_strings_la_LDFLAGS = -version-info 1:0:0 - -AM_CFLAGS = \ - -std=gnu99 -Wall \ - -fno-builtin -fno-stack-protector -U_FORTIFY_SOURCE \ - $(AM_CPPFLAGS) - -if WITH_SUBMACHINE -AM_CFLAGS += \ - -mtune=$(submachine) -endif - -EXTRA_DIST = \ - tests/hp-timing.h \ - tests/test-string.h \ - tests/test-skeleton.c \ - scripts/add-license.sh \ - scripts/bench.py \ - scripts/fixup.py \ - scripts/libplot.py \ - scripts/plot-align.py \ - scripts/plot.py \ - scripts/plot-sizes.py \ - scripts/plot-top.py \ - scripts/trim.sh \ - autogen.sh @@ -1,111 +1,5 @@ -= Cortex-A String Routines = +The 'cortex-string' project is now maintained in the 'ARM optimized +routines' [1]. All optimized routines have moved to the new project +and further changes should be done in that repository. -This package contains optimised string routines including memcpy(), memset(), -strcpy(), strlen() for the ARM Cortex-A series of cores. - -Various implementations of these routines are provided, including generic -implementations for ARMv7-A cores with/without Neon, Thumb2 implementations -and generic implementations for cores supporting AArch64. - -== Getting started == -First configure and then install libcortex-strings.so. To make other -applications use this library, either add -lcortex-strings to the link -command or use LD_PRELOAD to load the library into existing applications. - -Our intent is to get these routines into the common C libraries such -as GLIBC, Bionic, and Newlib. Your system may already include them! - -== Contents == - * src/ contains the routines themselves - * tests/ contains the unit tests - * reference/ contains reference copies of other ARM-focused - implementations gathered from around the Internet - * benchmarks/ contains various benchmarks, tools, and scripts used to - check and report on the different implementations. 
- -The src directory contains different variants organised by the -implementation they run on and optional features used. For example: - * src/thumb-2 contains generic non-NEON routines for AArch32 (with Thumb-2). - * src/arm contains tuned routines for Cortex-A class processors. - * src/aarch64 contains generic routines for AArch64. - * src/thumb contains generic routines for armv6-M (with Thumb). - -== Reference versions == -reference/ contains versions collected from various popular Open -Source libraries. These have been modified for use in benchmarking. -Please refer to the individual files for any licensing terms. - -The routines were collected from the following releases: - * EGLIBC 2.13 - * Newlib 1.19.0 - * Bionic android-2.3.5_r1 - -== Licensing == -All Linaro-authored routines are under the modified BSD license: - -Copyright (c) 2011, Linaro Limited -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -All ARM-authored routines are under the modified BSD license: - -Copyright (c) 2014 ARM Ltd -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -All third party routines are under a GPL compatible license. - -== Notes and Limitations == -Some of the implementations have been collected from other -projects and have a variety of licenses and copyright holders. - -== Style == -Assembly code attempts to follow the GLIBC coding convetions. They -are: - * Copyright headers in C style comment blocks - * Instructions indented with one tab - * Operands indented with one tab - * Text is wrapped at 70 characters - * End of line comments are fine +[1] https://github.com/ARM-software/optimized-routines diff --git a/autogen.sh b/autogen.sh deleted file mode 100755 index 8e0591c..0000000 --- a/autogen.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/sh -# -# autogen.sh glue for hplip -# -# HPLIP used to have five or so different autotools trees. Upstream -# has reduced it to two. Still, this script is capable of cleaning -# just about any possible mess of autoconf files. -# -# BE CAREFUL with trees that are not completely automake-generated, -# this script deletes all Makefile.in files it can find. -# -# Requires: automake 1.9, autoconf 2.57+ -# Conflicts: autoconf 2.13 -set -e - -# Refresh GNU autotools toolchain. -echo Cleaning autotools files... 
-find -type d -name autom4te.cache -print0 | xargs -0 rm -rf \; -find -type f \( -name missing -o -name install-sh -o -name mkinstalldirs \ - -o -name depcomp -o -name ltmain.sh -o -name configure \ - -o -name config.sub -o -name config.guess \ - -o -name Makefile.in \) -print0 | xargs -0 rm -f - -echo Running autoreconf... -autoreconf --force --install - -# For the Debian package build -test -d debian && { - # link these in Debian builds - rm -f config.sub config.guess - ln -s /usr/share/misc/config.sub . - ln -s /usr/share/misc/config.guess . - - # refresh list of executable scripts, to avoid possible breakage if - # upstream tarball does not include the file or if it is mispackaged - # for whatever reason. - [ "$1" = "updateexec" ] && { - echo Generating list of executable files... - rm -f debian/executable.files - find -type f -perm +111 ! -name '.*' -fprint debian/executable.files - } - - # Remove any files in upstream tarball that we don't have in the Debian - # package (because diff cannot remove files) - version=`dpkg-parsechangelog | awk '/Version:/ { print $2 }' | sed -e 's/-[^-]\+$//'` - source=`dpkg-parsechangelog | awk '/Source:/ { print $2 }' | tr -d ' '` - if test -r ../${source}_${version}.orig.tar.gz ; then - echo Generating list of files that should be removed... - rm -f debian/deletable.files - touch debian/deletable.files - [ -e debian/tmp ] && rm -rf debian/tmp - mkdir debian/tmp - ( cd debian/tmp ; tar -zxf ../../../${source}_${version}.orig.tar.gz ) - find debian/tmp/ -type f ! -name '.*' -print0 | xargs -0 -ri echo '{}' | \ - while read -r i ; do - if test -e "${i}" ; then - filename=$(echo "${i}" | sed -e 's#.*debian/tmp/[^/]\+/##') - test -e "${filename}" || echo "${filename}" >>debian/deletable.files - fi - done - rm -fr debian/tmp - else - echo Emptying list of files that should be deleted... 
- rm -f debian/deletable.files - touch debian/deletable.files - fi -} - -exit 0 diff --git a/benchmarks/dhry/dhry.h b/benchmarks/dhry/dhry.h deleted file mode 100644 index 69b8526..0000000 --- a/benchmarks/dhry/dhry.h +++ /dev/null @@ -1,311 +0,0 @@ -/*
- **************************************************************************
- * DHRYSTONE 2.1 BENCHMARK PC VERSION
- **************************************************************************
- *
- * "DHRYSTONE" Benchmark Program
- * -----------------------------
- *
- * Version: C, Version 2.1
- *
- * File: dhry.h (part 1 of 3)
- *
- * Date: May 25, 1988
- *
- * Author: Reinhold P. Weicker
- * Siemens AG, AUT E 51
- * Postfach 3220
- * 8520 Erlangen
- * Germany (West)
- * Phone: [+49]-9131-7-20330
- * (8-17 Central European Time)
- * Usenet: ..!mcsun!unido!estevax!weicker
- *
- * Original Version (in Ada) published in
- * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
- * pp. 1013 - 1030, together with the statistics
- * on which the distribution of statements etc. is based.
- *
- * In this C version, the following C library functions are used:
- * - strcpy, strcmp (inside the measurement loop)
- * - printf, scanf (outside the measurement loop)
- * In addition, Berkeley UNIX system calls "times ()" or "time ()"
- * are used for execution time measurement. For measurements
- * on other systems, these calls have to be changed.
- *
- * Collection of Results:
- * Reinhold Weicker (address see above) and
- *
- * Rick Richardson
- * PC Research. Inc.
- * 94 Apple Orchard Drive
- * Tinton Falls, NJ 07724
- * Phone: (201) 389-8963 (9-17 EST)
- * Usenet: ...!uunet!pcrat!rick
- *
- * Please send results to Rick Richardson and/or Reinhold Weicker.
- * Complete information should be given on hardware and software used.
- * Hardware information includes: Machine type, CPU, type and size
- * of caches; for microprocessors: clock frequency, memory speed
- * (number of wait states).
- * Software information includes: Compiler (and runtime library)
- * manufacturer and version, compilation switches, OS version.
- * The Operating System version may give an indication about the
- * compiler; Dhrystone itself performs no OS calls in the measurement
- * loop.
- *
- * The complete output generated by the program should be mailed
- * such that at least some checks for correctness can be made.
- *
- **************************************************************************
- *
- * This version has changes made by Roy Longbottom to conform to a common
- * format for a series of standard benchmarks for PCs:
- *
- * Running time greater than 5 seconds due to inaccuracy of the PC clock.
- *
- * Automatic adjustment of run time, no manually inserted parameters.
- *
- * Initial display of calibration times to confirm linearity.
- *
- * Display of results within one screen (or at a slow speed as the test
- * progresses) so that it can be seen to have run successfully.
- *
- * Facilities to type in details of system used etc.
- *
- * All results and details appended to a results file.
- *
- *
- * Roy Longbottom
- * 101323.2241@compuserve.com
- *
- **************************************************************************
- *
- * For details of history, changes, other defines, benchmark construction
- * statistics see official versions from ftp.nosc.mil/pub/aburto where
- * the latest table of results (dhry.tbl) are available. See also
- * netlib@ornl.gov
- *
- **************************************************************************
- *
- * Defines: The following "Defines" are possible:
- * -DREG=register (default: Not defined)
- * As an approximation to what an average C programmer
- * might do, the "register" storage class is applied
- * (if enabled by -DREG=register)
- * - for local variables, if they are used (dynamically)
- * five or more times
- * - for parameters if they are used (dynamically)
- * six or more times
- * Note that an optimal "register" strategy is
- * compiler-dependent, and that "register" declarations
- * do not necessarily lead to faster execution.
- * -DNOSTRUCTASSIGN (default: Not defined)
- * Define if the C compiler does not support
- * assignment of structures.
- * -DNOENUMS (default: Not defined)
- * Define if the C compiler does not support
- * enumeration types.
- ***************************************************************************
- *
- * Compilation model and measurement (IMPORTANT):
- *
- * This C version of Dhrystone consists of three files:
- * - dhry.h (this file, containing global definitions and comments)
- * - dhry_1.c (containing the code corresponding to Ada package Pack_1)
- * - dhry_2.c (containing the code corresponding to Ada package Pack_2)
- *
- * The following "ground rules" apply for measurements:
- * - Separate compilation
- * - No procedure merging
- * - Otherwise, compiler optimizations are allowed but should be indicated
- * - Default results are those without register declarations
- * See the companion paper "Rationale for Dhrystone Version 2" for a more
- * detailed discussion of these ground rules.
- *
- * For 16-Bit processors (e.g. 80186, 80286), times for all compilation
- * models ("small", "medium", "large" etc.) should be given if possible,
- * together with a definition of these models for the compiler system used.
- *
- **************************************************************************
- * Examples of Pentium Results
- *
- * Dhrystone Benchmark Version 2.1 (Language: C)
- *
- * Month run 4/1996
- * PC model Escom
- * CPU Pentium
- * Clock MHz 100
- * Cache 256K
- * Options Neptune chipset
- * OS/DOS Windows 95
- * Compiler Watcom C/ C++ 10.5 Win386
- * OptLevel -otexan -zp8 -fp5 -5r
- * Run by Roy Longbottom
- * From UK
- * Mail 101323.2241@compuserve.com
- *
- * Final values (* implementation-dependent):
- *
- * Int_Glob: O.K. 5
- * Bool_Glob: O.K. 1
- * Ch_1_Glob: O.K. A
- * Ch_2_Glob: O.K. B
- * Arr_1_Glob[8]: O.K. 7
- * Arr_2_Glob8/7: O.K. 1600010
- * Ptr_Glob->
- * Ptr_Comp: * 98008
- * Discr: O.K. 0
- * Enum_Comp: O.K. 2
- * Int_Comp: O.K. 17
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Next_Ptr_Glob->
- * Ptr_Comp: * 98008 same as above
- * Discr: O.K. 0
- * Enum_Comp: O.K. 1
- * Int_Comp: O.K. 18
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Int_1_Loc: O.K. 5
- * Int_2_Loc: O.K. 13
- * Int_3_Loc: O.K. 7
- * Enum_Loc: O.K. 1
- * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
- * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
- *
- * Register option Selected.
- *
- * Microseconds 1 loop: 4.53
- * Dhrystones / second: 220690
- * VAX MIPS rating: 125.61
- *
- *
- * Dhrystone Benchmark Version 2.1 (Language: C)
- *
- * Month run 4/1996
- * PC model Escom
- * CPU Pentium
- * Clock MHz 100
- * Cache 256K
- * Options Neptune chipset
- * OS/DOS Windows 95
- * Compiler Watcom C/ C++ 10.5 Win386
- * OptLevel No optimisation
- * Run by Roy Longbottom
- * From UK
- * Mail 101323.2241@compuserve.com
- *
- * Final values (* implementation-dependent):
- *
- * Int_Glob: O.K. 5
- * Bool_Glob: O.K. 1
- * Ch_1_Glob: O.K. A
- * Ch_2_Glob: O.K. B
- * Arr_1_Glob[8]: O.K. 7
- * Arr_2_Glob8/7: O.K. 320010
- * Ptr_Glob->
- * Ptr_Comp: * 98004
- * Discr: O.K. 0
- * Enum_Comp: O.K. 2
- * Int_Comp: O.K. 17
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Next_Ptr_Glob->
- * Ptr_Comp: * 98004 same as above
- * Discr: O.K. 0
- * Enum_Comp: O.K. 1
- * Int_Comp: O.K. 18
- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
- * Int_1_Loc: O.K. 5
- * Int_2_Loc: O.K. 13
- * Int_3_Loc: O.K. 7
- * Enum_Loc: O.K. 1
- * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
- * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
- *
- * Register option Not selected.
- *
- * Microseconds 1 loop: 20.06
- * Dhrystones / second: 49844
- * VAX MIPS rating: 28.37
- *
- **************************************************************************
- */
-
-/* Compiler and system dependent definitions: */
-
-#ifndef TIME
-#define TIMES
-#endif
- /* Use times(2) time function unless */
- /* explicitly defined otherwise */
-
-#ifdef TIMES
-/* #include <sys/types.h>
- #include <sys/times.h> */
- /* for "times" */
-#endif
-
-#define Mic_secs_Per_Second 1000000.0
- /* Berkeley UNIX C returns process times in seconds/HZ */
-
-#ifdef NOSTRUCTASSIGN
-#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
-#else
-#define structassign(d, s) d = s
-#endif
-
-#ifdef NOENUM
-#define Ident_1 0
-#define Ident_2 1
-#define Ident_3 2
-#define Ident_4 3
-#define Ident_5 4
- typedef int Enumeration;
-#else
- typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
- Enumeration;
-#endif
- /* for boolean and enumeration types in Ada, Pascal */
-
-/* General definitions: */
-
-#include <stdio.h>
-#include <string.h>
-
- /* for strcpy, strcmp */
-
-#define Null 0
- /* Value of a Null pointer */
-#define true 1
-#define false 0
-
-typedef int One_Thirty;
-typedef int One_Fifty;
-typedef char Capital_Letter;
-typedef int Boolean;
-typedef char Str_30 [31];
-typedef int Arr_1_Dim [50];
-typedef int Arr_2_Dim [50] [50];
-
-typedef struct record
- {
- struct record *Ptr_Comp;
- Enumeration Discr;
- union {
- struct {
- Enumeration Enum_Comp;
- int Int_Comp;
- char Str_Comp [31];
- } var_1;
- struct {
- Enumeration E_Comp_2;
- char Str_2_Comp [31];
- } var_2;
- struct {
- char Ch_1_Comp;
- char Ch_2_Comp;
- } var_3;
- } variant;
- } Rec_Type, *Rec_Pointer;
-
-
-
diff --git a/benchmarks/dhry/dhry_1.c b/benchmarks/dhry/dhry_1.c deleted file mode 100644 index 495610a..0000000 --- a/benchmarks/dhry/dhry_1.c +++ /dev/null @@ -1,778 +0,0 @@ -/*
- *************************************************************************
- *
- * "DHRYSTONE" Benchmark Program
- * -----------------------------
- *
- * Version: C, Version 2.1
- *
- * File: dhry_1.c (part 2 of 3)
- *
- * Date: May 25, 1988
- *
- * Author: Reinhold P. Weicker
- *
- *************************************************************************
- */
-
- #include <time.h>
- #include <stdlib.h>
- #include <stdio.h>
- #include "dhry.h"
- /*COMPILER COMPILER COMPILER COMPILER COMPILER COMPILER COMPILER*/
-
- #ifdef COW
- #define compiler "Watcom C/C++ 10.5 Win386"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CNW
- #define compiler "Watcom C/C++ 10.5 Win386"
- #define options " No optimisation"
- #endif
- #ifdef COD
- #define compiler "Watcom C/C++ 10.5 Dos4GW"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CND
- #define compiler "Watcom C/C++ 10.5 Dos4GW"
- #define options " No optimisation"
- #endif
- #ifdef CONT
- #define compiler "Watcom C/C++ 10.5 Win32NT"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CNNT
- #define compiler "Watcom C/C++ 10.5 Win32NT"
- #define options " No optimisation"
- #endif
- #ifdef COO2
- #define compiler "Watcom C/C++ 10.5 OS/2-32"
- #define options " -otexan -zp8 -5r -ms"
- #endif
- #ifdef CNO2
- #define compiler "Watcom C/C++ 10.5 OS/2-32"
- #define options " No optimisation"
- #endif
-
-
-/* Global Variables: */
-
-Rec_Pointer Ptr_Glob,
- Next_Ptr_Glob;
-int Int_Glob;
- Boolean Bool_Glob;
- char Ch_1_Glob,
- Ch_2_Glob;
- int Arr_1_Glob [50];
- int Arr_2_Glob [50] [50];
- int getinput = 1;
-
-
- char Reg_Define[100] = "Register option Selected.";
-
- Enumeration Func_1 (Capital_Letter Ch_1_Par_Val,
- Capital_Letter Ch_2_Par_Val);
- /*
- forward declaration necessary since Enumeration may not simply be int
- */
-
- #ifndef ROPT
- #define REG
- /* REG becomes defined as empty */
- /* i.e. no register variables */
- #else
- #define REG register
- #endif
-
- void Proc_1 (REG Rec_Pointer Ptr_Val_Par);
- void Proc_2 (One_Fifty *Int_Par_Ref);
- void Proc_3 (Rec_Pointer *Ptr_Ref_Par);
- void Proc_4 ();
- void Proc_5 ();
- void Proc_6 (Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par);
- void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
- One_Fifty *Int_Par_Ref);
- void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref,
- int Int_1_Par_Val, int Int_2_Par_Val);
-
- Boolean Func_2 (Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref);
-
-
- /* variables for time measurement: */
-
- #define Too_Small_Time 2
- /* Measurements should last at least 2 seconds */
-
- double Begin_Time,
- End_Time,
- User_Time;
-
- double Microseconds,
- Dhrystones_Per_Second,
- Vax_Mips;
-
- /* end of variables for time measurement */
-
-
- void main (int argc, char *argv[])
- /*****/
-
- /* main program, corresponds to procedures */
- /* Main and Proc_0 in the Ada version */
- {
- double dtime();
-
- One_Fifty Int_1_Loc;
- REG One_Fifty Int_2_Loc;
- One_Fifty Int_3_Loc;
- REG char Ch_Index;
- Enumeration Enum_Loc;
- Str_30 Str_1_Loc;
- Str_30 Str_2_Loc;
- REG int Run_Index;
- REG int Number_Of_Runs;
- int endit, count = 10;
- FILE *Ap;
- char general[9][80] = {" "};
-
- /* Initializations */
- if (argc > 1)
- {
- switch (argv[1][0])
- {
- case 'N':
- getinput = 0;
- break;
- case 'n':
- getinput = 0;
- break;
- }
- }
-
- if ((Ap = fopen("Dhry.txt","a+")) == NULL)
- {
- printf("Can not open Dhry.txt\n\n");
- printf("Press any key\n");
- exit(1);
- }
-
-/***********************************************************************
- * Change for compiler and optimisation used *
- ***********************************************************************/
-
- Next_Ptr_Glob = (Rec_Pointer) malloc (sizeof (Rec_Type));
- Ptr_Glob = (Rec_Pointer) malloc (sizeof (Rec_Type));
-
- Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
- Ptr_Glob->Discr = Ident_1;
- Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
- Ptr_Glob->variant.var_1.Int_Comp = 40;
- strcpy (Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING");
- strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
-
- Arr_2_Glob [8][7] = 10;
- /* Was missing in published program. Without this statement, */
- /* Arr_2_Glob [8][7] would have an undefined value. */
- /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
- /* overflow may occur for this array element. */
-
- printf ("\n");
- printf ("Dhrystone Benchmark, Version 2.1 (Language: C or C++)\n");
- printf ("\n");
-
- if (getinput == 0)
- {
- printf ("No run time input data\n\n");
- }
- else
- {
- printf ("With run time input data\n\n");
- }
-
- printf ("Compiler %s\n", compiler);
- printf ("Optimisation %s\n", options);
- #ifdef ROPT
- printf ("Register option selected\n\n");
- #else
- printf ("Register option not selected\n\n");
- strcpy(Reg_Define, "Register option Not selected.");
- #endif
-
- /*
- if (Reg)
- {
- printf ("Program compiled with 'register' attribute\n");
- printf ("\n");
- }
- else
- {
- printf ("Program compiled without 'register' attribute\n");
- printf ("\n");
- }
-
- printf ("Please give the number of runs through the benchmark: ");
- {
- int n;
- scanf ("%d", &n);
- Number_Of_Runs = n;
- }
- printf ("\n");
- printf ("Execution starts, %d runs through Dhrystone\n",
- Number_Of_Runs);
- */
-
- Number_Of_Runs = 5000;
-
- do
- {
-
- Number_Of_Runs = Number_Of_Runs * 2;
- count = count - 1;
- Arr_2_Glob [8][7] = 10;
-
- /***************/
- /* Start timer */
- /***************/
-
- Begin_Time = dtime();
-
- for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
- {
-
- Proc_5();
- Proc_4();
- /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
- Int_1_Loc = 2;
- Int_2_Loc = 3;
- strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
- Enum_Loc = Ident_2;
- Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
- /* Bool_Glob == 1 */
- while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
- {
- Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
- /* Int_3_Loc == 7 */
- Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
- /* Int_3_Loc == 7 */
- Int_1_Loc += 1;
- } /* while */
- /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
- Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
- /* Int_Glob == 5 */
- Proc_1 (Ptr_Glob);
- for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
- /* loop body executed twice */
- {
- if (Enum_Loc == Func_1 (Ch_Index, 'C'))
- /* then, not executed */
- {
- Proc_6 (Ident_1, &Enum_Loc);
- strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
- Int_2_Loc = Run_Index;
- Int_Glob = Run_Index;
- }
- }
- /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
- Int_2_Loc = Int_2_Loc * Int_1_Loc;
- Int_1_Loc = Int_2_Loc / Int_3_Loc;
- Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
- /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
- Proc_2 (&Int_1_Loc);
- /* Int_1_Loc == 5 */
-
- } /* loop "for Run_Index" */
-
- /**************/
- /* Stop timer */
- /**************/
-
- End_Time = dtime();
- User_Time = End_Time - Begin_Time;
-
- printf ("%12.0f runs %6.2f seconds \n",(double) Number_Of_Runs, User_Time);
- if (User_Time > 5)
- {
- count = 0;
- }
- else
- {
- if (User_Time < 0.1)
- {
- Number_Of_Runs = Number_Of_Runs * 5;
- }
- }
- } /* calibrate/run do while */
- while (count >0);
-
- printf ("\n");
- printf ("Final values (* implementation-dependent):\n");
- printf ("\n");
- printf ("Int_Glob: ");
- if (Int_Glob == 5) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Int_Glob);
-
- printf ("Bool_Glob: ");
- if (Bool_Glob == 1) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Bool_Glob);
-
- printf ("Ch_1_Glob: ");
- if (Ch_1_Glob == 'A') printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%c ", Ch_1_Glob);
-
- printf ("Ch_2_Glob: ");
- if (Ch_2_Glob == 'B') printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%c\n", Ch_2_Glob);
-
- printf ("Arr_1_Glob[8]: ");
- if (Arr_1_Glob[8] == 7) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Arr_1_Glob[8]);
-
- printf ("Arr_2_Glob8/7: ");
- if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%10d\n", Arr_2_Glob[8][7]);
-
- printf ("Ptr_Glob-> ");
- printf (" Ptr_Comp: * %d\n", (int) Ptr_Glob->Ptr_Comp);
-
- printf (" Discr: ");
- if (Ptr_Glob->Discr == 0) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Ptr_Glob->Discr);
-
- printf ("Enum_Comp: ");
- if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
-
- printf (" Int_Comp: ");
- if (Ptr_Glob->variant.var_1.Int_Comp == 17) printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Ptr_Glob->variant.var_1.Int_Comp);
-
- printf ("Str_Comp: ");
- if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Ptr_Glob->variant.var_1.Str_Comp);
-
- printf ("Next_Ptr_Glob-> ");
- printf (" Ptr_Comp: * %d", (int) Next_Ptr_Glob->Ptr_Comp);
- printf (" same as above\n");
-
- printf (" Discr: ");
- if (Next_Ptr_Glob->Discr == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Next_Ptr_Glob->Discr);
-
- printf ("Enum_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
-
- printf (" Int_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Next_Ptr_Glob->variant.var_1.Int_Comp);
-
- printf ("Str_Comp: ");
- if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
-
- printf ("Int_1_Loc: ");
- if (Int_1_Loc == 5)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Int_1_Loc);
-
- printf ("Int_2_Loc: ");
- if (Int_2_Loc == 13)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Int_2_Loc);
-
- printf ("Int_3_Loc: ");
- if (Int_3_Loc == 7)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d ", Int_3_Loc);
-
- printf ("Enum_Loc: ");
- if (Enum_Loc == 1)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%d\n", Enum_Loc);
-
- printf ("Str_1_Loc: ");
- if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Str_1_Loc);
-
- printf ("Str_2_Loc: ");
- if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
- printf ("O.K. ");
- else printf ("WRONG ");
- printf ("%s\n", Str_2_Loc);
-
- printf ("\n");
-
-
- if (User_Time < Too_Small_Time)
- {
- printf ("Measured time too small to obtain meaningful results\n");
- printf ("Please increase number of runs\n");
- printf ("\n");
- }
- else
- {
- Microseconds = User_Time * Mic_secs_Per_Second
- / (double) Number_Of_Runs;
- Dhrystones_Per_Second = (double) Number_Of_Runs / User_Time;
- Vax_Mips = Dhrystones_Per_Second / 1757.0;
-
- printf ("Microseconds for one run through Dhrystone: ");
- printf ("%12.2lf \n", Microseconds);
- printf ("Dhrystones per Second: ");
- printf ("%10.0lf \n", Dhrystones_Per_Second);
- printf ("VAX MIPS rating = ");
- printf ("%12.2lf \n",Vax_Mips);
- printf ("\n");
-
-/************************************************************************
- * Type details of hardware, software etc. *
- ************************************************************************/
-
- if (getinput == 1)
- {
- printf ("Enter the following which will be added with results to file DHRY.TXT\n");
- printf ("When submitting a number of results you need only provide details once\n");
- printf ("but a cross reference such as an abbreviated CPU type would be useful.\n");
- printf ("You can kill (exit or close) the program now and no data will be added.\n\n");
-
- printf ("PC Supplier/model ? ");
- gets(general[1]);
-
- printf ("CPU chip ? ");
- gets(general[2]);
-
- printf ("Clock MHz ? ");
- gets(general[3]);
-
- printf ("Cache size ? ");
- gets(general[4]);
-
- printf ("Chipset & H/W options ? ");
- gets(general[5]);
-
- printf ("OS/DOS version ? ");
- gets(general[6]);
-
- printf ("Your name ? ");
- gets(general[7]);
-
- printf ("Company/Location ? ");
- gets(general[8]);
-
- printf ("E-mail address ? ");
- gets(general[0]);
- }
-/************************************************************************
- * Add results to output file Dhry.txt *
- ************************************************************************/
- fprintf (Ap, "-------------------- -----------------------------------"
- "\n");
- fprintf (Ap, "Dhrystone Benchmark Version 2.1 (Language: C++)\n\n");
- fprintf (Ap, "PC model %s\n", general[1]);
- fprintf (Ap, "CPU %s\n", general[2]);
- fprintf (Ap, "Clock MHz %s\n", general[3]);
- fprintf (Ap, "Cache %s\n", general[4]);
- fprintf (Ap, "Options %s\n", general[5]);
- fprintf (Ap, "OS/DOS %s\n", general[6]);
- fprintf (Ap, "Compiler %s\n", compiler);
- fprintf (Ap, "OptLevel %s\n", options);
- fprintf (Ap, "Run by %s\n", general[7]);
- fprintf (Ap, "From %s\n", general[8]);
- fprintf (Ap, "Mail %s\n\n", general[0]);
-
- fprintf (Ap, "Final values (* implementation-dependent):\n");
- fprintf (Ap, "\n");
- fprintf (Ap, "Int_Glob: ");
- if (Int_Glob == 5) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_Glob);
-
- fprintf (Ap, "Bool_Glob: ");
- if (Bool_Glob == 1) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Bool_Glob);
-
- fprintf (Ap, "Ch_1_Glob: ");
- if (Ch_1_Glob == 'A') fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%c\n", Ch_1_Glob);
-
- fprintf (Ap, "Ch_2_Glob: ");
- if (Ch_2_Glob == 'B') fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%c\n", Ch_2_Glob);
-
- fprintf (Ap, "Arr_1_Glob[8]: ");
- if (Arr_1_Glob[8] == 7) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Arr_1_Glob[8]);
-
- fprintf (Ap, "Arr_2_Glob8/7: ");
- if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%10d\n", Arr_2_Glob[8][7]);
-
- fprintf (Ap, "Ptr_Glob-> \n");
- fprintf (Ap, " Ptr_Comp: * %d\n", (int) Ptr_Glob->Ptr_Comp);
-
- fprintf (Ap, " Discr: ");
- if (Ptr_Glob->Discr == 0) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Ptr_Glob->Discr);
-
- fprintf (Ap, " Enum_Comp: ");
- if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
-
- fprintf (Ap, " Int_Comp: ");
- if (Ptr_Glob->variant.var_1.Int_Comp == 17) fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Ptr_Glob->variant.var_1.Int_Comp);
-
- fprintf (Ap, " Str_Comp: ");
- if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Ptr_Glob->variant.var_1.Str_Comp);
-
- fprintf (Ap, "Next_Ptr_Glob-> \n");
- fprintf (Ap, " Ptr_Comp: * %d", (int) Next_Ptr_Glob->Ptr_Comp);
- fprintf (Ap, " same as above\n");
-
- fprintf (Ap, " Discr: ");
- if (Next_Ptr_Glob->Discr == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Next_Ptr_Glob->Discr);
-
- fprintf (Ap, " Enum_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
-
- fprintf (Ap, " Int_Comp: ");
- if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
-
- fprintf (Ap, " Str_Comp: ");
- if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
- "DHRYSTONE PROGRAM, SOME STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
-
- fprintf (Ap, "Int_1_Loc: ");
- if (Int_1_Loc == 5)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_1_Loc);
-
- fprintf (Ap, "Int_2_Loc: ");
- if (Int_2_Loc == 13)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_2_Loc);
-
- fprintf (Ap, "Int_3_Loc: ");
- if (Int_3_Loc == 7)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Int_3_Loc);
-
- fprintf (Ap, "Enum_Loc: ");
- if (Enum_Loc == 1)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%d\n", Enum_Loc);
-
- fprintf (Ap, "Str_1_Loc: ");
- if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Str_1_Loc);
-
- fprintf (Ap, "Str_2_Loc: ");
- if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
- fprintf (Ap, "O.K. ");
- else fprintf (Ap, "WRONG ");
- fprintf (Ap, "%s\n", Str_2_Loc);
-
-
- fprintf (Ap, "\n");
- fprintf(Ap,"%s\n",Reg_Define);
- fprintf (Ap, "\n");
- fprintf(Ap,"Microseconds 1 loop: %12.2lf\n",Microseconds);
- fprintf(Ap,"Dhrystones / second: %10.0lf\n",Dhrystones_Per_Second);
- fprintf(Ap,"VAX MIPS rating: %12.2lf\n\n",Vax_Mips);
- fclose(Ap);
- }
-
- printf ("\n");
- printf ("A new results file will have been created in the same directory as the\n");
- printf (".EXE files if one did not already exist. If you made a mistake on input, \n");
- printf ("you can use a text editor to correct it, delete the results or copy \n");
- printf ("them to a different file name. If you intend to run multiple tests you\n");
- printf ("you may wish to rename DHRY.TXT with a more informative title.\n\n");
- printf ("Please submit feedback and results files as a posting in Section 12\n");
- printf ("or to Roy_Longbottom@compuserve.com\n\n");
-
- if (getinput == 1)
- {
- printf("Press any key to exit\n");
- printf ("\nIf this is displayed you must close the window in the normal way\n");
- }
- }
-
-
- void Proc_1 (REG Rec_Pointer Ptr_Val_Par)
- /******************/
-
- /* executed once */
- {
- REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
- /* == Ptr_Glob_Next */
- /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
- /* corresponds to "rename" in Ada, "with" in Pascal */
-
- structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
- Ptr_Val_Par->variant.var_1.Int_Comp = 5;
- Next_Record->variant.var_1.Int_Comp
- = Ptr_Val_Par->variant.var_1.Int_Comp;
- Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
- Proc_3 (&Next_Record->Ptr_Comp);
- /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
- == Ptr_Glob->Ptr_Comp */
- if (Next_Record->Discr == Ident_1)
- /* then, executed */
- {
- Next_Record->variant.var_1.Int_Comp = 6;
- Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
- &Next_Record->variant.var_1.Enum_Comp);
- Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
- Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
- &Next_Record->variant.var_1.Int_Comp);
- }
- else /* not executed */
- structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
- } /* Proc_1 */
-
-
- void Proc_2 (One_Fifty *Int_Par_Ref)
- /******************/
- /* executed once */
- /* *Int_Par_Ref == 1, becomes 4 */
-
- {
- One_Fifty Int_Loc;
- Enumeration Enum_Loc;
-
- Int_Loc = *Int_Par_Ref + 10;
- do /* executed once */
- if (Ch_1_Glob == 'A')
- /* then, executed */
- {
- Int_Loc -= 1;
- *Int_Par_Ref = Int_Loc - Int_Glob;
- Enum_Loc = Ident_1;
- } /* if */
- while (Enum_Loc != Ident_1); /* true */
- } /* Proc_2 */
-
-
- void Proc_3 (Rec_Pointer *Ptr_Ref_Par)
- /******************/
- /* executed once */
- /* Ptr_Ref_Par becomes Ptr_Glob */
-
- {
- if (Ptr_Glob != Null)
- /* then, executed */
- *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
- Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
- } /* Proc_3 */
-
-
-void Proc_4 () /* without parameters */
- /*******/
- /* executed once */
- {
- Boolean Bool_Loc;
-
- Bool_Loc = Ch_1_Glob == 'A';
- Bool_Glob = Bool_Loc | Bool_Glob;
- Ch_2_Glob = 'B';
- } /* Proc_4 */
-
-
- void Proc_5 () /* without parameters */
- /*******/
- /* executed once */
- {
- Ch_1_Glob = 'A';
- Bool_Glob = false;
- } /* Proc_5 */
-
-
- /* Procedure for the assignment of structures, */
- /* if the C compiler doesn't support this feature */
- #ifdef NOSTRUCTASSIGN
- memcpy (d, s, l)
- register char *d;
- register char *s;
- register int l;
- {
- while (l--) *d++ = *s++;
- }
- #endif
-
-
-double dtime()
-{
-
- /* #include <ctype.h> */
-
- #define HZ CLOCKS_PER_SEC
- clock_t tnow;
-
- double q;
- tnow = clock();
- q = (double)tnow / (double)HZ;
- return q;
-}
diff --git a/benchmarks/dhry/dhry_2.c b/benchmarks/dhry/dhry_2.c deleted file mode 100644 index 74cb65b..0000000 --- a/benchmarks/dhry/dhry_2.c +++ /dev/null @@ -1,186 +0,0 @@ - /*
- *************************************************************************
- *
- * "DHRYSTONE" Benchmark Program
- * -----------------------------
- *
- * Version: C, Version 2.1
- *
- * File: dhry_2.c (part 3 of 3)
- *
- * Date: May 25, 1988
- *
- * Author: Reinhold P. Weicker
- *
- *************************************************************************
- */
-
- #include "dhry.h"
-
- #ifndef REG
- #define REG
- /* REG becomes defined as empty */
- /* i.e. no register variables */
- #else
- #define REG register
- #endif
-
- extern int Int_Glob;
- extern char Ch_1_Glob;
-
- Boolean Func_3 (Enumeration Enum_Par_Val);
-
- void Proc_6 (Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par)
- /*********************************/
- /* executed once */
- /* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
-
- {
- *Enum_Ref_Par = Enum_Val_Par;
- if (! Func_3 (Enum_Val_Par))
- /* then, not executed */
- *Enum_Ref_Par = Ident_4;
- switch (Enum_Val_Par)
- {
- case Ident_1:
- *Enum_Ref_Par = Ident_1;
- break;
- case Ident_2:
- if (Int_Glob > 100)
- /* then */
- *Enum_Ref_Par = Ident_1;
- else *Enum_Ref_Par = Ident_4;
- break;
- case Ident_3: /* executed */
- *Enum_Ref_Par = Ident_2;
- break;
- case Ident_4: break;
- case Ident_5:
- *Enum_Ref_Par = Ident_3;
- break;
- } /* switch */
- } /* Proc_6 */
-
-
- void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
- One_Fifty *Int_Par_Ref)
- /**********************************************/
- /* executed three times */
- /* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
- /* Int_Par_Ref becomes 7 */
- /* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
- /* Int_Par_Ref becomes 17 */
- /* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
- /* Int_Par_Ref becomes 18 */
-
- {
- One_Fifty Int_Loc;
-
- Int_Loc = Int_1_Par_Val + 2;
- *Int_Par_Ref = Int_2_Par_Val + Int_Loc;
- } /* Proc_7 */
-
-
- void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref,
- int Int_1_Par_Val, int Int_2_Par_Val)
- /*********************************************************************/
- /* executed once */
- /* Int_Par_Val_1 == 3 */
- /* Int_Par_Val_2 == 7 */
-
- {
- REG One_Fifty Int_Index;
- REG One_Fifty Int_Loc;
-
- Int_Loc = Int_1_Par_Val + 5;
- Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
- Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
- Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
- for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
- Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
- Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
- Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
- Int_Glob = 5;
- } /* Proc_8 */
-
-
- Enumeration Func_1 (Capital_Letter Ch_1_Par_Val,
- Capital_Letter Ch_2_Par_Val)
- /*************************************************/
- /* executed three times */
- /* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
- /* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
- /* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
-
- {
- Capital_Letter Ch_1_Loc;
- Capital_Letter Ch_2_Loc;
-
- Ch_1_Loc = Ch_1_Par_Val;
- Ch_2_Loc = Ch_1_Loc;
- if (Ch_2_Loc != Ch_2_Par_Val)
- /* then, executed */
- return (Ident_1);
- else /* not executed */
- {
- Ch_1_Glob = Ch_1_Loc;
- return (Ident_2);
- }
- } /* Func_1 */
-
-
- Boolean Func_2 (Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref)
- /*************************************************/
- /* executed once */
- /* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
- /* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
-
- {
- REG One_Thirty Int_Loc;
- Capital_Letter Ch_Loc;
-
- Int_Loc = 2;
- while (Int_Loc <= 2) /* loop body executed once */
- if (Func_1 (Str_1_Par_Ref[Int_Loc],
- Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
- /* then, executed */
- {
- Ch_Loc = 'A';
- Int_Loc += 1;
- } /* if, while */
- if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
- /* then, not executed */
- Int_Loc = 7;
- if (Ch_Loc == 'R')
- /* then, not executed */
- return (true);
- else /* executed */
- {
- if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
- /* then, not executed */
- {
- Int_Loc += 7;
- Int_Glob = Int_Loc;
- return (true);
- }
- else /* executed */
- return (false);
- } /* if Ch_Loc */
- } /* Func_2 */
-
-
- Boolean Func_3 (Enumeration Enum_Par_Val)
- /***************************/
- /* executed once */
- /* Enum_Par_Val == Ident_3 */
-
- {
- Enumeration Enum_Loc;
-
- Enum_Loc = Enum_Par_Val;
- if (Enum_Loc == Ident_3)
- /* then, executed */
- return (true);
- else /* not executed */
- return (false);
- } /* Func_3 */
diff --git a/benchmarks/multi/harness.c b/benchmarks/multi/harness.c deleted file mode 100644 index 257a308..0000000 --- a/benchmarks/multi/harness.c +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Copyright (c) 2011, Linaro Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the Linaro nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** A simple harness that times how long a string function takes to - * run. 
- */ - -/* PENDING: Add EPL */ - -#include <string.h> -#include <time.h> -#include <stdint.h> -#include <stdlib.h> -#include <stdio.h> -#include <stdbool.h> -#include <assert.h> -#include <unistd.h> -#include <errno.h> - -#define NUM_ELEMS(_x) (sizeof(_x) / sizeof((_x)[0])) - -#ifndef VERSION -#define VERSION "(unknown version)" -#endif - -/** Make sure a function is called by using the return value */ -#define SPOIL(_x) volatile long x = (long)(_x); (void)x - -/** Type of functions that can be tested */ -typedef void (*stub_t)(void *dest, void *src, size_t n); - -/** Meta data about one test */ -struct test -{ - /** Test name */ - const char *name; - /** Function to test */ - stub_t stub; -}; - -/** Flush the cache by reading a chunk of memory */ -static void empty(volatile char *against) -{ - /* We know that there's a 16 k cache with 64 byte lines giving - a total of 256 lines. Read randomly from 256*5 places should - flush everything */ - int offset = (1024 - 256)*1024; - - for (int i = offset; i < offset + 16*1024*3; i += 64) - { - against[i]; - } -} - -/** Stub that does nothing. 
Used for calibrating */ -static void xbounce(void *dest, void *src, size_t n) -{ - SPOIL(0); -} - -/** Stub that calls memcpy */ -static void xmemcpy(void *dest, void *src, size_t n) -{ - SPOIL(memcpy(dest, src, n)); -} - -/** Stub that calls memset */ -static void xmemset(void *dest, void *src, size_t n) -{ - SPOIL(memset(dest, 0, n)); -} - -/** Stub that calls memcmp */ -static void xmemcmp(void *dest, void *src, size_t n) -{ - SPOIL(memcmp(dest, src, n)); -} - -/** Stub that calls strcpy */ -static void xstrcpy(void *dest, void *src, size_t n) -{ - SPOIL(strcpy(dest, src)); -} - -/** Stub that calls strlen */ -static void xstrlen(void *dest, void *src, size_t n) -{ - SPOIL(strlen(dest)); -} - -/** Stub that calls strcmp */ -static void xstrcmp(void *dest, void *src, size_t n) -{ - SPOIL(strcmp(dest, src)); -} - -/** Stub that calls strchr */ -static void xstrchr(void *dest, void *src, size_t n) -{ - /* Put the character at the end of the string and before the null */ - ((char *)src)[n-1] = 32; - SPOIL(strchr(src, 32)); -} - -/** Stub that calls memchr */ -static void xmemchr(void *dest, void *src, size_t n) -{ - /* Put the character at the end of the block */ - ((char *)src)[n-1] = 32; - SPOIL(memchr(src, 32, n)); -} - -/** All functions that can be tested */ -static const struct test tests[] = - { - { "bounce", xbounce }, - { "memchr", xmemchr }, - { "memcpy", xmemcpy }, - { "memset", xmemset }, - { "memcmp", xmemcmp }, - { "strchr", xstrchr }, - { "strcmp", xstrcmp }, - { "strcpy", xstrcpy }, - { "strlen", xstrlen }, - { NULL } - }; - -/** Show basic usage */ -static void usage(const char* name) -{ - printf("%s %s: run a string related benchmark.\n" - "usage: %s [-c block-size] [-l loop-count] [-a alignment|src_alignment:dst_alignment] [-f] [-t test-name] [-r run-id]\n" - , name, VERSION, name); - - printf("Tests:"); - - for (const struct test *ptest = tests; ptest->name != NULL; ptest++) - { - printf(" %s", ptest->name); - } - - printf("\n"); - - exit(-1); -} 
- -/** Find the test by name */ -static const struct test *find_test(const char *name) -{ - if (name == NULL) - { - return tests + 0; - } - else - { - for (const struct test *p = tests; p->name != NULL; p++) - { - if (strcmp(p->name, name) == 0) - { - return p; - } - } - } - - return NULL; -} - -#define MIN_BUFFER_SIZE 1024*1024 -#define MAX_ALIGNMENT 256 - -/** Take a pointer and ensure that the lower bits == alignment */ -static char *realign(char *p, int alignment) -{ - uintptr_t pp = (uintptr_t)p; - pp = (pp + (MAX_ALIGNMENT - 1)) & ~(MAX_ALIGNMENT - 1); - pp += alignment; - - return (char *)pp; -} - -static int parse_int_arg(const char *arg, const char *exe_name) -{ - long int ret; - - errno = 0; - ret = strtol(arg, NULL, 0); - - if (errno) - { - usage(exe_name); - } - - return (int)ret; -} - -static void parse_alignment_arg(const char *arg, const char *exe_name, - int *src_alignment, int *dst_alignment) -{ - long int ret; - char *endptr; - - errno = 0; - ret = strtol(arg, &endptr, 0); - - if (errno) - { - usage(exe_name); - } - - *src_alignment = (int)ret; - - if (ret > 256 || ret < 1) - { - printf("Alignment should be in the range [1, 256].\n"); - usage(exe_name); - } - - if (ret == 256) - ret = 0; - - if (endptr && *endptr == ':') - { - errno = 0; - ret = strtol(endptr + 1, NULL, 0); - - if (errno) - { - usage(exe_name); - } - - if (ret > 256 || ret < 1) - { - printf("Alignment should be in the range [1, 256].\n"); - usage(exe_name); - } - - if (ret == 256) - ret = 0; - } - - *dst_alignment = (int)ret; -} - -/** Setup and run a test */ -int main(int argc, char **argv) -{ - /* Size of src and dest buffers */ - size_t buffer_size = MIN_BUFFER_SIZE; - - /* Number of bytes per call */ - int count = 31; - /* Number of times to run */ - int loops = 10000000; - /* True to flush the cache each time */ - int flush = 0; - /* Name of the test */ - const char *name = NULL; - /* Alignment of buffers */ - int src_alignment = 8; - int dst_alignment = 8; - /* Name of the 
run */ - const char *run_id = "0"; - - int opt; - - while ((opt = getopt(argc, argv, "c:l:ft:r:hva:")) > 0) - { - switch (opt) - { - case 'c': - count = parse_int_arg(optarg, argv[0]); - break; - case 'l': - loops = parse_int_arg(optarg, argv[0]); - break; - case 'a': - parse_alignment_arg(optarg, argv[0], &src_alignment, &dst_alignment); - break; - case 'f': - flush = 1; - break; - case 't': - name = strdup(optarg); - break; - case 'r': - run_id = strdup(optarg); - break; - case 'h': - usage(argv[0]); - break; - default: - usage(argv[0]); - break; - } - } - - /* Find the test by name */ - const struct test *ptest = find_test(name); - - if (ptest == NULL) - { - usage(argv[0]); - } - - if (count + MAX_ALIGNMENT * 2 > MIN_BUFFER_SIZE) - { - buffer_size = count + MAX_ALIGNMENT * 2; - } - - /* Buffers to read and write from */ - char *src = malloc(buffer_size); - char *dest = malloc(buffer_size); - - assert(src != NULL && dest != NULL); - - src = realign(src, src_alignment); - dest = realign(dest, dst_alignment); - - /* Fill the buffer with non-zero, reproducable random data */ - srandom(1539); - - for (int i = 0; i < buffer_size; i++) - { - src[i] = (char)random() | 1; - dest[i] = src[i]; - } - - /* Make sure the buffers are null terminated for any string tests */ - src[count] = 0; - dest[count] = 0; - - struct timespec start, end; - int err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start); - assert(err == 0); - - /* Preload */ - stub_t stub = ptest->stub; - - /* Run two variants to reduce the cost of testing for the flush */ - if (flush == 0) - { - for (int i = 0; i < loops; i++) - { - (*stub)(dest, src, count); - } - } - else - { - for (int i = 0; i < loops; i++) - { - (*stub)(dest, src, count); - empty(dest); - } - } - - err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end); - assert(err == 0); - - /* Drop any leading path and pull the variant name out of the executable */ - char *variant = strrchr(argv[0], '/'); - - if (variant == NULL) - { - variant = argv[0]; - } 
- - variant = strstr(variant, "try-"); - assert(variant != NULL); - - double elapsed = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9; - /* Estimate the bounce time. Measured on a Panda. */ - double bounced = 0.448730 * loops / 50000000; - - /* Dump both machine and human readable versions */ - printf("%s:%s:%u:%u:%d:%d:%s:%.6f: took %.6f s for %u calls to %s of %u bytes. ~%.3f MB/s corrected.\n", - variant + 4, ptest->name, - count, loops, src_alignment, dst_alignment, run_id, - elapsed, - elapsed, loops, ptest->name, count, - (double)loops*count/(elapsed - bounced)/(1024*1024)); - - return 0; -} diff --git a/configure.ac b/configure.ac deleted file mode 100644 index d6b3c9c..0000000 --- a/configure.ac +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2011-2012, Linaro Limited -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Linaro nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -AC_INIT(cortex-strings, 1.1-2012.06~dev) -AM_INIT_AUTOMAKE(foreign subdir-objects color-tests dist-bzip2) -AC_CONFIG_HEADERS([config.h]) -AC_CONFIG_FILES(Makefile) -AC_CANONICAL_HOST -AM_PROG_AS -AC_PROG_CC -AC_PROG_LIBTOOL - -default_submachine= - -case $host in -aarch64*-*-*) - arch=aarch64 - ;; -arm*-*-*) - arch=aarch32 - default_submachine=cortex-a9 - ;; -x86_64-*-*-*) - arch=generic - ;; -*) - AC_MSG_ERROR([unknown architecture $host]) - ;; -esac - -AM_CONDITIONAL([HOST_AARCH32], [test x$arch = xaarch32]) -AM_CONDITIONAL([HOST_AARCH64], [test x$arch = xaarch64]) -AM_CONDITIONAL([HOST_GENERIC], [test x$arch = xgeneric]) - -AC_ARG_WITH([cpu], - AS_HELP_STRING([--with-cpu=CPU], - [select code for CPU variant @<:@default=cortex-a9@:>@]]), - [dnl - case "$withval" in - yes|'') AC_MSG_ERROR([--with-cpu requires an argument]) ;; - no) ;; - *) submachine="$withval" ;; - esac -], -[submachine=$default_submachine]) - -AC_SUBST(submachine) -AM_CONDITIONAL([WITH_SUBMACHINE], [test x$submachine != x]) - -AC_ARG_WITH([neon], - AC_HELP_STRING([--with-neon], - [include NEON specific routines @<:@default=yes@:>@]), - [with_neon=$withval], - [with_neon=yes]) -AC_SUBST(with_neon) -AM_CONDITIONAL(WITH_NEON, test x$with_neon = xyes) - -AC_ARG_WITH([vfp], - AC_HELP_STRING([--with-vfp], - [include VFP specific routines @<:@default=yes@:>@]), - [with_vfp=$withval], - [with_vfp=yes]) -AC_SUBST(with_vfp) -AM_CONDITIONAL(WITH_VFP, test x$with_vfp = xyes) - -AC_ARG_WITH([sve], 
- AC_HELP_STRING([--with-sve], - [include SVE specific routines @<:@default=no@:>@]), - [with_sve=$withval], - [with_sve=no]) -AC_SUBST(with_sve) -AM_CONDITIONAL(WITH_SVE, test x$with_sve = xyes) - -AC_OUTPUT diff --git a/reference/bionic-a15/memcmp.S b/reference/bionic-a15/memcmp.S deleted file mode 100644 index 8876a98..0000000 --- a/reference/bionic-a15/memcmp.S +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifdef HAVE_32_BYTE_CACHE_LINE -#define CACHE_LINE_SIZE 32 -#else -#define CACHE_LINE_SIZE 64 -#endif - -/* - * Optimized memcmp() for Cortex-A9. 
- */ - .text - .globl memcmp - .type memcmp,%function -memcmp: - .fnstart - pld [r0, #(CACHE_LINE_SIZE * 0)] - pld [r0, #(CACHE_LINE_SIZE * 1)] - - /* take of the case where length is 0 or the buffers are the same */ - cmp r0, r1 - moveq r0, #0 - bxeq lr - - pld [r1, #(CACHE_LINE_SIZE * 0)] - pld [r1, #(CACHE_LINE_SIZE * 1)] - - /* make sure we have at least 8+4 bytes, this simplify things below - * and avoid some overhead for small blocks - */ - cmp r2, #(8+4) - bmi 10f -/* - * Neon optimization - * Comparing 32 bytes at a time - */ -#if defined(__ARM_NEON__) && defined(NEON_UNALIGNED_ACCESS) - subs r2, r2, #32 - blo 3f - - /* preload all the cache lines we need. */ - pld [r0, #(CACHE_LINE_SIZE * 2)] - pld [r1, #(CACHE_LINE_SIZE * 2)] - -1: /* The main loop compares 32 bytes at a time */ - vld1.8 {d0 - d3}, [r0]! - pld [r0, #(CACHE_LINE_SIZE * 2)] - vld1.8 {d4 - d7}, [r1]! - pld [r1, #(CACHE_LINE_SIZE * 2)] - - /* Start subtracting the values and merge results */ - vsub.i8 q0, q2 - vsub.i8 q1, q3 - vorr q2, q0, q1 - vorr d4, d5 - vmov r3, ip, d4 - /* Check if there are any differences among the 32 bytes */ - orrs r3, ip - bne 2f - subs r2, r2, #32 - bhs 1b - b 3f -2: - /* Check if the difference was in the first or last 16 bytes */ - sub r0, #32 - vorr d0, d1 - sub r1, #32 - vmov r3, ip, d0 - orrs r3, ip - /* if the first 16 bytes are equal, we only have to rewind 16 bytes */ - ittt eq - subeq r2, #16 - addeq r0, #16 - addeq r1, #16 - -3: /* fix-up the remaining count */ - add r2, r2, #32 - - cmp r2, #(8+4) - bmi 10f -#endif - - .save {r4, lr} - /* save registers */ - stmfd sp!, {r4, lr} - - /* since r0 hold the result, move the first source - * pointer somewhere else - */ - mov r4, r0 - - /* align first pointer to word boundary - * offset = -src & 3 - */ - rsb r3, r4, #0 - ands r3, r3, #3 - beq 0f - - /* align first pointer */ - sub r2, r2, r3 -1: ldrb r0, [r4], #1 - ldrb ip, [r1], #1 - subs r0, r0, ip - bne 9f - subs r3, r3, #1 - bne 1b - - -0: /* here the first 
pointer is aligned, and we have at least 4 bytes - * to process. - */ - - /* see if the pointers are congruent */ - eor r0, r4, r1 - ands r0, r0, #3 - bne 5f - - /* congruent case, 32 bytes per iteration - * We need to make sure there are at least 32+4 bytes left - * because we effectively read ahead one word, and we could - * read past the buffer (and segfault) if we're not careful. - */ - - ldr ip, [r1] - subs r2, r2, #(32 + 4) - bmi 1f - -0: pld [r4, #(CACHE_LINE_SIZE * 2)] - pld [r1, #(CACHE_LINE_SIZE * 2)] - ldr r0, [r4], #4 - ldr lr, [r1, #4]! - eors r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - ldreq r0, [r4], #4 - ldreq lr, [r1, #4]! - eoreqs r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - ldreq r0, [r4], #4 - ldreq lr, [r1, #4]! - eoreqs r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - ldreq r0, [r4], #4 - ldreq lr, [r1, #4]! - eoreqs r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - bne 2f - subs r2, r2, #32 - bhs 0b - - /* do we have at least 4 bytes left? */ -1: adds r2, r2, #(32 - 4 + 4) - bmi 4f - - /* finish off 4 bytes at a time */ -3: ldr r0, [r4], #4 - ldr ip, [r1], #4 - eors r0, r0, ip - bne 2f - subs r2, r2, #4 - bhs 3b - - /* are we done? 
*/ -4: adds r2, r2, #4 - moveq r0, #0 - beq 9f - - /* finish off the remaining bytes */ - b 8f - -2: /* the last 4 bytes are different, restart them */ - sub r4, r4, #4 - sub r1, r1, #4 - mov r2, #4 - - /* process the last few bytes */ -8: ldrb r0, [r4], #1 - ldrb ip, [r1], #1 - // stall - subs r0, r0, ip - bne 9f - subs r2, r2, #1 - bne 8b - -9: /* restore registers and return */ - ldmfd sp!, {r4, lr} - bx lr - -10: /* process less than 12 bytes */ - cmp r2, #0 - moveq r0, #0 - bxeq lr - mov r3, r0 -11: - ldrb r0, [r3], #1 - ldrb ip, [r1], #1 - subs r0, ip - bxne lr - subs r2, r2, #1 - bne 11b - bx lr - -5: /*************** non-congruent case ***************/ - and r0, r1, #3 - cmp r0, #2 - bne 4f - - /* here, offset is 2 (16-bits aligned, special cased) */ - - /* make sure we have at least 16 bytes to process */ - subs r2, r2, #16 - addmi r2, r2, #16 - bmi 8b - - /* align the unaligned pointer */ - bic r1, r1, #3 - ldr lr, [r1], #4 - -6: pld [r1, #(CACHE_LINE_SIZE * 2)] - pld [r4, #(CACHE_LINE_SIZE * 2)] - mov ip, lr, lsr #16 - ldr lr, [r1], #4 - ldr r0, [r4], #4 - orr ip, ip, lr, lsl #16 - eors r0, r0, ip - moveq ip, lr, lsr #16 - ldreq lr, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip - moveq ip, lr, lsr #16 - ldreq lr, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip - moveq ip, lr, lsr #16 - ldreq lr, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip - bne 7f - subs r2, r2, #16 - bhs 6b - sub r1, r1, #2 - /* are we done? */ - adds r2, r2, #16 - moveq r0, #0 - beq 9b - /* finish off the remaining bytes */ - b 8b - -7: /* fix up the 2 pointers and fallthrough... 
*/ - sub r1, r1, #(4+2) - sub r4, r4, #4 - mov r2, #4 - b 8b - - -4: /*************** offset is 1 or 3 (less optimized) ***************/ - - stmfd sp!, {r5, r6, r7} - - // r5 = rhs - // r6 = lhs - // r7 = scratch - - mov r5, r0, lsl #3 /* r5 = right shift */ - rsb r6, r5, #32 /* r6 = left shift */ - - /* align the unaligned pointer */ - bic r1, r1, #3 - ldr r7, [r1], #4 - sub r2, r2, #8 - -6: mov ip, r7, lsr r5 - ldr r7, [r1], #4 - ldr r0, [r4], #4 - orr ip, ip, r7, lsl r6 - eors r0, r0, ip - moveq ip, r7, lsr r5 - ldreq r7, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, r7, lsl r6 - eoreqs r0, r0, ip - bne 7f - subs r2, r2, #8 - bhs 6b - - sub r1, r1, r6, lsr #3 - ldmfd sp!, {r5, r6, r7} - - /* are we done? */ - adds r2, r2, #8 - moveq r0, #0 - beq 9b - - /* finish off the remaining bytes */ - b 8b - -7: /* fix up the 2 pointers and fallthrough... */ - sub r1, r1, #4 - sub r1, r1, r6, lsr #3 - sub r4, r4, #4 - mov r2, #4 - ldmfd sp!, {r5, r6, r7} - b 8b - .fnend - .size memcmp, .-memcmp diff --git a/reference/bionic-a15/memcpy.S b/reference/bionic-a15/memcpy.S deleted file mode 100644 index 921b1df..0000000 --- a/reference/bionic-a15/memcpy.S +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * Copyright (c) 2013 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - /* Prototype: void *memcpy (void *dst, const void *src, size_t count). */ - - // This version is tuned for the Cortex-A15 processor. - - .text - .syntax unified - .fpu neon - -#define CACHE_LINE_SIZE 64 - - .globl memcpy - .type memcpy,%function -memcpy: - .fnstart - // Assumes that n >= 0, and dst, src are valid pointers. - // For any sizes less than 832 use the neon code that doesn't - // care about the src alignment. This avoids any checks - // for src alignment, and offers the best improvement since - // smaller sized copies are dominated by the overhead of - // the pre and post main loop. - // For larger copies, if src and dst cannot both be aligned to - // word boundaries, use the neon code. - // For all other copies, align dst to a double word boundary - // and copy using LDRD/STRD instructions. - - // Save registers (r0 holds the return value): - // optimized push {r0, lr}. - .save {r0, lr} - pld [r1, #(CACHE_LINE_SIZE*16)] - push {r0, lr} - - cmp r2, #16 - blo copy_less_than_16_unknown_align - - cmp r2, #832 - bge check_alignment - -copy_unknown_alignment: - // Unknown alignment of src and dst. - // Assumes that the first few bytes have already been prefetched. - - // Align destination to 128 bits. The mainloop store instructions - // require this alignment or they will throw an exception. - rsb r3, r0, #0 - ands r3, r3, #0xF - beq 2f - - // Copy up to 15 bytes (count in r3). 
- sub r2, r2, r3 - movs ip, r3, lsl #31 - - itt mi - ldrbmi lr, [r1], #1 - strbmi lr, [r0], #1 - itttt cs - ldrbcs ip, [r1], #1 - ldrbcs lr, [r1], #1 - strbcs ip, [r0], #1 - strbcs lr, [r0], #1 - - movs ip, r3, lsl #29 - bge 1f - // Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after. - vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! - vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]! -1: bcc 2f - // Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after. - vld1.8 {d0}, [r1]! - vst1.8 {d0}, [r0, :64]! - -2: // Make sure we have at least 64 bytes to copy. - subs r2, r2, #64 - blo 2f - -1: // The main loop copies 64 bytes at a time. - vld1.8 {d0 - d3}, [r1]! - vld1.8 {d4 - d7}, [r1]! - pld [r1, #(CACHE_LINE_SIZE*4)] - subs r2, r2, #64 - vst1.8 {d0 - d3}, [r0, :128]! - vst1.8 {d4 - d7}, [r0, :128]! - bhs 1b - -2: // Fix-up the remaining count and make sure we have >= 32 bytes left. - adds r2, r2, #32 - blo 3f - - // 32 bytes. These cache lines were already preloaded. - vld1.8 {d0 - d3}, [r1]! - sub r2, r2, #32 - vst1.8 {d0 - d3}, [r0, :128]! -3: // Less than 32 left. - add r2, r2, #32 - tst r2, #0x10 - beq copy_less_than_16_unknown_align - // Copies 16 bytes, destination 128 bits aligned. - vld1.8 {d0, d1}, [r1]! - vst1.8 {d0, d1}, [r0, :128]! - -copy_less_than_16_unknown_align: - // Copy up to 15 bytes (count in r2). - movs ip, r2, lsl #29 - bcc 1f - vld1.8 {d0}, [r1]! - vst1.8 {d0}, [r0]! -1: bge 2f - vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! - vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]! - -2: // Copy 0 to 4 bytes. - lsls r2, r2, #31 - itt ne - ldrbne lr, [r1], #1 - strbne lr, [r0], #1 - itttt cs - ldrbcs ip, [r1], #1 - ldrbcs lr, [r1] - strbcs ip, [r0], #1 - strbcs lr, [r0] - - pop {r0, pc} - -check_alignment: - // If src and dst cannot both be aligned to a word boundary, - // use the unaligned copy version. 
- eor r3, r0, r1 - ands r3, r3, #0x3 - bne copy_unknown_alignment - - // To try and improve performance, stack layout changed, - // i.e., not keeping the stack looking like users expect - // (highest numbered register at highest address). - // TODO: Add debug frame directives. - // We don't need exception unwind directives, because the code below - // does not throw any exceptions and does not call any other functions. - // Generally, newlib functions like this lack debug information for - // assembler source. - .save {r4, r5} - strd r4, r5, [sp, #-8]! - .save {r6, r7} - strd r6, r7, [sp, #-8]! - .save {r8, r9} - strd r8, r9, [sp, #-8]! - - // Optimized for already aligned dst code. - ands ip, r0, #3 - bne dst_not_word_aligned - -word_aligned: - // Align the destination buffer to 8 bytes, to make sure double - // loads and stores don't cross a cache line boundary, - // as they are then more expensive even if the data is in the cache - // (require two load/store issue cycles instead of one). - // If only one of the buffers is not 8 bytes aligned, - // then it's more important to align dst than src, - // because there is more penalty for stores - // than loads that cross a cacheline boundary. - // This check and realignment are only done if there is >= 832 - // bytes to copy. - - // Dst is word aligned, but check if it is already double word aligned. - ands r3, r0, #4 - beq 1f - ldr r3, [r1], #4 - str r3, [r0], #4 - sub r2, #4 - -1: // Can only get here if > 64 bytes to copy, so don't do check r2. - sub r2, #64 - -2: // Every loop iteration copies 64 bytes. - .irp offset, #0, #8, #16, #24, #32 - ldrd r4, r5, [r1, \offset] - strd r4, r5, [r0, \offset] - .endr - - ldrd r4, r5, [r1, #40] - ldrd r6, r7, [r1, #48] - ldrd r8, r9, [r1, #56] - - // Keep the pld as far from the next load as possible. - // The amount to prefetch was determined experimentally using - // large sizes, and verifying the prefetch size does not affect - // the smaller copies too much. 
- // WARNING: If the ldrd and strd instructions get too far away - // from each other, performance suffers. Three loads - // in a row is the best tradeoff. - pld [r1, #(CACHE_LINE_SIZE*16)] - strd r4, r5, [r0, #40] - strd r6, r7, [r0, #48] - strd r8, r9, [r0, #56] - - add r0, r0, #64 - add r1, r1, #64 - subs r2, r2, #64 - bge 2b - - // Fix-up the remaining count and make sure we have >= 32 bytes left. - adds r2, r2, #32 - blo 4f - - // Copy 32 bytes. These cache lines were already preloaded. - .irp offset, #0, #8, #16, #24 - ldrd r4, r5, [r1, \offset] - strd r4, r5, [r0, \offset] - .endr - add r1, r1, #32 - add r0, r0, #32 - sub r2, r2, #32 -4: // Less than 32 left. - add r2, r2, #32 - tst r2, #0x10 - beq 5f - // Copy 16 bytes. - .irp offset, #0, #8 - ldrd r4, r5, [r1, \offset] - strd r4, r5, [r0, \offset] - .endr - add r1, r1, #16 - add r0, r0, #16 - -5: // Copy up to 15 bytes (count in r2). - movs ip, r2, lsl #29 - bcc 1f - // Copy 8 bytes. - ldrd r4, r5, [r1], #8 - strd r4, r5, [r0], #8 -1: bge 2f - // Copy 4 bytes. - ldr r4, [r1], #4 - str r4, [r0], #4 -2: // Copy 0 to 4 bytes. - lsls r2, r2, #31 - itt ne - ldrbne lr, [r1], #1 - strbne lr, [r0], #1 - itttt cs - ldrbcs ip, [r1], #1 - ldrbcs lr, [r1] - strbcs ip, [r0], #1 - strbcs lr, [r0] - - // Restore registers: optimized pop {r0, pc} - ldrd r8, r9, [sp], #8 - ldrd r6, r7, [sp], #8 - ldrd r4, r5, [sp], #8 - pop {r0, pc} - -dst_not_word_aligned: - // Align dst to word. - rsb ip, ip, #4 - cmp ip, #2 - - itt gt - ldrbgt lr, [r1], #1 - strbgt lr, [r0], #1 - - itt ge - ldrbge lr, [r1], #1 - strbge lr, [r0], #1 - - ldrb lr, [r1], #1 - strb lr, [r0], #1 - - sub r2, r2, ip - - // Src is guaranteed to be at least word aligned by this point. 
- b word_aligned - .fnend - .size memcpy, .-memcpy diff --git a/reference/bionic-a15/memset.S b/reference/bionic-a15/memset.S deleted file mode 100644 index 6fd7c8e..0000000 --- a/reference/bionic-a15/memset.S +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - - /* - * Optimized memset() for ARM. - * - * memset() returns its first argument. - */ - - .fpu neon - .syntax unified - - .globl memset - .type memset,%function -memset: - .fnstart - .save {r0} - stmfd sp!, {r0} - - // The new algorithm is slower for copies < 16 so use the old - // neon code in that case. 
- cmp r2, #16 - blo set_less_than_16_unknown_align - - // Use strd which requires an even and odd register so move the - // values so that: - // r0 and r1 contain the memset value - // r2 is the number of bytes to set - // r3 is the destination pointer - mov r3, r0 - - // Copy the byte value in every byte of r1. - mov r1, r1, lsl #24 - orr r1, r1, r1, lsr #8 - orr r1, r1, r1, lsr #16 - -check_alignment: - // Align destination to a double word to avoid the strd crossing - // a cache line boundary. - ands ip, r3, #7 - bne do_double_word_align - -double_word_aligned: - mov r0, r1 - - subs r2, #64 - blo set_less_than_64 - -1: // Main loop sets 64 bytes at a time. - .irp offset, #0, #8, #16, #24, #32, #40, #48, #56 - strd r0, r1, [r3, \offset] - .endr - - add r3, #64 - subs r2, #64 - bge 1b - -set_less_than_64: - // Restore r2 to the count of bytes left to set. - add r2, #64 - lsls ip, r2, #27 - bcc set_less_than_32 - // Set 32 bytes. - .irp offset, #0, #8, #16, #24 - strd r0, r1, [r3, \offset] - .endr - add r3, #32 - -set_less_than_32: - bpl set_less_than_16 - // Set 16 bytes. - .irp offset, #0, #8 - strd r0, r1, [r3, \offset] - .endr - add r3, #16 - -set_less_than_16: - // Less than 16 bytes to set. - lsls ip, r2, #29 - bcc set_less_than_8 - - // Set 8 bytes. - strd r0, r1, [r3], #8 - -set_less_than_8: - bpl set_less_than_4 - // Set 4 bytes - str r1, [r3], #4 - -set_less_than_4: - lsls ip, r2, #31 - it ne - strbne r1, [r3], #1 - itt cs - strbcs r1, [r3], #1 - strbcs r1, [r3] - - ldmfd sp!, {r0} - bx lr - -do_double_word_align: - rsb ip, ip, #8 - sub r2, r2, ip - movs r0, ip, lsl #31 - it mi - strbmi r1, [r3], #1 - itt cs - strbcs r1, [r3], #1 - strbcs r1, [r3], #1 - - // Dst is at least word aligned by this point. - cmp ip, #4 - blo double_word_aligned - str r1, [r3], #4 - b double_word_aligned - -set_less_than_16_unknown_align: - // Set up to 15 bytes. - vdup.8 d0, r1 - movs ip, r2, lsl #29 - bcc 1f - vst1.8 {d0}, [r0]! -1: bge 2f - vst1.32 {d0[0]}, [r0]! 
-2: movs ip, r2, lsl #31 - it mi - strbmi r1, [r0], #1 - itt cs - strbcs r1, [r0], #1 - strbcs r1, [r0], #1 - ldmfd sp!, {r0} - bx lr - .fnend - .size memset, .-memset diff --git a/reference/bionic-a15/strcmp.S b/reference/bionic-a15/strcmp.S deleted file mode 100644 index 9787e25..0000000 --- a/reference/bionic-a15/strcmp.S +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Copyright (c) 2013 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifdef __ARMEB__ -#define S2LOMEM lsl -#define S2LOMEMEQ lsleq -#define S2HIMEM lsr -#define MSB 0x000000ff -#define LSB 0xff000000 -#define BYTE0_OFFSET 24 -#define BYTE1_OFFSET 16 -#define BYTE2_OFFSET 8 -#define BYTE3_OFFSET 0 -#else /* not __ARMEB__ */ -#define S2LOMEM lsr -#define S2LOMEMEQ lsreq -#define S2HIMEM lsl -#define BYTE0_OFFSET 0 -#define BYTE1_OFFSET 8 -#define BYTE2_OFFSET 16 -#define BYTE3_OFFSET 24 -#define MSB 0xff000000 -#define LSB 0x000000ff -#endif /* not __ARMEB__ */ - -.syntax unified - -#if defined (__thumb__) - .thumb - .thumb_func -#endif - - .globl strcmp - .type strcmp,%function -strcmp: - .fnstart - /* Use LDRD whenever possible. */ - -/* The main thing to look out for when comparing large blocks is that - the loads do not cross a page boundary when loading past the index - of the byte with the first difference or the first string-terminator. - - For example, if the strings are identical and the string-terminator - is at index k, byte by byte comparison will not load beyond address - s1+k and s2+k; word by word comparison may load up to 3 bytes beyond - k; double word - up to 7 bytes. If the load of these bytes crosses - a page boundary, it might cause a memory fault (if the page is not mapped) - that would not have happened in byte by byte comparison. - - If an address is (double) word aligned, then a load of a (double) word - from that address will not cross a page boundary. - Therefore, the algorithm below considers word and double-word alignment - of strings separately. */ - -/* High-level description of the algorithm. - - * The fast path: if both strings are double-word aligned, - use LDRD to load two words from each string in every loop iteration. - * If the strings have the same offset from a word boundary, - use LDRB to load and compare byte by byte until - the first string is aligned to a word boundary (at most 3 bytes). - This is optimized for quick return on short unaligned strings. 
- * If the strings have the same offset from a double-word boundary, - use LDRD to load two words from each string in every loop iteration, as in the fast path. - * If the strings do not have the same offset from a double-word boundary, - load a word from the second string before the loop to initialize the queue. - Use LDRD to load two words from every string in every loop iteration. - Inside the loop, load the second word from the second string only after comparing - the first word, using the queued value, to guarantee safety across page boundaries. - * If the strings do not have the same offset from a word boundary, - use LDR and a shift queue. Order of loads and comparisons matters, - similarly to the previous case. - - * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value. - * The only difference between ARM and Thumb modes is the use of CBZ instruction. - * The only difference between big and little endian is the use of REV in little endian - to compute the return value, instead of MOV. -*/ - - .macro m_cbz reg label -#ifdef __thumb2__ - cbz \reg, \label -#else /* not defined __thumb2__ */ - cmp \reg, #0 - beq \label -#endif /* not defined __thumb2__ */ - .endm /* m_cbz */ - - .macro m_cbnz reg label -#ifdef __thumb2__ - cbnz \reg, \label -#else /* not defined __thumb2__ */ - cmp \reg, #0 - bne \label -#endif /* not defined __thumb2__ */ - .endm /* m_cbnz */ - - .macro init - /* Macro to save temporary registers and prepare magic values. */ - subs sp, sp, #16 - strd r4, r5, [sp, #8] - strd r6, r7, [sp] - mvn r6, #0 /* all F */ - mov r7, #0 /* all 0 */ - .endm /* init */ - - .macro magic_compare_and_branch w1 w2 label - /* Macro to compare registers w1 and w2 and conditionally branch to label. */ - cmp \w1, \w2 /* Are w1 and w2 the same? */ - magic_find_zero_bytes \w1 - it eq - cmpeq ip, #0 /* Is there a zero byte in w1? 
*/ - bne \label - .endm /* magic_compare_and_branch */ - - .macro magic_find_zero_bytes w1 - /* Macro to find all-zero bytes in w1, result is in ip. */ -#if (defined (__ARM_FEATURE_DSP)) - uadd8 ip, \w1, r6 - sel ip, r7, r6 -#else /* not defined (__ARM_FEATURE_DSP) */ - /* __ARM_FEATURE_DSP is not defined for some Cortex-M processors. - Coincidently, these processors only have Thumb-2 mode, where we can use the - the (large) magic constant available directly as an immediate in instructions. - Note that we cannot use the magic constant in ARM mode, where we need - to create the constant in a register. */ - sub ip, \w1, #0x01010101 - bic ip, ip, \w1 - and ip, ip, #0x80808080 -#endif /* not defined (__ARM_FEATURE_DSP) */ - .endm /* magic_find_zero_bytes */ - - .macro setup_return w1 w2 -#ifdef __ARMEB__ - mov r1, \w1 - mov r2, \w2 -#else /* not __ARMEB__ */ - rev r1, \w1 - rev r2, \w2 -#endif /* not __ARMEB__ */ - .endm /* setup_return */ - - pld [r0, #0] - pld [r1, #0] - - /* Are both strings double-word aligned? */ - orr ip, r0, r1 - tst ip, #7 - bne do_align - - /* Fast path. */ - init - -doubleword_aligned: - - /* Get here when the strings to compare are double-word aligned. */ - /* Compare two words in every iteration. */ - .p2align 2 -2: - pld [r0, #16] - pld [r1, #16] - - /* Load the next double-word from each string. */ - ldrd r2, r3, [r0], #8 - ldrd r4, r5, [r1], #8 - - magic_compare_and_branch w1=r2, w2=r4, label=return_24 - magic_compare_and_branch w1=r3, w2=r5, label=return_35 - b 2b - -do_align: - /* Is the first string word-aligned? */ - ands ip, r0, #3 - beq word_aligned_r0 - - /* Fast compare byte by byte until the first string is word-aligned. */ - /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes - to read until the next word boundary is 4-ip. 
*/ - bic r0, r0, #3 - ldr r2, [r0], #4 - lsls ip, ip, #31 - beq byte2 - bcs byte3 - -byte1: - ldrb ip, [r1], #1 - uxtb r3, r2, ror #BYTE1_OFFSET - subs ip, r3, ip - bne fast_return - m_cbz reg=r3, label=fast_return - -byte2: - ldrb ip, [r1], #1 - uxtb r3, r2, ror #BYTE2_OFFSET - subs ip, r3, ip - bne fast_return - m_cbz reg=r3, label=fast_return - -byte3: - ldrb ip, [r1], #1 - uxtb r3, r2, ror #BYTE3_OFFSET - subs ip, r3, ip - bne fast_return - m_cbnz reg=r3, label=word_aligned_r0 - -fast_return: - mov r0, ip - bx lr - -word_aligned_r0: - init - /* The first string is word-aligned. */ - /* Is the second string word-aligned? */ - ands ip, r1, #3 - bne strcmp_unaligned - -word_aligned: - /* The strings are word-aligned. */ - /* Is the first string double-word aligned? */ - tst r0, #4 - beq doubleword_aligned_r0 - - /* If r0 is not double-word aligned yet, align it by loading - and comparing the next word from each string. */ - ldr r2, [r0], #4 - ldr r4, [r1], #4 - magic_compare_and_branch w1=r2 w2=r4 label=return_24 - -doubleword_aligned_r0: - /* Get here when r0 is double-word aligned. */ - /* Is r1 doubleword_aligned? */ - tst r1, #4 - beq doubleword_aligned - - /* Get here when the strings to compare are word-aligned, - r0 is double-word aligned, but r1 is not double-word aligned. */ - - /* Initialize the queue. */ - ldr r5, [r1], #4 - - /* Compare two words in every iteration. */ - .p2align 2 -3: - pld [r0, #16] - pld [r1, #16] - - /* Load the next double-word from each string and compare. */ - ldrd r2, r3, [r0], #8 - magic_compare_and_branch w1=r2 w2=r5 label=return_25 - ldrd r4, r5, [r1], #8 - magic_compare_and_branch w1=r3 w2=r4 label=return_34 - b 3b - - .macro miscmp_word offsetlo offsethi - /* Macro to compare misaligned strings. */ - /* r0, r1 are word-aligned, and at least one of the strings - is not double-word aligned. */ - /* Compare one word in every loop iteration. 
*/ - /* OFFSETLO is the original bit-offset of r1 from a word-boundary, - OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word). */ - - /* Initialize the shift queue. */ - ldr r5, [r1], #4 - - /* Compare one word from each string in every loop iteration. */ - .p2align 2 -7: - ldr r3, [r0], #4 - S2LOMEM r5, r5, #\offsetlo - magic_find_zero_bytes w1=r3 - cmp r7, ip, S2HIMEM #\offsetlo - and r2, r3, r6, S2LOMEM #\offsetlo - it eq - cmpeq r2, r5 - bne return_25 - ldr r5, [r1], #4 - cmp ip, #0 - eor r3, r2, r3 - S2HIMEM r2, r5, #\offsethi - it eq - cmpeq r3, r2 - bne return_32 - b 7b - .endm /* miscmp_word */ - -strcmp_unaligned: - /* r0 is word-aligned, r1 is at offset ip from a word. */ - /* Align r1 to the (previous) word-boundary. */ - bic r1, r1, #3 - - /* Unaligned comparison word by word using LDRs. */ - cmp ip, #2 - beq miscmp_word_16 /* If ip == 2. */ - bge miscmp_word_24 /* If ip == 3. */ - miscmp_word offsetlo=8 offsethi=24 /* If ip == 1. */ -miscmp_word_16: miscmp_word offsetlo=16 offsethi=16 -miscmp_word_24: miscmp_word offsetlo=24 offsethi=8 - - -return_32: - setup_return w1=r3, w2=r2 - b do_return -return_34: - setup_return w1=r3, w2=r4 - b do_return -return_25: - setup_return w1=r2, w2=r5 - b do_return -return_35: - setup_return w1=r3, w2=r5 - b do_return -return_24: - setup_return w1=r2, w2=r4 - -do_return: - -#ifdef __ARMEB__ - mov r0, ip -#else /* not __ARMEB__ */ - rev r0, ip -#endif /* not __ARMEB__ */ - - /* Restore temporaries early, before computing the return value. */ - ldrd r6, r7, [sp] - ldrd r4, r5, [sp, #8] - adds sp, sp, #16 - - /* There is a zero or a different byte between r1 and r2. */ - /* r0 contains a mask of all-zero bytes in r1. */ - /* Using r0 and not ip here because cbz requires low register. */ - m_cbz reg=r0, label=compute_return_value - clz r0, r0 - /* r0 contains the number of bits on the left of the first all-zero byte in r1. 
*/ - rsb r0, r0, #24 - /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1. */ - lsr r1, r1, r0 - lsr r2, r2, r0 - -compute_return_value: - movs r0, #1 - cmp r1, r2 - /* The return value is computed as follows. - If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return. - If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0, - which means r0:=r0-r0-1 and r0 is #-1 at return. - If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1, - which means r0:=r0-r0 and r0 is #0 at return. - (C==0 and Z==1) cannot happen because the carry bit is "not borrow". */ - it ls - sbcls r0, r0, r0 - bx lr - .fnend - .size strcmp, .-strcmp diff --git a/reference/bionic-a15/strcpy.S b/reference/bionic-a15/strcpy.S deleted file mode 100644 index 9925378..0000000 --- a/reference/bionic-a15/strcpy.S +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * Copyright (c) 2008 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Android adaptation and tweak by Jim Huang <jserv@0xlab.org>. - */ - - .globl strcpy - .type strcpy, %function - .text - -strcpy: - .fnstart - PLD [r1, #0] - eor r2, r0, r1 - mov ip, r0 - tst r2, #3 - bne 4f - tst r1, #3 - bne 3f -5: - str r5, [sp, #-4]! - mov r5, #0x01 - orr r5, r5, r5, lsl #8 - orr r5, r5, r5, lsl #16 - - str r4, [sp, #-4]! - tst r1, #4 - ldr r3, [r1], #4 - beq 2f - sub r2, r3, r5 - bics r2, r2, r3 - tst r2, r5, lsl #7 - itt eq - streq r3, [ip], #4 - ldreq r3, [r1], #4 - bne 1f - /* Inner loop. We now know that r1 is 64-bit aligned, so we - can safely fetch up to two words. This allows us to avoid - load stalls. */ - .p2align 2 -2: - PLD [r1, #8] - ldr r4, [r1], #4 - sub r2, r3, r5 - bics r2, r2, r3 - tst r2, r5, lsl #7 - sub r2, r4, r5 - bne 1f - str r3, [ip], #4 - bics r2, r2, r4 - tst r2, r5, lsl #7 - itt eq - ldreq r3, [r1], #4 - streq r4, [ip], #4 - beq 2b - mov r3, r4 -1: -#ifdef __ARMEB__ - rors r3, r3, #24 -#endif - strb r3, [ip], #1 - tst r3, #0xff -#ifdef __ARMEL__ - ror r3, r3, #8 -#endif - bne 1b - ldr r4, [sp], #4 - ldr r5, [sp], #4 - bx lr - - /* Strings have the same offset from word alignment, but it's - not zero. 
*/ -3: - tst r1, #1 - beq 1f - ldrb r2, [r1], #1 - strb r2, [ip], #1 - cmp r2, #0 - it eq - bxeq lr -1: - tst r1, #2 - beq 5b - ldrh r2, [r1], #2 -#ifdef __ARMEB__ - tst r2, #0xff00 - iteet ne - strneh r2, [ip], #2 - lsreq r2, r2, #8 - streqb r2, [ip] - tstne r2, #0xff -#else - tst r2, #0xff - itet ne - strneh r2, [ip], #2 - streqb r2, [ip] - tstne r2, #0xff00 -#endif - bne 5b - bx lr - - /* src and dst do not have a common word-alignement. Fall back to - byte copying. */ -4: - ldrb r2, [r1], #1 - strb r2, [ip], #1 - cmp r2, #0 - bne 4b - bx lr - - .fnend diff --git a/reference/bionic-a15/strlen.c b/reference/bionic-a15/strlen.c deleted file mode 100644 index 8781d76..0000000 --- a/reference/bionic-a15/strlen.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <string.h> -#include <stdint.h> -#undef strlen - -#define __ARM_HAVE_PLD 1 - -size_t strlen(const char *s) -{ - __builtin_prefetch(s); - __builtin_prefetch(s+32); - - union { - const char *b; - const uint32_t *w; - uintptr_t i; - } u; - - // these are some scratch variables for the asm code below - uint32_t v, t; - - // initialize the string length to zero - size_t l = 0; - - // align the pointer to a 32-bit word boundary - u.b = s; - while (u.i & 0x3) { - if (__builtin_expect(*u.b++ == 0, 0)) { - goto done; - } - l++; - } - - // loop for each word, testing if it contains a zero byte - // if so, exit the loop and update the length. - // We need to process 32 bytes per loop to schedule PLD properly - // and achieve the maximum bus speed. 
- asm( - "ldr %[v], [%[s]], #4 \n" - "sub %[l], %[l], %[s] \n" - "0: \n" -#if __ARM_HAVE_PLD - "pld [%[s], #64] \n" -#endif - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" -#if !defined(__OPTIMIZE_SIZE__) - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]] , #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" -#endif - "beq 0b \n" - "1: \n" - "add %[l], %[l], %[s] \n" - "tst %[v], #0xFF \n" - "beq 2f \n" - "add %[l], %[l], #1 \n" - "tst %[v], #0xFF00 \n" - "beq 2f \n" - "add %[l], %[l], #1 \n" - "tst %[v], #0xFF0000 \n" - "addne %[l], %[l], #1 \n" - "2: \n" - : [l]"=&r"(l), [v]"=&r"(v), [t]"=&r"(t), [s]"=&r"(u.b) - : "%[l]"(l), "%[s]"(u.b), [mask]"r"(0x80808080UL) - : "cc" - ); - -done: - return l; -} diff --git a/reference/bionic-a9/memcmp.S b/reference/bionic-a9/memcmp.S deleted file mode 100644 index 8876a98..0000000 --- a/reference/bionic-a9/memcmp.S +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifdef HAVE_32_BYTE_CACHE_LINE -#define CACHE_LINE_SIZE 32 -#else -#define CACHE_LINE_SIZE 64 -#endif - -/* - * Optimized memcmp() for Cortex-A9. 
- */ - .text - .globl memcmp - .type memcmp,%function -memcmp: - .fnstart - pld [r0, #(CACHE_LINE_SIZE * 0)] - pld [r0, #(CACHE_LINE_SIZE * 1)] - - /* take of the case where length is 0 or the buffers are the same */ - cmp r0, r1 - moveq r0, #0 - bxeq lr - - pld [r1, #(CACHE_LINE_SIZE * 0)] - pld [r1, #(CACHE_LINE_SIZE * 1)] - - /* make sure we have at least 8+4 bytes, this simplify things below - * and avoid some overhead for small blocks - */ - cmp r2, #(8+4) - bmi 10f -/* - * Neon optimization - * Comparing 32 bytes at a time - */ -#if defined(__ARM_NEON__) && defined(NEON_UNALIGNED_ACCESS) - subs r2, r2, #32 - blo 3f - - /* preload all the cache lines we need. */ - pld [r0, #(CACHE_LINE_SIZE * 2)] - pld [r1, #(CACHE_LINE_SIZE * 2)] - -1: /* The main loop compares 32 bytes at a time */ - vld1.8 {d0 - d3}, [r0]! - pld [r0, #(CACHE_LINE_SIZE * 2)] - vld1.8 {d4 - d7}, [r1]! - pld [r1, #(CACHE_LINE_SIZE * 2)] - - /* Start subtracting the values and merge results */ - vsub.i8 q0, q2 - vsub.i8 q1, q3 - vorr q2, q0, q1 - vorr d4, d5 - vmov r3, ip, d4 - /* Check if there are any differences among the 32 bytes */ - orrs r3, ip - bne 2f - subs r2, r2, #32 - bhs 1b - b 3f -2: - /* Check if the difference was in the first or last 16 bytes */ - sub r0, #32 - vorr d0, d1 - sub r1, #32 - vmov r3, ip, d0 - orrs r3, ip - /* if the first 16 bytes are equal, we only have to rewind 16 bytes */ - ittt eq - subeq r2, #16 - addeq r0, #16 - addeq r1, #16 - -3: /* fix-up the remaining count */ - add r2, r2, #32 - - cmp r2, #(8+4) - bmi 10f -#endif - - .save {r4, lr} - /* save registers */ - stmfd sp!, {r4, lr} - - /* since r0 hold the result, move the first source - * pointer somewhere else - */ - mov r4, r0 - - /* align first pointer to word boundary - * offset = -src & 3 - */ - rsb r3, r4, #0 - ands r3, r3, #3 - beq 0f - - /* align first pointer */ - sub r2, r2, r3 -1: ldrb r0, [r4], #1 - ldrb ip, [r1], #1 - subs r0, r0, ip - bne 9f - subs r3, r3, #1 - bne 1b - - -0: /* here the first 
pointer is aligned, and we have at least 4 bytes - * to process. - */ - - /* see if the pointers are congruent */ - eor r0, r4, r1 - ands r0, r0, #3 - bne 5f - - /* congruent case, 32 bytes per iteration - * We need to make sure there are at least 32+4 bytes left - * because we effectively read ahead one word, and we could - * read past the buffer (and segfault) if we're not careful. - */ - - ldr ip, [r1] - subs r2, r2, #(32 + 4) - bmi 1f - -0: pld [r4, #(CACHE_LINE_SIZE * 2)] - pld [r1, #(CACHE_LINE_SIZE * 2)] - ldr r0, [r4], #4 - ldr lr, [r1, #4]! - eors r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - ldreq r0, [r4], #4 - ldreq lr, [r1, #4]! - eoreqs r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - ldreq r0, [r4], #4 - ldreq lr, [r1, #4]! - eoreqs r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - ldreq r0, [r4], #4 - ldreq lr, [r1, #4]! - eoreqs r0, r0, ip - ldreq r0, [r4], #4 - ldreq ip, [r1, #4]! - eoreqs r0, r0, lr - bne 2f - subs r2, r2, #32 - bhs 0b - - /* do we have at least 4 bytes left? */ -1: adds r2, r2, #(32 - 4 + 4) - bmi 4f - - /* finish off 4 bytes at a time */ -3: ldr r0, [r4], #4 - ldr ip, [r1], #4 - eors r0, r0, ip - bne 2f - subs r2, r2, #4 - bhs 3b - - /* are we done? 
*/ -4: adds r2, r2, #4 - moveq r0, #0 - beq 9f - - /* finish off the remaining bytes */ - b 8f - -2: /* the last 4 bytes are different, restart them */ - sub r4, r4, #4 - sub r1, r1, #4 - mov r2, #4 - - /* process the last few bytes */ -8: ldrb r0, [r4], #1 - ldrb ip, [r1], #1 - // stall - subs r0, r0, ip - bne 9f - subs r2, r2, #1 - bne 8b - -9: /* restore registers and return */ - ldmfd sp!, {r4, lr} - bx lr - -10: /* process less than 12 bytes */ - cmp r2, #0 - moveq r0, #0 - bxeq lr - mov r3, r0 -11: - ldrb r0, [r3], #1 - ldrb ip, [r1], #1 - subs r0, ip - bxne lr - subs r2, r2, #1 - bne 11b - bx lr - -5: /*************** non-congruent case ***************/ - and r0, r1, #3 - cmp r0, #2 - bne 4f - - /* here, offset is 2 (16-bits aligned, special cased) */ - - /* make sure we have at least 16 bytes to process */ - subs r2, r2, #16 - addmi r2, r2, #16 - bmi 8b - - /* align the unaligned pointer */ - bic r1, r1, #3 - ldr lr, [r1], #4 - -6: pld [r1, #(CACHE_LINE_SIZE * 2)] - pld [r4, #(CACHE_LINE_SIZE * 2)] - mov ip, lr, lsr #16 - ldr lr, [r1], #4 - ldr r0, [r4], #4 - orr ip, ip, lr, lsl #16 - eors r0, r0, ip - moveq ip, lr, lsr #16 - ldreq lr, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip - moveq ip, lr, lsr #16 - ldreq lr, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip - moveq ip, lr, lsr #16 - ldreq lr, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, lr, lsl #16 - eoreqs r0, r0, ip - bne 7f - subs r2, r2, #16 - bhs 6b - sub r1, r1, #2 - /* are we done? */ - adds r2, r2, #16 - moveq r0, #0 - beq 9b - /* finish off the remaining bytes */ - b 8b - -7: /* fix up the 2 pointers and fallthrough... 
*/ - sub r1, r1, #(4+2) - sub r4, r4, #4 - mov r2, #4 - b 8b - - -4: /*************** offset is 1 or 3 (less optimized) ***************/ - - stmfd sp!, {r5, r6, r7} - - // r5 = rhs - // r6 = lhs - // r7 = scratch - - mov r5, r0, lsl #3 /* r5 = right shift */ - rsb r6, r5, #32 /* r6 = left shift */ - - /* align the unaligned pointer */ - bic r1, r1, #3 - ldr r7, [r1], #4 - sub r2, r2, #8 - -6: mov ip, r7, lsr r5 - ldr r7, [r1], #4 - ldr r0, [r4], #4 - orr ip, ip, r7, lsl r6 - eors r0, r0, ip - moveq ip, r7, lsr r5 - ldreq r7, [r1], #4 - ldreq r0, [r4], #4 - orreq ip, ip, r7, lsl r6 - eoreqs r0, r0, ip - bne 7f - subs r2, r2, #8 - bhs 6b - - sub r1, r1, r6, lsr #3 - ldmfd sp!, {r5, r6, r7} - - /* are we done? */ - adds r2, r2, #8 - moveq r0, #0 - beq 9b - - /* finish off the remaining bytes */ - b 8b - -7: /* fix up the 2 pointers and fallthrough... */ - sub r1, r1, #4 - sub r1, r1, r6, lsr #3 - sub r4, r4, #4 - mov r2, #4 - ldmfd sp!, {r5, r6, r7} - b 8b - .fnend - .size memcmp, .-memcmp diff --git a/reference/bionic-a9/memcpy.S b/reference/bionic-a9/memcpy.S deleted file mode 100644 index b9d3c9b..0000000 --- a/reference/bionic-a9/memcpy.S +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * This code assumes it is running on a processor that supports all arm v7 - * instructions, that supports neon instructions, and that has a 32 byte - * cache line. - */ - - .text - .fpu neon - -#define CACHE_LINE_SIZE 32 - - .globl memcpy - .type memcpy,%function -memcpy: - .fnstart - .save {r0, lr} - /* start preloading as early as possible */ - pld [r1, #(CACHE_LINE_SIZE * 0)] - stmfd sp!, {r0, lr} - pld [r1, #(CACHE_LINE_SIZE * 2)] - - // Check so divider is at least 16 bytes, needed for alignment code. - cmp r2, #16 - blo 5f - - - /* check if buffers are aligned. 
If so, run arm-only version */ - eor r3, r0, r1 - ands r3, r3, #0x3 - beq 11f - - /* Check the upper size limit for Neon unaligned memory access in memcpy */ - cmp r2, #224 - blo 3f - - /* align destination to 16 bytes for the write-buffer */ - rsb r3, r0, #0 - ands r3, r3, #0xF - beq 3f - - /* copy up to 15-bytes (count in r3) */ - sub r2, r2, r3 - movs ip, r3, lsl #31 - ldrmib lr, [r1], #1 - strmib lr, [r0], #1 - ldrcsb ip, [r1], #1 - ldrcsb lr, [r1], #1 - strcsb ip, [r0], #1 - strcsb lr, [r0], #1 - movs ip, r3, lsl #29 - bge 1f - // copies 4 bytes, destination 32-bits aligned - vld1.32 {d0[0]}, [r1]! - vst1.32 {d0[0]}, [r0, :32]! -1: bcc 2f - // copies 8 bytes, destination 64-bits aligned - vld1.8 {d0}, [r1]! - vst1.8 {d0}, [r0, :64]! -2: - /* preload immediately the next cache line, which we may need */ - pld [r1, #(CACHE_LINE_SIZE * 0)] - pld [r1, #(CACHE_LINE_SIZE * 2)] -3: - /* make sure we have at least 64 bytes to copy */ - subs r2, r2, #64 - blo 2f - - /* preload all the cache lines we need */ - pld [r1, #(CACHE_LINE_SIZE * 4)] - pld [r1, #(CACHE_LINE_SIZE * 6)] - -1: /* The main loop copies 64 bytes at a time */ - vld1.8 {d0 - d3}, [r1]! - vld1.8 {d4 - d7}, [r1]! - pld [r1, #(CACHE_LINE_SIZE * 6)] - subs r2, r2, #64 - vst1.8 {d0 - d3}, [r0]! - vst1.8 {d4 - d7}, [r0]! - bhs 1b - -2: /* fix-up the remaining count and make sure we have >= 32 bytes left */ - add r2, r2, #64 - subs r2, r2, #32 - blo 4f - -3: /* 32 bytes at a time. These cache lines were already preloaded */ - vld1.8 {d0 - d3}, [r1]! - subs r2, r2, #32 - vst1.8 {d0 - d3}, [r0]! - bhs 3b - -4: /* less than 32 left */ - add r2, r2, #32 - tst r2, #0x10 - beq 5f - // copies 16 bytes, 128-bits aligned - vld1.8 {d0, d1}, [r1]! - vst1.8 {d0, d1}, [r0]! -5: /* copy up to 15-bytes (count in r2) */ - movs ip, r2, lsl #29 - bcc 1f - vld1.8 {d0}, [r1]! - vst1.8 {d0}, [r0]! -1: bge 2f - vld1.32 {d0[0]}, [r1]! - vst1.32 {d0[0]}, [r0]! 
-2: movs ip, r2, lsl #31 - ldrmib r3, [r1], #1 - ldrcsb ip, [r1], #1 - ldrcsb lr, [r1], #1 - strmib r3, [r0], #1 - strcsb ip, [r0], #1 - strcsb lr, [r0], #1 - - ldmfd sp!, {r0, lr} - bx lr -11: - /* Simple arm-only copy loop to handle aligned copy operations */ - stmfd sp!, {r4, r5, r6, r7, r8} - pld [r1, #(CACHE_LINE_SIZE * 4)] - - /* Check alignment */ - rsb r3, r1, #0 - ands r3, #3 - beq 2f - - /* align source to 32 bits. We need to insert 2 instructions between - * a ldr[b|h] and str[b|h] because byte and half-word instructions - * stall 2 cycles. - */ - movs r12, r3, lsl #31 - sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ - ldrmib r3, [r1], #1 - ldrcsb r4, [r1], #1 - ldrcsb r5, [r1], #1 - strmib r3, [r0], #1 - strcsb r4, [r0], #1 - strcsb r5, [r0], #1 - -2: - subs r2, r2, #64 - blt 4f - -3: /* Main copy loop, copying 64 bytes at a time */ - pld [r1, #(CACHE_LINE_SIZE * 8)] - ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr} - stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr} - ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr} - stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr} - subs r2, r2, #64 - bge 3b - -4: /* Check if there are > 32 bytes left */ - adds r2, r2, #64 - subs r2, r2, #32 - blt 5f - - /* Copy 32 bytes */ - ldmia r1!, {r3, r4, r5, r6, r7, r8, r12, lr} - stmia r0!, {r3, r4, r5, r6, r7, r8, r12, lr} - subs r2, #32 - -5: /* Handle any remaining bytes */ - adds r2, #32 - beq 6f - - movs r12, r2, lsl #28 - ldmcsia r1!, {r3, r4, r5, r6} /* 16 bytes */ - ldmmiia r1!, {r7, r8} /* 8 bytes */ - stmcsia r0!, {r3, r4, r5, r6} - stmmiia r0!, {r7, r8} - movs r12, r2, lsl #30 - ldrcs r3, [r1], #4 /* 4 bytes */ - ldrmih r4, [r1], #2 /* 2 bytes */ - strcs r3, [r0], #4 - strmih r4, [r0], #2 - tst r2, #0x1 - ldrneb r3, [r1] /* last byte */ - strneb r3, [r0] -6: - ldmfd sp!, {r4, r5, r6, r7, r8} - ldmfd sp!, {r0, pc} - .fnend - .size memcpy, .-memcpy diff --git a/reference/bionic-a9/memset.S b/reference/bionic-a9/memset.S deleted file mode 100644 index 1040986..0000000 
--- a/reference/bionic-a9/memset.S +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * This code assumes it is running on a processor that supports all arm v7 - * instructions and that supports neon instructions. - */ - - .text - .fpu neon - -/* memset() returns its first argument. */ - .globl memset - .type memset,%function -memset: - .fnstart - # The neon memset only wins for less than 132. 
- cmp r2, #132 - bhi 11f - - .save {r0} - stmfd sp!, {r0} - - vdup.8 q0, r1 - - /* make sure we have at least 32 bytes to write */ - subs r2, r2, #32 - blo 2f - vmov q1, q0 - -1: /* The main loop writes 32 bytes at a time */ - subs r2, r2, #32 - vst1.8 {d0 - d3}, [r0]! - bhs 1b - -2: /* less than 32 left */ - add r2, r2, #32 - tst r2, #0x10 - beq 3f - - // writes 16 bytes, 128-bits aligned - vst1.8 {d0, d1}, [r0]! -3: /* write up to 15-bytes (count in r2) */ - movs ip, r2, lsl #29 - bcc 1f - vst1.8 {d0}, [r0]! -1: bge 2f - vst1.32 {d0[0]}, [r0]! -2: movs ip, r2, lsl #31 - strmib r1, [r0], #1 - strcsb r1, [r0], #1 - strcsb r1, [r0], #1 - ldmfd sp!, {r0} - bx lr -11: - /* compute the offset to align the destination - * offset = (4-(src&3))&3 = -src & 3 - */ - - .save {r0, r4-r7, lr} - stmfd sp!, {r0, r4-r7, lr} - rsb r3, r0, #0 - ands r3, r3, #3 - cmp r3, r2 - movhi r3, r2 - - /* splat r1 */ - mov r1, r1, lsl #24 - orr r1, r1, r1, lsr #8 - orr r1, r1, r1, lsr #16 - - movs r12, r3, lsl #31 - strcsb r1, [r0], #1 /* can't use strh (alignment unknown) */ - strcsb r1, [r0], #1 - strmib r1, [r0], #1 - subs r2, r2, r3 - ldmlsfd sp!, {r0, r4-r7, lr} /* return */ - bxls lr - - /* align the destination to a cache-line */ - mov r12, r1 - mov lr, r1 - mov r4, r1 - mov r5, r1 - mov r6, r1 - mov r7, r1 - - rsb r3, r0, #0 - ands r3, r3, #0x1C - beq 3f - cmp r3, r2 - andhi r3, r2, #0x1C - sub r2, r2, r3 - - /* conditionally writes 0 to 7 words (length in r3) */ - movs r3, r3, lsl #28 - stmcsia r0!, {r1, lr} - stmcsia r0!, {r1, lr} - stmmiia r0!, {r1, lr} - movs r3, r3, lsl #2 - strcs r1, [r0], #4 - -3: - subs r2, r2, #32 - mov r3, r1 - bmi 2f -1: subs r2, r2, #32 - stmia r0!, {r1,r3,r4,r5,r6,r7,r12,lr} - bhs 1b -2: add r2, r2, #32 - - /* conditionally stores 0 to 31 bytes */ - movs r2, r2, lsl #28 - stmcsia r0!, {r1,r3,r12,lr} - stmmiia r0!, {r1, lr} - movs r2, r2, lsl #2 - strcs r1, [r0], #4 - strmih r1, [r0], #2 - movs r2, r2, lsl #2 - strcsb r1, [r0] - ldmfd sp!, {r0, r4-r7, lr} 
- bx lr - .fnend - .size memset, .-memset diff --git a/reference/bionic-a9/strcmp.S b/reference/bionic-a9/strcmp.S deleted file mode 100644 index 7e48079..0000000 --- a/reference/bionic-a9/strcmp.S +++ /dev/null @@ -1,545 +0,0 @@ -/* - * Copyright (c) 2013 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifdef __ARMEB__ -#define S2LOMEM lsl -#define S2LOMEMEQ lsleq -#define S2HIMEM lsr -#define MSB 0x000000ff -#define LSB 0xff000000 -#define BYTE0_OFFSET 24 -#define BYTE1_OFFSET 16 -#define BYTE2_OFFSET 8 -#define BYTE3_OFFSET 0 -#else /* not __ARMEB__ */ -#define S2LOMEM lsr -#define S2LOMEMEQ lsreq -#define S2HIMEM lsl -#define BYTE0_OFFSET 0 -#define BYTE1_OFFSET 8 -#define BYTE2_OFFSET 16 -#define BYTE3_OFFSET 24 -#define MSB 0xff000000 -#define LSB 0x000000ff -#endif /* not __ARMEB__ */ - -.syntax unified - -#if defined (__thumb__) - .thumb - .thumb_func -#endif - - .globl strcmp - .type strcmp,%function -strcmp: - .fnstart - /* Use LDRD whenever possible. */ - -/* The main thing to look out for when comparing large blocks is that - the loads do not cross a page boundary when loading past the index - of the byte with the first difference or the first string-terminator. - - For example, if the strings are identical and the string-terminator - is at index k, byte by byte comparison will not load beyond address - s1+k and s2+k; word by word comparison may load up to 3 bytes beyond - k; double word - up to 7 bytes. If the load of these bytes crosses - a page boundary, it might cause a memory fault (if the page is not mapped) - that would not have happened in byte by byte comparison. - - If an address is (double) word aligned, then a load of a (double) word - from that address will not cross a page boundary. - Therefore, the algorithm below considers word and double-word alignment - of strings separately. */ - -/* High-level description of the algorithm. - - * The fast path: if both strings are double-word aligned, - use LDRD to load two words from each string in every loop iteration. - * If the strings have the same offset from a word boundary, - use LDRB to load and compare byte by byte until - the first string is aligned to a word boundary (at most 3 bytes). - This is optimized for quick return on short unaligned strings. 
- * If the strings have the same offset from a double-word boundary, - use LDRD to load two words from each string in every loop iteration, as in the fast path. - * If the strings do not have the same offset from a double-word boundary, - load a word from the second string before the loop to initialize the queue. - Use LDRD to load two words from every string in every loop iteration. - Inside the loop, load the second word from the second string only after comparing - the first word, using the queued value, to guarantee safety across page boundaries. - * If the strings do not have the same offset from a word boundary, - use LDR and a shift queue. Order of loads and comparisons matters, - similarly to the previous case. - - * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value. - * The only difference between ARM and Thumb modes is the use of CBZ instruction. - * The only difference between big and little endian is the use of REV in little endian - to compute the return value, instead of MOV. -*/ - - .macro m_cbz reg label -#ifdef __thumb2__ - cbz \reg, \label -#else /* not defined __thumb2__ */ - cmp \reg, #0 - beq \label -#endif /* not defined __thumb2__ */ - .endm /* m_cbz */ - - .macro m_cbnz reg label -#ifdef __thumb2__ - cbnz \reg, \label -#else /* not defined __thumb2__ */ - cmp \reg, #0 - bne \label -#endif /* not defined __thumb2__ */ - .endm /* m_cbnz */ - - .macro init - /* Macro to save temporary registers and prepare magic values. */ - subs sp, sp, #16 - strd r4, r5, [sp, #8] - strd r6, r7, [sp] - mvn r6, #0 /* all F */ - mov r7, #0 /* all 0 */ - .endm /* init */ - - .macro magic_compare_and_branch w1 w2 label - /* Macro to compare registers w1 and w2 and conditionally branch to label. */ - cmp \w1, \w2 /* Are w1 and w2 the same? */ - magic_find_zero_bytes \w1 - it eq - cmpeq ip, #0 /* Is there a zero byte in w1? 
*/ - bne \label - .endm /* magic_compare_and_branch */ - - .macro magic_find_zero_bytes w1 - /* Macro to find all-zero bytes in w1, result is in ip. */ -#if (defined (__ARM_FEATURE_DSP)) - uadd8 ip, \w1, r6 - sel ip, r7, r6 -#else /* not defined (__ARM_FEATURE_DSP) */ - /* __ARM_FEATURE_DSP is not defined for some Cortex-M processors. - Coincidently, these processors only have Thumb-2 mode, where we can use the - the (large) magic constant available directly as an immediate in instructions. - Note that we cannot use the magic constant in ARM mode, where we need - to create the constant in a register. */ - sub ip, \w1, #0x01010101 - bic ip, ip, \w1 - and ip, ip, #0x80808080 -#endif /* not defined (__ARM_FEATURE_DSP) */ - .endm /* magic_find_zero_bytes */ - - .macro setup_return w1 w2 -#ifdef __ARMEB__ - mov r1, \w1 - mov r2, \w2 -#else /* not __ARMEB__ */ - rev r1, \w1 - rev r2, \w2 -#endif /* not __ARMEB__ */ - .endm /* setup_return */ - - pld [r0, #0] - pld [r1, #0] - - /* Are both strings double-word aligned? */ - orr ip, r0, r1 - tst ip, #7 - bne do_align - - /* Fast path. */ - init - -doubleword_aligned: - - /* Get here when the strings to compare are double-word aligned. */ - /* Compare two words in every iteration. */ - .p2align 2 -2: - pld [r0, #16] - pld [r1, #16] - - /* Load the next double-word from each string. */ - ldrd r2, r3, [r0], #8 - ldrd r4, r5, [r1], #8 - - magic_compare_and_branch w1=r2, w2=r4, label=return_24 - magic_compare_and_branch w1=r3, w2=r5, label=return_35 - b 2b - -do_align: - /* Is the first string word-aligned? */ - ands ip, r0, #3 - beq word_aligned_r0 - - /* Fast compare byte by byte until the first string is word-aligned. */ - /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes - to read until the next word boundary is 4-ip. 
*/ - bic r0, r0, #3 - ldr r2, [r0], #4 - lsls ip, ip, #31 - beq byte2 - bcs byte3 - -byte1: - ldrb ip, [r1], #1 - uxtb r3, r2, ror #BYTE1_OFFSET - subs ip, r3, ip - bne fast_return - m_cbz reg=r3, label=fast_return - -byte2: - ldrb ip, [r1], #1 - uxtb r3, r2, ror #BYTE2_OFFSET - subs ip, r3, ip - bne fast_return - m_cbz reg=r3, label=fast_return - -byte3: - ldrb ip, [r1], #1 - uxtb r3, r2, ror #BYTE3_OFFSET - subs ip, r3, ip - bne fast_return - m_cbnz reg=r3, label=word_aligned_r0 - -fast_return: - mov r0, ip - bx lr - -word_aligned_r0: - init - /* The first string is word-aligned. */ - /* Is the second string word-aligned? */ - ands ip, r1, #3 - bne strcmp_unaligned - -word_aligned: - /* The strings are word-aligned. */ - /* Is the first string double-word aligned? */ - tst r0, #4 - beq doubleword_aligned_r0 - - /* If r0 is not double-word aligned yet, align it by loading - and comparing the next word from each string. */ - ldr r2, [r0], #4 - ldr r4, [r1], #4 - magic_compare_and_branch w1=r2 w2=r4 label=return_24 - -doubleword_aligned_r0: - /* Get here when r0 is double-word aligned. */ - /* Is r1 doubleword_aligned? */ - tst r1, #4 - beq doubleword_aligned - - /* Get here when the strings to compare are word-aligned, - r0 is double-word aligned, but r1 is not double-word aligned. */ - - /* Initialize the queue. */ - ldr r5, [r1], #4 - - /* Compare two words in every iteration. */ - .p2align 2 -3: - pld [r0, #16] - pld [r1, #16] - - /* Load the next double-word from each string and compare. */ - ldrd r2, r3, [r0], #8 - magic_compare_and_branch w1=r2 w2=r5 label=return_25 - ldrd r4, r5, [r1], #8 - magic_compare_and_branch w1=r3 w2=r4 label=return_34 - b 3b - - .macro miscmp_word offsetlo offsethi - /* Macro to compare misaligned strings. */ - /* r0, r1 are word-aligned, and at least one of the strings - is not double-word aligned. */ - /* Compare one word in every loop iteration. 
*/ - /* OFFSETLO is the original bit-offset of r1 from a word-boundary, - OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word). */ - - /* Initialize the shift queue. */ - ldr r5, [r1], #4 - - /* Compare one word from each string in every loop iteration. */ - .p2align 2 -7: - ldr r3, [r0], #4 - S2LOMEM r5, r5, #\offsetlo - magic_find_zero_bytes w1=r3 - cmp r7, ip, S2HIMEM #\offsetlo - and r2, r3, r6, S2LOMEM #\offsetlo - it eq - cmpeq r2, r5 - bne return_25 - ldr r5, [r1], #4 - cmp ip, #0 - eor r3, r2, r3 - S2HIMEM r2, r5, #\offsethi - it eq - cmpeq r3, r2 - bne return_32 - b 7b - .endm /* miscmp_word */ - -return_32: - setup_return w1=r3, w2=r2 - b do_return -return_34: - setup_return w1=r3, w2=r4 - b do_return -return_25: - setup_return w1=r2, w2=r5 - b do_return -return_35: - setup_return w1=r3, w2=r5 - b do_return -return_24: - setup_return w1=r2, w2=r4 - -do_return: - -#ifdef __ARMEB__ - mov r0, ip -#else /* not __ARMEB__ */ - rev r0, ip -#endif /* not __ARMEB__ */ - - /* Restore temporaries early, before computing the return value. */ - ldrd r6, r7, [sp] - ldrd r4, r5, [sp, #8] - adds sp, sp, #16 - - /* There is a zero or a different byte between r1 and r2. */ - /* r0 contains a mask of all-zero bytes in r1. */ - /* Using r0 and not ip here because cbz requires low register. */ - m_cbz reg=r0, label=compute_return_value - clz r0, r0 - /* r0 contains the number of bits on the left of the first all-zero byte in r1. */ - rsb r0, r0, #24 - /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1. */ - lsr r1, r1, r0 - lsr r2, r2, r0 - -compute_return_value: - movs r0, #1 - cmp r1, r2 - /* The return value is computed as follows. - If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return. - If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0, - which means r0:=r0-r0-1 and r0 is #-1 at return. 
- If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1, - which means r0:=r0-r0 and r0 is #0 at return. - (C==0 and Z==1) cannot happen because the carry bit is "not borrow". */ - it ls - sbcls r0, r0, r0 - bx lr - - /* The code from the previous version of strcmp.S handles all of the - * cases where the first string and seconds string cannot both be - * aligned to a word boundary faster than the new algorithm. See - * bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S for the unedited - * version of the code. - */ -strcmp_unaligned: - wp1 .req r0 - wp2 .req r1 - b1 .req r2 - w1 .req r4 - w2 .req r5 - t1 .req ip - @ r3 is scratch - -2: - mov b1, #1 - orr b1, b1, b1, lsl #8 - orr b1, b1, b1, lsl #16 - - and t1, wp2, #3 - bic wp2, wp2, #3 - ldr w1, [wp1], #4 - ldr w2, [wp2], #4 - cmp t1, #2 - beq 2f - bhi 3f - - /* Critical inner Loop: Block with 3 bytes initial overlap */ - .p2align 2 -1: - bic t1, w1, #MSB - cmp t1, w2, S2LOMEM #8 - sub r3, w1, b1 - bic r3, r3, w1 - bne 4f - ands r3, r3, b1, lsl #7 - it eq - ldreq w2, [wp2], #4 - bne 5f - eor t1, t1, w1 - cmp t1, w2, S2HIMEM #24 - bne 6f - ldr w1, [wp1], #4 - b 1b -4: - S2LOMEM w2, w2, #8 - b 8f - -5: -#ifdef __ARMEB__ - /* The syndrome value may contain false ones if the string ends - * with the bytes 0x01 0x00 - */ - tst w1, #0xff000000 - itt ne - tstne w1, #0x00ff0000 - tstne w1, #0x0000ff00 - beq 7f -#else - bics r3, r3, #0xff000000 - bne 7f -#endif - ldrb w2, [wp2] - S2LOMEM t1, w1, #24 -#ifdef __ARMEB__ - lsl w2, w2, #24 -#endif - b 8f - -6: - S2LOMEM t1, w1, #24 - and w2, w2, #LSB - b 8f - - /* Critical inner Loop: Block with 2 bytes initial overlap */ - .p2align 2 -2: - S2HIMEM t1, w1, #16 - sub r3, w1, b1 - S2LOMEM t1, t1, #16 - bic r3, r3, w1 - cmp t1, w2, S2LOMEM #16 - bne 4f - ands r3, r3, b1, lsl #7 - it eq - ldreq w2, [wp2], #4 - bne 5f - eor t1, t1, w1 - cmp t1, w2, S2HIMEM #16 - bne 6f - ldr w1, [wp1], #4 - b 2b - -5: -#ifdef __ARMEB__ - /* The syndrome value may contain false ones if the 
string ends - * with the bytes 0x01 0x00 - */ - tst w1, #0xff000000 - it ne - tstne w1, #0x00ff0000 - beq 7f -#else - lsls r3, r3, #16 - bne 7f -#endif - ldrh w2, [wp2] - S2LOMEM t1, w1, #16 -#ifdef __ARMEB__ - lsl w2, w2, #16 -#endif - b 8f - -6: - S2HIMEM w2, w2, #16 - S2LOMEM t1, w1, #16 -4: - S2LOMEM w2, w2, #16 - b 8f - - /* Critical inner Loop: Block with 1 byte initial overlap */ - .p2align 2 -3: - and t1, w1, #LSB - cmp t1, w2, S2LOMEM #24 - sub r3, w1, b1 - bic r3, r3, w1 - bne 4f - ands r3, r3, b1, lsl #7 - it eq - ldreq w2, [wp2], #4 - bne 5f - eor t1, t1, w1 - cmp t1, w2, S2HIMEM #8 - bne 6f - ldr w1, [wp1], #4 - b 3b -4: - S2LOMEM w2, w2, #24 - b 8f -5: - /* The syndrome value may contain false ones if the string ends - * with the bytes 0x01 0x00 - */ - tst w1, #LSB - beq 7f - ldr w2, [wp2], #4 -6: - S2LOMEM t1, w1, #8 - bic w2, w2, #MSB - b 8f -7: - mov r0, #0 - - /* Restore registers and stack. */ - ldrd r6, r7, [sp] - ldrd r4, r5, [sp, #8] - adds sp, sp, #16 - - bx lr - -8: - and r2, t1, #LSB - and r0, w2, #LSB - cmp r0, #1 - it cs - cmpcs r0, r2 - itt eq - S2LOMEMEQ t1, t1, #8 - S2LOMEMEQ w2, w2, #8 - beq 8b - sub r0, r2, r0 - - /* Restore registers and stack. */ - ldrd r6, r7, [sp] - ldrd r4, r5, [sp, #8] - adds sp, sp, #16 - - bx lr - .fnend - .size strcmp, .-strcmp diff --git a/reference/bionic-a9/strcpy.S b/reference/bionic-a9/strcpy.S deleted file mode 100644 index 9925378..0000000 --- a/reference/bionic-a9/strcpy.S +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * Copyright (c) 2008 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Android adaptation and tweak by Jim Huang <jserv@0xlab.org>. - */ - - .globl strcpy - .type strcpy, %function - .text - -strcpy: - .fnstart - PLD [r1, #0] - eor r2, r0, r1 - mov ip, r0 - tst r2, #3 - bne 4f - tst r1, #3 - bne 3f -5: - str r5, [sp, #-4]! - mov r5, #0x01 - orr r5, r5, r5, lsl #8 - orr r5, r5, r5, lsl #16 - - str r4, [sp, #-4]! - tst r1, #4 - ldr r3, [r1], #4 - beq 2f - sub r2, r3, r5 - bics r2, r2, r3 - tst r2, r5, lsl #7 - itt eq - streq r3, [ip], #4 - ldreq r3, [r1], #4 - bne 1f - /* Inner loop. We now know that r1 is 64-bit aligned, so we - can safely fetch up to two words. This allows us to avoid - load stalls. 
*/ - .p2align 2 -2: - PLD [r1, #8] - ldr r4, [r1], #4 - sub r2, r3, r5 - bics r2, r2, r3 - tst r2, r5, lsl #7 - sub r2, r4, r5 - bne 1f - str r3, [ip], #4 - bics r2, r2, r4 - tst r2, r5, lsl #7 - itt eq - ldreq r3, [r1], #4 - streq r4, [ip], #4 - beq 2b - mov r3, r4 -1: -#ifdef __ARMEB__ - rors r3, r3, #24 -#endif - strb r3, [ip], #1 - tst r3, #0xff -#ifdef __ARMEL__ - ror r3, r3, #8 -#endif - bne 1b - ldr r4, [sp], #4 - ldr r5, [sp], #4 - bx lr - - /* Strings have the same offset from word alignment, but it's - not zero. */ -3: - tst r1, #1 - beq 1f - ldrb r2, [r1], #1 - strb r2, [ip], #1 - cmp r2, #0 - it eq - bxeq lr -1: - tst r1, #2 - beq 5b - ldrh r2, [r1], #2 -#ifdef __ARMEB__ - tst r2, #0xff00 - iteet ne - strneh r2, [ip], #2 - lsreq r2, r2, #8 - streqb r2, [ip] - tstne r2, #0xff -#else - tst r2, #0xff - itet ne - strneh r2, [ip], #2 - streqb r2, [ip] - tstne r2, #0xff00 -#endif - bne 5b - bx lr - - /* src and dst do not have a common word-alignement. Fall back to - byte copying. */ -4: - ldrb r2, [r1], #1 - strb r2, [ip], #1 - cmp r2, #0 - bne 4b - bx lr - - .fnend diff --git a/reference/bionic-a9/strlen.c b/reference/bionic-a9/strlen.c deleted file mode 100644 index 8781d76..0000000 --- a/reference/bionic-a9/strlen.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <string.h> -#include <stdint.h> -#undef strlen - -#define __ARM_HAVE_PLD 1 - -size_t strlen(const char *s) -{ - __builtin_prefetch(s); - __builtin_prefetch(s+32); - - union { - const char *b; - const uint32_t *w; - uintptr_t i; - } u; - - // these are some scratch variables for the asm code below - uint32_t v, t; - - // initialize the string length to zero - size_t l = 0; - - // align the pointer to a 32-bit word boundary - u.b = s; - while (u.i & 0x3) { - if (__builtin_expect(*u.b++ == 0, 0)) { - goto done; - } - l++; - } - - // loop for each word, testing if it contains a zero byte - // if so, exit the loop and update the length. - // We need to process 32 bytes per loop to schedule PLD properly - // and achieve the maximum bus speed. 
- asm( - "ldr %[v], [%[s]], #4 \n" - "sub %[l], %[l], %[s] \n" - "0: \n" -#if __ARM_HAVE_PLD - "pld [%[s], #64] \n" -#endif - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" -#if !defined(__OPTIMIZE_SIZE__) - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]] , #4 \n" - "bne 1f \n" - "sub %[t], %[v], %[mask], lsr #7\n" - "and %[t], %[t], %[mask] \n" - "bics %[t], %[t], %[v] \n" - "ldreq %[v], [%[s]], #4 \n" -#endif - "beq 0b \n" - "1: \n" - "add %[l], %[l], %[s] \n" - "tst %[v], #0xFF \n" - "beq 2f \n" - "add %[l], %[l], #1 \n" - "tst %[v], #0xFF00 \n" - "beq 2f \n" - "add %[l], %[l], #1 \n" - "tst %[v], #0xFF0000 \n" - "addne %[l], %[l], #1 \n" - "2: \n" - : [l]"=&r"(l), [v]"=&r"(v), [t]"=&r"(t), [s]"=&r"(u.b) - : "%[l]"(l), "%[s]"(u.b), [mask]"r"(0x80808080UL) - : "cc" - ); - -done: - return l; -} diff --git a/reference/bionic-c/bcopy.c b/reference/bionic-c/bcopy.c deleted file mode 100644 index 4308c64..0000000 --- a/reference/bionic-c/bcopy.c +++ /dev/null @@ -1,128 +0,0 @@ -/* $OpenBSD: bcopy.c,v 1.5 2005/08/08 08:05:37 espie Exp $ */ -/*- - * Copyright (c) 1990 The Regents of the University of California. 
- * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Chris Torek. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <string.h> - -/* - * sizeof(word) MUST BE A POWER OF TWO - * SO THAT wmask BELOW IS ALL ONES - */ -typedef long word; /* "word" used for optimal copy speed */ - -#define wsize sizeof(word) -#define wmask (wsize - 1) - -/* - * Copy a block of memory, handling overlap. 
- * This is the routine that actually implements - * (the portable versions of) bcopy, memcpy, and memmove. - */ -#ifdef MEMCOPY -void * -memcpy(void *dst0, const void *src0, size_t length) -#else -#ifdef MEMMOVE -void * -memmove(void *dst0, const void *src0, size_t length) -#else -void -bcopy(const void *src0, void *dst0, size_t length) -#endif -#endif -{ - char *dst = dst0; - const char *src = src0; - size_t t; - - if (length == 0 || dst == src) /* nothing to do */ - goto done; - - /* - * Macros: loop-t-times; and loop-t-times, t>0 - */ -#define TLOOP(s) if (t) TLOOP1(s) -#define TLOOP1(s) do { s; } while (--t) - - if ((unsigned long)dst < (unsigned long)src) { - /* - * Copy forward. - */ - t = (long)src; /* only need low bits */ - if ((t | (long)dst) & wmask) { - /* - * Try to align operands. This cannot be done - * unless the low bits match. - */ - if ((t ^ (long)dst) & wmask || length < wsize) - t = length; - else - t = wsize - (t & wmask); - length -= t; - TLOOP1(*dst++ = *src++); - } - /* - * Copy whole words, then mop up any trailing bytes. - */ - t = length / wsize; - TLOOP(*(word *)dst = *(word *)src; src += wsize; dst += wsize); - t = length & wmask; - TLOOP(*dst++ = *src++); - } else { - /* - * Copy backwards. Otherwise essentially the same. - * Alignment works as before, except that it takes - * (t&wmask) bytes to align, not wsize-(t&wmask). 
- */ - src += length; - dst += length; - t = (long)src; - if ((t | (long)dst) & wmask) { - if ((t ^ (long)dst) & wmask || length <= wsize) - t = length; - else - t &= wmask; - length -= t; - TLOOP1(*--dst = *--src); - } - t = length / wsize; - TLOOP(src -= wsize; dst -= wsize; *(word *)dst = *(word *)src); - t = length & wmask; - TLOOP(*--dst = *--src); - } -done: -#if defined(MEMCOPY) || defined(MEMMOVE) - return (dst0); -#else - return; -#endif -} diff --git a/reference/bionic-c/memchr.c b/reference/bionic-c/memchr.c deleted file mode 100644 index b14167a..0000000 --- a/reference/bionic-c/memchr.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include <stddef.h> -#include <string.h> - -void *memchr(const void *s, int c, size_t n) -{ - const unsigned char* p = s; - const unsigned char* end = p + n; - - for (;;) { - if (p >= end || p[0] == c) break; p++; - if (p >= end || p[0] == c) break; p++; - if (p >= end || p[0] == c) break; p++; - if (p >= end || p[0] == c) break; p++; - } - if (p >= end) - return NULL; - else - return (void*) p; -} diff --git a/reference/bionic-c/memcmp.c b/reference/bionic-c/memcmp.c deleted file mode 100644 index 8640954..0000000 --- a/reference/bionic-c/memcmp.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include <string.h> - -int memcmp(const void *s1, const void *s2, size_t n) -{ - const unsigned char* p1 = s1; - const unsigned char* end1 = p1 + n; - const unsigned char* p2 = s2; - int d = 0; - - for (;;) { - if (d || p1 >= end1) break; - d = (int)*p1++ - (int)*p2++; - - if (d || p1 >= end1) break; - d = (int)*p1++ - (int)*p2++; - - if (d || p1 >= end1) break; - d = (int)*p1++ - (int)*p2++; - - if (d || p1 >= end1) break; - d = (int)*p1++ - (int)*p2++; - } - return d; -} diff --git a/reference/bionic-c/memcpy.c b/reference/bionic-c/memcpy.c deleted file mode 100644 index dea78b2..0000000 --- a/reference/bionic-c/memcpy.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#define MEMCOPY -#include "bcopy.c" diff --git a/reference/bionic-c/memset.c b/reference/bionic-c/memset.c deleted file mode 100644 index 41dafb2..0000000 --- a/reference/bionic-c/memset.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include <string.h> -#include <stdint.h> - -void* memset(void* dst, int c, size_t n) -{ - char* q = dst; - char* end = q + n; - - for (;;) { - if (q >= end) break; *q++ = (char) c; - if (q >= end) break; *q++ = (char) c; - if (q >= end) break; *q++ = (char) c; - if (q >= end) break; *q++ = (char) c; - } - - return dst; -} diff --git a/reference/bionic-c/strchr.c b/reference/bionic-c/strchr.c deleted file mode 100644 index 3f9aec5..0000000 --- a/reference/bionic-c/strchr.c +++ /dev/null @@ -1,44 +0,0 @@ -/* $OpenBSD: index.c,v 1.5 2005/08/08 08:05:37 espie Exp $ */ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <string.h> -#undef strchr - -char * -strchr(const char *p, int ch) -{ - for (;; ++p) { - if (*p == (char) ch) - return((char *)p); - if (!*p) - return((char *)NULL); - } - /* NOTREACHED */ -} diff --git a/reference/bionic-c/strcmp.c b/reference/bionic-c/strcmp.c deleted file mode 100644 index c4e4783..0000000 --- a/reference/bionic-c/strcmp.c +++ /dev/null @@ -1,52 +0,0 @@ -/* $OpenBSD: strcmp.c,v 1.7 2005/08/08 08:05:37 espie Exp $ */ - -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Chris Torek. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if !defined(_KERNEL) && !defined(_STANDALONE) -#include <string.h> -#undef strcmp -#else -#include <lib/libkern/libkern.h> -#endif - -/* - * Compare strings. - */ -int -strcmp(const char *s1, const char *s2) -{ - while (*s1 == *s2++) - if (*s1++ == 0) - return (0); - return (*(unsigned char *)s1 - *(unsigned char *)--s2); -} diff --git a/reference/bionic-c/strcpy.c b/reference/bionic-c/strcpy.c deleted file mode 100644 index eb21d67..0000000 --- a/reference/bionic-c/strcpy.c +++ /dev/null @@ -1,41 +0,0 @@ -/* $OpenBSD: strcpy.c,v 1.8 2005/08/08 08:05:37 espie Exp $ */ - -/* - * Copyright (c) 1988 Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <string.h> - -char * -strcpy(char *to, const char *from) -{ - char *save = to; - - for (; (*to = *from) != '\0'; ++from, ++to); - return(save); -} diff --git a/reference/bionic-c/strlen.c b/reference/bionic-c/strlen.c deleted file mode 100644 index 12d9ec4..0000000 --- a/reference/bionic-c/strlen.c +++ /dev/null @@ -1,47 +0,0 @@ -/* $OpenBSD: strlen.c,v 1.7 2005/08/08 08:05:37 espie Exp $ */ - -/*- - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if !defined(_KERNEL) && !defined(_STANDALONE) -#include <string.h> -#else -#include <lib/libkern/libkern.h> -#endif - -size_t -strlen(const char *str) -{ - const char *s; - - for (s = str; *s; ++s) - ; - return (s - str); -} - diff --git a/reference/csl/arm_asm.h b/reference/csl/arm_asm.h deleted file mode 100644 index 2e0b1dd..0000000 --- a/reference/csl/arm_asm.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2009 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ARM_ASM__H -#define ARM_ASM__H - -/* First define some macros that keep everything else sane. 
*/ -#if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__) -#define _ISA_ARM_7 -#endif - -#if defined (_ISA_ARM_7) || defined (__ARM_ARCH_6__) || \ - defined (__ARM_ARCH_6J__) || defined (__ARM_ARCH_6T2__) || \ - defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__) || \ - defined (__ARM_ARCH_6Z__) -#define _ISA_ARM_6 -#endif - -#if defined (_ISA_ARM_6) || defined (__ARM_ARCH_5__) || \ - defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5TE__) || \ - defined (__ARM_ARCH_5TEJ__) -#define _ISA_ARM_5 -#endif - -#if defined (_ISA_ARM_5) || defined (__ARM_ARCH_4T__) -#define _ISA_ARM_4T -#endif - -#if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7__) || \ - defined (__ARM_ARCH_7EM__) -#define _ISA_THUMB_2 -#endif - -#if defined (_ISA_THUMB_2) || defined (__ARM_ARCH_6M__) -#define _ISA_THUMB_1 -#endif - - -/* Now some macros for common instruction sequences. */ - -asm(".macro RETURN cond=\n\t" -#if defined (_ISA_ARM_4T) || defined (_ISA_THUMB_1) - "bx\\cond lr\n\t" -#else - "mov\\cond pc, lr\n\t" -#endif - ".endm" - ); - -asm(".macro optpld base, offset=#0\n\t" -#if defined (_ISA_ARM_7) - "pld [\\base, \\offset]\n\t" -#endif - ".endm" - ); - -#endif /* ARM_ASM__H */ diff --git a/reference/csl/memcpy.c b/reference/csl/memcpy.c deleted file mode 100644 index 9c8270b..0000000 --- a/reference/csl/memcpy.c +++ /dev/null @@ -1,291 +0,0 @@ -/* Copyright (c) 2009 CodeSourcery, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of CodeSourcery nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY CODESOURCERY, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL CODESOURCERY BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arm_asm.h" -#include <string.h> -#include <stdint.h> -#include <stddef.h> - -/* Standard operations for word-sized values. */ -#define WORD_REF(ADDRESS, OFFSET) \ - *((WORD_TYPE*)((char*)(ADDRESS) + (OFFSET))) -#define WORD_COPY(OUT, IN, OFFSET) \ - WORD_REF(OUT, OFFSET) = WORD_REF(IN, OFFSET) - -/* On processors with NEON, we use 128-bit vectors. Also, - we need to include arm_neon.h to use these. */ -#if defined(__ARM_NEON__) - #include <arm_neon.h> - - #define WORD_TYPE uint8x16_t - #define WORD_SIZE 16 - #define MAYBE_PREFETCH(IN) __builtin_prefetch((IN), 0, 0) - -/* On ARM processors with 64-bit ldrd instructions, we use those, - except on Cortex-M* where benchmarking has shown them to - be slower. 
*/ -#elif defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) || defined(_ISA_ARM_6) - #define WORD_TYPE uint64_t - #define WORD_SIZE 8 - #define MAYBE_PREFETCH(IN) __builtin_prefetch((IN), 0, 0) - -/* On everything else, we use 32-bit loads and stores, and - do not use prefetching. */ -#else - #define WORD_TYPE uint32_t - #define WORD_SIZE 4 - #define MAYBE_PREFETCH(IN) -#endif - -/* On all ARM platforms, 'SHORTWORD' is a 32-bit value. */ -#define SHORTWORD_TYPE uint32_t -#define SHORTWORD_SIZE 4 -#define SHORTWORD_REF(ADDRESS, OFFSET) \ - *((SHORTWORD_TYPE*)((char*)(ADDRESS) + (OFFSET))) -#define SHORTWORD_COPY(OUT, IN, OFFSET) \ - SHORTWORD_REF(OUT, OFFSET) = SHORTWORD_REF(IN, OFFSET) - -/* Shifting directionality depends on endianness. */ -#ifdef __ARMEB__ -#define SHORTWORD_SHIFT(IN0, IN1, OFFSET) \ - ((IN0) << ((OFFSET)*8)) | ((IN1) >> (SHORTWORD_SIZE*8 - (OFFSET)*8)) -#else -#define SHORTWORD_SHIFT(IN0, IN1, OFFSET) \ - ((IN0) >> ((OFFSET)*8)) | ((IN1) << (SHORTWORD_SIZE*8 - (OFFSET)*8)) -#endif - -void *memcpy(void *OUT, const void *IN, size_t N) -{ - void* OUT0 = OUT; - -#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) - const char* OUT_end = (char*)OUT + N; - while ((char*)OUT < OUT_end) { - *((char*)OUT) = *((char*)IN); - OUT++; - IN++; - } - - return OUT0; -#else - /* Handle short strings and immediately return. */ - if (__builtin_expect(N < SHORTWORD_SIZE, 1)) { - size_t i = 0; - while (i < N) { - ((char*)OUT)[i] = ((char*)IN)[i]; - i++; - } - return OUT; - } - - const char* OUT_end = (char*)OUT + N; - - /* Align OUT to SHORTWORD_SIZE. */ - while ((uintptr_t)OUT % SHORTWORD_SIZE != 0) { - *(char*) (OUT++) = *(char*) (IN++); - } - - if ((uintptr_t) IN % SHORTWORD_SIZE == 0) { - -#if WORD_SIZE > SHORTWORD_SIZE - /* Align OUT to WORD_SIZE in steps of SHORTWORD_SIZE. 
*/ - if (__builtin_expect(OUT_end - (char*)OUT >= WORD_SIZE, 0)) { - while ((uintptr_t)OUT % WORD_SIZE != 0) { - SHORTWORD_COPY(OUT, IN, 0); - OUT += SHORTWORD_SIZE; - IN += SHORTWORD_SIZE; - } - - if ((uintptr_t) IN % WORD_SIZE == 0) { -#endif /* WORD_SIZE > SHORTWORD_SIZE */ - -#if defined(__ARM_NEON__) - /* Testing on Cortex-A8 indicates that the following idiom - produces faster assembly code when doing vector copies, - but not when doing regular copies. */ - size_t i = 0; - N = OUT_end - (char*)OUT; - MAYBE_PREFETCH(IN + 64); - MAYBE_PREFETCH(IN + 128); - MAYBE_PREFETCH(IN + 192); - if (N >= 640) { - MAYBE_PREFETCH(IN + 256); - MAYBE_PREFETCH(IN + 320); - MAYBE_PREFETCH(IN + 384); - MAYBE_PREFETCH(IN + 448); - MAYBE_PREFETCH(IN + 512); - MAYBE_PREFETCH(IN + 576); - MAYBE_PREFETCH(IN + 640); - MAYBE_PREFETCH(IN + 704); - /* We phrase the loop condition in this way so that the - i + WORD_SIZE * 16 value can be reused to increment i. */ - while (i + WORD_SIZE * 16 <= N - 640) { - MAYBE_PREFETCH(IN + 768); - MAYBE_PREFETCH(IN + 832); - MAYBE_PREFETCH(IN + 896); - MAYBE_PREFETCH(IN + 960); - WORD_COPY(OUT, IN, i); - WORD_COPY(OUT, IN, i + WORD_SIZE * 1); - WORD_COPY(OUT, IN, i + WORD_SIZE * 2); - WORD_COPY(OUT, IN, i + WORD_SIZE * 3); - WORD_COPY(OUT, IN, i + WORD_SIZE * 4); - WORD_COPY(OUT, IN, i + WORD_SIZE * 5); - WORD_COPY(OUT, IN, i + WORD_SIZE * 6); - WORD_COPY(OUT, IN, i + WORD_SIZE * 7); - WORD_COPY(OUT, IN, i + WORD_SIZE * 8); - WORD_COPY(OUT, IN, i + WORD_SIZE * 9); - WORD_COPY(OUT, IN, i + WORD_SIZE * 10); - WORD_COPY(OUT, IN, i + WORD_SIZE * 11); - WORD_COPY(OUT, IN, i + WORD_SIZE * 12); - WORD_COPY(OUT, IN, i + WORD_SIZE * 13); - WORD_COPY(OUT, IN, i + WORD_SIZE * 14); - WORD_COPY(OUT, IN, i + WORD_SIZE * 15); - i += WORD_SIZE * 16; - } - } - while (i + WORD_SIZE * 16 <= N) { - WORD_COPY(OUT, IN, i); - WORD_COPY(OUT, IN, i + WORD_SIZE * 1); - WORD_COPY(OUT, IN, i + WORD_SIZE * 2); - WORD_COPY(OUT, IN, i + WORD_SIZE * 3); - WORD_COPY(OUT, IN, i + 
WORD_SIZE * 4); - WORD_COPY(OUT, IN, i + WORD_SIZE * 5); - WORD_COPY(OUT, IN, i + WORD_SIZE * 6); - WORD_COPY(OUT, IN, i + WORD_SIZE * 7); - WORD_COPY(OUT, IN, i + WORD_SIZE * 8); - WORD_COPY(OUT, IN, i + WORD_SIZE * 9); - WORD_COPY(OUT, IN, i + WORD_SIZE * 10); - WORD_COPY(OUT, IN, i + WORD_SIZE * 11); - WORD_COPY(OUT, IN, i + WORD_SIZE * 12); - WORD_COPY(OUT, IN, i + WORD_SIZE * 13); - WORD_COPY(OUT, IN, i + WORD_SIZE * 14); - WORD_COPY(OUT, IN, i + WORD_SIZE * 15); - i += WORD_SIZE * 16; - } - while (i + WORD_SIZE * 4 <= N) { - WORD_COPY(OUT, IN, i); - WORD_COPY(OUT, IN, i + WORD_SIZE * 1); - WORD_COPY(OUT, IN, i + WORD_SIZE * 2); - WORD_COPY(OUT, IN, i + WORD_SIZE * 3); - i += WORD_SIZE * 4; - } - while (i + WORD_SIZE <= N) { - WORD_COPY(OUT, IN, i); - i += WORD_SIZE; - } - OUT += i; - IN += i; -#else /* not defined(__ARM_NEON__) */ - /* Note: 16-times unrolling is about 20% faster than 4-times - unrolling on both ARM Cortex-A8 and Cortex-M3. */ - MAYBE_PREFETCH(IN + 64); - MAYBE_PREFETCH(IN + 128); - MAYBE_PREFETCH(IN + 192); - while (OUT_end - (char*)OUT >= WORD_SIZE * 16) { - MAYBE_PREFETCH(IN + 256); - MAYBE_PREFETCH(IN + 320); - WORD_COPY(OUT, IN, 0); - WORD_COPY(OUT, IN, WORD_SIZE * 1); - WORD_COPY(OUT, IN, WORD_SIZE * 2); - WORD_COPY(OUT, IN, WORD_SIZE * 3); - WORD_COPY(OUT, IN, WORD_SIZE * 4); - WORD_COPY(OUT, IN, WORD_SIZE * 5); - WORD_COPY(OUT, IN, WORD_SIZE * 6); - WORD_COPY(OUT, IN, WORD_SIZE * 7); - WORD_COPY(OUT, IN, WORD_SIZE * 8); - WORD_COPY(OUT, IN, WORD_SIZE * 9); - WORD_COPY(OUT, IN, WORD_SIZE * 10); - WORD_COPY(OUT, IN, WORD_SIZE * 11); - WORD_COPY(OUT, IN, WORD_SIZE * 12); - WORD_COPY(OUT, IN, WORD_SIZE * 13); - WORD_COPY(OUT, IN, WORD_SIZE * 14); - WORD_COPY(OUT, IN, WORD_SIZE * 15); - OUT += WORD_SIZE * 16; - IN += WORD_SIZE * 16; - } - while (WORD_SIZE * 4 <= OUT_end - (char*)OUT) { - WORD_COPY(OUT, IN, 0); - WORD_COPY(OUT, IN, WORD_SIZE * 1); - WORD_COPY(OUT, IN, WORD_SIZE * 2); - WORD_COPY(OUT, IN, WORD_SIZE * 3); - OUT += WORD_SIZE * 
4; - IN += WORD_SIZE * 4; - } - while (WORD_SIZE <= OUT_end - (char*)OUT) { - WORD_COPY(OUT, IN, 0); - OUT += WORD_SIZE; - IN += WORD_SIZE; - } -#endif /* not defined(__ARM_NEON__) */ - -#if WORD_SIZE > SHORTWORD_SIZE - } else { /* if IN is not WORD_SIZE aligned */ - while (SHORTWORD_SIZE * 4 <= OUT_end - (char*)OUT) { - SHORTWORD_COPY(OUT, IN, 0); - SHORTWORD_COPY(OUT, IN, SHORTWORD_SIZE * 1); - SHORTWORD_COPY(OUT, IN, SHORTWORD_SIZE * 2); - SHORTWORD_COPY(OUT, IN, SHORTWORD_SIZE * 3); - OUT += SHORTWORD_SIZE * 4; - IN += SHORTWORD_SIZE * 4; - } - } /* end if IN is not WORD_SIZE aligned */ - } /* end if N >= WORD_SIZE */ - - while (SHORTWORD_SIZE <= OUT_end - (char*)OUT) { - SHORTWORD_COPY(OUT, IN, 0); - OUT += SHORTWORD_SIZE; - IN += SHORTWORD_SIZE; - } -#endif /* WORD_SIZE > SHORTWORD_SIZE */ - - } else { /* if IN is not SHORTWORD_SIZE aligned */ - ptrdiff_t misalign = (uintptr_t)IN % SHORTWORD_SIZE; - - SHORTWORD_TYPE temp1, temp2; - temp1 = SHORTWORD_REF(IN, -misalign); - - /* Benchmarking indicates that unrolling this loop doesn't - produce a measurable performance improvement on ARM. */ - while (SHORTWORD_SIZE <= OUT_end - (char*)OUT) { - IN += SHORTWORD_SIZE; - temp2 = SHORTWORD_REF(IN, -misalign); - SHORTWORD_REF(OUT, 0) = SHORTWORD_SHIFT(temp1, temp2, misalign); - temp1 = temp2; - OUT += SHORTWORD_SIZE; - } - - } /* end if IN is not SHORTWORD_SIZE aligned */ - - while ((char*)OUT < OUT_end) { - *((char*)OUT) = *((char*)IN); - OUT++; - IN++; - } - - return OUT0; -#endif -} diff --git a/reference/csl/memset.c b/reference/csl/memset.c deleted file mode 100644 index 3986fa9..0000000 --- a/reference/csl/memset.c +++ /dev/null @@ -1,214 +0,0 @@ -/* Copyright (c) 2009 CodeSourcery, Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of CodeSourcery nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY CODESOURCERY, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL CODESOURCERY BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arm_asm.h" -#include <string.h> -#include <stdint.h> - -/* Standard operations for word-sized values. */ -#define WORD_REF(ADDRESS, OFFSET) \ - *((WORD_TYPE*)((char*)(ADDRESS) + (OFFSET))) - -/* On processors with NEON, we use 128-bit vectors. Also, - we need to include arm_neon.h to use these. 
*/ -#if defined(__ARM_NEON__) - #include <arm_neon.h> - - #define WORD_TYPE uint8x16_t - #define WORD_SIZE 16 - - #define WORD_DUPLICATE(VALUE) \ - vdupq_n_u8(VALUE) - -/* On ARM processors with 64-bit ldrd instructions, we use those, - except on Cortex-M* where benchmarking has shown them to - be slower. */ -#elif defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) || defined(_ISA_ARM_6) - #define WORD_TYPE uint64_t - #define WORD_SIZE 8 - - /* ARM stores 64-bit values in two 32-bit registers and does not - have 64-bit multiply or bitwise-or instructions, so this union - operation results in optimal code. */ - static inline uint64_t splat8(value) { - union { uint32_t ints[2]; uint64_t result; } quad; - quad.ints[0] = (unsigned char)(value) * 0x01010101; - quad.ints[1] = quad.ints[0]; - return quad.result; - } - #define WORD_DUPLICATE(VALUE) \ - splat8(VALUE) - -/* On everything else, we use 32-bit loads and stores. */ -#else - #define WORD_TYPE uint32_t - #define WORD_SIZE 4 - #define WORD_DUPLICATE(VALUE) \ - (unsigned char)(VALUE) * 0x01010101 -#endif - -/* On all ARM platforms, 'SHORTWORD' is a 32-bit value. */ -#define SHORTWORD_TYPE uint32_t -#define SHORTWORD_SIZE 4 -#define SHORTWORD_REF(ADDRESS, OFFSET) \ - *((SHORTWORD_TYPE*)((char*)(ADDRESS) + (OFFSET))) -#define SHORTWORD_DUPLICATE(VALUE) \ - (uint32_t)(unsigned char)(VALUE) * 0x01010101 - -void *memset(void *DST, int C, size_t LENGTH) -{ - void* DST0 = DST; - unsigned char C_BYTE = C; - -#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) - const char* DST_end = (char*)DST + LENGTH; - while ((char*)DST < DST_end) { - *((char*)DST) = C_BYTE; - DST++; - } - - return DST0; -#else /* not PREFER_SIZE_OVER_SPEED */ - /* Handle short strings and immediately return. 
*/ - if (__builtin_expect(LENGTH < SHORTWORD_SIZE, 1)) { - size_t i = 0; - while (i < LENGTH) { - ((char*)DST)[i] = C_BYTE; - i++; - } - return DST; - } - - const char* DST_end = (char*)DST + LENGTH; - - /* Align DST to SHORTWORD_SIZE. */ - while ((uintptr_t)DST % SHORTWORD_SIZE != 0) { - *(char*) (DST++) = C_BYTE; - } - -#if WORD_SIZE > SHORTWORD_SIZE - SHORTWORD_TYPE C_SHORTWORD = SHORTWORD_DUPLICATE(C_BYTE); - - /* Align DST to WORD_SIZE in steps of SHORTWORD_SIZE. */ - if (__builtin_expect(DST_end - (char*)DST >= WORD_SIZE, 0)) { - while ((uintptr_t)DST % WORD_SIZE != 0) { - SHORTWORD_REF(DST, 0) = C_SHORTWORD; - DST += SHORTWORD_SIZE; - } -#endif /* WORD_SIZE > SHORTWORD_SIZE */ - - WORD_TYPE C_WORD = WORD_DUPLICATE(C_BYTE); - -#if defined(__ARM_NEON__) - /* Testing on Cortex-A8 indicates that the following idiom - produces faster assembly code when doing vector copies, - but not when doing regular copies. */ - size_t i = 0; - LENGTH = DST_end - (char*)DST; - while (i + WORD_SIZE * 16 <= LENGTH) { - WORD_REF(DST, i) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 1) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 2) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 3) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 4) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 5) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 6) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 7) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 8) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 9) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 10) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 11) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 12) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 13) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 14) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 15) = C_WORD; - i += WORD_SIZE * 16; - } - while (i + WORD_SIZE * 4 <= LENGTH) { - WORD_REF(DST, i) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 1) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 2) = C_WORD; - WORD_REF(DST, i + WORD_SIZE * 3) = C_WORD; - i += WORD_SIZE * 4; - } - while (i + 
WORD_SIZE <= LENGTH) { - WORD_REF(DST, i) = C_WORD; - i += WORD_SIZE; - } - DST += i; -#else /* not defined(__ARM_NEON__) */ - /* Note: 16-times unrolling is about 50% faster than 4-times - unrolling on both ARM Cortex-A8 and Cortex-M3. */ - while (DST_end - (char*) DST >= WORD_SIZE * 16) { - WORD_REF(DST, 0) = C_WORD; - WORD_REF(DST, WORD_SIZE * 1) = C_WORD; - WORD_REF(DST, WORD_SIZE * 2) = C_WORD; - WORD_REF(DST, WORD_SIZE * 3) = C_WORD; - WORD_REF(DST, WORD_SIZE * 4) = C_WORD; - WORD_REF(DST, WORD_SIZE * 5) = C_WORD; - WORD_REF(DST, WORD_SIZE * 6) = C_WORD; - WORD_REF(DST, WORD_SIZE * 7) = C_WORD; - WORD_REF(DST, WORD_SIZE * 8) = C_WORD; - WORD_REF(DST, WORD_SIZE * 9) = C_WORD; - WORD_REF(DST, WORD_SIZE * 10) = C_WORD; - WORD_REF(DST, WORD_SIZE * 11) = C_WORD; - WORD_REF(DST, WORD_SIZE * 12) = C_WORD; - WORD_REF(DST, WORD_SIZE * 13) = C_WORD; - WORD_REF(DST, WORD_SIZE * 14) = C_WORD; - WORD_REF(DST, WORD_SIZE * 15) = C_WORD; - DST += WORD_SIZE * 16; - } - while (WORD_SIZE * 4 <= DST_end - (char*) DST) { - WORD_REF(DST, 0) = C_WORD; - WORD_REF(DST, WORD_SIZE * 1) = C_WORD; - WORD_REF(DST, WORD_SIZE * 2) = C_WORD; - WORD_REF(DST, WORD_SIZE * 3) = C_WORD; - DST += WORD_SIZE * 4; - } - while (WORD_SIZE <= DST_end - (char*) DST) { - WORD_REF(DST, 0) = C_WORD; - DST += WORD_SIZE; - } -#endif /* not defined(__ARM_NEON__) */ - -#if WORD_SIZE > SHORTWORD_SIZE - } /* end if N >= WORD_SIZE */ - - while (SHORTWORD_SIZE <= DST_end - (char*)DST) { - SHORTWORD_REF(DST, 0) = C_SHORTWORD; - DST += SHORTWORD_SIZE; - } -#endif /* WORD_SIZE > SHORTWORD_SIZE */ - - while ((char*)DST < DST_end) { - *((char*)DST) = C_BYTE; - DST++; - } - - return DST0; -#endif /* not PREFER_SIZE_OVER_SPEED */ -} diff --git a/reference/glibc-c/memchr.c b/reference/glibc-c/memchr.c deleted file mode 100644 index bc606b2..0000000 --- a/reference/glibc-c/memchr.c +++ /dev/null @@ -1,204 +0,0 @@ -/* Copyright (C) 1991,93,96,97,99,2000,2003,2012 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - Based on strlen implementation by Torbjorn Granlund (tege@sics.se), - with help from Dan Sahlin (dan@sics.se) and - commentary by Jim Blandy (jimb@ai.mit.edu); - adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), - and implemented by Roland McGrath (roland@ai.mit.edu). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#undef __ptr_t -#define __ptr_t void * - -#if defined _LIBC -# include <string.h> -# include <memcopy.h> -#endif - -#if HAVE_STDLIB_H || defined _LIBC -# include <stdlib.h> -#endif - -#if HAVE_LIMITS_H || defined _LIBC -# include <limits.h> -#endif - -#define LONG_MAX_32_BITS 2147483647 - -#ifndef LONG_MAX -#define LONG_MAX LONG_MAX_32_BITS -#endif - -#include <sys/types.h> -#if HAVE_BP_SYM_H || defined _LIBC -#include <bp-sym.h> -#else -# define BP_SYM(sym) sym -#endif - -#undef memchr -#undef __memchr - -/* Search no more than N bytes of S for C. */ -__ptr_t -memchr (s, c_in, n) - const __ptr_t s; - int c_in; - size_t n; -{ - const unsigned char *char_ptr; - const unsigned long int *longword_ptr; - unsigned long int longword, magic_bits, charmask; - unsigned char c; - - c = (unsigned char) c_in; - - /* Handle the first few characters by reading one character at a time. 
- Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = (const unsigned char *) s; - n > 0 && ((unsigned long int) char_ptr - & (sizeof (longword) - 1)) != 0; - --n, ++char_ptr) - if (*char_ptr == c) - return (__ptr_t) char_ptr; - - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to 8-byte longwords. */ - - longword_ptr = (unsigned long int *) char_ptr; - - /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits - the "holes." Note that there is a hole just to the left of - each byte, with an extra at the end: - - bits: 01111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD - - The 1-bits make sure that carries propagate to the next 0-bit. - The 0-bits provide holes for carries to fall into. */ - - if (sizeof (longword) != 4 && sizeof (longword) != 8) - abort (); - -#if LONG_MAX <= LONG_MAX_32_BITS - magic_bits = 0x7efefeff; -#else - magic_bits = ((unsigned long int) 0x7efefefe << 32) | 0xfefefeff; -#endif - - /* Set up a longword, each of whose bytes is C. */ - charmask = c | (c << 8); - charmask |= charmask << 16; -#if LONG_MAX > LONG_MAX_32_BITS - charmask |= charmask << 32; -#endif - - /* Instead of the traditional loop which tests each character, - we will test a longword at a time. The tricky part is testing - if *any of the four* bytes in the longword in question are zero. */ - while (n >= sizeof (longword)) - { - /* We tentatively exit the loop if adding MAGIC_BITS to - LONGWORD fails to change any of the hole bits of LONGWORD. - - 1) Is this safe? Will it catch all the zero bytes? - Suppose there is a byte with all zeros. Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes? 
Suppose every byte of LONGWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24. If one of bits 24-30 is set, there will be a carry - into bit 31, so all of the hole bits will be changed. - - The one misfire occurs when bits 24-30 are clear and bit - 31 is set; in this case, the hole at bit 31 is not - changed. If we had access to the processor carry flag, - we could close this loophole by putting the fourth hole - at bit 32! - - So it ignores everything except 128's, when they're aligned - properly. - - 3) But wait! Aren't we looking for C, not zero? - Good point. So what we do is XOR LONGWORD with a longword, - each of whose bytes is C. This turns each byte that is C - into a zero. */ - - longword = *longword_ptr++ ^ charmask; - - /* Add MAGIC_BITS to LONGWORD. */ - if ((((longword + magic_bits) - - /* Set those bits that were unchanged by the addition. */ - ^ ~longword) - - /* Look at only the hole bits. If any of the hole bits - are unchanged, most likely one of the bytes was a - zero. */ - & ~magic_bits) != 0) - { - /* Which of the bytes was C? If none of them were, it was - a misfire; continue the search. 
*/ - - const unsigned char *cp = (const unsigned char *) (longword_ptr - 1); - - if (cp[0] == c) - return (__ptr_t) cp; - if (cp[1] == c) - return (__ptr_t) &cp[1]; - if (cp[2] == c) - return (__ptr_t) &cp[2]; - if (cp[3] == c) - return (__ptr_t) &cp[3]; -#if LONG_MAX > 2147483647 - if (cp[4] == c) - return (__ptr_t) &cp[4]; - if (cp[5] == c) - return (__ptr_t) &cp[5]; - if (cp[6] == c) - return (__ptr_t) &cp[6]; - if (cp[7] == c) - return (__ptr_t) &cp[7]; -#endif - } - - n -= sizeof (longword); - } - - char_ptr = (const unsigned char *) longword_ptr; - - while (n-- > 0) - { - if (*char_ptr == c) - return (__ptr_t) char_ptr; - else - ++char_ptr; - } - - return 0; -} diff --git a/reference/glibc-c/memcmp.c b/reference/glibc-c/memcmp.c deleted file mode 100644 index 8ea364c..0000000 --- a/reference/glibc-c/memcmp.c +++ /dev/null @@ -1,369 +0,0 @@ -/* Copyright (C) 1991,1993,1995,1997,1998,2003,2004,2012 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Torbjorn Granlund (tege@sics.se). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#undef __ptr_t -#define __ptr_t void * - -#if defined HAVE_STRING_H || defined _LIBC -# include <string.h> -#endif - -#undef memcmp - -#ifdef _LIBC - -# include <memcopy.h> -# include <endian.h> - -# if __BYTE_ORDER == __BIG_ENDIAN -# define WORDS_BIGENDIAN -# endif - -#else /* Not in the GNU C library. */ - -# include <sys/types.h> - -/* Type to use for aligned memory operations. - This should normally be the biggest type supported by a single load - and store. Must be an unsigned type. */ -# define op_t unsigned long int -# define OPSIZ (sizeof(op_t)) - -/* Threshold value for when to enter the unrolled loops. */ -# define OP_T_THRES 16 - -/* Type to use for unaligned operations. */ -typedef unsigned char byte; - -# ifndef WORDS_BIGENDIAN -# define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2))) -# else -# define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2))) -# endif - -#endif /* In the GNU C library. */ - -#ifdef WORDS_BIGENDIAN -# define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1) -#else -# define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b)) -#endif - -/* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */ - -/* The strategy of this memcmp is: - - 1. Compare bytes until one of the block pointers is aligned. - - 2. Compare using memcmp_common_alignment or - memcmp_not_common_alignment, regarding the alignment of the other - block after the initial byte operations. The maximum number of - full words (of type op_t) are compared in this way. - - 3. Compare the few remaining bytes. */ - -#ifndef WORDS_BIGENDIAN -/* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine. - A and B are known to be different. - This is needed only on little-endian machines. 
*/ - -static int memcmp_bytes (op_t, op_t) __THROW; - -# ifdef __GNUC__ -__inline -# endif -static int -memcmp_bytes (a, b) - op_t a, b; -{ - long int srcp1 = (long int) &a; - long int srcp2 = (long int) &b; - op_t a0, b0; - - do - { - a0 = ((byte *) srcp1)[0]; - b0 = ((byte *) srcp2)[0]; - srcp1 += 1; - srcp2 += 1; - } - while (a0 == b0); - return a0 - b0; -} -#endif - -static int memcmp_common_alignment (long, long, size_t) __THROW; - -/* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t' - objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for - memory operations on `op_t's. */ -static int -memcmp_common_alignment (srcp1, srcp2, len) - long int srcp1; - long int srcp2; - size_t len; -{ - op_t a0, a1; - op_t b0, b1; - - switch (len % 4) - { - default: /* Avoid warning about uninitialized local variables. */ - case 2: - a0 = ((op_t *) srcp1)[0]; - b0 = ((op_t *) srcp2)[0]; - srcp1 -= 2 * OPSIZ; - srcp2 -= 2 * OPSIZ; - len += 2; - goto do1; - case 3: - a1 = ((op_t *) srcp1)[0]; - b1 = ((op_t *) srcp2)[0]; - srcp1 -= OPSIZ; - srcp2 -= OPSIZ; - len += 1; - goto do2; - case 0: - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - return 0; - a0 = ((op_t *) srcp1)[0]; - b0 = ((op_t *) srcp2)[0]; - goto do3; - case 1: - a1 = ((op_t *) srcp1)[0]; - b1 = ((op_t *) srcp2)[0]; - srcp1 += OPSIZ; - srcp2 += OPSIZ; - len -= 1; - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - goto do0; - /* Fall through. 
*/ - } - - do - { - a0 = ((op_t *) srcp1)[0]; - b0 = ((op_t *) srcp2)[0]; - if (a1 != b1) - return CMP_LT_OR_GT (a1, b1); - - do3: - a1 = ((op_t *) srcp1)[1]; - b1 = ((op_t *) srcp2)[1]; - if (a0 != b0) - return CMP_LT_OR_GT (a0, b0); - - do2: - a0 = ((op_t *) srcp1)[2]; - b0 = ((op_t *) srcp2)[2]; - if (a1 != b1) - return CMP_LT_OR_GT (a1, b1); - - do1: - a1 = ((op_t *) srcp1)[3]; - b1 = ((op_t *) srcp2)[3]; - if (a0 != b0) - return CMP_LT_OR_GT (a0, b0); - - srcp1 += 4 * OPSIZ; - srcp2 += 4 * OPSIZ; - len -= 4; - } - while (len != 0); - - /* This is the right position for do0. Please don't move - it into the loop. */ - do0: - if (a1 != b1) - return CMP_LT_OR_GT (a1, b1); - return 0; -} - -static int memcmp_not_common_alignment (long, long, size_t) __THROW; - -/* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN - `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory - operations on `op_t', but SRCP1 *should be unaligned*. */ -static int -memcmp_not_common_alignment (srcp1, srcp2, len) - long int srcp1; - long int srcp2; - size_t len; -{ - op_t a0, a1, a2, a3; - op_t b0, b1, b2, b3; - op_t x; - int shl, shr; - - /* Calculate how to shift a word read at the memory operation - aligned srcp1 to make it aligned for comparison. */ - - shl = 8 * (srcp1 % OPSIZ); - shr = 8 * OPSIZ - shl; - - /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t' - it points in the middle of. */ - srcp1 &= -OPSIZ; - - switch (len % 4) - { - default: /* Avoid warning about uninitialized local variables. 
*/ - case 2: - a1 = ((op_t *) srcp1)[0]; - a2 = ((op_t *) srcp1)[1]; - b2 = ((op_t *) srcp2)[0]; - srcp1 -= 1 * OPSIZ; - srcp2 -= 2 * OPSIZ; - len += 2; - goto do1; - case 3: - a0 = ((op_t *) srcp1)[0]; - a1 = ((op_t *) srcp1)[1]; - b1 = ((op_t *) srcp2)[0]; - srcp2 -= 1 * OPSIZ; - len += 1; - goto do2; - case 0: - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - return 0; - a3 = ((op_t *) srcp1)[0]; - a0 = ((op_t *) srcp1)[1]; - b0 = ((op_t *) srcp2)[0]; - srcp1 += 1 * OPSIZ; - goto do3; - case 1: - a2 = ((op_t *) srcp1)[0]; - a3 = ((op_t *) srcp1)[1]; - b3 = ((op_t *) srcp2)[0]; - srcp1 += 2 * OPSIZ; - srcp2 += 1 * OPSIZ; - len -= 1; - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - goto do0; - /* Fall through. */ - } - - do - { - a0 = ((op_t *) srcp1)[0]; - b0 = ((op_t *) srcp2)[0]; - x = MERGE(a2, shl, a3, shr); - if (x != b3) - return CMP_LT_OR_GT (x, b3); - - do3: - a1 = ((op_t *) srcp1)[1]; - b1 = ((op_t *) srcp2)[1]; - x = MERGE(a3, shl, a0, shr); - if (x != b0) - return CMP_LT_OR_GT (x, b0); - - do2: - a2 = ((op_t *) srcp1)[2]; - b2 = ((op_t *) srcp2)[2]; - x = MERGE(a0, shl, a1, shr); - if (x != b1) - return CMP_LT_OR_GT (x, b1); - - do1: - a3 = ((op_t *) srcp1)[3]; - b3 = ((op_t *) srcp2)[3]; - x = MERGE(a1, shl, a2, shr); - if (x != b2) - return CMP_LT_OR_GT (x, b2); - - srcp1 += 4 * OPSIZ; - srcp2 += 4 * OPSIZ; - len -= 4; - } - while (len != 0); - - /* This is the right position for do0. Please don't move - it into the loop. */ - do0: - x = MERGE(a2, shl, a3, shr); - if (x != b3) - return CMP_LT_OR_GT (x, b3); - return 0; -} - -int -memcmp (s1, s2, len) - const __ptr_t s1; - const __ptr_t s2; - size_t len; -{ - op_t a0; - op_t b0; - long int srcp1 = (long int) s1; - long int srcp2 = (long int) s2; - op_t res; - - if (len >= OP_T_THRES) - { - /* There are at least some bytes to compare. No need to test - for LEN == 0 in this alignment loop. 
*/ - while (srcp2 % OPSIZ != 0) - { - a0 = ((byte *) srcp1)[0]; - b0 = ((byte *) srcp2)[0]; - srcp1 += 1; - srcp2 += 1; - res = a0 - b0; - if (res != 0) - return res; - len -= 1; - } - - /* SRCP2 is now aligned for memory operations on `op_t'. - SRCP1 alignment determines if we can do a simple, - aligned compare or need to shuffle bits. */ - - if (srcp1 % OPSIZ == 0) - res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ); - else - res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ); - if (res != 0) - return res; - - /* Number of bytes remaining in the interval [0..OPSIZ-1]. */ - srcp1 += len & -OPSIZ; - srcp2 += len & -OPSIZ; - len %= OPSIZ; - } - - /* There are just a few bytes to compare. Use byte memory operations. */ - while (len != 0) - { - a0 = ((byte *) srcp1)[0]; - b0 = ((byte *) srcp2)[0]; - srcp1 += 1; - srcp2 += 1; - res = a0 - b0; - if (res != 0) - return res; - len -= 1; - } - - return 0; -} diff --git a/reference/glibc-c/memcopy.h b/reference/glibc-c/memcopy.h deleted file mode 100644 index a8e5a44..0000000 --- a/reference/glibc-c/memcopy.h +++ /dev/null @@ -1,146 +0,0 @@ -/* memcopy.h -- definitions for memory copy functions. Generic C version. - Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Torbjorn Granlund (tege@sics.se). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* The strategy of the memory functions is: - - 1. Copy bytes until the destination pointer is aligned. - - 2. Copy words in unrolled loops. If the source and destination - are not aligned in the same way, use word memory operations, - but shift and merge two read words before writing. - - 3. Copy the few remaining bytes. - - This is fast on processors that have at least 10 registers for - allocation by GCC, and that can access memory at reg+const in one - instruction. - - I made an "exhaustive" test of this memmove when I wrote it, - exhaustive in the sense that I tried all alignment and length - combinations, with and without overlap. */ - -#include <sys/cdefs.h> -#include <endian.h> - -/* The macros defined in this file are: - - BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy) - - BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy) - - WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy) - - WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy) - - MERGE(old_word, sh_1, new_word, sh_2) - [I fail to understand. I feel stupid. --roland] -*/ - -/* Type to use for aligned memory operations. - This should normally be the biggest type supported by a single load - and store. */ -#define op_t unsigned long int -#define OPSIZ (sizeof(op_t)) - -/* Type to use for unaligned operations. */ -typedef unsigned char byte; - -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2))) -#endif -#if __BYTE_ORDER == __BIG_ENDIAN -#define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2))) -#endif - -/* Copy exactly NBYTES bytes from SRC_BP to DST_BP, - without any assumptions about alignment of the pointers. 
*/ -#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \ - do \ - { \ - size_t __nbytes = (nbytes); \ - while (__nbytes > 0) \ - { \ - byte __x = ((byte *) src_bp)[0]; \ - src_bp += 1; \ - __nbytes -= 1; \ - ((byte *) dst_bp)[0] = __x; \ - dst_bp += 1; \ - } \ - } while (0) - -/* Copy exactly NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR, - beginning at the bytes right before the pointers and continuing towards - smaller addresses. Don't assume anything about alignment of the - pointers. */ -#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \ - do \ - { \ - size_t __nbytes = (nbytes); \ - while (__nbytes > 0) \ - { \ - byte __x; \ - src_ep -= 1; \ - __x = ((byte *) src_ep)[0]; \ - dst_ep -= 1; \ - __nbytes -= 1; \ - ((byte *) dst_ep)[0] = __x; \ - } \ - } while (0) - -/* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with - the assumption that DST_BP is aligned on an OPSIZ multiple. If - not all bytes could be easily copied, store remaining number of bytes - in NBYTES_LEFT, otherwise store 0. */ -extern void _wordcopy_fwd_aligned (long int, long int, size_t) __THROW; -extern void _wordcopy_fwd_dest_aligned (long int, long int, size_t) __THROW; -#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ - do \ - { \ - if (src_bp % OPSIZ == 0) \ - _wordcopy_fwd_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \ - else \ - _wordcopy_fwd_dest_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \ - src_bp += (nbytes) & -OPSIZ; \ - dst_bp += (nbytes) & -OPSIZ; \ - (nbytes_left) = (nbytes) % OPSIZ; \ - } while (0) - -/* Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR, - beginning at the words (of type op_t) right before the pointers and - continuing towards smaller addresses. May take advantage of that - DST_END_PTR is aligned on an OPSIZ multiple. If not all bytes could be - easily copied, store remaining number of bytes in NBYTES_REMAINING, - otherwise store 0. 
*/ -extern void _wordcopy_bwd_aligned (long int, long int, size_t) __THROW; -extern void _wordcopy_bwd_dest_aligned (long int, long int, size_t) __THROW; -#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \ - do \ - { \ - if (src_ep % OPSIZ == 0) \ - _wordcopy_bwd_aligned (dst_ep, src_ep, (nbytes) / OPSIZ); \ - else \ - _wordcopy_bwd_dest_aligned (dst_ep, src_ep, (nbytes) / OPSIZ); \ - src_ep -= (nbytes) & -OPSIZ; \ - dst_ep -= (nbytes) & -OPSIZ; \ - (nbytes_left) = (nbytes) % OPSIZ; \ - } while (0) - - -/* Threshold value for when to enter the unrolled loops. */ -#define OP_T_THRES 16 diff --git a/reference/glibc-c/memcpy.c b/reference/glibc-c/memcpy.c deleted file mode 100644 index 77e5216..0000000 --- a/reference/glibc-c/memcpy.c +++ /dev/null @@ -1,63 +0,0 @@ -/* Copy memory to memory until the specified number of bytes - has been copied. Overlap is NOT handled correctly. - Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Torbjorn Granlund (tege@sics.se). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <string.h> -#include "memcopy.h" -#include "pagecopy.h" - -#undef memcpy - -void * -memcpy (dstpp, srcpp, len) - void *dstpp; - const void *srcpp; - size_t len; -{ - unsigned long int dstp = (long int) dstpp; - unsigned long int srcp = (long int) srcpp; - - /* Copy from the beginning to the end. */ - - /* If there not too few bytes to copy, use word copy. */ - if (len >= OP_T_THRES) - { - /* Copy just a few bytes to make DSTP aligned. */ - len -= (-dstp) % OPSIZ; - BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ); - - /* Copy whole pages from SRCP to DSTP by virtual address manipulation, - as much as possible. */ - - PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len); - - /* Copy from SRCP to DSTP taking advantage of the known alignment of - DSTP. Number of bytes remaining is put in the third argument, - i.e. in LEN. This number may vary from machine to machine. */ - - WORD_COPY_FWD (dstp, srcp, len, len); - - /* Fall out and copy the tail. */ - } - - /* There are just a few bytes to copy. Use byte memory operations. */ - BYTE_COPY_FWD (dstp, srcp, len); - - return dstpp; -} diff --git a/reference/glibc-c/memset.c b/reference/glibc-c/memset.c deleted file mode 100644 index 207e7c4..0000000 --- a/reference/glibc-c/memset.c +++ /dev/null @@ -1,89 +0,0 @@ -/* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <string.h> -#include "memcopy.h" - -#undef memset - -void * -memset (dstpp, c, len) - void *dstpp; - int c; - size_t len; -{ - long int dstp = (long int) dstpp; - - if (len >= 8) - { - size_t xlen; - op_t cccc; - - cccc = (unsigned char) c; - cccc |= cccc << 8; - cccc |= cccc << 16; - if (OPSIZ > 4) - /* Do the shift in two steps to avoid warning if long has 32 bits. */ - cccc |= (cccc << 16) << 16; - - /* There are at least some bytes to set. - No need to test for LEN == 0 in this alignment loop. */ - while (dstp % OPSIZ != 0) - { - ((byte *) dstp)[0] = c; - dstp += 1; - len -= 1; - } - - /* Write 8 `op_t' per iteration until less than 8 `op_t' remain. */ - xlen = len / (OPSIZ * 8); - while (xlen > 0) - { - ((op_t *) dstp)[0] = cccc; - ((op_t *) dstp)[1] = cccc; - ((op_t *) dstp)[2] = cccc; - ((op_t *) dstp)[3] = cccc; - ((op_t *) dstp)[4] = cccc; - ((op_t *) dstp)[5] = cccc; - ((op_t *) dstp)[6] = cccc; - ((op_t *) dstp)[7] = cccc; - dstp += 8 * OPSIZ; - xlen -= 1; - } - len %= OPSIZ * 8; - - /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain. */ - xlen = len / OPSIZ; - while (xlen > 0) - { - ((op_t *) dstp)[0] = cccc; - dstp += OPSIZ; - xlen -= 1; - } - len %= OPSIZ; - } - - /* Write the last few bytes. */ - while (len > 0) - { - ((byte *) dstp)[0] = c; - dstp += 1; - len -= 1; - } - - return dstpp; -} diff --git a/reference/glibc-c/pagecopy.h b/reference/glibc-c/pagecopy.h deleted file mode 100644 index 89f392c..0000000 --- a/reference/glibc-c/pagecopy.h +++ /dev/null @@ -1,74 +0,0 @@ -/* Macros for copying by pages; used in memcpy, memmove. Generic macros. - Copyright (C) 1995, 1997 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* This file defines the macro: - - PAGE_COPY_FWD_MAYBE (dstp, srcp, nbytes_left, nbytes) - - which is invoked like WORD_COPY_FWD et al. The pointers should be at - least word aligned. This will check if virtual copying by pages can and - should be done and do it if so. - - System-specific pagecopy.h files should define these macros and then - #include this file: - - PAGE_COPY_THRESHOLD - -- Minimum size for which virtual copying by pages is worthwhile. - - PAGE_SIZE - -- Size of a page. - - PAGE_COPY_FWD (dstp, srcp, nbytes_left, nbytes) - -- Macro to perform the virtual copy operation. - The pointers will be aligned to PAGE_SIZE bytes. -*/ - - -#if PAGE_COPY_THRESHOLD - -#include <assert.h> - -#define PAGE_COPY_FWD_MAYBE(dstp, srcp, nbytes_left, nbytes) \ - do \ - { \ - if ((nbytes) >= PAGE_COPY_THRESHOLD && \ - PAGE_OFFSET ((dstp) - (srcp)) == 0) \ - { \ - /* The amount to copy is past the threshold for copying \ - pages virtually with kernel VM operations, and the \ - source and destination addresses have the same alignment. */ \ - size_t nbytes_before = PAGE_OFFSET (-(dstp)); \ - if (nbytes_before != 0) \ - { \ - /* First copy the words before the first page boundary. 
*/ \ - WORD_COPY_FWD (dstp, srcp, nbytes_left, nbytes_before); \ - assert (nbytes_left == 0); \ - nbytes -= nbytes_before; \ - } \ - PAGE_COPY_FWD (dstp, srcp, nbytes_left, nbytes); \ - } \ - } while (0) - -/* The page size is always a power of two, so we can avoid modulo division. */ -#define PAGE_OFFSET(n) ((n) & (PAGE_SIZE - 1)) - -#else - -#define PAGE_COPY_FWD_MAYBE(dstp, srcp, nbytes_left, nbytes) /* nada */ - -#endif diff --git a/reference/glibc-c/strchr.c b/reference/glibc-c/strchr.c deleted file mode 100644 index 3866d1b..0000000 --- a/reference/glibc-c/strchr.c +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright (C) 1991,1993-1997,1999,2000,2003,2006 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - Based on strlen implementation by Torbjorn Granlund (tege@sics.se), - with help from Dan Sahlin (dan@sics.se) and - bug fix and commentary by Jim Blandy (jimb@ai.mit.edu); - adaptation to strchr suggested by Dick Karpinski (dick@cca.ucsf.edu), - and implemented by Roland McGrath (roland@ai.mit.edu). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <string.h> -#include "memcopy.h" -#include <stdlib.h> - -#undef strchr - -/* Find the first occurrence of C in S. 
*/ -char * -strchr (s, c_in) - const char *s; - int c_in; -{ - const unsigned char *char_ptr; - const unsigned long int *longword_ptr; - unsigned long int longword, magic_bits, charmask; - unsigned char c; - - c = (unsigned char) c_in; - - /* Handle the first few characters by reading one character at a time. - Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = (const unsigned char *) s; - ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0; - ++char_ptr) - if (*char_ptr == c) - return (void *) char_ptr; - else if (*char_ptr == '\0') - return NULL; - - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to 8-byte longwords. */ - - longword_ptr = (unsigned long int *) char_ptr; - - /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits - the "holes." Note that there is a hole just to the left of - each byte, with an extra at the end: - - bits: 01111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD - - The 1-bits make sure that carries propagate to the next 0-bit. - The 0-bits provide holes for carries to fall into. */ - switch (sizeof (longword)) - { - case 4: magic_bits = 0x7efefeffL; break; - case 8: magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL; break; - default: - abort (); - } - - /* Set up a longword, each of whose bytes is C. */ - charmask = c | (c << 8); - charmask |= charmask << 16; - if (sizeof (longword) > 4) - /* Do the shift in two steps to avoid a warning if long has 32 bits. */ - charmask |= (charmask << 16) << 16; - if (sizeof (longword) > 8) - abort (); - - /* Instead of the traditional loop which tests each character, - we will test a longword at a time. The tricky part is testing - if *any of the four* bytes in the longword in question are zero. */ - for (;;) - { - /* We tentatively exit the loop if adding MAGIC_BITS to - LONGWORD fails to change any of the hole bits of LONGWORD. - - 1) Is this safe? 
Will it catch all the zero bytes? - Suppose there is a byte with all zeros. Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes? Suppose every byte of LONGWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24. If one of bits 24-30 is set, there will be a carry - into bit 31, so all of the hole bits will be changed. - - The one misfire occurs when bits 24-30 are clear and bit - 31 is set; in this case, the hole at bit 31 is not - changed. If we had access to the processor carry flag, - we could close this loophole by putting the fourth hole - at bit 32! - - So it ignores everything except 128's, when they're aligned - properly. - - 3) But wait! Aren't we looking for C as well as zero? - Good point. So what we do is XOR LONGWORD with a longword, - each of whose bytes is C. This turns each byte that is C - into a zero. */ - - longword = *longword_ptr++; - - /* Add MAGIC_BITS to LONGWORD. */ - if ((((longword + magic_bits) - - /* Set those bits that were unchanged by the addition. */ - ^ ~longword) - - /* Look at only the hole bits. If any of the hole bits - are unchanged, most likely one of the bytes was a - zero. */ - & ~magic_bits) != 0 || - - /* That caught zeroes. Now test for C. */ - ((((longword ^ charmask) + magic_bits) ^ ~(longword ^ charmask)) - & ~magic_bits) != 0) - { - /* Which of the bytes was C or zero? - If none of them were, it was a misfire; continue the search. 
*/ - - const unsigned char *cp = (const unsigned char *) (longword_ptr - 1); - - if (*cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (sizeof (longword) > 4) - { - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - if (*++cp == c) - return (char *) cp; - else if (*cp == '\0') - return NULL; - } - } - } - - return NULL; -} diff --git a/reference/glibc-c/strcmp.c b/reference/glibc-c/strcmp.c deleted file mode 100644 index 6ca7815..0000000 --- a/reference/glibc-c/strcmp.c +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright (C) 1991, 1996, 1997, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <string.h> -#include "memcopy.h" - -#undef strcmp - -/* Compare S1 and S2, returning less than, equal to or - greater than zero if S1 is lexicographically less than, - equal to or greater than S2. 
*/ -int -strcmp (p1, p2) - const char *p1; - const char *p2; -{ - register const unsigned char *s1 = (const unsigned char *) p1; - register const unsigned char *s2 = (const unsigned char *) p2; - unsigned char c1, c2; - - do - { - c1 = (unsigned char) *s1++; - c2 = (unsigned char) *s2++; - if (c1 == '\0') - return c1 - c2; - } - while (c1 == c2); - - return c1 - c2; -} diff --git a/reference/glibc-c/strcpy.c b/reference/glibc-c/strcpy.c deleted file mode 100644 index 0b66109..0000000 --- a/reference/glibc-c/strcpy.c +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (C) 1991-2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <stddef.h> -#include <string.h> - -#undef strcpy - -/* Copy SRC to DEST. */ -char * -strcpy (char *dest, const char *src) -{ - char c; - char *s = (char *) src; - const ptrdiff_t off = dest - s - 1; - - do - { - c = *s++; - s[off] = c; - } - while (c != '\0'); - - return dest; -} diff --git a/reference/glibc-c/strlen.c b/reference/glibc-c/strlen.c deleted file mode 100644 index d6db374..0000000 --- a/reference/glibc-c/strlen.c +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright (C) 1991,1993,1997,2000,2003,2009 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- Written by Torbjorn Granlund (tege@sics.se), - with help from Dan Sahlin (dan@sics.se); - commentary by Jim Blandy (jimb@ai.mit.edu). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <string.h> -#include <stdlib.h> - -#undef strlen - -/* Return the length of the null-terminated string STR. Scan for - the null terminator quickly by testing four bytes at a time. */ -size_t -strlen (str) - const char *str; -{ - const char *char_ptr; - const unsigned long int *longword_ptr; - unsigned long int longword, himagic, lomagic; - - /* Handle the first few characters by reading one character at a time. - Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = str; ((unsigned long int) char_ptr - & (sizeof (longword) - 1)) != 0; - ++char_ptr) - if (*char_ptr == '\0') - return char_ptr - str; - - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to 8-byte longwords. */ - - longword_ptr = (unsigned long int *) char_ptr; - - /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits - the "holes." Note that there is a hole just to the left of - each byte, with an extra at the end: - - bits: 01111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD - - The 1-bits make sure that carries propagate to the next 0-bit. 
- The 0-bits provide holes for carries to fall into. */ - himagic = 0x80808080L; - lomagic = 0x01010101L; - if (sizeof (longword) > 4) - { - /* 64-bit version of the magic. */ - /* Do the shift in two steps to avoid a warning if long has 32 bits. */ - himagic = ((himagic << 16) << 16) | himagic; - lomagic = ((lomagic << 16) << 16) | lomagic; - } - if (sizeof (longword) > 8) - abort (); - - /* Instead of the traditional loop which tests each character, - we will test a longword at a time. The tricky part is testing - if *any of the four* bytes in the longword in question are zero. */ - for (;;) - { - longword = *longword_ptr++; - - if (((longword - lomagic) & ~longword & himagic) != 0) - { - /* Which of the bytes was the zero? If none of them were, it was - a misfire; continue the search. */ - - const char *cp = (const char *) (longword_ptr - 1); - - if (cp[0] == 0) - return cp - str; - if (cp[1] == 0) - return cp - str + 1; - if (cp[2] == 0) - return cp - str + 2; - if (cp[3] == 0) - return cp - str + 3; - if (sizeof (longword) > 4) - { - if (cp[4] == 0) - return cp - str + 4; - if (cp[5] == 0) - return cp - str + 5; - if (cp[6] == 0) - return cp - str + 6; - if (cp[7] == 0) - return cp - str + 7; - } - } - } -} diff --git a/reference/glibc-c/wordcopy.c b/reference/glibc-c/wordcopy.c deleted file mode 100644 index b757a62..0000000 --- a/reference/glibc-c/wordcopy.c +++ /dev/null @@ -1,412 +0,0 @@ -/* _memcopy.c -- subroutines for memory copy functions. - Copyright (C) 1991, 1996 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Torbjorn Granlund (tege@sics.se). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ - -#include <stddef.h> -#include "memcopy.h" - -/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to - block beginning at DSTP with LEN `op_t' words (not LEN bytes!). - Both SRCP and DSTP should be aligned for memory operations on `op_t's. */ - -void -_wordcopy_fwd_aligned (dstp, srcp, len) - long int dstp; - long int srcp; - size_t len; -{ - op_t a0, a1; - - switch (len % 8) - { - case 2: - a0 = ((op_t *) srcp)[0]; - srcp -= 6 * OPSIZ; - dstp -= 7 * OPSIZ; - len += 6; - goto do1; - case 3: - a1 = ((op_t *) srcp)[0]; - srcp -= 5 * OPSIZ; - dstp -= 6 * OPSIZ; - len += 5; - goto do2; - case 4: - a0 = ((op_t *) srcp)[0]; - srcp -= 4 * OPSIZ; - dstp -= 5 * OPSIZ; - len += 4; - goto do3; - case 5: - a1 = ((op_t *) srcp)[0]; - srcp -= 3 * OPSIZ; - dstp -= 4 * OPSIZ; - len += 3; - goto do4; - case 6: - a0 = ((op_t *) srcp)[0]; - srcp -= 2 * OPSIZ; - dstp -= 3 * OPSIZ; - len += 2; - goto do5; - case 7: - a1 = ((op_t *) srcp)[0]; - srcp -= 1 * OPSIZ; - dstp -= 2 * OPSIZ; - len += 1; - goto do6; - - case 0: - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - return; - a0 = ((op_t *) srcp)[0]; - srcp -= 0 * OPSIZ; - dstp -= 1 * OPSIZ; - goto do7; - case 1: - a1 = ((op_t *) srcp)[0]; - srcp -=-1 * OPSIZ; - dstp -= 0 * OPSIZ; - len -= 1; - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - goto do0; - goto do8; /* No-op. 
*/ - } - - do - { - do8: - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[0] = a1; - do7: - a1 = ((op_t *) srcp)[1]; - ((op_t *) dstp)[1] = a0; - do6: - a0 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[2] = a1; - do5: - a1 = ((op_t *) srcp)[3]; - ((op_t *) dstp)[3] = a0; - do4: - a0 = ((op_t *) srcp)[4]; - ((op_t *) dstp)[4] = a1; - do3: - a1 = ((op_t *) srcp)[5]; - ((op_t *) dstp)[5] = a0; - do2: - a0 = ((op_t *) srcp)[6]; - ((op_t *) dstp)[6] = a1; - do1: - a1 = ((op_t *) srcp)[7]; - ((op_t *) dstp)[7] = a0; - - srcp += 8 * OPSIZ; - dstp += 8 * OPSIZ; - len -= 8; - } - while (len != 0); - - /* This is the right position for do0. Please don't move - it into the loop. */ - do0: - ((op_t *) dstp)[0] = a1; -} - -/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to - block beginning at DSTP with LEN `op_t' words (not LEN bytes!). - DSTP should be aligned for memory operations on `op_t's, but SRCP must - *not* be aligned. */ - -void -_wordcopy_fwd_dest_aligned (dstp, srcp, len) - long int dstp; - long int srcp; - size_t len; -{ - op_t a0, a1, a2, a3; - int sh_1, sh_2; - - /* Calculate how to shift a word read at the memory operation - aligned srcp to make it aligned for copy. */ - - sh_1 = 8 * (srcp % OPSIZ); - sh_2 = 8 * OPSIZ - sh_1; - - /* Make SRCP aligned by rounding it down to the beginning of the `op_t' - it points in the middle of. 
*/ - srcp &= -OPSIZ; - - switch (len % 4) - { - case 2: - a1 = ((op_t *) srcp)[0]; - a2 = ((op_t *) srcp)[1]; - srcp -= 1 * OPSIZ; - dstp -= 3 * OPSIZ; - len += 2; - goto do1; - case 3: - a0 = ((op_t *) srcp)[0]; - a1 = ((op_t *) srcp)[1]; - srcp -= 0 * OPSIZ; - dstp -= 2 * OPSIZ; - len += 1; - goto do2; - case 0: - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - return; - a3 = ((op_t *) srcp)[0]; - a0 = ((op_t *) srcp)[1]; - srcp -=-1 * OPSIZ; - dstp -= 1 * OPSIZ; - len += 0; - goto do3; - case 1: - a2 = ((op_t *) srcp)[0]; - a3 = ((op_t *) srcp)[1]; - srcp -=-2 * OPSIZ; - dstp -= 0 * OPSIZ; - len -= 1; - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - goto do0; - goto do4; /* No-op. */ - } - - do - { - do4: - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); - do3: - a1 = ((op_t *) srcp)[1]; - ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2); - do2: - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2); - do1: - a3 = ((op_t *) srcp)[3]; - ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2); - - srcp += 4 * OPSIZ; - dstp += 4 * OPSIZ; - len -= 4; - } - while (len != 0); - - /* This is the right position for do0. Please don't move - it into the loop. */ - do0: - ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); -} - -/* _wordcopy_bwd_aligned -- Copy block finishing right before - SRCP to block finishing right before DSTP with LEN `op_t' words - (not LEN bytes!). Both SRCP and DSTP should be aligned for memory - operations on `op_t's. 
*/ - -void -_wordcopy_bwd_aligned (dstp, srcp, len) - long int dstp; - long int srcp; - size_t len; -{ - op_t a0, a1; - - switch (len % 8) - { - case 2: - srcp -= 2 * OPSIZ; - dstp -= 1 * OPSIZ; - a0 = ((op_t *) srcp)[1]; - len += 6; - goto do1; - case 3: - srcp -= 3 * OPSIZ; - dstp -= 2 * OPSIZ; - a1 = ((op_t *) srcp)[2]; - len += 5; - goto do2; - case 4: - srcp -= 4 * OPSIZ; - dstp -= 3 * OPSIZ; - a0 = ((op_t *) srcp)[3]; - len += 4; - goto do3; - case 5: - srcp -= 5 * OPSIZ; - dstp -= 4 * OPSIZ; - a1 = ((op_t *) srcp)[4]; - len += 3; - goto do4; - case 6: - srcp -= 6 * OPSIZ; - dstp -= 5 * OPSIZ; - a0 = ((op_t *) srcp)[5]; - len += 2; - goto do5; - case 7: - srcp -= 7 * OPSIZ; - dstp -= 6 * OPSIZ; - a1 = ((op_t *) srcp)[6]; - len += 1; - goto do6; - - case 0: - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - return; - srcp -= 8 * OPSIZ; - dstp -= 7 * OPSIZ; - a0 = ((op_t *) srcp)[7]; - goto do7; - case 1: - srcp -= 9 * OPSIZ; - dstp -= 8 * OPSIZ; - a1 = ((op_t *) srcp)[8]; - len -= 1; - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - goto do0; - goto do8; /* No-op. */ - } - - do - { - do8: - a0 = ((op_t *) srcp)[7]; - ((op_t *) dstp)[7] = a1; - do7: - a1 = ((op_t *) srcp)[6]; - ((op_t *) dstp)[6] = a0; - do6: - a0 = ((op_t *) srcp)[5]; - ((op_t *) dstp)[5] = a1; - do5: - a1 = ((op_t *) srcp)[4]; - ((op_t *) dstp)[4] = a0; - do4: - a0 = ((op_t *) srcp)[3]; - ((op_t *) dstp)[3] = a1; - do3: - a1 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[2] = a0; - do2: - a0 = ((op_t *) srcp)[1]; - ((op_t *) dstp)[1] = a1; - do1: - a1 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[0] = a0; - - srcp -= 8 * OPSIZ; - dstp -= 8 * OPSIZ; - len -= 8; - } - while (len != 0); - - /* This is the right position for do0. Please don't move - it into the loop. */ - do0: - ((op_t *) dstp)[7] = a1; -} - -/* _wordcopy_bwd_dest_aligned -- Copy block finishing right - before SRCP to block finishing right before DSTP with LEN `op_t' - words (not LEN bytes!). 
DSTP should be aligned for memory - operations on `op_t', but SRCP must *not* be aligned. */ - -void -_wordcopy_bwd_dest_aligned (dstp, srcp, len) - long int dstp; - long int srcp; - size_t len; -{ - op_t a0, a1, a2, a3; - int sh_1, sh_2; - - /* Calculate how to shift a word read at the memory operation - aligned srcp to make it aligned for copy. */ - - sh_1 = 8 * (srcp % OPSIZ); - sh_2 = 8 * OPSIZ - sh_1; - - /* Make srcp aligned by rounding it down to the beginning of the op_t - it points in the middle of. */ - srcp &= -OPSIZ; - srcp += OPSIZ; - - switch (len % 4) - { - case 2: - srcp -= 3 * OPSIZ; - dstp -= 1 * OPSIZ; - a2 = ((op_t *) srcp)[2]; - a1 = ((op_t *) srcp)[1]; - len += 2; - goto do1; - case 3: - srcp -= 4 * OPSIZ; - dstp -= 2 * OPSIZ; - a3 = ((op_t *) srcp)[3]; - a2 = ((op_t *) srcp)[2]; - len += 1; - goto do2; - case 0: - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - return; - srcp -= 5 * OPSIZ; - dstp -= 3 * OPSIZ; - a0 = ((op_t *) srcp)[4]; - a3 = ((op_t *) srcp)[3]; - goto do3; - case 1: - srcp -= 6 * OPSIZ; - dstp -= 4 * OPSIZ; - a1 = ((op_t *) srcp)[5]; - a0 = ((op_t *) srcp)[4]; - len -= 1; - if (OP_T_THRES <= 3 * OPSIZ && len == 0) - goto do0; - goto do4; /* No-op. */ - } - - do - { - do4: - a3 = ((op_t *) srcp)[3]; - ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); - do3: - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2); - do2: - a1 = ((op_t *) srcp)[1]; - ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2); - do1: - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); - - srcp -= 4 * OPSIZ; - dstp -= 4 * OPSIZ; - len -= 4; - } - while (len != 0); - - /* This is the right position for do0. Please don't move - it into the loop. 
*/ - do0: - ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); -} diff --git a/reference/glibc/memcpy.S b/reference/glibc/memcpy.S deleted file mode 100644 index 357a89a..0000000 --- a/reference/glibc/memcpy.S +++ /dev/null @@ -1,229 +0,0 @@ -/* Copyright (C) 2006, 2009 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - Contributed by MontaVista Software, Inc. (written by Nicolas Pitre) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -/* - * Data preload for architectures that support it (ARM V5TE and above) - */ -#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \ - && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \ - && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \ - && !defined (__ARM_ARCH_5T__)) -#define PLD(code...) code -#else -#define PLD(code...) -#endif - -/* - * This can be used to enable code to cacheline align the source pointer. - * Experiments on tested architectures (StrongARM and XScale) didn't show - * this a worthwhile thing to do. That might be different in the future. - */ -//#define CALGN(code...) code -#define CALGN(code...) - -/* - * Endian independent macros for shifting bytes within registers. 
- */ -#ifndef __ARMEB__ -#define pull lsr -#define push lsl -#else -#define pull lsl -#define push lsr -#endif - - .text - .global memcpy - .type memcpy, %function - -/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ - -memcpy: - - stmfd sp!, {r0, r4, lr} - - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #0] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - blt 5f - - CALGN( ands ip, r1, #31 ) - CALGN( rsb r3, ip, #32 ) - CALGN( sbcnes r4, r3, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, r3 ) @ C gets set - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 4f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -3: PLD( pld [r1, #124] ) -4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} - subs r2, r2, #32 - stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 - addne pc, pc, ip @ C is always clear here - b 7f -6: nop - ldr r3, [r1], #4 - ldr r4, [r1], #4 - ldr r5, [r1], #4 - ldr r6, [r1], #4 - ldr r7, [r1], #4 - ldr r8, [r1], #4 - ldr lr, [r1], #4 - - add pc, pc, ip - nop - nop - str r3, [r0], #4 - str r4, [r0], #4 - str r5, [r0], #4 - str r6, [r0], #4 - str r7, [r0], #4 - str r8, [r0], #4 - str lr, [r0], #4 - - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - -8: movs r2, r2, lsl #31 - ldrneb r3, [r1], #1 - ldrcsb r4, [r1], #1 - ldrcsb ip, [r1] - strneb r3, [r0], #1 - strcsb r4, [r0], #1 - strcsb ip, [r0] - -#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__) - ldmfd sp!, {r0, r4, lr} - bx lr -#else - ldmfd sp!, {r0, r4, pc} -#endif - -9: rsb ip, ip, #4 - cmp ip, #2 - ldrgtb r3, [r1], #1 - ldrgeb r4, [r1], #1 - ldrb lr, [r1], #1 - strgtb r3, [r0], #1 - strgeb r4, [r0], #1 - subs r2, r2, ip - strb lr, [r0], #1 - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr lr, [r1], #4 - beq 17f - bgt 
18f - - - .macro forward_copy_shift pull push - - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r1, #31 ) - CALGN( rsb ip, ip, #32 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - - PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 13f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -12: PLD( pld [r1, #124] ) -13: ldmia r1!, {r4, r5, r6, r7} - mov r3, lr, pull #\pull - subs r2, r2, #32 - ldmia r1!, {r8, r9, ip, lr} - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push - stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - -14: ands ip, r2, #28 - beq 16f - -15: mov r3, lr, pull #\pull - ldr lr, [r1], #4 - subs ip, ip, #4 - orr r3, r3, lr, push #\push - str r3, [r0], #4 - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: sub r1, r1, #(\push / 8) - b 8b - - .endm - - - forward_copy_shift pull=8 push=24 - -17: forward_copy_shift pull=16 push=16 - -18: forward_copy_shift pull=24 push=8 diff --git a/reference/glibc/memset.S b/reference/glibc/memset.S deleted file mode 100644 index 51585f4..0000000 --- a/reference/glibc/memset.S +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (C) 1998, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- Contributed by Philip Blundell <philb@gnu.org> - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -/* void *memset (dstpp, c, len) */ - .text - .global memset - .type memset, %function - -memset: - mov r3, r0 - cmp r2, #8 - bcc 2f @ less than 8 bytes to move - -1: - tst r3, #3 @ aligned yet? - strneb r1, [r3], #1 - subne r2, r2, #1 - bne 1b - - and r1, r1, #255 @ clear any sign bits - orr r1, r1, r1, lsl $8 - orr r1, r1, r1, lsl $16 - mov ip, r1 - -1: - subs r2, r2, #8 - stmcsia r3!, {r1, ip} @ store up to 32 bytes per loop iteration - subcss r2, r2, #8 - stmcsia r3!, {r1, ip} - subcss r2, r2, #8 - stmcsia r3!, {r1, ip} - subcss r2, r2, #8 - stmcsia r3!, {r1, ip} - bcs 1b - - and r2, r2, #7 -2: - subs r2, r2, #1 @ store up to 4 bytes per loop iteration - strcsb r1, [r3], #1 - subcss r2, r2, #1 - strcsb r1, [r3], #1 - subcss r2, r2, #1 - strcsb r1, [r3], #1 - subcss r2, r2, #1 - strcsb r1, [r3], #1 - bcs 2b - - bx lr diff --git a/reference/glibc/strchr.S b/reference/glibc/strchr.S deleted file mode 100644 index a096027..0000000 --- a/reference/glibc/strchr.S +++ /dev/null @@ -1,132 +0,0 @@ -/* strchr -- find the first instance of C in a nul-terminated string. - Copyright (C) 2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - <http://www.gnu.org/licenses/>. */ - -#define ARCH_HAS_T2 - - .syntax unified - .text - .global strchr - .type strchr,%function - .align 4 - -strchr: - @ r0 = start of string - @ r1 = character to match - @ returns NULL for no match, or a pointer to the match - ldrb r2, [r0] @ load the first byte asap - uxtb r1, r1 - - @ To cater to long strings, we want to search through a few - @ characters until we reach an aligned pointer. To cater to - @ small strings, we don't want to start doing word operations - @ immediately. The compromise is a maximum of 16 bytes less - @ whatever is required to end with an aligned pointer. - @ r3 = number of characters to search in alignment loop - and r3, r0, #7 - rsb r3, r3, #15 @ 16 - 1 peeled loop iteration - cmp r2, r1 @ Found C? - it ne - cmpne r2, #0 @ Found EOS? - beq 99f - - @ Loop until we find ... -1: ldrb r2, [r0, #1]! - subs r3, r3, #1 @ ... the aligment point - it ne - cmpne r2, r1 @ ... or the character - it ne - cmpne r2, #0 @ ... or EOS - bne 1b - - @ Disambiguate the exit possibilites above - cmp r2, r1 @ Found the character - it ne - cmpne r2, #0 @ Found EOS - beq 99f - add r0, r0, #1 - - @ So now we're aligned. Now we actually need a stack frame. 
- push { r4, r5, r6, r7 } - - ldrd r2, r3, [r0], #8 - orr r1, r1, r1, lsl #8 @ Replicate C to all bytes -#ifdef ARCH_HAS_T2 - movw ip, #0x0101 - pld [r0, #64] - movt ip, #0x0101 -#else - ldr ip, =0x01010101 - pld [r0, #64] -#endif - orr r1, r1, r1, lsl #16 - - @ Loop searching for EOS or C, 8 bytes at a time. -2: - @ Subtracting (unsigned saturating) from 1 means result of 1 for - @ any byte that was originally zero and 0 otherwise. Therefore - @ we consider the lsb of each byte the "found" bit. - uqsub8 r4, ip, r2 @ Find EOS - eor r6, r2, r1 @ Convert C bytes to 0 - uqsub8 r5, ip, r3 - eor r7, r3, r1 - uqsub8 r6, ip, r6 @ Find C - pld [r0, #128] @ Prefetch 2 lines ahead - uqsub8 r7, ip, r7 - orr r4, r4, r6 @ Combine found for EOS and C - orr r5, r5, r7 - orrs r6, r4, r5 @ Combine the two words - it eq - ldrdeq r2, r3, [r0], #8 - beq 2b - - @ Found something. Disambiguate between first and second words. - @ Adjust r0 to point to the word containing the match. - @ Adjust r2 to the contents of the word containing the match. - @ Adjust r4 to the found bits for the word containing the match. - cmp r4, #0 - sub r0, r0, #4 - itte eq - moveq r4, r5 - moveq r2, r3 - subne r0, r0, #4 - - @ Find the bit-offset of the match within the word. -#if defined(__ARMEL__) - @ For LE, swap the found word so clz searches from the little end. - rev r4, r4 -#else - @ For BE, byte swap the word to make it easier to extract the byte. - rev r2, r2 -#endif - @ We're counting 0x01 (not 0x80), so the bit offset is 7 too high. - clz r3, r4 - sub r3, r3, #7 - lsr r2, r2, r3 @ Shift down found byte - uxtb r1, r1 @ Undo replication of C - uxtb r2, r2 @ Extract found byte - add r0, r0, r3, lsr #3 @ Adjust the pointer to the found byte - - pop { r4, r5, r6, r7 } - - @ Disambiguate between EOS and C. 
-99: - cmp r2, r1 - it ne - movne r0, #0 @ Found EOS, return NULL - bx lr - .size strchr,.-strchr diff --git a/reference/glibc/strlen.S b/reference/glibc/strlen.S deleted file mode 100644 index 6b3ce0a..0000000 --- a/reference/glibc/strlen.S +++ /dev/null @@ -1,99 +0,0 @@ -/* strlen -- find the length of a nul-terminated string. - Copyright (C) 2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - <http://www.gnu.org/licenses/>. */ - -#define ARCH_HAS_T2 - - .syntax unified - .text - .global strlen - .type strlen,%function - .align 4 -strlen: - @ r0 = start of string - ldrb r2, [r0] @ load the first byte asap - - @ To cater to long strings, we want to search through a few - @ characters until we reach an aligned pointer. To cater to - @ small strings, we don't want to start doing word operations - @ immediately. The compromise is a maximum of 16 bytes less - @ whatever is required to end with an aligned pointer. - @ r3 = number of characters to search in alignment loop - and r3, r0, #7 - mov r1, r0 @ Save the input pointer - rsb r3, r3, #15 @ 16 - 1 peeled loop iteration - cmp r2, #0 - beq 99f - - @ Loop until we find ... -1: - ldrb r2, [r0, #1]! - subs r3, r3, #1 @ ... the aligment point - it ne - cmpne r2, #0 @ ... 
or EOS - bne 1b - - @ Disambiguate the exit possibilites above - cmp r2, #0 @ Found EOS - beq 99f - add r0, r0, #1 - - @ So now we're aligned. - ldrd r2, r3, [r0], #8 -#ifdef ARCH_HAS_T2 - movw ip, #0x0101 - pld [r0, #64] - movt ip, #0x0101 -#else - ldr ip, =0x01010101 - pld [r0, #64] -#endif - - @ Loop searching for EOS, 8 bytes at a time. - @ Subtracting (unsigned saturating) from 1 for any byte means that - @ we get 1 for any byte that was originally zero and 0 otherwise. - @ Therefore we consider the lsb of each byte the "found" bit. - .balign 16 -2: uqsub8 r2, ip, r2 @ Find EOS - uqsub8 r3, ip, r3 - pld [r0, #128] @ Prefetch 2 lines ahead - orrs r3, r3, r2 @ Combine the two words - it eq - ldrdeq r2, r3, [r0], #8 - beq 2b - - @ Found something. Disambiguate between first and second words. - @ Adjust r0 to point to the word containing the match. - @ Adjust r2 to the found bits for the word containing the match. - cmp r2, #0 - sub r0, r0, #4 - ite eq - moveq r2, r3 - subne r0, r0, #4 - - @ Find the bit-offset of the match within the word. Note that the - @ bit result from clz will be 7 higher than "true", but we'll - @ immediately discard those bits converting to a byte offset. -#ifdef __ARMEL__ - rev r2, r2 @ For LE, count from the little end -#endif - clz r2, r2 - add r0, r0, r2, lsr #3 @ Adjust the pointer to the found byte -99: - sub r0, r0, r1 @ Subtract input to compute length - bx lr - .size strlen,.-strlen diff --git a/reference/helpers/bounce.c b/reference/helpers/bounce.c deleted file mode 100644 index f2d44b3..0000000 --- a/reference/helpers/bounce.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2011, Linaro Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the Linaro nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <stddef.h> - -/** Just returns. Used to calibrate the loop overhead */ -void *bounce(void *dst0, const void *src0, size_t len0) -{ - return dst0; -} diff --git a/reference/helpers/spawn.c b/reference/helpers/spawn.c deleted file mode 100644 index 54b5e51..0000000 --- a/reference/helpers/spawn.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2011, Linaro Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the Linaro nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <stddef.h> - -/** Helper called from Python that calls a C function a certain number - of times. 
-*/ -int spawniis(int (*fun)(int, int, size_t), int runs, int a, int b, size_t c) -{ - int result; - int i; - - for (i = 0; i != runs; i++) - { - result = fun(a, b, c); - } - - return result; -} diff --git a/reference/newlib-c/memchr.c b/reference/newlib-c/memchr.c deleted file mode 100644 index 688f795..0000000 --- a/reference/newlib-c/memchr.c +++ /dev/null @@ -1,134 +0,0 @@ -/* -FUNCTION - <<memchr>>---find character in memory - -INDEX - memchr - -ANSI_SYNOPSIS - #include <string.h> - void *memchr(const void *<[src]>, int <[c]>, size_t <[length]>); - -TRAD_SYNOPSIS - #include <string.h> - void *memchr(<[src]>, <[c]>, <[length]>) - void *<[src]>; - void *<[c]>; - size_t <[length]>; - -DESCRIPTION - This function searches memory starting at <<*<[src]>>> for the - character <[c]>. The search only ends with the first - occurrence of <[c]>, or after <[length]> characters; in - particular, <<NUL>> does not terminate the search. - -RETURNS - If the character <[c]> is found within <[length]> characters - of <<*<[src]>>>, a pointer to the character is returned. If - <[c]> is not found, then <<NULL>> is returned. - -PORTABILITY -<<memchr>> is ANSI C. - -<<memchr>> requires no supporting OS subroutines. - -QUICKREF - memchr ansi pure -*/ - -#include "shim.h" -#include <string.h> -#include <limits.h> - -/* Nonzero if either X or Y is not aligned on a "long" boundary. */ -#define UNALIGNED(X) ((long)X & (sizeof (long) - 1)) - -/* How many bytes are loaded each iteration of the word copy loop. */ -#define LBLOCKSIZE (sizeof (long)) - -/* Threshhold for punting to the bytewise iterator. */ -#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE) - -#if LONG_MAX == 2147483647L -#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080) -#else -#if LONG_MAX == 9223372036854775807L -/* Nonzero if X (a long int) contains a NULL byte. */ -#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080) -#else -#error long int is not a 32bit or 64bit type. 
-#endif -#endif - -#ifndef DETECTNULL -#error long int is not a 32bit or 64bit byte -#endif - -/* DETECTCHAR returns nonzero if (long)X contains the byte used - to fill (long)MASK. */ -#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK)) - -_PTR -_DEFUN (memchr, (src_void, c, length), - _CONST _PTR src_void _AND - int c _AND - size_t length) -{ - _CONST unsigned char *src = (_CONST unsigned char *) src_void; - unsigned char d = c; - -#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__) - unsigned long *asrc; - unsigned long mask; - int i; - - while (UNALIGNED (src)) - { - if (!length--) - return NULL; - if (*src == d) - return (void *) src; - src++; - } - - if (!TOO_SMALL (length)) - { - /* If we get this far, we know that length is large and src is - word-aligned. */ - /* The fast code reads the source one word at a time and only - performs the bytewise search on word-sized segments if they - contain the search character, which is detected by XORing - the word-sized segment with a word-sized block of the search - character and then detecting for the presence of NUL in the - result. */ - asrc = (unsigned long *) src; - mask = d << 8 | d; - mask = mask << 16 | mask; - for (i = 32; i < LBLOCKSIZE * 8; i <<= 1) - mask = (mask << i) | mask; - - while (length >= LBLOCKSIZE) - { - if (DETECTCHAR (*asrc, mask)) - break; - length -= LBLOCKSIZE; - asrc++; - } - - /* If there are fewer than LBLOCKSIZE characters left, - then we resort to the bytewise loop. 
*/ - - src = (unsigned char *) asrc; - } - -#endif /* not PREFER_SIZE_OVER_SPEED */ - - while (length--) - { - if (*src == d) - return (void *) src; - src++; - } - - return NULL; -} diff --git a/reference/newlib-c/memcmp.c b/reference/newlib-c/memcmp.c deleted file mode 100644 index 4f7ef1a..0000000 --- a/reference/newlib-c/memcmp.c +++ /dev/null @@ -1,114 +0,0 @@ -/* -FUNCTION - <<memcmp>>---compare two memory areas - -INDEX - memcmp - -ANSI_SYNOPSIS - #include <string.h> - int memcmp(const void *<[s1]>, const void *<[s2]>, size_t <[n]>); - -TRAD_SYNOPSIS - #include <string.h> - int memcmp(<[s1]>, <[s2]>, <[n]>) - void *<[s1]>; - void *<[s2]>; - size_t <[n]>; - -DESCRIPTION - This function compares not more than <[n]> characters of the - object pointed to by <[s1]> with the object pointed to by <[s2]>. - - -RETURNS - The function returns an integer greater than, equal to or - less than zero according to whether the object pointed to by - <[s1]> is greater than, equal to or less than the object - pointed to by <[s2]>. - -PORTABILITY -<<memcmp>> is ANSI C. - -<<memcmp>> requires no supporting OS subroutines. - -QUICKREF - memcmp ansi pure -*/ - -#include "shim.h" -#include <string.h> - - -/* Nonzero if either X or Y is not aligned on a "long" boundary. */ -#define UNALIGNED(X, Y) \ - (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1))) - -/* How many bytes are copied each iteration of the word copy loop. */ -#define LBLOCKSIZE (sizeof (long)) - -/* Threshhold for punting to the byte copier. 
*/ -#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE) - -int -_DEFUN (memcmp, (m1, m2, n), - _CONST _PTR m1 _AND - _CONST _PTR m2 _AND - size_t n) -{ -#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) - unsigned char *s1 = (unsigned char *) m1; - unsigned char *s2 = (unsigned char *) m2; - - while (n--) - { - if (*s1 != *s2) - { - return *s1 - *s2; - } - s1++; - s2++; - } - return 0; -#else - unsigned char *s1 = (unsigned char *) m1; - unsigned char *s2 = (unsigned char *) m2; - unsigned long *a1; - unsigned long *a2; - - /* If the size is too small, or either pointer is unaligned, - then we punt to the byte compare loop. Hopefully this will - not turn up in inner loops. */ - if (!TOO_SMALL(n) && !UNALIGNED(s1,s2)) - { - /* Otherwise, load and compare the blocks of memory one - word at a time. */ - a1 = (unsigned long*) s1; - a2 = (unsigned long*) s2; - while (n >= LBLOCKSIZE) - { - if (*a1 != *a2) - break; - a1++; - a2++; - n -= LBLOCKSIZE; - } - - /* check m mod LBLOCKSIZE remaining characters */ - - s1 = (unsigned char*)a1; - s2 = (unsigned char*)a2; - } - - while (n--) - { - if (*s1 != *s2) - return *s1 - *s2; - s1++; - s2++; - } - - return 0; -#endif /* not PREFER_SIZE_OVER_SPEED */ -} - diff --git a/reference/newlib-c/memcpy.c b/reference/newlib-c/memcpy.c deleted file mode 100644 index d7556ce..0000000 --- a/reference/newlib-c/memcpy.c +++ /dev/null @@ -1,110 +0,0 @@ -/* -FUNCTION - <<memcpy>>---copy memory regions - -ANSI_SYNOPSIS - #include <string.h> - void* memcpy(void *<[out]>, const void *<[in]>, size_t <[n]>); - -TRAD_SYNOPSIS - #include <string.h> - void *memcpy(<[out]>, <[in]>, <[n]> - void *<[out]>; - void *<[in]>; - size_t <[n]>; - -DESCRIPTION - This function copies <[n]> bytes from the memory region - pointed to by <[in]> to the memory region pointed to by - <[out]>. - - If the regions overlap, the behavior is undefined. - -RETURNS - <<memcpy>> returns a pointer to the first byte of the <[out]> - region. 
- -PORTABILITY -<<memcpy>> is ANSI C. - -<<memcpy>> requires no supporting OS subroutines. - -QUICKREF - memcpy ansi pure - */ - -#include "shim.h" -#include <string.h> - -/* Nonzero if either X or Y is not aligned on a "long" boundary. */ -#define UNALIGNED(X, Y) \ - (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1))) - -/* How many bytes are copied each iteration of the 4X unrolled loop. */ -#define BIGBLOCKSIZE (sizeof (long) << 2) - -/* How many bytes are copied each iteration of the word copy loop. */ -#define LITTLEBLOCKSIZE (sizeof (long)) - -/* Threshhold for punting to the byte copier. */ -#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE) - -_PTR -_DEFUN (memcpy, (dst0, src0, len0), - _PTR dst0 _AND - _CONST _PTR src0 _AND - size_t len0) -{ -#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) - char *dst = (char *) dst0; - char *src = (char *) src0; - - _PTR save = dst0; - - while (len0--) - { - *dst++ = *src++; - } - - return save; -#else - char *dst = dst0; - _CONST char *src = src0; - long *aligned_dst; - _CONST long *aligned_src; - - /* If the size is small, or either SRC or DST is unaligned, - then punt into the byte copy loop. This should be rare. */ - if (!TOO_SMALL(len0) && !UNALIGNED (src, dst)) - { - aligned_dst = (long*)dst; - aligned_src = (long*)src; - - /* Copy 4X long words at a time if possible. */ - while (len0 >= BIGBLOCKSIZE) - { - *aligned_dst++ = *aligned_src++; - *aligned_dst++ = *aligned_src++; - *aligned_dst++ = *aligned_src++; - *aligned_dst++ = *aligned_src++; - len0 -= BIGBLOCKSIZE; - } - - /* Copy one long word at a time if possible. */ - while (len0 >= LITTLEBLOCKSIZE) - { - *aligned_dst++ = *aligned_src++; - len0 -= LITTLEBLOCKSIZE; - } - - /* Pick up any residual with a byte copier. 
*/ - dst = (char*)aligned_dst; - src = (char*)aligned_src; - } - - while (len0--) - *dst++ = *src++; - - return dst0; -#endif /* not PREFER_SIZE_OVER_SPEED */ -} diff --git a/reference/newlib-c/memset.c b/reference/newlib-c/memset.c deleted file mode 100644 index a47b166..0000000 --- a/reference/newlib-c/memset.c +++ /dev/null @@ -1,103 +0,0 @@ -/* -FUNCTION - <<memset>>---set an area of memory - -INDEX - memset - -ANSI_SYNOPSIS - #include <string.h> - void *memset(void *<[dst]>, int <[c]>, size_t <[length]>); - -TRAD_SYNOPSIS - #include <string.h> - void *memset(<[dst]>, <[c]>, <[length]>) - void *<[dst]>; - int <[c]>; - size_t <[length]>; - -DESCRIPTION - This function converts the argument <[c]> into an unsigned - char and fills the first <[length]> characters of the array - pointed to by <[dst]> to the value. - -RETURNS - <<memset>> returns the value of <[dst]>. - -PORTABILITY -<<memset>> is ANSI C. - - <<memset>> requires no supporting OS subroutines. - -QUICKREF - memset ansi pure -*/ - -#include "shim.h" -#include <string.h> - -#define LBLOCKSIZE (sizeof(long)) -#define UNALIGNED(X) ((long)X & (LBLOCKSIZE - 1)) -#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE) - -_PTR -_DEFUN (memset, (m, c, n), - _PTR m _AND - int c _AND - size_t n) -{ - char *s = (char *) m; - -#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__) - int i; - unsigned long buffer; - unsigned long *aligned_addr; - unsigned int d = c & 0xff; /* To avoid sign extension, copy C to an - unsigned variable. */ - - while (UNALIGNED (s)) - { - if (n--) - *s++ = (char) c; - else - return m; - } - - if (!TOO_SMALL (n)) - { - /* If we get this far, we know that n is large and s is word-aligned. */ - aligned_addr = (unsigned long *) s; - - /* Store D into each char sized location in BUFFER so that - we can set large blocks quickly. */ - buffer = (d << 8) | d; - buffer |= (buffer << 16); - for (i = 32; i < LBLOCKSIZE * 8; i <<= 1) - buffer = (buffer << i) | buffer; - - /* Unroll the loop. 
*/ - while (n >= LBLOCKSIZE*4) - { - *aligned_addr++ = buffer; - *aligned_addr++ = buffer; - *aligned_addr++ = buffer; - *aligned_addr++ = buffer; - n -= 4*LBLOCKSIZE; - } - - while (n >= LBLOCKSIZE) - { - *aligned_addr++ = buffer; - n -= LBLOCKSIZE; - } - /* Pick up the remainder with a bytewise loop. */ - s = (char*)aligned_addr; - } - -#endif /* not PREFER_SIZE_OVER_SPEED */ - - while (n--) - *s++ = (char) c; - - return m; -} diff --git a/reference/newlib-c/shim.h b/reference/newlib-c/shim.h deleted file mode 100644 index e265e97..0000000 --- a/reference/newlib-c/shim.h +++ /dev/null @@ -1,5 +0,0 @@ -/* Basic macros that newlib uses */ -#define _PTR void * -#define _DEFUN(_name, _args, _def) _name (_def) -#define _CONST const -#define _AND , diff --git a/reference/newlib-c/strchr.c b/reference/newlib-c/strchr.c deleted file mode 100644 index a639e3d..0000000 --- a/reference/newlib-c/strchr.c +++ /dev/null @@ -1,126 +0,0 @@ -/* -FUNCTION - <<strchr>>---search for character in string - -INDEX - strchr - -ANSI_SYNOPSIS - #include <string.h> - char * strchr(const char *<[string]>, int <[c]>); - -TRAD_SYNOPSIS - #include <string.h> - char * strchr(<[string]>, <[c]>); - const char *<[string]>; - int <[c]>; - -DESCRIPTION - This function finds the first occurence of <[c]> (converted to - a char) in the string pointed to by <[string]> (including the - terminating null character). - -RETURNS - Returns a pointer to the located character, or a null pointer - if <[c]> does not occur in <[string]>. - -PORTABILITY -<<strchr>> is ANSI C. - -<<strchr>> requires no supporting OS subroutines. - -QUICKREF - strchr ansi pure -*/ - -#include "shim.h" -#include <string.h> -#include <limits.h> - -#undef strchr - -/* Nonzero if X is not aligned on a "long" boundary. */ -#define UNALIGNED(X) ((long)X & (sizeof (long) - 1)) - -/* How many bytes are loaded each iteration of the word copy loop. 
*/ -#define LBLOCKSIZE (sizeof (long)) - -#if LONG_MAX == 2147483647L -#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080) -#else -#if LONG_MAX == 9223372036854775807L -/* Nonzero if X (a long int) contains a NULL byte. */ -#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080) -#else -#error long int is not a 32bit or 64bit type. -#endif -#endif - -/* DETECTCHAR returns nonzero if (long)X contains the byte used - to fill (long)MASK. */ -#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK)) - -char * -_DEFUN (strchr, (s1, i), - _CONST char *s1 _AND - int i) -{ - _CONST unsigned char *s = (_CONST unsigned char *)s1; - unsigned char c = i; - -#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__) - unsigned long mask,j; - unsigned long *aligned_addr; - - /* Special case for finding 0. */ - if (!c) - { - while (UNALIGNED (s)) - { - if (!*s) - return (char *) s; - s++; - } - /* Operate a word at a time. */ - aligned_addr = (unsigned long *) s; - while (!DETECTNULL (*aligned_addr)) - aligned_addr++; - /* Found the end of string. */ - s = (const unsigned char *) aligned_addr; - while (*s) - s++; - return (char *) s; - } - - /* All other bytes. Align the pointer, then search a long at a time. */ - while (UNALIGNED (s)) - { - if (!*s) - return NULL; - if (*s == c) - return (char *) s; - s++; - } - - mask = c; - for (j = 8; j < LBLOCKSIZE * 8; j <<= 1) - mask = (mask << j) | mask; - - aligned_addr = (unsigned long *) s; - while (!DETECTNULL (*aligned_addr) && !DETECTCHAR (*aligned_addr, mask)) - aligned_addr++; - - /* The block of bytes currently pointed to by aligned_addr - contains either a null or the target char, or both. We - catch it using the bytewise search. 
*/ - - s = (unsigned char *) aligned_addr; - -#endif /* not PREFER_SIZE_OVER_SPEED */ - - while (*s && *s != c) - s++; - if (*s == c) - return (char *)s; - return NULL; -} diff --git a/reference/newlib-c/strcmp.c b/reference/newlib-c/strcmp.c deleted file mode 100644 index 459841d..0000000 --- a/reference/newlib-c/strcmp.c +++ /dev/null @@ -1,109 +0,0 @@ -/* -FUNCTION - <<strcmp>>---character string compare - -INDEX - strcmp - -ANSI_SYNOPSIS - #include <string.h> - int strcmp(const char *<[a]>, const char *<[b]>); - -TRAD_SYNOPSIS - #include <string.h> - int strcmp(<[a]>, <[b]>) - char *<[a]>; - char *<[b]>; - -DESCRIPTION - <<strcmp>> compares the string at <[a]> to - the string at <[b]>. - -RETURNS - If <<*<[a]>>> sorts lexicographically after <<*<[b]>>>, - <<strcmp>> returns a number greater than zero. If the two - strings match, <<strcmp>> returns zero. If <<*<[a]>>> - sorts lexicographically before <<*<[b]>>>, <<strcmp>> returns a - number less than zero. - -PORTABILITY -<<strcmp>> is ANSI C. - -<<strcmp>> requires no supporting OS subroutines. - -QUICKREF - strcmp ansi pure -*/ - -#include "shim.h" -#include <string.h> -#include <limits.h> - -#undef strcmp - -/* Nonzero if either X or Y is not aligned on a "long" boundary. */ -#define UNALIGNED(X, Y) \ - (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1))) - -/* DETECTNULL returns nonzero if (long)X contains a NULL byte. */ -#if LONG_MAX == 2147483647L -#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080) -#else -#if LONG_MAX == 9223372036854775807L -#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080) -#else -#error long int is not a 32bit or 64bit type. 
-#endif -#endif - -#ifndef DETECTNULL -#error long int is not a 32bit or 64bit byte -#endif - -int -_DEFUN (strcmp, (s1, s2), - _CONST char *s1 _AND - _CONST char *s2) -{ -#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) - while (*s1 != '\0' && *s1 == *s2) - { - s1++; - s2++; - } - - return (*(unsigned char *) s1) - (*(unsigned char *) s2); -#else - unsigned long *a1; - unsigned long *a2; - - /* If s1 or s2 are unaligned, then compare bytes. */ - if (!UNALIGNED (s1, s2)) - { - /* If s1 and s2 are word-aligned, compare them a word at a time. */ - a1 = (unsigned long*)s1; - a2 = (unsigned long*)s2; - while (*a1 == *a2) - { - /* To get here, *a1 == *a2, thus if we find a null in *a1, - then the strings must be equal, so return zero. */ - if (DETECTNULL (*a1)) - return 0; - - a1++; - a2++; - } - - /* A difference was detected in last few bytes of s1, so search bytewise */ - s1 = (char*)a1; - s2 = (char*)a2; - } - - while (*s1 != '\0' && *s1 == *s2) - { - s1++; - s2++; - } - return (*(unsigned char *) s1) - (*(unsigned char *) s2); -#endif /* not PREFER_SIZE_OVER_SPEED */ -} diff --git a/reference/newlib-c/strcpy.c b/reference/newlib-c/strcpy.c deleted file mode 100644 index ec69937..0000000 --- a/reference/newlib-c/strcpy.c +++ /dev/null @@ -1,100 +0,0 @@ -/* -FUNCTION - <<strcpy>>---copy string - -INDEX - strcpy - -ANSI_SYNOPSIS - #include <string.h> - char *strcpy(char *<[dst]>, const char *<[src]>); - -TRAD_SYNOPSIS - #include <string.h> - char *strcpy(<[dst]>, <[src]>) - char *<[dst]>; - char *<[src]>; - -DESCRIPTION - <<strcpy>> copies the string pointed to by <[src]> - (including the terminating null character) to the array - pointed to by <[dst]>. - -RETURNS - This function returns the initial value of <[dst]>. - -PORTABILITY -<<strcpy>> is ANSI C. - -<<strcpy>> requires no supporting OS subroutines. 
- -QUICKREF - strcpy ansi pure -*/ - -#include "shim.h" -#include <string.h> -#include <limits.h> - -/*SUPPRESS 560*/ -/*SUPPRESS 530*/ - -/* Nonzero if either X or Y is not aligned on a "long" boundary. */ -#define UNALIGNED(X, Y) \ - (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1))) - -#if LONG_MAX == 2147483647L -#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080) -#else -#if LONG_MAX == 9223372036854775807L -/* Nonzero if X (a long int) contains a NULL byte. */ -#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080) -#else -#error long int is not a 32bit or 64bit type. -#endif -#endif - -#ifndef DETECTNULL -#error long int is not a 32bit or 64bit byte -#endif - -char* -_DEFUN (strcpy, (dst0, src0), - char *dst0 _AND - _CONST char *src0) -{ -#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) - char *s = dst0; - - while (*dst0++ = *src0++) - ; - - return s; -#else - char *dst = dst0; - _CONST char *src = src0; - long *aligned_dst; - _CONST long *aligned_src; - - /* If SRC or DEST is unaligned, then copy bytes. */ - if (!UNALIGNED (src, dst)) - { - aligned_dst = (long*)dst; - aligned_src = (long*)src; - - /* SRC and DEST are both "long int" aligned, try to do "long int" - sized copies. 
*/ - while (!DETECTNULL(*aligned_src)) - { - *aligned_dst++ = *aligned_src++; - } - - dst = (char*)aligned_dst; - src = (char*)aligned_src; - } - - while ((*dst++ = *src++)) - ; - return dst0; -#endif /* not PREFER_SIZE_OVER_SPEED */ -} diff --git a/reference/newlib-c/strlen.c b/reference/newlib-c/strlen.c deleted file mode 100644 index 64efa28..0000000 --- a/reference/newlib-c/strlen.c +++ /dev/null @@ -1,88 +0,0 @@ -/* -FUNCTION - <<strlen>>---character string length - -INDEX - strlen - -ANSI_SYNOPSIS - #include <string.h> - size_t strlen(const char *<[str]>); - -TRAD_SYNOPSIS - #include <string.h> - size_t strlen(<[str]>) - char *<[src]>; - -DESCRIPTION - The <<strlen>> function works out the length of the string - starting at <<*<[str]>>> by counting chararacters until it - reaches a <<NULL>> character. - -RETURNS - <<strlen>> returns the character count. - -PORTABILITY -<<strlen>> is ANSI C. - -<<strlen>> requires no supporting OS subroutines. - -QUICKREF - strlen ansi pure -*/ - -#include "shim.h" -#include <string.h> -#include <limits.h> - -#define LBLOCKSIZE (sizeof (long)) -#define UNALIGNED(X) ((long)X & (LBLOCKSIZE - 1)) - -#if LONG_MAX == 2147483647L -#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080) -#else -#if LONG_MAX == 9223372036854775807L -/* Nonzero if X (a long int) contains a NULL byte. */ -#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080) -#else -#error long int is not a 32bit or 64bit type. -#endif -#endif - -#ifndef DETECTNULL -#error long int is not a 32bit or 64bit byte -#endif - -size_t -_DEFUN (strlen, (str), - _CONST char *str) -{ - _CONST char *start = str; - -#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__) - unsigned long *aligned_addr; - - /* Align the pointer, so we can search a word at a time. 
*/ - while (UNALIGNED (str)) - { - if (!*str) - return str - start; - str++; - } - - /* If the string is word-aligned, we can check for the presence of - a null in each word-sized block. */ - aligned_addr = (unsigned long *)str; - while (!DETECTNULL (*aligned_addr)) - aligned_addr++; - - /* Once a null is detected, we check each byte in that block for a - precise position of the null. */ - str = (char *) aligned_addr; - -#endif /* not PREFER_SIZE_OVER_SPEED */ - - while (*str) - str++; - return str - start; -} diff --git a/reference/newlib-xscale/memchr.c b/reference/newlib-xscale/memchr.c deleted file mode 100644 index d574efb..0000000 --- a/reference/newlib-xscale/memchr.c +++ /dev/null @@ -1,95 +0,0 @@ -#include <string.h> -#include "xscale.h" - -void * -memchr (const void *start, int c, size_t len) -{ - const char *str = start; - - if (len == 0) - return 0; - - asm (PRELOADSTR ("%0") : : "r" (start)); - - c &= 0xff; - -#ifndef __OPTIMIZE_SIZE__ - /* Skip unaligned part. */ - if ((long)str & 3) - { - str--; - do - { - if (*++str == c) - return (void *)str; - } - while (((long)str & 3) != 0 && --len > 0); - } - - if (len > 3) - { - unsigned int c2 = c + (c << 8); - c2 += c2 << 16; - - /* Load two constants: - R7 = 0xfefefeff [ == ~(0x80808080 << 1) ] - R6 = 0x80808080 */ - - asm ( - "mov r6, #0x80\n\ - add r6, r6, #0x8000\n\ - add r6, r6, r6, lsl #16\n\ - mvn r7, r6, lsl #1\n\ -\n\ -0:\n\ - cmp %1, #0x7\n\ - bls 1f\n\ -\n\ - ldmia %0!, { r3, r9 }\n\ -" PRELOADSTR ("%0") "\n\ - sub %1, %1, #8\n\ - eor r3, r3, %2\n\ - eor r9, r9, %2\n\ - add r2, r3, r7\n\ - add r8, r9, r7\n\ - bic r2, r2, r3\n\ - bic r8, r8, r9\n\ - and r1, r2, r6\n\ - and r9, r8, r6\n\ - orrs r1, r1, r9\n\ - beq 0b\n\ -\n\ - add %1, %1, #8\n\ - sub %0, %0, #8\n\ -1:\n\ - cmp %1, #0x3\n\ - bls 2f\n\ -\n\ - ldr r3, [%0], #4\n\ -" PRELOADSTR ("%0") "\n\ - sub %1, %1, #4\n\ - eor r3, r3, %2\n\ - add r2, r3, r7\n\ - bic r2, r2, r3\n\ - ands r1, r2, r6\n\ - beq 1b\n\ -\n\ - sub %0, %0, #4\n\ - add %1, 
%1, #4\n\ -2:\n\ -" - : "=&r" (str), "=&r" (len) - : "r" (c2), "0" (str), "1" (len) - : "r1", "r2", "r3", "r6", "r7", "r8", "r9", "cc"); - } -#endif - - while (len-- > 0) - { - if (*str == c) - return (void *)str; - str++; - } - - return 0; -} diff --git a/reference/newlib-xscale/memcpy.c b/reference/newlib-xscale/memcpy.c deleted file mode 100644 index 434c914..0000000 --- a/reference/newlib-xscale/memcpy.c +++ /dev/null @@ -1,94 +0,0 @@ -#include <string.h> -#include "xscale.h" - -void * -memcpy (void *dst0, const void *src0, size_t len) -{ - int dummy; - asm volatile ( -#ifndef __OPTIMIZE_SIZE__ - "cmp %2, #0x3\n\ - bls 3f\n\ - and lr, %1, #0x3\n\ - and r3, %0, #0x3\n\ - cmp lr, r3\n\ - bne 3f\n\ - cmp lr, #0x0\n\ - beq 2f\n\ - b 1f\n\ -0:\n\ - ldrb r3, [%1], #1\n\ -" - PRELOADSTR ("%1") -"\n\ - tst %1, #0x3\n\ - strb r3, [%0], #1\n\ - beq 3f\n\ -1:\n\ - sub %2, %2, #1\n\ - cmn %2, #1\n\ - bne 0b\n\ -2:\n\ - cmp %2, #0xf\n\ - bls 1f\n\ -0:\n\ - ldmia %1!, { r3, r4, r5, lr }\n\ -" - PRELOADSTR ("%1") -"\n\ -\n\ - sub %2, %2, #16\n\ - cmp %2, #0xf\n\ - stmia %0!, { r3, r4, r5, lr }\n\ - bhi 0b\n\ -1:\n\ - cmp %2, #0x7\n\ - bls 1f\n\ -0:\n\ - ldmia %1!, { r3, r4 }\n\ -" - PRELOADSTR ("%1") -"\n\ -\n\ - sub %2, %2, #8\n\ - cmp %2, #0x7\n\ - stmia %0!, { r3, r4 }\n\ - bhi 0b\n\ -1:\n\ - cmp %2, #0x3\n\ - bls 3f\n\ -0:\n\ - sub %2, %2, #4\n\ - ldr r3, [%1], #4\n\ -" - PRELOADSTR ("%1") -"\n\ -\n\ - cmp %2, #0x3\n\ - str r3, [%0], #4\n\ - bhi 0b\n\ -" -#endif /* !__OPTIMIZE_SIZE__ */ -"\n\ -3:\n\ -" - PRELOADSTR ("%1") -"\n\ - sub %2, %2, #1\n\ - cmn %2, #1\n\ - beq 1f\n\ -0:\n\ - sub %2, %2, #1\n\ - ldrb r3, [%1], #1\n\ -" - PRELOADSTR ("%1") -"\n\ - cmn %2, #1\n\ - strb r3, [%0], #1\n\ - bne 0b\n\ -1:" - : "=&r" (dummy), "=&r" (src0), "=&r" (len) - : "0" (dst0), "1" (src0), "2" (len) - : "memory", "lr", "r3", "r4", "r5", "cc"); - return dst0; -} diff --git a/reference/newlib-xscale/memset.c b/reference/newlib-xscale/memset.c deleted file mode 100644 index 
3ff9b99..0000000 --- a/reference/newlib-xscale/memset.c +++ /dev/null @@ -1,81 +0,0 @@ -#include <string.h> -#include "xscale.h" - -void * -memset (void *dst, int c, size_t len) -{ - int dummy; - - asm volatile ("tst %0, #0x3" -#ifndef __OPTIMIZE_SIZE__ -"\n\ - beq 1f\n\ - b 2f\n\ -0:\n\ - strb %1, [%0], #1\n\ - tst %0, #0x3\n\ - beq 1f\n\ -2:\n\ - movs r3, %2\n\ - sub %2, %2, #1\n\ - bne 0b\n\ -# At this point we know that %2 == len == -1 (since the SUB has already taken\n\ -# place). If we fall through to the 1: label (as the code used to do), the\n\ -# CMP will detect this negative value and branch to the 2: label. This will\n\ -# test %2 again, but this time against 0. The test will fail and the loop\n\ -# at 2: will go on for (almost) ever. Hence the explicit branch to the end\n\ -# of the hand written assembly code.\n\ - b 4f\n\ -1:\n\ - cmp %2, #0x3\n\ - bls 2f\n\ - and %1, %1, #0xff\n\ - orr lr, %1, %1, asl #8\n\ - cmp %2, #0xf\n\ - orr lr, lr, lr, asl #16\n\ - bls 1f\n\ - mov r3, lr\n\ - mov r4, lr\n\ - mov r5, lr\n\ -0:\n\ - sub %2, %2, #16\n\ - stmia %0!, { r3, r4, r5, lr }\n\ - cmp %2, #0xf\n\ - bhi 0b\n\ -1:\n\ - cmp %2, #0x7\n\ - bls 1f\n\ - mov r3, lr\n\ -0:\n\ - sub %2, %2, #8\n\ - stmia %0!, { r3, lr }\n\ - cmp %2, #0x7\n\ - bhi 0b\n\ -1:\n\ - cmp %2, #0x3\n\ - bls 2f\n\ -0:\n\ - sub %2, %2, #4\n\ - str lr, [%0], #4\n\ - cmp %2, #0x3\n\ - bhi 0b\n\ -" -#endif /* !__OPTIMIZE_SIZE__ */ -"\n\ -2:\n\ - movs r3, %2\n\ - sub %2, %2, #1\n\ - beq 4f\n\ -0:\n\ - movs r3, %2\n\ - sub %2, %2, #1\n\ - strb %1, [%0], #1\n\ - bne 0b\n\ -4:" - - : "=&r" (dummy), "=&r" (c), "=&r" (len) - : "0" (dst), "1" (c), "2" (len) - : "memory", "r3", "r4", "r5", "lr"); - - return dst; -} diff --git a/reference/newlib-xscale/strchr.c b/reference/newlib-xscale/strchr.c deleted file mode 100644 index 73bfec5..0000000 --- a/reference/newlib-xscale/strchr.c +++ /dev/null @@ -1,66 +0,0 @@ -#include <string.h> -#include "xscale.h" -#undef strchr - -char * -strchr (const char *s, int 
c) -{ - unsigned int c2; - asm (PRELOADSTR ("%0") : : "r" (s)); - - c &= 0xff; - -#ifndef __OPTIMIZE_SIZE__ - /* Skip unaligned part. */ - if ((long)s & 3) - { - s--; - do - { - int c2 = *++s; - if (c2 == c) - return (char *)s; - if (c2 == '\0') - return 0; - } - while (((long)s & 3) != 0); - } - - c2 = c + (c << 8); - c2 += c2 << 16; - - /* Load two constants: - R6 = 0xfefefeff [ == ~(0x80808080 << 1) ] - R5 = 0x80808080 */ - - asm (PRELOADSTR ("%0") "\n\ - mov r5, #0x80\n\ - add r5, r5, #0x8000\n\ - add r5, r5, r5, lsl #16\n\ - mvn r6, r5, lsl #1\n\ -\n\ - sub %0, %0, #4\n\ -0:\n\ - ldr r1, [%0, #4]!\n\ -" PRELOADSTR ("%0") "\n\ - add r3, r1, r6\n\ - bic r3, r3, r1\n\ - ands r2, r3, r5\n\ - bne 1f\n\ - eor r2, r1, %1\n\ - add r3, r2, r6\n\ - bic r3, r3, r2\n\ - ands r1, r3, r5\n\ - beq 0b\n\ -1:" - : "=&r" (s) - : "r" (c2), "0" (s) - : "r1", "r2", "r3", "r5", "r6", "cc"); -#endif - - while (*s && *s != c) - s++; - if (*s == c) - return (char *)s; - return NULL; -} diff --git a/reference/newlib-xscale/strcmp.c b/reference/newlib-xscale/strcmp.c deleted file mode 100644 index 086d7ff..0000000 --- a/reference/newlib-xscale/strcmp.c +++ /dev/null @@ -1,100 +0,0 @@ -#include <string.h> -#include "xscale.h" -#undef strcmp - -int -strcmp (const char *s1, const char *s2) -{ - asm (PRELOADSTR ("%0") : : "r" (s1)); - asm (PRELOADSTR ("%0") : : "r" (s2)); - -#ifndef __OPTIMIZE_SIZE__ - if (((long)s1 & 3) == ((long)s2 & 3)) - { - int result; - - /* Skip unaligned part. 
*/ - while ((long)s1 & 3) - { - if (*s1 == '\0' || *s1 != *s2) - goto out; - s1++; - s2++; - } - - /* Load two constants: - lr = 0xfefefeff [ == ~(0x80808080 << 1) ] - ip = 0x80808080 */ - - asm ( - "ldr r2, [%1, #0]\n\ - ldr r3, [%2, #0]\n\ - cmp r2, r3\n\ - bne 2f\n\ -\n\ - mov ip, #0x80\n\ - add ip, ip, #0x8000\n\ - add ip, ip, ip, lsl #16\n\ - mvn lr, ip, lsl #1\n\ -\n\ -0:\n\ - ldr r2, [%1, #0]\n\ - add r3, r2, lr\n\ - bic r3, r3, r2\n\ - tst r3, ip\n\ - beq 1f\n\ - mov %0, #0x0\n\ - b 3f\n\ -1:\n\ - ldr r2, [%1, #4]!\n\ - ldr r3, [%2, #4]!\n\ -" PRELOADSTR("%1") "\n\ -" PRELOADSTR("%2") "\n\ - cmp r2, r3\n\ - beq 0b" - - /* The following part could be done in a C loop as well, but it needs - to be assembler to save some cycles in the case where the optimized - loop above finds the strings to be equal. */ -"\n\ -2:\n\ - ldrb r2, [%1, #0]\n\ -" PRELOADSTR("%1") "\n\ -" PRELOADSTR("%2") "\n\ - cmp r2, #0x0\n\ - beq 1f\n\ - ldrb r3, [%2, #0]\n\ - cmp r2, r3\n\ - bne 1f\n\ -0:\n\ - ldrb r3, [%1, #1]!\n\ - add %2, %2, #1\n\ - ands ip, r3, #0xff\n\ - beq 1f\n\ - ldrb r3, [%2]\n\ - cmp ip, r3\n\ - beq 0b\n\ -1:\n\ - ldrb lr, [%1, #0]\n\ - ldrb ip, [%2, #0]\n\ - rsb %0, ip, lr\n\ -3:\n\ -" - - : "=r" (result), "=&r" (s1), "=&r" (s2) - : "1" (s1), "2" (s2) - : "lr", "ip", "r2", "r3", "cc"); - return result; - } -#endif - - while (*s1 != '\0' && *s1 == *s2) - { - asm (PRELOADSTR("%0") : : "r" (s1)); - asm (PRELOADSTR("%0") : : "r" (s2)); - s1++; - s2++; - } - out: - return (*(unsigned char *) s1) - (*(unsigned char *) s2); -} diff --git a/reference/newlib-xscale/strcpy.c b/reference/newlib-xscale/strcpy.c deleted file mode 100644 index 325fa37..0000000 --- a/reference/newlib-xscale/strcpy.c +++ /dev/null @@ -1,55 +0,0 @@ -#include <string.h> -#include "xscale.h" -#undef strcpy - -char * -strcpy (char *dest, const char *src) -{ - char *dest0 = dest; - - asm (PRELOADSTR ("%0") : : "r" (src)); - -#ifndef __OPTIMIZE_SIZE__ - if (((long)src & 3) == ((long)dest & 3)) - { - /* 
Skip unaligned part. */ - while ((long)src & 3) - { - if (! (*dest++ = *src++)) - return dest0; - } - - /* Load two constants: - R4 = 0xfefefeff [ == ~(0x80808080 << 1) ] - R5 = 0x80808080 */ - - asm ("mov r5, #0x80\n\ - ldr r1, [%1, #0]\n\ - add r5, r5, #0x8000\n\ - add r5, r5, r5, lsl #16\n\ - mvn r4, r5, lsl #1\n\ -\n\ - add r3, r1, r5\n\ - bic r3, r3, r1\n\ - ands r2, r3, r4\n\ - bne 1f\n\ -0:\n\ - ldr r3, [%1, #0]\n\ - ldr r1, [%1, #4]!\n\ -" PRELOADSTR("%1") "\n\ - str r3, [%0], #4\n\ - add r2, r1, r4\n\ - bic r2, r2, r1\n\ - ands r3, r2, r5\n\ - beq 0b\n\ -1:" - : "=&r" (dest), "=&r" (src) - : "0" (dest), "1" (src) - : "r1", "r2", "r3", "r4", "r5", "memory", "cc"); - } -#endif - - while (*dest++ = *src++) - asm (PRELOADSTR ("%0") : : "r" (src)); - return dest0; -} diff --git a/reference/newlib-xscale/strlen.c b/reference/newlib-xscale/strlen.c deleted file mode 100644 index cac958a..0000000 --- a/reference/newlib-xscale/strlen.c +++ /dev/null @@ -1,94 +0,0 @@ -#include <string.h> -#include "xscale.h" -#define _CONST const - -size_t -strlen (const char *str) -{ - _CONST char *start = str; - - /* Skip unaligned part. 
*/ - if ((long)str & 3) - { - str--; - do - { - if (*++str == '\0') - goto out; - } - while ((long)str & 3); - } - - /* Load two constants: - R4 = 0xfefefeff [ == ~(0x80808080 << 1) ] - R5 = 0x80808080 */ - - asm ("mov r5, #0x80\n\ - add r5, r5, #0x8000\n\ - add r5, r5, r5, lsl #16\n\ - mvn r4, r5, lsl #1\n\ -" - -#if defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5E__ || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_7A__ - -" tst %0, #0x7\n\ - itt eq\n\ - ldreqd r6, [%0]\n\ - beq 1f\n\ - ldr r2, [%0]\n\ - add r3, r2, r4\n\ - bic r3, r3, r2\n\ - ands r2, r3, r5\n\ - bne 2f\n\ - sub %0, %0, #4\n\ -\n\ -0:\n\ - ldrd r6, [%0, #8]!\n\ -" - PRELOADSTR ("%0") -"\n\ -1:\n\ - add r3, r6, r4\n\ - add r2, r7, r4\n\ - bic r3, r3, r6\n\ - bic r2, r2, r7\n\ - and r3, r3, r5\n\ - and r2, r2, r5\n\ - orrs r3, r2, r3\n\ - beq 0b\n\ -" -#else - -" sub %0, %0, #4\n\ -\n\ -0:\n\ - ldr r6, [%0, #4]!\n\ -" - PRELOADSTR ("%0") -"\n\ - add r3, r6, r4\n\ - bic r3, r3, r6\n\ - ands r3, r3, r5\n\ - beq 0b\n\ -" -#endif /* __ARM_ARCH_5[T][E]__ */ -"\n\ -2:\n\ - ldrb r3, [%0]\n\ - cmp r3, #0x0\n\ - beq 1f\n\ -\n\ -0:\n\ - ldrb r3, [%0, #1]!\n\ -" - PRELOADSTR ("%0") -"\n\ - cmp r3, #0x0\n\ - bne 0b\n\ -1:\n\ -" - : "=r" (str) : "0" (str) : "r2", "r3", "r4", "r5", "r6", "r7"); - - out: - return str - start; -} diff --git a/reference/newlib-xscale/xscale.h b/reference/newlib-xscale/xscale.h deleted file mode 100644 index 90fb211..0000000 --- a/reference/newlib-xscale/xscale.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef __XSCALE_MACH_H__ -#define __XSCALE_MACH_H__ - -/* These are predefined by new versions of GNU cpp. */ - -#ifndef __USER_LABEL_PREFIX__ -#define __USER_LABEL_PREFIX__ _ -#endif - -#ifndef __REGISTER_PREFIX__ -#define __REGISTER_PREFIX__ -#endif - -/* ANSI concatenation macros. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a##b - -/* Use the right prefix for global labels. 
*/ - -#define SYM(x) CONCAT1(__USER_LABEL_PREFIX__, x) - -#define PRELOAD(X) pld [X] -#define PRELOADSTR(X) " pld [" X "]" - -#endif /* !__XSCALE_MACH_H__ */ diff --git a/reference/newlib/arm_asm.h b/reference/newlib/arm_asm.h deleted file mode 100644 index 5a63a8d..0000000 --- a/reference/newlib/arm_asm.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2009 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ARM_ASM__H -#define ARM_ASM__H - -/* First define some macros that keep everything else sane. 
*/ -#if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__) -#define _ISA_ARM_7 -#endif - -#if defined (_ISA_ARM_7) || defined (__ARM_ARCH_6__) || \ - defined (__ARM_ARCH_6J__) || defined (__ARM_ARCH_6T2__) || \ - defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__) || \ - defined (__ARM_ARCH_6Z__) -#define _ISA_ARM_6 -#endif - -#if defined (_ISA_ARM_6) || defined (__ARM_ARCH_5__) || \ - defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5TE__) || \ - defined (__ARM_ARCH_5TEJ__) -#define _ISA_ARM_5 -#endif - -#if defined (_ISA_ARM_5) || defined (__ARM_ARCH_4T__) -#define _ISA_ARM_4T -#endif - -#if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7__) || \ - defined (__ARM_ARCH_7EM__) -#define _ISA_THUMB_2 -#endif - -#if defined (_ISA_THUMB_2) || defined (__ARM_ARCH_6M__) -#define _ISA_THUMB_1 -#endif - - -/* Now some macros for common instruction sequences. */ -#ifdef __ASSEMBLER__ -.macro RETURN cond= -#if defined (_ISA_ARM_4T) || defined (_ISA_THUMB_1) - bx\cond lr -#else - mov\cond pc, lr -#endif -.endm - -.macro optpld base, offset=#0 -#if defined (_ISA_ARM_7) - pld [\base, \offset] -#endif -.endm - -#else -asm(".macro RETURN cond=\n\t" -#if defined (_ISA_ARM_4T) || defined (_ISA_THUMB_1) - "bx\\cond lr\n\t" -#else - "mov\\cond pc, lr\n\t" -#endif - ".endm" - ); - -asm(".macro optpld base, offset=#0\n\t" -#if defined (_ISA_ARM_7) - "pld [\\base, \\offset]\n\t" -#endif - ".endm" - ); -#endif - -#endif /* ARM_ASM__H */ diff --git a/reference/newlib/memcpy.S b/reference/newlib/memcpy.S deleted file mode 100644 index e408ed0..0000000 --- a/reference/newlib/memcpy.S +++ /dev/null @@ -1,423 +0,0 @@ -/* - * Copyright (c) 2011 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ - (!(defined (__ARM_ARCH_7A__)))) - - /* Do nothing here. See memcpy-stub.c in the same directory. */ - -#else - /* Prototype: void *memcpy (void *dst, const void *src, size_t count). */ - - /* Use the version of memcpy implemented using LDRD and STRD. - This version is tuned for Cortex-A15. - This might not be the best for other ARMv7-A CPUs, - but there is no predefine to distinguish between - different CPUs in the same architecture, - and this version is better than the plain memcpy provided in newlib. - - Therefore, we use this version for all ARMv7-A CPUS. */ - - /* To make the same code compile for both ARM and Thumb instruction - sets, switch to unified syntax at the beginning of this function. - However, by using the same code, we may be missing optimization - opportunities. 
For instance, in LDRD/STRD instructions, the first - destination register must be even and the second consecutive in - ARM state, but not in Thumb state. */ - - .syntax unified - -#if defined (__thumb__) - .thumb - .thumb_func -#endif - - .global memcpy - .type memcpy, %function -memcpy: - - /* Assumes that n >= 0, and dst, src are valid pointers. - If there is at least 8 bytes to copy, use LDRD/STRD. - If src and dst are misaligned with different offsets, - first copy byte by byte until dst is aligned, - and then copy using LDRD/STRD and shift if needed. - When less than 8 left, copy a word and then byte by byte. */ - - /* Save registers (r0 holds the return value): - optimized push {r0, r4, r5, lr}. - To try and improve performance, stack layout changed, - i.e., not keeping the stack looking like users expect - (highest numbered register at highest address). */ - push {r0, lr} - strd r4, r5, [sp, #-8]! - - /* TODO: Add debug frame directives. - We don't need exception unwind directives, because the code below - does not throw any exceptions and does not call any other functions. - Generally, newlib functions like this lack debug information for - assembler source. */ - - /* Get copying of tiny blocks out of the way first. */ - /* Is there at least 4 bytes to copy? */ - subs r2, r2, #4 - blt copy_less_than_4 /* If n < 4. */ - - /* Check word alignment. */ - ands ip, r0, #3 /* ip = last 2 bits of dst. */ - bne dst_not_word_aligned /* If dst is not word-aligned. */ - - /* Get here if dst is word-aligned. */ - ands ip, r1, #3 /* ip = last 2 bits of src. */ - bne src_not_word_aligned /* If src is not word-aligned. */ -word_aligned: - /* Get here if source and dst both are word-aligned. - The number of bytes remaining to copy is r2+4. */ - - /* Is there is at least 64 bytes to copy? */ - subs r2, r2, #60 - blt copy_less_than_64 /* If r2 + 4 < 64. 
*/ - - /* First, align the destination buffer to 8-bytes, - to make sure double loads and stores don't cross cache line boundary, - as they are then more expensive even if the data is in the cache - (require two load/store issue cycles instead of one). - If only one of the buffers is not 8-bytes aligned, - then it's more important to align dst than src, - because there is more penalty for stores - than loads that cross cacheline boundary. - This check and realignment are only worth doing - if there is a lot to copy. */ - - /* Get here if dst is word aligned, - i.e., the 2 least significant bits are 0. - If dst is not 2w aligned (i.e., the 3rd bit is not set in dst), - then copy 1 word (4 bytes). */ - ands r3, r0, #4 - beq 11f /* If dst already two-word aligned. */ - ldr r3, [r1], #4 - str r3, [r0], #4 - subs r2, r2, #4 - blt copy_less_than_64 - -11: - /* TODO: Align to cacheline (useful for PLD optimization). */ - - /* Every loop iteration copies 64 bytes. */ -1: - .irp offset, #0, #8, #16, #24, #32, #40, #48, #56 - ldrd r4, r5, [r1, \offset] - strd r4, r5, [r0, \offset] - .endr - - add r0, r0, #64 - add r1, r1, #64 - subs r2, r2, #64 - bge 1b /* If there is more to copy. */ - -copy_less_than_64: - - /* Get here if less than 64 bytes to copy, -64 <= r2 < 0. - Restore the count if there is more than 7 bytes to copy. */ - adds r2, r2, #56 - blt copy_less_than_8 - - /* Copy 8 bytes at a time. */ -2: - ldrd r4, r5, [r1], #8 - strd r4, r5, [r0], #8 - subs r2, r2, #8 - bge 2b /* If there is more to copy. */ - -copy_less_than_8: - - /* Get here if less than 8 bytes to copy, -8 <= r2 < 0. - Check if there is more to copy. */ - cmn r2, #8 - beq return /* If r2 + 8 == 0. */ - - /* Restore the count if there is more than 3 bytes to copy. */ - adds r2, r2, #4 - blt copy_less_than_4 - - /* Copy 4 bytes. */ - ldr r3, [r1], #4 - str r3, [r0], #4 - -copy_less_than_4: - /* Get here if less than 4 bytes to copy, -4 <= r2 < 0. 
*/ - - /* Restore the count, check if there is more to copy. */ - adds r2, r2, #4 - beq return /* If r2 == 0. */ - - /* Get here with r2 is in {1,2,3}={01,10,11}. */ - /* Logical shift left r2, insert 0s, update flags. */ - lsls r2, r2, #31 - - /* Copy byte by byte. - Condition ne means the last bit of r2 is 0. - Condition cs means the second to last bit of r2 is set, - i.e., r2 is 1 or 3. */ - itt ne - ldrbne r3, [r1], #1 - strbne r3, [r0], #1 - - itttt cs - ldrbcs r4, [r1], #1 - ldrbcs r5, [r1] - strbcs r4, [r0], #1 - strbcs r5, [r0] - -return: - /* Restore registers: optimized pop {r0, r4, r5, pc} */ - ldrd r4, r5, [sp], #8 - pop {r0, pc} /* This is the only return point of memcpy. */ - -#ifndef __ARM_FEATURE_UNALIGNED - - /* The following assembly macro implements misaligned copy in software. - Assumes that dst is word aligned, src is at offset "pull" bits from - word, push = 32 - pull, and the number of bytes that remain to copy - is r2 + 4, r2 >= 0. */ - - /* In the code below, r2 is the number of bytes that remain to be - written. The number of bytes read is always larger, because we have - partial words in the shift queue. */ - - .macro miscopy pull push shiftleft shiftright - - /* Align src to the previous word boundary. */ - bic r1, r1, #3 - - /* Initialize the shift queue. */ - ldr r5, [r1], #4 /* Load a word from source. */ - - subs r2, r2, #4 - blt 6f /* Go to misaligned copy of less than 8 bytes. */ - - /* Get here if there is more than 8 bytes to copy. - The number of bytes to copy is r2+8, r2 >= 0. */ - - /* Save registers: push { r6, r7 }. - We need additional registers for LDRD and STRD, because in ARM state - the first destination register must be even and the second - consecutive. */ - strd r6, r7, [sp, #-8]! - - subs r2, r2, #56 - blt 4f /* Go to misaligned copy of less than 64 bytes. */ - -3: - /* Get here if there is more than 64 bytes to copy. - The number of bytes to copy is r2+64, r2 >= 0. */ - - /* Copy 64 bytes in every iteration. 
- Use a partial word from the shift queue. */ - .irp offset, #0, #8, #16, #24, #32, #40, #48, #56 - mov r6, r5, \shiftleft #\pull - ldrd r4, r5, [r1, \offset] - orr r6, r6, r4, \shiftright #\push - mov r7, r4, \shiftleft #\pull - orr r7, r7, r5, \shiftright #\push - strd r6, r7, [r0, \offset] - .endr - - add r1, r1, #64 - add r0, r0, #64 - subs r2, r2, #64 - bge 3b - -4: - /* Get here if there is less than 64 bytes to copy (-64 <= r2 < 0) - and they are misaligned. */ - - /* Restore the count if there is more than 7 bytes to copy. */ - adds r2, r2, #56 - - /* If less than 8 bytes to copy, - restore registers saved for this loop: optimized poplt { r6, r7 }. */ - itt lt - ldrdlt r6, r7, [sp], #8 - blt 6f /* Go to misaligned copy of less than 8 bytes. */ - -5: - /* Copy 8 bytes at a time. - Use a partial word from the shift queue. */ - mov r6, r5, \shiftleft #\pull - ldrd r4, r5, [r1], #8 - orr r6, r6, r4, \shiftright #\push - mov r7, r4, \shiftleft #\pull - orr r7, r7, r5, \shiftright #\push - strd r6, r7, [r0], #8 - - subs r2, r2, #8 - bge 5b /* If there is more to copy. */ - - /* Restore registers saved for this loop: optimized pop { r6, r7 }. */ - ldrd r6, r7, [sp], #8 - -6: - /* Get here if there less than 8 bytes to copy (-8 <= r2 < 0) - and they are misaligned. */ - - /* Check if there is more to copy. */ - cmn r2, #8 - beq return - - /* Check if there is less than 4 bytes to copy. */ - cmn r2, #4 - - itt lt - /* Restore src offset from word-align. */ - sublt r1, r1, #(\push / 8) - blt copy_less_than_4 - - /* Use a partial word from the shift queue. */ - mov r3, r5, \shiftleft #\pull - /* Load a word from src, but without writeback - (this word is not fully written to dst). */ - ldr r5, [r1] - - /* Restore src offset from word-align. */ - add r1, r1, #(\pull / 8) - - /* Shift bytes to create one dst word and store it. */ - orr r3, r3, r5, \shiftright #\push - str r3, [r0], #4 - - /* Use single byte copying of the remaining bytes. 
*/ - b copy_less_than_4 - - .endm - -#endif /* not __ARM_FEATURE_UNALIGNED */ - -dst_not_word_aligned: - - /* Get here when dst is not aligned and ip has the last 2 bits of dst, - i.e., ip is the offset of dst from word. - The number of bytes that remains to copy is r2 + 4, - i.e., there are at least 4 bytes to copy. - Write a partial word (0 to 3 bytes), such that dst becomes - word-aligned. */ - - /* If dst is at ip bytes offset from a word (with 0 < ip < 4), - then there are (4 - ip) bytes to fill up to align dst to the next - word. */ - rsb ip, ip, #4 /* ip = #4 - ip. */ - cmp ip, #2 - - /* Copy byte by byte with conditionals. */ - itt gt - ldrbgt r3, [r1], #1 - strbgt r3, [r0], #1 - - itt ge - ldrbge r4, [r1], #1 - strbge r4, [r0], #1 - - ldrb lr, [r1], #1 - strb lr, [r0], #1 - - /* Update the count. - ip holds the number of bytes we have just copied. */ - subs r2, r2, ip /* r2 = r2 - ip. */ - blt copy_less_than_4 /* If r2 < ip. */ - - /* Get here if there are more than 4 bytes to copy. - Check if src is aligned. If beforehand src and dst were not word - aligned but congruent (same offset), then now they are both - word-aligned, and we can copy the rest efficiently (without - shifting). */ - ands ip, r1, #3 /* ip = last 2 bits of src. */ - beq word_aligned /* If r1 is word-aligned. */ - -src_not_word_aligned: - /* Get here when src is not word-aligned, but dst is word-aligned. - The number of bytes that remains to copy is r2+4. */ - -#ifdef __ARM_FEATURE_UNALIGNED - /* Copy word by word using LDR when alignment can be done in hardware, - i.e., SCTLR.A is set, supporting unaligned access in LDR and STR. */ - subs r2, r2, #60 - blt 8f - -7: - /* Copy 64 bytes in every loop iteration. */ - .irp offset, #0, #4, #8, #12, #16, #20, #24, #28, #32, #36, #40, #44, #48, #52, #56, #60 - ldr r3, [r1, \offset] - str r3, [r0, \offset] - .endr - - add r0, r0, #64 - add r1, r1, #64 - subs r2, r2, #64 - bge 7b - -8: - /* Get here if less than 64 bytes to copy, -64 <= r2 < 0. 
- Check if there is more than 3 bytes to copy. */ - adds r2, r2, #60 - blt copy_less_than_4 - -9: - /* Get here if there is less than 64 but at least 4 bytes to copy, - where the number of bytes to copy is r2+4. */ - ldr r3, [r1], #4 - str r3, [r0], #4 - subs r2, r2, #4 - bge 9b - - b copy_less_than_4 - -#else /* not __ARM_FEATURE_UNALIGNED */ - - /* ip has last 2 bits of src, - i.e., ip is the offset of src from word, and ip > 0. - Compute shifts needed to copy from src to dst. */ - cmp ip, #2 - beq miscopy_16_16 /* If ip == 2. */ - bge miscopy_24_8 /* If ip == 3. */ - - /* Get here if ip == 1. */ - - /* Endian independent macros for shifting bytes within registers. */ - -#ifndef __ARMEB__ -miscopy_8_24: miscopy pull=8 push=24 shiftleft=lsr shiftright=lsl -miscopy_16_16: miscopy pull=16 push=16 shiftleft=lsr shiftright=lsl -miscopy_24_8: miscopy pull=24 push=8 shiftleft=lsr shiftright=lsl -#else /* not __ARMEB__ */ -miscopy_8_24: miscopy pull=8 push=24 shiftleft=lsl shiftright=lsr -miscopy_16_16: miscopy pull=16 push=16 shiftleft=lsl shiftright=lsr -miscopy_24_8: miscopy pull=24 push=8 shiftleft=lsl shiftright=lsr -#endif /* not __ARMEB__ */ - -#endif /* not __ARM_FEATURE_UNALIGNED */ - -#endif /* memcpy */ diff --git a/reference/newlib/shim.h b/reference/newlib/shim.h deleted file mode 100644 index e265e97..0000000 --- a/reference/newlib/shim.h +++ /dev/null @@ -1,5 +0,0 @@ -/* Basic macros that newlib uses */ -#define _PTR void * -#define _DEFUN(_name, _args, _def) _name (_def) -#define _CONST const -#define _AND , diff --git a/reference/newlib/strcmp.S b/reference/newlib/strcmp.S deleted file mode 100644 index 6346f06..0000000 --- a/reference/newlib/strcmp.S +++ /dev/null @@ -1,777 +0,0 @@ -/* - * Copyright (c) 2012 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "arm_asm.h" - -#ifdef __ARMEB__ -#define S2LOMEM lsl -#define S2LOMEMEQ lsleq -#define S2HIMEM lsr -#define MSB 0x000000ff -#define LSB 0xff000000 -#define BYTE0_OFFSET 24 -#define BYTE1_OFFSET 16 -#define BYTE2_OFFSET 8 -#define BYTE3_OFFSET 0 -#else /* not __ARMEB__ */ -#define S2LOMEM lsr -#define S2LOMEMEQ lsreq -#define S2HIMEM lsl -#define BYTE0_OFFSET 0 -#define BYTE1_OFFSET 8 -#define BYTE2_OFFSET 16 -#define BYTE3_OFFSET 24 -#define MSB 0xff000000 -#define LSB 0x000000ff -#endif /* not __ARMEB__ */ - -.syntax unified - -#if defined (__thumb__) - .thumb - .thumb_func -#endif - .global strcmp - .type strcmp, %function -strcmp: - -#if (defined (__thumb__) && !defined (__thumb2__)) -1: - ldrb r2, [r0] - ldrb r3, [r1] - adds r0, r0, #1 - adds r1, r1, #1 - cmp r2, #0 - beq 2f - cmp r2, r3 - beq 1b -2: - subs r0, r2, r3 - bx lr -#elif (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) -1: - ldrb r2, [r0], #1 - ldrb r3, [r1], #1 - cmp r2, #1 - it cs - cmpcs r2, r3 - beq 1b - subs r0, r2, r3 - RETURN - - -#elif (defined (_ISA_THUMB_2) || defined (_ISA_ARM_6)) - /* Use LDRD whenever possible. */ - -/* The main thing to look out for when comparing large blocks is that - the loads do not cross a page boundary when loading past the index - of the byte with the first difference or the first string-terminator. - - For example, if the strings are identical and the string-terminator - is at index k, byte by byte comparison will not load beyond address - s1+k and s2+k; word by word comparison may load up to 3 bytes beyond - k; double word - up to 7 bytes. If the load of these bytes crosses - a page boundary, it might cause a memory fault (if the page is not mapped) - that would not have happened in byte by byte comparison. - - If an address is (double) word aligned, then a load of a (double) word - from that address will not cross a page boundary. 
- Therefore, the algorithm below considers word and double-word alignment - of strings separately. */ - -/* High-level description of the algorithm. - - * The fast path: if both strings are double-word aligned, - use LDRD to load two words from each string in every loop iteration. - * If the strings have the same offset from a word boundary, - use LDRB to load and compare byte by byte until - the first string is aligned to a word boundary (at most 3 bytes). - This is optimized for quick return on short unaligned strings. - * If the strings have the same offset from a double-word boundary, - use LDRD to load two words from each string in every loop iteration, as in the fast path. - * If the strings do not have the same offset from a double-word boundary, - load a word from the second string before the loop to initialize the queue. - Use LDRD to load two words from every string in every loop iteration. - Inside the loop, load the second word from the second string only after comparing - the first word, using the queued value, to guarantee safety across page boundaries. - * If the strings do not have the same offset from a word boundary, - use LDR and a shift queue. Order of loads and comparisons matters, - similarly to the previous case. - - * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value. - * The only difference between ARM and Thumb modes is the use of CBZ instruction. - * The only difference between big and little endian is the use of REV in little endian - to compute the return value, instead of MOV. - * No preload. [TODO.] 
-*/ - - .macro m_cbz reg label -#ifdef __thumb2__ - cbz \reg, \label -#else /* not defined __thumb2__ */ - cmp \reg, #0 - beq \label -#endif /* not defined __thumb2__ */ - .endm /* m_cbz */ - - .macro m_cbnz reg label -#ifdef __thumb2__ - cbnz \reg, \label -#else /* not defined __thumb2__ */ - cmp \reg, #0 - bne \label -#endif /* not defined __thumb2__ */ - .endm /* m_cbnz */ - - .macro init - /* Macro to save temporary registers and prepare magic values. */ - subs sp, sp, #16 - strd r4, r5, [sp, #8] - strd r6, r7, [sp] - mvn r6, #0 /* all F */ - mov r7, #0 /* all 0 */ - .endm /* init */ - - .macro magic_compare_and_branch w1 w2 label - /* Macro to compare registers w1 and w2 and conditionally branch to label. */ - cmp \w1, \w2 /* Are w1 and w2 the same? */ - magic_find_zero_bytes \w1 - it eq - cmpeq ip, #0 /* Is there a zero byte in w1? */ - bne \label - .endm /* magic_compare_and_branch */ - - .macro magic_find_zero_bytes w1 - /* Macro to find all-zero bytes in w1, result is in ip. */ -#if (defined (__ARM_FEATURE_DSP)) - uadd8 ip, \w1, r6 - sel ip, r7, r6 -#else /* not defined (__ARM_FEATURE_DSP) */ - /* __ARM_FEATURE_DSP is not defined for some Cortex-M processors. - Coincidently, these processors only have Thumb-2 mode, where we can use the - the (large) magic constant available directly as an immediate in instructions. - Note that we cannot use the magic constant in ARM mode, where we need - to create the constant in a register. */ - sub ip, \w1, #0x01010101 - bic ip, ip, \w1 - and ip, ip, #0x80808080 -#endif /* not defined (__ARM_FEATURE_DSP) */ - .endm /* magic_find_zero_bytes */ - - .macro setup_return w1 w2 -#ifdef __ARMEB__ - mov r1, \w1 - mov r2, \w2 -#else /* not __ARMEB__ */ - rev r1, \w1 - rev r2, \w2 -#endif /* not __ARMEB__ */ - .endm /* setup_return */ - - /* - optpld r0, #0 - optpld r1, #0 - */ - - /* Are both strings double-word aligned? */ - orr ip, r0, r1 - tst ip, #7 - bne do_align - - /* Fast path. 
*/ - init - -doubleword_aligned: - - /* Get here when the strings to compare are double-word aligned. */ - /* Compare two words in every iteration. */ - .p2align 2 -2: - /* - optpld r0, #16 - optpld r1, #16 - */ - - /* Load the next double-word from each string. */ - ldrd r2, r3, [r0], #8 - ldrd r4, r5, [r1], #8 - - magic_compare_and_branch w1=r2, w2=r4, label=return_24 - magic_compare_and_branch w1=r3, w2=r5, label=return_35 - b 2b - -do_align: - /* Is the first string word-aligned? */ - ands ip, r0, #3 - beq word_aligned_r0 - - /* Fast compare byte by byte until the first string is word-aligned. */ - /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes - to read until the next word boudnary is 4-ip. */ - bic r0, r0, #3 - ldr r2, [r0], #4 - lsls ip, ip, #31 - beq byte2 - bcs byte3 - -byte1: - ldrb ip, [r1], #1 - uxtb r3, r2, ror #BYTE1_OFFSET - subs ip, r3, ip - bne fast_return - m_cbz reg=r3, label=fast_return - -byte2: - ldrb ip, [r1], #1 - uxtb r3, r2, ror #BYTE2_OFFSET - subs ip, r3, ip - bne fast_return - m_cbz reg=r3, label=fast_return - -byte3: - ldrb ip, [r1], #1 - uxtb r3, r2, ror #BYTE3_OFFSET - subs ip, r3, ip - bne fast_return - m_cbnz reg=r3, label=word_aligned_r0 - -fast_return: - mov r0, ip - bx lr - -word_aligned_r0: - init - /* The first string is word-aligned. */ - /* Is the second string word-aligned? */ - ands ip, r1, #3 - bne strcmp_unaligned - -word_aligned: - /* The strings are word-aligned. */ - /* Is the first string double-word aligned? */ - tst r0, #4 - beq doubleword_aligned_r0 - - /* If r0 is not double-word aligned yet, align it by loading - and comparing the next word from each string. */ - ldr r2, [r0], #4 - ldr r4, [r1], #4 - magic_compare_and_branch w1=r2 w2=r4 label=return_24 - -doubleword_aligned_r0: - /* Get here when r0 is double-word aligned. */ - /* Is r1 doubleword_aligned? 
*/ - tst r1, #4 - beq doubleword_aligned - - /* Get here when the strings to compare are word-aligned, - r0 is double-word aligned, but r1 is not double-word aligned. */ - - /* Initialize the queue. */ - ldr r5, [r1], #4 - - /* Compare two words in every iteration. */ - .p2align 2 -3: - /* - optpld r0, #16 - optpld r1, #16 - */ - - /* Load the next double-word from each string and compare. */ - ldrd r2, r3, [r0], #8 - magic_compare_and_branch w1=r2 w2=r5 label=return_25 - ldrd r4, r5, [r1], #8 - magic_compare_and_branch w1=r3 w2=r4 label=return_34 - b 3b - - .macro miscmp_word offsetlo offsethi - /* Macro to compare misaligned strings. */ - /* r0, r1 are word-aligned, and at least one of the strings - is not double-word aligned. */ - /* Compare one word in every loop iteration. */ - /* OFFSETLO is the original bit-offset of r1 from a word-boundary, - OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word). */ - - /* Initialize the shift queue. */ - ldr r5, [r1], #4 - - /* Compare one word from each string in every loop iteration. */ - .p2align 2 -7: - ldr r3, [r0], #4 - S2LOMEM r5, r5, #\offsetlo - magic_find_zero_bytes w1=r3 - cmp r7, ip, S2HIMEM #\offsetlo - and r2, r3, r6, S2LOMEM #\offsetlo - it eq - cmpeq r2, r5 - bne return_25 - ldr r5, [r1], #4 - cmp ip, #0 - eor r3, r2, r3 - S2HIMEM r2, r5, #\offsethi - it eq - cmpeq r3, r2 - bne return_32 - b 7b - .endm /* miscmp_word */ - -strcmp_unaligned: - /* r0 is word-aligned, r1 is at offset ip from a word. */ - /* Align r1 to the (previous) word-boundary. */ - bic r1, r1, #3 - - /* Unaligned comparison word by word using LDRs. */ - cmp ip, #2 - beq miscmp_word_16 /* If ip == 2. */ - bge miscmp_word_24 /* If ip == 3. */ - miscmp_word offsetlo=8 offsethi=24 /* If ip == 1. 
*/ -miscmp_word_16: miscmp_word offsetlo=16 offsethi=16 -miscmp_word_24: miscmp_word offsetlo=24 offsethi=8 - - -return_32: - setup_return w1=r3, w2=r2 - b do_return -return_34: - setup_return w1=r3, w2=r4 - b do_return -return_25: - setup_return w1=r2, w2=r5 - b do_return -return_35: - setup_return w1=r3, w2=r5 - b do_return -return_24: - setup_return w1=r2, w2=r4 - -do_return: - -#ifdef __ARMEB__ - mov r0, ip -#else /* not __ARMEB__ */ - rev r0, ip -#endif /* not __ARMEB__ */ - - /* Restore temporaries early, before computing the return value. */ - ldrd r6, r7, [sp] - ldrd r4, r5, [sp, #8] - adds sp, sp, #16 - - /* There is a zero or a different byte between r1 and r2. */ - /* r0 contains a mask of all-zero bytes in r1. */ - /* Using r0 and not ip here because cbz requires low register. */ - m_cbz reg=r0, label=compute_return_value - clz r0, r0 - /* r0 contains the number of bits on the left of the first all-zero byte in r1. */ - rsb r0, r0, #24 - /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1. */ - lsr r1, r1, r0 - lsr r2, r2, r0 - -compute_return_value: - subs r0, r1, r2 - bx lr - - -#else /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6) - defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || - (defined (__thumb__) && !defined (__thumb2__))) */ - - /* Use LDR whenever possible. */ - -#ifdef __thumb2__ -#define magic1(REG) 0x01010101 -#define magic2(REG) 0x80808080 -#else -#define magic1(REG) REG -#define magic2(REG) REG, lsl #7 -#endif - - optpld r0 - optpld r1 - eor r2, r0, r1 - tst r2, #3 - /* Strings not at same byte offset from a word boundary. */ - bne strcmp_unaligned - ands r2, r0, #3 - bic r0, r0, #3 - bic r1, r1, #3 - ldr ip, [r0], #4 - it eq - ldreq r3, [r1], #4 - beq 1f - /* Although s1 and s2 have identical initial alignment, they are - not currently word aligned. Rather than comparing bytes, - make sure that any bytes fetched from before the addressed - bytes are forced to 0xff. 
Then they will always compare - equal. */ - eor r2, r2, #3 - lsl r2, r2, #3 - mvn r3, MSB - S2LOMEM r2, r3, r2 - ldr r3, [r1], #4 - orr ip, ip, r2 - orr r3, r3, r2 -1: -#ifndef __thumb2__ - /* Load the 'magic' constant 0x01010101. */ - str r4, [sp, #-4]! - mov r4, #1 - orr r4, r4, r4, lsl #8 - orr r4, r4, r4, lsl #16 -#endif - .p2align 2 -4: - optpld r0, #8 - optpld r1, #8 - sub r2, ip, magic1(r4) - cmp ip, r3 - itttt eq - /* check for any zero bytes in first word */ - biceq r2, r2, ip - tsteq r2, magic2(r4) - ldreq ip, [r0], #4 - ldreq r3, [r1], #4 - beq 4b -2: - /* There's a zero or a different byte in the word */ - S2HIMEM r0, ip, #24 - S2LOMEM ip, ip, #8 - cmp r0, #1 - it cs - cmpcs r0, r3, S2HIMEM #24 - it eq - S2LOMEMEQ r3, r3, #8 - beq 2b - /* On a big-endian machine, r0 contains the desired byte in bits - 0-7; on a little-endian machine they are in bits 24-31. In - both cases the other bits in r0 are all zero. For r3 the - interesting byte is at the other end of the word, but the - other bits are not necessarily zero. We need a signed result - representing the differnece in the unsigned bytes, so for the - little-endian case we can't just shift the interesting bits - up. */ -#ifdef __ARMEB__ - sub r0, r0, r3, lsr #24 -#else - and r3, r3, #255 -#ifdef __thumb2__ - /* No RSB instruction in Thumb2 */ - lsr r0, r0, #24 - sub r0, r0, r3 -#else - rsb r0, r3, r0, lsr #24 -#endif -#endif -#ifndef __thumb2__ - ldr r4, [sp], #4 -#endif - RETURN - - -strcmp_unaligned: - -#if 0 - /* The assembly code below is based on the following alogrithm. 
*/ -#ifdef __ARMEB__ -#define RSHIFT << -#define LSHIFT >> -#else -#define RSHIFT >> -#define LSHIFT << -#endif - -#define body(shift) \ - mask = 0xffffffffU RSHIFT shift; \ - w1 = *wp1++; \ - w2 = *wp2++; \ - do \ - { \ - t1 = w1 & mask; \ - if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \ - { \ - w2 RSHIFT= shift; \ - break; \ - } \ - if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \ - { \ - /* See comment in assembler below re syndrome on big-endian */\ - if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \ - w2 RSHIFT= shift; \ - else \ - { \ - w2 = *wp2; \ - t1 = w1 RSHIFT (32 - shift); \ - w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \ - } \ - break; \ - } \ - w2 = *wp2++; \ - t1 ^= w1; \ - if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \ - { \ - t1 = w1 >> (32 - shift); \ - w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \ - break; \ - } \ - w1 = *wp1++; \ - } while (1) - - const unsigned* wp1; - const unsigned* wp2; - unsigned w1, w2; - unsigned mask; - unsigned shift; - unsigned b1 = 0x01010101; - char c1, c2; - unsigned t1; - - while (((unsigned) s1) & 3) - { - c1 = *s1++; - c2 = *s2++; - if (c1 == 0 || c1 != c2) - return c1 - (int)c2; - } - wp1 = (unsigned*) (((unsigned)s1) & ~3); - wp2 = (unsigned*) (((unsigned)s2) & ~3); - t1 = ((unsigned) s2) & 3; - if (t1 == 1) - { - body(8); - } - else if (t1 == 2) - { - body(16); - } - else - { - body (24); - } - - do - { -#ifdef __ARMEB__ - c1 = (char) t1 >> 24; - c2 = (char) w2 >> 24; -#else /* not __ARMEB__ */ - c1 = (char) t1; - c2 = (char) w2; -#endif /* not __ARMEB__ */ - t1 RSHIFT= 8; - w2 RSHIFT= 8; - } while (c1 != 0 && c1 == c2); - return c1 - c2; -#endif /* 0 */ - - - wp1 .req r0 - wp2 .req r1 - b1 .req r2 - w1 .req r4 - w2 .req r5 - t1 .req ip - @ r3 is scratch - - /* First of all, compare bytes until wp1(sp1) is word-aligned. 
*/ -1: - tst wp1, #3 - beq 2f - ldrb r2, [wp1], #1 - ldrb r3, [wp2], #1 - cmp r2, #1 - it cs - cmpcs r2, r3 - beq 1b - sub r0, r2, r3 - RETURN - -2: - str r5, [sp, #-4]! - str r4, [sp, #-4]! - //stmfd sp!, {r4, r5} - mov b1, #1 - orr b1, b1, b1, lsl #8 - orr b1, b1, b1, lsl #16 - - and t1, wp2, #3 - bic wp2, wp2, #3 - ldr w1, [wp1], #4 - ldr w2, [wp2], #4 - cmp t1, #2 - beq 2f - bhi 3f - - /* Critical inner Loop: Block with 3 bytes initial overlap */ - .p2align 2 -1: - bic t1, w1, MSB - cmp t1, w2, S2LOMEM #8 - sub r3, w1, b1 - bic r3, r3, w1 - bne 4f - ands r3, r3, b1, lsl #7 - it eq - ldreq w2, [wp2], #4 - bne 5f - eor t1, t1, w1 - cmp t1, w2, S2HIMEM #24 - bne 6f - ldr w1, [wp1], #4 - b 1b -4: - S2LOMEM w2, w2, #8 - b 8f - -5: -#ifdef __ARMEB__ - /* The syndrome value may contain false ones if the string ends - with the bytes 0x01 0x00 */ - tst w1, #0xff000000 - itt ne - tstne w1, #0x00ff0000 - tstne w1, #0x0000ff00 - beq 7f -#else - bics r3, r3, #0xff000000 - bne 7f -#endif - ldrb w2, [wp2] - S2LOMEM t1, w1, #24 -#ifdef __ARMEB__ - lsl w2, w2, #24 -#endif - b 8f - -6: - S2LOMEM t1, w1, #24 - and w2, w2, LSB - b 8f - - /* Critical inner Loop: Block with 2 bytes initial overlap */ - .p2align 2 -2: - S2HIMEM t1, w1, #16 - sub r3, w1, b1 - S2LOMEM t1, t1, #16 - bic r3, r3, w1 - cmp t1, w2, S2LOMEM #16 - bne 4f - ands r3, r3, b1, lsl #7 - it eq - ldreq w2, [wp2], #4 - bne 5f - eor t1, t1, w1 - cmp t1, w2, S2HIMEM #16 - bne 6f - ldr w1, [wp1], #4 - b 2b - -5: -#ifdef __ARMEB__ - /* The syndrome value may contain false ones if the string ends - with the bytes 0x01 0x00 */ - tst w1, #0xff000000 - it ne - tstne w1, #0x00ff0000 - beq 7f -#else - lsls r3, r3, #16 - bne 7f -#endif - ldrh w2, [wp2] - S2LOMEM t1, w1, #16 -#ifdef __ARMEB__ - lsl w2, w2, #16 -#endif - b 8f - -6: - S2HIMEM w2, w2, #16 - S2LOMEM t1, w1, #16 -4: - S2LOMEM w2, w2, #16 - b 8f - - /* Critical inner Loop: Block with 1 byte initial overlap */ - .p2align 2 -3: - and t1, w1, LSB - cmp t1, w2, S2LOMEM 
#24 - sub r3, w1, b1 - bic r3, r3, w1 - bne 4f - ands r3, r3, b1, lsl #7 - it eq - ldreq w2, [wp2], #4 - bne 5f - eor t1, t1, w1 - cmp t1, w2, S2HIMEM #8 - bne 6f - ldr w1, [wp1], #4 - b 3b -4: - S2LOMEM w2, w2, #24 - b 8f -5: - /* The syndrome value may contain false ones if the string ends - with the bytes 0x01 0x00 */ - tst w1, LSB - beq 7f - ldr w2, [wp2], #4 -6: - S2LOMEM t1, w1, #8 - bic w2, w2, MSB - b 8f -7: - mov r0, #0 - //ldmfd sp!, {r4, r5} - ldr r4, [sp], #4 - ldr r5, [sp], #4 - RETURN -8: - and r2, t1, LSB - and r0, w2, LSB - cmp r0, #1 - it cs - cmpcs r0, r2 - itt eq - S2LOMEMEQ t1, t1, #8 - S2LOMEMEQ w2, w2, #8 - beq 8b - sub r0, r2, r0 - //ldmfd sp!, {r4, r5} - ldr r4, [sp], #4 - ldr r5, [sp], #4 - RETURN - -#endif /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6) - defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || - (defined (__thumb__) && !defined (__thumb2__))) */ diff --git a/reference/newlib/strcpy.c b/reference/newlib/strcpy.c deleted file mode 100644 index 93426d4..0000000 --- a/reference/newlib/strcpy.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2008 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. 
- * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arm_asm.h" - -#ifdef __thumb2__ -#define magic1(REG) "#0x01010101" -#define magic2(REG) "#0x80808080" -#else -#define magic1(REG) #REG -#define magic2(REG) #REG ", lsl #7" -#endif - -char* __attribute__((naked)) -strcpy (char* dst, const char* src) -{ - asm ( -#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ - (defined (__thumb__) && !defined (__thumb2__))) - "optpld r1\n\t" - "eor r2, r0, r1\n\t" - "mov ip, r0\n\t" - "tst r2, #3\n\t" - "bne 4f\n\t" - "tst r1, #3\n\t" - "bne 3f\n" - "5:\n\t" -#ifndef __thumb2__ - "str r5, [sp, #-4]!\n\t" - "mov r5, #0x01\n\t" - "orr r5, r5, r5, lsl #8\n\t" - "orr r5, r5, r5, lsl #16\n\t" -#endif - - "str r4, [sp, #-4]!\n\t" - "tst r1, #4\n\t" - "ldr r3, [r1], #4\n\t" - "beq 2f\n\t" - "sub r2, r3, "magic1(r5)"\n\t" - "bics r2, r2, r3\n\t" - "tst r2, "magic2(r5)"\n\t" - "itt eq\n\t" - "streq r3, [ip], #4\n\t" - "ldreq r3, [r1], #4\n" - "bne 1f\n\t" - /* Inner loop. We now know that r1 is 64-bit aligned, so we - can safely fetch up to two words. This allows us to avoid - load stalls. 
*/ - ".p2align 2\n" - "2:\n\t" - "optpld r1, #8\n\t" - "ldr r4, [r1], #4\n\t" - "sub r2, r3, "magic1(r5)"\n\t" - "bics r2, r2, r3\n\t" - "tst r2, "magic2(r5)"\n\t" - "sub r2, r4, "magic1(r5)"\n\t" - "bne 1f\n\t" - "str r3, [ip], #4\n\t" - "bics r2, r2, r4\n\t" - "tst r2, "magic2(r5)"\n\t" - "itt eq\n\t" - "ldreq r3, [r1], #4\n\t" - "streq r4, [ip], #4\n\t" - "beq 2b\n\t" - "mov r3, r4\n" - "1:\n\t" -#ifdef __ARMEB__ - "rors r3, r3, #24\n\t" -#endif - "strb r3, [ip], #1\n\t" - "tst r3, #0xff\n\t" -#ifdef __ARMEL__ - "ror r3, r3, #8\n\t" -#endif - "bne 1b\n\t" - "ldr r4, [sp], #4\n\t" -#ifndef __thumb2__ - "ldr r5, [sp], #4\n\t" -#endif - "RETURN\n" - - /* Strings have the same offset from word alignment, but it's - not zero. */ - "3:\n\t" - "tst r1, #1\n\t" - "beq 1f\n\t" - "ldrb r2, [r1], #1\n\t" - "strb r2, [ip], #1\n\t" - "cmp r2, #0\n\t" - "it eq\n" - "RETURN eq\n" - "1:\n\t" - "tst r1, #2\n\t" - "beq 5b\n\t" - "ldrh r2, [r1], #2\n\t" -#ifdef __ARMEB__ - "tst r2, #0xff00\n\t" - "iteet ne\n\t" - "strneh r2, [ip], #2\n\t" - "lsreq r2, r2, #8\n\t" - "streqb r2, [ip]\n\t" - "tstne r2, #0xff\n\t" -#else - "tst r2, #0xff\n\t" - "itet ne\n\t" - "strneh r2, [ip], #2\n\t" - "streqb r2, [ip]\n\t" - "tstne r2, #0xff00\n\t" -#endif - "bne 5b\n\t" - "RETURN\n" - - /* src and dst do not have a common word-alignement. Fall back to - byte copying. 
*/ - "4:\n\t" - "ldrb r2, [r1], #1\n\t" - "strb r2, [ip], #1\n\t" - "cmp r2, #0\n\t" - "bne 4b\n\t" - "RETURN" - -#elif !defined (__thumb__) || defined (__thumb2__) - "mov r3, r0\n\t" - "1:\n\t" - "ldrb r2, [r1], #1\n\t" - "strb r2, [r3], #1\n\t" - "cmp r2, #0\n\t" - "bne 1b\n\t" - "RETURN" -#else - "mov r3, r0\n\t" - "1:\n\t" - "ldrb r2, [r1]\n\t" - "add r1, r1, #1\n\t" - "strb r2, [r3]\n\t" - "add r3, r3, #1\n\t" - "cmp r2, #0\n\t" - "bne 1b\n\t" - "RETURN" -#endif - ); -} diff --git a/reference/newlib/strlen.c b/reference/newlib/strlen.c deleted file mode 100644 index 93ec8bb..0000000 --- a/reference/newlib/strlen.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (c) 2008 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arm_asm.h" -#include <limits.h> -#include <stddef.h> - -#if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ - (defined (__thumb__) && !defined (__thumb2__)) - -size_t -strlen (const char* str) -{ - int scratch; -#if defined (__thumb__) && !defined (__thumb2__) - size_t len; - asm ("mov %0, #0\n" - "1:\n\t" - "ldrb %1, [%2, %0]\n\t" - "add %0, %0, #1\n\t" - "cmp %1, #0\n\t" - "bne 1b" - : "=&r" (len), "=&r" (scratch) : "r" (str) : "memory", "cc"); - return len - 1; -#else - const char* end; - asm ("1:\n\t" - "ldrb %1, [%0], #1\n\t" - "cmp %1, #0\n\t" - "bne 1b" - : "=&r" (end), "=&r" (scratch) : "0" (str) : "memory", "cc"); - return end - str - 1; -#endif -} -#else - -size_t __attribute__((naked)) -strlen (const char* str) -{ - asm ("len .req r0\n\t" - "data .req r3\n\t" - "addr .req r1\n\t" - - "optpld r0\n\t" - /* Word-align address */ - "bic addr, r0, #3\n\t" - /* Get adjustment for start ... */ - "ands len, r0, #3\n\t" - "neg len, len\n\t" - /* First word of data */ - "ldr data, [addr], #4\n\t" - /* Ensure bytes preceeding start ... */ - "add ip, len, #4\n\t" - "mov ip, ip, asl #3\n\t" - "mvn r2, #0\n\t" - /* ... 
are masked out */ -#ifdef __thumb__ - "itt ne\n\t" -# ifdef __ARMEB__ - "lslne r2, ip\n\t" -# else - "lsrne r2, ip\n\t" -# endif - "orrne data, data, r2\n\t" -#else - "it ne\n\t" -# ifdef __ARMEB__ - "orrne data, data, r2, lsl ip\n\t" -# else - "orrne data, data, r2, lsr ip\n\t" -# endif -#endif - /* Magic const 0x01010101 */ -#ifdef _ISA_ARM_7 - "movw ip, #0x101\n\t" -#else - "mov ip, #0x1\n\t" - "orr ip, ip, ip, lsl #8\n\t" -#endif - "orr ip, ip, ip, lsl #16\n" - - /* This is the main loop. We subtract one from each byte in - the word: the sign bit changes iff the byte was zero or - 0x80 -- we eliminate the latter case by anding the result - with the 1-s complement of the data. */ - "1:\n\t" - /* test (data - 0x01010101) */ - "sub r2, data, ip\n\t" - /* ... & ~data */ - "bic r2, r2, data\n\t" - /* ... & 0x80808080 == 0? */ - "ands r2, r2, ip, lsl #7\n\t" -#ifdef _ISA_ARM_7 - /* yes, get more data... */ - "itt eq\n\t" - "ldreq data, [addr], #4\n\t" - /* and 4 more bytes */ - "addeq len, len, #4\n\t" - /* If we have PLD, then unroll the loop a bit. */ - "optpld addr, #8\n\t" - /* test (data - 0x01010101) */ - "ittt eq\n\t" - "subeq r2, data, ip\n\t" - /* ... & ~data */ - "biceq r2, r2, data\n\t" - /* ... & 0x80808080 == 0? */ - "andeqs r2, r2, ip, lsl #7\n\t" -#endif - "itt eq\n\t" - /* yes, get more data... */ - "ldreq data, [addr], #4\n\t" - /* and 4 more bytes */ - "addeq len, len, #4\n\t" - "beq 1b\n\t" -#ifdef __ARMEB__ - "tst data, #0xff000000\n\t" - "itttt ne\n\t" - "addne len, len, #1\n\t" - "tstne data, #0xff0000\n\t" - "addne len, len, #1\n\t" - "tstne data, #0xff00\n\t" - "it ne\n\t" - "addne len, len, #1\n\t" -#else -# ifdef _ISA_ARM_5 - /* R2 is the residual sign bits from the above test. All we - need to do now is establish the position of the first zero - byte... 
*/ - /* Little-endian is harder, we need the number of trailing - zeros / 8 */ -# ifdef _ISA_ARM_7 - "rbit r2, r2\n\t" - "clz r2, r2\n\t" -# else - "rsb r1, r2, #0\n\t" - "and r2, r2, r1\n\t" - "clz r2, r2\n\t" - "rsb r2, r2, #31\n\t" -# endif - "add len, len, r2, lsr #3\n\t" -# else /* No CLZ instruction */ - "tst data, #0xff\n\t" - "itttt ne\n\t" - "addne len, len, #1\n\t" - "tstne data, #0xff00\n\t" - "addne len, len, #1\n\t" - "tstne data, #0xff0000\n\t" - "it ne\n\t" - "addne len, len, #1\n\t" -# endif -#endif - "RETURN"); -} -#endif diff --git a/reference/plain/memcpy.c b/reference/plain/memcpy.c deleted file mode 100644 index af226c8..0000000 --- a/reference/plain/memcpy.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2011, Linaro Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the Linaro nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <stddef.h> - -/** A plain, byte-by-byte memcpy */ -void *memcpy(void *dst0, const void *src0, size_t len0) -{ - char *dst = (char *) dst0; - char *src = (char *) src0; - - void *save = dst0; - - while (len0--) - { - *dst++ = *src++; - } - - return save; -} diff --git a/reference/plain/memset.c b/reference/plain/memset.c deleted file mode 100644 index f171304..0000000 --- a/reference/plain/memset.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2011, Linaro Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the Linaro nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <stddef.h> - -/** A plain, byte-by-byte memset */ -void *memset(void *dst0, int c, size_t len0) -{ - char *dst = (char *) dst0; - void *save = dst0; - - while (len0--) - { - *dst++ = c; - } - - return save; -} diff --git a/reference/plain/strcmp.c b/reference/plain/strcmp.c deleted file mode 100644 index 5ef534f..0000000 --- a/reference/plain/strcmp.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2011, Linaro Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the Linaro nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** A plain, byte-by-byte strcmp */ -int strcmp(const char *s1, const char *s2) -{ - while (*s1 != '\0' && *s1 == *s2) - { - s1++; - s2++; - } - - return (*(unsigned char *) s1) - (*(unsigned char *) s2); -} diff --git a/reference/plain/strcpy.c b/reference/plain/strcpy.c deleted file mode 100644 index 66c5820..0000000 --- a/reference/plain/strcpy.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2011, Linaro Limited - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the Linaro nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** A plain, byte-by-byte strcpy */ -char* strcpy(char *dst0, const char *src0) -{ - char *s = dst0; - - while (*dst0++ = *src0++) - { - } - - return s; -} diff --git a/scripts/add-license.sh b/scripts/add-license.sh deleted file mode 100644 index 8a6c071..0000000 --- a/scripts/add-license.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash -# -# Add the modified BSD license to a file -# - -f=`mktemp -d` -trap "rm -rf $f" EXIT - -year=`date +%Y` -cat > $f/original <<EOF -Copyright (c) $year, Linaro Limited -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -EOF - -# Translate it to C style -echo "/*" > $f/c -sed -r 's/(.*)/ * \1/' $f/original | sed -r 's/ +$//' >> $f/c -echo " */" >> $f/c -echo >> $f/c - -# ...and shell style -sed -r 's/(.*)/# \1/' $f/original | sed -r 's/ +$//' >> $f/shell -echo '#' >> $f/shell -echo >> $f/shell - -for name in $@; do - if grep -q Copyright $name; then - echo $name already has some type of copyright - continue - fi - - case $name in - # These files don't have an explicit license - *autogen.sh*) - continue;; - *reference/newlib/*) - continue;; - *reference/newlib-xscale/*) - continue;; - */dhry/*) - continue;; - - *.c) - src=$f/c - ;; - *.sh|*.am|*.ac) - src=$f/shell - ;; - *) - echo Unrecognied extension on $name - continue - esac - - cat $src $name > $f/next - mv $f/next $name - echo Updated $name -done diff --git a/scripts/bench.py b/scripts/bench.py deleted file mode 100644 index 476a532..0000000 --- a/scripts/bench.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python - -"""Simple harness that benchmarks different variants of the routines, -caches the results, and emits all of the records at the end. 
- -Results are generated for different values of: - * Source - * Routine - * Length - * Alignment -""" - -import argparse -import subprocess -import math -import sys - -# Prefix to the executables -build = '../build/try-' - -ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen' - -HAS = { - 'this': 'bounce memchr memcpy memset strchr strcmp strcpy strlen', - 'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen', - 'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen', - 'bionic-c': ALL, - 'csl': 'memcpy memset', - 'glibc': 'memcpy memset strchr strlen', - 'glibc-c': ALL, - 'newlib': 'memcpy strcmp strcpy strlen', - 'newlib-c': ALL, - 'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen', - 'plain': 'memset memcpy strcmp strcpy', -} - -BOUNCE_ALIGNMENTS = ['1'] -SINGLE_BUFFER_ALIGNMENTS = ['1', '2', '4', '8', '16', '32'] -DUAL_BUFFER_ALIGNMENTS = ['1:32', '2:32', '4:32', '8:32', '16:32', '32:32'] - -ALIGNMENTS = { - 'bounce': BOUNCE_ALIGNMENTS, - 'memchr': SINGLE_BUFFER_ALIGNMENTS, - 'memset': SINGLE_BUFFER_ALIGNMENTS, - 'strchr': SINGLE_BUFFER_ALIGNMENTS, - 'strlen': SINGLE_BUFFER_ALIGNMENTS, - 'memcmp': DUAL_BUFFER_ALIGNMENTS, - 'memcpy': DUAL_BUFFER_ALIGNMENTS, - 'strcmp': DUAL_BUFFER_ALIGNMENTS, - 'strcpy': DUAL_BUFFER_ALIGNMENTS, -} - -VARIANTS = sorted(HAS.keys()) -FUNCTIONS = sorted(ALIGNMENTS.keys()) - -NUM_RUNS = 5 - -def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False): - """Perform a single run, exercising the cache as appropriate.""" - key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment, run_id)) - - if key in cache: - got = cache[key] - else: - xbuild = build - cmd = '%(xbuild)s%(variant)s -t %(function)s -c %(bytes)s -l %(loops)s -a %(alignment)s -r %(run_id)s' % locals() - - try: - got = subprocess.check_output(cmd.split()).strip() - except OSError, ex: - assert False, 'Error %s while running %s' % (ex, cmd) - - parts = got.split(':') - took = float(parts[7]) - - 
cache[key] = got - - if not quiet: - print got - sys.stdout.flush() - - return took - -def run_many(cache, variants, bytes, all_functions): - # We want the data to come out in a useful order. So fix an - # alignment and function, and do all sizes for a variant first - bytes = sorted(bytes) - mid = bytes[int(len(bytes)/1.5)] - - if not all_functions: - # Use the ordering in 'this' as the default - all_functions = HAS['this'].split() - - # Find all other functions - for functions in HAS.values(): - for function in functions.split(): - if function not in all_functions: - all_functions.append(function) - - for function in all_functions: - for alignment in ALIGNMENTS[function]: - for variant in variants: - if function not in HAS[variant].split(): - continue - - # Run a tracer through and see how long it takes and - # adjust the number of loops based on that. Not great - # for memchr() and similar which are O(n), but it will - # do - f = 50000000 - want = 5.0 - - loops = int(f / math.sqrt(max(1, mid))) - took = run(cache, variant, function, mid, loops, alignment, 0, - quiet=True) - # Keep it reasonable for silly routines like bounce - factor = min(20, max(0.05, want/took)) - f = f * factor - - # Round f to a few significant figures - scale = 10**int(math.log10(f) - 1) - f = scale*int(f/scale) - - for b in sorted(bytes): - # Figure out the number of loops to give a roughly consistent run - loops = int(f / math.sqrt(max(1, b))) - for run_id in range(0, NUM_RUNS): - run(cache, variant, function, b, loops, alignment, - run_id) - -def run_top(cache): - parser = argparse.ArgumentParser() - parser.add_argument("-v", "--variants", nargs="+", help="library variant to run (run all if not specified)", default = VARIANTS, choices = VARIANTS) - parser.add_argument("-f", "--functions", nargs="+", help="function to run (run all if not specified)", default = FUNCTIONS, choices = FUNCTIONS) - parser.add_argument("-l", "--limit", type=int, help="upper limit to test to (in bytes)", default 
= 512*1024) - args = parser.parse_args() - - # Test all powers of 2 - step1 = 2.0 - # Test intermediate powers of 1.4 - step2 = 1.4 - - bytes = [] - - for step in [step1, step2]: - if step: - # Figure out how many steps get us up to the top - steps = int(round(math.log(args.limit) / math.log(step))) - bytes.extend([int(step**x) for x in range(0, steps+1)]) - - run_many(cache, args.variants, bytes, args.functions) - -def main(): - cachename = 'cache.txt' - - cache = {} - - try: - with open(cachename) as f: - for line in f: - line = line.strip() - parts = line.split(':') - cache[':'.join(parts[:7])] = line - except: - pass - - try: - run_top(cache) - finally: - with open(cachename, 'w') as f: - for line in sorted(cache.values()): - print >> f, line - -if __name__ == '__main__': - main() diff --git a/scripts/fixup.py b/scripts/fixup.py deleted file mode 100644 index 003783a..0000000 --- a/scripts/fixup.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Simple script that enables target specific blocks based on the first argument. - -Matches comment blocks like this: - -/* For Foo: abc -def -*/ - -and de-comments them giving: -abc -def -""" -import re -import sys - -def main(): - key = sys.argv[1] - expr = re.compile(r'/\* For %s:\s([^*]+)\*/' % key, re.M) - - for arg in sys.argv[2:]: - with open(arg) as f: - body = f.read() - with open(arg, 'w') as f: - f.write(expr.sub(r'\1', body)) - -if __name__ == '__main__': - main() diff --git a/scripts/libplot.py b/scripts/libplot.py deleted file mode 100644 index 034ffd3..0000000 --- a/scripts/libplot.py +++ /dev/null @@ -1,78 +0,0 @@ -"""Shared routines for the plotters.""" - -import fileinput -import collections - -Record = collections.namedtuple('Record', 'variant function bytes loops src_alignment dst_alignment run_id elapsed rest') - - -def make_colours(): - return iter('m b g r c y k pink orange brown grey'.split()) - -def parse_value(v): - """Turn text into a primitive""" - try: - if '.' 
in v: - return float(v) - else: - return int(v) - except ValueError: - return v - -def create_column_tuple(record, names): - cols = [getattr(record, name) for name in names] - return tuple(cols) - -def unique(records, name, prefer=''): - """Return the unique values of a column in the records""" - if type(name) == tuple: - values = list(set(create_column_tuple(x, name) for x in records)) - else: - values = list(set(getattr(x, name) for x in records)) - - if not values: - return values - elif type(values[0]) == str: - return sorted(values, key=lambda x: '%-06d|%s' % (-prefer.find(x), x)) - else: - return sorted(values) - -def alignments_equal(alignments): - for alignment in alignments: - if alignment[0] != alignment[1]: - return False - return True - -def parse_row(line): - return Record(*[parse_value(y) for y in line.split(':')]) - -def parse(): - """Parse a record file into named tuples, correcting for loop - overhead along the way. - """ - records = [parse_row(x) for x in fileinput.input()] - - # Pull out any bounce values - costs = {} - - for record in [x for x in records if x.function=='bounce']: - costs[(record.bytes, record.loops)] = record.elapsed - - # Fix up all of the records for cost - out = [] - - for record in records: - if record.function == 'bounce': - continue - - cost = costs.get((record.bytes, record.loops), None) - - if not cost: - out.append(record) - else: - # Unfortunately you can't update a namedtuple... - values = list(record) - values[-2] -= cost - out.append(Record(*values)) - - return out diff --git a/scripts/plot-align.py b/scripts/plot-align.py deleted file mode 100644 index 524aa20..0000000 --- a/scripts/plot-align.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python - -"""Plot the performance of different variants of one routine versus alignment. 
-""" - -import libplot - -import pylab - - -def plot(records, bytes, function): - records = [x for x in records if x.bytes==bytes and x.function==function] - - variants = libplot.unique(records, 'variant', prefer='this') - alignments = libplot.unique(records, ('src_alignment', 'dst_alignment')) - - X = pylab.arange(len(alignments)) - width = 1.0/(len(variants)+1) - - colours = libplot.make_colours() - - pylab.figure(1).set_size_inches((16, 12)) - pylab.clf() - - for i, variant in enumerate(variants): - heights = [] - - for alignment in alignments: - matches = [x for x in records if x.variant==variant and x.src_alignment==alignment[0] and x.dst_alignment==alignment[1]] - - if matches: - vals = [match.bytes*match.loops/match.elapsed/(1024*1024) for - match in matches] - mean = sum(vals)/len(vals) - heights.append(mean) - else: - heights.append(0) - - pylab.bar(X+i*width, heights, width, color=colours.next(), label=variant) - - - axes = pylab.axes() - if libplot.alignments_equal(alignments): - alignment_labels = ["%s" % x[0] for x in alignments] - else: - alignment_labels = ["%s:%s" % (x[0], x[1]) for x in alignments] - axes.set_xticklabels(alignment_labels) - axes.set_xticks(X + 0.5) - - pylab.title('Performance of different variants of %(function)s for %(bytes)d byte blocks' % locals()) - pylab.xlabel('Alignment') - pylab.ylabel('Rate (MB/s)') - pylab.legend(loc='lower right', ncol=3) - pylab.grid() - pylab.savefig('alignment-%(function)s-%(bytes)d.png' % locals(), dpi=72) - -def main(): - records = libplot.parse() - - for function in libplot.unique(records, 'function'): - for bytes in libplot.unique(records, 'bytes'): - plot(records, bytes, function) - - pylab.show() - -if __name__ == '__main__': - main() diff --git a/scripts/plot-sizes.py b/scripts/plot-sizes.py deleted file mode 100644 index 26a22bc..0000000 --- a/scripts/plot-sizes.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python - -"""Plot the performance for different block sizes of one function 
across -variants. -""" - -import libplot - -import pylab -import pdb -import math - -def pretty_kb(v): - if v < 1024: - return '%d' % v - else: - if v % 1024 == 0: - return '%d k' % (v//1024) - else: - return '%.1f k' % (v/1024) - -def plot(records, function, alignment=None, scale=1): - variants = libplot.unique(records, 'variant', prefer='this') - records = [x for x in records if x.function==function] - - if alignment != None: - records = [x for x in records if x.src_alignment==alignment[0] and - x.dst_alignment==alignment[1]] - - alignments = libplot.unique(records, ('src_alignment', 'dst_alignment')) - if len(alignments) != 1: - return False - if libplot.alignments_equal(alignments): - aalignment = alignments[0][0] - else: - aalignment = "%s:%s" % (alignments[0][0], alignments[0][1]) - - bytes = libplot.unique(records, 'bytes')[0] - - colours = libplot.make_colours() - all_x = [] - - pylab.figure(1).set_size_inches((6.4*scale, 4.8*scale)) - pylab.clf() - - if 'str' in function: - # The harness fills out to 16k. 
Anything past that is an - # early match - top = 16384 - else: - top = 2**31 - - for variant in variants: - matches = [x for x in records if x.variant==variant and x.bytes <= top] - matches.sort(key=lambda x: x.bytes) - - X = sorted(list(set([x.bytes for x in matches]))) - Y = [] - Yerr = [] - for xbytes in X: - vals = [x.bytes*x.loops/x.elapsed/(1024*1024) for x in matches if x.bytes == xbytes] - if len(vals) > 1: - mean = sum(vals)/len(vals) - Y.append(mean) - if len(Yerr) == 0: - Yerr = [[], []] - err1 = max(vals) - mean - assert err1 >= 0 - err2 = min(vals) - mean - assert err2 <= 0 - Yerr[0].append(abs(err2)) - Yerr[1].append(err1) - else: - Y.append(vals[0]) - - all_x.extend(X) - colour = colours.next() - - if X: - pylab.plot(X, Y, c=colour) - if len(Yerr) > 0: - pylab.errorbar(X, Y, yerr=Yerr, c=colour, label=variant, fmt='o') - else: - pylab.scatter(X, Y, c=colour, label=variant, edgecolors='none') - - pylab.legend(loc='upper left', ncol=3, prop={'size': 'small'}) - pylab.grid() - pylab.title('%(function)s of %(aalignment)s byte aligned blocks' % locals()) - pylab.xlabel('Size (B)') - pylab.ylabel('Rate (MB/s)') - - # Figure out how high the range goes - top = max(all_x) - - power = int(round(math.log(max(all_x)) / math.log(2))) - - pylab.semilogx() - - pylab.axes().set_xticks([2**x for x in range(0, power+1)]) - pylab.axes().set_xticklabels([pretty_kb(2**x) for x in range(0, power+1)]) - pylab.xlim(0, top) - pylab.ylim(0, pylab.ylim()[1]) - return True - -def main(): - records = libplot.parse() - - functions = libplot.unique(records, 'function') - alignments = libplot.unique(records, ('src_alignment', 'dst_alignment')) - - for function in functions: - for alignment in alignments: - for scale in [1, 2.5]: - if plot(records, function, alignment, scale): - pylab.savefig('sizes-%s-%02d-%02d-%.1f.png' % (function, alignment[0], alignment[1], scale), dpi=72) - - pylab.show() - -if __name__ == '__main__': - main() diff --git a/scripts/plot-top.py 
b/scripts/plot-top.py deleted file mode 100644 index 4095239..0000000 --- a/scripts/plot-top.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python - -"""Plot the performance of different variants of the string routines -for one size. -""" - -import libplot - -import pylab - - -def plot(records, bytes): - records = [x for x in records if x.bytes==bytes] - - variants = libplot.unique(records, 'variant', prefer='this') - functions = libplot.unique(records, 'function') - - X = pylab.arange(len(functions)) - width = 1.0/(len(variants)+1) - - colours = libplot.make_colours() - - pylab.figure(1).set_size_inches((16, 12)) - pylab.clf() - - for i, variant in enumerate(variants): - heights = [] - - for function in functions: - matches = [x for x in records if x.variant==variant and x.function==function and x.src_alignment==8] - - if matches: - vals = [match.bytes*match.loops/match.elapsed/(1024*1024) for - match in matches] - mean = sum(vals)/len(vals) - heights.append(mean) - else: - heights.append(0) - - pylab.bar(X+i*width, heights, width, color=colours.next(), label=variant) - - axes = pylab.axes() - axes.set_xticklabels(functions) - axes.set_xticks(X + 0.5) - - pylab.title('Performance of different variants for %d byte blocks' % bytes) - pylab.ylabel('Rate (MB/s)') - pylab.legend(loc='upper left', ncol=3) - pylab.grid() - pylab.savefig('top-%06d.png' % bytes, dpi=72) - -def main(): - records = libplot.parse() - - for bytes in libplot.unique(records, 'bytes'): - plot(records, bytes) - - pylab.show() - -if __name__ == '__main__': - main() diff --git a/scripts/plot.py b/scripts/plot.py deleted file mode 100644 index aa2bb1a..0000000 --- a/scripts/plot.py +++ /dev/null @@ -1,123 +0,0 @@ -"""Plot the results for each test. Spits out a set of images into the -current directory. 
-""" - -import libplot - -import fileinput -import collections -import pprint - -import pylab - -Record = collections.namedtuple('Record', 'variant test size loops src_alignment dst_alignment run_id rawtime comment time bytes rate') - -def unique(rows, name): - """Takes a list of values, pulls out the named field, and returns - a list of the unique values of this field. - """ - return sorted(set(getattr(x, name) for x in rows)) - -def to_float(v): - """Convert a string into a better type. - - >>> to_float('foo') - 'foo' - >>> to_float('1.23') - 1.23 - >>> to_float('45') - 45 - """ - try: - if '.' in v: - return float(v) - else: - return int(v) - except: - return v - -def parse(): - # Split the input up - rows = [x.strip().split(':') for x in fileinput.input()] - # Automatically turn numbers into the base type - rows = [[to_float(y) for y in x] for x in rows] - - # Scan once to calculate the overhead - r = [Record(*(x + [0, 0, 0])) for x in rows] - bounces = pylab.array([(x.loops, x.rawtime) for x in r if x.test == 'bounce']) - fit = pylab.polyfit(bounces[:,0], bounces[:,1], 1) - - records = [] - - for row in rows: - # Make a dummy record so we can use the names - r1 = Record(*(row + [0, 0, 0])) - - bytes = r1.size * r1.loops - # Calculate the bounce time - delta = pylab.polyval(fit, [r1.loops]) - time = r1.rawtime - delta - rate = bytes / time - - records.append(Record(*(row + [time, bytes, rate]))) - - return records - -def plot(records, field, scale, ylabel): - variants = unique(records, 'variant') - tests = unique(records, 'test') - - colours = libplot.make_colours() - - # A little hack. We want the 'all' record to be drawn last so - # that it's obvious on the graph. 
Assume that no tests come - # before it alphabetically - variants.reverse() - - for test in tests: - for variant in variants: - v = [x for x in records if x.test==test and x.variant==variant] - v.sort(key=lambda x: x.size) - V = pylab.array([(x.size, getattr(x, field)) for x in v]) - - # Ensure our results appear - order = 1 if variant == 'this' else 0 - - try: - # A little hack. We want the 'all' to be obvious on - # the graph - if variant == 'all': - pylab.scatter(V[:,0], V[:,1]/scale, label=variant) - pylab.plot(V[:,0], V[:,1]/scale) - else: - pylab.plot(V[:,0], V[:,1]/scale, label=variant, - zorder=order, c = colours.next()) - - except Exception, ex: - # michaelh1 likes to run this script while the test is - # still running which can lead to bad data - print ex, 'on %s of %s' % (variant, test) - - pylab.legend(loc='lower right', ncol=2, prop={'size': 'small'}) - pylab.xlabel('Block size (B)') - pylab.ylabel(ylabel) - pylab.title('%s %s' % (test, field)) - pylab.grid() - - pylab.savefig('%s-%s.png' % (test, field), dpi=100) - pylab.semilogx(basex=2) - pylab.savefig('%s-%s-semilog.png' % (test, field), dpi=100) - pylab.clf() - -def test(): - import doctest - doctest.testmod() - -def main(): - records = parse() - - plot(records, 'rate', 1024**2, 'Rate (MB/s)') - plot(records, 'time', 1, 'Total time (s)') - -if __name__ == '__main__': - main() diff --git a/scripts/trim.sh b/scripts/trim.sh deleted file mode 100644 index dab1047..0000000 --- a/scripts/trim.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -# -# Trims the whitespace from around any given images -# - -for i in $@; do - convert $i -bordercolor white -border 1x1 -trim +repage -alpha off +dither -colors 32 PNG8:next-$i - mv next-$i $i -done diff --git a/src/aarch64/memchr.S b/src/aarch64/memchr.S deleted file mode 100644 index 8da65ec..0000000 --- a/src/aarch64/memchr.S +++ /dev/null @@ -1,172 +0,0 @@ -/* - * memchr - find a character in a memory zone - * - * Copyright (c) 2014, ARM Limited - * All rights 
Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * Neon Available. - */ - -/* Arguments and results. 
*/ -#define srcin x0 -#define chrin w1 -#define cntin x2 - -#define result x0 - -#define src x3 -#define tmp x4 -#define wtmp2 w5 -#define synd x6 -#define soff x9 -#define cntrem x10 - -#define vrepchr v0 -#define vdata1 v1 -#define vdata2 v2 -#define vhas_chr1 v3 -#define vhas_chr2 v4 -#define vrepmask v5 -#define vend v6 - -/* - * Core algorithm: - * - * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits - * per byte. For each tuple, bit 0 is set if the relevant byte matched the - * requested character and bit 1 is not used (faster than using a 32bit - * syndrome). Since the bits in the syndrome reflect exactly the order in which - * things occur in the original string, counting trailing zeros allows to - * identify exactly which byte has matched. - */ - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn memchr - /* Do not dereference srcin if no bytes to compare. */ - cbz cntin, .Lzero_length - /* - * Magic constant 0x40100401 allows us to identify which lane matches - * the requested byte. - */ - mov wtmp2, #0x0401 - movk wtmp2, #0x4010, lsl #16 - dup vrepchr.16b, chrin - /* Work with aligned 32-byte chunks */ - bic src, srcin, #31 - dup vrepmask.4s, wtmp2 - ands soff, srcin, #31 - and cntrem, cntin, #31 - b.eq .Lloop - - /* - * Input string is not 32-byte aligned. We calculate the syndrome - * value for the aligned 32 bytes block containing the first bytes - * and mask the irrelevant part. 
- */ - - ld1 {vdata1.16b, vdata2.16b}, [src], #32 - sub tmp, soff, #32 - adds cntin, cntin, tmp - cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b - and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b - addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ - addp vend.16b, vend.16b, vend.16b /* 128->64 */ - mov synd, vend.2d[0] - /* Clear the soff*2 lower bits */ - lsl tmp, soff, #1 - lsr synd, synd, tmp - lsl synd, synd, tmp - /* The first block can also be the last */ - b.ls .Lmasklast - /* Have we found something already? */ - cbnz synd, .Ltail - -.Lloop: - ld1 {vdata1.16b, vdata2.16b}, [src], #32 - subs cntin, cntin, #32 - cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - /* If we're out of data we finish regardless of the result */ - b.ls .Lend - /* Use a fast check for the termination condition */ - orr vend.16b, vhas_chr1.16b, vhas_chr2.16b - addp vend.2d, vend.2d, vend.2d - mov synd, vend.2d[0] - /* We're not out of data, loop if we haven't found the character */ - cbz synd, .Lloop - -.Lend: - /* Termination condition found, let's calculate the syndrome value */ - and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b - and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b - addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ - addp vend.16b, vend.16b, vend.16b /* 128->64 */ - mov synd, vend.2d[0] - /* Only do the clear for the last possible block */ - b.hi .Ltail - -.Lmasklast: - /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */ - add tmp, cntrem, soff - and tmp, tmp, #31 - sub tmp, tmp, #32 - neg tmp, tmp, lsl #1 - lsl synd, synd, tmp - lsr synd, synd, tmp - -.Ltail: - /* Count the trailing zeros using bit reversing */ - rbit synd, synd - /* Compensate the last post-increment */ - sub src, src, #32 - /* Check that we have found a character */ - cmp synd, #0 - /* And count the leading zeros */ - clz synd, synd - /* Compute 
the potential result */ - add result, src, synd, lsr #1 - /* Select result or NULL */ - csel result, xzr, result, eq - ret - -.Lzero_length: - mov result, #0 - ret - - .size memchr, . - memchr diff --git a/src/aarch64/memcmp.S b/src/aarch64/memcmp.S deleted file mode 100644 index 854bfd9..0000000 --- a/src/aarch64/memcmp.S +++ /dev/null @@ -1,191 +0,0 @@ -/* memcmp - compare memory - - Copyright (c) 2013, 2018 Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* - * Copyright (c) 2017 ARM Ltd - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64, unaligned accesses. - */ - -#define L(l) .L ## l - -/* Parameters and result. */ -#define src1 x0 -#define src2 x1 -#define limit x2 -#define result w0 - -/* Internal variables. 
*/ -#define data1 x3 -#define data1w w3 -#define data1h x4 -#define data2 x5 -#define data2w w5 -#define data2h x6 -#define tmp1 x7 -#define tmp2 x8 - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn memcmp p2align=6 - subs limit, limit, 8 - b.lo L(less8) - - ldr data1, [src1], 8 - ldr data2, [src2], 8 - cmp data1, data2 - b.ne L(return) - - subs limit, limit, 8 - b.gt L(more16) - - ldr data1, [src1, limit] - ldr data2, [src2, limit] - b L(return) - -L(more16): - ldr data1, [src1], 8 - ldr data2, [src2], 8 - cmp data1, data2 - bne L(return) - - /* Jump directly to comparing the last 16 bytes for 32 byte (or less) - strings. */ - subs limit, limit, 16 - b.ls L(last_bytes) - - /* We overlap loads between 0-32 bytes at either side of SRC1 when we - try to align, so limit it only to strings larger than 128 bytes. */ - cmp limit, 96 - b.ls L(loop16) - - /* Align src1 and adjust src2 with bytes not yet done. */ - and tmp1, src1, 15 - add limit, limit, tmp1 - sub src1, src1, tmp1 - sub src2, src2, tmp1 - - /* Loop performing 16 bytes per iteration using aligned src1. - Limit is pre-decremented by 16 and must be larger than zero. - Exit if <= 16 bytes left to do or if the data is not equal. */ - .p2align 4 -L(loop16): - ldp data1, data1h, [src1], 16 - ldp data2, data2h, [src2], 16 - subs limit, limit, 16 - ccmp data1, data2, 0, hi - ccmp data1h, data2h, 0, eq - b.eq L(loop16) - - cmp data1, data2 - bne L(return) - mov data1, data1h - mov data2, data2h - cmp data1, data2 - bne L(return) - - /* Compare last 1-16 bytes using unaligned access. */ -L(last_bytes): - add src1, src1, limit - add src2, src2, limit - ldp data1, data1h, [src1] - ldp data2, data2h, [src2] - cmp data1, data2 - bne L(return) - mov data1, data1h - mov data2, data2h - cmp data1, data2 - - /* Compare data bytes and set return value to 0, -1 or 1. 
*/ -L(return): -#ifndef __AARCH64EB__ - rev data1, data1 - rev data2, data2 -#endif - cmp data1, data2 -L(ret_eq): - cset result, ne - cneg result, result, lo - ret - - .p2align 4 - /* Compare up to 8 bytes. Limit is [-8..-1]. */ -L(less8): - adds limit, limit, 4 - b.lo L(less4) - ldr data1w, [src1], 4 - ldr data2w, [src2], 4 - cmp data1w, data2w - b.ne L(return) - sub limit, limit, 4 -L(less4): - adds limit, limit, 4 - beq L(ret_eq) -L(byte_loop): - ldrb data1w, [src1], 1 - ldrb data2w, [src2], 1 - subs limit, limit, 1 - ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */ - b.eq L(byte_loop) - sub result, data1w, data2w - ret - - .size memcmp, . - memcmp diff --git a/src/aarch64/memcpy.S b/src/aarch64/memcpy.S deleted file mode 100644 index cbae371..0000000 --- a/src/aarch64/memcpy.S +++ /dev/null @@ -1,225 +0,0 @@ -/* Copyright (c) 2012, Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* - * Copyright (c) 2015 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64, unaligned accesses. - * - */ - -#define dstin x0 -#define src x1 -#define count x2 -#define dst x3 -#define srcend x4 -#define dstend x5 -#define A_l x6 -#define A_lw w6 -#define A_h x7 -#define A_hw w7 -#define B_l x8 -#define B_lw w8 -#define B_h x9 -#define C_l x10 -#define C_h x11 -#define D_l x12 -#define D_h x13 -#define E_l src -#define E_h count -#define F_l srcend -#define F_h dst -#define tmp1 x9 - -#define L(l) .L ## l - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -/* Copies are split into 3 main cases: small copies of up to 16 bytes, - medium copies of 17..96 bytes which are fully unrolled. Large copies - of more than 96 bytes align the destination and use an unrolled loop - processing 64 bytes per iteration. - Small and medium copies read all data before writing, allowing any - kind of overlap, and memmove tailcalls memcpy for these cases as - well as non-overlapping copies. -*/ - -def_fn memcpy p2align=6 - prfm PLDL1KEEP, [src] - add srcend, src, count - add dstend, dstin, count - cmp count, 16 - b.ls L(copy16) - cmp count, 96 - b.hi L(copy_long) - - /* Medium copies: 17..96 bytes. 
*/ - sub tmp1, count, 1 - ldp A_l, A_h, [src] - tbnz tmp1, 6, L(copy96) - ldp D_l, D_h, [srcend, -16] - tbz tmp1, 5, 1f - ldp B_l, B_h, [src, 16] - ldp C_l, C_h, [srcend, -32] - stp B_l, B_h, [dstin, 16] - stp C_l, C_h, [dstend, -32] -1: - stp A_l, A_h, [dstin] - stp D_l, D_h, [dstend, -16] - ret - - .p2align 4 - /* Small copies: 0..16 bytes. */ -L(copy16): - cmp count, 8 - b.lo 1f - ldr A_l, [src] - ldr A_h, [srcend, -8] - str A_l, [dstin] - str A_h, [dstend, -8] - ret - .p2align 4 -1: - tbz count, 2, 1f - ldr A_lw, [src] - ldr A_hw, [srcend, -4] - str A_lw, [dstin] - str A_hw, [dstend, -4] - ret - - /* Copy 0..3 bytes. Use a branchless sequence that copies the same - byte 3 times if count==1, or the 2nd byte twice if count==2. */ -1: - cbz count, 2f - lsr tmp1, count, 1 - ldrb A_lw, [src] - ldrb A_hw, [srcend, -1] - ldrb B_lw, [src, tmp1] - strb A_lw, [dstin] - strb B_lw, [dstin, tmp1] - strb A_hw, [dstend, -1] -2: ret - - .p2align 4 - /* Copy 64..96 bytes. Copy 64 bytes from the start and - 32 bytes from the end. */ -L(copy96): - ldp B_l, B_h, [src, 16] - ldp C_l, C_h, [src, 32] - ldp D_l, D_h, [src, 48] - ldp E_l, E_h, [srcend, -32] - ldp F_l, F_h, [srcend, -16] - stp A_l, A_h, [dstin] - stp B_l, B_h, [dstin, 16] - stp C_l, C_h, [dstin, 32] - stp D_l, D_h, [dstin, 48] - stp E_l, E_h, [dstend, -32] - stp F_l, F_h, [dstend, -16] - ret - - /* Align DST to 16 byte alignment so that we don't cross cache line - boundaries on both loads and stores. There are at least 96 bytes - to copy, so copy 16 bytes unaligned and then align. The loop - copies 64 bytes per iteration and prefetches one iteration ahead. */ - - .p2align 4 -L(copy_long): - and tmp1, dstin, 15 - bic dst, dstin, 15 - ldp D_l, D_h, [src] - sub src, src, tmp1 - add count, count, tmp1 /* Count is now 16 too large. */ - ldp A_l, A_h, [src, 16] - stp D_l, D_h, [dstin] - ldp B_l, B_h, [src, 32] - ldp C_l, C_h, [src, 48] - ldp D_l, D_h, [src, 64]! - subs count, count, 128 + 16 /* Test and readjust count. 
*/ - b.ls 2f -1: - stp A_l, A_h, [dst, 16] - ldp A_l, A_h, [src, 16] - stp B_l, B_h, [dst, 32] - ldp B_l, B_h, [src, 32] - stp C_l, C_h, [dst, 48] - ldp C_l, C_h, [src, 48] - stp D_l, D_h, [dst, 64]! - ldp D_l, D_h, [src, 64]! - subs count, count, 64 - b.hi 1b - - /* Write the last full set of 64 bytes. The remainder is at most 64 - bytes, so it is safe to always copy 64 bytes from the end even if - there is just 1 byte left. */ -2: - ldp E_l, E_h, [srcend, -64] - stp A_l, A_h, [dst, 16] - ldp A_l, A_h, [srcend, -48] - stp B_l, B_h, [dst, 32] - ldp B_l, B_h, [srcend, -32] - stp C_l, C_h, [dst, 48] - ldp C_l, C_h, [srcend, -16] - stp D_l, D_h, [dst, 64] - stp E_l, E_h, [dstend, -64] - stp A_l, A_h, [dstend, -48] - stp B_l, B_h, [dstend, -32] - stp C_l, C_h, [dstend, -16] - ret - - .size memcpy, . - memcpy diff --git a/src/aarch64/memmove.S b/src/aarch64/memmove.S deleted file mode 100644 index c9fe6c1..0000000 --- a/src/aarch64/memmove.S +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright (c) 2013, Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* - * Copyright (c) 2015 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64, unaligned accesses - */ - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -/* Parameters and result. */ -#define dstin x0 -#define src x1 -#define count x2 -#define srcend x3 -#define dstend x4 -#define tmp1 x5 -#define A_l x6 -#define A_h x7 -#define B_l x8 -#define B_h x9 -#define C_l x10 -#define C_h x11 -#define D_l x12 -#define D_h x13 -#define E_l count -#define E_h tmp1 - -/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps. - Larger backwards copies are also handled by memcpy. The only remaining - case is forward large copies. The destination is aligned, and an - unrolled loop processes 64 bytes per iteration. -*/ - -def_fn memmove, 6 - sub tmp1, dstin, src - cmp count, 96 - ccmp tmp1, count, 2, hi - b.hs memcpy - - cbz tmp1, 3f - add dstend, dstin, count - add srcend, src, count - - /* Align dstend to 16 byte alignment so that we don't cross cache line - boundaries on both loads and stores. There are at least 96 bytes - to copy, so copy 16 bytes unaligned and then align. The loop - copies 64 bytes per iteration and prefetches one iteration ahead. */ - - and tmp1, dstend, 15 - ldp D_l, D_h, [srcend, -16] - sub srcend, srcend, tmp1 - sub count, count, tmp1 - ldp A_l, A_h, [srcend, -16] - stp D_l, D_h, [dstend, -16] - ldp B_l, B_h, [srcend, -32] - ldp C_l, C_h, [srcend, -48] - ldp D_l, D_h, [srcend, -64]! 
- sub dstend, dstend, tmp1 - subs count, count, 128 - b.ls 2f - nop -1: - stp A_l, A_h, [dstend, -16] - ldp A_l, A_h, [srcend, -16] - stp B_l, B_h, [dstend, -32] - ldp B_l, B_h, [srcend, -32] - stp C_l, C_h, [dstend, -48] - ldp C_l, C_h, [srcend, -48] - stp D_l, D_h, [dstend, -64]! - ldp D_l, D_h, [srcend, -64]! - subs count, count, 64 - b.hi 1b - - /* Write the last full set of 64 bytes. The remainder is at most 64 - bytes, so it is safe to always copy 64 bytes from the start even if - there is just 1 byte left. */ -2: - ldp E_l, E_h, [src, 48] - stp A_l, A_h, [dstend, -16] - ldp A_l, A_h, [src, 32] - stp B_l, B_h, [dstend, -32] - ldp B_l, B_h, [src, 16] - stp C_l, C_h, [dstend, -48] - ldp C_l, C_h, [src] - stp D_l, D_h, [dstend, -64] - stp E_l, E_h, [dstin, 48] - stp A_l, A_h, [dstin, 32] - stp B_l, B_h, [dstin, 16] - stp C_l, C_h, [dstin] -3: ret - - .size memmove, . - memmove diff --git a/src/aarch64/memset.S b/src/aarch64/memset.S deleted file mode 100644 index 2d6675a..0000000 --- a/src/aarch64/memset.S +++ /dev/null @@ -1,235 +0,0 @@ -/* Copyright (c) 2012, Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* - * Copyright (c) 2015 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64, unaligned accesses - * - */ - - -#define dstin x0 -#define val x1 -#define valw w1 -#define count x2 -#define dst x3 -#define dstend x4 -#define tmp1 x5 -#define tmp1w w5 -#define tmp2 x6 -#define tmp2w w6 -#define zva_len x7 -#define zva_lenw w7 - -#define L(l) .L ## l - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn memset p2align=6 - - dup v0.16B, valw - add dstend, dstin, count - - cmp count, 96 - b.hi L(set_long) - cmp count, 16 - b.hs L(set_medium) - mov val, v0.D[0] - - /* Set 0..15 bytes. */ - tbz count, 3, 1f - str val, [dstin] - str val, [dstend, -8] - ret - nop -1: tbz count, 2, 2f - str valw, [dstin] - str valw, [dstend, -4] - ret -2: cbz count, 3f - strb valw, [dstin] - tbz count, 1, 3f - strh valw, [dstend, -2] -3: ret - - /* Set 16..96 bytes. */ -L(set_medium): - str q0, [dstin] - tbnz count, 6, L(set96) - str q0, [dstend, -16] - tbz count, 5, 1f - str q0, [dstin, 16] - str q0, [dstend, -32] -1: ret - - .p2align 4 - /* Set 64..96 bytes. Write 64 bytes from the start and - 32 bytes from the end. */ -L(set96): - str q0, [dstin, 16] - stp q0, q0, [dstin, 32] - stp q0, q0, [dstend, -32] - ret - - .p2align 3 - nop -L(set_long): - and valw, valw, 255 - bic dst, dstin, 15 - str q0, [dstin] - cmp count, 256 - ccmp valw, 0, 0, cs - b.eq L(try_zva) -L(no_zva): - sub count, dstend, dst /* Count is 16 too large. 
*/ - add dst, dst, 16 - sub count, count, 64 + 16 /* Adjust count and bias for loop. */ -1: stp q0, q0, [dst], 64 - stp q0, q0, [dst, -32] -L(tail64): - subs count, count, 64 - b.hi 1b -2: stp q0, q0, [dstend, -64] - stp q0, q0, [dstend, -32] - ret - - .p2align 3 -L(try_zva): - mrs tmp1, dczid_el0 - tbnz tmp1w, 4, L(no_zva) - and tmp1w, tmp1w, 15 - cmp tmp1w, 4 /* ZVA size is 64 bytes. */ - b.ne L(zva_128) - - /* Write the first and last 64 byte aligned block using stp rather - than using DC ZVA. This is faster on some cores. - */ -L(zva_64): - str q0, [dst, 16] - stp q0, q0, [dst, 32] - bic dst, dst, 63 - stp q0, q0, [dst, 64] - stp q0, q0, [dst, 96] - sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+64+64 /* Adjust count and bias for loop. */ - add dst, dst, 128 - nop -1: dc zva, dst - add dst, dst, 64 - subs count, count, 64 - b.hi 1b - stp q0, q0, [dst, 0] - stp q0, q0, [dst, 32] - stp q0, q0, [dstend, -64] - stp q0, q0, [dstend, -32] - ret - - .p2align 3 -L(zva_128): - cmp tmp1w, 5 /* ZVA size is 128 bytes. */ - b.ne L(zva_other) - - str q0, [dst, 16] - stp q0, q0, [dst, 32] - stp q0, q0, [dst, 64] - stp q0, q0, [dst, 96] - bic dst, dst, 127 - sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+128 /* Adjust count and bias for loop. */ - add dst, dst, 128 -1: dc zva, dst - add dst, dst, 128 - subs count, count, 128 - b.hi 1b - stp q0, q0, [dstend, -128] - stp q0, q0, [dstend, -96] - stp q0, q0, [dstend, -64] - stp q0, q0, [dstend, -32] - ret - -L(zva_other): - mov tmp2w, 4 - lsl zva_lenw, tmp2w, tmp1w - add tmp1, zva_len, 64 /* Max alignment bytes written. */ - cmp count, tmp1 - blo L(no_zva) - - sub tmp2, zva_len, 1 - add tmp1, dst, zva_len - add dst, dst, 16 - subs count, tmp1, dst /* Actual alignment bytes to write. */ - bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. 
*/ - beq 2f -1: stp q0, q0, [dst], 64 - stp q0, q0, [dst, -32] - subs count, count, 64 - b.hi 1b -2: mov dst, tmp1 - sub count, dstend, tmp1 /* Remaining bytes to write. */ - subs count, count, zva_len - b.lo 4f -3: dc zva, dst - add dst, dst, zva_len - subs count, count, zva_len - b.hs 3b -4: add count, count, zva_len - b L(tail64) - - .size memset, . - memset diff --git a/src/aarch64/strchr.S b/src/aarch64/strchr.S deleted file mode 100644 index 2f94651..0000000 --- a/src/aarch64/strchr.S +++ /dev/null @@ -1,159 +0,0 @@ -/* - strchr - find a character in a string - - Copyright (c) 2014, ARM Limited - All rights Reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the company nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * Neon Available. - */ - -/* Arguments and results. */ -#define srcin x0 -#define chrin w1 - -#define result x0 - -#define src x2 -#define tmp1 x3 -#define wtmp2 w4 -#define tmp3 x5 - -#define vrepchr v0 -#define vdata1 v1 -#define vdata2 v2 -#define vhas_nul1 v3 -#define vhas_nul2 v4 -#define vhas_chr1 v5 -#define vhas_chr2 v6 -#define vrepmask_0 v7 -#define vrepmask_c v16 -#define vend1 v17 -#define vend2 v18 - -/* Core algorithm. - - For each 32-byte hunk we calculate a 64-bit syndrome value, with - two bits per byte (LSB is always in bits 0 and 1, for both big - and little-endian systems). For each tuple, bit 0 is set iff - the relevant byte matched the requested character; bit 1 is set - iff the relevant byte matched the NUL end of string (we trigger - off bit0 for the special case of looking for NUL). Since the bits - in the syndrome reflect exactly the order in which things occur - in the original string a count_trailing_zeros() operation will - identify exactly which byte is causing the termination, and why. */ - -/* Locals and temporaries. */ - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn strchr - /* Magic constant 0x40100401 to allow us to identify which lane - matches the requested byte. Magic constant 0x80200802 used - similarly for NUL termination. 
*/ - mov wtmp2, #0x0401 - movk wtmp2, #0x4010, lsl #16 - dup vrepchr.16b, chrin - bic src, srcin, #31 /* Work with aligned 32-byte hunks. */ - dup vrepmask_c.4s, wtmp2 - ands tmp1, srcin, #31 - add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */ - b.eq .Lloop - - /* Input string is not 32-byte aligned. Rather than forcing - the padding bytes to a safe value, we calculate the syndrome - for all the bytes, but then mask off those bits of the - syndrome that are related to the padding. */ - ld1 {vdata1.16b, vdata2.16b}, [src], #32 - neg tmp1, tmp1 - cmeq vhas_nul1.16b, vdata1.16b, #0 - cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_nul2.16b, vdata2.16b, #0 - cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b - and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b - and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b - and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b - orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b - orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b - lsl tmp1, tmp1, #1 - addp vend1.16b, vend1.16b, vend2.16b // 256->128 - mov tmp3, #~0 - addp vend1.16b, vend1.16b, vend2.16b // 128->64 - lsr tmp1, tmp3, tmp1 - - mov tmp3, vend1.2d[0] - bic tmp1, tmp3, tmp1 // Mask padding bits. - cbnz tmp1, .Ltail - -.Lloop: - ld1 {vdata1.16b, vdata2.16b}, [src], #32 - cmeq vhas_nul1.16b, vdata1.16b, #0 - cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_nul2.16b, vdata2.16b, #0 - cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - /* Use a fast check for the termination condition. */ - orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b - orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b - orr vend1.16b, vend1.16b, vend2.16b - addp vend1.2d, vend1.2d, vend1.2d - mov tmp1, vend1.2d[0] - cbz tmp1, .Lloop - - /* Termination condition found. Now need to establish exactly why - we terminated. 
*/ - and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b - and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b - and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b - and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b - orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b - orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b - addp vend1.16b, vend1.16b, vend2.16b // 256->128 - addp vend1.16b, vend1.16b, vend2.16b // 128->64 - - mov tmp1, vend1.2d[0] -.Ltail: - /* Count the trailing zeros, by bit reversing... */ - rbit tmp1, tmp1 - /* Re-bias source. */ - sub src, src, #32 - clz tmp1, tmp1 /* And counting the leading zeros. */ - /* Tmp1 is even if the target character was found first. Otherwise - we've found the end of string and we weren't looking for NUL. */ - tst tmp1, #1 - add result, src, tmp1, lsr #1 - csel result, result, xzr, eq - ret - - .size strchr, . - strchr diff --git a/src/aarch64/strchrnul.S b/src/aarch64/strchrnul.S deleted file mode 100644 index 928f90d..0000000 --- a/src/aarch64/strchrnul.S +++ /dev/null @@ -1,144 +0,0 @@ -/* - strchrnul - find a character or nul in a string - - Copyright (c) 2014, ARM Limited - All rights Reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the company nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * Neon Available. - */ - -/* Arguments and results. */ -#define srcin x0 -#define chrin w1 - -#define result x0 - -#define src x2 -#define tmp1 x3 -#define wtmp2 w4 -#define tmp3 x5 - -#define vrepchr v0 -#define vdata1 v1 -#define vdata2 v2 -#define vhas_nul1 v3 -#define vhas_nul2 v4 -#define vhas_chr1 v5 -#define vhas_chr2 v6 -#define vrepmask v7 -#define vend1 v16 - -/* Core algorithm. - - For each 32-byte hunk we calculate a 64-bit syndrome value, with - two bits per byte (LSB is always in bits 0 and 1, for both big - and little-endian systems). For each tuple, bit 0 is set iff - the relevant byte matched the requested character or nul. Since the - bits in the syndrome reflect exactly the order in which things occur - in the original string a count_trailing_zeros() operation will - identify exactly which byte is causing the termination. */ - -/* Locals and temporaries. */ - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn strchrnul - /* Magic constant 0x40100401 to allow us to identify which lane - matches the termination condition. 
*/ - mov wtmp2, #0x0401 - movk wtmp2, #0x4010, lsl #16 - dup vrepchr.16b, chrin - bic src, srcin, #31 /* Work with aligned 32-byte hunks. */ - dup vrepmask.4s, wtmp2 - ands tmp1, srcin, #31 - b.eq .Lloop - - /* Input string is not 32-byte aligned. Rather than forcing - the padding bytes to a safe value, we calculate the syndrome - for all the bytes, but then mask off those bits of the - syndrome that are related to the padding. */ - ld1 {vdata1.16b, vdata2.16b}, [src], #32 - neg tmp1, tmp1 - cmeq vhas_nul1.16b, vdata1.16b, #0 - cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_nul2.16b, vdata2.16b, #0 - cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - orr vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b - orr vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b - and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b - and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b - lsl tmp1, tmp1, #1 - addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 - mov tmp3, #~0 - addp vend1.16b, vend1.16b, vend1.16b // 128->64 - lsr tmp1, tmp3, tmp1 - - mov tmp3, vend1.2d[0] - bic tmp1, tmp3, tmp1 // Mask padding bits. - cbnz tmp1, .Ltail - -.Lloop: - ld1 {vdata1.16b, vdata2.16b}, [src], #32 - cmeq vhas_nul1.16b, vdata1.16b, #0 - cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b - cmeq vhas_nul2.16b, vdata2.16b, #0 - cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b - /* Use a fast check for the termination condition. */ - orr vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b - orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b - orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b - addp vend1.2d, vend1.2d, vend1.2d - mov tmp1, vend1.2d[0] - cbz tmp1, .Lloop - - /* Termination condition found. Now need to establish exactly why - we terminated. 
*/ - and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b - and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b - addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128 - addp vend1.16b, vend1.16b, vend1.16b // 128->64 - - mov tmp1, vend1.2d[0] -.Ltail: - /* Count the trailing zeros, by bit reversing... */ - rbit tmp1, tmp1 - /* Re-bias source. */ - sub src, src, #32 - clz tmp1, tmp1 /* ... and counting the leading zeros. */ - /* tmp1 is twice the offset into the fragment. */ - add result, src, tmp1, lsr #1 - ret - - .size strchrnul, . - strchrnul diff --git a/src/aarch64/strcmp.S b/src/aarch64/strcmp.S deleted file mode 100644 index 75326be..0000000 --- a/src/aarch64/strcmp.S +++ /dev/null @@ -1,196 +0,0 @@ -/* Copyright (c) 2012, 2018, Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - */ - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -#define L(label) .L ## label - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -/* Parameters and result. */ -#define src1 x0 -#define src2 x1 -#define result x0 - -/* Internal variables. */ -#define data1 x2 -#define data1w w2 -#define data2 x3 -#define data2w w3 -#define has_nul x4 -#define diff x5 -#define syndrome x6 -#define tmp1 x7 -#define tmp2 x8 -#define tmp3 x9 -#define zeroones x10 -#define pos x11 - - /* Start of performance-critical section -- one 64B cache line. */ -def_fn strcmp p2align=6 - eor tmp1, src1, src2 - mov zeroones, #REP8_01 - tst tmp1, #7 - b.ne L(misaligned8) - ands tmp1, src1, #7 - b.ne L(mutual_align) - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 - (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - can be done in parallel across the entire word. */ -L(loop_aligned): - ldr data1, [src1], #8 - ldr data2, [src2], #8 -L(start_realigned): - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - orr syndrome, diff, has_nul - cbz syndrome, L(loop_aligned) - /* End of performance-critical section -- one 64B cache line. 
*/ - -L(end): -#ifndef __AARCH64EB__ - rev syndrome, syndrome - rev data1, data1 - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits. */ - clz pos, syndrome - rev data2, data2 - lsl data1, data1, pos - lsl data2, data2, pos - /* But we need to zero-extend (char is unsigned) the value and then - perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret -#else - /* For big-endian we cannot use the trick with the syndrome value - as carry-propagation can corrupt the upper bits if the trailing - bytes in the string contain 0x01. */ - /* However, if there is no NUL byte in the dword, we can generate - the result directly. We can't just subtract the bytes as the - MSB might be significant. */ - cbnz has_nul, 1f - cmp data1, data2 - cset result, ne - cneg result, result, lo - ret -1: - /* Re-compute the NUL-byte detection, using a byte-reversed value. */ - rev tmp3, data1 - sub tmp1, tmp3, zeroones - orr tmp2, tmp3, #REP8_7f - bic has_nul, tmp1, tmp2 - rev has_nul, has_nul - orr syndrome, diff, has_nul - clz pos, syndrome - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits. */ - lsl data1, data1, pos - lsl data2, data2, pos - /* But we need to zero-extend (char is unsigned) the value and then - perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret -#endif - -L(mutual_align): - /* Sources are mutually aligned, but are not currently at an - alignment boundary. Round down the addresses and then mask off - the bytes that precede the start point. */ - bic src1, src1, #7 - bic src2, src2, #7 - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. 
*/ - ldr data1, [src1], #8 - neg tmp1, tmp1 /* Bits to alignment -64. */ - ldr data2, [src2], #8 - mov tmp2, #~0 -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#endif - orr data1, data1, tmp2 - orr data2, data2, tmp2 - b L(start_realigned) - -L(misaligned8): - /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always - checking to make sure that we don't access beyond page boundary in - SRC2. */ - tst src1, #7 - b.eq L(loop_misaligned) -L(do_misaligned): - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - cmp data1w, #1 - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ - b.ne L(done) - tst src1, #7 - b.ne L(do_misaligned) - -L(loop_misaligned): - /* Test if we are within the last dword of the end of a 4K page. If - yes then jump back to the misaligned loop to compare a byte at a time. */ - and tmp1, src2, #0xff8 - eor tmp1, tmp1, #0xff8 - cbz tmp1, L(do_misaligned) - ldr data1, [src1], #8 - ldr data2, [src2], #8 - - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - orr syndrome, diff, has_nul - cbz syndrome, L(loop_misaligned) - b L(end) - -L(done): - sub result, data1, data2 - ret - .size strcmp, .-strcmp diff --git a/src/aarch64/strcpy.S b/src/aarch64/strcpy.S deleted file mode 100644 index 3d0d7f5..0000000 --- a/src/aarch64/strcpy.S +++ /dev/null @@ -1,336 +0,0 @@ -/* - strcpy/stpcpy - copy a string returning pointer to start/end. - - Copyright (c) 2013, 2014, 2015 ARM Ltd. - All Rights Reserved. 
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the company nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* Assumptions: - * - * ARMv8-a, AArch64, unaligned accesses, min page size 4k. - */ - -/* To build as stpcpy, define BUILD_STPCPY before compiling this file. - - To test the page crossing code path more thoroughly, compile with - -DSTRCPY_TEST_PAGE_CROSS - this will force all copies through the slower - entry path. This option is not intended for production use. */ - -/* Arguments and results. */ -#define dstin x0 -#define srcin x1 - -/* Locals and temporaries. 
*/ -#define src x2 -#define dst x3 -#define data1 x4 -#define data1w w4 -#define data2 x5 -#define data2w w5 -#define has_nul1 x6 -#define has_nul2 x7 -#define tmp1 x8 -#define tmp2 x9 -#define tmp3 x10 -#define tmp4 x11 -#define zeroones x12 -#define data1a x13 -#define data2a x14 -#define pos x15 -#define len x16 -#define to_align x17 - -#ifdef BUILD_STPCPY -#define STRCPY stpcpy -#else -#define STRCPY strcpy -#endif - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 - (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - can be done in parallel across the entire word. */ - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - - /* AArch64 systems have a minimum page size of 4k. We can do a quick - page size check for crossing this boundary on entry and if we - do not, then we can short-circuit much of the entry code. We - expect early page-crossing strings to be rare (probability of - 16/MIN_PAGE_SIZE ~= 0.4%), so the branch should be quite - predictable, even with random strings. - - We don't bother checking for larger page sizes, the cost of setting - up the correct page size is just not worth the extra gain from - a small reduction in the cases taking the slow path. Note that - we only care about whether the first fetch, which may be - misaligned, crosses a page boundary - after that we move to aligned - fetches for the remainder of the string. */ - -#ifdef STRCPY_TEST_PAGE_CROSS - /* Make everything that isn't Qword aligned look like a page cross. */ -#define MIN_PAGE_P2 4 -#else -#define MIN_PAGE_P2 12 -#endif - -#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2) - -def_fn STRCPY p2align=6 - /* For moderately short strings, the fastest way to do the copy is to - calculate the length of the string in the same way as strlen, then - essentially do a memcpy of the result. 
This avoids the need for - multiple byte copies and further means that by the time we - reach the bulk copy loop we know we can always use DWord - accesses. We expect strcpy to rarely be called repeatedly - with the same source string, so branch prediction is likely to - always be difficult - we mitigate against this by preferring - conditional select operations over branches whenever this is - feasible. */ - and tmp2, srcin, #(MIN_PAGE_SIZE - 1) - mov zeroones, #REP8_01 - and to_align, srcin, #15 - cmp tmp2, #(MIN_PAGE_SIZE - 16) - neg tmp1, to_align - /* The first fetch will straddle a (possible) page boundary iff - srcin + 15 causes bit[MIN_PAGE_P2] to change value. A 16-byte - aligned string will never fail the page align check, so will - always take the fast path. */ - b.gt .Lpage_cross - -.Lpage_cross_ok: - ldp data1, data2, [srcin] -#ifdef __AARCH64EB__ - /* Because we expect the end to be found within 16 characters - (profiling shows this is the most common case), it's worth - swapping the bytes now to save having to recalculate the - termination syndrome later. We preserve data1 and data2 - so that we can re-use the values later on. */ - rev tmp2, data1 - sub tmp1, tmp2, zeroones - orr tmp2, tmp2, #REP8_7f - bics has_nul1, tmp1, tmp2 - b.ne .Lfp_le8 - rev tmp4, data2 - sub tmp3, tmp4, zeroones - orr tmp4, tmp4, #REP8_7f -#else - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - bics has_nul1, tmp1, tmp2 - b.ne .Lfp_le8 - sub tmp3, data2, zeroones - orr tmp4, data2, #REP8_7f -#endif - bics has_nul2, tmp3, tmp4 - b.eq .Lbulk_entry - - /* The string is short (<=16 bytes). We don't know exactly how - short though, yet. Work out the exact length so that we can - quickly select the optimal copy strategy. */ -.Lfp_gt8: - rev has_nul2, has_nul2 - clz pos, has_nul2 - mov tmp2, #56 - add dst, dstin, pos, lsr #3 /* Bits to bytes. 
*/ - sub pos, tmp2, pos -#ifdef __AARCH64EB__ - lsr data2, data2, pos -#else - lsl data2, data2, pos -#endif - str data2, [dst, #1] - str data1, [dstin] -#ifdef BUILD_STPCPY - add dstin, dst, #8 -#endif - ret - -.Lfp_le8: - rev has_nul1, has_nul1 - clz pos, has_nul1 - add dst, dstin, pos, lsr #3 /* Bits to bytes. */ - subs tmp2, pos, #24 /* Pos in bits. */ - b.lt .Lfp_lt4 -#ifdef __AARCH64EB__ - mov tmp2, #56 - sub pos, tmp2, pos - lsr data2, data1, pos - lsr data1, data1, #32 -#else - lsr data2, data1, tmp2 -#endif - /* 4->7 bytes to copy. */ - str data2w, [dst, #-3] - str data1w, [dstin] -#ifdef BUILD_STPCPY - mov dstin, dst -#endif - ret -.Lfp_lt4: - cbz pos, .Lfp_lt2 - /* 2->3 bytes to copy. */ -#ifdef __AARCH64EB__ - lsr data1, data1, #48 -#endif - strh data1w, [dstin] - /* Fall-through, one byte (max) to go. */ -.Lfp_lt2: - /* Null-terminated string. Last character must be zero! */ - strb wzr, [dst] -#ifdef BUILD_STPCPY - mov dstin, dst -#endif - ret - - .p2align 6 - /* Aligning here ensures that the entry code and main loop all lies - within one 64-byte cache line. */ -.Lbulk_entry: - sub to_align, to_align, #16 - stp data1, data2, [dstin] - sub src, srcin, to_align - sub dst, dstin, to_align - b .Lentry_no_page_cross - - /* The inner loop deals with two Dwords at a time. This has a - slightly higher start-up cost, but we should win quite quickly, - especially on cores with a high number of issue slots per - cycle, as we get much better parallelism out of the operations. 
*/ -.Lmain_loop: - stp data1, data2, [dst], #16 -.Lentry_no_page_cross: - ldp data1, data2, [src], #16 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, #REP8_7f - bic has_nul1, tmp1, tmp2 - bics has_nul2, tmp3, tmp4 - ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */ - b.eq .Lmain_loop - - /* Since we know we are copying at least 16 bytes, the fastest way - to deal with the tail is to determine the location of the - trailing NUL, then (re)copy the 16 bytes leading up to that. */ - cmp has_nul1, #0 -#ifdef __AARCH64EB__ - /* For big-endian, carry propagation (if the final byte in the - string is 0x01) means we cannot use has_nul directly. The - easiest way to get the correct byte is to byte-swap the data - and calculate the syndrome a second time. */ - csel data1, data1, data2, ne - rev data1, data1 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - bic has_nul1, tmp1, tmp2 -#else - csel has_nul1, has_nul1, has_nul2, ne -#endif - rev has_nul1, has_nul1 - clz pos, has_nul1 - add tmp1, pos, #72 - add pos, pos, #8 - csel pos, pos, tmp1, ne - add src, src, pos, lsr #3 - add dst, dst, pos, lsr #3 - ldp data1, data2, [src, #-32] - stp data1, data2, [dst, #-16] -#ifdef BUILD_STPCPY - sub dstin, dst, #1 -#endif - ret - -.Lpage_cross: - bic src, srcin, #15 - /* Start by loading two words at [srcin & ~15], then forcing the - bytes that precede srcin to 0xff. This means they never look - like termination bytes. */ - ldp data1, data2, [src] - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ - tst to_align, #7 - csetm tmp2, ne -#ifdef __AARCH64EB__ - lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#else - lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). 
*/ -#endif - orr data1, data1, tmp2 - orr data2a, data2, tmp2 - cmp to_align, #8 - csinv data1, data1, xzr, lt - csel data2, data2, data2a, lt - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, #REP8_7f - bic has_nul1, tmp1, tmp2 - bics has_nul2, tmp3, tmp4 - ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */ - b.eq .Lpage_cross_ok - /* We now need to make data1 and data2 look like they've been - loaded directly from srcin. Do a rotate on the 128-bit value. */ - lsl tmp1, to_align, #3 /* Bytes->bits. */ - neg tmp2, to_align, lsl #3 -#ifdef __AARCH64EB__ - lsl data1a, data1, tmp1 - lsr tmp4, data2, tmp2 - lsl data2, data2, tmp1 - orr tmp4, tmp4, data1a - cmp to_align, #8 - csel data1, tmp4, data2, lt - rev tmp2, data1 - rev tmp4, data2 - sub tmp1, tmp2, zeroones - orr tmp2, tmp2, #REP8_7f - sub tmp3, tmp4, zeroones - orr tmp4, tmp4, #REP8_7f -#else - lsr data1a, data1, tmp1 - lsl tmp4, data2, tmp2 - lsr data2, data2, tmp1 - orr tmp4, tmp4, data1a - cmp to_align, #8 - csel data1, tmp4, data2, lt - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, #REP8_7f -#endif - bic has_nul1, tmp1, tmp2 - cbnz has_nul1, .Lfp_le8 - bic has_nul2, tmp3, tmp4 - b .Lfp_gt8 - - .size STRCPY, . - STRCPY diff --git a/src/aarch64/strlen.S b/src/aarch64/strlen.S deleted file mode 100644 index 5850640..0000000 --- a/src/aarch64/strlen.S +++ /dev/null @@ -1,233 +0,0 @@ -/* Copyright (c) 2013-2015, Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* Assumptions: - * - * ARMv8-a, AArch64, unaligned accesses, min page size 4k. - */ - -/* To test the page crossing code path more thoroughly, compile with - -DTEST_PAGE_CROSS - this will force all calls through the slower - entry path. This option is not intended for production use. */ - -/* Arguments and results. */ -#define srcin x0 -#define len x0 - -/* Locals and temporaries. 
*/ -#define src x1 -#define data1 x2 -#define data2 x3 -#define has_nul1 x4 -#define has_nul2 x5 -#define tmp1 x4 -#define tmp2 x5 -#define tmp3 x6 -#define tmp4 x7 -#define zeroones x8 - -#define L(l) .L ## l - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 - (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - can be done in parallel across the entire word. A faster check - (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives - false hits for characters 129..255. */ - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -#ifdef TEST_PAGE_CROSS -# define MIN_PAGE_SIZE 15 -#else -# define MIN_PAGE_SIZE 4096 -#endif - - /* Since strings are short on average, we check the first 16 bytes - of the string for a NUL character. In order to do an unaligned ldp - safely we have to do a page cross check first. If there is a NUL - byte we calculate the length from the 2 8-byte words using - conditional select to reduce branch mispredictions (it is unlikely - strlen will be repeatedly called on strings with the same length). - - If the string is longer than 16 bytes, we align src so don't need - further page cross checks, and process 32 bytes per iteration - using the fast NUL check. If we encounter non-ASCII characters, - fallback to a second loop using the full NUL check. - - If the page cross check fails, we read 16 bytes from an aligned - address, remove any characters before the string, and continue - in the main loop using aligned loads. Since strings crossing a - page in the first 16 bytes are rare (probability of - 16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized. - - AArch64 systems have a minimum page size of 4k. 
We don't bother - checking for larger page sizes - the cost of setting up the correct - page size is just not worth the extra gain from a small reduction in - the cases taking the slow path. Note that we only care about - whether the first fetch, which may be misaligned, crosses a page - boundary. */ - -def_fn strlen p2align=6 - and tmp1, srcin, MIN_PAGE_SIZE - 1 - mov zeroones, REP8_01 - cmp tmp1, MIN_PAGE_SIZE - 16 - b.gt L(page_cross) - ldp data1, data2, [srcin] -#ifdef __AARCH64EB__ - /* For big-endian, carry propagation (if the final byte in the - string is 0x01) means we cannot use has_nul1/2 directly. - Since we expect strings to be small and early-exit, - byte-swap the data now so has_nul1/2 will be correct. */ - rev data1, data1 - rev data2, data2 -#endif - sub tmp1, data1, zeroones - orr tmp2, data1, REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, REP8_7f - bics has_nul1, tmp1, tmp2 - bic has_nul2, tmp3, tmp4 - ccmp has_nul2, 0, 0, eq - beq L(main_loop_entry) - - /* Enter with C = has_nul1 == 0. */ - csel has_nul1, has_nul1, has_nul2, cc - mov len, 8 - rev has_nul1, has_nul1 - clz tmp1, has_nul1 - csel len, xzr, len, cc - add len, len, tmp1, lsr 3 - ret - - /* The inner loop processes 32 bytes per iteration and uses the fast - NUL check. If we encounter non-ASCII characters, use a second - loop with the accurate NUL check. */ - .p2align 4 -L(main_loop_entry): - bic src, srcin, 15 - sub src, src, 16 -L(main_loop): - ldp data1, data2, [src, 32]! -.Lpage_cross_entry: - sub tmp1, data1, zeroones - sub tmp3, data2, zeroones - orr tmp2, tmp1, tmp3 - tst tmp2, zeroones, lsl 7 - bne 1f - ldp data1, data2, [src, 16] - sub tmp1, data1, zeroones - sub tmp3, data2, zeroones - orr tmp2, tmp1, tmp3 - tst tmp2, zeroones, lsl 7 - beq L(main_loop) - add src, src, 16 -1: - /* The fast check failed, so do the slower, accurate NUL check.
*/ - orr tmp2, data1, REP8_7f - orr tmp4, data2, REP8_7f - bics has_nul1, tmp1, tmp2 - bic has_nul2, tmp3, tmp4 - ccmp has_nul2, 0, 0, eq - beq L(nonascii_loop) - - /* Enter with C = has_nul1 == 0. */ -L(tail): -#ifdef __AARCH64EB__ - /* For big-endian, carry propagation (if the final byte in the - string is 0x01) means we cannot use has_nul1/2 directly. The - easiest way to get the correct byte is to byte-swap the data - and calculate the syndrome a second time. */ - csel data1, data1, data2, cc - rev data1, data1 - sub tmp1, data1, zeroones - orr tmp2, data1, REP8_7f - bic has_nul1, tmp1, tmp2 -#else - csel has_nul1, has_nul1, has_nul2, cc -#endif - sub len, src, srcin - rev has_nul1, has_nul1 - add tmp2, len, 8 - clz tmp1, has_nul1 - csel len, len, tmp2, cc - add len, len, tmp1, lsr 3 - ret - -L(nonascii_loop): - ldp data1, data2, [src, 16]! - sub tmp1, data1, zeroones - orr tmp2, data1, REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, REP8_7f - bics has_nul1, tmp1, tmp2 - bic has_nul2, tmp3, tmp4 - ccmp has_nul2, 0, 0, eq - bne L(tail) - ldp data1, data2, [src, 16]! - sub tmp1, data1, zeroones - orr tmp2, data1, REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, REP8_7f - bics has_nul1, tmp1, tmp2 - bic has_nul2, tmp3, tmp4 - ccmp has_nul2, 0, 0, eq - beq L(nonascii_loop) - b L(tail) - - /* Load 16 bytes from [srcin & ~15] and force the bytes that precede - srcin to 0x7f, so we ignore any NUL bytes before the string. - Then continue in the aligned loop. */ -L(page_cross): - bic src, srcin, 15 - ldp data1, data2, [src] - lsl tmp1, srcin, 3 - mov tmp4, -1 -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). 
*/ -#endif - orr tmp1, tmp1, REP8_80 - orn data1, data1, tmp1 - orn tmp2, data2, tmp1 - tst srcin, 8 - csel data1, data1, tmp4, eq - csel data2, data2, tmp2, eq - b L(page_cross_entry) - - .size strlen, . - strlen diff --git a/src/aarch64/strncmp.S b/src/aarch64/strncmp.S deleted file mode 100644 index 3d4ea7c..0000000 --- a/src/aarch64/strncmp.S +++ /dev/null @@ -1,285 +0,0 @@ -/* Copyright (c) 2013, 2018, Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ - -/* Assumptions: - * - * ARMv8-a, AArch64 - */ - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -/* Parameters and result. */ -#define src1 x0 -#define src2 x1 -#define limit x2 -#define result x0 - -/* Internal variables. */ -#define data1 x3 -#define data1w w3 -#define data2 x4 -#define data2w w4 -#define has_nul x5 -#define diff x6 -#define syndrome x7 -#define tmp1 x8 -#define tmp2 x9 -#define tmp3 x10 -#define zeroones x11 -#define pos x12 -#define limit_wd x13 -#define mask x14 -#define endloop x15 -#define count mask - - .text - .p2align 6 - .rep 7 - nop /* Pad so that the loop below fits a cache line. */ - .endr -def_fn strncmp - cbz limit, .Lret0 - eor tmp1, src1, src2 - mov zeroones, #REP8_01 - tst tmp1, #7 - and count, src1, #7 - b.ne .Lmisaligned8 - cbnz count, .Lmutual_align - /* Calculate the number of full and partial words -1. */ - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ - - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 - (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - can be done in parallel across the entire word. */ - /* Start of performance-critical section -- one 64B cache line. */ -.Lloop_aligned: - ldr data1, [src1], #8 - ldr data2, [src2], #8 -.Lstart_realigned: - subs limit_wd, limit_wd, #1 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, pl /* Last Dword or differences. */ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp endloop, #0, #0, eq - b.eq .Lloop_aligned - /* End of performance-critical section -- one 64B cache line. */ - - /* Not reached the limit, must have found the end or a diff. 
*/ - tbz limit_wd, #63, .Lnot_limit - - /* Limit % 8 == 0 => all bytes significant. */ - ands limit, limit, #7 - b.eq .Lnot_limit - - lsl limit, limit, #3 /* Bytes -> bits. */ - mov mask, #~0 -#ifdef __AARCH64EB__ - lsr mask, mask, limit -#else - lsl mask, mask, limit -#endif - bic data1, data1, mask - bic data2, data2, mask - - /* Make sure that the NUL byte is marked in the syndrome. */ - orr has_nul, has_nul, mask - -.Lnot_limit: - orr syndrome, diff, has_nul - -#ifndef __AARCH64EB__ - rev syndrome, syndrome - rev data1, data1 - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits. */ - clz pos, syndrome - rev data2, data2 - lsl data1, data1, pos - lsl data2, data2, pos - /* But we need to zero-extend (char is unsigned) the value and then - perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret -#else - /* For big-endian we cannot use the trick with the syndrome value - as carry-propagation can corrupt the upper bits if the trailing - bytes in the string contain 0x01. */ - /* However, if there is no NUL byte in the dword, we can generate - the result directly. We can't just subtract the bytes as the - MSB might be significant. */ - cbnz has_nul, 1f - cmp data1, data2 - cset result, ne - cneg result, result, lo - ret -1: - /* Re-compute the NUL-byte detection, using a byte-reversed value. */ - rev tmp3, data1 - sub tmp1, tmp3, zeroones - orr tmp2, tmp3, #REP8_7f - bic has_nul, tmp1, tmp2 - rev has_nul, has_nul - orr syndrome, diff, has_nul - clz pos, syndrome - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits.
*/ - lsl data1, data1, pos - lsl data2, data2, pos - /* But we need to zero-extend (char is unsigned) the value and then - perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret -#endif - -.Lmutual_align: - /* Sources are mutually aligned, but are not currently at an - alignment boundary. Round down the addresses and then mask off - the bytes that precede the start point. - We also need to adjust the limit calculations, but without - overflowing if the limit is near ULONG_MAX. */ - bic src1, src1, #7 - bic src2, src2, #7 - ldr data1, [src1], #8 - neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */ - ldr data2, [src2], #8 - mov tmp2, #~0 - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */ -#endif - and tmp3, limit_wd, #7 - lsr limit_wd, limit_wd, #3 - /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */ - add limit, limit, count - add tmp3, tmp3, count - orr data1, data1, tmp2 - orr data2, data2, tmp2 - add limit_wd, limit_wd, tmp3, lsr #3 - b .Lstart_realigned - - .p2align 6 - /* Don't bother with dwords for up to 16 bytes. */ -.Lmisaligned8: - cmp limit, #16 - b.hs .Ltry_misaligned_words - -.Lbyte_loop: - /* Perhaps we can do better than this. */ - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs limit, limit, #1 - ccmp data1w, #1, #0, hi /* NZCV = 0b0000. */ - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ - b.eq .Lbyte_loop -.Ldone: - sub result, data1, data2 - ret - /* Align the SRC1 to a dword by doing a bytewise compare and then do - the dword loop. 
*/ -.Ltry_misaligned_words: - lsr limit_wd, limit, #3 - cbz count, .Ldo_misaligned - - neg count, count - and count, count, #7 - sub limit, limit, count - lsr limit_wd, limit, #3 - -.Lpage_end_loop: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - cmp data1w, #1 - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ - b.ne .Ldone - subs count, count, #1 - b.hi .Lpage_end_loop - -.Ldo_misaligned: - /* Prepare ourselves for the next page crossing. Unlike the aligned - loop, we fetch 1 less dword because we risk crossing bounds on - SRC2. */ - mov count, #8 - subs limit_wd, limit_wd, #1 - b.lo .Ldone_loop -.Lloop_misaligned: - and tmp2, src2, #0xff8 - eor tmp2, tmp2, #0xff8 - cbz tmp2, .Lpage_end_loop - - ldr data1, [src1], #8 - ldr data2, [src2], #8 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp diff, #0, #0, eq - b.ne .Lnot_limit - subs limit_wd, limit_wd, #1 - b.pl .Lloop_misaligned - -.Ldone_loop: - /* We found a difference or a NULL before the limit was reached. */ - and limit, limit, #7 - cbz limit, .Lnot_limit - /* Read the last word. */ - sub src1, src1, 8 - sub src2, src2, 8 - ldr data1, [src1, limit] - ldr data2, [src2, limit] - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp diff, #0, #0, eq - b.ne .Lnot_limit - -.Lret0: - mov result, #0 - ret - .size strncmp, . - strncmp diff --git a/src/aarch64/strnlen.S b/src/aarch64/strnlen.S deleted file mode 100644 index c0e6098..0000000 --- a/src/aarch64/strnlen.S +++ /dev/null @@ -1,181 +0,0 @@ -/* strnlen - calculate the length of a string with limit. - - Copyright (c) 2013, Linaro Limited - All rights reserved. 
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Linaro nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - */ - -/* Arguments and results. */ -#define srcin x0 -#define len x0 -#define limit x1 - -/* Locals and temporaries. 
*/ -#define src x2 -#define data1 x3 -#define data2 x4 -#define data2a x5 -#define has_nul1 x6 -#define has_nul2 x7 -#define tmp1 x8 -#define tmp2 x9 -#define tmp3 x10 -#define tmp4 x11 -#define zeroones x12 -#define pos x13 -#define limit_wd x14 - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - - .text - .p2align 6 -.Lstart: - /* Pre-pad to ensure critical loop begins an icache line. */ - .rep 7 - nop - .endr - /* Put this code here to avoid wasting more space with pre-padding. */ -.Lhit_limit: - mov len, limit - ret - -def_fn strnlen - cbz limit, .Lhit_limit - mov zeroones, #REP8_01 - bic src, srcin, #15 - ands tmp1, srcin, #15 - b.ne .Lmisaligned - /* Calculate the number of full and partial words -1. */ - sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */ - - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 - (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - can be done in parallel across the entire word. */ - /* The inner loop deals with two Dwords at a time. This has a - slightly higher start-up cost, but we should win quite quickly, - especially on cores with a high number of issue slots per - cycle, as we get much better parallelism out of the operations. */ - - /* Start of critical section -- keep to one 64Byte cache line. */ -.Lloop: - ldp data1, data2, [src], #16 -.Lrealigned: - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, #REP8_7f - bic has_nul1, tmp1, tmp2 - bic has_nul2, tmp3, tmp4 - subs limit_wd, limit_wd, #1 - orr tmp1, has_nul1, has_nul2 - ccmp tmp1, #0, #0, pl /* NZCV = 0000 */ - b.eq .Lloop - /* End of critical section -- keep to one 64Byte cache line.
*/ - - orr tmp1, has_nul1, has_nul2 - cbz tmp1, .Lhit_limit /* No null in final Qword. */ - - /* We know there's a null in the final Qword. The easiest thing - to do now is work out the length of the string and return - MIN (len, limit). */ - - sub len, src, srcin - cbz has_nul1, .Lnul_in_data2 -#ifdef __AARCH64EB__ - mov data2, data1 -#endif - sub len, len, #8 - mov has_nul2, has_nul1 -.Lnul_in_data2: -#ifdef __AARCH64EB__ - /* For big-endian, carry propagation (if the final byte in the - string is 0x01) means we cannot use has_nul directly. The - easiest way to get the correct byte is to byte-swap the data - and calculate the syndrome a second time. */ - rev data2, data2 - sub tmp1, data2, zeroones - orr tmp2, data2, #REP8_7f - bic has_nul2, tmp1, tmp2 -#endif - sub len, len, #8 - rev has_nul2, has_nul2 - clz pos, has_nul2 - add len, len, pos, lsr #3 /* Bits to bytes. */ - cmp len, limit - csel len, len, limit, ls /* Return the lower value. */ - ret - -.Lmisaligned: - /* Deal with a partial first word. - We're doing two things in parallel here; - 1) Calculate the number of words (but avoiding overflow if - limit is near ULONG_MAX) - to do this we need to work out - limit + tmp1 - 1 as a 65-bit value before shifting it; - 2) Load and mask the initial data words - we force the bytes - before the ones we are interested in to 0xff - this ensures - early bytes will not hit any zero detection. */ - sub limit_wd, limit, #1 - neg tmp4, tmp1 - cmp tmp1, #8 - - and tmp3, limit_wd, #15 - lsr limit_wd, limit_wd, #4 - mov tmp2, #~0 - - ldp data1, data2, [src], #16 - lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */ - add tmp3, tmp3, tmp1 - -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). 
*/ -#endif - add limit_wd, limit_wd, tmp3, lsr #4 - - orr data1, data1, tmp2 - orr data2a, data2, tmp2 - - csinv data1, data1, xzr, le - csel data2, data2, data2a, le - b .Lrealigned - .size strnlen, . - .Lstart /* Include pre-padding in size. */ diff --git a/src/arm/memchr.S b/src/arm/memchr.S deleted file mode 100644 index 92a2d9f..0000000 --- a/src/arm/memchr.S +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2010-2011, Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Linaro Limited nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* - Written by Dave Gilbert <david.gilbert@linaro.org> - - This memchr routine is optimised on a Cortex-A9 and should work on - all ARMv7 processors. It has a fast path for short sizes, and has - an optimised path for large data sets; the worst case is finding the - match early in a large data set. - - */ - -@ 2011-02-07 david.gilbert@linaro.org -@ Extracted from local git a5b438d861 -@ 2011-07-14 david.gilbert@linaro.org -@ Import endianness fix from local git ea786f1b -@ 2011-12-07 david.gilbert@linaro.org -@ Removed unneeded cbz from align loop - - .syntax unified - .arch armv7-a - -@ this lets us check a flag in a 00/ff byte easily in either endianness -#ifdef __ARMEB__ -#define CHARTSTMASK(c) 1<<(31-(c*8)) -#else -#define CHARTSTMASK(c) 1<<(c*8) -#endif - .text - .thumb - -@ --------------------------------------------------------------------------- - .thumb_func - .align 2 - .p2align 4,,15 - .global memchr - .type memchr,%function -memchr: - @ r0 = start of memory to scan - @ r1 = character to look for - @ r2 = length - @ returns r0 = pointer to character or NULL if not found - and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char - - cmp r2,#16 @ If it's short don't bother with anything clever - blt 20f - - tst r0, #7 @ If it's already aligned skip the next bit - beq 10f - - @ Work up to an aligned point -5: - ldrb r3, [r0],#1 - subs r2, r2, #1 - cmp r3, r1 - beq 50f @ If it matches exit found - tst r0, #7 - bne 5b @ If not aligned yet then do next byte - -10: - @ At this point, we are aligned, we know we have at least 8 bytes to work with - push {r4,r5,r6,r7} - orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes - orr r1, r1, r1, lsl #16 - bic r4, r2, #7 @ Number of double words to work with * 8 - mvns r7, #0 @ all F's - movs r3, #0 - -15: - ldmia r0!,{r5,r6} - subs r4, r4, #8 - eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target - eor r6,r6, r1 - uadd8 r5, r5, r7 @ Parallel add 0xff -
sets the GE bits for anything that wasn't 0 - sel r5, r3, r7 @ bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION - uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 - sel r6, r5, r7 @ chained....bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION - cbnz r6, 60f - bne 15b @ (Flags from the subs above) If not run out of bytes then go around again - - pop {r4,r5,r6,r7} - and r1,r1,#0xff @ Get r1 back to a single character from the expansion above - and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done - -20: - cbz r2, 40f @ 0 length or hit the end already then not found - -21: @ Post aligned section, or just a short call - ldrb r3,[r0],#1 - subs r2,r2,#1 - eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub - cbz r3, 50f - bne 21b @ on r2 flags - -40: - movs r0,#0 @ not found - bx lr - -50: - subs r0,r0,#1 @ found - bx lr - -60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was - @ r0 points to the start of the double word after the one that was tested - @ r5 has the 00/ff pattern for the first word, r6 has the chained value - cmp r5, #0 - itte eq - moveq r5, r6 @ the end is in the 2nd word - subeq r0,r0,#3 @ Points to 2nd byte of 2nd word - subne r0,r0,#7 @ or 2nd byte of 1st word - - @ r0 currently points to the 2nd byte of the word containing the hit - tst r5, # CHARTSTMASK(0) @ 1st character - bne 61f - adds r0,r0,#1 - tst r5, # CHARTSTMASK(1) @ 2nd character - ittt eq - addeq r0,r0,#1 - tsteq r5, # (3<<15) @ 2nd & 3rd character - @ If not the 3rd must be the last one - addeq r0,r0,#1 - -61: - pop {r4,r5,r6,r7} - subs r0,r0,#1 - bx lr diff --git a/src/arm/memcpy.S b/src/arm/memcpy.S deleted file mode 100644 index dd405ec..0000000 --- a/src/arm/memcpy.S +++ /dev/null @@ -1,617 +0,0 @@ -/* Copyright (c) 2013, Linaro Limited - All rights reserved.
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Linaro Limited nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - This memcpy routine is optimised for Cortex-A15 cores and takes advantage - of VFP or NEON when built with the appropriate flags. - - Assumptions: - - ARMv6 (ARMv7-a if using Neon) - ARM state - Unaligned accesses - - */ - - .syntax unified - /* This implementation requires ARM state. 
*/ - .arm - -#ifdef __ARM_NEON__ - - .fpu neon - .arch armv7-a -# define FRAME_SIZE 4 -# define USE_VFP -# define USE_NEON - -#elif !defined (__SOFTFP__) - - .arch armv6 - .fpu vfpv2 -# define FRAME_SIZE 32 -# define USE_VFP - -#else - .arch armv6 -# define FRAME_SIZE 32 - -#endif - -/* Old versions of GAS incorrectly implement the NEON align semantics. */ -#ifdef BROKEN_ASM_NEON_ALIGN -#define ALIGN(addr, align) addr,:align -#else -#define ALIGN(addr, align) addr:align -#endif - -#define PC_OFFSET 8 /* PC pipeline compensation. */ -#define INSN_SIZE 4 - -/* Call parameters. */ -#define dstin r0 -#define src r1 -#define count r2 - -/* Locals. */ -#define tmp1 r3 -#define dst ip -#define tmp2 r10 - -#ifndef USE_NEON -/* For bulk copies using GP registers. */ -#define A_l r2 /* Call-clobbered. */ -#define A_h r3 /* Call-clobbered. */ -#define B_l r4 -#define B_h r5 -#define C_l r6 -#define C_h r7 -#define D_l r8 -#define D_h r9 -#endif - -/* Number of lines ahead to pre-fetch data. If you change this the code - below will need adjustment to compensate. 
*/ - -#define prefetch_lines 5 - -#ifdef USE_VFP - .macro cpy_line_vfp vreg, base - vstr \vreg, [dst, #\base] - vldr \vreg, [src, #\base] - vstr d0, [dst, #\base + 8] - vldr d0, [src, #\base + 8] - vstr d1, [dst, #\base + 16] - vldr d1, [src, #\base + 16] - vstr d2, [dst, #\base + 24] - vldr d2, [src, #\base + 24] - vstr \vreg, [dst, #\base + 32] - vldr \vreg, [src, #\base + prefetch_lines * 64 - 32] - vstr d0, [dst, #\base + 40] - vldr d0, [src, #\base + 40] - vstr d1, [dst, #\base + 48] - vldr d1, [src, #\base + 48] - vstr d2, [dst, #\base + 56] - vldr d2, [src, #\base + 56] - .endm - - .macro cpy_tail_vfp vreg, base - vstr \vreg, [dst, #\base] - vldr \vreg, [src, #\base] - vstr d0, [dst, #\base + 8] - vldr d0, [src, #\base + 8] - vstr d1, [dst, #\base + 16] - vldr d1, [src, #\base + 16] - vstr d2, [dst, #\base + 24] - vldr d2, [src, #\base + 24] - vstr \vreg, [dst, #\base + 32] - vstr d0, [dst, #\base + 40] - vldr d0, [src, #\base + 40] - vstr d1, [dst, #\base + 48] - vldr d1, [src, #\base + 48] - vstr d2, [dst, #\base + 56] - vldr d2, [src, #\base + 56] - .endm -#endif - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn memcpy p2align=6 - - mov dst, dstin /* Preserve dstin, we need to return it. */ - cmp count, #64 - bge .Lcpy_not_short - /* Deal with small copies quickly by dropping straight into the - exit block. */ - -.Ltail63unaligned: -#ifdef USE_NEON - and tmp1, count, #0x38 - rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE) - add pc, pc, tmp1 - vld1.8 {d0}, [src]! /* 14 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 12 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 10 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 8 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 6 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 4 words to go. */ - vst1.8 {d0}, [dst]! - vld1.8 {d0}, [src]! /* 2 words to go. */ - vst1.8 {d0}, [dst]! 
- - tst count, #4 - ldrne tmp1, [src], #4 - strne tmp1, [dst], #4 -#else - /* Copy up to 15 full words of data. May not be aligned. */ - /* Cannot use VFP for unaligned data. */ - and tmp1, count, #0x3c - add dst, dst, tmp1 - add src, src, tmp1 - rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2) - /* Jump directly into the sequence below at the correct offset. */ - add pc, pc, tmp1, lsl #1 - - ldr tmp1, [src, #-60] /* 15 words to go. */ - str tmp1, [dst, #-60] - - ldr tmp1, [src, #-56] /* 14 words to go. */ - str tmp1, [dst, #-56] - ldr tmp1, [src, #-52] - str tmp1, [dst, #-52] - - ldr tmp1, [src, #-48] /* 12 words to go. */ - str tmp1, [dst, #-48] - ldr tmp1, [src, #-44] - str tmp1, [dst, #-44] - - ldr tmp1, [src, #-40] /* 10 words to go. */ - str tmp1, [dst, #-40] - ldr tmp1, [src, #-36] - str tmp1, [dst, #-36] - - ldr tmp1, [src, #-32] /* 8 words to go. */ - str tmp1, [dst, #-32] - ldr tmp1, [src, #-28] - str tmp1, [dst, #-28] - - ldr tmp1, [src, #-24] /* 6 words to go. */ - str tmp1, [dst, #-24] - ldr tmp1, [src, #-20] - str tmp1, [dst, #-20] - - ldr tmp1, [src, #-16] /* 4 words to go. */ - str tmp1, [dst, #-16] - ldr tmp1, [src, #-12] - str tmp1, [dst, #-12] - - ldr tmp1, [src, #-8] /* 2 words to go. */ - str tmp1, [dst, #-8] - ldr tmp1, [src, #-4] - str tmp1, [dst, #-4] -#endif - - lsls count, count, #31 - ldrhcs tmp1, [src], #2 - ldrbne src, [src] /* Src is dead, use as a scratch. */ - strhcs tmp1, [dst], #2 - strbne src, [dst] - bx lr - -.Lcpy_not_short: - /* At least 64 bytes to copy, but don't know the alignment yet. */ - str tmp2, [sp, #-FRAME_SIZE]! - and tmp2, src, #7 - and tmp1, dst, #7 - cmp tmp1, tmp2 - bne .Lcpy_notaligned - -#ifdef USE_VFP - /* Magic dust alert! Force VFP on Cortex-A9. Experiments show - that the FP pipeline is much better at streaming loads and - stores. This is outside the critical loop. 
*/ - vmov.f32 s0, s0 -#endif - - /* SRC and DST have the same mutual 64-bit alignment, but we may - still need to pre-copy some bytes to get to natural alignment. - We bring SRC and DST into full 64-bit alignment. */ - lsls tmp2, dst, #29 - beq 1f - rsbs tmp2, tmp2, #0 - sub count, count, tmp2, lsr #29 - ldrmi tmp1, [src], #4 - strmi tmp1, [dst], #4 - lsls tmp2, tmp2, #2 - ldrhcs tmp1, [src], #2 - ldrbne tmp2, [src], #1 - strhcs tmp1, [dst], #2 - strbne tmp2, [dst], #1 - -1: - subs tmp2, count, #64 /* Use tmp2 for count. */ - blt .Ltail63aligned - - cmp tmp2, #512 - bge .Lcpy_body_long - -.Lcpy_body_medium: /* Count in tmp2. */ -#ifdef USE_VFP -1: - vldr d0, [src, #0] - subs tmp2, tmp2, #64 - vldr d1, [src, #8] - vstr d0, [dst, #0] - vldr d0, [src, #16] - vstr d1, [dst, #8] - vldr d1, [src, #24] - vstr d0, [dst, #16] - vldr d0, [src, #32] - vstr d1, [dst, #24] - vldr d1, [src, #40] - vstr d0, [dst, #32] - vldr d0, [src, #48] - vstr d1, [dst, #40] - vldr d1, [src, #56] - vstr d0, [dst, #48] - add src, src, #64 - vstr d1, [dst, #56] - add dst, dst, #64 - bge 1b - tst tmp2, #0x3f - beq .Ldone - -.Ltail63aligned: /* Count in tmp2. */ - and tmp1, tmp2, #0x38 - add dst, dst, tmp1 - add src, src, tmp1 - rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE) - add pc, pc, tmp1 - - vldr d0, [src, #-56] /* 14 words to go. */ - vstr d0, [dst, #-56] - vldr d0, [src, #-48] /* 12 words to go. */ - vstr d0, [dst, #-48] - vldr d0, [src, #-40] /* 10 words to go. */ - vstr d0, [dst, #-40] - vldr d0, [src, #-32] /* 8 words to go. */ - vstr d0, [dst, #-32] - vldr d0, [src, #-24] /* 6 words to go. */ - vstr d0, [dst, #-24] - vldr d0, [src, #-16] /* 4 words to go. */ - vstr d0, [dst, #-16] - vldr d0, [src, #-8] /* 2 words to go. 
*/ - vstr d0, [dst, #-8] -#else - sub src, src, #8 - sub dst, dst, #8 -1: - ldrd A_l, A_h, [src, #8] - strd A_l, A_h, [dst, #8] - ldrd A_l, A_h, [src, #16] - strd A_l, A_h, [dst, #16] - ldrd A_l, A_h, [src, #24] - strd A_l, A_h, [dst, #24] - ldrd A_l, A_h, [src, #32] - strd A_l, A_h, [dst, #32] - ldrd A_l, A_h, [src, #40] - strd A_l, A_h, [dst, #40] - ldrd A_l, A_h, [src, #48] - strd A_l, A_h, [dst, #48] - ldrd A_l, A_h, [src, #56] - strd A_l, A_h, [dst, #56] - ldrd A_l, A_h, [src, #64]! - strd A_l, A_h, [dst, #64]! - subs tmp2, tmp2, #64 - bge 1b - tst tmp2, #0x3f - bne 1f - ldr tmp2,[sp], #FRAME_SIZE - bx lr -1: - add src, src, #8 - add dst, dst, #8 - -.Ltail63aligned: /* Count in tmp2. */ - /* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but - we know that the src and dest are 64-bit aligned so we can use - LDRD/STRD to improve efficiency. */ - /* TMP2 is now negative, but we don't care about that. The bottom - six bits still tell us how many bytes are left to copy. */ - - and tmp1, tmp2, #0x38 - add dst, dst, tmp1 - add src, src, tmp1 - rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE) - add pc, pc, tmp1 - ldrd A_l, A_h, [src, #-56] /* 14 words to go. */ - strd A_l, A_h, [dst, #-56] - ldrd A_l, A_h, [src, #-48] /* 12 words to go. */ - strd A_l, A_h, [dst, #-48] - ldrd A_l, A_h, [src, #-40] /* 10 words to go. */ - strd A_l, A_h, [dst, #-40] - ldrd A_l, A_h, [src, #-32] /* 8 words to go. */ - strd A_l, A_h, [dst, #-32] - ldrd A_l, A_h, [src, #-24] /* 6 words to go. */ - strd A_l, A_h, [dst, #-24] - ldrd A_l, A_h, [src, #-16] /* 4 words to go. */ - strd A_l, A_h, [dst, #-16] - ldrd A_l, A_h, [src, #-8] /* 2 words to go. */ - strd A_l, A_h, [dst, #-8] - -#endif - tst tmp2, #4 - ldrne tmp1, [src], #4 - strne tmp1, [dst], #4 - lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */ - ldrhcs tmp1, [src], #2 - ldrbne tmp2, [src] - strhcs tmp1, [dst], #2 - strbne tmp2, [dst] - -.Ldone: - ldr tmp2, [sp], #FRAME_SIZE - bx lr - -.Lcpy_body_long: /* Count in tmp2. 
*/ - - /* Long copy. We know that there's at least (prefetch_lines * 64) - bytes to go. */ -#ifdef USE_VFP - /* Don't use PLD. Instead, read some data in advance of the current - copy position into a register. This should act like a PLD - operation but we won't have to repeat the transfer. */ - - vldr d3, [src, #0] - vldr d4, [src, #64] - vldr d5, [src, #128] - vldr d6, [src, #192] - vldr d7, [src, #256] - - vldr d0, [src, #8] - vldr d1, [src, #16] - vldr d2, [src, #24] - add src, src, #32 - - subs tmp2, tmp2, #prefetch_lines * 64 * 2 - blt 2f -1: - cpy_line_vfp d3, 0 - cpy_line_vfp d4, 64 - cpy_line_vfp d5, 128 - add dst, dst, #3 * 64 - add src, src, #3 * 64 - cpy_line_vfp d6, 0 - cpy_line_vfp d7, 64 - add dst, dst, #2 * 64 - add src, src, #2 * 64 - subs tmp2, tmp2, #prefetch_lines * 64 - bge 1b - -2: - cpy_tail_vfp d3, 0 - cpy_tail_vfp d4, 64 - cpy_tail_vfp d5, 128 - add src, src, #3 * 64 - add dst, dst, #3 * 64 - cpy_tail_vfp d6, 0 - vstr d7, [dst, #64] - vldr d7, [src, #64] - vstr d0, [dst, #64 + 8] - vldr d0, [src, #64 + 8] - vstr d1, [dst, #64 + 16] - vldr d1, [src, #64 + 16] - vstr d2, [dst, #64 + 24] - vldr d2, [src, #64 + 24] - vstr d7, [dst, #64 + 32] - add src, src, #96 - vstr d0, [dst, #64 + 40] - vstr d1, [dst, #64 + 48] - vstr d2, [dst, #64 + 56] - add dst, dst, #128 - add tmp2, tmp2, #prefetch_lines * 64 - b .Lcpy_body_medium -#else - /* Long copy. Use an SMS style loop to maximize the I/O - bandwidth of the core. We don't have enough spare registers - to synthesise prefetching, so use PLD operations. */ - /* Pre-bias src and dst. */ - sub src, src, #8 - sub dst, dst, #8 - pld [src, #8] - pld [src, #72] - subs tmp2, tmp2, #64 - pld [src, #136] - ldrd A_l, A_h, [src, #8] - strd B_l, B_h, [sp, #8] - ldrd B_l, B_h, [src, #16] - strd C_l, C_h, [sp, #16] - ldrd C_l, C_h, [src, #24] - strd D_l, D_h, [sp, #24] - pld [src, #200] - ldrd D_l, D_h, [src, #32]! 
- b 1f - .p2align 6 -2: - pld [src, #232] - strd A_l, A_h, [dst, #40] - ldrd A_l, A_h, [src, #40] - strd B_l, B_h, [dst, #48] - ldrd B_l, B_h, [src, #48] - strd C_l, C_h, [dst, #56] - ldrd C_l, C_h, [src, #56] - strd D_l, D_h, [dst, #64]! - ldrd D_l, D_h, [src, #64]! - subs tmp2, tmp2, #64 -1: - strd A_l, A_h, [dst, #8] - ldrd A_l, A_h, [src, #8] - strd B_l, B_h, [dst, #16] - ldrd B_l, B_h, [src, #16] - strd C_l, C_h, [dst, #24] - ldrd C_l, C_h, [src, #24] - strd D_l, D_h, [dst, #32] - ldrd D_l, D_h, [src, #32] - bcs 2b - /* Save the remaining bytes and restore the callee-saved regs. */ - strd A_l, A_h, [dst, #40] - add src, src, #40 - strd B_l, B_h, [dst, #48] - ldrd B_l, B_h, [sp, #8] - strd C_l, C_h, [dst, #56] - ldrd C_l, C_h, [sp, #16] - strd D_l, D_h, [dst, #64] - ldrd D_l, D_h, [sp, #24] - add dst, dst, #72 - tst tmp2, #0x3f - bne .Ltail63aligned - ldr tmp2, [sp], #FRAME_SIZE - bx lr -#endif - -.Lcpy_notaligned: - pld [src] - pld [src, #64] - /* There's at least 64 bytes to copy, but there is no mutual - alignment. */ - /* Bring DST to 64-bit alignment. */ - lsls tmp2, dst, #29 - pld [src, #(2 * 64)] - beq 1f - rsbs tmp2, tmp2, #0 - sub count, count, tmp2, lsr #29 - ldrmi tmp1, [src], #4 - strmi tmp1, [dst], #4 - lsls tmp2, tmp2, #2 - ldrbne tmp1, [src], #1 - ldrhcs tmp2, [src], #2 - strbne tmp1, [dst], #1 - strhcs tmp2, [dst], #2 -1: - pld [src, #(3 * 64)] - subs count, count, #64 - ldrmi tmp2, [sp], #FRAME_SIZE - bmi .Ltail63unaligned - pld [src, #(4 * 64)] - -#ifdef USE_NEON - vld1.8 {d0-d3}, [src]! - vld1.8 {d4-d7}, [src]! - subs count, count, #64 - bmi 2f -1: - pld [src, #(4 * 64)] - vst1.8 {d0-d3}, [ALIGN (dst, 64)]! - vld1.8 {d0-d3}, [src]! - vst1.8 {d4-d7}, [ALIGN (dst, 64)]! - vld1.8 {d4-d7}, [src]! - subs count, count, #64 - bpl 1b -2: - vst1.8 {d0-d3}, [ALIGN (dst, 64)]! - vst1.8 {d4-d7}, [ALIGN (dst, 64)]! - ands count, count, #0x3f -#else - /* Use an SMS style loop to maximize the I/O bandwidth. 
*/ - sub src, src, #4 - sub dst, dst, #8 - subs tmp2, count, #64 /* Use tmp2 for count. */ - ldr A_l, [src, #4] - ldr A_h, [src, #8] - strd B_l, B_h, [sp, #8] - ldr B_l, [src, #12] - ldr B_h, [src, #16] - strd C_l, C_h, [sp, #16] - ldr C_l, [src, #20] - ldr C_h, [src, #24] - strd D_l, D_h, [sp, #24] - ldr D_l, [src, #28] - ldr D_h, [src, #32]! - b 1f - .p2align 6 -2: - pld [src, #(5 * 64) - (32 - 4)] - strd A_l, A_h, [dst, #40] - ldr A_l, [src, #36] - ldr A_h, [src, #40] - strd B_l, B_h, [dst, #48] - ldr B_l, [src, #44] - ldr B_h, [src, #48] - strd C_l, C_h, [dst, #56] - ldr C_l, [src, #52] - ldr C_h, [src, #56] - strd D_l, D_h, [dst, #64]! - ldr D_l, [src, #60] - ldr D_h, [src, #64]! - subs tmp2, tmp2, #64 -1: - strd A_l, A_h, [dst, #8] - ldr A_l, [src, #4] - ldr A_h, [src, #8] - strd B_l, B_h, [dst, #16] - ldr B_l, [src, #12] - ldr B_h, [src, #16] - strd C_l, C_h, [dst, #24] - ldr C_l, [src, #20] - ldr C_h, [src, #24] - strd D_l, D_h, [dst, #32] - ldr D_l, [src, #28] - ldr D_h, [src, #32] - bcs 2b - - /* Save the remaining bytes and restore the callee-saved regs. */ - strd A_l, A_h, [dst, #40] - add src, src, #36 - strd B_l, B_h, [dst, #48] - ldrd B_l, B_h, [sp, #8] - strd C_l, C_h, [dst, #56] - ldrd C_l, C_h, [sp, #16] - strd D_l, D_h, [dst, #64] - ldrd D_l, D_h, [sp, #24] - add dst, dst, #72 - ands count, tmp2, #0x3f -#endif - ldr tmp2, [sp], #FRAME_SIZE - bne .Ltail63unaligned - bx lr - - .size memcpy, . - memcpy diff --git a/src/arm/memset.S b/src/arm/memset.S deleted file mode 100644 index c0ad588..0000000 --- a/src/arm/memset.S +++ /dev/null @@ -1,122 +0,0 @@ -/* Copyright (c) 2010-2011, Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Linaro Limited nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - Written by Dave Gilbert <david.gilbert@linaro.org> - - This memset routine is optimised on a Cortex-A9 and should work on - all ARMv7 processors. 
- - */ - - .syntax unified - .arch armv7-a - -@ 2011-08-30 david.gilbert@linaro.org -@ Extracted from local git 2f11b436 - -@ this lets us check a flag in a 00/ff byte easily in either endianness -#ifdef __ARMEB__ -#define CHARTSTMASK(c) 1<<(31-(c*8)) -#else -#define CHARTSTMASK(c) 1<<(c*8) -#endif - .text - .thumb - -@ --------------------------------------------------------------------------- - .thumb_func - .align 2 - .p2align 4,,15 - .global memset - .type memset,%function -memset: - @ r0 = address - @ r1 = character - @ r2 = count - @ returns original address in r0 - - mov r3, r0 @ Leave r0 alone - cbz r2, 10f @ Exit if 0 length - - tst r0, #7 - beq 2f @ Already aligned - - @ Ok, so we're misaligned here -1: - strb r1, [r3], #1 - subs r2,r2,#1 - tst r3, #7 - cbz r2, 10f @ Exit if we hit the end - bne 1b @ go round again if still misaligned - -2: - @ OK, so we're aligned - push {r4,r5,r6,r7} - bics r4, r2, #15 @ if less than 16 bytes then need to finish it off - beq 5f - -3: - @ POSIX says that ch is cast to an unsigned char. A uxtb is one - @ byte and takes two cycles, where an AND is four bytes but one - @ cycle. - and r1, #0xFF - orr r1, r1, r1, lsl#8 @ Same character into all bytes - orr r1, r1, r1, lsl#16 - mov r5,r1 - mov r6,r1 - mov r7,r1 - -4: - subs r4,r4,#16 - stmia r3!,{r1,r5,r6,r7} - bne 4b - and r2,r2,#15 - - @ At this point we're still aligned and we have upto align-1 bytes left to right - @ we can avoid some of the byte-at-a time now by testing for some big chunks - tst r2,#8 - itt ne - subne r2,r2,#8 - stmiane r3!,{r1,r5} - -5: - pop {r4,r5,r6,r7} - cbz r2, 10f - - @ Got to do any last < alignment bytes -6: - subs r2,r2,#1 - strb r1,[r3],#1 - bne 6b - -10: - bx lr @ goodbye diff --git a/src/arm/strchr.S b/src/arm/strchr.S deleted file mode 100644 index 8e06dd4..0000000 --- a/src/arm/strchr.S +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2010-2011, Linaro Limited - All rights reserved. 
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Linaro Limited nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - Written by Dave Gilbert <david.gilbert@linaro.org> - - A very simple strchr routine, from benchmarks on A9 it's a bit faster than - the current version in eglibc (2.12.1-0ubuntu14 package) - I don't think doing a word at a time version is worth it since a lot - of strchr cases are very short anyway. 
- - */ - -@ 2011-02-07 david.gilbert@linaro.org -@ Extracted from local git a5b438d861 - - .syntax unified - .arch armv7-a - - .text - .thumb - -@ --------------------------------------------------------------------------- - - .thumb_func - .align 2 - .p2align 4,,15 - .global strchr - .type strchr,%function -strchr: - @ r0 = start of string - @ r1 = character to match - @ returns NULL for no match, or a pointer to the match - and r1,r1, #255 - -1: - ldrb r2,[r0],#1 - cmp r2,r1 - cbz r2,10f - bne 1b - - @ We're here if it matched -5: - subs r0,r0,#1 - bx lr - -10: - @ We're here if we ran off the end - cmp r1, #0 @ Corner case - you're allowed to search for the nil and get a pointer to it - beq 5b @ A bit messy, if it's common we should branch at the start to a special loop - mov r0,#0 - bx lr diff --git a/src/arm/strcmp.S b/src/arm/strcmp.S deleted file mode 100644 index 5fad272..0000000 --- a/src/arm/strcmp.S +++ /dev/null @@ -1,500 +0,0 @@ -/* - * Copyright (c) 2012-2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Implementation of strcmp for ARMv7 when DSP instructions are - available. Use ldrd to support wider loads, provided the data - is sufficiently aligned. Use saturating arithmetic to optimize - the compares. */ - -/* Build Options: - STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first - byte in the string. If comparing completely random strings - the pre-check will save time, since there is a very high - probability of a mismatch in the first character: we save - significant overhead if this is the common case. However, - if strings are likely to be identical (eg because we're - verifying a hit in a hash table), then this check is largely - redundant. */ - -#define STRCMP_NO_PRECHECK 0 - - /* This version uses Thumb-2 code. */ - .thumb - .syntax unified - -#ifdef __ARM_BIG_ENDIAN -#define S2LO lsl -#define S2LOEQ lsleq -#define S2HI lsr -#define MSB 0x000000ff -#define LSB 0xff000000 -#define BYTE0_OFFSET 24 -#define BYTE1_OFFSET 16 -#define BYTE2_OFFSET 8 -#define BYTE3_OFFSET 0 -#else /* not __ARM_BIG_ENDIAN */ -#define S2LO lsr -#define S2LOEQ lsreq -#define S2HI lsl -#define BYTE0_OFFSET 0 -#define BYTE1_OFFSET 8 -#define BYTE2_OFFSET 16 -#define BYTE3_OFFSET 24 -#define MSB 0xff000000 -#define LSB 0x000000ff -#endif /* not __ARM_BIG_ENDIAN */ - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -/* Parameters and result. 
*/ -#define src1 r0 -#define src2 r1 -#define result r0 /* Overlaps src1. */ - -/* Internal variables. */ -#define tmp1 r4 -#define tmp2 r5 -#define const_m1 r12 - -/* Additional internal variables for 64-bit aligned data. */ -#define data1a r2 -#define data1b r3 -#define data2a r6 -#define data2b r7 -#define syndrome_a tmp1 -#define syndrome_b tmp2 - -/* Additional internal variables for 32-bit aligned data. */ -#define data1 r2 -#define data2 r3 -#define syndrome tmp2 - - - /* Macro to compute and return the result value for word-aligned - cases. */ - .macro strcmp_epilogue_aligned synd d1 d2 restore_r6 -#ifdef __ARM_BIG_ENDIAN - /* If data1 contains a zero byte, then syndrome will contain a 1 in - bit 7 of that byte. Otherwise, the highest set bit in the - syndrome will highlight the first different bit. It is therefore - sufficient to extract the eight bits starting with the syndrome - bit. */ - clz tmp1, \synd - lsl r1, \d2, tmp1 - .if \restore_r6 - ldrd r6, r7, [sp, #8] - .endif - .cfi_restore 6 - .cfi_restore 7 - lsl \d1, \d1, tmp1 - .cfi_remember_state - lsr result, \d1, #24 - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - sub result, result, r1, lsr #24 - bx lr -#else - /* To use the big-endian trick we'd have to reverse all three words. - that's slower than this approach. */ - rev \synd, \synd - clz tmp1, \synd - bic tmp1, tmp1, #7 - lsr r1, \d2, tmp1 - .cfi_remember_state - .if \restore_r6 - ldrd r6, r7, [sp, #8] - .endif - .cfi_restore 6 - .cfi_restore 7 - lsr \d1, \d1, tmp1 - and result, \d1, #255 - and r1, r1, #255 - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - sub result, result, r1 - - bx lr -#endif - .endm - - .text - .p2align 5 -.Lstrcmp_start_addr: -#if STRCMP_NO_PRECHECK == 0 -.Lfastpath_exit: - sub r0, r2, r3 - bx lr - nop -#endif -def_fn strcmp -#if STRCMP_NO_PRECHECK == 0 - ldrb r2, [src1] - ldrb r3, [src2] - cmp r2, #1 - it cs - cmpcs r2, r3 - bne .Lfastpath_exit -#endif - .cfi_startproc - strd r4, r5, [sp, #-16]! 
- .cfi_def_cfa_offset 16 - .cfi_offset 4, -16 - .cfi_offset 5, -12 - orr tmp1, src1, src2 - strd r6, r7, [sp, #8] - .cfi_offset 6, -8 - .cfi_offset 7, -4 - mvn const_m1, #0 - lsl r2, tmp1, #29 - cbz r2, .Lloop_aligned8 - -.Lnot_aligned: - eor tmp1, src1, src2 - tst tmp1, #7 - bne .Lmisaligned8 - - /* Deal with mutual misalignment by aligning downwards and then - masking off the unwanted loaded data to prevent a difference. */ - and tmp1, src1, #7 - bic src1, src1, #7 - and tmp2, tmp1, #3 - bic src2, src2, #7 - lsl tmp2, tmp2, #3 /* Bytes -> bits. */ - ldrd data1a, data1b, [src1], #16 - tst tmp1, #4 - ldrd data2a, data2b, [src2], #16 - /* In thumb code we can't use MVN with a register shift, but - we do have ORN. */ - S2HI tmp1, const_m1, tmp2 - orn data1a, data1a, tmp1 - orn data2a, data2a, tmp1 - beq .Lstart_realigned8 - orn data1b, data1b, tmp1 - mov data1a, const_m1 - orn data2b, data2b, tmp1 - mov data2a, const_m1 - b .Lstart_realigned8 - - /* Unwind the inner loop by a factor of 2, giving 16 bytes per - pass. */ - .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */ - .p2align 2 /* Always word aligned. */ -.Lloop_aligned8: - ldrd data1a, data1b, [src1], #16 - ldrd data2a, data2b, [src2], #16 -.Lstart_realigned8: - uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ - eor syndrome_a, data1a, data2a - sel syndrome_a, syndrome_a, const_m1 - cbnz syndrome_a, .Ldiff_in_a - uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */ - eor syndrome_b, data1b, data2b - sel syndrome_b, syndrome_b, const_m1 - cbnz syndrome_b, .Ldiff_in_b - - ldrd data1a, data1b, [src1, #-8] - ldrd data2a, data2b, [src2, #-8] - uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ - eor syndrome_a, data1a, data2a - sel syndrome_a, syndrome_a, const_m1 - uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */ - eor syndrome_b, data1b, data2b - sel syndrome_b, syndrome_b, const_m1 - /* Can't use CBZ for backwards branch. 
*/ - orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */ - beq .Lloop_aligned8 - -.Ldiff_found: - cbnz syndrome_a, .Ldiff_in_a - -.Ldiff_in_b: - strcmp_epilogue_aligned syndrome_b, data1b, data2b 1 - -.Ldiff_in_a: - .cfi_restore_state - strcmp_epilogue_aligned syndrome_a, data1a, data2a 1 - - .cfi_restore_state -.Lmisaligned8: - tst tmp1, #3 - bne .Lmisaligned4 - ands tmp1, src1, #3 - bne .Lmutual_align4 - - /* Unrolled by a factor of 2, to reduce the number of post-increment - operations. */ -.Lloop_aligned4: - ldr data1, [src1], #8 - ldr data2, [src2], #8 -.Lstart_realigned4: - uadd8 syndrome, data1, const_m1 /* Only need GE bits. */ - eor syndrome, data1, data2 - sel syndrome, syndrome, const_m1 - cbnz syndrome, .Laligned4_done - ldr data1, [src1, #-4] - ldr data2, [src2, #-4] - uadd8 syndrome, data1, const_m1 - eor syndrome, data1, data2 - sel syndrome, syndrome, const_m1 - cmp syndrome, #0 - beq .Lloop_aligned4 - -.Laligned4_done: - strcmp_epilogue_aligned syndrome, data1, data2, 0 - -.Lmutual_align4: - .cfi_restore_state - /* Deal with mutual misalignment by aligning downwards and then - masking off the unwanted loaded data to prevent a difference. */ - lsl tmp1, tmp1, #3 /* Bytes -> bits. */ - bic src1, src1, #3 - ldr data1, [src1], #8 - bic src2, src2, #3 - ldr data2, [src2], #8 - - /* In thumb code we can't use MVN with a register shift, but - we do have ORN. 
*/ - S2HI tmp1, const_m1, tmp1 - orn data1, data1, tmp1 - orn data2, data2, tmp1 - b .Lstart_realigned4 - -.Lmisaligned4: - ands tmp1, src1, #3 - beq .Lsrc1_aligned - sub src2, src2, tmp1 - bic src1, src1, #3 - lsls tmp1, tmp1, #31 - ldr data1, [src1], #4 - beq .Laligned_m2 - bcs .Laligned_m1 - -#if STRCMP_NO_PRECHECK == 1 - ldrb data2, [src2, #1] - uxtb tmp1, data1, ror #BYTE1_OFFSET - subs tmp1, tmp1, data2 - bne .Lmisaligned_exit - cbz data2, .Lmisaligned_exit - -.Laligned_m2: - ldrb data2, [src2, #2] - uxtb tmp1, data1, ror #BYTE2_OFFSET - subs tmp1, tmp1, data2 - bne .Lmisaligned_exit - cbz data2, .Lmisaligned_exit - -.Laligned_m1: - ldrb data2, [src2, #3] - uxtb tmp1, data1, ror #BYTE3_OFFSET - subs tmp1, tmp1, data2 - bne .Lmisaligned_exit - add src2, src2, #4 - cbnz data2, .Lsrc1_aligned -#else /* STRCMP_NO_PRECHECK */ - /* If we've done the pre-check, then we don't need to check the - first byte again here. */ - ldrb data2, [src2, #2] - uxtb tmp1, data1, ror #BYTE2_OFFSET - subs tmp1, tmp1, data2 - bne .Lmisaligned_exit - cbz data2, .Lmisaligned_exit - -.Laligned_m2: - ldrb data2, [src2, #3] - uxtb tmp1, data1, ror #BYTE3_OFFSET - subs tmp1, tmp1, data2 - bne .Lmisaligned_exit - cbnz data2, .Laligned_m1 -#endif - -.Lmisaligned_exit: - .cfi_remember_state - mov result, tmp1 - ldr r4, [sp], #16 - .cfi_restore 4 - bx lr - -#if STRCMP_NO_PRECHECK == 0 -.Laligned_m1: - add src2, src2, #4 -#endif -.Lsrc1_aligned: - .cfi_restore_state - /* src1 is word aligned, but src2 has no common alignment - with it. */ - ldr data1, [src1], #4 - lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */ - - bic src2, src2, #3 - ldr data2, [src2], #4 - bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */ - bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */ - - /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. 
*/ -.Loverlap3: - bic tmp1, data1, #MSB - uadd8 syndrome, data1, const_m1 - eors syndrome, tmp1, data2, S2LO #8 - sel syndrome, syndrome, const_m1 - bne 4f - cbnz syndrome, 5f - ldr data2, [src2], #4 - eor tmp1, tmp1, data1 - cmp tmp1, data2, S2HI #24 - bne 6f - ldr data1, [src1], #4 - b .Loverlap3 -4: - S2LO data2, data2, #8 - b .Lstrcmp_tail - -5: - bics syndrome, syndrome, #MSB - bne .Lstrcmp_done_equal - - /* We can only get here if the MSB of data1 contains 0, so - fast-path the exit. */ - ldrb result, [src2] - .cfi_remember_state - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - /* R6/7 Not used in this sequence. */ - .cfi_restore 6 - .cfi_restore 7 - neg result, result - bx lr - -6: - .cfi_restore_state - S2LO data1, data1, #24 - and data2, data2, #LSB - b .Lstrcmp_tail - - .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */ -.Loverlap2: - and tmp1, data1, const_m1, S2LO #16 - uadd8 syndrome, data1, const_m1 - eors syndrome, tmp1, data2, S2LO #16 - sel syndrome, syndrome, const_m1 - bne 4f - cbnz syndrome, 5f - ldr data2, [src2], #4 - eor tmp1, tmp1, data1 - cmp tmp1, data2, S2HI #16 - bne 6f - ldr data1, [src1], #4 - b .Loverlap2 -4: - S2LO data2, data2, #16 - b .Lstrcmp_tail -5: - ands syndrome, syndrome, const_m1, S2LO #16 - bne .Lstrcmp_done_equal - - ldrh data2, [src2] - S2LO data1, data1, #16 -#ifdef __ARM_BIG_ENDIAN - lsl data2, data2, #16 -#endif - b .Lstrcmp_tail - -6: - S2LO data1, data1, #16 - and data2, data2, const_m1, S2LO #16 - b .Lstrcmp_tail - - .p2align 5,,12 /* Ensure at least 3 instructions in cache line. 
*/ -.Loverlap1: - and tmp1, data1, #LSB - uadd8 syndrome, data1, const_m1 - eors syndrome, tmp1, data2, S2LO #24 - sel syndrome, syndrome, const_m1 - bne 4f - cbnz syndrome, 5f - ldr data2, [src2], #4 - eor tmp1, tmp1, data1 - cmp tmp1, data2, S2HI #8 - bne 6f - ldr data1, [src1], #4 - b .Loverlap1 -4: - S2LO data2, data2, #24 - b .Lstrcmp_tail -5: - tst syndrome, #LSB - bne .Lstrcmp_done_equal - ldr data2, [src2] -6: - S2LO data1, data1, #8 - bic data2, data2, #MSB - b .Lstrcmp_tail - -.Lstrcmp_done_equal: - mov result, #0 - .cfi_remember_state - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - /* R6/7 not used in this sequence. */ - .cfi_restore 6 - .cfi_restore 7 - bx lr - -.Lstrcmp_tail: - .cfi_restore_state -#ifndef __ARM_BIG_ENDIAN - rev data1, data1 - rev data2, data2 - /* Now everything looks big-endian... */ -#endif - uadd8 tmp1, data1, const_m1 - eor tmp1, data1, data2 - sel syndrome, tmp1, const_m1 - clz tmp1, syndrome - lsl data1, data1, tmp1 - lsl data2, data2, tmp1 - lsr result, data1, #24 - ldrd r4, r5, [sp], #16 - .cfi_restore 4 - .cfi_restore 5 - /* R6/7 not used in this sequence. */ - .cfi_restore 6 - .cfi_restore 7 - sub result, result, data2, lsr #24 - bx lr - .cfi_endproc - .size strcmp, . - .Lstrcmp_start_addr diff --git a/src/sve/memchr.S b/src/sve/memchr.S deleted file mode 100644 index 55dfe32..0000000 --- a/src/sve/memchr.S +++ /dev/null @@ -1,85 +0,0 @@ -/* - * memchr - find a character in a memory zone - * - * Copyright (c) 2018, Linaro Limited - * All rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * SVE Available. - */ - - .arch armv8-a+sve - .text - - .globl memchr - .type memchr, %function - .p2align 4 -memchr: - dup z1.b, w1 /* duplicate c to a vector */ - setffr /* initialize FFR */ - mov x3, 0 /* initialize off */ - nop - -0: whilelo p1.b, x3, x2 /* make sure off < max */ - b.none 9f - - /* Read a vector's worth of bytes, bounded by max, - stopping on first fault. */ - ldff1b z0.b, p1/z, [x0, x3] - rdffrs p0.b, p1/z - b.nlast 2f - - /* First fault did not fail: the vector bounded by max is valid. - Avoid depending on the contents of FFR beyond the branch. */ - incb x3 /* speculate increment */ - cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */ - b.none 0b - decb x3 /* undo speculate */ - - /* Found C. 
*/ -1: brkb p2.b, p1/z, p2.b /* find the first c */ - add x0, x0, x3 /* form partial pointer */ - incp x0, p2.b /* form final pointer to c */ - ret - - /* First fault failed: only some of the vector is valid. - Perform the comparision only on the valid bytes. */ -2: cmpeq p2.b, p0/z, z0.b, z1.b - b.any 1b - - /* No C found. Re-init FFR, increment, and loop. */ - setffr - incp x3, p0.b - b 0b - - /* Found end of count. */ -9: mov x0, 0 /* return null */ - ret - - .size memchr, . - memchr diff --git a/src/sve/memcmp.S b/src/sve/memcmp.S deleted file mode 100644 index cc12974..0000000 --- a/src/sve/memcmp.S +++ /dev/null @@ -1,71 +0,0 @@ -/* - * memcmp - compare memory - * - * Copyright (c) 2018, Linaro Limited - * All rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * SVE Available. - */ - - .arch armv8-a+sve - .text - - .globl memcmp - .type memcmp, %function - .p2align 4 -memcmp: - mov x3, 0 /* initialize off */ - -0: whilelo p0.b, x3, x2 /* while off < max */ - b.none 9f - - ld1b z0.b, p0/z, [x0, x3] /* read vectors bounded by max. */ - ld1b z1.b, p0/z, [x1, x3] - - /* Increment for a whole vector, even if we've only read a partial. - This is significantly cheaper than INCP, and since OFF is not - used after the loop it is ok to increment OFF past MAX. */ - incb x3 - - cmpne p1.b, p0/z, z0.b, z1.b /* while no inequalities */ - b.none 0b - - /* Found inequality. */ -1: brkb p1.b, p0/z, p1.b /* find first such */ - lasta w0, p1, z0.b /* extract each byte */ - lasta w1, p1, z1.b - sub x0, x0, x1 /* return comparison */ - ret - - /* Found end-of-count. */ -9: mov x0, 0 /* return equality */ - ret - - .size memcmp, . - memcmp diff --git a/src/sve/strchr.S b/src/sve/strchr.S deleted file mode 100644 index 8cf079b..0000000 --- a/src/sve/strchr.S +++ /dev/null @@ -1,92 +0,0 @@ -/* - * strchr/strchrnul - find a character in a string - * - * Copyright (c) 2018, Linaro Limited - * All rights Reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * SVE Available. - */ - - .arch armv8-a+sve - .text - -/* To build as strchrnul, define BUILD_STPCPY before compiling this file. 
*/ -#ifdef BUILD_STRCHRNUL -#define FUNC strchrnul -#else -#define FUNC strchr -#endif - - .globl FUNC - .type FUNC, %function - .p2align 4 -FUNC: - dup z1.b, w1 /* replicate byte across vector */ - setffr /* initialize FFR */ - ptrue p1.b /* all ones; loop invariant */ - - .p2align 4 - /* Read a vector's worth of bytes, stopping on first fault. */ -0: ldff1b z0.b, p1/z, [x0, xzr] - rdffrs p0.b, p1/z - b.nlast 2f - - /* First fault did not fail: the whole vector is valid. - Avoid depending on the contents of FFR beyond the branch. */ - incb x0 /* speculate increment */ - cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */ - cmpeq p3.b, p1/z, z0.b, 0 /* search for 0 */ - orrs p4.b, p1/z, p2.b, p3.b /* c | 0 */ - b.none 0b - decb x0 /* undo speculate */ - - /* Found C or 0. */ -1: brka p4.b, p1/z, p4.b /* find first such */ - sub x0, x0, 1 /* adjust pointer for that byte */ - incp x0, p4.b -#ifndef BUILD_STRCHRNUL - ptest p4, p2.b /* was first in c? */ - csel x0, xzr, x0, none /* if there was no c, return null */ -#endif - ret - - /* First fault failed: only some of the vector is valid. - Perform the comparision only on the valid bytes. */ -2: cmpeq p2.b, p0/z, z0.b, z1.b /* search for c */ - cmpeq p3.b, p0/z, z0.b, 0 /* search for 0 */ - orrs p4.b, p0/z, p2.b, p3.b /* c | 0 */ - b.any 1b - - /* No C or 0 found. Re-init FFR, increment, and loop. */ - setffr - incp x0, p0.b - b 0b - - .size FUNC, . - FUNC diff --git a/src/sve/strchrnul.S b/src/sve/strchrnul.S deleted file mode 100644 index 61d2877..0000000 --- a/src/sve/strchrnul.S +++ /dev/null @@ -1,32 +0,0 @@ -/* - * strchrnul - find a character or nul in a string - * - * Copyright (c) 2018, Linaro Limited - * All rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#define BUILD_STRCHRNUL -#include "strchr.S" diff --git a/src/sve/strcmp.S b/src/sve/strcmp.S deleted file mode 100644 index 9574c2d..0000000 --- a/src/sve/strcmp.S +++ /dev/null @@ -1,80 +0,0 @@ -/* - * strcmp - compare two strings - * - * Copyright (c) 2018, Linaro Limited - * All rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * SVE Available. - */ - - .arch armv8-a+sve - .text - - .globl strcmp - .type strcmp, %function - .p2align 4 -strcmp: - setffr /* initialize FFR */ - ptrue p1.b, all /* all ones; loop invariant */ - mov x2, 0 /* initialize offset */ - nop - - /* Read a vector's worth of bytes, stopping on first fault. */ -0: ldff1b z0.b, p1/z, [x0, x2] - ldff1b z1.b, p1/z, [x1, x2] - rdffrs p0.b, p1/z - b.nlast 2f - - /* First fault did not fail: the whole vector is valid. - Avoid depending on the contents of FFR beyond the branch. 
*/ - incb x2, all /* skip bytes for next round */ - cmpeq p2.b, p1/z, z0.b, z1.b /* compare strings */ - cmpne p3.b, p1/z, z0.b, 0 /* search for ~zero */ - nands p2.b, p1/z, p2.b, p3.b /* ~(eq & ~zero) -> ne | zero */ - b.none 0b - - /* Found end-of-string or inequality. */ -1: brkb p2.b, p1/z, p2.b /* find first such */ - lasta w0, p2, z0.b /* extract each char */ - lasta w1, p2, z1.b - sub x0, x0, x1 /* return comparison */ - ret - - /* First fault failed: only some of the vector is valid. - Perform the comparison only on the valid bytes. */ -2: incp x2, p0.b /* skip bytes for next round */ - setffr /* re-init FFR for next round */ - cmpeq p2.b, p0/z, z0.b, z1.b /* compare strings, as above */ - cmpne p3.b, p0/z, z0.b, 0 - nands p2.b, p0/z, p2.b, p3.b - b.none 0b - b 1b - - .size strcmp, . - strcmp diff --git a/src/sve/strcpy.S b/src/sve/strcpy.S deleted file mode 100644 index 83511f5..0000000 --- a/src/sve/strcpy.S +++ /dev/null @@ -1,92 +0,0 @@ -/* - * strcpy/stpcpy - copy a string returning pointer to start/end. - * - * Copyright (c) 2018, Linaro Limited - * All rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * SVE Available. - */ - - .arch armv8-a+sve - .text - -/* To build as stpcpy, define BUILD_STPCPY before compiling this file. */ -#ifdef BUILD_STPCPY -#define FUNC stpcpy -#else -#define FUNC strcpy -#endif - - .globl FUNC - .type FUNC, %function - .p2align 4 -FUNC: - setffr /* initialize FFR */ - ptrue p2.b, all /* all ones; loop invariant */ - mov x2, 0 /* initialize offset */ - - .p2align 4 - /* Read a vector's worth of bytes, stopping on first fault. */ -0: ldff1b z0.b, p2/z, [x1, x2] - rdffrs p0.b, p2/z - b.nlast 1f - - /* First fault did not fail: the whole vector is valid. - Avoid depending on the contexts of FFR beyond the branch. */ - cmpeq p1.b, p2/z, z0.b, 0 /* search for zeros */ - b.any 2f - - /* No zero found. Store the whole vector and loop. */ - st1b z0.b, p2, [x0, x2] - incb x2, all - b 0b - - /* First fault failed: only some of the vector is valid. - Perform the comparison only on the valid bytes. */ -1: cmpeq p1.b, p0/z, z0.b, 0 /* search for zeros */ - b.any 2f - - /* No zero found. Store the valid portion of the vector and loop. 
*/ - setffr /* re-init FFR */ - st1b z0.b, p0, [x0, x2] - incp x2, p0.b - b 0b - - /* Zero found. Crop the vector to the found zero and finish. */ -2: brka p0.b, p2/z, p1.b - st1b z0.b, p0, [x0, x2] -#ifdef BUILD_STPCPY - add x0, x0, x2 - sub x0, x0, 1 - incp x0, p0.b -#endif - ret - - .size FUNC, . - FUNC diff --git a/src/sve/strlen.S b/src/sve/strlen.S deleted file mode 100644 index cae2887..0000000 --- a/src/sve/strlen.S +++ /dev/null @@ -1,78 +0,0 @@ -/* - * strlen - compute the length of a string - * - * Copyright (c) 2018, Linaro Limited - * All rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * SVE Available. - */ - - .arch armv8-a+sve - .text - - .globl strlen - .type strlen, %function - .p2align 4 -strlen: - setffr /* initialize FFR */ - ptrue p2.b /* all ones; loop invariant */ - mov x1, 0 /* initialize length */ - nop - - /* Read a vector's worth of bytes, stopping on first fault. */ -0: ldff1b z0.b, p2/z, [x0, x1] - nop - rdffrs p0.b, p2/z - b.nlast 2f - - /* First fault did not fail: the whole vector is valid. - Avoid depending on the contents of FFR beyond the branch. */ - incb x1, all /* speculate increment */ - cmpeq p1.b, p2/z, z0.b, 0 /* loop if no zeros */ - b.none 0b - decb x1, all /* undo speculate */ - - /* Zero found. Select the bytes before the first and count them. */ -1: brkb p0.b, p2/z, p1.b - incp x1, p0.b - mov x0, x1 - ret - - /* First fault failed: only some of the vector is valid. - Perform the comparison only on the valid bytes. */ -2: cmpeq p1.b, p0/z, z0.b, 0 - b.any 1b - - /* No zero found. Re-init FFR, increment, and loop. */ - setffr - incp x1, p0.b - b 0b - - .size strlen, . - strlen diff --git a/src/sve/strncmp.S b/src/sve/strncmp.S deleted file mode 100644 index 49fd94c..0000000 --- a/src/sve/strncmp.S +++ /dev/null @@ -1,89 +0,0 @@ -/* - * strncmp - compare two strings with limit - * - * Copyright (c) 2018, Linaro Limited - * All rights Reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * SVE Available. - */ - - .arch armv8-a+sve - .text - - .globl strncmp - .type strncmp, %function - .p2align 4 -strncmp: - setffr /* initialize FFR */ - mov x3, 0 /* initialize off */ - -0: whilelo p0.b, x3, x2 /* while off < max */ - b.none 9f - - ldff1b z0.b, p0/z, [x0, x3] - ldff1b z1.b, p0/z, [x1, x3] - rdffrs p1.b, p0/z - b.nlast 2f - - /* First fault did not fail: the vector up to max is valid. 
- Avoid depending on the contents of FFR beyond the branch. - Increment for a whole vector, even if we've only read a partial. - This is significantly cheaper than INCP, and since OFF is not - used after the loop it is ok to increment OFF past MAX. */ - incb x3 - cmpeq p1.b, p0/z, z0.b, z1.b /* compare strings */ - cmpne p2.b, p0/z, z0.b, 0 /* search for ~zero */ - nands p2.b, p0/z, p1.b, p2.b /* ~(eq & ~zero) -> ne | zero */ - b.none 0b - - /* Found end-of-string or inequality. */ -1: brkb p2.b, p0/z, p2.b /* find first such */ - lasta w0, p2, z0.b /* extract each char */ - lasta w1, p2, z1.b - sub x0, x0, x1 /* return comparison */ - ret - - /* First fault failed: only some of the vector is valid. - Perform the comparison only on the valid bytes. */ -2: cmpeq p2.b, p1/z, z0.b, z1.b /* compare strings, as above */ - cmpne p3.b, p1/z, z0.b, 0 - nands p2.b, p1/z, p2.b, p3.b - b.any 1b - - /* No inequality or zero found. Re-init FFR, incr and loop. */ - setffr - incp x3, p1.b - b 0b - - /* Found end-of-count. */ -9: mov x0, 0 /* return equal */ - ret - - .size strncmp, . - strncmp diff --git a/src/sve/strnlen.S b/src/sve/strnlen.S deleted file mode 100644 index 459facc..0000000 --- a/src/sve/strnlen.S +++ /dev/null @@ -1,95 +0,0 @@ -/* - * strnlen - calculate the length of a string with limit. - * - * Copyright (c) 2018, Linaro Limited - * All rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * SVE Available. - */ - - .arch armv8-a+sve - .text - - .globl strnlen - .type strnlen, %function - .p2align 4 -strnlen: - setffr /* initialize FFR */ - mov x2, 0 /* initialize len */ - b 1f - - .p2align 4 - /* We have off + vl <= max, and so may read the whole vector. */ -0: ldff1b z0.b, p0/z, [x0, x2] - rdffrs p1.b, p0/z - b.nlast 2f - - /* First fault did not fail: the whole vector is valid. - Avoid depending on the contents of FFR beyond the branch. */ - cmpeq p2.b, p0/z, z0.b, 0 - b.any 8f - incb x2 - -1: whilelo p0.b, x2, x1 - b.last 0b - - /* We have off + vl < max. Test for off == max before proceeding. */ - b.none 9f - - ldff1b z0.b, p0/z, [x0, x2] - rdffrs p1.b, p0/z - b.nlast 2f - - /* First fault did not fail: the vector up to max is valid. - Avoid depending on the contents of FFR beyond the branch. - Compare for end-of-string, but there are no more bytes. */ - cmpeq p2.b, p0/z, z0.b, 0 - - /* Found end-of-string or zero. 
*/ -8: brkb p2.b, p0/z, p2.b - mov x0, x2 - incp x0, p2.b - ret - - /* First fault failed: only some of the vector is valid. - Perform the comparison only on the valid bytes. */ -2: cmpeq p2.b, p1/z, z0.b, 0 - b.any 8b - - /* No inequality or zero found. Re-init FFR, incr and loop. */ - setffr - incp x2, p1.b - b 1b - - /* End of count. Return max. */ -9: mov x0, x2 - ret - - .size strnlen, . - strnlen diff --git a/src/sve/strrchr.S b/src/sve/strrchr.S deleted file mode 100644 index 820bb80..0000000 --- a/src/sve/strrchr.S +++ /dev/null @@ -1,106 +0,0 @@ -/* - * strrchr - find the last of a character in a string - * - * Copyright (c) 2018, Linaro Limited - * All rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the company nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Assumptions: - * - * ARMv8-a, AArch64 - * SVE Available. - */ - - .arch armv8-a+sve - .text - - .globl strrchr - .type strrchr, %function - .p2align 4 -strrchr: - dup z1.b, w1 /* replicate byte across vector */ - setffr /* initialize FFR */ - ptrue p1.b /* all ones; loop invariant */ - mov x2, 0 /* no match found so far */ - pfalse p2.b - - .p2align 4 - /* Read a vector's worth of bytes, stopping on first fault. */ -0: ldff1b z0.b, p1/z, [x0, xzr] - rdffrs p0.b, p1/z - b.nlast 1f - - /* First fault did not fail: the whole vector is valid. - Avoid depending on the contents of FFR beyond the branch. */ - incb x0, all /* skip bytes this round */ - cmpeq p3.b, p1/z, z0.b, 0 /* search for 0 */ - b.any 3f - - cmpeq p3.b, p1/z, z0.b, z1.b /* search for c; no eos */ - b.none 0b - - mov x2, x0 /* save advanced base */ - mov p2.b, p3.b /* save current search */ - b 0b - - /* First fault failed: only some of the vector is valid. - Perform the comparisions only on the valid bytes. */ -1: cmpeq p3.b, p0/z, z0.b, 0 /* search for 0 */ - b.any 2f - - cmpeq p3.b, p0/z, z0.b, z1.b /* search for c; no eos */ - mov x3, x0 - incp x0, p0.b /* skip bytes this round */ - setffr /* re-init FFR */ - b.none 0b - - addvl x2, x3, 1 /* save advanced base */ - mov p2.b, p3.b /* save current search */ - b 0b - - /* Found end-of-string. 
*/ -2: incb x0, all /* advance base */ -3: brka p3.b, p1/z, p3.b /* mask after first 0 */ - cmpeq p3.b, p3/z, z0.b, z1.b /* search for c not after eos */ - b.any 4f - - /* No C within last vector. Did we have one before? */ - cbz x2, 5f - mov x0, x2 /* restore advanced base */ - mov p3.b, p2.b /* restore saved search */ - - /* Find the *last* match in the predicate. This is slightly - more complicated than finding the first match. */ -4: rev p3.b, p3.b /* reverse the bits */ - brka p3.b, p1/z, p3.b /* find position of last match */ - decp x0, p3.b /* retard pointer to last match */ - ret - - /* No C whatsoever. Return NULL. */ -5: mov x0, 0 - ret - - .size strrchr, . - strrchr diff --git a/src/thumb-2/strcpy.c b/src/thumb-2/strcpy.c deleted file mode 100644 index 7819500..0000000 --- a/src/thumb-2/strcpy.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2008 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* For GLIBC: -#include <string.h> -#include <memcopy.h> - -#undef strcmp -*/ - -#ifdef __thumb2__ -#define magic1(REG) "#0x01010101" -#define magic2(REG) "#0x80808080" -#else -#define magic1(REG) #REG -#define magic2(REG) #REG ", lsl #7" -#endif - -char* __attribute__((naked)) -strcpy (char* dst, const char* src) -{ - asm ( -#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \ - (defined (__thumb__) && !defined (__thumb2__))) - "pld [r1, #0]\n\t" - "eor r2, r0, r1\n\t" - "mov ip, r0\n\t" - "tst r2, #3\n\t" - "bne 4f\n\t" - "tst r1, #3\n\t" - "bne 3f\n" - "5:\n\t" -#ifndef __thumb2__ - "str r5, [sp, #-4]!\n\t" - "mov r5, #0x01\n\t" - "orr r5, r5, r5, lsl #8\n\t" - "orr r5, r5, r5, lsl #16\n\t" -#endif - - "str r4, [sp, #-4]!\n\t" - "tst r1, #4\n\t" - "ldr r3, [r1], #4\n\t" - "beq 2f\n\t" - "sub r2, r3, "magic1(r5)"\n\t" - "bics r2, r2, r3\n\t" - "tst r2, "magic2(r5)"\n\t" - "itt eq\n\t" - "streq r3, [ip], #4\n\t" - "ldreq r3, [r1], #4\n" - "bne 1f\n\t" - /* Inner loop. We now know that r1 is 64-bit aligned, so we - can safely fetch up to two words. This allows us to avoid - load stalls. 
*/ - ".p2align 2\n" - "2:\n\t" - "pld [r1, #8]\n\t" - "ldr r4, [r1], #4\n\t" - "sub r2, r3, "magic1(r5)"\n\t" - "bics r2, r2, r3\n\t" - "tst r2, "magic2(r5)"\n\t" - "sub r2, r4, "magic1(r5)"\n\t" - "bne 1f\n\t" - "str r3, [ip], #4\n\t" - "bics r2, r2, r4\n\t" - "tst r2, "magic2(r5)"\n\t" - "itt eq\n\t" - "ldreq r3, [r1], #4\n\t" - "streq r4, [ip], #4\n\t" - "beq 2b\n\t" - "mov r3, r4\n" - "1:\n\t" -#ifdef __ARMEB__ - "rors r3, r3, #24\n\t" -#endif - "strb r3, [ip], #1\n\t" - "tst r3, #0xff\n\t" -#ifdef __ARMEL__ - "ror r3, r3, #8\n\t" -#endif - "bne 1b\n\t" - "ldr r4, [sp], #4\n\t" -#ifndef __thumb2__ - "ldr r5, [sp], #4\n\t" -#endif - "BX LR\n" - - /* Strings have the same offset from word alignment, but it's - not zero. */ - "3:\n\t" - "tst r1, #1\n\t" - "beq 1f\n\t" - "ldrb r2, [r1], #1\n\t" - "strb r2, [ip], #1\n\t" - "cmp r2, #0\n\t" - "it eq\n" - "BXEQ LR\n" - "1:\n\t" - "tst r1, #2\n\t" - "beq 5b\n\t" - "ldrh r2, [r1], #2\n\t" -#ifdef __ARMEB__ - "tst r2, #0xff00\n\t" - "iteet ne\n\t" - "strneh r2, [ip], #2\n\t" - "lsreq r2, r2, #8\n\t" - "streqb r2, [ip]\n\t" - "tstne r2, #0xff\n\t" -#else - "tst r2, #0xff\n\t" - "itet ne\n\t" - "strneh r2, [ip], #2\n\t" - "streqb r2, [ip]\n\t" - "tstne r2, #0xff00\n\t" -#endif - "bne 5b\n\t" - "BX LR\n" - - /* src and dst do not have a common word-alignement. Fall back to - byte copying. 
*/ - "4:\n\t" - "ldrb r2, [r1], #1\n\t" - "strb r2, [ip], #1\n\t" - "cmp r2, #0\n\t" - "bne 4b\n\t" - "BX LR" - -#elif !defined (__thumb__) || defined (__thumb2__) - "mov r3, r0\n\t" - "1:\n\t" - "ldrb r2, [r1], #1\n\t" - "strb r2, [r3], #1\n\t" - "cmp r2, #0\n\t" - "bne 1b\n\t" - "BX LR" -#else - "mov r3, r0\n\t" - "1:\n\t" - "ldrb r2, [r1]\n\t" - "add r1, r1, #1\n\t" - "strb r2, [r3]\n\t" - "add r3, r3, #1\n\t" - "cmp r2, #0\n\t" - "bne 1b\n\t" - "BX LR" -#endif - ); -} -/* For GLIBC: libc_hidden_builtin_def (strcpy) */ diff --git a/src/thumb-2/strlen.S b/src/thumb-2/strlen.S deleted file mode 100644 index 8efa235..0000000 --- a/src/thumb-2/strlen.S +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright (c) 2010-2011,2013 Linaro Limited - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Linaro Limited nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - Assumes: - ARMv6T2, AArch32 - - */ - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -#ifdef __ARMEB__ -#define S2LO lsl -#define S2HI lsr -#else -#define S2LO lsr -#define S2HI lsl -#endif - - /* This code requires Thumb. */ - .thumb - .syntax unified - -/* Parameters and result. */ -#define srcin r0 -#define result r0 - -/* Internal variables. */ -#define src r1 -#define data1a r2 -#define data1b r3 -#define const_m1 r12 -#define const_0 r4 -#define tmp1 r4 /* Overlaps const_0 */ -#define tmp2 r5 - -def_fn strlen p2align=6 - pld [srcin, #0] - strd r4, r5, [sp, #-8]! - bic src, srcin, #7 - mvn const_m1, #0 - ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */ - pld [src, #32] - bne.w .Lmisaligned8 - mov const_0, #0 - mov result, #-8 -.Lloop_aligned: - /* Bytes 0-7. */ - ldrd data1a, data1b, [src] - pld [src, #64] - add result, result, #8 -.Lstart_realigned: - uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ - sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ - uadd8 data1b, data1b, const_m1 - sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ - cbnz data1b, .Lnull_found - - /* Bytes 8-15. */ - ldrd data1a, data1b, [src, #8] - uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ - add result, result, #8 - sel data1a, const_0, const_m1 /* Select based on GE<0:3>. 
*/ - uadd8 data1b, data1b, const_m1 - sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ - cbnz data1b, .Lnull_found - - /* Bytes 16-23. */ - ldrd data1a, data1b, [src, #16] - uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ - add result, result, #8 - sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ - uadd8 data1b, data1b, const_m1 - sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ - cbnz data1b, .Lnull_found - - /* Bytes 24-31. */ - ldrd data1a, data1b, [src, #24] - add src, src, #32 - uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ - add result, result, #8 - sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ - uadd8 data1b, data1b, const_m1 - sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ - cmp data1b, #0 - beq .Lloop_aligned - -.Lnull_found: - cmp data1a, #0 - itt eq - addeq result, result, #4 - moveq data1a, data1b -#ifndef __ARMEB__ - rev data1a, data1a -#endif - clz data1a, data1a - ldrd r4, r5, [sp], #8 - add result, result, data1a, lsr #3 /* Bits -> Bytes. */ - bx lr - -.Lmisaligned8: - ldrd data1a, data1b, [src] - and tmp2, tmp1, #3 - rsb result, tmp1, #0 - lsl tmp2, tmp2, #3 /* Bytes -> bits. */ - tst tmp1, #4 - pld [src, #64] - S2HI tmp2, const_m1, tmp2 - orn data1a, data1a, tmp2 - itt ne - ornne data1b, data1b, tmp2 - movne data1a, const_m1 - mov const_0, #0 - b .Lstart_realigned - .size strlen, . - strlen - diff --git a/src/thumb/aeabi_idiv.S b/src/thumb/aeabi_idiv.S deleted file mode 100644 index b15a02c..0000000 --- a/src/thumb/aeabi_idiv.S +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Copyright (c) 2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* An executable stack is *not* required for these functions. */ - -.section .note.GNU-stack,"",%progbits -.previous -.eabi_attribute 25, 1 - -/* ANSI concatenation macros. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ - -#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) - -#define TYPE(x) .type SYM(x),function -#define SIZE(x) .size SYM(x), . 
- SYM(x) -#define LSYM(x) .x - -.macro cfi_start start_label, end_label - .pushsection .debug_frame -LSYM(Lstart_frame): - .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) -LSYM(Lstart_cie): - .4byte 0xffffffff - .byte 0x1 - .ascii "\0" - .uleb128 0x1 - .sleb128 -4 - .byte 0xe - .byte 0xc - .uleb128 0xd - .uleb128 0x0 - - .align 2 -LSYM(Lend_cie): - .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) -LSYM(Lstart_fde): - .4byte LSYM(Lstart_frame) - .4byte \start_label - .4byte \end_label-\start_label - .popsection -.endm - -.macro cfi_end end_label - .pushsection .debug_frame - .align 2 -LSYM(Lend_fde): - .popsection -\end_label: -.endm - -.macro THUMB_LDIV0 name signed - push {r0, lr} - movs r0, #0 - bl SYM(__aeabi_idiv0) - pop {r1, pc} -.endm - -.macro FUNC_END name - SIZE (__\name) -.endm - -.macro DIV_FUNC_END name signed - cfi_start __\name, LSYM(Lend_div0) -LSYM(Ldiv0): - THUMB_LDIV0 \name \signed - cfi_end LSYM(Lend_div0) - FUNC_END \name -.endm - -.macro THUMB_FUNC_START name - .globl SYM (\name) - TYPE (\name) - .thumb_func -SYM (\name): -.endm - -.macro FUNC_START name - .text - .globl SYM (__\name) - TYPE (__\name) - .align 0 - .force_thumb - .thumb_func - .syntax unified -SYM (__\name): -.endm - -.macro FUNC_ALIAS new old - .globl SYM (__\new) - .thumb_set SYM (__\new), SYM (__\old) -.endm - -/* Register aliases. */ -work .req r4 -dividend .req r0 -divisor .req r1 -overdone .req r2 -result .req r2 -curbit .req r3 - -/* ------------------------------------------------------------------------ */ -/* Bodies of the division and modulo routines. 
*/ -/* ------------------------------------------------------------------------ */ -.macro BranchToDiv n, label - lsrs curbit, dividend, \n - cmp curbit, divisor - bcc \label -.endm - -.macro DoDiv n - lsrs curbit, dividend, \n - cmp curbit, divisor - bcc 1f - lsls curbit, divisor, \n - subs dividend, dividend, curbit - -1: adcs result, result -.endm - -.macro THUMB1_Div_Positive - movs result, #0 - BranchToDiv #1, LSYM(Lthumb1_div1) - BranchToDiv #4, LSYM(Lthumb1_div4) - BranchToDiv #8, LSYM(Lthumb1_div8) - BranchToDiv #12, LSYM(Lthumb1_div12) - BranchToDiv #16, LSYM(Lthumb1_div16) -LSYM(Lthumb1_div_large_positive): - movs result, #0xff - lsls divisor, divisor, #8 - rev result, result - lsrs curbit, dividend, #16 - cmp curbit, divisor - bcc 1f - asrs result, #8 - lsls divisor, divisor, #8 - beq LSYM(Ldivbyzero_waypoint) - -1: lsrs curbit, dividend, #12 - cmp curbit, divisor - bcc LSYM(Lthumb1_div12) - b LSYM(Lthumb1_div16) -LSYM(Lthumb1_div_loop): - lsrs divisor, divisor, #8 -LSYM(Lthumb1_div16): - Dodiv #15 - Dodiv #14 - Dodiv #13 - Dodiv #12 -LSYM(Lthumb1_div12): - Dodiv #11 - Dodiv #10 - Dodiv #9 - Dodiv #8 - bcs LSYM(Lthumb1_div_loop) -LSYM(Lthumb1_div8): - Dodiv #7 - Dodiv #6 - Dodiv #5 -LSYM(Lthumb1_div5): - Dodiv #4 -LSYM(Lthumb1_div4): - Dodiv #3 -LSYM(Lthumb1_div3): - Dodiv #2 -LSYM(Lthumb1_div2): - Dodiv #1 -LSYM(Lthumb1_div1): - subs divisor, dividend, divisor - bcs 1f - mov divisor, dividend - -1: adcs result, result - mov dividend, result - bx lr - -LSYM(Ldivbyzero_waypoint): - b LSYM(Ldiv0) -.endm - -.macro THUMB1_Div_Negative - lsrs result, divisor, #31 - beq 1f - rsbs divisor, divisor, #0 - -1: asrs curbit, dividend, #32 - bcc 2f - rsbs dividend, dividend, #0 - -2: eors curbit, result - movs result, #0 - mov ip, curbit - BranchToDiv #4, LSYM(Lthumb1_div_negative4) - BranchToDiv #8, LSYM(Lthumb1_div_negative8) -LSYM(Lthumb1_div_large): - movs result, #0xfc - lsls divisor, divisor, #6 - rev result, result - lsrs curbit, dividend, #8 - cmp curbit, 
divisor - bcc LSYM(Lthumb1_div_negative8) - - lsls divisor, divisor, #6 - asrs result, result, #6 - cmp curbit, divisor - bcc LSYM(Lthumb1_div_negative8) - - lsls divisor, divisor, #6 - asrs result, result, #6 - cmp curbit, divisor - bcc LSYM(Lthumb1_div_negative8) - - lsls divisor, divisor, #6 - beq LSYM(Ldivbyzero_negative) - asrs result, result, #6 - b LSYM(Lthumb1_div_negative8) -LSYM(Lthumb1_div_negative_loop): - lsrs divisor, divisor, #6 -LSYM(Lthumb1_div_negative8): - DoDiv #7 - DoDiv #6 - DoDiv #5 - DoDiv #4 -LSYM(Lthumb1_div_negative4): - DoDiv #3 - DoDiv #2 - bcs LSYM(Lthumb1_div_negative_loop) - DoDiv #1 - subs divisor, dividend, divisor - bcs 1f - mov divisor, dividend - -1: mov curbit, ip - adcs result, result - asrs curbit, curbit, #1 - mov dividend, result - bcc 2f - rsbs dividend, dividend, #0 - cmp curbit, #0 - -2: bpl 3f - rsbs divisor, divisor, #0 - -3: bx lr - -LSYM(Ldivbyzero_negative): - mov curbit, ip - asrs curbit, curbit, #1 - bcc LSYM(Ldiv0) - rsbs dividend, dividend, #0 -.endm - -/* ------------------------------------------------------------------------ */ -/* Start of the Real Functions */ -/* ------------------------------------------------------------------------ */ - - FUNC_START aeabi_idiv0 - bx lr - FUNC_END aeabi_idiv0 - - FUNC_START divsi3 - FUNC_ALIAS aeabi_idiv divsi3 - -LSYM(divsi3_skip_div0_test): - mov curbit, dividend - orrs curbit, divisor - bmi LSYM(Lthumb1_div_negative) - -LSYM(Lthumb1_div_positive): - THUMB1_Div_Positive - -LSYM(Lthumb1_div_negative): - THUMB1_Div_Negative - - DIV_FUNC_END divsi3 signed - - FUNC_START aeabi_idivmod - - cmp r1, #0 - beq LSYM(Ldiv0) - push {r0, r1, lr} - bl LSYM(divsi3_skip_div0_test) - POP {r1, r2, r3} - mul r2, r0 - sub r1, r1, r2 - bx r3 - - FUNC_END aeabi_idivmod -/* ------------------------------------------------------------------------ */ diff --git a/src/thumb/strcmp-armv6m.S b/src/thumb/strcmp-armv6m.S deleted file mode 100644 index d1255e0..0000000 --- 
a/src/thumb/strcmp-armv6m.S +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2014 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Implementation of strcmp for ARMv6m. This version is only used in - ARMv6-M when we want an efficient implementation. Otherwize if the - code size is preferred, strcmp-armv4t.S will be used. 
*/ - - .thumb_func - .syntax unified - .arch armv6-m - - .macro DoSub n, label - subs r0, r0, r1 -#ifdef __ARM_BIG_ENDIAN - lsrs r1, r4, \n -#else - lsls r1, r4, \n -#endif - orrs r1, r0 - bne \label - .endm - - .macro Byte_Test n, label - lsrs r0, r2, \n - lsrs r1, r3, \n - DoSub \n, \label - .endm - - .text - .p2align 0 - .global strcmp - .type strcmp, %function -strcmp: - .cfi_startproc - mov r2, r0 - push {r4, r5, r6, lr} - orrs r2, r1 - lsls r2, r2, #30 - bne 6f - ldr r5, =0x01010101 - lsls r6, r5, #7 -1: - ldmia r0!, {r2} - ldmia r1!, {r3} - subs r4, r2, r5 - bics r4, r2 - ands r4, r6 - beq 3f - -#ifdef __ARM_BIG_ENDIAN - Byte_Test #24, 4f - Byte_Test #16, 4f - Byte_Test #8, 4f - - b 7f -3: - cmp r2, r3 - beq 1b - cmp r2, r3 -#else - uxtb r0, r2 - uxtb r1, r3 - DoSub #24, 2f - - uxth r0, r2 - uxth r1, r3 - DoSub #16, 2f - - lsls r0, r2, #8 - lsls r1, r3, #8 - lsrs r0, r0, #8 - lsrs r1, r1, #8 - DoSub #8, 2f - - lsrs r0, r2, #24 - lsrs r1, r3, #24 - subs r0, r0, r1 -2: - pop {r4, r5, r6, pc} - -3: - cmp r2, r3 - beq 1b - rev r0, r2 - rev r1, r3 - cmp r0, r1 -#endif - - bls 5f - movs r0, #1 -4: - pop {r4, r5, r6, pc} -5: - movs r0, #0 - mvns r0, r0 - pop {r4, r5, r6, pc} -6: - ldrb r2, [r0, #0] - ldrb r3, [r1, #0] - adds r0, #1 - adds r1, #1 - cmp r2, #0 - beq 7f - cmp r2, r3 - bne 7f - ldrb r2, [r0, #0] - ldrb r3, [r1, #0] - adds r0, #1 - adds r1, #1 - cmp r2, #0 - beq 7f - cmp r2, r3 - beq 6b -7: - subs r0, r2, r3 - pop {r4, r5, r6, pc} - .cfi_endproc - .size strcmp, . - strcmp diff --git a/tests/test-memchr.c b/tests/test-memchr.c deleted file mode 100644 index 96cce28..0000000 --- a/tests/test-memchr.c +++ /dev/null @@ -1,178 +0,0 @@ -/* Test and measure memchr functions. - Copyright (C) 1999, 2002, 2003, 2005, 2009 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#define TEST_MAIN -#include "test-string.h" - -typedef char *(*proto_t) (const char *, int, size_t); -char *simple_memchr (const char *, int, size_t); - -IMPL (simple_memchr, 0) -IMPL (memchr, 1) - -char * -simple_memchr (const char *s, int c, size_t n) -{ - while (n--) - if (*s++ == (char) c) - return (char *) s - 1; - return NULL; -} - -static void -do_one_test (impl_t *impl, const char *s, int c, size_t n, char *exp_res) -{ - char *res = CALL (impl, s, c, n); - if (res != exp_res) - { - error (0, 0, "Wrong result in function %s %p %p", impl->name, - res, exp_res); - ret = 1; - return; - } -} - -static void -do_test (size_t align, size_t pos, size_t len, int seek_char) -{ - size_t i; - char *result; - - align &= 7; - if (align + len >= page_size) - return; - - for (i = 0; i < len; ++i) - { - buf1[align + i] = 1 + 23 * i % 127; - if (buf1[align + i] == seek_char) - buf1[align + i] = seek_char + 1; - } - buf1[align + len] = 0; - - if (pos < len) - { - buf1[align + pos] = seek_char; - buf1[align + len] = -seek_char; - result = (char *) (buf1 + align + pos); - } - else - { - result = NULL; - buf1[align + len] = seek_char; - } - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, (char *) (buf1 + align), seek_char, len, result); -} - -static 
void -do_random_tests (void) -{ - size_t i, j, n, align, pos, len; - int seek_char; - char *result; - unsigned char *p = buf1 + page_size - 512; - - for (n = 0; n < ITERATIONS; n++) - { - align = random () & 15; - pos = random () & 511; - if (pos + align >= 512) - pos = 511 - align - (random () & 7); - len = random () & 511; - if (pos >= len) - len = pos + (random () & 7); - if (len + align >= 512) - len = 512 - align - (random () & 7); - seek_char = random () & 255; - j = len + align + 64; - if (j > 512) - j = 512; - - for (i = 0; i < j; i++) - { - if (i == pos + align) - p[i] = seek_char; - else - { - p[i] = random () & 255; - if (i < pos + align && p[i] == seek_char) - p[i] = seek_char + 13; - } - } - - if (pos < len) - { - size_t r = random (); - if ((r & 31) == 0) - len = ~(uintptr_t) (p + align) - ((r >> 5) & 31); - result = (char *) (p + pos + align); - } - else - result = NULL; - - FOR_EACH_IMPL (impl, 1) - if (CALL (impl, (char *) (p + align), seek_char, len) != result) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %d, %zd, %zd) %p != %p, p %p", - n, impl->name, align, seek_char, len, pos, - CALL (impl, (char *) (p + align), seek_char, len), - result, p); - ret = 1; - } - } -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - printf ("%20s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i = 1; i < 8; ++i) - { - do_test (0, 16 << i, 2048, 23); - do_test (i, 64, 256, 23); - do_test (0, 16 << i, 2048, 0); - do_test (i, 64, 256, 0); - } - for (i = 1; i < 32; ++i) - { - do_test (0, i, i + 1, 23); - do_test (0, i, i + 1, 0); - } - - do_random_tests (); - - /* Test for behaviour with zero length. 
*/ - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, NULL, 'a', 0, NULL); - - return ret; -} - -#include "test-skeleton.c" diff --git a/tests/test-memcmp.c b/tests/test-memcmp.c deleted file mode 100644 index 038951d..0000000 --- a/tests/test-memcmp.c +++ /dev/null @@ -1,500 +0,0 @@ -/* Test and measure memcmp functions. - Copyright (C) 1999-2012 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - Added wmemcmp support by Liubov Dmitrieva <liubov.dmitrieva@gmail.com>, 2011. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define TEST_MAIN -#ifdef WIDE -# define TEST_NAME "wmemcmp" -#else -# define TEST_NAME "memcmp" -#endif -#include "test-string.h" -#ifdef WIDE -# include <inttypes.h> -# include <wchar.h> - -# define MEMCMP wmemcmp -# define MEMCPY wmemcpy -# define SIMPLE_MEMCMP simple_wmemcmp -# define CHAR wchar_t -# define UCHAR wchar_t -# define CHARBYTES 4 -# define CHAR__MIN WCHAR_MIN -# define CHAR__MAX WCHAR_MAX -int -simple_wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n) -{ - int ret = 0; - /* Warning! - wmemcmp has to use SIGNED comparison for elements. - memcmp has to use UNSIGNED comparison for elemnts. - */ - while (n-- && (ret = *s1 < *s2 ? -1 : *s1 == *s2 ? 
0 : 1) == 0) {s1++; s2++;} - return ret; -} -#else -# include <limits.h> - -# define MEMCMP memcmp -# define MEMCPY memcpy -# define SIMPLE_MEMCMP simple_memcmp -# define CHAR char -# define MAX_CHAR 255 -# define UCHAR unsigned char -# define CHARBYTES 1 -# define CHAR__MIN CHAR_MIN -# define CHAR__MAX CHAR_MAX - -int -simple_memcmp (const char *s1, const char *s2, size_t n) -{ - int ret = 0; - - while (n-- && (ret = *(unsigned char *) s1++ - *(unsigned char *) s2++) == 0); - return ret; -} -#endif - -typedef int (*proto_t) (const CHAR *, const CHAR *, size_t); - -IMPL (SIMPLE_MEMCMP, 0) -IMPL (MEMCMP, 1) - -static int -check_result (impl_t *impl, const CHAR *s1, const CHAR *s2, size_t len, - int exp_result) -{ - int result = CALL (impl, s1, s2, len); - if ((exp_result == 0 && result != 0) - || (exp_result < 0 && result >= 0) - || (exp_result > 0 && result <= 0)) - { - error (0, 0, "Wrong result in function %s %d %d", impl->name, - result, exp_result); - ret = 1; - return -1; - } - - return 0; -} - -static void -do_one_test (impl_t *impl, const CHAR *s1, const CHAR *s2, size_t len, - int exp_result) -{ - if (check_result (impl, s1, s2, len, exp_result) < 0) - return; -} - -static void -do_test (size_t align1, size_t align2, size_t len, int exp_result) -{ - size_t i; - CHAR *s1, *s2; - - if (len == 0) - return; - - align1 &= 63; - if (align1 + (len + 1) * CHARBYTES >= page_size) - return; - - align2 &= 63; - if (align2 + (len + 1) * CHARBYTES >= page_size) - return; - - s1 = (CHAR *) (buf1 + align1); - s2 = (CHAR *) (buf2 + align2); - - for (i = 0; i < len; i++) - s1[i] = s2[i] = 1 + (23 << ((CHARBYTES - 1) * 8)) * i % CHAR__MAX; - - s1[len] = align1; - s2[len] = align2; - s2[len - 1] -= exp_result; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, s1, s2, len, exp_result); -} - -static void -do_random_tests (void) -{ - size_t i, j, n, align1, align2, pos, len; - int result; - long r; - UCHAR *p1 = (UCHAR *) (buf1 + page_size - 512 * CHARBYTES); - UCHAR *p2 = (UCHAR 
*) (buf2 + page_size - 512 * CHARBYTES); - - for (n = 0; n < ITERATIONS; n++) - { - align1 = random () & 31; - if (random () & 1) - align2 = random () & 31; - else - align2 = align1 + (random () & 24); - pos = random () & 511; - j = align1; - if (align2 > j) - j = align2; - if (pos + j >= 512) - pos = 511 - j - (random () & 7); - len = random () & 511; - if (len + j >= 512) - len = 511 - j - (random () & 7); - j = len + align1 + 64; - if (j > 512) j = 512; - for (i = 0; i < j; ++i) - p1[i] = random () & 255; - for (i = 0; i < j; ++i) - p2[i] = random () & 255; - - result = 0; - if (pos >= len) - MEMCPY ((CHAR *) p2 + align2, (const CHAR *) p1 + align1, len); - else - { - MEMCPY ((CHAR *) p2 + align2, (const CHAR *) p1 + align1, pos); - if (p2[align2 + pos] == p1[align1 + pos]) - { - p2[align2 + pos] = random () & 255; - if (p2[align2 + pos] == p1[align1 + pos]) - p2[align2 + pos] = p1[align1 + pos] + 3 + (random () & 127); - } - - if (p1[align1 + pos] < p2[align2 + pos]) - result = -1; - else - result = 1; - } - - FOR_EACH_IMPL (impl, 1) - { - r = CALL (impl, (CHAR *) p1 + align1, (const CHAR *) p2 + align2, - len); - if ((r == 0 && result) - || (r < 0 && result >= 0) - || (r > 0 && result <= 0)) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p", - n, impl->name, align1 * CHARBYTES & 63, align2 * CHARBYTES & 63, len, pos, r, result, p1, p2); - ret = 1; - } - } - } -} - -static void -check1 (void) -{ - CHAR s1[116], s2[116]; - int n, exp_result; - - s1[0] = -108; - s2[0] = -108; - s1[1] = 99; - s2[1] = 99; - s1[2] = -113; - s2[2] = -113; - s1[3] = 1; - s2[3] = 1; - s1[4] = 116; - s2[4] = 116; - s1[5] = 99; - s2[5] = 99; - s1[6] = -113; - s2[6] = -113; - s1[7] = 1; - s2[7] = 1; - s1[8] = 84; - s2[8] = 84; - s1[9] = 99; - s2[9] = 99; - s1[10] = -113; - s2[10] = -113; - s1[11] = 1; - s2[11] = 1; - s1[12] = 52; - s2[12] = 52; - s1[13] = 99; - s2[13] = 99; - s1[14] = -113; - s2[14] = -113; - s1[15] = 1; - s2[15] 
= 1; - s1[16] = -76; - s2[16] = -76; - s1[17] = -14; - s2[17] = -14; - s1[18] = -109; - s2[18] = -109; - s1[19] = 1; - s2[19] = 1; - s1[20] = -108; - s2[20] = -108; - s1[21] = -14; - s2[21] = -14; - s1[22] = -109; - s2[22] = -109; - s1[23] = 1; - s2[23] = 1; - s1[24] = 84; - s2[24] = 84; - s1[25] = -15; - s2[25] = -15; - s1[26] = -109; - s2[26] = -109; - s1[27] = 1; - s2[27] = 1; - s1[28] = 52; - s2[28] = 52; - s1[29] = -15; - s2[29] = -15; - s1[30] = -109; - s2[30] = -109; - s1[31] = 1; - s2[31] = 1; - s1[32] = 20; - s2[32] = 20; - s1[33] = -15; - s2[33] = -15; - s1[34] = -109; - s2[34] = -109; - s1[35] = 1; - s2[35] = 1; - s1[36] = 20; - s2[36] = 20; - s1[37] = -14; - s2[37] = -14; - s1[38] = -109; - s2[38] = -109; - s1[39] = 1; - s2[39] = 1; - s1[40] = 52; - s2[40] = 52; - s1[41] = -14; - s2[41] = -14; - s1[42] = -109; - s2[42] = -109; - s1[43] = 1; - s2[43] = 1; - s1[44] = 84; - s2[44] = 84; - s1[45] = -14; - s2[45] = -14; - s1[46] = -109; - s2[46] = -109; - s1[47] = 1; - s2[47] = 1; - s1[48] = 116; - s2[48] = 116; - s1[49] = -14; - s2[49] = -14; - s1[50] = -109; - s2[50] = -109; - s1[51] = 1; - s2[51] = 1; - s1[52] = 116; - s2[52] = 116; - s1[53] = -15; - s2[53] = -15; - s1[54] = -109; - s2[54] = -109; - s1[55] = 1; - s2[55] = 1; - s1[56] = -44; - s2[56] = -44; - s1[57] = -14; - s2[57] = -14; - s1[58] = -109; - s2[58] = -109; - s1[59] = 1; - s2[59] = 1; - s1[60] = -108; - s2[60] = -108; - s1[61] = -15; - s2[61] = -15; - s1[62] = -109; - s2[62] = -109; - s1[63] = 1; - s2[63] = 1; - s1[64] = -76; - s2[64] = -76; - s1[65] = -15; - s2[65] = -15; - s1[66] = -109; - s2[66] = -109; - s1[67] = 1; - s2[67] = 1; - s1[68] = -44; - s2[68] = -44; - s1[69] = -15; - s2[69] = -15; - s1[70] = -109; - s2[70] = -109; - s1[71] = 1; - s2[71] = 1; - s1[72] = -12; - s2[72] = -12; - s1[73] = -15; - s2[73] = -15; - s1[74] = -109; - s2[74] = -109; - s1[75] = 1; - s2[75] = 1; - s1[76] = -12; - s2[76] = -12; - s1[77] = -14; - s2[77] = -14; - s1[78] = -109; - s2[78] = -109; - s1[79] = 1; 
- s2[79] = 1; - s1[80] = 20; - s2[80] = -68; - s1[81] = -12; - s2[81] = 64; - s1[82] = -109; - s2[82] = -106; - s1[83] = 1; - s2[83] = 1; - s1[84] = -12; - s2[84] = -12; - s1[85] = -13; - s2[85] = -13; - s1[86] = -109; - s2[86] = -109; - s1[87] = 1; - s2[87] = 1; - s1[88] = -44; - s2[88] = -44; - s1[89] = -13; - s2[89] = -13; - s1[90] = -109; - s2[90] = -109; - s1[91] = 1; - s2[91] = 1; - s1[92] = -76; - s2[92] = -76; - s1[93] = -13; - s2[93] = -13; - s1[94] = -109; - s2[94] = -109; - s1[95] = 1; - s2[95] = 1; - s1[96] = -108; - s2[96] = -108; - s1[97] = -13; - s2[97] = -13; - s1[98] = -109; - s2[98] = -109; - s1[99] = 1; - s2[99] = 1; - s1[100] = 116; - s2[100] = 116; - s1[101] = CHAR__MIN; - s2[101] = CHAR__MAX; - s1[102] = -109; - s2[102] = -109; - s1[103] = 1; - s2[103] = 1; - s1[104] = 84; - s2[104] = 84; - s1[105] = -13; - s2[105] = -13; - s1[106] = -109; - s2[106] = -109; - s1[107] = 1; - s2[107] = 1; - s1[108] = 52; - s2[108] = 52; - s1[109] = -13; - s2[109] = -13; - s1[110] = -109; - s2[110] = -109; - s1[111] = 1; - s2[111] = 1; - s1[112] = CHAR__MAX; - s2[112] = CHAR__MIN; - s1[113] = -13; - s2[113] = -13; - s1[114] = -109; - s2[114] = -109; - s1[115] = 1; - s2[115] = 1; - - n = 116; - for (size_t i = 0; i < n; i++) - { - exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, n - i); - FOR_EACH_IMPL (impl, 0) - check_result (impl, s1 + i, s2 + i, n - i, exp_result); - } -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - check1 (); - - printf ("%23s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i = 1; i < 16; ++i) - { - do_test (i * CHARBYTES, i * CHARBYTES, i, 0); - do_test (i * CHARBYTES, i * CHARBYTES, i, 1); - do_test (i * CHARBYTES, i * CHARBYTES, i, -1); - } - - for (i = 0; i < 16; ++i) - { - do_test (0, 0, i, 0); - do_test (0, 0, i, 1); - do_test (0, 0, i, -1); - } - - for (i = 1; i < 10; ++i) - { - do_test (0, 0, 2 << i, 0); - do_test (0, 0, 2 << i, 1); - do_test (0, 0, 2 << i, -1); - do_test (0, 0, 
16 << i, 0); - do_test ((8 - i) * CHARBYTES, (2 * i) * CHARBYTES, 16 << i, 0); - do_test (0, 0, 16 << i, 1); - do_test (0, 0, 16 << i, -1); - } - - for (i = 1; i < 8; ++i) - { - do_test (i * CHARBYTES, 2 * (i * CHARBYTES), 8 << i, 0); - do_test (i * CHARBYTES, 2 * (i * CHARBYTES), 8 << i, 1); - do_test (i * CHARBYTES, 2 * (i * CHARBYTES), 8 << i, -1); - } - - do_random_tests (); - return ret; -} -#include "test-skeleton.c" diff --git a/tests/test-memcpy.c b/tests/test-memcpy.c deleted file mode 100644 index 564ac1f..0000000 --- a/tests/test-memcpy.c +++ /dev/null @@ -1,253 +0,0 @@ -/* Test and measure memcpy functions. - Copyright (C) 1999, 2002, 2003, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. 
*/ - -#ifndef MEMCPY_RESULT -# define MEMCPY_RESULT(dst, len) dst -# define MIN_PAGE_SIZE 131072 -# define TEST_MAIN -# include "test-string.h" - -char *simple_memcpy (char *, const char *, size_t); -char *builtin_memcpy (char *, const char *, size_t); - -IMPL (simple_memcpy, 0) -IMPL (builtin_memcpy, 0) -IMPL (memcpy, 1) - -char * -simple_memcpy (char *dst, const char *src, size_t n) -{ - char *ret = dst; - while (n--) - *dst++ = *src++; - return ret; -} - -char * -builtin_memcpy (char *dst, const char *src, size_t n) -{ - return __builtin_memcpy (dst, src, n); -} -#endif - -typedef char *(*proto_t) (char *, const char *, size_t); - -static void -do_one_test (impl_t *impl, char *dst, const char *src, - size_t len) -{ - if (CALL (impl, dst, src, len) != MEMCPY_RESULT (dst, len)) - { - error (0, 0, "Wrong result in function %s %p %p", impl->name, - CALL (impl, dst, src, len), MEMCPY_RESULT (dst, len)); - ret = 1; - return; - } - - if (memcmp (dst, src, len) != 0) - { - error (0, 0, "Wrong result in function %s dst \"%s\" src \"%s\"", - impl->name, dst, src); - ret = 1; - return; - } -} - -static void -do_test (size_t align1, size_t align2, size_t len) -{ - size_t i, j; - char *s1, *s2; - - align1 &= 63; - if (align1 + len >= page_size) - return; - - align2 &= 63; - if (align2 + len >= page_size) - return; - - s1 = (char *) (buf1 + align1); - s2 = (char *) (buf2 + align2); - - for (i = 0, j = 1; i < len; i++, j += 23) - s1[i] = j; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, s2, s1, len); -} - -static void -do_random_tests (void) -{ - size_t i, j, n, align1, align2, len, size1, size2, size; - int c; - unsigned char *p1, *p2; - unsigned char *res; - - for (n = 0; n < ITERATIONS; n++) - { - if (n == 0) - { - len = getpagesize (); - size = len + 512; - size1 = size; - size2 = size; - align1 = 512; - align2 = 512; - } - else - { - if ((random () & 255) == 0) - size = 65536; - else - size = 768; - if (size > page_size) - size = page_size; - size1 = size; - size2 = 
size; - i = random (); - if (i & 3) - size -= 256; - if (i & 1) - size1 -= 256; - if (i & 2) - size2 -= 256; - if (i & 4) - { - len = random () % size; - align1 = size1 - len - (random () & 31); - align2 = size2 - len - (random () & 31); - if (align1 > size1) - align1 = 0; - if (align2 > size2) - align2 = 0; - } - else - { - align1 = random () & 63; - align2 = random () & 63; - len = random () % size; - if (align1 + len > size1) - align1 = size1 - len; - if (align2 + len > size2) - align2 = size2 - len; - } - } - p1 = buf1 + page_size - size1; - p2 = buf2 + page_size - size2; - c = random () & 255; - j = align1 + len + 256; - if (j > size1) - j = size1; - for (i = 0; i < j; ++i) - p1[i] = random () & 255; - - FOR_EACH_IMPL (impl, 1) - { - j = align2 + len + 256; - if (j > size2) - j = size2; - memset (p2, c, j); - res = (unsigned char *) CALL (impl, - (char *) (p2 + align2), - (char *) (p1 + align1), len); - if (res != MEMCPY_RESULT (p2 + align2, len)) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd) %p != %p", - n, impl->name, align1, align2, len, res, - MEMCPY_RESULT (p2 + align2, len)); - ret = 1; - } - for (i = 0; i < align2; ++i) - { - if (p2[i] != c) - { - error (0, 0, "Iteration %zd - garbage before, %s (%zd, %zd, %zd)", - n, impl->name, align1, align2, len); - ret = 1; - break; - } - } - for (i = align2 + len; i < j; ++i) - { - if (p2[i] != c) - { - error (0, 0, "Iteration %zd - garbage after, %s (%zd, %zd, %zd)", - n, impl->name, align1, align2, len); - ret = 1; - break; - } - } - if (memcmp (p1 + align1, p2 + align2, len)) - { - error (0, 0, "Iteration %zd - different strings, %s (%zd, %zd, %zd)", - n, impl->name, align1, align2, len); - ret = 1; - } - } - } -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - printf ("%23s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i = 0; i < 18; ++i) - { - do_test (0, 0, 1 << i); - do_test (i, 0, 1 << i); - do_test (0, i, 1 << i); 
- do_test (i, i, 1 << i); - } - - for (i = 0; i < 32; ++i) - { - do_test (0, 0, i); - do_test (i, 0, i); - do_test (0, i, i); - do_test (i, i, i); - } - - for (i = 3; i < 32; ++i) - { - if ((i & (i - 1)) == 0) - continue; - do_test (0, 0, 16 * i); - do_test (i, 0, 16 * i); - do_test (0, i, 16 * i); - do_test (i, i, 16 * i); - } - - do_test (0, 0, getpagesize ()); - - do_random_tests (); - return ret; -} - -#include "test-skeleton.c" diff --git a/tests/test-memmove.c b/tests/test-memmove.c deleted file mode 100644 index f783363..0000000 --- a/tests/test-memmove.c +++ /dev/null @@ -1,288 +0,0 @@ -/* Test and measure memmove functions. - Copyright (C) 1999-2012 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#define TEST_MAIN -#ifdef TEST_BCOPY -# define TEST_NAME "bcopy" -#else -# define TEST_NAME "memmove" -#endif -#include "test-string.h" - -char *simple_memmove (char *, const char *, size_t); - -#ifdef TEST_BCOPY -typedef void (*proto_t) (const char *, char *, size_t); -void simple_bcopy (const char *, char *, size_t); - -IMPL (simple_bcopy, 0) -IMPL (bcopy, 1) - -void -simple_bcopy (const char *src, char *dst, size_t n) -{ - simple_memmove (dst, src, n); -} -#else -typedef char *(*proto_t) (char *, const char *, size_t); - -IMPL (simple_memmove, 0) -IMPL (memmove, 1) -#endif - -char * -simple_memmove (char *dst, const char *src, size_t n) -{ - char *ret = dst; - if (src < dst) - { - dst += n; - src += n; - while (n--) - *--dst = *--src; - } - else - while (n--) - *dst++ = *src++; - return ret; -} - -static void -do_one_test (impl_t *impl, char *dst, char *src, const char *orig_src, - size_t len) -{ - memcpy (src, orig_src, len); -#ifdef TEST_BCOPY - CALL (impl, src, dst, len); -#else - char *res; - - res = CALL (impl, dst, src, len); - if (res != dst) - { - error (0, 0, "Wrong result in function %s %p %p", impl->name, - res, dst); - ret = 1; - return; - } -#endif - - if (memcmp (dst, orig_src, len) != 0) - { - error (0, 0, "Wrong result in function %s dst \"%s\" src \"%s\"", - impl->name, dst, src); - ret = 1; - return; - } -} - -static void -do_test (size_t align1, size_t align2, size_t len) -{ - size_t i, j; - char *s1, *s2; - - align1 &= 63; - if (align1 + len >= page_size) - return; - - align2 &= 63; - if (align2 + len >= page_size) - return; - - s1 = (char *) (buf1 + align1); - s2 = (char *) (buf2 + align2); - - for (i = 0, j = 1; i < len; i++, j += 23) - s1[i] = j; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, s2, (char *) (buf2 + align1), s1, len); -} - -static void -do_random_tests (void) -{ - size_t i, n, align1, align2, len, size; - size_t srcstart, srcend, dststart, dstend; - int c; - unsigned char *p1, *p2; -#ifndef TEST_BCOPY - unsigned char 
*res; -#endif - - for (n = 0; n < ITERATIONS; n++) - { - if ((random () & 255) == 0) - size = 65536; - else - size = 512; - if (size > page_size) - size = page_size; - if ((random () & 3) == 0) - { - len = random () & (size - 1); - align1 = size - len - (random () & 31); - align2 = size - len - (random () & 31); - if (align1 > size) - align1 = 0; - if (align2 > size) - align2 = 0; - } - else - { - align1 = random () & (size / 2 - 1); - align2 = random () & (size / 2 - 1); - len = random () & (size - 1); - if (align1 + len > size) - align1 = size - len; - if (align2 + len > size) - align2 = size - len; - } - - p1 = buf1 + page_size - size; - p2 = buf2 + page_size - size; - c = random () & 255; - srcend = align1 + len + 256; - if (srcend > size) - srcend = size; - if (align1 > 256) - srcstart = align1 - 256; - else - srcstart = 0; - for (i = srcstart; i < srcend; ++i) - p1[i] = random () & 255; - dstend = align2 + len + 256; - if (dstend > size) - dstend = size; - if (align2 > 256) - dststart = align2 - 256; - else - dststart = 0; - - FOR_EACH_IMPL (impl, 1) - { - memset (p2 + dststart, c, dstend - dststart); - memcpy (p2 + srcstart, p1 + srcstart, srcend - srcstart); -#ifdef TEST_BCOPY - CALL (impl, (char *) (p2 + align1), (char *) (p2 + align2), len); -#else - res = (unsigned char *) CALL (impl, - (char *) (p2 + align2), - (char *) (p2 + align1), len); - if (res != p2 + align2) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd) %p != %p", - n, impl->name, align1, align2, len, res, p2 + align2); - ret = 1; - } -#endif - if (memcmp (p1 + align1, p2 + align2, len)) - { - error (0, 0, "Iteration %zd - different strings, %s (%zd, %zd, %zd)", - n, impl->name, align1, align2, len); - ret = 1; - } - for (i = dststart; i < dstend; ++i) - { - if (i >= align2 && i < align2 + len) - { - i = align2 + len - 1; - continue; - } - if (i >= srcstart && i < srcend) - { - i = srcend - 1; - continue; - } - if (p2[i] != c) - { - error (0, 0, "Iteration %zd - 
garbage in memset area, %s (%zd, %zd, %zd)", - n, impl->name, align1, align2, len); - ret = 1; - break; - } - } - - if (srcstart < align2 - && memcmp (p2 + srcstart, p1 + srcstart, - (srcend > align2 ? align2 : srcend) - srcstart)) - { - error (0, 0, "Iteration %zd - garbage before dst, %s (%zd, %zd, %zd)", - n, impl->name, align1, align2, len); - ret = 1; - break; - } - - i = srcstart > align2 + len ? srcstart : align2 + len; - if (srcend > align2 + len - && memcmp (p2 + i, p1 + i, srcend - i)) - { - error (0, 0, "Iteration %zd - garbage after dst, %s (%zd, %zd, %zd)", - n, impl->name, align1, align2, len); - ret = 1; - break; - } - } - } -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - printf ("%23s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i = 0; i < 14; ++i) - { - do_test (0, 32, 1 << i); - do_test (32, 0, 1 << i); - do_test (0, i, 1 << i); - do_test (i, 0, 1 << i); - } - - for (i = 0; i < 32; ++i) - { - do_test (0, 32, i); - do_test (32, 0, i); - do_test (0, i, i); - do_test (i, 0, i); - } - - for (i = 3; i < 32; ++i) - { - if ((i & (i - 1)) == 0) - continue; - do_test (0, 32, 16 * i); - do_test (32, 0, 16 * i); - do_test (0, i, 16 * i); - do_test (i, 0, 16 * i); - } - - do_random_tests (); - return ret; -} - -#include "test-skeleton.c" diff --git a/tests/test-memset.c b/tests/test-memset.c deleted file mode 100644 index 4e905f3..0000000 --- a/tests/test-memset.c +++ /dev/null @@ -1,192 +0,0 @@ -/* Test and measure memset functions. - Copyright (C) 1999, 2002, 2003, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#define TEST_MAIN -#define MIN_PAGE_SIZE 131072 -#include "test-string.h" - -typedef char *(*proto_t) (char *, int, size_t); -char *simple_memset (char *, int, size_t); -char *builtin_memset (char *, int, size_t); - -IMPL (simple_memset, 0) -IMPL (builtin_memset, 0) -IMPL (memset, 1) - -char * -simple_memset (char *s, int c, size_t n) -{ - char *r = s, *end = s + n; - while (r < end) - *r++ = c; - return s; -} - -char * -builtin_memset (char *s, int c, size_t n) -{ - return __builtin_memset (s, c, n); -} - -static void -do_one_test (impl_t *impl, char *s, int c, size_t n) -{ - char *res = CALL (impl, s, c, n); - char tstbuf[n]; - if (res != s - || simple_memset (tstbuf, c, n) != tstbuf - || memcmp (s, tstbuf, n) != 0) - { - error (0, 0, "Wrong result in function %s", impl->name); - ret = 1; - return; - } -} - -static void -do_test (size_t align, int c, size_t len) -{ - align &= 7; - if (align + len > page_size) - return; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, (char *) buf1 + align, c, len); -} - -static void -do_random_tests (void) -{ - size_t i, j, k, n, align, len, size; - int c, o; - unsigned char *p, *res; - - for (i = 0; i < 65536; ++i) - buf2[i] = random () & 255; - - for (n = 0; n < ITERATIONS; n++) - { - if ((random () & 31) == 0) - size = 65536; - else - size = 512; - p = buf1 + page_size - size; - len = random () & (size - 1); - align = size - len - (random () & 31); - if (align > size) - align = size - len; - if ((random () & 7) == 0) - align &= ~63; - if ((random () & 7) 
== 0) - c = 0; - else - c = random () & 255; - o = random () & 255; - if (o == c) - o = (c + 1) & 255; - j = len + align + 128; - if (j > size) - j = size; - if (align >= 128) - k = align - 128; - else - k = 0; - for (i = k; i < align; ++i) - p[i] = o; - for (i = align + len; i < j; ++i) - p[i] = o; - - FOR_EACH_IMPL (impl, 1) - { - for (i = 0; i < len; ++i) - { - p[i + align] = buf2[i]; - if (p[i + align] == c) - p[i + align] = o; - } - res = (unsigned char *) CALL (impl, (char *) p + align, c, len); - if (res != p + align) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %d, %zd) %p != %p", - n, impl->name, align, c, len, res, p + align); - ret = 1; - } - for (i = k; i < align; ++i) - if (p[i] != o) - { - error (0, 0, "Iteration %zd - garbage before %s (%zd, %d, %zd)", - n, impl->name, align, c, len); - ret = 1; - break; - } - for (; i < align + len; ++i) - if (p[i] != c) - { - error (0, 0, "Iteration %zd - not cleared correctly %s (%zd, %d, %zd)", - n, impl->name, align, c, len); - ret = 1; - break; - } - for (; i < j; ++i) - if (p[i] != o) - { - error (0, 0, "Iteration %zd - garbage after %s (%zd, %d, %zd)", - n, impl->name, align, c, len); - ret = 1; - break; - } - } - } -} - -int -test_main (void) -{ - size_t i; - int c; - - test_init (); - - printf ("%24s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (c = -65; c <= 130; c += 65) - { - for (i = 0; i < 18; ++i) - do_test (0, c, 1 << i); - for (i = 1; i < 32; ++i) - { - do_test (i, c, i); - if (i & (i - 1)) - do_test (0, c, i); - } - do_test (1, c, 14); - do_test (3, c, 1024); - do_test (4, c, 64); - do_test (2, c, 25); - } - - do_random_tests (); - return ret; -} - -#include "test-skeleton.c" diff --git a/tests/test-skeleton.c b/tests/test-skeleton.c deleted file mode 100644 index 1839e07..0000000 --- a/tests/test-skeleton.c +++ /dev/null @@ -1,447 +0,0 @@ -/* Skeleton for test programs. 
- Copyright (C) 1998,2000-2004, 2005, 2009 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <assert.h> -#include <errno.h> -#include <getopt.h> -#include <malloc.h> -#include <search.h> -#include <signal.h> -#include <stdbool.h> -#include <stdlib.h> -#include <string.h> -#include <sys/param.h> -#include <sys/resource.h> -#include <sys/time.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/stat.h> -#include <time.h> -#include <unistd.h> - -/* The test function is normally called `do_test' and it is called - with argc and argv as the arguments. We nevertheless provide the - possibility to overwrite this name. */ -#ifndef TEST_FUNCTION -# define TEST_FUNCTION do_test (argc, argv) -#endif - -#define OPT_DIRECT 1000 -#define OPT_TESTDIR 1001 - -static struct option options[] = -{ -#ifdef CMDLINE_OPTIONS - CMDLINE_OPTIONS -#endif - { "direct", no_argument, NULL, OPT_DIRECT }, - { "test-dir", required_argument, NULL, OPT_TESTDIR }, - { NULL, 0, NULL, 0 } -}; - -/* PID of the test itself. */ -static pid_t pid; - -/* Directory to place temporary files in. 
*/ -static const char *test_dir; -unsigned int test_verbose = 0; - -/* Show people how to run the program. */ -static void -usage (const struct option *options) -{ - size_t i; - - printf ("Usage: %s [options]\n" - "\n" - "Environment Variables:\n" - " TIMEOUTFACTOR An integer used to scale the timeout\n" - " TMPDIR Where to place temporary files\n" - " TEST_COREDUMPS Do not disable coredumps if set\n" - "\n", - program_invocation_short_name); - printf ("Options:\n"); - for (i = 0; options[i].name; ++i) - { - int indent; - - indent = printf (" --%s", options[i].name); - if (options[i].has_arg == required_argument) - indent += printf (" <arg>"); - printf ("%*s", 25 - indent, ""); - switch (options[i].val) - { - case 'v': - printf ("Increase the output verbosity"); - break; - case OPT_DIRECT: - printf ("Run the test directly (instead of forking & monitoring)"); - break; - case OPT_TESTDIR: - printf ("Override the TMPDIR env var"); - break; - } - printf ("\n"); - } -} - -/* List of temporary files. */ -struct temp_name_list -{ - struct qelem q; - const char *name; -} *temp_name_list; - -/* Add temporary files in list. */ -static void -__attribute__ ((unused)) -add_temp_file (const char *name) -{ - struct temp_name_list *newp - = (struct temp_name_list *) calloc (sizeof (*newp), 1); - if (newp != NULL) - { - newp->name = name; - if (temp_name_list == NULL) - temp_name_list = (struct temp_name_list *) &newp->q; - else - insque (newp, temp_name_list); - } -} - -/* Delete all temporary files. */ -static void -delete_temp_files (void) -{ - while (temp_name_list != NULL) - { - remove (temp_name_list->name); - temp_name_list = (struct temp_name_list *) temp_name_list->q.q_forw; - } -} - -/* Create a temporary file. 
*/ -static int -__attribute__ ((unused)) -create_temp_file (const char *base, char **filename) -{ - char *fname; - int fd; - - fname = (char *) malloc (strlen (test_dir) + 1 + strlen (base) - + sizeof ("XXXXXX")); - if (fname == NULL) - { - puts ("out of memory"); - return -1; - } - strcpy (stpcpy (stpcpy (stpcpy (fname, test_dir), "/"), base), "XXXXXX"); - - fd = mkstemp (fname); - if (fd == -1) - { - printf ("cannot open temporary file '%s': %m\n", fname); - free (fname); - return -1; - } - - add_temp_file (fname); - if (filename != NULL) - *filename = fname; - - return fd; -} - -static void -print_timestamp (const char *what, struct timeval tv) -{ - struct tm tm; - if (gmtime_r (&tv.tv_sec, &tm) == NULL) - printf ("%s: %lld.%06d\n", - what, (long long int) tv.tv_sec, (int) tv.tv_usec); - else - printf ("%s: %04d-%02d-%02dT%02d:%02d:%02d.%06d\n", - what, 1900 + tm.tm_year, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec, (int) tv.tv_usec); -} - -/* Timeout handler. We kill the child and exit with an error. */ -static void -__attribute__ ((noreturn)) -signal_handler (int sig) -{ - int killed; - int status; - - /* Do this first to avoid further interference from the - subprocess. */ - struct timeval now; - bool now_available = gettimeofday (&now, NULL) == 0; - struct stat64 st; - bool st_available = fstat64 (STDOUT_FILENO, &st) == 0 && st.st_mtime != 0; - - assert (pid > 1); - /* Kill the whole process group. */ - kill (-pid, SIGKILL); - /* In case setpgid failed in the child, kill it individually too. */ - kill (pid, SIGKILL); - - /* Wait for it to terminate. */ - int i; - for (i = 0; i < 5; ++i) - { - killed = waitpid (pid, &status, WNOHANG|WUNTRACED); - if (killed != 0) - break; - - /* Delay, give the system time to process the kill. If the - nanosleep() call return prematurely, all the better. We - won't restart it since this probably means the child process - finally died. 
*/ - struct timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = 100000000; - nanosleep (&ts, NULL); - } - if (killed != 0 && killed != pid) - { - printf ("Failed to kill test process: %m\n"); - exit (1); - } - -#ifdef CLEANUP_HANDLER - CLEANUP_HANDLER; -#endif - - if (sig == SIGINT) - { - signal (sig, SIG_DFL); - raise (sig); - } - - if (killed == 0 || (WIFSIGNALED (status) && WTERMSIG (status) == SIGKILL)) - puts ("Timed out: killed the child process"); - else if (WIFSTOPPED (status)) - printf ("Timed out: the child process was %s\n", - strsignal (WSTOPSIG (status))); - else if (WIFSIGNALED (status)) - printf ("Timed out: the child process got signal %s\n", - strsignal (WTERMSIG (status))); - else - printf ("Timed out: killed the child process but it exited %d\n", - WEXITSTATUS (status)); - - if (now_available) - print_timestamp ("Termination time", now); - if (st_available) - print_timestamp ("Last write to standard output", - (struct timeval) { st.st_mtim.tv_sec, - st.st_mtim.tv_nsec / 1000 }); - - /* Exit with an error. */ - exit (1); -} - -/* We provide the entry point here. */ -int -main (int argc, char *argv[]) -{ - int direct = 0; /* Directly call the test function? */ - int status; - int opt; - unsigned int timeoutfactor = 1; - pid_t termpid; - - { - /* Make uses of freed and uninitialized memory known. Do not - pull in a definition for mallopt if it has not been defined - already. */ - extern __typeof__ (mallopt) mallopt __attribute__ ((weak)); - if (mallopt != NULL) - mallopt (M_PERTURB, 42); - } - - while ((opt = getopt_long (argc, argv, "+", options, NULL)) != -1) - switch (opt) - { - case '?': - usage (options); - exit (1); - case 'v': - ++test_verbose; - break; - case OPT_DIRECT: - direct = 1; - break; - case OPT_TESTDIR: - test_dir = optarg; - break; -#ifdef CMDLINE_PROCESS - CMDLINE_PROCESS -#endif - } - - /* If set, read the test TIMEOUTFACTOR value from the environment. - This value is used to scale the default test timeout values. 
*/ - char *envstr_timeoutfactor = getenv ("TIMEOUTFACTOR"); - if (envstr_timeoutfactor != NULL) - { - char *envstr_conv = envstr_timeoutfactor; - unsigned long int env_fact; - - env_fact = strtoul (envstr_timeoutfactor, &envstr_conv, 0); - if (*envstr_conv == '\0' && envstr_conv != envstr_timeoutfactor) - timeoutfactor = MAX (env_fact, 1); - } - - /* Set TMPDIR to specified test directory. */ - if (test_dir != NULL) - { - setenv ("TMPDIR", test_dir, 1); - - if (chdir (test_dir) < 0) - { - printf ("chdir: %m\n"); - exit (1); - } - } - else - { - test_dir = getenv ("TMPDIR"); - if (test_dir == NULL || test_dir[0] == '\0') - test_dir = "/tmp"; - } - - /* Make sure we see all message, even those on stdout. */ - setvbuf (stdout, NULL, _IONBF, 0); - - /* make sure temporary files are deleted. */ - atexit (delete_temp_files); - - /* Correct for the possible parameters. */ - argv[optind - 1] = argv[0]; - argv += optind - 1; - argc -= optind - 1; - - /* Call the initializing function, if one is available. */ -#ifdef PREPARE - PREPARE (argc, argv); -#endif - - /* If we are not expected to fork run the function immediately. */ - if (direct) - return TEST_FUNCTION; - - /* Set up the test environment: - - prevent core dumps - - set up the timer - - fork and execute the function. */ - - pid = fork (); - if (pid == 0) - { - /* This is the child. */ - { - /* Try to avoid dumping core. This is necessary because we - run the test from the source tree, and the coredumps - would end up there (and not in the build tree). */ - struct rlimit core_limit; - core_limit.rlim_cur = 0; - core_limit.rlim_max = 0; - setrlimit (RLIMIT_CORE, &core_limit); - } - - /* We put the test process in its own pgrp so that if it bogusly - generates any job control signals, they won't hit the whole build. */ - if (setpgid (0, 0) != 0) - printf ("Failed to set the process group ID: %m\n"); - - /* Execute the test function and exit with the return value. 
*/ - exit (TEST_FUNCTION); - } - else if (pid < 0) - { - printf ("Cannot fork test program: %m\n"); - exit (1); - } - - /* Set timeout. */ -#ifndef TIMEOUT - /* Default timeout is two seconds. */ -# define TIMEOUT 2 -#endif - signal (SIGALRM, signal_handler); - alarm (TIMEOUT * timeoutfactor); - - /* Make sure we clean up if the wrapper gets interrupted. */ - signal (SIGINT, signal_handler); - - /* Wait for the regular termination. */ - termpid = TEMP_FAILURE_RETRY (waitpid (pid, &status, 0)); - if (termpid == -1) - { - printf ("Waiting for test program failed: %m\n"); - exit (1); - } - if (termpid != pid) - { - printf ("Oops, wrong test program terminated: expected %ld, got %ld\n", - (long int) pid, (long int) termpid); - exit (1); - } - -#ifndef EXPECTED_SIGNAL - /* We don't expect any signal. */ -# define EXPECTED_SIGNAL 0 -#endif - if (WTERMSIG (status) != EXPECTED_SIGNAL) - { - if (EXPECTED_SIGNAL != 0) - { - if (WTERMSIG (status) == 0) - fprintf (stderr, - "Expected signal '%s' from child, got none\n", - strsignal (EXPECTED_SIGNAL)); - else - fprintf (stderr, - "Incorrect signal from child: got `%s', need `%s'\n", - strsignal (WTERMSIG (status)), - strsignal (EXPECTED_SIGNAL)); - } - else - fprintf (stderr, "Didn't expect signal from child: got `%s'\n", - strsignal (WTERMSIG (status))); - exit (1); - } - - /* Simply exit with the return value of the test. */ -#ifndef EXPECTED_STATUS - return WEXITSTATUS (status); -#else - if (WEXITSTATUS (status) != EXPECTED_STATUS) - { - fprintf (stderr, "Expected status %d, got %d\n", - EXPECTED_STATUS, WEXITSTATUS (status)); - exit (1); - } - - return 0; -#endif -} diff --git a/tests/test-strchr.c b/tests/test-strchr.c deleted file mode 100644 index 4aadb2b..0000000 --- a/tests/test-strchr.c +++ /dev/null @@ -1,214 +0,0 @@ -/* Test and measure strchr functions. - Copyright (C) 1999, 2002, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#define TEST_MAIN -#include "test-string.h" - -typedef char *(*proto_t) (const char *, int); -char *simple_strchr (const char *, int); -char *stupid_strchr (const char *, int); - -IMPL (stupid_strchr, 0) -IMPL (simple_strchr, 0) -IMPL (strchr, 1) - -char * -simple_strchr (const char *s, int c) -{ - for (; *s != (char) c; ++s) - if (*s == '\0') - return NULL; - return (char *) s; -} - -char * -stupid_strchr (const char *s, int c) -{ - size_t n = strlen (s) + 1; - - while (n--) - if (*s++ == (char) c) - return (char *) s - 1; - return NULL; -} - -static void -do_one_test (impl_t *impl, const char *s, int c, char *exp_res) -{ - char *res = CALL (impl, s, c); - if (res != exp_res) - { - error (0, 0, "Wrong result in function %s %p %p", impl->name, - res, exp_res); - ret = 1; - return; - } -} - -static void -do_test (size_t align, size_t pos, size_t len, int seek_char, int max_char) -{ - size_t i; - char *result; - - align &= 7; - if (align + len >= page_size) - return; - - for (i = 0; i < len; ++i) - { - buf1[align + i] = 32 + 23 * i % (max_char - 32); - if (buf1[align + i] == seek_char) - buf1[align + i] = seek_char + 1; - } - buf1[align + len] = 0; - - if (pos < len) - { - buf1[align + pos] = seek_char; - result = (char *) (buf1 + 
align + pos); - } - else if (seek_char == 0) - result = (char *) (buf1 + align + len); - else - result = NULL; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, (char *) (buf1 + align), seek_char, result); -} - -static void -do_random_tests (void) -{ - size_t i, j, n, align, pos, len; - int seek_char; - char *result; - unsigned char *p = buf1 + page_size - 512; - - for (n = 0; n < ITERATIONS; n++) - { - align = random () & 15; - pos = random () & 511; - seek_char = random () & 255; - if (pos + align >= 511) - pos = 510 - align - (random () & 7); - len = random () & 511; - if ((pos == len && seek_char) - || (pos > len && (random () & 1))) - len = pos + 1 + (random () & 7); - if (len + align >= 512) - len = 511 - align - (random () & 7); - if (pos == len && seek_char) - len = pos + 1; - j = (pos > len ? pos : len) + align + 64; - if (j > 512) - j = 512; - - for (i = 0; i < j; i++) - { - if (i == pos + align) - p[i] = seek_char; - else if (i == len + align) - p[i] = 0; - else - { - p[i] = random () & 255; - if (i < pos + align && p[i] == seek_char) - p[i] = seek_char + 13; - if (i < len + align && !p[i]) - { - p[i] = seek_char - 13; - if (!p[i]) - p[i] = 140; - } - } - } - - if (pos <= len) - result = (char *) (p + pos + align); - else if (seek_char == 0) - result = (char *) (p + len + align); - else - result = NULL; - - FOR_EACH_IMPL (impl, 1) - if (CALL (impl, (char *) (p + align), seek_char) != result) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %d, %zd, %zd) %p != %p, p %p", - n, impl->name, align, seek_char, len, pos, - CALL (impl, (char *) (p + align), seek_char), result, p); - ret = 1; - } - } -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - printf ("%20s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i = 1; i < 8; ++i) - { - do_test (0, 16 << i, 2048, 23, 127); - do_test (i, 16 << i, 2048, 23, 127); - } - - for (i = 1; i < 8; ++i) - { - do_test (i, 64, 256, 23, 127); - do_test 
(i, 64, 256, 23, 255); - } - - for (i = 0; i < 32; ++i) - { - do_test (0, i, i + 1, 23, 127); - do_test (0, i, i + 1, 23, 255); - } - - for (i = 1; i < 8; ++i) - { - do_test (0, 16 << i, 2048, 0, 127); - do_test (i, 16 << i, 2048, 0, 127); - } - - for (i = 1; i < 8; ++i) - { - do_test (i, 64, 256, 0, 127); - do_test (i, 64, 256, 0, 255); - } - - for (i = 0; i < 32; ++i) - { - do_test (0, i, i + 1, 0, 127); - do_test (0, i, i + 1, 0, 255); - } - - do_random_tests (); - return ret; -} - -#include "test-skeleton.c" diff --git a/tests/test-strcmp.c b/tests/test-strcmp.c deleted file mode 100644 index a55fcd8..0000000 --- a/tests/test-strcmp.c +++ /dev/null @@ -1,236 +0,0 @@ -/* Test and measure strcmp functions. - Copyright (C) 1999, 2002, 2003, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. 
*/ - -#define TEST_MAIN -#include "test-string.h" - -typedef int (*proto_t) (const char *, const char *); -int simple_strcmp (const char *, const char *); -int stupid_strcmp (const char *, const char *); - -IMPL (stupid_strcmp, 0) -IMPL (simple_strcmp, 0) -IMPL (strcmp, 1) - -int -simple_strcmp (const char *s1, const char *s2) -{ - int ret; - - while ((ret = *(unsigned char *) s1 - *(unsigned char *) s2++) == 0 - && *s1++); - return ret; -} - -int -stupid_strcmp (const char *s1, const char *s2) -{ - size_t ns1 = strlen (s1) + 1, ns2 = strlen (s2) + 1; - size_t n = ns1 < ns2 ? ns1 : ns2; - int ret = 0; - - while (n--) - if ((ret = *(unsigned char *) s1++ - *(unsigned char *) s2++) != 0) - break; - return ret; -} - -static void -do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result) -{ - int result = CALL (impl, s1, s2); - if ((exp_result == 0 && result != 0) - || (exp_result < 0 && result >= 0) - || (exp_result > 0 && result <= 0)) - { - error (0, 0, "Wrong result in function %s %d %d", impl->name, - result, exp_result); - ret = 1; - return; - } -} - -static void -do_test (size_t align1, size_t align2, size_t len, int max_char, - int exp_result) -{ - size_t i; - char *s1, *s2; - - if (len == 0) - return; - - align1 &= 7; - if (align1 + len + 1 >= page_size) - return; - - align2 &= 7; - if (align2 + len + 1 >= page_size) - return; - - s1 = (char *) (buf1 + align1); - s2 = (char *) (buf2 + align2); - - for (i = 0; i < len; i++) - s1[i] = s2[i] = 1 + 23 * i % max_char; - - s1[len] = s2[len] = 0; - s1[len + 1] = 23; - s2[len + 1] = 24 + exp_result; - s2[len - 1] -= exp_result; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, s1, s2, exp_result); -} - -static void -do_random_tests (void) -{ - size_t i, j, n, align1, align2, pos, len1, len2; - int result; - long r; - unsigned char *p1 = buf1 + page_size - 512; - unsigned char *p2 = buf2 + page_size - 512; - - for (n = 0; n < ITERATIONS; n++) - { - align1 = random () & 31; - if (random () & 1) - align2 
= random () & 31; - else - align2 = align1 + (random () & 24); - pos = random () & 511; - j = align1 > align2 ? align1 : align2; - if (pos + j >= 511) - pos = 510 - j - (random () & 7); - len1 = random () & 511; - if (pos >= len1 && (random () & 1)) - len1 = pos + (random () & 7); - if (len1 + j >= 512) - len1 = 511 - j - (random () & 7); - if (pos >= len1) - len2 = len1; - else - len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0); - j = (pos > len2 ? pos : len2) + align1 + 64; - if (j > 512) - j = 512; - for (i = 0; i < j; ++i) - { - p1[i] = random () & 255; - if (i < len1 + align1 && !p1[i]) - { - p1[i] = random () & 255; - if (!p1[i]) - p1[i] = 1 + (random () & 127); - } - } - for (i = 0; i < j; ++i) - { - p2[i] = random () & 255; - if (i < len2 + align2 && !p2[i]) - { - p2[i] = random () & 255; - if (!p2[i]) - p2[i] = 1 + (random () & 127); - } - } - - result = 0; - memcpy (p2 + align2, p1 + align1, pos); - if (pos < len1) - { - if (p2[align2 + pos] == p1[align1 + pos]) - { - p2[align2 + pos] = random () & 255; - if (p2[align2 + pos] == p1[align1 + pos]) - p2[align2 + pos] = p1[align1 + pos] + 3 + (random () & 127); - } - - if (p1[align1 + pos] < p2[align2 + pos]) - result = -1; - else - result = 1; - } - p1[len1 + align1] = 0; - p2[len2 + align2] = 0; - - FOR_EACH_IMPL (impl, 1) - { - r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2)); - /* Test whether on 64-bit architectures where ABI requires - callee to promote has the promotion been done. 
*/ - asm ("" : "=g" (r) : "0" (r)); - if ((r == 0 && result) - || (r < 0 && result >= 0) - || (r > 0 && result <= 0)) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p", - n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2); - ret = 1; - } - } - } -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - printf ("%23s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i = 1; i < 16; ++i) - { - do_test (i, i, i, 127, 0); - do_test (i, i, i, 127, 1); - do_test (i, i, i, 127, -1); - } - - for (i = 1; i < 10; ++i) - { - do_test (0, 0, 2 << i, 127, 0); - do_test (0, 0, 2 << i, 254, 0); - do_test (0, 0, 2 << i, 127, 1); - do_test (0, 0, 2 << i, 254, 1); - do_test (0, 0, 2 << i, 127, -1); - do_test (0, 0, 2 << i, 254, -1); - } - - for (i = 1; i < 8; ++i) - { - do_test (i, 2 * i, 8 << i, 127, 0); - do_test (2 * i, i, 8 << i, 254, 0); - do_test (i, 2 * i, 8 << i, 127, 1); - do_test (2 * i, i, 8 << i, 254, 1); - do_test (i, 2 * i, 8 << i, 127, -1); - do_test (2 * i, i, 8 << i, 254, -1); - } - - do_random_tests (); - return ret; -} - -#include "test-skeleton.c" diff --git a/tests/test-strcpy.c b/tests/test-strcpy.c deleted file mode 100644 index 0e94ed9..0000000 --- a/tests/test-strcpy.c +++ /dev/null @@ -1,204 +0,0 @@ -/* Test and measure strcpy functions. - Copyright (C) 1999, 2002, 2003, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifndef STRCPY_RESULT -# define STRCPY_RESULT(dst, len) dst -# define TEST_MAIN -# include "test-string.h" - -char *simple_strcpy (char *, const char *); - -IMPL (simple_strcpy, 0) -IMPL (strcpy, 1) - -char * -simple_strcpy (char *dst, const char *src) -{ - char *ret = dst; - while ((*dst++ = *src++) != '\0'); - return ret; -} -#endif - -typedef char *(*proto_t) (char *, const char *); - -static void -do_one_test (impl_t *impl, char *dst, const char *src, - size_t len __attribute__((unused))) -{ - if (CALL (impl, dst, src) != STRCPY_RESULT (dst, len)) - { - error (0, 0, "Wrong result in function %s %p %p", impl->name, - CALL (impl, dst, src), STRCPY_RESULT (dst, len)); - ret = 1; - return; - } - - if (strcmp (dst, src) != 0) - { - error (0, 0, "Wrong result in function %s dst \"%s\" src \"%s\"", - impl->name, dst, src); - ret = 1; - return; - } -} - -static void -do_test (size_t align1, size_t align2, size_t len, int max_char) -{ - size_t i; - char *s1, *s2; - - align1 &= 7; - if (align1 + len >= page_size) - return; - - align2 &= 7; - if (align2 + len >= page_size) - return; - - s1 = (char *) (buf1 + align1); - s2 = (char *) (buf2 + align2); - - for (i = 0; i < len; i++) - s1[i] = 32 + 23 * i % (max_char - 32); - s1[len] = 0; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, s2, s1, len); -} - -static void -do_random_tests (void) -{ - size_t i, j, n, align1, align2, len; - unsigned char *p1 = buf1 + page_size - 512; - unsigned char *p2 = buf2 + page_size - 512; - unsigned char *res; - - for 
(n = 0; n < ITERATIONS; n++) - { - align1 = random () & 31; - if (random () & 1) - align2 = random () & 31; - else - align2 = align1 + (random () & 24); - len = random () & 511; - j = align1; - if (align2 > j) - j = align2; - if (len + j >= 511) - len = 510 - j - (random () & 7); - j = len + align1 + 64; - if (j > 512) - j = 512; - for (i = 0; i < j; i++) - { - if (i == len + align1) - p1[i] = 0; - else - { - p1[i] = random () & 255; - if (i >= align1 && i < len + align1 && !p1[i]) - p1[i] = (random () & 127) + 3; - } - } - - FOR_EACH_IMPL (impl, 1) - { - memset (p2 - 64, '\1', 512 + 64); - res = (unsigned char *) CALL (impl, (char *) (p2 + align2), - (char *) (p1 + align1)); - if (res != STRCPY_RESULT (p2 + align2, len)) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd) %p != %p", - n, impl->name, align1, align2, len, res, - STRCPY_RESULT (p2 + align2, len)); - ret = 1; - } - for (j = 0; j < align2 + 64; ++j) - { - if (p2[j - 64] != '\1') - { - error (0, 0, "Iteration %zd - garbage before, %s (%zd, %zd, %zd)", - n, impl->name, align1, align2, len); - ret = 1; - break; - } - } - for (j = align2 + len + 1; j < 512; ++j) - { - if (p2[j] != '\1') - { - error (0, 0, "Iteration %zd - garbage after, %s (%zd, %zd, %zd)", - n, impl->name, align1, align2, len); - ret = 1; - break; - } - } - if (memcmp (p1 + align1, p2 + align2, len + 1)) - { - error (0, 0, "Iteration %zd - different strings, %s (%zd, %zd, %zd)", - n, impl->name, align1, align2, len); - ret = 1; - } - } - } -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - printf ("%23s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i = 0; i < 16; ++i) - { - do_test (0, 0, i, 127); - do_test (0, 0, i, 255); - do_test (0, i, i, 127); - do_test (i, 0, i, 255); - } - - for (i = 1; i < 8; ++i) - { - do_test (0, 0, 8 << i, 127); - do_test (8 - i, 2 * i, 8 << i, 127); - } - - for (i = 1; i < 8; ++i) - { - do_test (i, 2 * i, 8 << i, 127); - 
do_test (2 * i, i, 8 << i, 255); - do_test (i, i, 8 << i, 127); - do_test (i, i, 8 << i, 255); - } - - do_random_tests (); - return ret; -} - -#include "test-skeleton.c" diff --git a/tests/test-string.h b/tests/test-string.h deleted file mode 100644 index f748b53..0000000 --- a/tests/test-string.h +++ /dev/null @@ -1,212 +0,0 @@ -/* Test and measure string and memory functions. - Copyright (C) 1999-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sys/cdefs.h> - -typedef struct -{ - const char *name; - void (*fn) (void); - long test; -} impl_t; -extern impl_t __start_impls[], __stop_impls[]; - -#define IMPL(name, test) \ - impl_t tst_ ## name \ - __attribute__ ((section ("impls"), aligned (sizeof (void *)))) \ - = { __STRING (name), (void (*) (void))name, test }; - -#ifdef TEST_MAIN - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif - -#undef __USE_STRING_INLINES - -/* We are compiled under _ISOMAC, so libc-symbols.h does not do this - for us. 
*/ -#include "config.h" -#ifdef HAVE_CC_INHIBIT_LOOP_TO_LIBCALL -# define inhibit_loop_to_libcall \ - __attribute__ ((__optimize__ ("-fno-tree-loop-distribute-patterns"))) -#else -# define inhibit_loop_to_libcall -#endif - -#include <getopt.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/param.h> -#include <unistd.h> -#include <fcntl.h> -#include <error.h> -#include <errno.h> -#include <time.h> -#define GL(x) _##x -#define GLRO(x) _##x - -/* Do not check libc ifunc implementations. */ -#undef TEST_NAME - -# define TEST_FUNCTION test_main () -# define TIMEOUT (4 * 60) -# define OPT_ITERATIONS 10000 -# define OPT_RANDOM 10001 -# define OPT_SEED 10002 - -unsigned char *buf1, *buf2; -int ret, do_srandom; -unsigned int seed; -size_t page_size; - -# ifndef ITERATIONS -size_t iterations = 100000; -# define ITERATIONS_OPTIONS \ - { "iterations", required_argument, NULL, OPT_ITERATIONS }, -# define ITERATIONS_PROCESS \ - case OPT_ITERATIONS: \ - iterations = strtoul (optarg, NULL, 0); \ - break; -# define ITERATIONS iterations -# else -# define ITERATIONS_OPTIONS -# define ITERATIONS_PROCESS -# endif - -# define CMDLINE_OPTIONS ITERATIONS_OPTIONS \ - { "random", no_argument, NULL, OPT_RANDOM }, \ - { "seed", required_argument, NULL, OPT_SEED }, -# define CMDLINE_PROCESS ITERATIONS_PROCESS \ - case OPT_RANDOM: \ - { \ - int fdr = open ("/dev/urandom", O_RDONLY); \ - \ - if (fdr < 0 || read (fdr, &seed, sizeof(seed)) != sizeof (seed)) \ - seed = time (NULL); \ - if (fdr >= 0) \ - close (fdr); \ - do_srandom = 1; \ - break; \ - } \ - \ - case OPT_SEED: \ - seed = strtoul (optarg, NULL, 0); \ - do_srandom = 1; \ - break; - -#define CALL(impl, ...) \ - (* (proto_t) (impl)->fn) (__VA_ARGS__) - -#ifdef TEST_NAME -/* Increase size of FUNC_LIST if assert is triggered at run-time. 
*/ -static struct libc_ifunc_impl func_list[32]; -static int func_count; -static int impl_count = -1; -static impl_t *impl_array; - -# define FOR_EACH_IMPL(impl, notall) \ - impl_t *impl; \ - int count; \ - if (impl_count == -1) \ - { \ - impl_count = 0; \ - if (func_count != 0) \ - { \ - int f; \ - impl_t *skip = NULL, *a; \ - for (impl = __start_impls; impl < __stop_impls; ++impl) \ - if (strcmp (impl->name, TEST_NAME) == 0) \ - skip = impl; \ - else \ - impl_count++; \ - a = impl_array = malloc ((impl_count + func_count) * \ - sizeof (impl_t)); \ - for (impl = __start_impls; impl < __stop_impls; ++impl) \ - if (impl != skip) \ - *a++ = *impl; \ - for (f = 0; f < func_count; f++) \ - if (func_list[f].usable) \ - { \ - a->name = func_list[f].name; \ - a->fn = func_list[f].fn; \ - a->test = 1; \ - a++; \ - } \ - impl_count = a - impl_array; \ - } \ - else \ - { \ - impl_count = __stop_impls - __start_impls; \ - impl_array = __start_impls; \ - } \ - } \ - impl = impl_array; \ - for (count = 0; count < impl_count; ++count, ++impl) \ - if (!notall || impl->test) -#else -# define FOR_EACH_IMPL(impl, notall) \ - for (impl_t *impl = __start_impls; impl < __stop_impls; ++impl) \ - if (!notall || impl->test) -#endif - -#ifndef BUF1PAGES -# define BUF1PAGES 1 -#endif - -static void -test_init (void) -{ -#ifdef TEST_NAME - func_count = __libc_ifunc_impl_list (TEST_NAME, func_list, - (sizeof func_list - / sizeof func_list[0])); -#endif - - page_size = 2 * getpagesize (); -#ifdef MIN_PAGE_SIZE - if (page_size < MIN_PAGE_SIZE) - page_size = MIN_PAGE_SIZE; -#endif - buf1 = mmap (0, (BUF1PAGES + 1) * page_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0); - if (buf1 == MAP_FAILED) - error (EXIT_FAILURE, errno, "mmap failed"); - if (mprotect (buf1 + BUF1PAGES * page_size, page_size, PROT_NONE)) - error (EXIT_FAILURE, errno, "mprotect failed"); - buf2 = mmap (0, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0); - if (buf2 == MAP_FAILED) - error 
(EXIT_FAILURE, errno, "mmap failed"); - if (mprotect (buf2 + page_size, page_size, PROT_NONE)) - error (EXIT_FAILURE, errno, "mprotect failed"); - if (do_srandom) - { - printf ("Setting seed to 0x%x\n", seed); - srandom (seed); - } - - memset (buf1, 0xa5, BUF1PAGES * page_size); - memset (buf2, 0x5a, page_size); -} - -#endif diff --git a/tests/test-strlen.c b/tests/test-strlen.c deleted file mode 100644 index 4b1e407..0000000 --- a/tests/test-strlen.c +++ /dev/null @@ -1,156 +0,0 @@ -/* Test and measure strlen functions. - Copyright (C) 1999, 2002, 2003, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. 
*/ - -#define TEST_MAIN -#include "test-string.h" - -typedef size_t (*proto_t) (const char *); -size_t simple_strlen (const char *); -size_t builtin_strlen (const char *); - -IMPL (simple_strlen, 0) -IMPL (builtin_strlen, 0) -IMPL (strlen, 1) - -size_t -simple_strlen (const char *s) -{ - const char *p; - - for (p = s; *p; ++p); - return p - s; -} - -size_t -builtin_strlen (const char *p) -{ - return __builtin_strlen (p); -} - -static void -do_one_test (impl_t *impl, const char *s, size_t exp_len) -{ - size_t len = CALL (impl, s); - if (len != exp_len) - { - error (0, 0, "Wrong result in function %s %zd %zd", impl->name, - len, exp_len); - ret = 1; - return; - } -} - -static void -do_test (size_t align, size_t len, int max_char) -{ - size_t i; - - align &= 7; - if (align + len >= page_size) - return; - - for (i = 0; i < len; ++i) - buf1[align + i] = 1 + 7 * i % max_char; - buf1[align + len] = 0; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, (char *) (buf1 + align), len); -} - -static void -do_random_tests (void) -{ - size_t i, j, n, align, len; - unsigned char *p = buf1 + page_size - 512; - - for (n = 0; n < ITERATIONS; n++) - { - align = random () & 15; - len = random () & 511; - if (len + align > 510) - len = 511 - align - (random () & 7); - j = len + align + 64; - if (j > 512) - j = 512; - - for (i = 0; i < j; i++) - { - if (i == len + align) - p[i] = 0; - else - { - p[i] = random () & 255; - if (i >= align && i < len + align && !p[i]) - p[i] = (random () & 127) + 1; - } - } - - FOR_EACH_IMPL (impl, 1) - if (CALL (impl, (char *) (p + align)) != len) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd) %zd != %zd, p %p", - n, impl->name, align, CALL (impl, (char *) (p + align)), - len, p); - ret = 1; - } - } -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - printf ("%20s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i = 1; i < 8; ++i) - do_test (0, i, 127); - - for (i = 1; i < 8; ++i) 
- do_test (i, i, 127); - - for (i = 2; i <= 10; ++i) - { - do_test (0, 1 << i, 127); - do_test (1, 1 << i, 127); - } - - for (i = 1; i < 8; ++i) - do_test (0, i, 255); - - for (i = 1; i < 8; ++i) - do_test (i, i, 255); - - for (i = 2; i <= 10; ++i) - { - do_test (0, 1 << i, 255); - do_test (1, 1 << i, 255); - } - - do_random_tests (); - return ret; -} - -#include "test-skeleton.c" diff --git a/tests/test-strncmp.c b/tests/test-strncmp.c deleted file mode 100644 index 58c8bc3..0000000 --- a/tests/test-strncmp.c +++ /dev/null @@ -1,393 +0,0 @@ -/* Test and measure strncmp functions. - Copyright (C) 1999-2012 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#define TEST_MAIN -#define TEST_NAME "strncmp" -#include "test-string.h" - -typedef int (*proto_t) (const char *, const char *, size_t); -int simple_strncmp (const char *, const char *, size_t); -int stupid_strncmp (const char *, const char *, size_t); - -IMPL (stupid_strncmp, 0) -IMPL (simple_strncmp, 0) -IMPL (strncmp, 1) - -int -simple_strncmp (const char *s1, const char *s2, size_t n) -{ - int ret = 0; - - while (n-- && (ret = *(unsigned char *) s1 - * (unsigned char *) s2++) == 0 - && *s1++); - return ret; -} - -int -stupid_strncmp (const char *s1, const char *s2, size_t n) -{ - size_t ns1 = strnlen (s1, n) + 1, ns2 = strnlen (s2, n) + 1; - int ret = 0; - - n = ns1 < n ? ns1 : n; - n = ns2 < n ? ns2 : n; - while (n-- && (ret = *(unsigned char *) s1++ - * (unsigned char *) s2++) == 0); - return ret; -} - -static int -check_result (impl_t *impl, const char *s1, const char *s2, size_t n, - int exp_result) -{ - int result = CALL (impl, s1, s2, n); - if ((exp_result == 0 && result != 0) - || (exp_result < 0 && result >= 0) - || (exp_result > 0 && result <= 0)) - { - error (0, 0, "Wrong result in function %s %d %d", impl->name, - result, exp_result); - ret = 1; - return -1; - } - - return 0; -} - -static void -do_one_test (impl_t *impl, const char *s1, const char *s2, size_t n, - int exp_result) -{ - if (check_result (impl, s1, s2, n, exp_result) < 0) - return; -} - -static void -do_test_limit (size_t align1, size_t align2, size_t len, size_t n, int max_char, - int exp_result) -{ - size_t i, align_n; - char *s1, *s2; - - if (n == 0) - { - s1 = (char*)(buf1 + page_size); - s2 = (char*)(buf2 + page_size); - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, s1, s2, n, 0); - - return; - } - - align1 &= 15; - align2 &= 15; - align_n = (page_size - n) & 15; - - s1 = (char*)(buf1 + page_size - n); - s2 = (char*)(buf2 + page_size - n); - - if (align1 < align_n) - s1 -= (align_n - align1); - - if (align2 < align_n) - s2 -= (align_n - align2); - - for (i = 0; i < n; i++) - 
s1[i] = s2[i] = 1 + 23 * i % max_char; - - if (len < n) - { - s1[len] = 0; - s2[len] = 0; - if (exp_result < 0) - s2[len] = 32; - else if (exp_result > 0) - s1[len] = 64; - } - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, s1, s2, n, exp_result); -} - -static void -do_test (size_t align1, size_t align2, size_t len, size_t n, int max_char, - int exp_result) -{ - size_t i; - char *s1, *s2; - - if (n == 0) - return; - - align1 &= 7; - if (align1 + n + 1 >= page_size) - return; - - align2 &= 7; - if (align2 + n + 1 >= page_size) - return; - - s1 = (char*)(buf1 + align1); - s2 = (char*)(buf2 + align2); - - for (i = 0; i < n; i++) - s1[i] = s2[i] = 1 + 23 * i % max_char; - - s1[n] = 24 + exp_result; - s2[n] = 23; - s1[len] = 0; - s2[len] = 0; - if (exp_result < 0) - s2[len] = 32; - else if (exp_result > 0) - s1[len] = 64; - if (len >= n) - s2[n - 1] -= exp_result; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, (char*)s1, (char*)s2, n, exp_result); -} - -static void -do_page_test (size_t offset1, size_t offset2, char *s2) -{ - char *s1; - int exp_result; - - if (offset1 >= page_size || offset2 >= page_size) - return; - - s1 = (char *) (buf1 + offset1); - s2 += offset2; - - exp_result= *s1; - - FOR_EACH_IMPL (impl, 0) - { - check_result (impl, s1, s2, page_size, -exp_result); - check_result (impl, s2, s1, page_size, exp_result); - } -} - -static void -do_random_tests (void) -{ - size_t i, j, n, align1, align2, pos, len1, len2, size; - int result; - long r; - unsigned char *p1 = buf1 + page_size - 512; - unsigned char *p2 = buf2 + page_size - 512; - - for (n = 0; n < ITERATIONS; n++) - { - align1 = random () & 31; - if (random () & 1) - align2 = random () & 31; - else - align2 = align1 + (random () & 24); - pos = random () & 511; - size = random () & 511; - j = align1 > align2 ? 
align1 : align2; - if (pos + j >= 511) - pos = 510 - j - (random () & 7); - len1 = random () & 511; - if (pos >= len1 && (random () & 1)) - len1 = pos + (random () & 7); - if (len1 + j >= 512) - len1 = 511 - j - (random () & 7); - if (pos >= len1) - len2 = len1; - else - len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0); - j = (pos > len2 ? pos : len2) + align1 + 64; - if (j > 512) - j = 512; - for (i = 0; i < j; ++i) - { - p1[i] = random () & 255; - if (i < len1 + align1 && !p1[i]) - { - p1[i] = random () & 255; - if (!p1[i]) - p1[i] = 1 + (random () & 127); - } - } - for (i = 0; i < j; ++i) - { - p2[i] = random () & 255; - if (i < len2 + align2 && !p2[i]) - { - p2[i] = random () & 255; - if (!p2[i]) - p2[i] = 1 + (random () & 127); - } - } - - result = 0; - memcpy (p2 + align2, p1 + align1, pos); - if (pos < len1) - { - if (p2[align2 + pos] == p1[align1 + pos]) - { - p2[align2 + pos] = random () & 255; - if (p2[align2 + pos] == p1[align1 + pos]) - p2[align2 + pos] = p1[align1 + pos] + 3 + (random () & 127); - } - - if (pos < size) - { - if (p1[align1 + pos] < p2[align2 + pos]) - result = -1; - else - result = 1; - } - } - p1[len1 + align1] = 0; - p2[len2 + align2] = 0; - - FOR_EACH_IMPL (impl, 1) - { - r = CALL (impl, (char*)(p1 + align1), (char*)(p2 + align2), size); - /* Test whether on 64-bit architectures where ABI requires - callee to promote has the promotion been done. 
*/ - asm ("" : "=g" (r) : "0" (r)); - if ((r == 0 && result) - || (r < 0 && result >= 0) - || (r > 0 && result <= 0)) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p", - n, impl->name, align1, align2, len1, len2, pos, size, r, result, p1, p2); - ret = 1; - } - } - } -} - -static void -check1 (void) -{ - char *s1 = (char *)(buf1 + 0xb2c); - char *s2 = (char *)(buf1 + 0xfd8); - size_t i; - int exp_result; - - strcpy(s1, "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrs"); - strcpy(s2, "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkLMNOPQRSTUV"); - - for (i = 0; i < 80; i++) - { - exp_result = simple_strncmp (s1, s2, i); - FOR_EACH_IMPL (impl, 0) - check_result (impl, s1, s2, i, exp_result); - } -} - -static void -check2 (void) -{ - size_t i; - char *s1, *s2; - - s1 = (char *) buf1; - for (i = 0; i < page_size - 1; i++) - s1[i] = 23; - s1[i] = 0; - - s2 = strdup (s1); - - for (i = 0; i < 64; ++i) - do_page_test (3990 + i, 2635, s2); - - free (s2); -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - check1 (); - check2 (); - - printf ("%23s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i =0; i < 16; ++i) - { - do_test (0, 0, 8, i, 127, 0); - do_test (0, 0, 8, i, 127, -1); - do_test (0, 0, 8, i, 127, 1); - do_test (i, i, 8, i, 127, 0); - do_test (i, i, 8, i, 127, 1); - do_test (i, i, 8, i, 127, -1); - do_test (i, 2 * i, 8, i, 127, 0); - do_test (2 * i, i, 8, i, 127, 1); - do_test (i, 3 * i, 8, i, 127, -1); - do_test (0, 0, 8, i, 255, 0); - do_test (0, 0, 8, i, 255, -1); - do_test (0, 0, 8, i, 255, 1); - do_test (i, i, 8, i, 255, 0); - do_test (i, i, 8, i, 255, 1); - do_test (i, i, 8, i, 255, -1); - do_test (i, 2 * i, 8, i, 255, 0); - do_test (2 * i, i, 8, i, 255, 1); - do_test (i, 3 * i, 8, i, 255, -1); - } - - for (i = 1; i < 8; ++i) - { - do_test (0, 0, 8 << i, 16 << i, 127, 0); - do_test (0, 0, 8 << i, 
16 << i, 127, 1); - do_test (0, 0, 8 << i, 16 << i, 127, -1); - do_test (0, 0, 8 << i, 16 << i, 255, 0); - do_test (0, 0, 8 << i, 16 << i, 255, 1); - do_test (0, 0, 8 << i, 16 << i, 255, -1); - do_test (8 - i, 2 * i, 8 << i, 16 << i, 127, 0); - do_test (8 - i, 2 * i, 8 << i, 16 << i, 127, 1); - do_test (2 * i, i, 8 << i, 16 << i, 255, 0); - do_test (2 * i, i, 8 << i, 16 << i, 255, 1); - } - - do_test_limit (0, 0, 0, 0, 127, 0); - do_test_limit (4, 0, 21, 20, 127, 0); - do_test_limit (0, 4, 21, 20, 127, 0); - do_test_limit (8, 0, 25, 24, 127, 0); - do_test_limit (0, 8, 25, 24, 127, 0); - - for (i = 0; i < 8; ++i) - { - do_test_limit (0, 0, 17 - i, 16 - i, 127, 0); - do_test_limit (0, 0, 17 - i, 16 - i, 255, 0); - do_test_limit (0, 0, 15 - i, 16 - i, 127, 0); - do_test_limit (0, 0, 15 - i, 16 - i, 127, 1); - do_test_limit (0, 0, 15 - i, 16 - i, 127, -1); - do_test_limit (0, 0, 15 - i, 16 - i, 255, 0); - do_test_limit (0, 0, 15 - i, 16 - i, 255, 1); - do_test_limit (0, 0, 15 - i, 16 - i, 255, -1); - } - - do_random_tests (); - return ret; -} - -#include "test-skeleton.c" diff --git a/tests/test-strnlen.c b/tests/test-strnlen.c deleted file mode 100644 index 937d707..0000000 --- a/tests/test-strnlen.c +++ /dev/null @@ -1,175 +0,0 @@ -/* Test and measure strlen functions. - Copyright (C) 1999-2012 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define TEST_MAIN -#define TEST_NAME "strnlen" -#include "test-string.h" - -#define MIN(a,b) ((a) < (b) ? (a) : (b)) - -typedef size_t (*proto_t) (const char *, size_t); -size_t simple_strnlen (const char *, size_t); - -IMPL (simple_strnlen, 0) -IMPL (strnlen, 1) - -size_t -simple_strnlen (const char *s, size_t maxlen) -{ - size_t i; - - for (i = 0; i < maxlen && s[i]; ++i); - return i; -} - -static void -do_one_test (impl_t *impl, const char *s, size_t maxlen, size_t exp_len) -{ - size_t len = CALL (impl, s, maxlen); - if (len != exp_len) - { - error (0, 0, "Wrong result in function %s %zd %zd", impl->name, - len, exp_len); - ret = 1; - return; - } -} - -static void -do_test (size_t align, size_t len, size_t maxlen, int max_char) -{ - size_t i; - - align &= 7; - if (align + len >= page_size) - return; - - for (i = 0; i < len; ++i) - buf1[align + i] = 1 + 7 * i % max_char; - buf1[align + len] = 0; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, (char *) (buf1 + align), maxlen, MIN (len, maxlen)); -} - -static void -do_random_tests (void) -{ - size_t i, j, n, align, len; - unsigned char *p = buf1 + page_size - 512; - - for (n = 0; n < ITERATIONS; n++) - { - align = random () & 15; - len = random () & 511; - if (len + align > 510) - len = 511 - align - (random () & 7); - j = len + align + 64; - if (j > 512) - j = 512; - - for (i = 0; i < j; i++) - { - if (i == len + align) - p[i] = 0; - else - { - p[i] = random () & 255; - if (i >= align && i < len + align && !p[i]) - p[i] = (random () & 127) + 1; - } - } - - FOR_EACH_IMPL (impl, 1) - { - if (len > 0 - && CALL (impl, (char *) (p + align), len - 1) != len - 1) - { - error (0, 0, "Iteration %zd (limited) - wrong result in function %s (%zd) %zd != %zd, p %p", - n, impl->name, align, - CALL (impl, (char *) (p + align), len - 1), len - 1, p); - ret = 1; - } - if 
(CALL (impl, (char *) (p + align), len) != len) - { - error (0, 0, "Iteration %zd (exact) - wrong result in function %s (%zd) %zd != %zd, p %p", - n, impl->name, align, - CALL (impl, (char *) (p + align), len), len, p); - ret = 1; - } - if (CALL (impl, (char *) (p + align), len + 1) != len) - { - error (0, 0, "Iteration %zd (long) - wrong result in function %s (%zd) %zd != %zd, p %p", - n, impl->name, align, - CALL (impl, (char *) (p + align), len + 1), len, p); - ret = 1; - } - } - } -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - printf ("%20s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i = 1; i < 8; ++i) - { - do_test (0, i, i - 1, 127); - do_test (0, i, i, 127); - do_test (0, i, i + 1, 127); - } - - for (i = 1; i < 8; ++i) - { - do_test (i, i, i - 1, 127); - do_test (i, i, i, 127); - do_test (i, i, i + 1, 127); - } - - for (i = 2; i <= 10; ++i) - { - do_test (0, 1 << i, 5000, 127); - do_test (1, 1 << i, 5000, 127); - } - - for (i = 1; i < 8; ++i) - do_test (0, i, 5000, 255); - - for (i = 1; i < 8; ++i) - do_test (i, i, 5000, 255); - - for (i = 2; i <= 10; ++i) - { - do_test (0, 1 << i, 5000, 255); - do_test (1, 1 << i, 5000, 255); - } - - do_random_tests (); - return ret; -} - -#include "test-skeleton.c" diff --git a/tests/test-strrchr.c b/tests/test-strrchr.c deleted file mode 100644 index 7d57fc0..0000000 --- a/tests/test-strrchr.c +++ /dev/null @@ -1,229 +0,0 @@ -/* Test and measure STRCHR functions. - Copyright (C) 1999-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jakub Jelinek <jakub@redhat.com>, 1999. - Added wcsrrchr support by Liubov Dmitrieva <liubov.dmitrieva@gmail.com>, - 2011. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define TEST_MAIN -#include "test-string.h" -#include <limits.h> - -#define BIG_CHAR CHAR_MAX -#define SMALL_CHAR 127 - -typedef char *(*proto_t) (const char *, int); -char *simple_strrchr (const char *, int); - -IMPL (simple_strrchr, 0) -IMPL (strrchr, 1) - -char * -simple_strrchr (const char *s, int c) -{ - const char *ret = NULL; - - for (; *s != '\0'; ++s) - if (*s == (char) c) - ret = s; - - return (char *) (c == '\0' ? s : ret); -} - -static void -do_one_test (impl_t *impl, const char *s, int c, char *exp_res) -{ - char *res = CALL (impl, s, c); - if (res != exp_res) - { - error (0, 0, "Wrong result in function %s %p %p", impl->name, - res, exp_res); - ret = 1; - return; - } -} - -static void -do_test (size_t align, size_t pos, size_t len, int seek_char, int max_char) -/* For wcsrchr: align here means align not in bytes, - but in wchar_ts, in bytes it will equal to align * (sizeof (wchar_t)) - len for wcschr here isn't in bytes but it's number of wchar_t symbols. 
*/ -{ - size_t i; - char *result; - char *buf = (char *) buf1; - - align &= 7; - if ( (align + len) * sizeof(char) >= page_size) - return; - - for (i = 0; i < len; ++i) - { - buf[align + i] = (random () * random ()) & max_char; - if (!buf[align + i]) - buf[align + i] = (random () * random ()) & max_char; - if (!buf[align + i]) - buf[align + i] = 1; - if ((i > pos || pos >= len) && buf[align + i] == seek_char) - buf[align + i] = seek_char + 10 + (random () & 15); - } - buf[align + len] = 0; - - if (pos < len) - { - buf[align + pos] = seek_char; - result = (char *) (buf + align + pos); - } - else if (seek_char == 0) - result = (char *) (buf + align + len); - else - result = NULL; - - FOR_EACH_IMPL (impl, 0) - do_one_test (impl, (char *) (buf + align), seek_char, result); -} - -static void -do_random_tests (void) -{ - size_t i, j, n, align, pos, len; - int seek_char; - char *result; - unsigned char *p = (unsigned char *) (buf1 + page_size) - 512; - - for (n = 0; n < ITERATIONS; n++) - { - align = random () & (63 / sizeof(char)); - /* For wcsrchr: align here means align not in bytes, but in wchar_ts, - in bytes it will equal to align * (sizeof (wchar_t)). - For strrchr we need to check all alignments from 0 to 63 since - some assembly implementations have separate prolog for alignments - more 48. */ - pos = random () & 511; - if (pos + align >= 511) - pos = 510 - align - (random () & 7); - len = random () & 511; - /* len for wcschr here isn't in bytes but it's number of wchar_t - symbols. 
*/ - if (pos >= len) - len = pos + (random () & 7); - if (len + align >= 512) - len = 511 - align - (random () & 7); - seek_char = random () & 255; - if (seek_char && pos == len) - { - if (pos) - --pos; - else - ++len; - } - j = len + align + 64; - if (j > 512) - j = 512; - - for (i = 0; i < j; i++) - { - if (i == pos + align) - p[i] = seek_char; - else if (i == len + align) - p[i] = 0; - else - { - p[i] = random () & 255; - if (((i > pos + align && i < len + align) || pos > len) - && p[i] == seek_char) - p[i] = seek_char + 13; - if (i < len + align && !p[i]) - { - p[i] = seek_char - 13; - if (!p[i]) - p[i] = 140; - } - } - } - - if (pos <= len) - result = (char *) (p + pos + align); - else if (seek_char == 0) - result = (char *) (p + len + align); - else - result = NULL; - - FOR_EACH_IMPL (impl, 1) - if (CALL (impl, (char *) (p + align), seek_char) != result) - { - error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %d, %zd, %zd) %p != %p, p %p", - n, impl->name, align, seek_char, len, pos, - CALL (impl, (char *) (p + align), seek_char), result, p); - ret = 1; - } - } -} - -int -test_main (void) -{ - size_t i; - - test_init (); - - printf ("%20s", ""); - FOR_EACH_IMPL (impl, 0) - printf ("\t%s", impl->name); - putchar ('\n'); - - for (i = 1; i < 8; ++i) - { - do_test (0, 16 << i, 2048, 23, SMALL_CHAR); - do_test (i, 16 << i, 2048, 23, SMALL_CHAR); - } - - for (i = 1; i < 8; ++i) - { - do_test (i, 64, 256, 23, SMALL_CHAR); - do_test (i, 64, 256, 23, BIG_CHAR); - } - - for (i = 0; i < 32; ++i) - { - do_test (0, i, i + 1, 23, SMALL_CHAR); - do_test (0, i, i + 1, 23, BIG_CHAR); - } - - for (i = 1; i < 8; ++i) - { - do_test (0, 16 << i, 2048, 0, SMALL_CHAR); - do_test (i, 16 << i, 2048, 0, SMALL_CHAR); - } - - for (i = 1; i < 8; ++i) - { - do_test (i, 64, 256, 0, SMALL_CHAR); - do_test (i, 64, 256, 0, BIG_CHAR); - } - - for (i = 0; i < 32; ++i) - { - do_test (0, i, i + 1, 0, SMALL_CHAR); - do_test (0, i, i + 1, 0, BIG_CHAR); - } - - do_random_tests (); - 
return ret; -} - -#include "test-skeleton.c" |