87 lines
3.1 KiB
C
87 lines
3.1 KiB
C
#include <stdlib.h>
|
|
|
|
// WARNING: When building the scalar versions of these functions you need to
|
|
// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
|
|
// from recognising a loop idiom and planting calls to memcpy!
|
|
|
|
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
|
|
size_t n) __arm_streaming_compatible {
|
|
unsigned char *destp = (unsigned char *)dest;
|
|
const unsigned char *srcp = (const unsigned char *)src;
|
|
for (size_t i = 0; i < n; ++i)
|
|
destp[i] = srcp[i];
|
|
|
|
return dest;
|
|
}
|
|
|
|
// If dest and src overlap then behaviour is undefined, hence we can add the
|
|
// restrict keywords here. This also matches the definition of the libc memcpy
|
|
// according to the man page.
|
|
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
|
|
size_t n) __arm_streaming_compatible {
|
|
return __arm_sc_memcpy_fwd(dest, src, n);
|
|
}
|
|
|
|
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
|
|
unsigned char *destp = (unsigned char *)dest;
|
|
unsigned char c8 = (unsigned char)c;
|
|
for (size_t i = 0; i < n; ++i)
|
|
destp[i] = c8;
|
|
|
|
return dest;
|
|
}
|
|
|
|
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
|
|
size_t n) __arm_streaming_compatible {
|
|
unsigned char *destp = (unsigned char *)dest;
|
|
const unsigned char *srcp = (const unsigned char *)src;
|
|
// TODO: Improve performance by copying larger chunks in reverse, or by
|
|
// using SVE.
|
|
while (n > 0) {
|
|
--n;
|
|
destp[n] = srcp[n];
|
|
}
|
|
return dest;
|
|
}
|
|
|
|
// Semantically a memmove is equivalent to the following:
|
|
// 1. Copy the entire contents of src to a temporary array that does not
|
|
// overlap with src or dest.
|
|
// 2. Copy the contents of the temporary array into dest.
|
|
void *__arm_sc_memmove(void *dest, const void *src,
|
|
size_t n) __arm_streaming_compatible {
|
|
unsigned char *destp = (unsigned char *)dest;
|
|
const unsigned char *srcp = (const unsigned char *)src;
|
|
|
|
// If src and dest don't overlap then just invoke memcpy
|
|
if ((srcp > (destp + n)) || (destp > (srcp + n)))
|
|
return __arm_sc_memcpy_fwd(dest, src, n);
|
|
|
|
// Overlap case 1:
|
|
// src: Low | -> | High
|
|
// dest: Low | -> | High
|
|
// Here src is always ahead of dest at a higher addres. If we first read a
|
|
// chunk of data from src we can safely write the same chunk to dest without
|
|
// corrupting future reads of src.
|
|
if (srcp > destp)
|
|
return __arm_sc_memcpy_fwd(dest, src, n);
|
|
|
|
// Overlap case 2:
|
|
// src: Low | -> | High
|
|
// dest: Low | -> | High
|
|
// While we're in the overlap region we're always corrupting future reads of
|
|
// src when writing to dest. An efficient way to do this is to copy the data
|
|
// in reverse by starting at the highest address.
|
|
return __arm_sc_memcpy_rev(dest, src, n);
|
|
}
|
|
|
|
const void *__arm_sc_memchr(const void *src, int c,
|
|
size_t n) __arm_streaming_compatible {
|
|
const unsigned char *srcp = (const unsigned char *)src;
|
|
unsigned char c8 = (unsigned char)c;
|
|
for (size_t i = 0; i < n; ++i)
|
|
if (srcp[i] == c8)
|
|
return &srcp[i];
|
|
|
|
return NULL;
|
|
}
|