From 400e266a3527bd3a8c5175d71dee2f723f427633 Mon Sep 17 00:00:00 2001 From: Jacek Maksymowicz Date: Thu, 9 Jan 2025 16:55:04 +0100 Subject: [PATCH] arch/aarch64: implement more functions with FPU instructions Implement fabs(), ceil(), floor(), round() and trunc() and their float counterparts with dedicated FPU instructions on AArch64. JIRA: RTOS-949 --- include/arch/aarch64/arch.h | 32 +++++++++++++++++ include/math.h | 15 +++----- math/exp.c | 70 +++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 10 deletions(-) diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h index a36a338c..2786d9ef 100644 --- a/include/arch/aarch64/arch.h +++ b/include/arch/aarch64/arch.h @@ -24,11 +24,43 @@ #define __MEMMOVE #ifndef __SOFTFP__ +/* clang-format off */ #define __IEEE754_SQRT #define __ieee754_sqrt(x) ({ double a = (x); __asm__ ("fsqrt %d0, %d1" : "=w"(a) : "w"(a)); a; }) #define __IEEE754_SQRTF #define __ieee754_sqrtf(x) ({ float a = (x); __asm__ ("fsqrt %s0, %s1" : "=w"(a) : "w"(a)); a; }) + +#define __IEEE754_FABS +#define __ieee754_fabs(x) ({ double a = (x); __asm__ ("fabs %d0, %d1" : "=w"(a) : "w"(a)); a; }) + +#define __IEEE754_FABSF +#define __ieee754_fabsf(x) ({ float a = (x); __asm__ ("fabs %s0, %s1" : "=w"(a) : "w"(a)); a; }) + +#define __IEEE754_CEIL +#define __ieee754_ceil(x) ({ double a = (x); __asm__ ("frintp %d0, %d1" : "=w"(a) : "w"(a)); a; }) + +#define __IEEE754_CEILF +#define __ieee754_ceilf(x) ({ float a = (x); __asm__ ("frintp %s0, %s1" : "=w"(a) : "w"(a)); a; }) + +#define __IEEE754_FLOOR +#define __ieee754_floor(x) ({ double a = (x); __asm__ ("frintm %d0, %d1" : "=w"(a) : "w"(a)); a; }) + +#define __IEEE754_FLOORF +#define __ieee754_floorf(x) ({ float a = (x); __asm__ ("frintm %s0, %s1" : "=w"(a) : "w"(a)); a; }) + +#define __IEEE754_ROUND +#define __ieee754_round(x) ({ double a = (x); __asm__ ("frinta %d0, %d1" : "=w"(a) : "w"(a)); a; }) + +#define __IEEE754_ROUNDF +#define __ieee754_roundf(x) ({ float a = (x); __asm__ ("frinta %s0, %s1" : "=w"(a) : "w"(a)); a; }) + +#define __IEEE754_TRUNC +#define __ieee754_trunc(x) ({ double a = (x); __asm__ ("frintz %d0, %d1" : "=w"(a) : "w"(a)); a; }) + +#define __IEEE754_TRUNCF +#define __ieee754_truncf(x) ({ float a = (x); __asm__ ("frintz %s0, %s1" : "=w"(a) : "w"(a)); a; }) +/* clang-format on */ #endif #define _PAGE_SIZE 0x1000 diff --git a/include/math.h b/include/math.h index c80ea4ad..3353192c 100644 --- a/include/math.h +++ b/include/math.h @@ -152,10 +152,12 @@ extern double sqrt(double x); /* Rounds x upward, returning the smallest integral value that is not less than x. */ extern double ceil(double x); +extern float ceilf(float x); /* Rounds x downward, returning the largest integral value that is not greater than x. */ extern double floor(double x); +extern float floorf(float x); /* Returns the floating-point remainder of numer/denom (rounded towards zero). */ @@ -164,10 +166,12 @@ extern double fmod(double numer, double denom); /* Return the integral value nearest to x */ extern double round(double x); +extern float roundf(float x); /* Rounds x toward zero, returning the nearest integral value that is not larger in magnitude than x. */ extern double trunc(double x); +extern float truncf(float x); /* Miscellaneous */ @@ -175,6 +179,7 @@ extern double trunc(double x); /* Returns the absolute value of x: |x|. */ extern double fabs(double x); +extern float fabsf(float x); /* C99 extensions */ @@ -195,12 +200,7 @@ float logf(float x); float log10f(float x); float powf(float base, float exponent); float sqrtf(float x); -float roundf(float x); -float ceilf(float x); -float floorf(float x); float fmodf(float num, float denom); -float truncf(float x); -float fabsf(float x); #define cosf(x) ((float)cos(x)) @@ -219,12 +219,7 @@ float fabsf(float x); #define logf(x) ((float)log(x)) #define log10f(x) ((float)log10(x)) #define powf(base, exponent) ((float)pow(base, exponent)) -#define roundf(x) ((float)round(x)) -#define ceilf(x) ((float)ceil(x)) -#define floorf(x) ((float)floor(x)) #define fmodf(num, denom) ((float)fmod(num, denom)) -#define truncf(x) ((float)trunc(x)) -#define fabsf(x) ((float)fabs(x)) #ifdef __cplusplus diff --git a/math/exp.c b/math/exp.c index f9f0c5d4..d22ca0c4 100644 --- a/math/exp.c +++ b/math/exp.c @@ -231,6 +231,9 @@ double exp(double x) double ceil(double x) { +#ifdef __IEEE754_CEIL + return __ieee754_ceil(x); +#else double ipart, fpart; if (isnan(x) != 0) { @@ -244,11 +247,25 @@ double ceil(double x) } return ipart; +#endif +} + + +float ceilf(float x) +{ +#ifdef __IEEE754_CEILF + return __ieee754_ceilf(x); +#else + return (float)ceil(x); +#endif } double floor(double x) { +#ifdef __IEEE754_FLOOR + return __ieee754_floor(x); +#else double ipart, fpart; if (isnan(x) != 0) { @@ -262,6 +279,17 @@ double floor(double x) } return ipart; +#endif +} + + +float floorf(float x) +{ +#ifdef __IEEE754_FLOORF + return __ieee754_floorf(x); +#else + return (float)floor(x); +#endif } @@ -286,6 +314,9 @@ double fmod(double number, double denom) double round(double x) { +#ifdef __IEEE754_ROUND + return __ieee754_round(x); +#else double ret, frac; frac = modf(x, &ret); @@ -298,24 +329,63 @@ double round(double x) } return ret; +#endif +} + + +float roundf(float x) +{ +#ifdef __IEEE754_ROUNDF + return __ieee754_roundf(x); +#else + return (float)round(x); +#endif } double trunc(double x) { +#ifdef __IEEE754_TRUNC + return __ieee754_trunc(x); +#else double ret; modf(x, &ret); return ret; +#endif +} + + +float truncf(float x) +{ +#ifdef __IEEE754_TRUNCF + return __ieee754_truncf(x); +#else + return (float)trunc(x); +#endif } double fabs(double x) { +#ifdef __IEEE754_FABS + return __ieee754_fabs(x); +#else conv_t *conv = (conv_t *)&x; conv->i.sign = 0; return x; +#endif +} + + +float fabsf(float x) +{ +#ifdef __IEEE754_FABSF + return __ieee754_fabsf(x); +#else + return (float)fabs(x); +#endif }