From 400e266a3527bd3a8c5175d71dee2f723f427633 Mon Sep 17 00:00:00 2001
From: Jacek Maksymowicz <jacek.maksymowicz@phoenix-rtos.com>
Date: Thu, 9 Jan 2025 16:55:04 +0100
Subject: [PATCH] arch/aarch64: implement more functions with FPU instructions

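Implement fabs(), ceil(), floor(), round() and trunc() and their float
counterparts with dedicated FPU instructions on AArch64.

The float variants (fabsf(), ceilf(), floorf(), roundf(), truncf()) are
now real functions defined in math/exp.c instead of macros in math.h.
When the architecture header provides no FPU implementation, each of
them falls back to the corresponding double-precision function.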

JIRA: RTOS-949
---
 include/arch/aarch64/arch.h | 32 +++++++++++++++++
 include/math.h              | 15 +++-----
 math/exp.c                  | 70 +++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 10 deletions(-)

diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h
index a36a338c..2786d9ef 100644
--- a/include/arch/aarch64/arch.h
+++ b/include/arch/aarch64/arch.h
@@ -24,11 +24,43 @@
 #define __MEMMOVE
 
 #ifndef __SOFTFP__
+/* clang-format off */
 #define __IEEE754_SQRT
 #define __ieee754_sqrt(x) ({ double a = (x); __asm__ ("fsqrt %d0, %d1" : "=w"(a) : "w"(a)); a; })
 
 #define __IEEE754_SQRTF
 #define __ieee754_sqrtf(x) ({ float a = (x); __asm__ ("fsqrt %s0, %s1" : "=w"(a) : "w"(a)); a; })
+/* Temporaries are named __v (reserved) so that an argument spelled like the temporary cannot self-initialize it. */
+#define __IEEE754_FABS
+#define __ieee754_fabs(x) ({ double __v = (x); __asm__ ("fabs %d0, %d1" : "=w"(__v) : "w"(__v)); __v; })
+
+#define __IEEE754_FABSF
+#define __ieee754_fabsf(x) ({ float __v = (x); __asm__ ("fabs %s0, %s1" : "=w"(__v) : "w"(__v)); __v; })
+/* FRINTP rounds to an integral value toward plus infinity. */
+#define __IEEE754_CEIL
+#define __ieee754_ceil(x) ({ double __v = (x); __asm__ ("frintp %d0, %d1" : "=w"(__v) : "w"(__v)); __v; })
+
+#define __IEEE754_CEILF
+#define __ieee754_ceilf(x) ({ float __v = (x); __asm__ ("frintp %s0, %s1" : "=w"(__v) : "w"(__v)); __v; })
+/* FRINTM rounds to an integral value toward minus infinity. */
+#define __IEEE754_FLOOR
+#define __ieee754_floor(x) ({ double __v = (x); __asm__ ("frintm %d0, %d1" : "=w"(__v) : "w"(__v)); __v; })
+
+#define __IEEE754_FLOORF
+#define __ieee754_floorf(x) ({ float __v = (x); __asm__ ("frintm %s0, %s1" : "=w"(__v) : "w"(__v)); __v; })
+/* FRINTA rounds to the nearest integral value, ties away from zero. */
+#define __IEEE754_ROUND
+#define __ieee754_round(x) ({ double __v = (x); __asm__ ("frinta %d0, %d1" : "=w"(__v) : "w"(__v)); __v; })
+
+#define __IEEE754_ROUNDF
+#define __ieee754_roundf(x) ({ float __v = (x); __asm__ ("frinta %s0, %s1" : "=w"(__v) : "w"(__v)); __v; })
+/* FRINTZ rounds to an integral value toward zero. */
+#define __IEEE754_TRUNC
+#define __ieee754_trunc(x) ({ double __v = (x); __asm__ ("frintz %d0, %d1" : "=w"(__v) : "w"(__v)); __v; })
+
+#define __IEEE754_TRUNCF
+#define __ieee754_truncf(x) ({ float __v = (x); __asm__ ("frintz %s0, %s1" : "=w"(__v) : "w"(__v)); __v; })
+/* clang-format on */
 #endif
 
 #define _PAGE_SIZE 0x1000
diff --git a/include/math.h b/include/math.h
index c80ea4ad..3353192c 100644
--- a/include/math.h
+++ b/include/math.h
@@ -152,10 +152,12 @@ extern double sqrt(double x);
 
 /* Rounds x upward, returning the smallest integral value that is not less than x. */
 extern double ceil(double x);
+extern float ceilf(float x);
 
 
 /* Rounds x downward, returning the largest integral value that is not greater than x. */
 extern double floor(double x);
+extern float floorf(float x);
 
 
 /* Returns the floating-point remainder of numer/denom (rounded towards zero). */
@@ -164,10 +166,12 @@ extern double fmod(double numer, double denom);
 
 /* Return the integral value nearest to x */
 extern double round(double x);
+extern float roundf(float x);
 
 
 /* Rounds x toward zero, returning the nearest integral value that is not larger in magnitude than x. */
 extern double trunc(double x);
+extern float truncf(float x);
 
 
 /* Miscellaneous */
@@ -175,6 +179,7 @@ extern double trunc(double x);
 
 /* Returns the absolute value of x: |x|. */
 extern double fabs(double x);
+extern float fabsf(float x);
 
 
 /* C99 extensions */
@@ -195,12 +200,7 @@ float logf(float x);
 float log10f(float x);
 float powf(float base, float exponent);
 float sqrtf(float x);
-float roundf(float x);
-float ceilf(float x);
-float floorf(float x);
 float fmodf(float num, float denom);
-float truncf(float x);
-float fabsf(float x);
 
 
 #define cosf(x)              ((float)cos(x))
@@ -219,12 +219,7 @@ float fabsf(float x);
 #define logf(x)              ((float)log(x))
 #define log10f(x)            ((float)log10(x))
 #define powf(base, exponent) ((float)pow(base, exponent))
-#define roundf(x)            ((float)round(x))
-#define ceilf(x)             ((float)ceil(x))
-#define floorf(x)            ((float)floor(x))
 #define fmodf(num, denom)    ((float)fmod(num, denom))
-#define truncf(x)            ((float)trunc(x))
-#define fabsf(x)             ((float)fabs(x))
 
 
 #ifdef __cplusplus
diff --git a/math/exp.c b/math/exp.c
index f9f0c5d4..d22ca0c4 100644
--- a/math/exp.c
+++ b/math/exp.c
@@ -231,6 +231,9 @@ double exp(double x)
 
 double ceil(double x)
 {
+#ifdef __IEEE754_CEIL
+	return __ieee754_ceil(x);
+#else
 	double ipart, fpart;
 
 	if (isnan(x) != 0) {
@@ -244,11 +247,25 @@ double ceil(double x)
 	}
 
 	return ipart;
+#endif
+}
+
+
+float ceilf(float x)
+{
+#ifdef __IEEE754_CEILF
+	return __ieee754_ceilf(x); /* architecture header supplies a single-precision implementation */
+#else
+	return (float)ceil(x); /* generic path: x widens to double for ceil(), result is narrowed back */
+#endif /* __IEEE754_CEILF */
 }
 
 
 double floor(double x)
 {
+#ifdef __IEEE754_FLOOR
+	return __ieee754_floor(x);
+#else
 	double ipart, fpart;
 
 	if (isnan(x) != 0) {
@@ -262,6 +279,17 @@ double floor(double x)
 	}
 
 	return ipart;
+#endif
+}
+
+
+float floorf(float x)
+{
+#ifdef __IEEE754_FLOORF
+	return __ieee754_floorf(x); /* architecture header supplies a single-precision implementation */
+#else
+	return (float)floor(x); /* generic path: x widens to double for floor(), result is narrowed back */
+#endif /* __IEEE754_FLOORF */
 }
 
 
@@ -286,6 +314,9 @@ double fmod(double number, double denom)
 
 double round(double x)
 {
+#ifdef __IEEE754_ROUND
+	return __ieee754_round(x);
+#else
 	double ret, frac;
 
 	frac = modf(x, &ret);
@@ -298,24 +329,63 @@ double round(double x)
 	}
 
 	return ret;
+#endif
+}
+
+
+float roundf(float x)
+{
+#ifdef __IEEE754_ROUNDF
+	return __ieee754_roundf(x); /* architecture header supplies a single-precision implementation */
+#else
+	return (float)round(x); /* generic path: x widens to double for round(), result is narrowed back */
+#endif /* __IEEE754_ROUNDF */
 }
 
 
 double trunc(double x)
 {
+#ifdef __IEEE754_TRUNC
+	return __ieee754_trunc(x); /* architecture header supplies an implementation */
+#else /* portable path: return only the integral part reported by modf() */
 	double ret;
 
 	modf(x, &ret);
 
 	return ret;
+#endif /* __IEEE754_TRUNC */
+}
+
+
+float truncf(float x)
+{
+#ifdef __IEEE754_TRUNCF
+	return __ieee754_truncf(x); /* architecture header supplies a single-precision implementation */
+#else
+	return (float)trunc(x); /* generic path: x widens to double for trunc(), result is narrowed back */
+#endif /* __IEEE754_TRUNCF */
 }
 
 
 double fabs(double x)
 {
+#ifdef __IEEE754_FABS
+	return __ieee754_fabs(x); /* architecture header supplies an implementation */
+#else /* portable path: clear the sign field through the conv_t view of x */
 	conv_t *conv = (conv_t *)&x;
 
 	conv->i.sign = 0;
 
 	return x;
+#endif /* __IEEE754_FABS */
+}
+
+
+float fabsf(float x)
+{
+#ifdef __IEEE754_FABSF
+	return __ieee754_fabsf(x); /* architecture header supplies a single-precision implementation */
+#else
+	return (float)fabs(x); /* generic path: x widens to double for fabs(), result is narrowed back */
+#endif /* __IEEE754_FABSF */
 }