LMMS
Loading...
Searching...
No Matches
denormal.h
Go to the documentation of this file.
#ifndef _WDL_DENORMAL_H_
#define _WDL_DENORMAL_H_

#include <string.h>
#include "wdltypes.h"
// note: the _aggressive versions flush anything smaller than roughly 1.0e-16 to 0.0, including -0.0 (which becomes 0.0)
// note: the _aggressive versions also flush inf and NaN to 0.0
8
// WDL_DENORMAL_INLINE: specifier placed on every helper in this header.
//  - C++ builds:   `inline`
//  - MSVC C builds: `__inline`
//  - other C builds: WDL_STATICFUNC_UNUSED when that macro is defined,
//    otherwise nothing (the helpers are then plain `static` functions).
#ifdef __cplusplus
#define WDL_DENORMAL_INLINE inline
#elif defined(_MSC_VER)
#define WDL_DENORMAL_INLINE __inline
#else
  #ifdef WDL_STATICFUNC_UNUSED
    #define WDL_DENORMAL_INLINE WDL_STATICFUNC_UNUSED
  #else
    #define WDL_DENORMAL_INLINE
  #endif
#endif
20
21static WDL_DENORMAL_INLINE unsigned int WDL_DENORMAL_FLOAT_W(const float *a) { unsigned int v; memcpy(&v,a,sizeof(v)); return v; }
22static WDL_DENORMAL_INLINE unsigned int WDL_DENORMAL_DOUBLE_HW(const double *a) { WDL_UINT64 v; memcpy(&v,(char*)a,sizeof(v)); return (unsigned int) (v>>32); }
23
// Thresholds used by the *_aggressive helpers. Each is compared against the
// exponent field of the value after that field has been bumped by one
// (see the helpers), so only the exponent bits of these constants matter.
#define WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF 0x3cA00000 // 0x3B800000 maybe instead? that's 10^-5 smaller or so
#define WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF 0x25000000
26
27
// Define WDL_DENORMAL_WANTS_SCOPED_FTZ, and then use a WDL_denormal_ftz_scope in addition to denormal_*();
// if FTZ (flush-to-zero) is available on the target, it will be used instead of the software filters.
//
// Scoped hardware flush-to-zero support (opt-in).
// The scope helper below is a C++ class, so the opt-in is honoured only in
// C++ translation units; C translation units keep the software filters.
#if defined(WDL_DENORMAL_WANTS_SCOPED_FTZ) && defined(__cplusplus)

// Select the control-register accessors, the register-width type and the
// bit mask to set for the current target. WDL_DENORMAL_FTZMODE is defined
// only when one of the branches below matches.
#if defined(__SSE2__) || _M_IX86_FP >= 2 || defined(_WIN64)
  #define WDL_DENORMAL_FTZMODE
  #define WDL_DENORMAL_FTZSTATE_TYPE unsigned int
  #ifdef _MSC_VER
    #include <intrin.h>
  #else
    #include <xmmintrin.h>
  #endif
  #define wdl_denorm_mm_getcsr() _mm_getcsr()
  #define wdl_denorm_mm_setcsr(x) _mm_setcsr(x)
  #if defined(__SSE3__)
    #define wdl_denorm_mm_csr_mask ((1<<15)|(1<<11) | (1<<8) | (1<<6)) // FTZ, underflow, denormal mask, DAZ
  #else
    #define wdl_denorm_mm_csr_mask ((1<<15)|(1<<11)) // FTZ and underflow only (target SSE2)
  #endif
#elif defined(__arm__) || defined(__aarch64__)
  #define WDL_DENORMAL_FTZMODE
  #define WDL_DENORMAL_FTZSTATE_TYPE unsigned long

  // Reads the floating-point control register: fpcr on AArch64, fpscr otherwise.
  static unsigned long __attribute__((unused)) wdl_denorm_mm_getcsr()
  {
    unsigned long rv;
#ifdef __aarch64__
    asm volatile ( "mrs %0, fpcr" : "=r" (rv));
#else
    asm volatile ( "fmrx %0, fpscr" : "=r" (rv));
#endif
    return rv;
  }

  // Writes v to the floating-point control register: fpcr on AArch64, fpscr otherwise.
  static void __attribute__((unused)) wdl_denorm_mm_setcsr(unsigned long v)
  {
#ifdef __aarch64__
    asm volatile ( "msr fpcr, %0" :: "r"(v));
#else
    asm volatile ( "fmxr fpscr, %0" :: "r"(v));
#endif
  }

  // NOTE(review): bit 24 is presumably the flush-to-zero (FZ) control bit -- confirm against the Arm reference manual.
  #define wdl_denorm_mm_csr_mask (1<<24)
#endif

// Scope guard for the flush-to-zero bits.
// The constructor reads the control register and, if any bit of
// wdl_denorm_mm_csr_mask is clear, writes the register back with all mask
// bits set. The destructor restores the saved register value, but only if
// the constructor actually changed it.
// When WDL_DENORMAL_FTZMODE is not defined (no supported target matched),
// the class has no members and both constructor and destructor do nothing.
class WDL_denormal_ftz_scope
{
  public:
    WDL_denormal_ftz_scope()
    {
#ifdef WDL_DENORMAL_FTZMODE
      const WDL_DENORMAL_FTZSTATE_TYPE b = wdl_denorm_mm_csr_mask;
      old_state = wdl_denorm_mm_getcsr();
      need_restore = (old_state & b) != b; // true when at least one mask bit was clear
      if (need_restore)
        wdl_denorm_mm_setcsr(old_state|b);
#endif
    }
    ~WDL_denormal_ftz_scope()
    {
#ifdef WDL_DENORMAL_FTZMODE
      if (need_restore) wdl_denorm_mm_setcsr(old_state);
#endif
    }

#ifdef WDL_DENORMAL_FTZMODE
    WDL_DENORMAL_FTZSTATE_TYPE old_state; // register value observed by the constructor
    bool need_restore;                    // whether the destructor must write old_state back
#endif

};


#endif // WDL_DENORMAL_WANTS_SCOPED_FTZ && __cplusplus
100
101
102#if !defined(WDL_DENORMAL_FTZMODE) && !defined(WDL_DENORMAL_DO_NOT_FILTER)
103
105{
106 return (WDL_DENORMAL_DOUBLE_HW(&a)&0x7ff00000) ? a : 0.0;
107}
108
110{
111 return ((WDL_DENORMAL_DOUBLE_HW(&a)+0x100000)&0x7ff00000) > 0x100000 ? a : 0.0;
112}
113
115{
116 return ((WDL_DENORMAL_DOUBLE_HW(&a)+0x100000)&0x7ff00000) >= WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF ? a : 0.0;
117}
118
120{
121 return (WDL_DENORMAL_FLOAT_W(&a)&0x7f800000) ? a : 0.0f;
122}
123
125{
126 return ((WDL_DENORMAL_FLOAT_W(&a)+0x800000)&0x7f800000) > 0x800000 ? a : 0.0f;
127}
128
129
131{
132 return ((WDL_DENORMAL_FLOAT_W(&a)+0x800000)&0x7f800000) >= WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF ? a : 0.0f;
133}
135{
136 if (!(WDL_DENORMAL_DOUBLE_HW(a)&0x7ff00000)) *a=0.0;
137}
138
140{
141 if (((WDL_DENORMAL_DOUBLE_HW(a)+0x100000)&0x7ff00000) < WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF) *a=0.0;
142}
143
145{
146 if (!(WDL_DENORMAL_FLOAT_W(a)&0x7f800000)) *a=0.0f;
147}
149{
150 if (((WDL_DENORMAL_FLOAT_W(a)+0x800000)&0x7f800000) < WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF) *a=0.0f;
151}
152
153
154
155#ifdef __cplusplus // automatic typed versions (though one should probably use the explicit versions...
156
157
158static double WDL_DENORMAL_INLINE denormal_filter(double a)
159{
160 return (WDL_DENORMAL_DOUBLE_HW(&a)&0x7ff00000) ? a : 0.0;
161}
162static double WDL_DENORMAL_INLINE denormal_filter_aggressive(double a)
163{
164 return ((WDL_DENORMAL_DOUBLE_HW(&a)+0x100000)&0x7ff00000) >= WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF ? a : 0.0;
165}
166
167static float WDL_DENORMAL_INLINE denormal_filter(float a)
168{
169 return (WDL_DENORMAL_FLOAT_W(&a)&0x7f800000) ? a : 0.0f;
170}
171
172static float WDL_DENORMAL_INLINE denormal_filter_aggressive(float a)
173{
174 return ((WDL_DENORMAL_FLOAT_W(&a)+0x800000)&0x7f800000) >= WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF ? a : 0.0f;
175}
176
177static void WDL_DENORMAL_INLINE denormal_fix(double *a)
178{
179 if (!(WDL_DENORMAL_DOUBLE_HW(a)&0x7ff00000)) *a=0.0;
180}
181static void WDL_DENORMAL_INLINE denormal_fix_aggressive(double *a)
182{
183 if (((WDL_DENORMAL_DOUBLE_HW(a)+0x100000)&0x7ff00000) < WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF) *a=0.0;
184}
185static void WDL_DENORMAL_INLINE denormal_fix(float *a)
186{
187 if (!(WDL_DENORMAL_FLOAT_W(a)&0x7f800000)) *a=0.0f;
188}
189static void WDL_DENORMAL_INLINE denormal_fix_aggressive(float *a)
190{
191 if (((WDL_DENORMAL_FLOAT_W(a)+0x800000)&0x7f800000) < WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF) *a=0.0f;
192}
193
194
195
196#endif // cplusplus versions
197
198#else // end of !WDL_DENORMAL_DO_NOT_FILTER (and other platform-specific checks)
199
// No-op replacements for the whole denormal_* API.
// The filter macros expand to their argument unchanged; the fix macros
// expand to an empty statement and do NOT evaluate their argument.
#define denormal_filter(x) (x)
#define denormal_filter2(x) (x)
#define denormal_filter_double(x) (x)
#define denormal_filter_double2(x) (x)
#define denormal_filter_double_aggressive(x) (x)
#define denormal_filter_float(x) (x)
#define denormal_filter_float2(x) (x)
#define denormal_filter_float_aggressive(x) (x)
#define denormal_filter_aggressive(x) (x)
#define denormal_fix(x) do { } while(0)
#define denormal_fix_aggressive(x) do { } while(0)
#define denormal_fix_double(x) do { } while(0)
#define denormal_fix_double_aggressive(x) do { } while(0)
#define denormal_fix_float(x) do { } while(0)
#define denormal_fix_float_aggressive(x) do { } while(0)
215
216#endif
217
218
// this isn't a denormal function, but it is similar, so we'll put it here as a bonus
221
222static void WDL_DENORMAL_INLINE GetDoubleMaxAbsValue(double *out, const double *in) // note: the value pointed to by "out" must be >=0.0, __NOT__ <= -0.0
223{
224 WDL_UINT64 i, o;
225 memcpy(&i,in,sizeof(i));
226 memcpy(&o,out,sizeof(o));
227 i &= WDL_UINT64_CONST(0x7fffffffffffffff);
228 if (i > o) memcpy(out,&i,sizeof(i));
229}
230
231static void WDL_DENORMAL_INLINE GetFloatMaxAbsValue(float *out, const float *in) // note: the value pointed to by "out" must be >=0.0, __NOT__ <= -0.0
232{
233 unsigned int i, o;
234 memcpy(&i, in, sizeof(i));
235 memcpy(&o, out, sizeof(o));
236 i &= 0x7fffffff;
237 if (i > o) memcpy(out, &i, sizeof(i));
238}
239
240
241#ifdef __cplusplus
242static void WDL_DENORMAL_INLINE GetFloatMaxAbsValue(double *out, const double *in) // note: the value pointed to by "out" must be >=0.0, __NOT__ <= -0.0
243{
245}
246#endif
247
248#endif
uint8_t a
Definition Spc_Cpu.h:141
__attribute__((naked, target("arm")))
Definition asm-nseel-arm-gcc.c:66
unsigned v[N_MAX]
Definition inflate.c:1584
register unsigned i
Definition inflate.c:1575
static float WDL_DENORMAL_INLINE denormal_filter_float2(float a)
Definition denormal.h:124
static void WDL_DENORMAL_INLINE denormal_fix_float_aggressive(float *a)
Definition denormal.h:148
static float WDL_DENORMAL_INLINE denormal_filter_float_aggressive(float a)
Definition denormal.h:130
static double WDL_DENORMAL_INLINE denormal_filter_double(double a)
Definition denormal.h:104
static WDL_DENORMAL_INLINE unsigned int WDL_DENORMAL_DOUBLE_HW(const double *a)
Definition denormal.h:22
static void WDL_DENORMAL_INLINE GetDoubleMaxAbsValue(double *out, const double *in)
Definition denormal.h:222
#define WDL_DENORMAL_INLINE
Definition denormal.h:15
#define WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF
Definition denormal.h:24
#define WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF
Definition denormal.h:25
static void WDL_DENORMAL_INLINE denormal_fix_double_aggressive(double *a)
Definition denormal.h:139
static float WDL_DENORMAL_INLINE denormal_filter_float(float a)
Definition denormal.h:119
static void WDL_DENORMAL_INLINE GetFloatMaxAbsValue(float *out, const float *in)
Definition denormal.h:231
static void WDL_DENORMAL_INLINE denormal_fix_float(float *a)
Definition denormal.h:144
static WDL_DENORMAL_INLINE unsigned int WDL_DENORMAL_FLOAT_W(const float *a)
Definition denormal.h:21
static double WDL_DENORMAL_INLINE denormal_filter_double2(double a)
Definition denormal.h:109
static double WDL_DENORMAL_INLINE denormal_filter_double_aggressive(double a)
Definition denormal.h:114
static void WDL_DENORMAL_INLINE denormal_fix_double(double *a)
Definition denormal.h:134
float in
Definition lilv_test.c:1460
float out
Definition lilv_test.c:1461
memcpy(hh, h, RAND_HEAD_LEN)
b
Definition crypt.c:628
unsigned long long WDL_UINT64
Definition wdltypes.h:12
#define WDL_UINT64_CONST(x)
Definition wdltypes.h:20