LMMS
Loading...
Searching...
No Matches
glue_x86.h
Go to the documentation of this file.
1#ifndef _NSEEL_GLUE_X86_H_
2#define _NSEEL_GLUE_X86_H_
3
4#define GLUE_MAX_FPSTACK_SIZE 8
5
// endOfInstruction points just past a jump instruction whose last 4 bytes are its relative offset;
// offset is the distance from that end-of-instruction address to the jump target
7#define GLUE_JMP_SET_OFFSET(endOfInstruction,offset) (((int *)(endOfInstruction))[-1] = (offset))
8
// Jump templates. Each ends in a zeroed 4-byte relative-offset placeholder.

// Unconditional jump: opcode 0xE9 followed by the offset placeholder.
static const unsigned char GLUE_JMP_NC[] = { 0xE9, 0,0,0,0, }; // jmp<offset>
// Conditional jumps on P1 (eax): "test eax, eax" followed by jz / jnz and the offset placeholder.
static const unsigned char GLUE_JMP_IF_P1_Z[] = {0x85, 0xC0, 0x0F, 0x84, 0,0,0,0 }; // test eax, eax, jz
static const unsigned char GLUE_JMP_IF_P1_NZ[] = {0x85, 0xC0, 0x0F, 0x85, 0,0,0,0 }; // test eax, eax, jnz
12
13#define GLUE_FUNC_ENTER_SIZE 0
14#define GLUE_FUNC_LEAVE_SIZE 0
15const static unsigned int GLUE_FUNC_ENTER[1];
16const static unsigned int GLUE_FUNC_LEAVE[1];
17
18 // x86
19 // stack is 16 byte aligned
20 // when pushing values to stack, alignment pushed first, then value (value is at the lower address)
21 // when pushing pointers to stack, alignment pushed first, then pointer (pointer is at the lower address)
22
23 static const unsigned char GLUE_PUSH_P1PTR_AS_VALUE[] =
24 {
25 0x83, 0xEC, 8, /* sub esp, 8 */
26 0xff, 0x70, 0x4, /* push dword [eax+4] */
27 0xff, 0x30, /* push dword [eax] */
28 };
29
/*
 * Emits code that pops an 8-byte value (two dwords) from the stack into the
 * memory at destptr, then discards the two alignment dwords that follow it.
 *
 * buf     - destination for the generated bytes, or NULL to only query the size.
 * destptr - address embedded as the 32-bit immediate of "mov eax, imm32".
 *
 * Returns the number of code bytes generated (always 12).
 */
static int GLUE_POP_VALUE_TO_ADDR(unsigned char *buf, void *destptr)
{
  if (buf)
  {
    // mov eax, directvalue. The immediate is copied with memcpy: buf+1 is not
    // aligned for a pointer store, and exactly 4 bytes belong to this
    // instruction (this glue targets 32-bit x86, little-endian).
    *buf++ = 0xB8; memcpy(buf, &destptr, 4); buf += 4;

    *buf++ = 0x8f; *buf++ = 0x00; // pop dword [eax]
    *buf++ = 0x8f; *buf++ = 0x40; *buf++ = 4; // pop dword [eax+4]

    *buf++ = 0x59; // pop ecx (alignment)
    *buf++ = 0x59; // pop ecx (alignment)
  }

  return 12;
}
45
/*
 * Emits code that copies the 8-byte value addressed by P1 (eax) to destptr,
 * using edi and ecx as temporaries and leaving eax equal to destptr.
 *
 * buf     - destination for the generated bytes, or NULL to only query the size.
 * destptr - address embedded as the 32-bit immediate of "mov eax, imm32".
 *
 * Returns the number of code bytes generated (2 + 3 + 5 + 2 + 3 = 15).
 */
static int GLUE_COPY_VALUE_AT_P1_TO_PTR(unsigned char *buf, void *destptr)
{
  if (buf)
  {
    *buf++ = 0x8B; *buf++ = 0x38; // mov edi, [eax]
    *buf++ = 0x8B; *buf++ = 0x48; *buf++ = 0x04; // mov ecx, [eax+4]

    // mov eax, directvalue. memcpy avoids a misaligned pointer store and
    // writes exactly the 4 immediate bytes (32-bit x86 target).
    *buf++ = 0xB8; memcpy(buf, &destptr, 4); buf += 4;
    *buf++ = 0x89; *buf++ = 0x38; // mov [eax], edi
    *buf++ = 0x89; *buf++ = 0x48; *buf++ = 0x04; // mov [eax+4], ecx
  }

  return 2 + 3 + 5 + 2 + 3;
}
61
/*
 * Emits code that pops the top of the x87 stack into the double at destptr.
 *
 * buf     - destination for the generated bytes, or NULL to only query the size.
 * destptr - address embedded as the 32-bit immediate of "mov eax, imm32".
 *
 * Returns the number of code bytes generated (1 + 4 + 2 = 7).
 */
static int GLUE_POP_FPSTACK_TO_PTR(unsigned char *buf, void *destptr)
{
  if (buf)
  {
    // mov eax, directvalue. Copy exactly 4 immediate bytes: a pointer-typed
    // store here is misaligned and, wherever pointers are wider than 4 bytes,
    // would write past the 7 bytes this function reports.
    *buf++ = 0xB8; memcpy(buf, &destptr, 4); buf += 4;
    *buf++ = 0xDD; *buf++ = 0x18; // fstp qword [eax]
  }
  return 1+4+2;
}
71
72
73 #define GLUE_MOV_PX_DIRECTVALUE_SIZE 5
74 #define GLUE_MOV_PX_DIRECTVALUE_TOSTACK_SIZE 6 // length when wv == -1
75
76 static void GLUE_MOV_PX_DIRECTVALUE_GEN(void *b, INT_PTR v, int wv)
77 {
78 if (wv==-1)
79 {
80 const static unsigned char t[2] = {0xDD, 0x05};
81 memcpy(b,t,2);
82 b= ((unsigned char *)b)+2;
83 }
84 else
85 {
86 const static unsigned char tab[3] = {
87 0xB8 /* mov eax, dv*/,
88 0xBF /* mov edi, dv */ ,
89 0xB9 /* mov ecx, dv */
90 };
91 *((unsigned char *)b) = tab[wv]; // mov eax, dv
92 b= ((unsigned char *)b)+1;
93 }
94 *(INT_PTR *)b = v;
95 }
96 const static unsigned char GLUE_PUSH_P1[4]={0x83, 0xEC, 12, 0x50}; // sub esp, 12, push eax
97
98 #define GLUE_STORE_P1_TO_STACK_AT_OFFS_SIZE(x) 7
/*
 * Writes the 7-byte instruction "mov [esp+offs], eax" into b.
 *
 * b    - output buffer; must hold 7 bytes.
 * offs - 32-bit displacement from esp, stored little-endian after the three
 *        opcode/ModRM/SIB bytes.
 */
static void GLUE_STORE_P1_TO_STACK_AT_OFFS(void *b, int offs)
{
  unsigned char *out = (unsigned char *)b;

  out[0] = 0x89; // mov [esp+offs], eax
  out[1] = 0x84;
  out[2] = 0x24;
  // The displacement sits at offset 3, which is not int-aligned; copy it bytewise.
  memcpy(out + 3, &offs, sizeof(offs));
}
106
107 #define GLUE_MOVE_PX_STACKPTR_SIZE 2
108 static void GLUE_MOVE_PX_STACKPTR_GEN(void *b, int wv)
109 {
110 static const unsigned char tab[3][GLUE_MOVE_PX_STACKPTR_SIZE]=
111 {
112 { 0x89, 0xe0 }, // mov eax, esp
113 { 0x89, 0xe7 }, // mov edi, esp
114 { 0x89, 0xe1 }, // mov ecx, esp
115 };
117 }
118
119 #define GLUE_MOVE_STACK_SIZE 6
/*
 * Writes a 6-byte instruction that adjusts esp by amt bytes.
 *
 * b   - output buffer; must hold GLUE_MOVE_STACK_SIZE (6) bytes.
 * amt - negative: emits "sub esp, -amt"; otherwise emits "add esp, amt".
 */
static void GLUE_MOVE_STACK(void *b, int amt)
{
  unsigned char *out = (unsigned char *)b;
  unsigned int imm;

  out[0] = 0x81;
  if (amt < 0)
  {
    out[1] = 0xEC; // sub esp, -amt
    // Negate in unsigned arithmetic so amt == INT_MIN does not overflow.
    imm = 0u - (unsigned int)amt;
  }
  else
  {
    out[1] = 0xc4; // add esp, amt
    imm = (unsigned int)amt;
  }
  // The immediate sits at offset 2, which is not int-aligned; copy it bytewise.
  memcpy(out + 2, &imm, sizeof(imm));
}
134
135
#define GLUE_POP_PX_SIZE 4
/*
 * Writes GLUE_POP_PX_SIZE bytes into b: a one-byte "pop <reg>" followed by
 * "add esp, 12". wv selects the register: 0 = eax, 1 = edi, 2 = ecx.
 */
static void GLUE_POP_PX(void *b, int wv)
{
  // one-byte pop opcode for each selectable register
  static const unsigned char pop_opcode[3] = { 0x58 /*pop eax*/, 0x5F /*pop edi*/, 0x59 /*pop ecx*/ };
  unsigned char insn[GLUE_POP_PX_SIZE];

  insn[0] = pop_opcode[wv];
  insn[1] = 0x83; // add esp, 12
  insn[2] = 0xC4;
  insn[3] = 12;
  memcpy(b, insn, sizeof(insn));
}
147
148 #define GLUE_SET_PX_FROM_P1_SIZE 2
149 static void GLUE_SET_PX_FROM_P1(void *b, int wv)
150 {
151 static const unsigned char tab[3][GLUE_SET_PX_FROM_P1_SIZE]={
152 {0x90,0x90}, // should never be used! (nopnop)
153 {0x89,0xC7}, // mov edi, eax
154 {0x89,0xC1}, // mov ecx, eax
155 };
157 }
158
159 #define GLUE_POP_FPSTACK_SIZE 2
160 static const unsigned char GLUE_POP_FPSTACK[2] = { 0xDD, 0xD8 }; // fstp st0
161
162 static const unsigned char GLUE_POP_FPSTACK_TOSTACK[] = {
163 0x83, 0xEC, 16, // sub esp, 16
164 0xDD, 0x1C, 0x24 // fstp qword (%esp)
165 };
166
167 static const unsigned char GLUE_POP_STACK_TO_FPSTACK[] = {
168 0xDD, 0x04, 0x24, // fld qword (%esp)
169 0x83, 0xC4, 16 // add esp, 16
170 };
171
172 static const unsigned char GLUE_POP_FPSTACK_TO_WTP[] = {
173 0xDD, 0x1E, /* fstp qword [esi] */
174 0x83, 0xC6, 8, /* add esi, 8 */
175 };
176
177 #define GLUE_SET_PX_FROM_WTP_SIZE 2
178 static void GLUE_SET_PX_FROM_WTP(void *b, int wv)
179 {
180 static const unsigned char tab[3][GLUE_SET_PX_FROM_WTP_SIZE]={
181 {0x89,0xF0}, // mov eax, esi
182 {0x89,0xF7}, // mov edi, esi
183 {0x89,0xF1}, // mov ecx, esi
184 };
186 }
187
188 #define GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE 2
189 static void GLUE_PUSH_VAL_AT_PX_TO_FPSTACK(void *b, int wv)
190 {
191 static const unsigned char tab[3][GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE]={
192 {0xDD,0x00}, // fld qword [eax]
193 {0xDD,0x07}, // fld qword [edi]
194 {0xDD,0x01}, // fld qword [ecx]
195 };
197 }
198
199#define GLUE_POP_FPSTACK_TO_WTP_TO_PX_SIZE (GLUE_SET_PX_FROM_WTP_SIZE + sizeof(GLUE_POP_FPSTACK_TO_WTP))
200static void GLUE_POP_FPSTACK_TO_WTP_TO_PX(unsigned char *buf, int wv)
201{
202 GLUE_SET_PX_FROM_WTP(buf,wv);
204};
205
206
207const static unsigned char GLUE_RET=0xC3;
208
/*
 * Emits "mov esi, constant" with ptr as the constant.
 *
 * out - destination for the generated bytes, or a null pointer to only query
 *       the size.
 * ptr - value copied verbatim after the 0xBE opcode byte.
 *
 * Returns the encoded length: one opcode byte plus sizeof(void *).
 */
static int GLUE_RESET_WTP(unsigned char *out, void *ptr)
{
  const int encoded_len = (int)(1 + sizeof(void *));

  if (!out) return encoded_len;

  out[0] = 0xBE; // mov esi, constant
  memcpy(out + 1, &ptr, sizeof(ptr));
  return encoded_len;
}
219
220
221#ifdef _MSC_VER
222#pragma warning(push)
223#pragma warning(disable: 4731)
224#endif
225
226#define GLUE_TABPTR_IGNORED
// Invokes the code at cp, passing rt along as the ram pointer argument.
// Expands to eel_callcode32_fast() when h->compile_flags has
// NSEEL_CODE_COMPILE_FLAG_NOFPSTATE set, and to eel_callcode32() otherwise.
// A variable named h must be in scope at the expansion site; bp is accepted
// but not used.
#define GLUE_CALL_CODE(bp, cp, rt) do { \
  if (h->compile_flags&NSEEL_CODE_COMPILE_FLAG_NOFPSTATE) eel_callcode32_fast(cp, rt); \
  else eel_callcode32(cp, rt);\
  } while(0)
231
232static void eel_callcode32(INT_PTR cp, INT_PTR ramptr)
233{
234 #ifndef NSEEL_EEL1_COMPAT_MODE
235 short oldsw, newsw;
236 #endif
237 #ifdef _MSC_VER
238
239 __asm
240 {
241#ifndef NSEEL_EEL1_COMPAT_MODE
242 fnstcw [oldsw]
243 mov ax, [oldsw]
244 or ax, 0x23F // 53 or 64 bit precision (depending on whether 0x100 is set), and masking all exceptions
245 mov [newsw], ax
246 fldcw [newsw]
247#endif
248
249 mov eax, cp
250 mov ebx, ramptr
251
252 pushad
253 mov ebp, esp
254 and esp, -16
255
256 // on win32, which _MSC_VER implies, we keep things aligned to 16 bytes, and if we call a win32 function,
257 // the stack is 16 byte aligned before the call, meaning that if calling a function with no frame pointer,
258 // the stack would be aligned to a 16 byte boundary +4, which isn't good for performance. Having said that,
259 // normally we compile with frame pointers (which brings that to 16 byte + 8, which is fine), or ICC, which
260 // for nontrivial functions will align the stack itself (for very short functions, it appears to weigh the
261 // cost of aligning the stack vs that of the slower misaligned double accesses).
262
263 // it may be worthwhile (at some point) to put some logic in the code that calls out to functions
264 // (generic1parm etc) to detect which alignment would be most optimal.
265 sub esp, 12
266 call eax
267 mov esp, ebp
268 popad
269#ifndef NSEEL_EEL1_COMPAT_MODE
270 fldcw [oldsw]
271#endif
272 };
273
274 #else // gcc x86
275 __asm__(
276#ifndef NSEEL_EEL1_COMPAT_MODE
277 "fnstcw %2\n"
278 "movw %2, %%ax\n"
279 "orw $0x23F, %%ax\n" // 53 or 64 bit precision (depending on whether 0x100 is set), and masking all exceptions
280 "movw %%ax, %3\n"
281 "fldcw %3\n"
282#endif
283 "pushl %%ebx\n"
284 "movl %%ecx, %%ebx\n"
285 "pushl %%ebp\n"
286 "movl %%esp, %%ebp\n"
287 "andl $-16, %%esp\n" // align stack to 16 bytes
288 "subl $12, %%esp\n" // call will push 4 bytes on stack, align for that
289 "call *%%edx\n"
290 "leave\n"
291 "popl %%ebx\n"
292#ifndef NSEEL_EEL1_COMPAT_MODE
293 "fldcw %2\n"
294#endif
295 ::
296 "d" (cp), "c" (ramptr)
297#ifndef NSEEL_EEL1_COMPAT_MODE
298 , "m" (oldsw), "m" (newsw)
299#endif
300 : "%eax","%esi","%edi");
301 #endif //gcc x86
302}
303
/*
 * Saves the current x87 control word into s[0], stores that value OR'ed with
 * 0x23F into s[1], and loads the modified word as the active control word.
 * Only the low 16 bits of each element are written.
 *
 * s - two-element scratch array; pass the same array to eel_leavefp() to
 *     restore the saved control word.
 */
void eel_enterfp(int s[2])
{
  #ifdef _MSC_VER
    __asm
    {
      mov ecx, s
      fnstcw [ecx]
      mov ax, [ecx]
      or ax, 0x23F // 53 or 64 bit precision (depending on whether 0x100 is set), and masking all exceptions
      mov [ecx+4], ax
      fldcw [ecx+4]
    };
  #else
    // "memory" clobber: the asm stores to s[0] and s[1] through ecx, which the
    // compiler cannot otherwise see.
    __asm__(
      "fnstcw (%%ecx)\n"
      "movw (%%ecx), %%ax\n"
      "orw $0x23F, %%ax\n" // 53 or 64 bit precision (depending on whether 0x100 is set), and masking all exceptions
      "movw %%ax, 4(%%ecx)\n"
      "fldcw 4(%%ecx)\n"
      :: "c" (s) : "%eax", "memory");
  #endif
}
/*
 * Loads the x87 control word from s[0].
 *
 * s - array whose first element holds the control word to load (eel_enterfp()
 *     stores the previous control word there).
 */
void eel_leavefp(int s[2])
{
  #ifdef _MSC_VER
    __asm
    {
      mov ecx, s
      fldcw [ecx]
    };
  #else
    // "memory" clobber: the asm reads s[0] through ecx, so any pending store
    // to it must be completed before this statement.
    __asm__(
      "fldcw (%%ecx)\n"
      :: "c" (s) : "%eax", "memory");
  #endif
}
340
342{
343 #ifdef _MSC_VER
344
345 __asm
346 {
347 mov eax, cp
348 mov ebx, ramptr
349
350 pushad
351 mov ebp, esp
352 and esp, -16
353
354 // on win32, which _MSC_VER implies, we keep things aligned to 16 bytes, and if we call a win32 function,
355 // the stack is 16 byte aligned before the call, meaning that if calling a function with no frame pointer,
356 // the stack would be aligned to a 16 byte boundary +4, which isn't good for performance. Having said that,
357 // normally we compile with frame pointers (which brings that to 16 byte + 8, which is fine), or ICC, which
358 // for nontrivial functions will align the stack itself (for very short functions, it appears to weigh the
359 // cost of aligning the stack vs that of the slower misaligned double accesses).
360
361 // it may be worthwhile (at some point) to put some logic in the code that calls out to functions
362 // (generic1parm etc) to detect which alignment would be most optimal.
363 sub esp, 12
364 call eax
365 mov esp, ebp
366 popad
367 };
368
369 #else // gcc x86
370 __asm__(
371 "pushl %%ebx\n"
372 "movl %%ecx, %%ebx\n"
373 "pushl %%ebp\n"
374 "movl %%esp, %%ebp\n"
375 "andl $-16, %%esp\n" // align stack to 16 bytes
376 "subl $12, %%esp\n" // call will push 4 bytes on stack, align for that
377 "call *%%edx\n"
378 "leave\n"
379 "popl %%ebx\n"
380 ::
381 "d" (cp), "c" (ramptr)
382 : "%eax","%esi","%edi");
383 #endif //gcc x86
384}
385
386#ifdef _MSC_VER
387#pragma warning(pop)
388#endif
389
390
391static unsigned char *EEL_GLUE_set_immediate(void *_p, INT_PTR newv)
392{
393 char *p=(char*)_p;
394 INT_PTR scan = 0xFEFEFEFE;
395 while (*(INT_PTR *)p != scan) p++;
396 *(INT_PTR *)p = newv;
397 return (unsigned char *) (((INT_PTR*)p)+1);
398}
399
400#define INT_TO_LECHARS(x) ((x)&0xff),(((x)>>8)&0xff), (((x)>>16)&0xff), (((x)>>24)&0xff)
401
402
403#define GLUE_INLINE_LOOPS
404
405#define GLUE_LOOP_LOADCNT_SIZE (nseel_has_sse3() ? sizeof(GLUE_LOOP_LOADCNT_SSE3) : sizeof(GLUE_LOOP_LOADCNT_NOSSE3))
406#define GLUE_LOOP_LOADCNT (nseel_has_sse3() ? GLUE_LOOP_LOADCNT_SSE3 : GLUE_LOOP_LOADCNT_NOSSE3)
// Loop-count loader used when nseel_has_sse3() is true: converts the x87 stack
// top to an integer at [esi] with fisttp, loads it into ecx, and jumps (4-byte
// offset placeholder) when the count is less than 1.
static const unsigned char GLUE_LOOP_LOADCNT_SSE3[]={
  0xdb, 0x0e, // fisttp dword [esi]
  0x8B, 0x0E, // mov ecx, [esi]
  0x81, 0xf9, 1,0,0,0, // cmp ecx, 1
  0x0F, 0x8C, 0,0,0,0, // JL <skipptr>
};

// Loop-count loader used when nseel_has_sse3() is false: saves the x87 control
// word at [esi+4], loads a copy OR'ed with 0xC00 from [esi+8], converts with
// fistp, restores the saved control word, then performs the same
// load/compare/jump as the SSE3 variant.
static const unsigned char GLUE_LOOP_LOADCNT_NOSSE3[]={
  0xd9, 0x7e, 0x04, // fnstcw [esi+4]
  0x66, 0x8b, 0x46, 0x04, // mov ax, [esi+4]
  0x66, 0x0d, 0x00, 0x0c, // or ax, 0xC00
  0x66, 0x89, 0x46, 0x08, // mov [esi+8], ax
  0xd9, 0x6e, 0x08, // fldcw [esi+8]
  0xDB, 0x1E, // fistp dword [esi]
  0xd9, 0x6e, 0x04, // fldcw [esi+4]
  0x8B, 0x0E, // mov ecx, [esi]
  0x81, 0xf9, 1,0,0,0, // cmp ecx, 1
  0x0F, 0x8C, 0,0,0,0, // JL <skipptr>
};
426
427#if NSEEL_LOOPFUNC_SUPPORT_MAXLEN > 0
428#define GLUE_LOOP_CLAMPCNT_SIZE sizeof(GLUE_LOOP_CLAMPCNT)
429static const unsigned char GLUE_LOOP_CLAMPCNT[]={
430 0x81, 0xf9, INT_TO_LECHARS(NSEEL_LOOPFUNC_SUPPORT_MAXLEN), // cmp ecx, NSEEL_LOOPFUNC_SUPPORT_MAXLEN
431 0x0F, 0x8C, 5,0,0,0, // JL over-the-mov
432 0xB9, INT_TO_LECHARS(NSEEL_LOOPFUNC_SUPPORT_MAXLEN), // mov ecx, NSEEL_LOOPFUNC_SUPPORT_MAXLEN
433};
434#else
435
436#define GLUE_LOOP_CLAMPCNT_SIZE 0
437#define GLUE_LOOP_CLAMPCNT ""
438
439#endif
440
441#define GLUE_LOOP_BEGIN_SIZE sizeof(GLUE_LOOP_BEGIN)
442static const unsigned char GLUE_LOOP_BEGIN[]={
443 0x56, //push esi
444 0x51, // push ecx
445 0x81, 0xEC, 0x08, 0,0,0, // sub esp, 8
446};
447static const unsigned char GLUE_LOOP_END[]={
448 0x81, 0xC4, 0x08, 0,0,0, // add esp, 8
449 0x59, //pop ecx
450 0x5E, // pop esi
451 0x49, // dec ecx
452 0x0f, 0x85, 0,0,0,0, // jnz ...
453};
454
455
456#if NSEEL_LOOPFUNC_SUPPORT_MAXLEN > 0
457#define GLUE_WHILE_SETUP_SIZE sizeof(GLUE_WHILE_SETUP)
458static const unsigned char GLUE_WHILE_SETUP[]={
459 0xB9, INT_TO_LECHARS(NSEEL_LOOPFUNC_SUPPORT_MAXLEN), // mov ecx, NSEEL_LOOPFUNC_SUPPORT_MAXLEN
460};
461static const unsigned char GLUE_WHILE_BEGIN[]={
462 0x56, //push esi
463 0x51, // push ecx
464 0x81, 0xEC, 0x08, 0,0,0, // sub esp, 8
465};
466static const unsigned char GLUE_WHILE_END[]={
467 0x81, 0xC4, 0x08, 0,0,0, // add esp, 8
468 0x59, //pop ecx
469 0x5E, // pop esi
470
471
472 0x49, // dec ecx
473 0x0f, 0x84, 0,0,0,0, // jz endpt
474};
475
476
477#else
478
479#define GLUE_WHILE_SETUP_SIZE 0
480#define GLUE_WHILE_SETUP ""
481#define GLUE_WHILE_END_NOJUMP
482static const unsigned char GLUE_WHILE_BEGIN[]={
483 0x56, //push esi
484 0x81, 0xEC, 12, 0,0,0, // sub esp, 12
485};
486static const unsigned char GLUE_WHILE_END[]={
487 0x81, 0xC4, 12, 0,0,0, // add esp, 12
488 0x5E, // pop esi
489};
490
491#endif
492
493static const unsigned char GLUE_WHILE_CHECK_RV[] = {
494 0x85, 0xC0, // test eax, eax
495 0x0F, 0x85, 0,0,0,0 // jnz looppt
496};
497
498static const unsigned char GLUE_SET_P1_Z[] = { 0x29, 0xC0 }; // sub eax, eax
499static const unsigned char GLUE_SET_P1_NZ[] = { 0xb0, 0x01 }; // mov al, 1
500
501static const unsigned char GLUE_FXCH[] = {0xd9, 0xc9};
502
503#define GLUE_HAS_FLDZ
504static const unsigned char GLUE_FLDZ[] = {0xd9, 0xee};
505#define GLUE_HAS_FLD1
506static const unsigned char GLUE_FLD1[] = {0xd9, 0xe8};
507
508static EEL_F negativezeropointfive=-0.5f;
509static EEL_F onepointfive=1.5f;
510#define GLUE_INVSQRT_NEEDREPL &negativezeropointfive, &onepointfive,
511
512
513#define GLUE_HAS_NATIVE_TRIGSQRTLOG
514
515
516void nseel_asm_or(void);
517void nseel_asm_or0(void);
518void nseel_asm_or_op(void);
519void nseel_asm_and(void);
520void nseel_asm_and_op(void);
521void nseel_asm_xor(void);
522void nseel_asm_xor_op(void);
523void nseel_asm_shl(void);
524void nseel_asm_shr(void);
525void nseel_asm_mod(void);
526void nseel_asm_mod_op(void);
527void nseel_asm_stack_peek(void);
528void _asm_gmegabuf(void);
529void _asm_megabuf(void);
530
531static struct roundinftab {
532 void *fn;
533 char istores; // number of fistp's
534 char flag; // 0=fistpll, 1=fistpl, 2=fistpl 4(%esp)
535 int newsz;
536 void *newfn;
537} s_round_fixes[] = {
538 { nseel_asm_or, 2, },
539 { nseel_asm_or_op, 2, },
540 { nseel_asm_or0, 1, },
541 { nseel_asm_and, 2, },
542 { nseel_asm_and_op, 2, },
543 { nseel_asm_xor, 2, },
544 { nseel_asm_xor_op, 2, },
545 { nseel_asm_shl, 2, 1, },
546 { nseel_asm_shr, 2, 1, },
547 { nseel_asm_mod, 2, 1, },
548 { nseel_asm_mod_op, 2, 1, },
549 { nseel_asm_stack_peek, 1, 1, },
550 { _asm_megabuf, 1, 1, },
551 { _asm_gmegabuf, 1, 2, },
553
/*
 * Patches up to np integer-store instructions in [p, endp) in place, changing
 * the ModRM reg field so each becomes the corresponding SSE3 form (the callers
 * in this file pass the "number of fistp's" recorded for the function).
 *
 * flag selects the single pattern that is searched for:
 *   0 - 0xDF with ModRM matching (m & 0xbe) == 0x3e: opcode rewritten to 0xDD,
 *       ModRM reduced by 0x30
 *   1 - 0xDB with ModRM matching (m & 0xbe) == 0x1e: ModRM reduced by 0x10
 *   2 - 0xDB with ModRM matching (m & 0xbf) == 0x1c followed by SIB 0x24:
 *       ModRM reduced by 0x10
 *
 * Asserts (WDL_ASSERT) that all np instructions were found.
 */
static void eel_fixup_sse3(unsigned char *p, unsigned char *endp, int np, int flag)
{
  const int min_len = (flag == 2) ? 3 : 2;

  while (np > 0 && p+min_len <= endp)
  {
    int hit = 0;

    switch (flag)
    {
      case 0: hit = p[0] == 0xdf && (p[1]&0xbe) == 0x3e; break;
      case 1: hit = p[0] == 0xdb && (p[1]&0xbe) == 0x1e; break;
      case 2: hit = p[0] == 0xdb && (p[1]&0xbf) == 0x1c && p[2] == 0x24; break;
      default: break;
    }

    if (!hit)
    {
      p++;
      continue;
    }

    if (flag == 0)
    {
      p[0] = 0xdd;
      p[1] -= 0x30;
    }
    else
    {
      p[1] -= 0x10;
    }

    // step over opcode + ModRM, a one-byte displacement when ModRM bit 0x40 is
    // set, and the SIB byte for the flag == 2 form
    if (p[1] & 0x40) p += 3;
    else p += 2;
    if (flag == 2) p++;

    np--;
  }
  WDL_ASSERT(np == 0);
}
583
/*
 * Returns nonzero when bit 0 of ECX from CPUID leaf 1 is set, zero otherwise.
 *
 * The probe runs once; the answer is cached in a function-local static
 * (0 = not probed yet, 1 = bit set, -1 = bit clear).
 */
static int nseel_has_sse3(void)
{
  static char c;
  if (!c)
  {
    int features = 1;
    #ifdef _MSC_VER
    __asm {
      mov eax, 1
      cpuid
      mov [features], ecx
    };
    #else
    // cpuid overwrites eax, ebx, ecx and edx: ebx and edx are preserved by
    // hand, ecx is the output, eax is declared clobbered.
    __asm__(
      "movl $1, %%eax\n"
      "pushl %%ebx\n"
      "pushl %%edx\n"
      "cpuid\n"
      "popl %%edx\n"
      "popl %%ebx\n"
      : "=c" (features) : : "%eax");
    #endif
    c=(features&1) ? 1 : -1;
  }
  return c>0;
}
610static void *GLUE_realAddress(void *fn, int *size)
611{
612 static const unsigned char sig[12] = { 0x89, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90 };
613 unsigned char *p = (unsigned char *)fn;
614
615 size_t rmatch;
616 const size_t nmatch = sizeof(s_round_fixes) / sizeof(s_round_fixes[0]);
617 for (rmatch = 0; rmatch < nmatch && s_round_fixes[rmatch].fn != fn; rmatch++);
618 if (rmatch < nmatch && s_round_fixes[rmatch].newfn && s_round_fixes[rmatch].newsz)
619 {
620 *size = s_round_fixes[rmatch].newsz;
621 return s_round_fixes[rmatch].newfn;
622 }
623
624 #if defined(_DEBUG) && defined(_MSC_VER)
625 if (*p == 0xE9) // this means jump to the following address (debug stub)
626 {
627 p += 5 + *(int *)(p+1);
628 }
629 #endif
630
631 while (memcmp(p,sig,sizeof(sig))) p++;
632 p+=sizeof(sig);
633 fn = p;
634
635 while (memcmp(p,sig,sizeof(sig))) p++;
636 *size = p - (unsigned char *)fn;
637
638 if (rmatch < nmatch)
639 {
640 static const unsigned char prefix[] = {
641 0xd9, 0x7e, 0x10, // fnstcw [esi+16]
642 0x66, 0x8b, 0x46, 0x10, // mov ax, [esi+16]
643 0x66, 0x0d, 0x00, 0x0c, // or ax, 0xC00
644 0x66, 0x89, 0x46, 0x14, // mov [esi+20], ax
645 0xd9, 0x6e, 0x14, // fldcw [esi+20]
646 };
647 static const unsigned char postfix[] = {
648 0xd9, 0x6e, 0x10, // fldcw [esi+16]
649 };
650
651 const int has_sse = nseel_has_sse3();
652
653 unsigned char *tmp = (unsigned char *) malloc(*size + (has_sse ? 0 : sizeof(prefix) + sizeof(postfix)));
654 if (tmp)
655 {
656 if (has_sse)
657 {
658 memcpy(tmp,fn,*size);
659 eel_fixup_sse3(tmp,tmp + *size,s_round_fixes[rmatch].istores, s_round_fixes[rmatch].flag);
660 }
661 else
662 {
663 memcpy(tmp,prefix,sizeof(prefix));
664 memcpy(tmp+sizeof(prefix),fn,*size);
665 memcpy(tmp+sizeof(prefix)+*size,postfix,sizeof(postfix));
666
667 *size += sizeof(prefix) + sizeof(postfix);
668 }
669 fn = tmp;
670 s_round_fixes[rmatch].newsz = *size;
671 s_round_fixes[rmatch].newfn = fn;
672 }
673 }
674
675 return fn;
676}
677
678#endif
string postfix
Definition Nio.cpp:18
void nseel_asm_xor_op(void)
Definition asm-nseel-aarch64-gcc.c:440
void nseel_asm_mod_op(void)
Definition asm-nseel-aarch64-gcc.c:368
void nseel_asm_or0(void)
Definition asm-nseel-aarch64-gcc.c:402
void nseel_asm_or_op(void)
Definition asm-nseel-aarch64-gcc.c:412
void nseel_asm_and(void)
Definition asm-nseel-aarch64-gcc.c:456
void nseel_asm_or(void)
Definition asm-nseel-aarch64-gcc.c:390
void nseel_asm_shl(void)
Definition asm-nseel-aarch64-gcc.c:344
void nseel_asm_stack_peek(void)
Definition asm-nseel-aarch64-gcc.c:1152
void nseel_asm_shr(void)
Definition asm-nseel-aarch64-gcc.c:356
void nseel_asm_xor(void)
Definition asm-nseel-aarch64-gcc.c:428
void _asm_gmegabuf(void)
Definition asm-nseel-aarch64-gcc.c:1028
void nseel_asm_mod(void)
Definition asm-nseel-aarch64-gcc.c:329
void _asm_megabuf(void)
Definition asm-nseel-aarch64-gcc.c:985
void nseel_asm_and_op(void)
Definition asm-nseel-aarch64-gcc.c:467
struct huft * t
Definition inflate.c:943
unsigned v[N_MAX]
Definition inflate.c:1584
unsigned s
Definition inflate.c:1555
void(* eel_leavefp)(int s[2])
Definition eel_import.h:43
void(* eel_enterfp)(int s[2])
Definition eel_import.h:42
static const unsigned int GLUE_PUSH_P1PTR_AS_VALUE[]
Definition glue_aarch64.h:183
#define GLUE_SET_PX_FROM_P1_SIZE
Definition glue_aarch64.h:176
static const unsigned int GLUE_POP_FPSTACK_TOSTACK[]
Definition glue_aarch64.h:303
static const unsigned int GLUE_POP_FPSTACK[1]
Definition glue_aarch64.h:301
#define GLUE_SET_PX_FROM_WTP_SIZE
Definition glue_aarch64.h:281
#define GLUE_MOVE_PX_STACKPTR_SIZE
Definition glue_aarch64.h:143
static const unsigned int GLUE_FUNC_LEAVE[1]
Definition glue_aarch64.h:106
static const unsigned int GLUE_POP_FPSTACK_TO_WTP[]
Definition glue_aarch64.h:308
#define GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE
Definition glue_aarch64.h:312
static const unsigned int GLUE_JMP_NC[]
Definition glue_aarch64.h:72
static const unsigned int GLUE_JMP_IF_P1_NZ[]
Definition glue_aarch64.h:79
static const unsigned int GLUE_FUNC_ENTER[2]
Definition glue_aarch64.h:104
static const unsigned int GLUE_SET_P1_Z[]
Definition glue_aarch64.h:325
static const unsigned int GLUE_RET[]
Definition glue_aarch64.h:108
static const unsigned int GLUE_JMP_IF_P1_Z[]
Definition glue_aarch64.h:74
#define GLUE_POP_PX_SIZE
Definition glue_aarch64.h:170
static const unsigned int GLUE_PUSH_P1[1]
Definition glue_aarch64.h:118
static const unsigned int GLUE_SET_P1_NZ[]
Definition glue_aarch64.h:326
#define GLUE_WHILE_SETUP
Definition glue_port.h:185
static unsigned char GLUE_POP_STACK_TO_FPSTACK[1]
Definition glue_port.h:341
#define GLUE_LOOP_BEGIN
Definition glue_port.h:175
#define GLUE_LOOP_CLAMPCNT
Definition glue_port.h:177
static void GLUE_MOVE_STACK(void *b, int amt)
Definition glue_x86.h:120
static void GLUE_SET_PX_FROM_WTP(void *b, int wv)
Definition glue_x86.h:178
void nseel_asm_xor_op(void)
Definition asm-nseel-aarch64-gcc.c:440
static void GLUE_POP_FPSTACK_TO_WTP_TO_PX(unsigned char *buf, int wv)
Definition glue_x86.h:200
static void GLUE_MOV_PX_DIRECTVALUE_GEN(void *b, INT_PTR v, int wv)
Definition glue_x86.h:76
static const unsigned char GLUE_FLD1[]
Definition glue_x86.h:506
static const unsigned char GLUE_LOOP_LOADCNT_SSE3[]
Definition glue_x86.h:407
void nseel_asm_mod_op(void)
Definition asm-nseel-aarch64-gcc.c:368
static const unsigned char GLUE_LOOP_LOADCNT_NOSSE3[]
Definition glue_x86.h:414
void nseel_asm_or0(void)
Definition asm-nseel-aarch64-gcc.c:402
static EEL_F negativezeropointfive
Definition glue_x86.h:508
static void * GLUE_realAddress(void *fn, int *size)
Definition glue_x86.h:610
static EEL_F onepointfive
Definition glue_x86.h:509
void nseel_asm_or_op(void)
Definition asm-nseel-aarch64-gcc.c:412
void nseel_asm_and(void)
Definition asm-nseel-aarch64-gcc.c:456
static int GLUE_RESET_WTP(unsigned char *out, void *ptr)
Definition glue_x86.h:209
void nseel_asm_or(void)
Definition asm-nseel-aarch64-gcc.c:390
static int GLUE_POP_VALUE_TO_ADDR(unsigned char *buf, void *destptr)
Definition glue_x86.h:30
static struct roundinftab s_round_fixes[]
void nseel_asm_shl(void)
Definition asm-nseel-aarch64-gcc.c:344
void nseel_asm_stack_peek(void)
Definition asm-nseel-aarch64-gcc.c:1152
static void GLUE_MOVE_PX_STACKPTR_GEN(void *b, int wv)
Definition glue_x86.h:108
void nseel_asm_shr(void)
Definition asm-nseel-aarch64-gcc.c:356
static const unsigned char GLUE_FLDZ[]
Definition glue_x86.h:504
static int nseel_has_sse3()
Definition glue_x86.h:584
static void eel_fixup_sse3(unsigned char *p, unsigned char *endp, int np, int flag)
Definition glue_x86.h:554
static const unsigned char GLUE_WHILE_CHECK_RV[]
Definition glue_x86.h:493
void nseel_asm_xor(void)
Definition asm-nseel-aarch64-gcc.c:428
static void eel_callcode32_fast(INT_PTR cp, INT_PTR ramptr)
Definition glue_x86.h:341
static void GLUE_STORE_P1_TO_STACK_AT_OFFS(void *b, int offs)
Definition glue_x86.h:99
static void GLUE_POP_PX(void *b, int wv)
Definition glue_x86.h:137
static const unsigned char GLUE_WHILE_END[]
Definition glue_x86.h:486
static const unsigned char GLUE_LOOP_END[]
Definition glue_x86.h:447
static int GLUE_POP_FPSTACK_TO_PTR(unsigned char *buf, void *destptr)
Definition glue_x86.h:62
void _asm_gmegabuf(void)
Definition asm-nseel-aarch64-gcc.c:1028
static unsigned char * EEL_GLUE_set_immediate(void *_p, INT_PTR newv)
Definition glue_x86.h:391
#define INT_TO_LECHARS(x)
Definition glue_x86.h:400
void nseel_asm_mod(void)
Definition asm-nseel-aarch64-gcc.c:329
static void GLUE_PUSH_VAL_AT_PX_TO_FPSTACK(void *b, int wv)
Definition glue_x86.h:189
static void GLUE_SET_PX_FROM_P1(void *b, int wv)
Definition glue_x86.h:149
void _asm_megabuf(void)
Definition asm-nseel-aarch64-gcc.c:985
static const unsigned char GLUE_FXCH[]
Definition glue_x86.h:501
static const unsigned char GLUE_WHILE_BEGIN[]
Definition glue_x86.h:482
void nseel_asm_and_op(void)
Definition asm-nseel-aarch64-gcc.c:467
static int GLUE_COPY_VALUE_AT_P1_TO_PTR(unsigned char *buf, void *destptr)
Definition glue_x86.h:46
static void eel_callcode32(INT_PTR cp, INT_PTR ramptr)
Definition glue_x86.h:232
float out
Definition lilv_test.c:1461
#define NSEEL_LOOPFUNC_SUPPORT_MAXLEN
Definition ns-eel.h:209
Definition glue_x86.h:531
int newsz
Definition glue_x86.h:535
char istores
Definition glue_x86.h:533
char flag
Definition glue_x86.h:534
void * newfn
Definition glue_x86.h:536
void * fn
Definition glue_x86.h:532
const char const char const char const char char * fn
Definition swell-functions.h:168
intptr_t INT_PTR
Definition swell-types.h:42
uch * p
Definition crypt.c:594
return c
Definition crypt.c:175
memcpy(hh, h, RAND_HEAD_LEN)
b
Definition crypt.c:628
ulg size
Definition extract.c:2350
char * cp
Definition unix.c:513
int flag
Definition unix.c:754
char * malloc()
#define WDL_ASSERT(x)
Definition wdltypes.h:164