LMMS
Loading...
Searching...
No Matches
glue_x86_64_sse.h
Go to the documentation of this file.
1#ifndef _NSEEL_GLUE_X86_64_SSE_H_
2#define _NSEEL_GLUE_X86_64_SSE_H_
3
4// SSE version (needs the appropriate .o linked!)
5
6#define GLUE_PREFER_NONFP_DV_ASSIGNS
7#define GLUE_HAS_FPREG2 1
8
9static const unsigned int GLUE_COPY_FPSTACK_TO_FPREG2[] = { 0xc8100ff2 }; // movsd %xmm0,%xmm1
10static unsigned char GLUE_POP_STACK_TO_FPREG2[] = {
11 0xf2, 0x0f, 0x10, 0x0c, 0x24, // movsd (%rsp), %xmm1
12 0x48, 0x81, 0xC4, 16, 0,0,0, // add rsp, 16
13};
14
15// spill registers
16#define GLUE_MAX_SPILL_REGS 4
17#ifdef _WIN32
18 // win64: xmm6-xmm15 are nonvolatile, so we use xmm6-xmm9 as spill registers (xmm8/xmm9 are 5 byte encodings)
19 #define GLUE_SAVE_TO_SPILL_SIZE(x) (4 + ((x)>1))
20 #define GLUE_RESTORE_SPILL_TO_FPREG2_SIZE(x) (4 + ((x)>1))
21
/*
 * Emit "movsd xmm1, <spill register ws>" at b (win64 variant).
 *
 * Spill slots 0 and 1 map to xmm6/xmm7 and use the 4-byte encoding; higher
 * slots map to xmm8 and up, which need a 0x41 (REX.B) prefix and take 5 bytes.
 * The byte count written here matches GLUE_RESTORE_SPILL_TO_FPREG2_SIZE(ws).
 *
 * b  - destination inside the code buffer; written byte by byte because the
 *      position is not guaranteed to be aligned for a wider store
 * ws - spill slot index
 */
static void GLUE_RESTORE_SPILL_TO_FPREG2(void *b, int ws)
{
  unsigned char *out = (unsigned char *)b;
  out[0] = 0xf2;
  if (ws < 2)
  {
    // f2 0f 10 /r : movsd xmm1, xmm6+ws (ModRM 0xce = reg xmm1, rm xmm6)
    out[1] = 0x0f;
    out[2] = 0x10;
    out[3] = (unsigned char)(0xce + ws);
  }
  else
  {
    // f2 41 0f 10 /r : movsd xmm1, xmm8+(ws-2)
    out[1] = 0x41;
    out[2] = 0x0f;
    out[3] = 0x10;
    out[4] = (unsigned char)(0xc8 + (ws - 2));
  }
}
/*
 * Emit "movsd <spill register ws>, xmm0" at b (win64 variant).
 *
 * Spill slots 0 and 1 map to xmm6/xmm7 (4-byte encoding); higher slots map to
 * xmm8 and up, which need a 0x44 (REX.R) prefix and take 5 bytes. The byte
 * count written here matches GLUE_SAVE_TO_SPILL_SIZE(ws).
 *
 * b  - destination inside the code buffer; written byte by byte because the
 *      position is not guaranteed to be aligned for a wider store
 * ws - spill slot index
 */
static void GLUE_SAVE_TO_SPILL(void *b, int ws)
{
  unsigned char *out = (unsigned char *)b;
  out[0] = 0xf2;
  if (ws < 2)
  {
    // f2 0f 10 /r : movsd xmm6+ws, xmm0 (destination lives in ModRM bits 3-5)
    out[1] = 0x0f;
    out[2] = 0x10;
    out[3] = (unsigned char)(0xf0 + (ws << 3));
  }
  else
  {
    // f2 44 0f 10 /r : movsd xmm8+(ws-2), xmm0
    out[1] = 0x44;
    out[2] = 0x0f;
    out[3] = 0x10;
    out[4] = (unsigned char)(0xc0 + ((ws - 2) << 3));
  }
}
48#else
49 // non-win32: our function stubs preserve xmm4-xmm7
50
51#ifdef _DEBUG
52#define GLUE_VALIDATE_SPILLS
53#endif
54
55#ifdef GLUE_VALIDATE_SPILLS
56
57static unsigned char save_validate[]={
580x48,0x83,0xec,0x10, // subq $16, %rsp
590xf2,0x0f,0x11,0x04,0x24, // movsd %xmm0, (%rsp)
600x66,0x48,0x0f,0x6e,0xe4, // movq %rsp, %xmm4 (+ws<<3)
61};
62
// Debug-only (GLUE_VALIDATE_SPILLS) template for restoring a spilled value into xmm1.
// It loads the spill register into xmm1, compares it with the current rsp (moved
// into xmm3) and executes int $3 unless the comparison reports equal; it then
// loads the value at the top of the stack into xmm1 and releases the 16-byte slot.
// Byte 3 (the ModRM byte of the first instruction) has ws added to it by
// GLUE_RESTORE_SPILL_TO_FPREG2 to select the spill register.
63static unsigned char restore_validate[] = {
64 0xf2, 0x0f, 0x10, 0xcc, // movsd %xmm4, %xmm1 (+ws) -- ModRM 0xcc: source xmm4, destination xmm1
65 0x66, 0x48, 0x0f, 0x6e, 0xdc, // movq %rsp, %xmm3
66 0x66, 0x0f, 0x2e, 0xd9, // ucomisd %xmm1, %xmm3
67 0x74, 0x02, // je 2 <skip>
68 0xcd, 0x03, // int $3
69 0xf2, 0x0f, 0x10, 0x0c, 0x24, // movsd (%rsp), %xmm1
70 0x48, 0x83, 0xc4, 0x10, // addq $16, %rsp
71};
72 #define GLUE_SAVE_TO_SPILL_SIZE(x) (sizeof(save_validate))
73 #define GLUE_RESTORE_SPILL_TO_FPREG2_SIZE(x) (sizeof(restore_validate))
74
75#else
76
77 #define GLUE_SAVE_TO_SPILL_SIZE(x) (4)
78 #define GLUE_RESTORE_SPILL_TO_FPREG2_SIZE(x) (4)
79
80#endif
81
/*
 * Emit code at b that restores spill slot ws into xmm1 (non-win64 variant,
 * spill registers xmm4+ws).
 *
 * With GLUE_VALIDATE_SPILLS the restore_validate template is copied and its
 * ModRM byte (offset 3) is adjusted by ws. Otherwise a single 4-byte
 * "movsd xmm1, xmm4+ws" is written, byte by byte, because the position in the
 * code buffer is not guaranteed to be aligned for a 4-byte store.
 */
static void GLUE_RESTORE_SPILL_TO_FPREG2(void *b, int ws)
{
  unsigned char *out = (unsigned char *)b;
#ifdef GLUE_VALIDATE_SPILLS
  memcpy(out, restore_validate, sizeof(restore_validate));
  out[3] = (unsigned char)(out[3] + ws); // select source register xmm4+ws
#else
  // f2 0f 10 /r : movsd xmm1, xmm4+ws (ModRM 0xcc = reg xmm1, rm xmm4)
  out[0] = 0xf2;
  out[1] = 0x0f;
  out[2] = 0x10;
  out[3] = (unsigned char)(0xcc + ws);
#endif
}
/*
 * Emit code at b that saves xmm0 into spill slot ws (non-win64 variant,
 * spill registers xmm4+ws).
 *
 * With GLUE_VALIDATE_SPILLS the save_validate template is copied and the
 * register field of its final byte is adjusted by ws<<3. Otherwise a single
 * 4-byte "movsd xmm4+ws, xmm0" is written, byte by byte, because the position
 * in the code buffer is not guaranteed to be aligned for a 4-byte store.
 */
static void GLUE_SAVE_TO_SPILL(void *b, int ws)
{
  unsigned char *out = (unsigned char *)b;
#ifdef GLUE_VALIDATE_SPILLS
  memcpy(out, save_validate, sizeof(save_validate));
  out[sizeof(save_validate)-1] = (unsigned char)(out[sizeof(save_validate)-1] + (ws<<3));
#else
  // f2 0f 10 /r : movsd xmm4+ws, xmm0 (destination lives in ModRM bits 3-5)
  out[0] = 0xf2;
  out[1] = 0x0f;
  out[2] = 0x10;
  out[3] = (unsigned char)(0xe0 + (ws << 3));
#endif
}
102#endif
103
104#define GLUE_MAX_FPSTACK_SIZE 0
105#define GLUE_JMP_SET_OFFSET(endOfInstruction,offset) (((int *)(endOfInstruction))[-1] = (int) (offset))
106
107static const unsigned char GLUE_JMP_NC[] = { 0xE9, 0,0,0,0, }; // jmp<offset>
108static const unsigned char GLUE_JMP_IF_P1_Z[] = {0x85, 0xC0, 0x0F, 0x84, 0,0,0,0 }; // test eax, eax, jz
109static const unsigned char GLUE_JMP_IF_P1_NZ[] = {0x85, 0xC0, 0x0F, 0x85, 0,0,0,0 }; // test eax, eax, jnz
110
111
112#define GLUE_FUNC_ENTER_SIZE 0
113#define GLUE_FUNC_LEAVE_SIZE 0
114const static unsigned int GLUE_FUNC_ENTER[1];
115const static unsigned int GLUE_FUNC_LEAVE[1];
116
117 // on x86-64:
118 // stack is always 16 byte aligned
119 // pushing values to the stack (for eel functions) has alignment pushed first, then value (value is at the lower address)
120 // pushing pointers to the stack has the pointer pushed first, then the alignment (pointer is at the higher address)
121 #define GLUE_MOV_PX_DIRECTVALUE_SIZE 10
122 #define GLUE_MOV_PX_DIRECTVALUE_TOSTACK_SIZE 14 // wr=-1, sets xmm0
123 #define GLUE_MOV_PX_DIRECTVALUE_TOFPREG2_SIZE 14 // wr=-2, sets xmm1
124 static void GLUE_MOV_PX_DIRECTVALUE_GEN(void *b, INT_PTR v, int wr) {
125 const static unsigned short tab[3] =
126 {
127 0xB848 /* mov rax, dv*/,
128 0xBF48 /* mov rdi, dv */ ,
129 0xB948 /* mov rcx, dv */
130 };
131 unsigned short *bb = (unsigned short *)b;
132 *bb++ = tab[wdl_max(wr,0)]; // mov rax, directvalue
133 *(INT_PTR *)bb = v;
134 if (wr == -2) *(unsigned int *)(bb + 4) = 0x08100ff2; // movsd (%rax), %xmm1
135 else if (wr == -1) *(unsigned int *)(bb + 4) = 0x00100ff2; // movsd (%rax), %xmm0
136 }
137
138 const static unsigned char GLUE_PUSH_P1[2]={ 0x50,0x50}; // push rax (pointer); push rax (alignment)
139
#define GLUE_STORE_P1_TO_STACK_AT_OFFS_SIZE(x) 8
/*
 * Emit the 8-byte instruction "mov [rsp+offs], rax" at b: four fixed
 * opcode/ModRM/SIB bytes followed by offs as a 32-bit displacement.
 */
static void GLUE_STORE_P1_TO_STACK_AT_OFFS(void *b, int offs)
{
  static const unsigned char mov_rsp_disp32_rax[4] = { 0x48, 0x89, 0x84, 0x24 };
  unsigned char *dst = (unsigned char *)b;

  memcpy(dst, mov_rsp_disp32_rax, sizeof(mov_rsp_disp32_rax));
  memcpy(dst + sizeof(mov_rsp_disp32_rax), &offs, sizeof(offs));
}
149
#define GLUE_MOVE_PX_STACKPTR_SIZE 3
/*
 * Emit "mov <reg>, rsp" (GLUE_MOVE_PX_STACKPTR_SIZE bytes) at b.
 *
 * wv selects the destination register: 0 = rax, 1 = rdi, 2 = rcx.
 * Values outside 0..2 are not checked.
 */
static void GLUE_MOVE_PX_STACKPTR_GEN(void *b, int wv)
{
  static const unsigned char tab[3][GLUE_MOVE_PX_STACKPTR_SIZE]=
  {
    { 0x48, 0x89, 0xe0 }, // mov rax, rsp
    { 0x48, 0x89, 0xe7 }, // mov rdi, rsp
    { 0x48, 0x89, 0xe1 }, // mov rcx, rsp
  };
  // Without this copy the function produces no output even though callers
  // reserve GLUE_MOVE_PX_STACKPTR_SIZE bytes for it.
  memcpy(b,tab[wv],GLUE_MOVE_PX_STACKPTR_SIZE);
}
161
#define GLUE_MOVE_STACK_SIZE 7
/*
 * Emit a 7-byte stack pointer adjustment at b.
 *
 * A negative amt produces "sub rsp, -amt"; zero or positive produces
 * "add rsp, amt". The magnitude is stored as a 32-bit immediate.
 */
static void GLUE_MOVE_STACK(void *b, int amt)
{
  unsigned char *dst = (unsigned char *)b;
  const int is_sub = amt < 0;
  const int imm32 = is_sub ? -amt : amt;

  dst[0] = 0x48;
  dst[1] = 0x81;
  dst[2] = is_sub ? 0xEC   // sub rsp, imm32
                  : 0xc4;  // add rsp, imm32
  memcpy(dst + 3, &imm32, sizeof(imm32));
}
178
#define GLUE_POP_PX_SIZE 2
/*
 * Emit two identical single-byte "pop <reg>" instructions at b: the first
 * pops the alignment slot, the second pops the pointer.
 *
 * wv selects the register: 0 = rax, 1 = rdi, 2 = rcx. Values outside 0..2
 * are not checked.
 */
static void GLUE_POP_PX(void *b, int wv)
{
  static const unsigned char pop_opcode[3] =
  {
    0x58, // pop rax
    0x5F, // pop rdi
    0x59, // pop rcx
  };
  unsigned char *dst = (unsigned char *)b;
  const unsigned char op = pop_opcode[wv];

  dst[0] = op;
  dst[1] = op;
}
190
191 static const unsigned char GLUE_PUSH_P1PTR_AS_VALUE[] =
192 {
193 0x50, /*push rax - for alignment */
194 0xff, 0x30, /* push qword [rax] */
195 };
196
/*
 * Emit code that pops a 64-bit value from the stack into *destptr, then pops
 * the alignment slot. The generated code overwrites rcx and rdi.
 *
 * buf     - output position, or a null pointer to only query the size
 * destptr - address embedded in the generated code as the store target
 * returns - number of bytes the sequence occupies (13)
 */
static int GLUE_POP_VALUE_TO_ADDR(unsigned char *buf, void *destptr) // trashes P2 (rdi) and P3 (rcx)
{
  const int emitted = 1+10+2;
  if (!buf) return emitted;

  buf[0] = 0x48; // mov rcx, directvalue
  buf[1] = 0xB9;
  memcpy(buf + 2, &destptr, sizeof(destptr));

  buf[10] = 0x8f; // pop qword [rcx]
  buf[11] = 0x01;

  buf[12] = 0x5F; // pop rdi (alignment, safe to trash rdi though)
  return emitted;
}
207
/*
 * Emit code that copies the 64-bit value at [rax] to *destptr, going through
 * rdi with rcx holding the destination address.
 *
 * buf     - output position, or a null pointer to only query the size
 * destptr - address embedded in the generated code as the store target
 * returns - number of bytes the sequence occupies (16)
 */
static int GLUE_COPY_VALUE_AT_P1_TO_PTR(unsigned char *buf, void *destptr) // trashes P2/P3
{
  static const unsigned char copy_through_rdi[6] =
  {
    0x48, 0x8B, 0x38, // mov rdi, [rax]
    0x48, 0x89, 0x39, // mov [rcx], rdi
  };

  if (buf)
  {
    buf[0] = 0x48; // mov rcx, directvalue
    buf[1] = 0xB9;
    memcpy(buf + 2, &destptr, sizeof(destptr));
    memcpy(buf + 10, copy_through_rdi, sizeof(copy_through_rdi));
  }

  return 3 + 10 + 3;
}
219
/*
 * Emit code that stores xmm0 as a double at *destptr, using rax to hold the
 * destination address.
 *
 * buf     - output position, or a null pointer to only query the size
 * destptr - address embedded in the generated code as the store target
 * returns - number of bytes the sequence occupies (14)
 */
static int GLUE_POP_FPSTACK_TO_PTR(unsigned char *buf, void *destptr)
{
  static const unsigned char store_xmm0_at_rax[4] = { 0xf2, 0x0f, 0x11, 0x00 }; // movsd %xmm0, (%rax)
  const int emitted = 2+8+4;

  if (!buf) return emitted;

  buf[0] = 0x48; // mov rax, directvalue
  buf[1] = 0xB8;
  memcpy(buf + 2, &destptr, sizeof(destptr));
  memcpy(buf + 10, store_xmm0_at_rax, sizeof(store_xmm0_at_rax));
  return emitted;
}
235
236
#define GLUE_SET_PX_FROM_P1_SIZE 3
/*
 * Emit "mov <reg>, rax" (GLUE_SET_PX_FROM_P1_SIZE bytes) at b.
 *
 * wv selects the destination: 1 = rdi, 2 = rcx. wv == 0 would be rax to rax
 * and is filled with three nops; the table marks it as never expected.
 * Values outside 0..2 are not checked.
 */
static void GLUE_SET_PX_FROM_P1(void *b, int wv)
{
  static const unsigned char tab[3][GLUE_SET_PX_FROM_P1_SIZE]={
    {0x90,0x90,0x90}, // should never be used! (nopnop)
    {0x48,0x89,0xC7}, // mov rdi, rax
    {0x48,0x89,0xC1}, // mov rcx, rax
  };
  // Without this copy the function produces no output even though callers
  // reserve GLUE_SET_PX_FROM_P1_SIZE bytes for it.
  memcpy(b,tab[wv],GLUE_SET_PX_FROM_P1_SIZE);
}
247
248
249 #define GLUE_POP_FPSTACK_SIZE 0
250 static const unsigned char GLUE_POP_FPSTACK[1] = { 0 };
251
252 static const unsigned char GLUE_POP_FPSTACK_TOSTACK[] = {
253 0x48, 0x81, 0xEC, 16, 0,0,0, // sub rsp, 16
254 0xf2, 0x0f, 0x11, 0x04, 0x24, // movsd xmm0, (%rsp)
255 };
256
257 static const unsigned char GLUE_POP_FPSTACK_TO_WTP[] = {
258 0xf2, 0x0f, 0x11, 0x06, // movsd xmm0, (%rsi)
259 0x48, 0x81, 0xC6, 8, 0,0,0,/* add rsi, 8 */
260 };
261
#define GLUE_SET_PX_FROM_WTP_SIZE 3
/*
 * Emit "mov <reg>, rsi" (GLUE_SET_PX_FROM_WTP_SIZE bytes) at b.
 *
 * wv selects the destination register: 0 = rax, 1 = rdi, 2 = rcx.
 * Values outside 0..2 are not checked.
 */
static void GLUE_SET_PX_FROM_WTP(void *b, int wv)
{
  static const unsigned char tab[3][GLUE_SET_PX_FROM_WTP_SIZE]={
    {0x48, 0x89,0xF0}, // mov rax, rsi
    {0x48, 0x89,0xF7}, // mov rdi, rsi
    {0x48, 0x89,0xF1}, // mov rcx, rsi
  };
  // Without this copy the function produces no output even though callers
  // reserve GLUE_SET_PX_FROM_WTP_SIZE bytes for it.
  memcpy(b,tab[wv],GLUE_SET_PX_FROM_WTP_SIZE);
}
272
#define GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE 4
/*
 * Emit "movsd (<reg>), %xmm0" (GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE bytes) at b,
 * loading the double addressed by the selected pointer register into xmm0.
 *
 * wv selects the pointer register: 0 = rax, 1 = rdi, 2 = rcx.
 * Values outside 0..2 are not checked.
 */
static void GLUE_PUSH_VAL_AT_PX_TO_FPSTACK(void *b, int wv)
{
  static const unsigned char tab[3][GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE]={
    {0xf2, 0x0f, 0x10, 0x00}, // movsd (%rax), %xmm0
    {0xf2, 0x0f, 0x10, 0x07}, // movsd (%rdi), %xmm0
    {0xf2, 0x0f, 0x10, 0x01}, // movsd (%rcx), %xmm0
  };
  // Without this copy the function produces no output even though callers
  // reserve GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE bytes for it.
  memcpy(b,tab[wv],GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE);
}
283
284#define GLUE_POP_FPSTACK_TO_WTP_TO_PX_SIZE (GLUE_SET_PX_FROM_WTP_SIZE + sizeof(GLUE_POP_FPSTACK_TO_WTP))
285static void GLUE_POP_FPSTACK_TO_WTP_TO_PX(unsigned char *buf, int wv)
286{
287 GLUE_SET_PX_FROM_WTP(buf,wv);
289};
290
291
292const static unsigned char GLUE_RET=0xC3;
293
/*
 * Emit "mov rsi, imm64" with ptr as the immediate.
 *
 * out     - output position, or a null pointer to only query the size
 * ptr     - value embedded in the instruction
 * returns - number of bytes the instruction occupies (2 + sizeof(void *))
 */
static int GLUE_RESET_WTP(unsigned char *out, void *ptr)
{
  if (out)
  {
    out[0] = 0x48;
    out[1] = 0xBE; // mov rsi, constant64
    memcpy(out + 2, &ptr, sizeof(ptr));
  }
  return (int)(2 + sizeof(void *));
}
305
306extern void eel_callcode64(INT_PTR code, INT_PTR ram_tab);
308#define GLUE_CALL_CODE(bp, cp, rt) do { \
309 if (h->compile_flags&NSEEL_CODE_COMPILE_FLAG_NOFPSTATE) eel_callcode64_fast(cp, rt); \
310 else eel_callcode64(cp, rt);\
311 } while(0)
312#define GLUE_TABPTR_IGNORED
313
314static unsigned char *EEL_GLUE_set_immediate(void *_p, INT_PTR newv)
315{
316 char *p=(char*)_p;
317 INT_PTR scan = 0xFEFEFEFEFEFEFEFE;
318 while (*(INT_PTR *)p != scan) p++;
319 *(INT_PTR *)p = newv;
320 return (unsigned char *) (((INT_PTR*)p)+1);
321}
322
323#define INT_TO_LECHARS(x) ((x)&0xff),(((x)>>8)&0xff), (((x)>>16)&0xff), (((x)>>24)&0xff)
324
325#define GLUE_INLINE_LOOPS
326
327static const unsigned char GLUE_LOOP_LOADCNT[]={
328 0xf2, 0x48, 0x0f, 0x2c, 0xc8, // cvttsd2si %xmm0, %rcx
329 0x48, 0x81, 0xf9, 1,0,0,0, // cmp rcx, 1
330 0x0F, 0x8C, 0,0,0,0, // JL <skipptr>
331};
332
333#if NSEEL_LOOPFUNC_SUPPORT_MAXLEN > 0
334#define GLUE_LOOP_CLAMPCNT_SIZE sizeof(GLUE_LOOP_CLAMPCNT)
335static const unsigned char GLUE_LOOP_CLAMPCNT[]={
336 0x48, 0x81, 0xf9, INT_TO_LECHARS(NSEEL_LOOPFUNC_SUPPORT_MAXLEN), // cmp rcx, NSEEL_LOOPFUNC_SUPPORT_MAXLEN
337 0x0F, 0x8C, 10,0,0,0, // JL over-the-mov
338 0x48, 0xB9, INT_TO_LECHARS(NSEEL_LOOPFUNC_SUPPORT_MAXLEN), 0,0,0,0, // mov rcx, NSEEL_LOOPFUNC_SUPPORT_MAXLEN
339};
340#else
341#define GLUE_LOOP_CLAMPCNT_SIZE 0
342#define GLUE_LOOP_CLAMPCNT ""
343#endif
344
345#define GLUE_LOOP_BEGIN_SIZE sizeof(GLUE_LOOP_BEGIN)
346static const unsigned char GLUE_LOOP_BEGIN[]={
347 0x56, //push rsi
348 0x51, // push rcx
349};
// Tail of one inline-loop iteration: pops rcx and rsi (the reverse of the
// pushes in GLUE_LOOP_BEGIN), decrements the counter and jumps while it is
// non-zero. The four zero bytes are the rel32 field of the jnz.
// NOTE(review): presumably filled in later via GLUE_JMP_SET_OFFSET -- confirm against the caller.
350static const unsigned char GLUE_LOOP_END[]={
351 0x59, //pop rcx
352 0x5E, // pop rsi
353 0xff, 0xc9, // dec ecx -- no REX.W prefix, so this is the 32-bit form. NOTE(review): a 32-bit write zeroes the upper half of rcx; confirm counts of 2^32 or more are never expected
354 0x0f, 0x85, 0,0,0,0, // jnz ...
355};
356
357
358
359#if NSEEL_LOOPFUNC_SUPPORT_MAXLEN > 0
360static const unsigned char GLUE_WHILE_SETUP[]={
361 0x48, 0xB9, INT_TO_LECHARS(NSEEL_LOOPFUNC_SUPPORT_MAXLEN), 0,0,0,0, // mov rcx, NSEEL_LOOPFUNC_SUPPORT_MAXLEN
362};
363#define GLUE_WHILE_SETUP_SIZE sizeof(GLUE_WHILE_SETUP)
364
365static const unsigned char GLUE_WHILE_BEGIN[]={
366 0x56, //push rsi
367 0x51, // push rcx
368};
// Tail of one while-loop iteration when NSEEL_LOOPFUNC_SUPPORT_MAXLEN > 0:
// pops rcx and rsi (the reverse of the pushes in GLUE_WHILE_BEGIN), decrements
// the iteration budget loaded by GLUE_WHILE_SETUP and jumps out when it
// reaches zero. The four zero bytes are the rel32 field of the jz.
// NOTE(review): presumably filled in later via GLUE_JMP_SET_OFFSET -- confirm against the caller.
369static const unsigned char GLUE_WHILE_END[]={
370 0x59, //pop rcx
371 0x5E, // pop rsi
372
373 0xff, 0xc9, // dec ecx -- no REX.W prefix, so this is the 32-bit form (the budget from GLUE_WHILE_SETUP has zero upper bytes)
374 0x0f, 0x84, 0,0,0,0, // jz endpt
375};
376
377
378#else
379#define GLUE_WHILE_SETUP ""
380#define GLUE_WHILE_SETUP_SIZE 0
381#define GLUE_WHILE_END_NOJUMP
382
383static const unsigned char GLUE_WHILE_BEGIN[]={
384 0x56, //push rsi
385 0x51, // push rcx
386};
387static const unsigned char GLUE_WHILE_END[]={
388 0x59, //pop rcx
389 0x5E, // pop rsi
390};
391
392#endif
393
394
395static const unsigned char GLUE_WHILE_CHECK_RV[] = {
396 0x85, 0xC0, // test eax, eax
397 0x0F, 0x85, 0,0,0,0 // jnz looppt
398};
399
400static const unsigned char GLUE_SET_P1_Z[] = { 0x48, 0x29, 0xC0 }; // sub rax, rax
401static const unsigned char GLUE_SET_P1_NZ[] = { 0xb0, 0x01 }; // mov al, 1
402
403
404#define GLUE_HAS_FLDZ
405static const unsigned char GLUE_FLDZ[] = {
406 0x0f, 0x57, 0xc0 //xorps %xmm0, %xmm0
407};
408
409
410static EEL_F negativezeropointfive=-0.5f;
411static EEL_F onepointfive=1.5f;
412#define GLUE_INVSQRT_NEEDREPL &negativezeropointfive, &onepointfive,
413
414
/*
 * Measure a code stub by locating its end marker.
 *
 * Scans forward from fn, one byte at a time, for the 8-byte signature
 * 89 90 90 90 90 90 90 00 and stores the offset at which it starts in *size.
 * The scan has no upper bound: the signature must be present.
 *
 * returns - fn, unchanged
 */
static void *GLUE_realAddress(void *fn, int *size)
{
  static const unsigned char end_marker[8] = { 0x89, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x00 };
  const char *start = (const char *)fn;
  int offset;

  for (offset = 0; memcmp(start + offset, end_marker, sizeof(end_marker)) != 0; offset++)
  {
    // keep scanning
  }

  *size = offset;
  return fn;
}
423
424#define GLUE_HAS_FUSE 1
// Peephole pass over two adjacent pieces of generated code.
//
// code points at the start of the right-hand piece (right_size bytes); the
// left-hand piece is the left_size bytes immediately before it and is addressed
// with negative indices. Recognized byte patterns are rewritten in place.
//
// Rewrites attempted:
//  - spill_reg >= 0 (only without GLUE_VALIDATE_SPILLS): when the left piece ends
//    with the spill-restore movsd into xmm1 and the right piece is a 4-byte SSE
//    add/mul/sub/min/max on xmm1, the movsd is turned into that arithmetic op
//    reading the spill register directly.
//  - spill_reg < 0, left_size == 28: two "mov rax, imm64; movsd xmmN, [rax]"
//    loads are re-emitted, using [r12+disp32] for each constant that lies within
//    a signed 32-bit offset of ctx->ram_state->blocks, and folding the second
//    load into the SSE op when the right piece is one.
//  - spill_reg < 0, (fuse_flags&1) and left_size >= 14: a single trailing
//    "mov rax, imm64; movsd xmm0, [rax]" becomes "movsd xmm0, [r12+disp32]".
//  - spill_reg < 0, left_size == 20 and right_size == 9: a "mov rdi, imm64;
//    mov rax, imm64" pair followed by the assign_copy sequence is re-emitted as
//    a copy through rdx.
//
// Returns 0 when nothing was rewritten. Every non-zero return visible here
// equals the new end of the combined code minus its old end (code + right_size),
// and is never positive.
// NOTE(review): the [r12+disp32] forms presumably rely on r12 holding
// ctx->ram_state->blocks while generated code runs -- confirm against the call stubs.
425static int GLUE_FUSE(compileContext *ctx, unsigned char *code, int left_size, int right_size, int fuse_flags, int spill_reg)
426{
427 const UINT_PTR base = (UINT_PTR) ctx->ram_state->blocks;
428 const int is_sse_op = right_size == 4 && // add/mul/sub/min/max
429 code[0] == 0xf2 &&
430 code[1] == 0x0f &&
431 code[3] == 0xc1 && // low nibble is xmm1
432 (code[2] == 0x58 || code[2] == 0x59 || code[2] == 0x5c || code[2]==0x5d || code[2] == 0x5f);
433
434 if (spill_reg >= 0)
435 {
436#ifndef GLUE_VALIDATE_SPILLS
437 if (is_sse_op)
438 {
439 char tmp[32];
440 const int sz = GLUE_RESTORE_SPILL_TO_FPREG2_SIZE(spill_reg);
441 GLUE_RESTORE_SPILL_TO_FPREG2(tmp,spill_reg);
442 if (left_size>=sz && !memcmp(code-sz,tmp,sz))
443 {
444 code[-2] = code[2]; // modify the movsd into an addsd
445 code[-1] -= 8; // movsd uses 0xc8+(xmmX&7), addsd etc use 0xc0
446 return -4;
447 }
448 }
449#endif
450 }
451 else
452 {
453 if (left_size==28)
454 {
455 // if const64_1 is within a 32-bit offset of ctx->ram_state->blocks, we can use [r12+offs]
456 if (code[-28] == 0x48 && code[-27] == 0xb8 && // mov rax, const64_1
457 *(int *)(code - 18) == 0x08100ff2 && // movsd xmm1, [rax]
458 code[-14] == 0x48 && code[-13] == 0xb8 && // mov rax, const64_2
459 *(int *)(code - 4) == 0x00100ff2 // movsd xmm0, [rax]
460 )
461 {
462 UINT_PTR c1, c2;
463 INT_PTR c2offs,c1offs;
464 unsigned char opc[3];
465 int wrpos = -28;
466 if (is_sse_op) memcpy(opc,code,3);
467 memcpy(&c1,code-26,8);
468 memcpy(&c2,code-12,8);
469
// true when x survives a round trip through int, i.e. fits a signed 32-bit displacement
470#define PTR_32_OK(x) ((x) == (INT_PTR)(int)(x))
471 c2offs = c2-base;
472 if (!PTR_32_OK(c2offs))
473 {
474 code[wrpos++] = 0x48;
475 code[wrpos++] = 0xb8;
476 memcpy(code+wrpos,&c2,8); // mov rax, const64_2
477 wrpos += 8;
478 }
479
480 c1offs = c1-base;
481 if (!PTR_32_OK(c1offs))
482 {
483 code[wrpos++] = 0x48;
484 code[wrpos++] = 0xbf;
485 memcpy(code+wrpos,&c1,8); // mov rdi, const64_1
486 wrpos += 8;
487 }
488
489 if (!PTR_32_OK(c2offs))
490 {
491 *(int *)(code+wrpos) = 0x00100ff2; // movsd xmm0, [rax]
492 wrpos += 4;
493 }
494 else
495 {
496 // movsd xmm0, [r12+offs]
497 code[wrpos++] = 0xf2;
498 code[wrpos++] = 0x41;
499 code[wrpos++] = 0x0f;
500 code[wrpos++] = 0x10;
501 code[wrpos++] = 0x84;
502 code[wrpos++] = 0x24;
503 *(int *)(code+wrpos) = (int)c2offs;
504 wrpos += 4;
505 }
506
507 if (!is_sse_op)
508 {
509 // load xmm1 from rdi/c1offs
510 if (!PTR_32_OK(c1offs))
511 {
512 *(int *)(code+wrpos) = 0x0f100ff2; // movsd xmm1, [rdi]
513 wrpos += 4;
514 }
515 else
516 {
517 // movsd xmm1, [r12+offs]
518 code[wrpos++] = 0xf2;
519 code[wrpos++] = 0x41;
520 code[wrpos++] = 0x0f;
521 code[wrpos++] = 0x10;
522 code[wrpos++] = 0x8c;
523 code[wrpos++] = 0x24;
524 *(int *)(code+wrpos) = (int)c1offs;
525 wrpos += 4;
526 }
// slide the untouched right-hand code down so it directly follows the rewritten loads
527 if (wrpos<0) memmove(code+wrpos,code,right_size);
528 return wrpos;
529 }
530
531 // fuse to sse op
532 if (!PTR_32_OK(c1offs))
533 {
534 memcpy(code+wrpos,opc,3);
535 code[wrpos+3] = 0x07; // [rdi]
536 wrpos += 4;
537 }
538 else
539 {
540 // mul/add/sub/min/max/sd xmm0, [r12+offs]
541 code[wrpos++] = opc[0]; // 0xf2
542 code[wrpos++] = 0x41;
543 code[wrpos++] = opc[1]; // 0x0f
544 code[wrpos++] = opc[2]; // 0x58 etc
545 code[wrpos++] = 0x84;
546 code[wrpos++] = 0x24;
547 *(int *)(code+wrpos) = (int)c1offs;
548 wrpos += 4;
549 }
550 return wrpos - right_size;
551 }
552 }
553 if ((fuse_flags&1) && left_size >= 14)
554 {
555 if (code[-14] == 0x48 && code[-13] == 0xb8 && // mov rax, const64_2
556 *(int *)(code - 4) == 0x00100ff2) // movsd xmm0, [rax]
557 {
558 INT_PTR c1;
559 memcpy(&c1,code-12,8);
560 c1 -= base;
561 if (PTR_32_OK(c1))
562 {
563 // movsd xmm0, [r12+offs]
564 int wrpos = -14;
565 code[wrpos++] = 0xf2;
566 code[wrpos++] = 0x41;
567 code[wrpos++] = 0x0f;
568 code[wrpos++] = 0x10;
569 code[wrpos++] = 0x84;
570 code[wrpos++] = 0x24;
571 *(int *)(code+wrpos) = (int)c1;
572 wrpos += 4;
573 if (wrpos<0) memmove(code+wrpos,code,right_size);
574 return wrpos;
575 }
576 }
577 }
578
579 if (left_size == 20 && right_size == 9 &&
580 code[-20]==0x48 && code[-19] == 0xbf && // mov rdi, const64_1
581 code[-10]==0x48 && code[-9] == 0xb8 // mov rax, const64_2
582 )
583 {
584 static unsigned char assign_copy[9] = { 0x48, 0x8b, 0x10, // mov rdx, [rax]
585 0x48, 0x89, 0x17, // mov [rdi], rdx
586 0x48, 0x89, 0xf8, // mov rax, rdi
587 };
588 if (!memcmp(code,assign_copy,9))
589 {
590 int wrpos = -20;
591 INT_PTR c1,c2; // c1 is dest, c2 is src
592 memcpy(&c1,code-18,8);
593 memcpy(&c2,code-8,8);
594
595 if (!PTR_32_OK(c2-base))
596 {
597 code[wrpos++] = 0x48; // mov rdi, src
598 code[wrpos++] = 0xbf;
599 memcpy(code+wrpos,&c2,8);
600 wrpos +=8;
601 }
602
603 code[wrpos++] = 0x48; // mov rax, dest
604 code[wrpos++] = 0xb8;
605 memcpy(code+wrpos,&c1,8);
606 wrpos +=8;
607
608 if (PTR_32_OK(c2-base))
609 {
610 // mov rdx, [r12+offs]
611 code[wrpos++] = 0x49;
612 code[wrpos++] = 0x8b;
613 code[wrpos++] = 0x94;
614 code[wrpos++] = 0x24;
615 *(int *)(code+wrpos) = (int)(c2-base);
616 wrpos += 4;
617 }
618 else
619 {
620 code[wrpos++] = 0x48; // mov rdx, [rdi]
621 code[wrpos++] = 0x8b;
622 code[wrpos++] = 0x17;
623 }
624
625 code[wrpos++] = 0x48; // mov [rax], rdx
626 code[wrpos++] = 0x89;
627 code[wrpos++] = 0x10;
628
629 return wrpos - right_size;
630 }
631 }
632
633
634 }
635 return 0;
636}
637
638#endif
G bb
Definition inflate.c:1057
unsigned v[N_MAX]
Definition inflate.c:1584
static void c2(register WDL_FFT_COMPLEX *a)
Definition fft.c:270
static const unsigned int GLUE_PUSH_P1PTR_AS_VALUE[]
Definition glue_aarch64.h:183
#define GLUE_SET_PX_FROM_P1_SIZE
Definition glue_aarch64.h:176
static const unsigned int GLUE_POP_FPSTACK_TOSTACK[]
Definition glue_aarch64.h:303
static const unsigned int GLUE_POP_FPSTACK[1]
Definition glue_aarch64.h:301
#define GLUE_SET_PX_FROM_WTP_SIZE
Definition glue_aarch64.h:281
#define GLUE_MOVE_PX_STACKPTR_SIZE
Definition glue_aarch64.h:143
static const unsigned int GLUE_FUNC_LEAVE[1]
Definition glue_aarch64.h:106
static const unsigned int GLUE_POP_FPSTACK_TO_WTP[]
Definition glue_aarch64.h:308
#define GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE
Definition glue_aarch64.h:312
static const unsigned int GLUE_JMP_NC[]
Definition glue_aarch64.h:72
static unsigned int GLUE_POP_STACK_TO_FPREG2[]
Definition glue_aarch64.h:46
static const unsigned int GLUE_JMP_IF_P1_NZ[]
Definition glue_aarch64.h:79
static const unsigned int GLUE_FUNC_ENTER[2]
Definition glue_aarch64.h:104
static const unsigned int GLUE_SET_P1_Z[]
Definition glue_aarch64.h:325
static const unsigned int GLUE_RET[]
Definition glue_aarch64.h:108
static const unsigned int GLUE_JMP_IF_P1_Z[]
Definition glue_aarch64.h:74
static const unsigned int GLUE_COPY_FPSTACK_TO_FPREG2[]
Definition glue_aarch64.h:45
#define GLUE_POP_PX_SIZE
Definition glue_aarch64.h:170
static const unsigned int GLUE_PUSH_P1[1]
Definition glue_aarch64.h:118
static const unsigned int GLUE_SET_P1_NZ[]
Definition glue_aarch64.h:326
#define GLUE_WHILE_SETUP
Definition glue_port.h:185
#define GLUE_LOOP_BEGIN
Definition glue_port.h:175
#define GLUE_LOOP_CLAMPCNT
Definition glue_port.h:177
static EEL_F negativezeropointfive
Definition glue_x86.h:508
static EEL_F onepointfive
Definition glue_x86.h:509
static const unsigned char GLUE_FLDZ[]
Definition glue_x86.h:504
#define GLUE_LOOP_LOADCNT
Definition glue_x86.h:406
static const unsigned char GLUE_WHILE_CHECK_RV[]
Definition glue_x86.h:493
static const unsigned char GLUE_WHILE_END[]
Definition glue_x86.h:486
static const unsigned char GLUE_LOOP_END[]
Definition glue_x86.h:447
static const unsigned char GLUE_WHILE_BEGIN[]
Definition glue_x86.h:482
static void GLUE_MOVE_STACK(void *b, int amt)
Definition glue_x86_64_sse.h:163
static void GLUE_SET_PX_FROM_WTP(void *b, int wv)
Definition glue_x86_64_sse.h:263
#define GLUE_RESTORE_SPILL_TO_FPREG2_SIZE(x)
Definition glue_x86_64_sse.h:78
static void GLUE_POP_FPSTACK_TO_WTP_TO_PX(unsigned char *buf, int wv)
Definition glue_x86_64_sse.h:285
static void GLUE_RESTORE_SPILL_TO_FPREG2(void *b, int ws)
Definition glue_x86_64_sse.h:82
static void * GLUE_realAddress(void *fn, int *size)
Definition glue_x86_64_sse.h:415
#define PTR_32_OK(x)
static int GLUE_RESET_WTP(unsigned char *out, void *ptr)
Definition glue_x86_64_sse.h:294
static int GLUE_POP_VALUE_TO_ADDR(unsigned char *buf, void *destptr)
Definition glue_x86_64_sse.h:197
static void GLUE_SAVE_TO_SPILL(void *b, int ws)
Definition glue_x86_64_sse.h:92
static void GLUE_MOVE_PX_STACKPTR_GEN(void *b, int wv)
Definition glue_x86_64_sse.h:151
static void GLUE_MOV_PX_DIRECTVALUE_GEN(void *b, INT_PTR v, int wr)
Definition glue_x86_64_sse.h:124
static int GLUE_FUSE(compileContext *ctx, unsigned char *code, int left_size, int right_size, int fuse_flags, int spill_reg)
Definition glue_x86_64_sse.h:425
void eel_callcode64(INT_PTR code, INT_PTR ram_tab)
static void GLUE_STORE_P1_TO_STACK_AT_OFFS(void *b, int offs)
Definition glue_x86_64_sse.h:141
static void GLUE_POP_PX(void *b, int wv)
Definition glue_x86_64_sse.h:180
static int GLUE_POP_FPSTACK_TO_PTR(unsigned char *buf, void *destptr)
Definition glue_x86_64_sse.h:220
static unsigned char * EEL_GLUE_set_immediate(void *_p, INT_PTR newv)
Definition glue_x86_64_sse.h:314
#define INT_TO_LECHARS(x)
Definition glue_x86_64_sse.h:323
static void GLUE_PUSH_VAL_AT_PX_TO_FPSTACK(void *b, int wv)
Definition glue_x86_64_sse.h:274
static void GLUE_SET_PX_FROM_P1(void *b, int wv)
Definition glue_x86_64_sse.h:238
void eel_callcode64_fast(INT_PTR code, INT_PTR ram_tab)
static int GLUE_COPY_VALUE_AT_P1_TO_PTR(unsigned char *buf, void *destptr)
Definition glue_x86_64_sse.h:208
float out
Definition lilv_test.c:1461
struct _compileContext compileContext
#define NSEEL_LOOPFUNC_SUPPORT_MAXLEN
Definition ns-eel.h:209
struct _compileContext::@171017327167146311274025377202307266252366150176 * ram_state
EEL_F * blocks[NSEEL_RAM_BLOCKS]
Definition ns-eel-int.h:242
Definition inftrees.h:27
const char const char const char const char char * fn
Definition swell-functions.h:168
uintptr_t UINT_PTR
Definition swell-types.h:43
intptr_t INT_PTR
Definition swell-types.h:42
uch * p
Definition crypt.c:594
memcpy(hh, h, RAND_HEAD_LEN)
b
Definition crypt.c:628
ulg size
Definition extract.c:2350
typedef int(UZ_EXP MsgFn)()
#define wdl_max(x, y)
Definition wdltypes.h:106