diff options
Diffstat (limited to 'src/sffe/asm/cmplx.asm')
-rw-r--r-- | src/sffe/asm/cmplx.asm | 901 |
1 files changed, 901 insertions, 0 deletions
diff --git a/src/sffe/asm/cmplx.asm b/src/sffe/asm/cmplx.asm new file mode 100644 index 0000000..ea3826b --- /dev/null +++ b/src/sffe/asm/cmplx.asm @@ -0,0 +1,901 @@ + ; COMPLEX NUMBER ARITHMETICS + ; Mateusz Malczak + ; NASM version + ; http://www.malczak.info + ; SFFE libs + ; http://segfaultlabs.com/projects/sffe + global _sffecabs,_sffecarg,_sffecargs,_sffecargc + global _sffecinv + global _sffecexp,_sffecln,_sffeclog2,_sffeclog + global _sffecsin,_sffeccos,_sffecsincos,_sffectan,_sffeccot + global _sffecsinh,_sffeccosh,_sffectanh,_sffeccoth + global _sffeccpow, _sffecpowd, _sffecpowi, _sffecpowc + global _sffecsqrt, _sffecrtni + + %ifndef DELPHI + ; section .text use32 class=CODE ;class=CODE - needed for Borlands Compiler + + ; section CODE use32 + ; ".text" is more portable than "CODE use32": + section .text + %endif + + ;; internal use +%ifdef DELPHI +[section expfunc_text use32] +%endif +_sffecexpfnc: ; exponent of real argument + fldl2e + fmulp st1 + fld st0 + frndint + fsub to st1 + fld1 + fscale + fstp st1 + fxch st1 + f2xm1 + fld1 + faddp st1 + fmulp st1 + ret + + ;; globals +%ifdef DELPHI +[section cabs_text use32] +%endif +_sffecabs: + push ebp + mov ebp, esp + fld qword [ebp+08h] + fmul qword [ebp+08h] + fld qword [ebp+10h] + fmul qword [ebp+10h] + faddp st1 + fsqrt + leave + ret + +%ifdef DELPHI +[section carg_text use32] +%endif +_sffecarg: + push ebp + mov ebp, esp + fld qword [ebp+08h] + fld qword [ebp+10h] + fpatan + wait + leave + ret + +%ifdef DELPHI +[section cargs_text use32] +%endif +_sffecargs: + push ebp + mov ebp, esp + fld qword [ebp+10h] + fld qword [ebp+08h] + fmul qword [ebp+08h] + fld qword [ebp+10h] + fmul qword [ebp+10h] + faddp st1 + fsqrt + fdivp st1 + wait + leave + ret + +%ifdef DELPHI +[section cargc_text use32] +%endif +_sffecargc: + push ebp + mov ebp, esp + fld qword [ebp+08h] + fld qword [ebp+08h] + fmul qword [ebp+08h] + fld qword [ebp+10h] + fmul qword [ebp+10h] + faddp st1 + fsqrt + fdivp st1 + wait + leave + ret + +%ifdef DELPHI +[section cinv_text use32] +%endif +_sffecinv: + push ebp + mov ebp, esp + mov edx, [ebp+08h] + fld qword [ebp+0ch] + fld qword [ebp+14h] + fld st0 + fmul to st0 + fld st2 + fmul to st0 + faddp st1 + fdiv to st1 + fdiv to st2 + fstp st0 + fchs + fstp qword [edx+08h] + fstp qword [edx] + wait + leave + ret + +%ifdef DELPHI +[section cexp_text use32] +%endif +_sffecexp: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + fld qword [ebp+0ch] + call _sffecexpfnc + fld qword [ebp+14h] + fsincos + fld st2 + fmulp st1 + fstp qword [eax] + fmulp st1 + fstp qword [eax+08h] + wait + leave + ret + +%ifdef DELPHI +[section cln_text use32] +%endif +_sffecln: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + ;theta + fld qword [ebp+14h] + fld qword [ebp+0ch] + fpatan + ;z module + fld qword [ebp+0ch] + fmul qword [ebp+0ch] + fld qword [ebp+14h] + fmul qword [ebp+14h] + faddp st1 + fsqrt + ;ln||z|| + fld1 + fxch st1 + fyl2x + fldl2e + fdivp st1 + fstp qword [eax] + fstp qword [eax+08h] + wait + leave + ret + +%ifdef DELPHI +[section clog2_text use32] +%endif +_sffeclog2: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + fld1 + ;z module + fld qword [ebp+0ch] + fmul qword [ebp+0ch] + fld qword [ebp+14h] + fmul qword [ebp+14h] + faddp st1 + fsqrt + fyl2x + ;theta + fld qword [ebp+14h] + fld qword [ebp+0ch] + fpatan + fldln2 + fdivp st1 + fstp qword [eax+08h] + fstp qword [eax] + wait + leave + ret + +%ifdef DELPHI +[section clog_text use32] +%endif +_sffeclog: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + ;ln(base) + fld1 + fild word [ebp+1ch] + fyl2x + fldl2e + fdivp st1 + ;z module + fld qword [ebp+0ch] + fmul qword [ebp+0ch] + fld qword [ebp+14h] + fmul qword [ebp+14h] + faddp st1 + fsqrt + ;ln||z|| + fld1 + fxch st1 + fyl2x + fldl2e + fdivp st1 + fdiv st1 + ;theta + fld qword [ebp+14h] + fld qword [ebp+0ch] + fpatan + fdiv st2 + fstp qword [eax+08h] + fstp qword [eax] + fstp st0 + wait + leave + ret + +%ifdef DELPHI +[section csin_text use32] +%endif +_sffecsin: + push ebp + mov ebp, esp + mov eax, [ebp+08h] + fld1 + fld1 + faddp st1 + fld qword [ebp+14h] + call _sffecexpfnc + fld1 + fdiv st1 + fld st1;- + fld st1;- optimize + faddp st1;- + fdiv st3 + fxch st2 + fxch st1 + fsubp st1 + fdiv st2 + fld qword [ebp+0ch] + fsincos + fxch st3 + fmulp st1 + fstp qword [eax] + fmulp st1 + fstp qword [eax+08h] + fstp st0 + fwait + leave + ret + +%ifdef DELPHI +[section ccos_text use32] +%endif +_sffeccos: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + fld1 + fld1 + faddp st1 + fld qword [ebp+14h] + call _sffecexpfnc + fld1 + fdiv st1 + fld st1 + fld st1 + faddp st1 + fdiv st3 + fxch st2 + fxch st1 + fsubp st1 + fdiv st2 + fld qword [ebp+0ch] + fsincos + fxch st2 + fmulp st1 + fchs + fstp qword [eax+08h] + fmulp st1 + fstp qword [eax] + fstp st0 + fwait + leave + ret + +%ifdef DELPHI +[section ctan_text use32] +%endif +_sffectan: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + ;sinh(2b) cosh(2b) + fld1 + fld1 + faddp st1 + fld qword [ebp+14h] + fadd qword [ebp+14h] + call _sffecexpfnc + fld1 + fdiv st1 + fld st1 + fld st1 + faddp st1 + fdiv st3 + fxch st2 + fxch st1 + fsubp st1 + fdivrp st2 ;fdivrp st(2),st(0) + ;sin(2b) cos(2b) + fld qword [ebp+0ch] + fadd qword [ebp+0ch] + fsincos + faddp st2 + ;check if zero + fdiv st1 + fstp qword [eax] + fdivp st1 + fstp qword [eax+08h] + fwait + leave + ret + +%ifdef DELPHI +[section ccot_text use32] +%endif +_sffeccot: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + ;sinh(2b) cosh(2b) + fld1 + fld1 + faddp st1 + fld qword [ebp+14h] + fadd qword [ebp+14h] + call _sffecexpfnc + fld1 + fdiv st1 + fld st1 + fld st1 + fsubp st1 + fdiv st3 + fxch st2 + fxch st1 + faddp st1 + fdivrp st2 ;fdivrp st(2),st(0) + ;sin(2b) cos(2b) + fld qword [ebp+0ch] + fld st0 + faddp st1 + fsincos + fsubp st3 + ;check if zero + fdiv st2 + fstp qword [eax] + fdiv st1 + fchs + fstp qword [eax+08h] + fstp st0 + fwait + leave + ret + +%ifdef DELPHI +[section csinh_text use32] +%endif +_sffecsinh: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + fld1 + fld1 + faddp st1 + fld qword [ebp+0Ch] + call _sffecexpfnc + fld1 + fdiv st1 + fld st1 + fld st1 + faddp st1 + fdiv st3 + fxch st2 + fxch st1 + fsubp st1 + fdivrp st2 ;jak w tan i cot + fld qword [ebp+14h] + fsincos + fxch st2 + fmulp st1 + fstp qword [eax+08h] + fmulp st1 + fstp qword [eax] + fwait + leave + ret + +%ifdef DELPHI +[section ccosh_text use32] +%endif +_sffeccosh: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + fld qword [ebp+0ch] + call _sffecexpfnc + fld st0 + fld1 + fxch st1 + fdivp st1 + fld st1 + fld st1 + faddp st1 + fld1 + fld1 + faddp st1 + fdivp st1 + fxch st2 + fxch st1 + fsubp st1 + fld1 + fld1 + faddp st1 + fdivp st1 + fld qword [ebp+14h] + fsincos + fxch st2 + fmulp st1 + fstp qword [eax+08h] + fmulp st1 + fstp qword [eax] + fwait + leave + ret + +%ifdef DELPHI +[section ctanh_text use32] +%endif +_sffectanh: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + ;sinh(2a) cosh(2a) + fld1 + fld1 + faddp st1 + fld qword [ebp+0ch] + fadd qword [ebp+0ch] + call _sffecexpfnc + fld1 + fdiv st1 + fld st1 + fld st1 + fsubp st1 + fdiv st3 + fxch st2 + fxch st1 + faddp st1 + fdivrp st2 ;jak wczesniej + ;sin(2b) cos(2b) + fld qword [ebp+14h] + fadd qword [ebp+14h] + fsincos + faddp st3 + ;check if zero + fdiv st2 + fstp qword [eax+08h] + fxch st1 + fdivp st1 + fstp qword [eax] + fwait + leave + ret + +%ifdef DELPHI +[section ccoth_text use32] +%endif +_sffeccoth: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + ;sinh(2b) cosh(2b) + fld1 + fld1 + faddp st1 + fld qword [ebp+0ch] + fadd qword [ebp+0ch] + call _sffecexpfnc + fld1 + fdiv st1 + fld st1 + fld st1 + fsubp st1 + fdiv st3 + fxch st2 + fxch st1 + faddp st1 + fdivrp st2 ;jak wczesniej + ;sin(2b) cos(2b) + fld qword [ebp+14h] + fadd qword [ebp+14h] + fsincos + fsubp st3 + ;check if zero + fdiv st2 + fchs + fstp qword [eax+08h] + fxch st1 + fdivp st1 + fstp qword [eax] + fwait + leave + ret + +;***************** COMPLEX TO COMPLEX POWER +%ifdef DELPHI +[section ccpow_text use32] +%endif +_sffeccpow: +;TODO: wyeliminowac xch po wystepujace po obliczeniu theta + push ebp + mov ebp,esp + fld1 + ;z module + fld qword [ebp+0ch] + fmul qword [ebp+0ch] + fld qword [ebp+14h] + fmul qword [ebp+14h] + faddp st1 + fsqrt + ftst + fstsw ax + test ah, 1000000b + jnz MZ + mov eax, [ebp+08h] + ;ln||z||*/ + fyl2x + fldl2e + fdivp st1 + ;theta*/ + fld qword [ebp+14h] + fld qword [ebp+0ch] + fpatan + + fld st1 ;ln||z|| + fmul qword [ebp+1ch] + fld st1 ;theta + fmul qword [ebp+24h] + fsubp st1 ; st0-st1 + fxch st2 ; w st2 mam teraz a = z2.re * lnz - z2.im * theta w st0 jest ln||z|| + fmul qword [ebp+24h] + fxch st1 + fmul qword [ebp+1ch] + faddp st1 + fxch st1 + + call _sffecexpfnc + fxch st1 + fsincos + fld st2 + fmulp st1 + fstp qword [eax] + fmulp st1 + fstp qword [eax+08h] + jmp END +MZ: + mov eax, [ebp+08h] + fstp qword [eax+08h] + fldz + fstp qword [eax] + fstp st0 +END: +s fwait + leave + ret + +;***************** COMPLEX TO REAL POWER +%ifdef DELPHI +[section cpowd_text use32] +%endif +_sffecpowd: + push ebp + mov ebp,esp +; fld1 + ;z module + fld qword [ebp+0ch] + fmul qword [ebp+0ch] + fld qword [ebp+14h] + fmul qword [ebp+14h] + faddp st1 + fsqrt + ftst + fstsw ax + test ah, 1000000b + jnz MZ2 + mov eax, [ebp+08h] + fld1 + ;theta + fld qword [ebp+14h] + fld qword [ebp+0ch] + fpatan + fmul qword [ebp+1ch] + fxch st2 + ;ln||z|| + fyl2x + fldl2e + fdivp st1 + fmul qword [ebp+1ch] + + call _sffecexpfnc + fxch st1 + fsincos + fld st2 + fmulp st1 + fstp qword [eax] + fmulp st1 + fstp qword [eax+08h] + jmp END2 +MZ2: + mov eax, [ebp+08h] + fstp qword [eax+08h] + fldz + fstp qword [eax] +END2: + fwait + leave + ret + +;***************** COMPLEX TO INT POWER +%ifdef DELPHI +[section cpowi_text use32] +%endif +_sffecpowi: + push ebp + mov ebp,esp + ;z module + fild dword [ebp+1ch] ;st(0)=n + fld qword [ebp+0ch] + fmul qword [ebp+0ch] + fld qword [ebp+14h] + fmul qword [ebp+14h] + faddp st1 + fsqrt + ftst + fstsw ax + test ah, 1000000b + jnz MZ5 + mov eax, [ebp+08h] + ;||z||^n + fld1 + fxch st1 + fyl2x + fmulp st1 + fld st0 + frndint + fsub to st1 + fld1 + fscale + fstp st1 + fxch st1 + f2xm1 + fld1 + faddp st1 + fmulp st1 + ;theta + fld qword [ebp+14h] + fld qword [ebp+0ch] + fpatan + fild dword [ebp+1ch] + fmulp st1 + fsincos + fld st2 + fmulp st1 + fstp qword [eax] + fmulp st1 + fstp qword [eax+08h] + jmp END53 +MZ5: + mov eax, [ebp+08h] + fstp qword [eax+08h] + fldz + fstp qword [eax] + fstp st0 +END53: + fwait + leave + ret + +;***************** INT/DOUBLE TO COMPLEX POWER +%ifdef DELPHI +[section cpowc_text use32] +%endif +_sffecpowc: + push ebp + mov ebp,esp + fld qword [ebp+0ch] + ftst + fstsw ax + test ah, 1000000b + jnz MZ3 + mov eax, [ebp+08h] + ;n^a + fld qword [ebp+14h] + fxch st1 + fabs + fld1 + fxch st1 + fyl2x + fmulp st1 + fld st0 + frndint + fsub to st1 + fld1 + fscale + fstp st1 + fxch st1 + f2xm1 + fld1 + faddp st1 + fmulp st1 + ;ln||n|| + fld qword [ebp+0ch] + fabs + fld1 + fxch st1 + fyl2x + fldl2e + fdivp st1 + fld qword [ebp+1ch] + fmulp st1 + fsincos + fld st2 + fmulp st1 + fstp qword [eax] + fmulp st1 + fstp qword [eax+08h] + jmp END3 +MZ3: + mov eax, [ebp+08h] + fstp qword [eax+08h] + fldz + fstp qword [eax] +END3: + fwait + leave + ret + +;***************** SQRT +%ifdef DELPHI +[section csqrt_text use32] +%endif +_sffecsqrt: + push ebp + mov ebp,esp + push ebx + mov ebx, [ebp+08h] + ;z module + fld qword [ebp+0ch] + fmul qword [ebp+0ch] + fld qword [ebp+14h] + fmul qword [ebp+14h] + faddp st1 + fsqrt + fld st0 ;duplicate |z| + ;real + fadd qword [ebp+0ch] + fld1 + fld1 + faddp st1 + fdivp st1 + fsqrt + fstp qword [ebx] + fwait + ;imag + fsub qword [ebp+0ch] + fld1 + fld1 + faddp st1 + fdivp st1 + fsqrt + ;imag sign check + push eax + fld qword [ebp+14h] + ftst + fstp st0 + fstsw ax + test ah, 1b + jz IMAGPOS + fchs +IMAGPOS: + pop eax + fwait + fstp qword [ebx+08h] + pop ebx + leave + ret + +;***************** Nth ORDER ROOT +%ifdef DELPHI +[section crtni_text use32] +%endif +_sffecrtni: + push ebp + mov ebp,esp + ;z module + fld qword [ebp+0ch] + fmul qword [ebp+0ch] + fld qword [ebp+14h] + fmul qword [ebp+14h] + faddp st1 + fsqrt + ftst + fstsw ax + test ah, 1000000b + jnz MZ4 + mov eax, [ebp+08h] + ;n-th root if ||z|| + fld1 + fild word [ebp+1ch] + fdivp st1 ;st0=1/n + fxch st1 ;st0=||z|| st1=1/n + fyl2x ;log2(||z||/n) + fld st0 ;duplikuj st0 + frndint + fsub to st1 ;st0=int( log2(||z||/n) ) st1=frac( log2(||z||/n) ) + fld1 ;st0=1 + fscale ;st0=2^int( log2(||z||/n) ) st1=frac( log2(||z||/n) ) + fstp st1 + fxch st1 + f2xm1 ;st1=2^frac( log2(||z||/n) )-1 + fld1 + faddp st1 + fmulp st1 ;sqrt(||z||) + ;theta + fld qword [ebp+14h] + fld qword [ebp+0ch] + fpatan ;st0=theta st1=sqrN(||z||) + ;theta_i + fldpi + fldpi + faddp st1 ;st0=2Pi st1=theta st2=sqrN(||z||) + fimul word [ebp+20h] + faddp st1 ;st1=theta+i2Pi + fidiv word [ebp+1ch] ;st0=theta_i + ;re/im + fsincos + + fld st2 + fmulp st1 + fstp qword [eax] + fmulp st1 + fstp qword [eax+08h] + jmp END4 +MZ4: + fldz + fstp qword [eax] + fstp qword [eax+08h] +END4: + leave + ret + +%ifndef DELPHI +_sffecfunc: + push ebp + mov ebp,esp + mov eax, [ebp+08h] + leave + ret +%endif |