; Open the code section appropriate for the current output format.
; Every branch ends by selecting a code section; the obj and win32 branches
; additionally request 64-byte section alignment.
| %ifidn __OUTPUT_FORMAT__,obj |
| section code use32 class=code align=64 |
| %elifidn __OUTPUT_FORMAT__,win32 |
; win32 output: a Yasm older than 1.1.0 is rejected with an error; any other
; assembler has to define the @feat.00 symbol itself (see below).
| %ifdef __YASM_VERSION_ID__ |
| %if __YASM_VERSION_ID__ < 01010000h |
| %error yasm version 1.1.0 or later needed. |
| %endif |
| ; Yasm automatically includes @feat.00 and complains about redefining it. |
| ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html |
| %else |
; Not Yasm: define @feat.00 = 1 by hand. The leading '$' makes the assembler
; take the name literally. The page linked above covers this symbol in the
; context of SafeSEH.
| $@feat.00 equ 1 |
| %endif |
| section .text code align=64 |
| %else |
; Any other output format: plain .text code section, default alignment.
| section .text code |
| %endif |
| ;extern _OPENSSL_ia32cap_P |
;-----------------------------------------------------------------------
; _bn_mul_mont -- Montgomery multiplication on arrays of 32-bit words
; (32-bit x86, NASM syntax).
;
; NOTE(review): presumably the implementation of OpenSSL's
;   int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
;                   const BN_ULONG *np, const BN_ULONG *n0, int num);
; the prototype is not visible in this file -- confirm against the C header.
;
; Arguments are read from the stack; after the four pushes below, argument k
; lives at [20+4*k+esp]. Shorthand used in the comments of this routine:
;   arg0  rp   pointer the result words are written to
;   arg1  ap   pointer to the first operand
;   arg2  bp   pointer to the second operand
;   arg3  np   pointer to the vector that is multiplied by m and finally
;              subtracted (the modulus)
;   arg4  n0   pointer; only the dword it points to is used
;   arg5  num  word count, compared as a signed value
;   tp         temporary vector of num+2 dwords in the scratch frame
;   m          the per-pass multiplier tp[0]*n0 (low 32 bits)
;
; Returns: eax = 0 without touching rp when num < 4, otherwise eax = 1.
; Preserves ebp, ebx, esi, edi; returns with a plain ret.
; Clobbers eax, ecx, edx, flags and, on the SSE2 path, mm0-mm7 (emms is
; executed before the common tail).
; Stack: builds a 64-byte-aligned scratch frame below the caller's stack and
; restores the caller's esp before returning.
;-----------------------------------------------------------------------
| global _bn_mul_mont |
| align 16 |
| _bn_mul_mont: |
| L$_bn_mul_mont_begin: |
| push ebp |
| push ebx |
| push esi |
| push edi |
; eax = 0 is the return value of the early-exit path.
| xor eax,eax |
; edi = num; fewer than 4 words (signed compare) -> leave immediately.
| mov edi,DWORD [40+esp] |
| cmp edi,4 |
| jl NEAR L$000just_leave |
; esi = address of the argument block (arg0 slot), edx = address of the arg1 slot.
| lea esi,[20+esp] |
| lea edx,[24+esp] |
; ebp = esp - 32 - 4*(num+2): tentative base of the scratch frame.
; edi is negated only to form the address and is restored to num+2 afterwards.
| add edi,2 |
| neg edi |
| lea ebp,[edi*4+esp-32] |
| neg edi |
; Lower ebp until it is congruent to edx modulo 2048 ...
| mov eax,ebp |
| sub eax,edx |
| and eax,2047 |
| sub ebp,eax |
; ... then subtract another 2048 if bit 11 of ebp and edx still agree, so the
; two addresses end up differing in bit 11.
; NOTE(review): presumably done to keep the frame and the caller's data from
; aliasing in the cache -- the motivation is not stated in this file.
| xor edx,ebp |
| and edx,2048 |
| xor edx,2048 |
| sub ebp,edx |
; Align the frame base down to 64 bytes.
| and ebp,-64 |
; eax = distance from the current esp down to the new base, rounded down to a
; multiple of 4096; edx keeps the caller's esp for the epilogue.
| mov eax,esp |
| sub eax,ebp |
| and eax,-4096 |
| mov edx,esp |
; Move esp to the new base in 4096-byte steps, reading one dword at every step,
; so each page between the old and the new stack pointer is touched in
; descending order.
| lea esp,[eax*1+ebp] |
| mov eax,DWORD [esp] |
| cmp esp,ebp |
| ja NEAR L$001page_walk |
| jmp NEAR L$002page_walk_done |
| align 16 |
| L$001page_walk: |
| lea esp,[esp-4096] |
| mov eax,DWORD [esp] |
| cmp esp,ebp |
| ja NEAR L$001page_walk |
| L$002page_walk_done: |
; Copy the arguments into the new frame. Layout, as offsets from esp:
;    0    scratch (the squaring path keeps num-1 here)
;    4    rp
;    8    ap
;   12    bp (integer multiply path: pointer to the current bp word;
;             squaring path: current outer index)
;   16    np
;   20    the dword n0 points to
;   24    caller's esp
;   28    &bp[num] (integer multiply path only)
;   32..  tp[0 .. num+1]
| mov eax,DWORD [esi] |
| mov ebx,DWORD [4+esi] |
| mov ecx,DWORD [8+esi] |
| mov ebp,DWORD [12+esi] |
| mov esi,DWORD [16+esi] |
; Dereference the n0 pointer; only its first dword is kept.
| mov esi,DWORD [esi] |
| mov DWORD [4+esp],eax |
| mov DWORD [8+esp],ebx |
| mov DWORD [12+esp],ecx |
| mov DWORD [16+esp],ebp |
| mov DWORD [20+esp],esi |
; edi still holds num+2, so ebx = num-1 (index of the last word).
| lea ebx,[edi-3] |
| mov DWORD [24+esp],edx |
; Dispatch on bit 26 of the first dword of _OPENSSL_ia32cap_P: set -> MMX/SSE2
; path below, clear -> integer path at L$003non_sse2.
| lea eax,[_OPENSSL_ia32cap_P] |
| bt DWORD [eax],26 |
| jnc NEAR L$003non_sse2 |
; ---------------- SSE2 (pmuludq on MMX registers) path ----------------
; mm7 = 0x00000000FFFFFFFF, used with pand to isolate the low dword.
| mov eax,-1 |
| movd mm7,eax |
; esi = ap, edi = bp, ebp = np, edx = outer index i, ecx = inner index j.
| mov esi,DWORD [8+esp] |
| mov edi,DWORD [12+esp] |
| mov ebp,DWORD [16+esp] |
| xor edx,edx |
| xor ecx,ecx |
; First pass (i = 0): mm4 = bp[0], mm5 = ap[0]*bp[0].
| movd mm4,DWORD [edi] |
| movd mm5,DWORD [esi] |
| movd mm3,DWORD [ebp] |
| pmuludq mm5,mm4 |
| movq mm2,mm5 |
| movq mm0,mm5 |
| pand mm0,mm7 |
; mm5 = m = low dword of the product times the n0 word (pmuludq uses only the
; low dword of each operand); mm3 = np[0]*m + low dword of the product.
| pmuludq mm5,[20+esp] |
| pmuludq mm3,mm5 |
| paddq mm3,mm0 |
; Preload np[1] and ap[1]; mm2 and mm3 now carry the high halves of the
; ap*bp and np*m columns respectively.
| movd mm1,DWORD [4+ebp] |
| movd mm0,DWORD [4+esi] |
| psrlq mm2,32 |
| psrlq mm3,32 |
| inc ecx |
| align 16 |
; For j = 1 .. num-2: accumulate ap[j]*bp[0] into mm2 and np[j]*m into mm3,
; store the low dword of the sum as tp[j-1], keep the carries in mm2/mm3.
| L$0041st: |
| pmuludq mm0,mm4 |
| pmuludq mm1,mm5 |
| paddq mm2,mm0 |
| paddq mm3,mm1 |
| movq mm0,mm2 |
| pand mm0,mm7 |
| movd mm1,DWORD [4+ecx*4+ebp] |
| paddq mm3,mm0 |
| movd mm0,DWORD [4+ecx*4+esi] |
| psrlq mm2,32 |
| movd DWORD [28+ecx*4+esp],mm3 |
| psrlq mm3,32 |
| lea ecx,[1+ecx] |
| cmp ecx,ebx |
| jl NEAR L$0041st |
; Last word of the first pass (j = num-1): store tp[num-2], then write both
; remaining carries as a 64-bit value into tp[num-1]:tp[num].
| pmuludq mm0,mm4 |
| pmuludq mm1,mm5 |
| paddq mm2,mm0 |
| paddq mm3,mm1 |
| movq mm0,mm2 |
| pand mm0,mm7 |
| paddq mm3,mm0 |
| movd DWORD [28+ecx*4+esp],mm3 |
| psrlq mm2,32 |
| psrlq mm3,32 |
| paddq mm3,mm2 |
| movq [32+ebx*4+esp],mm3 |
| inc edx |
; Outer loop for i = 1 .. num-1: same computation as the first pass, but the
; previous tp contents (read through mm6) are added in.
| L$005outer: |
| xor ecx,ecx |
| movd mm4,DWORD [edx*4+edi] |
| movd mm5,DWORD [esi] |
| movd mm6,DWORD [32+esp] |
| movd mm3,DWORD [ebp] |
; mm5 = ap[0]*bp[i] + tp[0]; m is derived from its low dword as before.
| pmuludq mm5,mm4 |
| paddq mm5,mm6 |
| movq mm0,mm5 |
| movq mm2,mm5 |
| pand mm0,mm7 |
| pmuludq mm5,[20+esp] |
| pmuludq mm3,mm5 |
| paddq mm3,mm0 |
| movd mm6,DWORD [36+esp] |
| movd mm1,DWORD [4+ebp] |
| movd mm0,DWORD [4+esi] |
| psrlq mm2,32 |
| psrlq mm3,32 |
| paddq mm2,mm6 |
| inc ecx |
; ebx doubles as the inner-loop down-counter here; it is reloaded from ecx
; (= num-1) once the inner loop has finished.
| dec ebx |
| L$006inner: |
| pmuludq mm0,mm4 |
| pmuludq mm1,mm5 |
| paddq mm2,mm0 |
| paddq mm3,mm1 |
| movq mm0,mm2 |
| movd mm6,DWORD [36+ecx*4+esp] |
| pand mm0,mm7 |
| movd mm1,DWORD [4+ecx*4+ebp] |
| paddq mm3,mm0 |
| movd mm0,DWORD [4+ecx*4+esi] |
| psrlq mm2,32 |
| movd DWORD [28+ecx*4+esp],mm3 |
| psrlq mm3,32 |
| paddq mm2,mm6 |
; The jnz below consumes the flags of this dec; the lea in between leaves the
; flags untouched.
| dec ebx |
| lea ecx,[1+ecx] |
| jnz NEAR L$006inner |
| mov ebx,ecx |
; Last word of this pass: store tp[num-2], then add both carries and the old
; tp[num] and write the 64-bit sum to tp[num-1]:tp[num].
| pmuludq mm0,mm4 |
| pmuludq mm1,mm5 |
| paddq mm2,mm0 |
| paddq mm3,mm1 |
| movq mm0,mm2 |
| pand mm0,mm7 |
| paddq mm3,mm0 |
| movd DWORD [28+ecx*4+esp],mm3 |
| psrlq mm2,32 |
| psrlq mm3,32 |
| movd mm6,DWORD [36+ebx*4+esp] |
| paddq mm3,mm2 |
| paddq mm3,mm6 |
| movq [32+ebx*4+esp],mm3 |
| lea edx,[1+edx] |
| cmp edx,ebx |
| jle NEAR L$005outer |
; Leave MMX state before falling back to integer code.
| emms |
| jmp NEAR L$007common_tail |
| align 16 |
; ---------------- integer (mul/adc) path ----------------
| L$003non_sse2: |
; esi = ap, edi = bp, eax = &bp[num].
; ebp = (num & 1) | (ap - bp): zero only when num is even and ap == bp, in
; which case the dedicated squaring code is used. The mov that loads bp[0]
; into edi does not disturb the flags set by the or.
| mov esi,DWORD [8+esp] |
| lea ebp,[1+ebx] |
| mov edi,DWORD [12+esp] |
| xor ecx,ecx |
| mov edx,esi |
| and ebp,1 |
| sub edx,edi |
| lea eax,[4+ebx*4+edi] |
| or ebp,edx |
| mov edi,DWORD [edi] |
| jz NEAR L$008bn_sqr_mont |
; General multiplication. [28+esp] = &bp[num] marks the end of the outer loop.
| mov DWORD [28+esp],eax |
| mov eax,DWORD [esi] |
| xor edx,edx |
| align 16 |
; First pass: tp[j] = low(ap[j]*bp[0] + carry) for j = 0 .. num-2; the carry
; travels in edx/ebp.
| L$009mull: |
| mov ebp,edx |
| mul edi |
| add ebp,eax |
| lea ecx,[1+ecx] |
| adc edx,0 |
| mov eax,DWORD [ecx*4+esi] |
| cmp ecx,ebx |
| mov DWORD [28+ecx*4+esp],ebp |
| jl NEAR L$009mull |
; Last word of the first pass: tp[num-1], tp[num] = final carry, tp[num+1] = 0.
; Also sets up the reduction: edi = m = n0 word * tp[0], esi = np.
| mov ebp,edx |
| mul edi |
| mov edi,DWORD [20+esp] |
| add eax,ebp |
| mov esi,DWORD [16+esp] |
| adc edx,0 |
| imul edi,DWORD [32+esp] |
| mov DWORD [32+ebx*4+esp],eax |
| xor ecx,ecx |
| mov DWORD [36+ebx*4+esp],edx |
| mov DWORD [40+ebx*4+esp],ecx |
; np[0]*m + tp[0]: only the carry out of the addition is needed, eax is
; overwritten with np[1] right away.
| mov eax,DWORD [esi] |
| mul edi |
| add eax,DWORD [32+esp] |
| mov eax,DWORD [4+esi] |
| adc edx,0 |
| inc ecx |
| jmp NEAR L$0102ndmadd |
| align 16 |
; Multiply-accumulate pass for bp word i > 0:
; tp[j] = low(tp[j] + ap[j]*bp[i] + carry) for j = 0 .. num-2.
| L$0111stmadd: |
| mov ebp,edx |
| mul edi |
| add ebp,DWORD [32+ecx*4+esp] |
| lea ecx,[1+ecx] |
| adc edx,0 |
| add ebp,eax |
| mov eax,DWORD [ecx*4+esi] |
| adc edx,0 |
| cmp ecx,ebx |
| mov DWORD [28+ecx*4+esp],ebp |
| jl NEAR L$0111stmadd |
; Last word of the pass; the carry is propagated into tp[num] and tp[num+1].
; edi becomes m = n0 word * tp[0] and esi becomes np for the reduction.
| mov ebp,edx |
| mul edi |
| add eax,DWORD [32+ebx*4+esp] |
| mov edi,DWORD [20+esp] |
| adc edx,0 |
| mov esi,DWORD [16+esp] |
| add ebp,eax |
| adc edx,0 |
| imul edi,DWORD [32+esp] |
| xor ecx,ecx |
| add edx,DWORD [36+ebx*4+esp] |
| mov DWORD [32+ebx*4+esp],ebp |
| adc ecx,0 |
| mov eax,DWORD [esi] |
| mov DWORD [36+ebx*4+esp],edx |
| mov DWORD [40+ebx*4+esp],ecx |
; As above: np[0]*m + tp[0] is computed for its carry only.
| mul edi |
| add eax,DWORD [32+esp] |
| mov eax,DWORD [4+esi] |
| adc edx,0 |
| mov ecx,1 |
| align 16 |
; Reduction pass: tp[j-1] = low(tp[j] + np[j]*m + carry) for j = 1 .. num-2,
; i.e. the vector is shifted down by one word while np*m is added.
| L$0102ndmadd: |
| mov ebp,edx |
| mul edi |
| add ebp,DWORD [32+ecx*4+esp] |
| lea ecx,[1+ecx] |
| adc edx,0 |
| add ebp,eax |
| mov eax,DWORD [ecx*4+esi] |
| adc edx,0 |
| cmp ecx,ebx |
| mov DWORD [24+ecx*4+esp],ebp |
| jl NEAR L$0102ndmadd |
; Last word of the reduction: tp[num-2], then the two top words are folded
; into tp[num-1] and tp[num].
| mov ebp,edx |
| mul edi |
| add ebp,DWORD [32+ebx*4+esp] |
| adc edx,0 |
| add ebp,eax |
| adc edx,0 |
| mov DWORD [28+ebx*4+esp],ebp |
| xor eax,eax |
| mov ecx,DWORD [12+esp] |
| add edx,DWORD [36+ebx*4+esp] |
| adc eax,DWORD [40+ebx*4+esp] |
; Advance the bp pointer; finished once it reaches &bp[num]. The je below uses
; the flags of the cmp, the mov in between does not change them.
| lea ecx,[4+ecx] |
| mov DWORD [32+ebx*4+esp],edx |
| cmp ecx,DWORD [28+esp] |
| mov DWORD [36+ebx*4+esp],eax |
| je NEAR L$007common_tail |
; Next outer iteration: edi = next bp word, esi = ap, restart at j = 0.
| mov edi,DWORD [ecx] |
| mov esi,DWORD [8+esp] |
| mov DWORD [12+esp],ecx |
| xor ecx,ecx |
| xor edx,edx |
| mov eax,DWORD [esi] |
| jmp NEAR L$0111stmadd |
| align 16 |
; ---------------- squaring path (ap == bp, num even) ----------------
; [esp] = num-1, [12+esp] = outer index i = 0 (this overwrites the bp slot).
; edi holds ap[0] (loaded as bp[0] before the jump here).
| L$008bn_sqr_mont: |
| mov DWORD [esp],ebx |
| mov DWORD [12+esp],ecx |
; tp[0] = low(ap[0]^2). The high word is split: edx keeps high>>1 as the carry
; into the next product, ebx keeps its lowest bit, which re-enters when that
; product is doubled.
| mov eax,edi |
| mul edi |
| mov DWORD [32+esp],eax |
| mov ebx,edx |
| shr edx,1 |
| and ebx,1 |
| inc ecx |
| align 16 |
; For j = 1 .. num-2: tp[j] = low(2*(ap[j]*ap[0] + carry) + ebx); the bit
; shifted out by the doubling becomes the new ebx.
| L$012sqr: |
| mov eax,DWORD [ecx*4+esi] |
| mov ebp,edx |
| mul edi |
| add eax,ebp |
| lea ecx,[1+ecx] |
| adc edx,0 |
| lea ebp,[eax*2+ebx] |
| shr eax,31 |
| cmp ecx,DWORD [esp] |
| mov ebx,eax |
| mov DWORD [28+ecx*4+esp],ebp |
| jl NEAR L$012sqr |
; Last word (j = num-1): writes tp[num-1], tp[num] and tp[num+1] and prepares
; the reduction: edi = m = n0 word * tp[0], esi = np, ebx = num-1.
| mov eax,DWORD [ecx*4+esi] |
| mov ebp,edx |
| mul edi |
| add eax,ebp |
| mov edi,DWORD [20+esp] |
| adc edx,0 |
| mov esi,DWORD [16+esp] |
| lea ebp,[eax*2+ebx] |
| imul edi,DWORD [32+esp] |
| shr eax,31 |
| mov DWORD [32+ecx*4+esp],ebp |
| lea ebp,[edx*2+eax] |
| mov eax,DWORD [esi] |
| shr edx,31 |
| mov DWORD [36+ecx*4+esp],ebp |
| mov DWORD [40+ecx*4+esp],edx |
; np[0]*m + tp[0] is computed for its carry only.
| mul edi |
| add eax,DWORD [32+esp] |
| mov ebx,ecx |
| adc edx,0 |
| mov eax,DWORD [4+esi] |
| mov ecx,1 |
| align 16 |
; Reduction pass of the squaring path, two words per iteration:
; tp[j-1] and tp[j] are produced from tp[j], tp[j+1], np[j]*m and np[j+1]*m.
; ecx runs 1, 3, 5, ... and reaches num-1 exactly because num is even.
| L$0133rdmadd: |
| mov ebp,edx |
| mul edi |
| add ebp,DWORD [32+ecx*4+esp] |
| adc edx,0 |
| add ebp,eax |
| mov eax,DWORD [4+ecx*4+esi] |
| adc edx,0 |
| mov DWORD [28+ecx*4+esp],ebp |
| mov ebp,edx |
| mul edi |
| add ebp,DWORD [36+ecx*4+esp] |
| lea ecx,[2+ecx] |
| adc edx,0 |
| add ebp,eax |
| mov eax,DWORD [ecx*4+esi] |
| adc edx,0 |
| cmp ecx,ebx |
| mov DWORD [24+ecx*4+esp],ebp |
| jl NEAR L$0133rdmadd |
; Last word of the reduction (tp[num-2]); fold the top words into tp[num-1]
; and tp[num]. ecx = outer index i, esi = ap again.
| mov ebp,edx |
| mul edi |
| add ebp,DWORD [32+ebx*4+esp] |
| adc edx,0 |
| add ebp,eax |
| adc edx,0 |
| mov DWORD [28+ebx*4+esp],ebp |
| mov ecx,DWORD [12+esp] |
| xor eax,eax |
| mov esi,DWORD [8+esp] |
| add edx,DWORD [36+ebx*4+esp] |
| adc eax,DWORD [40+ebx*4+esp] |
| mov DWORD [32+ebx*4+esp],edx |
; Done when i == num-1 (flags come from the cmp; the mov keeps them).
| cmp ecx,ebx |
| mov DWORD [36+ebx*4+esp],eax |
| je NEAR L$007common_tail |
; Next outer iteration: i += 1, edi = ap[i], tp[i] += low(ap[i]^2).
| mov edi,DWORD [4+ecx*4+esi] |
| lea ecx,[1+ecx] |
| mov eax,edi |
| mov DWORD [12+esp],ecx |
| mul edi |
| add eax,DWORD [32+ecx*4+esp] |
| adc edx,0 |
| mov DWORD [32+ecx*4+esp],eax |
| xor ebp,ebp |
; If i is already the last index there are no cross products left; the je
; uses the flags of the cmp, the lea in between preserves them.
| cmp ecx,ebx |
| lea ecx,[1+ecx] |
| je NEAR L$014sqrlast |
; Split the high word of ap[i]^2 exactly as in the first squaring pass.
| mov ebx,edx |
| shr edx,1 |
| and ebx,1 |
| align 16 |
; For j = i+1 .. num-1: tp[j] = low(tp[j] + 2*(ap[j]*ap[i] + carry) + ebx);
; the bit shifted out plus the carries of the two additions form the new ebx.
; The adc edx,0 consumes the carry of the add two lines earlier; the lea
; between them does not affect the flags.
| L$015sqradd: |
| mov eax,DWORD [ecx*4+esi] |
| mov ebp,edx |
| mul edi |
| add eax,ebp |
| lea ebp,[eax*1+eax] |
| adc edx,0 |
| shr eax,31 |
| add ebp,DWORD [32+ecx*4+esp] |
| lea ecx,[1+ecx] |
| adc eax,0 |
| add ebp,ebx |
| adc eax,0 |
| cmp ecx,DWORD [esp] |
| mov DWORD [28+ecx*4+esp],ebp |
| mov ebx,eax |
| jle NEAR L$015sqradd |
; Double the final carry: edx = low(2*edx + ebx), ebp = the bits carried out.
| mov ebp,edx |
| add edx,edx |
| shr ebp,31 |
| add edx,ebx |
| adc ebp,0 |
; Add the top carry into tp[num] / tp[num+1] (ecx = num here) and set up the
; next reduction pass: edi = m = n0 word * tp[0], esi = np, ebx = num-1.
| L$014sqrlast: |
| mov edi,DWORD [20+esp] |
| mov esi,DWORD [16+esp] |
| imul edi,DWORD [32+esp] |
| add edx,DWORD [32+ecx*4+esp] |
| mov eax,DWORD [esi] |
| adc ebp,0 |
| mov DWORD [32+ecx*4+esp],edx |
| mov DWORD [36+ecx*4+esp],ebp |
| mul edi |
| add eax,DWORD [32+esp] |
| lea ebx,[ecx-1] |
| adc edx,0 |
| mov ecx,1 |
| mov eax,DWORD [4+esi] |
| jmp NEAR L$0133rdmadd |
| align 16 |
; ---------------- final subtraction and result selection ----------------
; ebp = np, edi = rp, esi = &tp[0], ecx = num-1 (down-counter), edx = index.
; The xor also clears CF, so the first sbb starts without a borrow.
| L$007common_tail: |
| mov ebp,DWORD [16+esp] |
| mov edi,DWORD [4+esp] |
| lea esi,[32+esp] |
| mov eax,DWORD [esi] |
| mov ecx,ebx |
| xor edx,edx |
| align 16 |
; rp[j] = tp[j] - np[j] - borrow for j = 0 .. num-1. dec leaves CF alone, so
; the borrow survives to the next sbb; jge tests the sign of the dec result
; because the mov and lea in between do not touch the flags.
| L$016sub: |
| sbb eax,DWORD [edx*4+ebp] |
| mov DWORD [edx*4+edi],eax |
| dec ecx |
| mov eax,DWORD [4+edx*4+esi] |
| lea edx,[1+edx] |
| jge NEAR L$016sub |
; eax = tp[num] - borrow serves as the "keep tp" mask, edx = ~eax as the
; "keep tp - np" mask.
| sbb eax,0 |
| mov edx,-1 |
| xor edx,eax |
| jmp NEAR L$017copy |
| align 16 |
; Branch-free select, from word num-1 down to 0:
; rp[j] = (tp[j] & eax) | (rp[j] & edx). Each tp[j] is overwritten with the
; value left in ecx after it has been read, so the temporary does not keep
; the intermediate result.
| L$017copy: |
| mov esi,DWORD [32+ebx*4+esp] |
| mov ebp,DWORD [ebx*4+edi] |
| mov DWORD [32+ebx*4+esp],ecx |
| and esi,eax |
| and ebp,edx |
| or ebp,esi |
| mov DWORD [ebx*4+edi],ebp |
| dec ebx |
| jge NEAR L$017copy |
; Restore the caller's stack pointer saved in the frame and report success.
| mov esp,DWORD [24+esp] |
| mov eax,1 |
; Shared epilogue; eax is still 0 when reached from the num < 4 check.
| L$000just_leave: |
| pop edi |
| pop esi |
| pop ebx |
| pop ebp |
| ret |
; NUL-terminated ASCII identification string, emitted directly after the
; routine's ret in the code section:
;   "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
| db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
| db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 |
| db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 |
| db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 |
| db 111,114,103,62,0 |
; Declares _OPENSSL_ia32cap_P as a 16-byte common symbol in .bss.
; _bn_mul_mont tests bit 26 of its first dword to choose between its SSE2 and
; integer code paths.
; NOTE(review): presumably filled in by CPU-detection code elsewhere -- nothing
; in this file writes to it.
| segment .bss |
| common _OPENSSL_ia32cap_P 16 |