blob: b08b05637db27915efb782c748da6578feed82bd [file] [log] [blame]
; Section selection by assembler output format: "obj", "win32", or anything else.
; NOTE(review): the decimal fused onto the start of every line in this file, and the
; author/timestamp text fused onto the first directive below, look like residue from
; a web blame view rather than assembler input -- confirm and strip before assembling.
Andrew Top61a84952019-04-30 15:07:33 -07001%ifidn __OUTPUT_FORMAT__,obj
2section code use32 class=code align=64
3%elifidn __OUTPUT_FORMAT__,win32
; win32 output: when assembled by Yasm, require at least version 1.1.0.
4%ifdef __YASM_VERSION_ID__
5%if __YASM_VERSION_ID__ < 01010000h
6%error yasm version 1.1.0 or later needed.
7%endif
8; Yasm automatically includes .00 and complains about redefining it.
9; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
10%else
; Any other assembler: define the feature symbol explicitly.
11$@feat.00 equ 1
12%endif
13section .text code align=64
14%else
; Fallback for every other output format: plain .text with no explicit alignment.
15section .text code
16%endif
; Constant pool shared by every routine in this file: 49 rows of 16 bytes each,
; 64-byte aligned, followed by a NUL-terminated ASCII identification string.
; All routines address the pool through ebp = L$_vpaes_consts+0x30, so row r
; (counting from 0) lives at [ebp + 16*r - 48].
; The rows are used as 16-byte PSHUFB lookup tables / permutation masks by the code
; below; the cryptographic role of each table is not stated in this file.
17align 64
18L$_vpaes_consts:
; rows 0-2 = [ebp-48], [ebp-32], [ebp-16]. Row 2 is the byte 0x0F repeated
; (252645135 = 0x0F0F0F0F) and is used as the low-nibble mask.
19dd 218628480,235210255,168496130,67568393
20dd 252381056,17041926,33884169,51187212
21dd 252645135,252645135,252645135,252645135
; rows 3-10 = [ebp+0] .. [ebp+112]
22dd 1512730624,3266504856,1377990664,3401244816
23dd 830229760,1275146365,2969422977,3447763452
24dd 3411033600,2979783055,338359620,2782886510
25dd 4209124096,907596821,221174255,1006095553
26dd 191964160,3799684038,3164090317,1589111125
27dd 182528256,1777043520,2877432650,3265356744
28dd 1874708224,3503451415,3305285752,363511674
29dd 1606117888,3487855781,1093350906,2384367825
; rows 11-14 = [ebp+128] .. [ebp+176]; indexed as [ecx+ebx-64] by the encrypt loop
30dd 197121,67569157,134941193,202313229
31dd 67569157,134941193,202313229,197121
32dd 134941193,202313229,197121,67569157
33dd 202313229,197121,67569157,134941193
; rows 15-18 = [ebp+192] .. [ebp+240]; indexed as [ecx+ebx] by the encrypt loop
34dd 33619971,100992007,168364043,235736079
35dd 235736079,33619971,100992007,168364043
36dd 168364043,235736079,33619971,100992007
37dd 100992007,168364043,235736079,33619971
; rows 19-22 = [ebp+256] .. [ebp+304]; selected with ecx in {0,16,32,48}
38dd 50462976,117835012,185207048,252579084
39dd 252314880,51251460,117574920,184942860
40dd 184682752,252054788,50987272,118359308
41dd 118099200,185467140,251790600,50727180
; row 23 = [ebp+320]: copied to the stack by __vpaes_schedule_core and rotated by
; __vpaes_schedule_round
42dd 2946363062,528716217,1300004225,1881839624
; row 24 = [ebp+336]: the byte 0x5B repeated (1532713819 = 0x5B5B5B5B)
43dd 1532713819,1532713819,1532713819,1532713819
; rows 25-26 = [ebp+352], rows 27-28 = [ebp+384]: table pairs handed to
; __vpaes_schedule_transform by L$010schedule_mangle_last
44dd 3602276352,4288629033,3737020424,4153884961
45dd 1354558464,32357713,2958822624,3775749553
46dd 1201988352,132424512,1572796698,503232858
47dd 2213177600,1597421020,4103937655,675398315
; rows 29-36 = [ebp+416] .. [ebp+528]: tables read by L$014schedule_mangle_dec
48dd 2749646592,4273543773,1511898873,121693092
49dd 3040248576,1103263732,2871565598,1608280554
50dd 2236667136,2588920351,482954393,64377734
51dd 3069987328,291237287,2117370568,3650299247
52dd 533321216,3573750986,2572112006,1401264716
53dd 1339849704,2721158661,548607111,3445553514
54dd 2128193280,3054596040,2183486460,1257083700
55dd 655635200,1165381986,3923443150,2344132524
; rows 37-48 = [ebp+544] .. [ebp+720]: read by __vpaes_decrypt_core as
; [ebx-64] .. [ebx+112] with ebx = ebp+608
56dd 190078720,256924420,290342170,357187870
57dd 1610966272,2263057382,4103205268,309794674
58dd 2592527872,2233205587,1335446729,3402964816
59dd 3973531904,3225098121,3002836325,1918774430
60dd 3870401024,2102906079,2284471353,4117666579
61dd 617007872,1021508343,366931923,691083277
62dd 2528395776,3491914898,2968704004,1613121270
63dd 3445188352,3247741094,844474987,4093578302
64dd 651481088,1190302358,1689581232,574775300
65dd 4289380608,206939853,2555985458,2489840491
66dd 2130264064,327674451,3566485037,3349835193
67dd 2470714624,316102159,3636825756,3393945945
; ASCII: "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)", NUL
68db 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
69db 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
70db 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
71db 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
72db 118,101,114,115,105,116,121,41,0
; Pad to the next 64-byte boundary after the string.
73align 64
;-----------------------------------------------------------------------
; __vpaes_preheat -- finish the position-independent address of the constant
; pool and preload the two constants the encrypt/decrypt cores keep in registers.
;
; In:    ebp  = L$_vpaes_consts+0x30 minus the address of the caller's
;               L$0NNpic_point label, which sits directly after the call to here
;        [esp] = return address, i.e. that same pic_point address
; Out:   ebp  = absolute address of L$_vpaes_consts+0x30
;        xmm7 = 16 bytes at [ebp-48]  (pool row 0)
;        xmm6 = 16 bytes at [ebp-16]  (pool row 2, the 0x0F byte mask)
; Clobb: flags
;-----------------------------------------------------------------------
74align 16
75__vpaes_preheat:
; Adding the return address cancels the "- pic_point" term in ebp.
76 add ebp,DWORD [esp]
77 movdqa xmm7,[ebp-48]
78 movdqa xmm6,[ebp-16]
79 ret
;-----------------------------------------------------------------------
; __vpaes_encrypt_core -- encrypt the single 16-byte block held in xmm0.
;
; In:    xmm0 = input block
;        edx  = key schedule: the dword at [edx+240] is the loop count, and
;               16-byte round keys are read from [edx] upward
;        ebp  = absolute L$_vpaes_consts+0x30
;        xmm6 = [ebp-16] (0x0F mask), xmm7 = [ebp-48]  (see __vpaes_preheat)
; Out:   xmm0 = output block
; Clobb: eax, ebx, ecx, edx, xmm1-xmm5, flags
;
; Lines of the form "db 102,15,56,0,M" are hand-encoded PSHUFB xmm,xmm
; (66 0F 38 00 /r); the decoded operands are given beside each one.
;-----------------------------------------------------------------------
80align 16
81__vpaes_encrypt_core:
; ecx selects one of four rows in each of two 4-row groups; it starts at 16.
82 mov ecx,16
83 mov eax,DWORD [240+edx]
; Split every input byte into nibbles: xmm1 = high nibbles, xmm0 = low nibbles.
84 movdqa xmm1,xmm6
85 movdqa xmm2,[ebp]
86 pandn xmm1,xmm0
87 pand xmm0,xmm6
; xmm5 = first round key.
88 movdqu xmm5,[edx]
89db 102,15,56,0,208 ; pshufb xmm2,xmm0  (row [ebp] indexed by low nibbles)
90 movdqa xmm0,[16+ebp]
91 pxor xmm2,xmm5
92 psrld xmm1,4
93 add edx,16
94db 102,15,56,0,193 ; pshufb xmm0,xmm1  (row [ebp+16] indexed by high nibbles)
; ebx = base for the ecx-indexed rows used inside the loop.
95 lea ebx,[192+ebp]
96 pxor xmm0,xmm2
97 jmp NEAR L$000enc_entry
98align 16
; Main loop body. On entry xmm2/xmm3 hold the two index vectors produced by
; L$000enc_entry and xmm5 holds the current round key.
99L$001enc_loop:
100 movdqa xmm4,[32+ebp]
101 movdqa xmm0,[48+ebp]
102db 102,15,56,0,226 ; pshufb xmm4,xmm2
103db 102,15,56,0,195 ; pshufb xmm0,xmm3
104 pxor xmm4,xmm5
105 movdqa xmm5,[64+ebp]
106 pxor xmm0,xmm4
107 movdqa xmm1,[ecx*1+ebx-64]
108db 102,15,56,0,234 ; pshufb xmm5,xmm2
109 movdqa xmm2,[80+ebp]
110 movdqa xmm4,[ecx*1+ebx]
111db 102,15,56,0,211 ; pshufb xmm2,xmm3
112 movdqa xmm3,xmm0
113 pxor xmm2,xmm5
114db 102,15,56,0,193 ; pshufb xmm0,xmm1
115 add edx,16
116 pxor xmm0,xmm2
117db 102,15,56,0,220 ; pshufb xmm3,xmm4
; Step the row selector modulo 64 (completed by the "and ecx,48" below).
118 add ecx,16
119 pxor xmm3,xmm0
120db 102,15,56,0,193 ; pshufb xmm0,xmm1
121 and ecx,48
; ZF produced here is consumed by the jnz at the end of L$000enc_entry; no
; instruction in between writes EFLAGS, so the order must not be disturbed.
122 sub eax,1
123 pxor xmm0,xmm3
; Shared entry: turn the state in xmm0 into the two index vectors xmm2 and xmm3
; by nibble-wise table lookups, and fetch the next round key into xmm5.
124L$000enc_entry:
125 movdqa xmm1,xmm6
126 movdqa xmm5,[ebp-32]
127 pandn xmm1,xmm0
128 psrld xmm1,4
129 pand xmm0,xmm6
130db 102,15,56,0,232 ; pshufb xmm5,xmm0
131 movdqa xmm3,xmm7
132 pxor xmm0,xmm1
133db 102,15,56,0,217 ; pshufb xmm3,xmm1
134 movdqa xmm4,xmm7
135 pxor xmm3,xmm5
136db 102,15,56,0,224 ; pshufb xmm4,xmm0
137 movdqa xmm2,xmm7
138 pxor xmm4,xmm5
139db 102,15,56,0,211 ; pshufb xmm2,xmm3
140 movdqa xmm3,xmm7
141 pxor xmm2,xmm0
142db 102,15,56,0,220 ; pshufb xmm3,xmm4
143 movdqu xmm5,[edx]
144 pxor xmm3,xmm1
; Loop while the counter has not reached zero (flags come from "sub eax,1";
; on the first pass they are whatever the caller left -- NOTE(review): the
; first-pass branch therefore depends on flags set before entry; confirm).
145 jnz NEAR L$001enc_loop
; Final step: rows [ebp+96]/[ebp+112], xor the last round key, then apply the
; byte permutation selected by ecx from the rows at [ebp+256].
146 movdqa xmm4,[96+ebp]
147 movdqa xmm0,[112+ebp]
148db 102,15,56,0,226 ; pshufb xmm4,xmm2
149 pxor xmm4,xmm5
150db 102,15,56,0,195 ; pshufb xmm0,xmm3
151 movdqa xmm1,[64+ecx*1+ebx]
152 pxor xmm0,xmm4
153db 102,15,56,0,193 ; pshufb xmm0,xmm1
154 ret
;-----------------------------------------------------------------------
; __vpaes_decrypt_core -- decrypt the single 16-byte block held in xmm0.
;
; In:    xmm0 = input block
;        edx  = key schedule: the dword at [edx+240] is the loop count, and
;               16-byte round keys are read from [edx] upward
;        ebp  = absolute L$_vpaes_consts+0x30
;        xmm6 = [ebp-16] (0x0F mask), xmm7 = [ebp-48]  (see __vpaes_preheat)
; Out:   xmm0 = output block
; Clobb: eax, ebx, ecx, edx, xmm1-xmm5, flags
;
; "db 102,15,56,0,M" is hand-encoded PSHUFB xmm,xmm (66 0F 38 00 /r);
; "db 102,15,58,15,M,I" is hand-encoded PALIGNR xmm,xmm,imm8 (66 0F 3A 0F /r ib).
;-----------------------------------------------------------------------
155align 16
156__vpaes_decrypt_core:
; ebx = base of the decrypt tables; they are addressed as [ebx-64] .. [ebx+112].
157 lea ebx,[608+ebp]
158 mov eax,DWORD [240+edx]
; Split the input into nibbles: xmm1 = high nibbles, xmm0 = low nibbles.
159 movdqa xmm1,xmm6
160 movdqa xmm2,[ebx-64]
161 pandn xmm1,xmm0
; ecx = ((count << 4) xor 48) and 48: selects the final output permutation row.
162 mov ecx,eax
163 psrld xmm1,4
164 movdqu xmm5,[edx]
165 shl ecx,4
166 pand xmm0,xmm6
167db 102,15,56,0,208 ; pshufb xmm2,xmm0
168 movdqa xmm0,[ebx-48]
169 xor ecx,48
170db 102,15,56,0,193 ; pshufb xmm0,xmm1
171 and ecx,48
172 pxor xmm2,xmm5
; xmm5 = permutation from [ebp+176]; it is rotated once per loop iteration below.
173 movdqa xmm5,[176+ebp]
174 pxor xmm0,xmm2
175 add edx,16
; ecx becomes an absolute pointer: ebx-352 = ebp+256, plus the 0/16/32/48 offset.
176 lea ecx,[ecx*1+ebx-352]
177 jmp NEAR L$002dec_entry
178align 16
; Main loop body. On entry xmm2/xmm3 are the index vectors from L$002dec_entry
; and xmm0 holds the current round key. Four table pairs are applied in turn,
; each preceded (after the first) by permuting the accumulator with xmm5.
179L$003dec_loop:
180 movdqa xmm4,[ebx-32]
181 movdqa xmm1,[ebx-16]
182db 102,15,56,0,226 ; pshufb xmm4,xmm2
183db 102,15,56,0,203 ; pshufb xmm1,xmm3
184 pxor xmm0,xmm4
185 movdqa xmm4,[ebx]
186 pxor xmm0,xmm1
187 movdqa xmm1,[16+ebx]
188db 102,15,56,0,226 ; pshufb xmm4,xmm2
189db 102,15,56,0,197 ; pshufb xmm0,xmm5
190db 102,15,56,0,203 ; pshufb xmm1,xmm3
191 pxor xmm0,xmm4
192 movdqa xmm4,[32+ebx]
193 pxor xmm0,xmm1
194 movdqa xmm1,[48+ebx]
195db 102,15,56,0,226 ; pshufb xmm4,xmm2
196db 102,15,56,0,197 ; pshufb xmm0,xmm5
197db 102,15,56,0,203 ; pshufb xmm1,xmm3
198 pxor xmm0,xmm4
199 movdqa xmm4,[64+ebx]
200 pxor xmm0,xmm1
201 movdqa xmm1,[80+ebx]
202db 102,15,56,0,226 ; pshufb xmm4,xmm2
203db 102,15,56,0,197 ; pshufb xmm0,xmm5
204db 102,15,56,0,203 ; pshufb xmm1,xmm3
205 pxor xmm0,xmm4
206 add edx,16
207db 102,15,58,15,237,12 ; palignr xmm5,xmm5,12  (rotate xmm5 by 12 bytes)
208 pxor xmm0,xmm1
; ZF produced here is consumed by the jnz at the end of L$002dec_entry; no
; instruction in between writes EFLAGS, so the order must not be disturbed.
209 sub eax,1
; Shared entry: turn the state in xmm0 into the two index vectors xmm2 and xmm3
; by nibble-wise table lookups, and fetch the next round key into xmm0.
210L$002dec_entry:
211 movdqa xmm1,xmm6
212 movdqa xmm2,[ebp-32]
213 pandn xmm1,xmm0
214 pand xmm0,xmm6
215 psrld xmm1,4
216db 102,15,56,0,208 ; pshufb xmm2,xmm0
217 movdqa xmm3,xmm7
218 pxor xmm0,xmm1
219db 102,15,56,0,217 ; pshufb xmm3,xmm1
220 movdqa xmm4,xmm7
221 pxor xmm3,xmm2
222db 102,15,56,0,224 ; pshufb xmm4,xmm0
223 pxor xmm4,xmm2
224 movdqa xmm2,xmm7
225db 102,15,56,0,211 ; pshufb xmm2,xmm3
226 movdqa xmm3,xmm7
227 pxor xmm2,xmm0
228db 102,15,56,0,220 ; pshufb xmm3,xmm4
229 movdqu xmm0,[edx]
230 pxor xmm3,xmm1
; Flags come from "sub eax,1" (or, on the first pass, from the "and ecx,48"
; executed before the jump to this label).
231 jnz NEAR L$003dec_loop
; Final step: tables [ebx+96]/[ebx+112], xor the last round key, then apply
; the byte permutation that ecx points at.
232 movdqa xmm4,[96+ebx]
233db 102,15,56,0,226 ; pshufb xmm4,xmm2
234 pxor xmm4,xmm0
235 movdqa xmm0,[112+ebx]
236 movdqa xmm2,[ecx]
237db 102,15,56,0,195 ; pshufb xmm0,xmm3
238 pxor xmm0,xmm4
239db 102,15,56,0,194 ; pshufb xmm0,xmm2
240 ret
;-----------------------------------------------------------------------
; __vpaes_schedule_core -- expand a user key into a key schedule.
; Called by _vpaes_set_encrypt_key and _vpaes_set_decrypt_key.
;
; In:    esi  = pointer to the user key bytes
;        eax  = key size in bits (above 192 -> 256 path, exactly 192 -> 192
;               path, anything else -> 128 path)
;        edx  = where the first 16-byte schedule entry is written; later
;               entries are written by __vpaes_schedule_mangle, which moves edx
;               up (edi == 0) or down (edi != 0) by 16 each time
;        edi  = 0 for an encryption schedule, non-zero for a decryption schedule
;        ecx  = initial 0/16/32/48 offset into the rows at [ebp+256]
;        ebp  = L$_vpaes_consts+0x30 minus the caller's pic_point address
;               (made absolute by the first instruction)
;        esp  = on entry esp+4 must be 16-byte aligned; [esp+4] and [esp+20]
;               are used as 16-byte scratch slots with movdqa
; Out:   schedule written through edx; xmm0-xmm7 are zeroed before returning
; Clobb: eax, ebx, ecx, edx, esi (decrypt path, via the mangle helper), flags
;
; "db 102,15,56,0,M" is PSHUFB xmm,xmm; "db 102,15,58,15,M,I" is PALIGNR.
;-----------------------------------------------------------------------
241align 16
242__vpaes_schedule_core:
; Turn ebp into the absolute constant-pool pointer (same trick as __vpaes_preheat).
243 add ebp,DWORD [esp]
; xmm0 = first 16 key bytes; xmm3 keeps an untransformed copy.
244 movdqu xmm0,[esi]
245 movdqa xmm2,[320+ebp]
246 movdqa xmm3,xmm0
; ebx = table pair for the input transform (rows at [ebp] and [ebp+16]).
247 lea ebx,[ebp]
; Park row [ebp+320] on the stack; __vpaes_schedule_round reads and rotates it
; there (as [8+esp], one return address deeper).
248 movdqa [4+esp],xmm2
249 call __vpaes_schedule_transform
; xmm7 carries the running schedule state between rounds.
250 movdqa xmm7,xmm0
251 test edi,edi
252 jnz NEAR L$004schedule_am_decrypting
; Encrypting: the first entry is the transformed key.
253 movdqu [edx],xmm0
254 jmp NEAR L$005schedule_go
; Decrypting: the first entry written is the untransformed key, byte-permuted by
; the row at [ebp+256+ecx]; ecx is then flipped with xor 48.
255L$004schedule_am_decrypting:
256 movdqa xmm1,[256+ecx*1+ebp]
257db 102,15,56,0,217 ; pshufb xmm3,xmm1
258 movdqu [edx],xmm3
259 xor ecx,48
; Dispatch on key size.
260L$005schedule_go:
261 cmp eax,192
262 ja NEAR L$006schedule_256
263 je NEAR L$007schedule_192
; 128-bit path: 10 rounds; each round's result is written by the mangle helper,
; except the last, which goes through L$010schedule_mangle_last.
264L$008schedule_128:
265 mov eax,10
266L$009loop_schedule_128:
267 call __vpaes_schedule_round
268 dec eax
269 jz NEAR L$010schedule_mangle_last
270 call __vpaes_schedule_mangle
271 jmp NEAR L$009loop_schedule_128
272align 16
; 192-bit path: load key bytes 8..23, transform them, and keep the result in
; xmm6 with its low 8 bytes cleared (movhlps from the zeroed xmm4).
; Each of the 4 loop iterations runs two rounds and emits up to three entries.
273L$007schedule_192:
274 movdqu xmm0,[8+esi]
275 call __vpaes_schedule_transform
276 movdqa xmm6,xmm0
277 pxor xmm4,xmm4
278 movhlps xmm6,xmm4
279 mov eax,4
280L$011loop_schedule_192:
281 call __vpaes_schedule_round
282db 102,15,58,15,198,8 ; palignr xmm0,xmm6,8
283 call __vpaes_schedule_mangle
284 call __vpaes_schedule_192_smear
285 call __vpaes_schedule_mangle
286 call __vpaes_schedule_round
287 dec eax
288 jz NEAR L$010schedule_mangle_last
289 call __vpaes_schedule_mangle
290 call __vpaes_schedule_192_smear
291 jmp NEAR L$011loop_schedule_192
292align 16
; 256-bit path: load key bytes 16..31 and transform them; 7 loop iterations.
; Each iteration emits the current xmm0, runs a full round, then runs a "low"
; round (entering at L$_vpaes_schedule_low_round) on the other key half.
293L$006schedule_256:
294 movdqu xmm0,[16+esi]
295 call __vpaes_schedule_transform
296 mov eax,7
297L$012loop_schedule_256:
298 call __vpaes_schedule_mangle
299 movdqa xmm6,xmm0
300 call __vpaes_schedule_round
301 dec eax
302 jz NEAR L$010schedule_mangle_last
303 call __vpaes_schedule_mangle
; Broadcast the top dword of xmm0, and temporarily swap xmm7 with xmm6 so the
; low round updates the other half; xmm7 is restored from the stack afterwards.
304 pshufd xmm0,xmm0,255
305 movdqa [20+esp],xmm7
306 movdqa xmm7,xmm6
307 call L$_vpaes_schedule_low_round
308 movdqa xmm7,[20+esp]
309 jmp NEAR L$012loop_schedule_256
310align 16
; Write the final schedule entry. Encrypting: permute xmm0 by row
; [ebp+256+ecx], use the table pair at [ebp+352], and net-advance edx by 16.
; Decrypting: use the table pair at [ebp+384] and move edx back by 16.
311L$010schedule_mangle_last:
312 lea ebx,[384+ebp]
313 test edi,edi
314 jnz NEAR L$013schedule_mangle_last_dec
315 movdqa xmm1,[256+ecx*1+ebp]
316db 102,15,56,0,193 ; pshufb xmm0,xmm1
317 lea ebx,[352+ebp]
318 add edx,32
319L$013schedule_mangle_last_dec:
320 add edx,-16
; xor with the 0x5B row, run the output transform, store the entry.
321 pxor xmm0,[336+ebp]
322 call __vpaes_schedule_transform
323 movdqu [edx],xmm0
; Clear every XMM register before returning (presumably so that no key-derived
; data is left behind in them).
324 pxor xmm0,xmm0
325 pxor xmm1,xmm1
326 pxor xmm2,xmm2
327 pxor xmm3,xmm3
328 pxor xmm4,xmm4
329 pxor xmm5,xmm5
330 pxor xmm6,xmm6
331 pxor xmm7,xmm7
332 ret
;-----------------------------------------------------------------------
; __vpaes_schedule_192_smear -- helper for the 192-bit key schedule path.
;
; In:    xmm6 = partial key material, xmm7 = current schedule state
; Out:   xmm0 = xmm6 xor (dwords 0,0,0,2 of xmm6) xor (dwords 2,3,3,3 of xmm7),
;               dword selections listed from lowest to highest lane
;        xmm6 = the same value with its low 8 bytes cleared
;        xmm1 = 0
; Clobb: xmm0, xmm1, xmm6 (no general registers, no flags)
;-----------------------------------------------------------------------
333align 16
334__vpaes_schedule_192_smear:
; imm 128 = 10 00 00 00b: lanes take source dwords 0,0,0,2.
335 pshufd xmm1,xmm6,128
; imm 254 = 11 11 11 10b: lanes take source dwords 2,3,3,3.
336 pshufd xmm0,xmm7,254
337 pxor xmm6,xmm1
338 pxor xmm1,xmm1
339 pxor xmm6,xmm0
340 movdqa xmm0,xmm6
; movhlps from the zeroed xmm1 clears the low 8 bytes of xmm6.
341 movhlps xmm6,xmm1
342 ret
;-----------------------------------------------------------------------
; __vpaes_schedule_round -- one round of the key schedule.
; L$_vpaes_schedule_low_round is a second entry point that skips the
; round-constant and rotation prologue.
;
; In:    xmm0 = input word vector, xmm7 = running schedule state
;        ebp  = absolute L$_vpaes_consts+0x30
;        [8+esp] (full round only) = 16-byte round-constant state; this is the
;               slot __vpaes_schedule_core filled as [4+esp], seen one return
;               address deeper, so this entry must be called directly from
;               __vpaes_schedule_core
; Out:   xmm0 = xmm7 = new schedule state; [8+esp] rotated (full round only)
; Clobb: xmm1-xmm5 (no general registers, no flags)
;
; "db 102,15,56,0,M" is PSHUFB xmm,xmm; "db 102,15,58,15,M,I" is PALIGNR.
;-----------------------------------------------------------------------
343align 16
344__vpaes_schedule_round:
; Pull the top byte of the stored constant into the low byte of xmm1 (the rest
; of xmm1 is zero), rotate the stored constant by one byte, and xor the
; extracted byte into the state.
345 movdqa xmm2,[8+esp]
346 pxor xmm1,xmm1
347db 102,15,58,15,202,15 ; palignr xmm1,xmm2,15
348db 102,15,58,15,210,15 ; palignr xmm2,xmm2,15
349 pxor xmm7,xmm1
; Broadcast the top dword of xmm0 to all lanes, then rotate it by one byte.
350 pshufd xmm0,xmm0,255
351db 102,15,58,15,192,1 ; palignr xmm0,xmm0,1
352 movdqa [8+esp],xmm2
353L$_vpaes_schedule_low_round:
; Smear the state: xmm7 ^= xmm7 << 4 bytes, then ^= << 8 bytes, so each dword
; becomes the xor of itself and all lower dwords; then xor in the 0x5B row.
354 movdqa xmm1,xmm7
355 pslldq xmm7,4
356 pxor xmm7,xmm1
357 movdqa xmm1,xmm7
358 pslldq xmm7,8
359 pxor xmm7,xmm1
360 pxor xmm7,[336+ebp]
; Nibble-wise table lookups on xmm0. This is the same lookup network as
; L$000enc_entry, with the 0x0F mask in xmm4 and row [ebp-48] in xmm5 instead
; of the preloaded xmm6/xmm7.
361 movdqa xmm4,[ebp-16]
362 movdqa xmm5,[ebp-48]
363 movdqa xmm1,xmm4
364 pandn xmm1,xmm0
365 psrld xmm1,4
366 pand xmm0,xmm4
367 movdqa xmm2,[ebp-32]
368db 102,15,56,0,208 ; pshufb xmm2,xmm0
369 pxor xmm0,xmm1
370 movdqa xmm3,xmm5
371db 102,15,56,0,217 ; pshufb xmm3,xmm1
372 pxor xmm3,xmm2
373 movdqa xmm4,xmm5
374db 102,15,56,0,224 ; pshufb xmm4,xmm0
375 pxor xmm4,xmm2
376 movdqa xmm2,xmm5
377db 102,15,56,0,211 ; pshufb xmm2,xmm3
378 pxor xmm2,xmm0
379 movdqa xmm3,xmm5
380db 102,15,56,0,220 ; pshufb xmm3,xmm4
381 pxor xmm3,xmm1
; Output lookups through rows [ebp+32]/[ebp+48], then fold in the smeared state.
382 movdqa xmm4,[32+ebp]
383db 102,15,56,0,226 ; pshufb xmm4,xmm2
384 movdqa xmm0,[48+ebp]
385db 102,15,56,0,195 ; pshufb xmm0,xmm3
386 pxor xmm0,xmm4
387 pxor xmm0,xmm7
388 movdqa xmm7,xmm0
389 ret
;-----------------------------------------------------------------------
; __vpaes_schedule_transform -- apply a byte-wise transform given as a pair of
; 16-entry nibble lookup tables.
;
; In:    xmm0 = input vector
;        ebx  = pointer to the table pair: [ebx] is indexed by each byte's low
;               nibble, [ebx+16] by its high nibble
;        ebp  = absolute L$_vpaes_consts+0x30 (for the 0x0F mask at [ebp-16])
; Out:   xmm0 = lookup([ebx], low nibbles) xor lookup([ebx+16], high nibbles)
; Clobb: xmm1, xmm2 (no general registers, no flags)
;
; "db 102,15,56,0,M" is hand-encoded PSHUFB xmm,xmm.
;-----------------------------------------------------------------------
390align 16
391__vpaes_schedule_transform:
392 movdqa xmm2,[ebp-16]
393 movdqa xmm1,xmm2
; xmm1 = high nibbles (shifted down), xmm0 = low nibbles.
394 pandn xmm1,xmm0
395 psrld xmm1,4
396 pand xmm0,xmm2
397 movdqa xmm2,[ebx]
398db 102,15,56,0,208 ; pshufb xmm2,xmm0
399 movdqa xmm0,[16+ebx]
400db 102,15,56,0,193 ; pshufb xmm0,xmm1
401 pxor xmm0,xmm2
402 ret
;-----------------------------------------------------------------------
; __vpaes_schedule_mangle -- convert the schedule state in xmm0 to its stored
; form and write it as the next 16-byte schedule entry.
;
; In:    xmm0 = value to store (left unchanged)
;        edi  = 0 for an encryption schedule, non-zero for a decryption schedule
;        edx  = address of the previously written entry
;        ecx  = 0/16/32/48 offset selecting a permutation row at [ebp+256]
;        ebp  = absolute L$_vpaes_consts+0x30
; Out:   edx  = edx+16 (edi == 0) or edx-16 (edi != 0); entry stored at [edx]
;        ecx  = (ecx - 16) and 48
; Clobb: xmm1-xmm5, esi (decrypt path only), flags
;
; "db 102,15,56,0,M" is hand-encoded PSHUFB xmm,xmm.
;-----------------------------------------------------------------------
403align 16
404__vpaes_schedule_mangle:
405 movdqa xmm4,xmm0
; xmm5 = permutation row at [ebp+128], applied repeatedly on both paths.
406 movdqa xmm5,[128+ebp]
407 test edi,edi
408 jnz NEAR L$014schedule_mangle_dec
; Encrypt path: xor with the 0x5B row, then xmm3 = P(x) xor P(P(x)) xor
; P(P(P(x))), where P is the byte permutation held in xmm5.
409 add edx,16
410 pxor xmm4,[336+ebp]
411db 102,15,56,0,229 ; pshufb xmm4,xmm5
412 movdqa xmm3,xmm4
413db 102,15,56,0,229 ; pshufb xmm4,xmm5
414 pxor xmm3,xmm4
415db 102,15,56,0,229 ; pshufb xmm4,xmm5
416 pxor xmm3,xmm4
417 jmp NEAR L$015schedule_mangle_both
418align 16
; Decrypt path: split xmm4 into low nibbles (xmm4) and high nibbles (xmm1),
; then run four low/high table-pair lookups from [esi] .. [esi+112], permuting
; the accumulator by xmm5 between consecutive pairs.
419L$014schedule_mangle_dec:
420 movdqa xmm2,[ebp-16]
; esi is overwritten here with the table base.
421 lea esi,[416+ebp]
422 movdqa xmm1,xmm2
423 pandn xmm1,xmm4
424 psrld xmm1,4
425 pand xmm4,xmm2
426 movdqa xmm2,[esi]
427db 102,15,56,0,212 ; pshufb xmm2,xmm4
428 movdqa xmm3,[16+esi]
429db 102,15,56,0,217 ; pshufb xmm3,xmm1
430 pxor xmm3,xmm2
431db 102,15,56,0,221 ; pshufb xmm3,xmm5
432 movdqa xmm2,[32+esi]
433db 102,15,56,0,212 ; pshufb xmm2,xmm4
434 pxor xmm2,xmm3
435 movdqa xmm3,[48+esi]
436db 102,15,56,0,217 ; pshufb xmm3,xmm1
437 pxor xmm3,xmm2
438db 102,15,56,0,221 ; pshufb xmm3,xmm5
439 movdqa xmm2,[64+esi]
440db 102,15,56,0,212 ; pshufb xmm2,xmm4
441 pxor xmm2,xmm3
442 movdqa xmm3,[80+esi]
443db 102,15,56,0,217 ; pshufb xmm3,xmm1
444 pxor xmm3,xmm2
445db 102,15,56,0,221 ; pshufb xmm3,xmm5
446 movdqa xmm2,[96+esi]
447db 102,15,56,0,212 ; pshufb xmm2,xmm4
448 pxor xmm2,xmm3
449 movdqa xmm3,[112+esi]
450db 102,15,56,0,217 ; pshufb xmm3,xmm1
451 pxor xmm3,xmm2
; The decryption schedule is filled from high addresses toward low ones.
452 add edx,-16
; Common tail: apply the ecx-selected permutation, step ecx backwards modulo
; 64, and store the entry.
453L$015schedule_mangle_both:
454 movdqa xmm1,[256+ecx*1+ebp]
455db 102,15,56,0,217 ; pshufb xmm3,xmm1
456 add ecx,-16
457 and ecx,48
458 movdqu [edx],xmm3
459 ret
;-----------------------------------------------------------------------
; _vpaes_set_encrypt_key -- build an encryption key schedule.
; 32-bit stack-argument entry point (arguments read from [esp+20..28] after
; four register pushes).
;
; Args:  arg1 = pointer to the user key bytes
;        arg2 = key size in bits
;        arg3 = pointer to the key schedule to fill; the value (bits >> 5) + 5
;               is also stored as a dword at offset 240 of it
; Out:   eax = 0
; Saves: ebp, ebx, esi, edi (pushed/popped); esp restored from the frame
;-----------------------------------------------------------------------
460global _vpaes_set_encrypt_key
461align 16
462_vpaes_set_encrypt_key:
463L$_vpaes_set_encrypt_key_begin:
464 push ebp
465 push ebx
466 push esi
467 push edi
468 mov esi,DWORD [20+esp]
; Build a 16-byte-aligned scratch frame at least 56 bytes below the current
; esp; the old esp is kept at [esp+48] of the new frame.
469 lea ebx,[esp-56]
470 mov eax,DWORD [24+esp]
471 and ebx,-16
472 mov edx,DWORD [28+esp]
473 xchg ebx,esp
474 mov DWORD [48+esp],ebx
; Store the loop count used by the encrypt/decrypt cores: (bits >> 5) + 5.
475 mov ebx,eax
476 shr ebx,5
477 add ebx,5
478 mov DWORD [240+edx],ebx
; ecx = initial permutation-row offset; edi = 0 selects the encryption schedule.
479 mov ecx,48
480 mov edi,0
; ebp = constant-pool address relative to the return address of the call below;
; __vpaes_schedule_core adds the return address back. The call must therefore
; be immediately followed by L$016pic_point.
481 lea ebp,[(L$_vpaes_consts+0x30-L$016pic_point)]
482 call __vpaes_schedule_core
483L$016pic_point:
; Drop the aligned frame, return 0.
484 mov esp,DWORD [48+esp]
485 xor eax,eax
486 pop edi
487 pop esi
488 pop ebx
489 pop ebp
490 ret
;-----------------------------------------------------------------------
; _vpaes_set_decrypt_key -- build a decryption key schedule.
; 32-bit stack-argument entry point (arguments read from [esp+20..28] after
; four register pushes).
;
; Args:  arg1 = pointer to the user key bytes
;        arg2 = key size in bits
;        arg3 = pointer to the key schedule to fill; the value (bits >> 5) + 5
;               is also stored as a dword at offset 240 of it
; Out:   eax = 0
; Saves: ebp, ebx, esi, edi (pushed/popped); esp restored from the frame
;-----------------------------------------------------------------------
491global _vpaes_set_decrypt_key
492align 16
493_vpaes_set_decrypt_key:
494L$_vpaes_set_decrypt_key_begin:
495 push ebp
496 push ebx
497 push esi
498 push edi
499 mov esi,DWORD [20+esp]
; Build a 16-byte-aligned scratch frame at least 56 bytes below the current
; esp; the old esp is kept at [esp+48] of the new frame.
500 lea ebx,[esp-56]
501 mov eax,DWORD [24+esp]
502 and ebx,-16
503 mov edx,DWORD [28+esp]
504 xchg ebx,esp
505 mov DWORD [48+esp],ebx
; Store the loop count used by the encrypt/decrypt cores: (bits >> 5) + 5.
506 mov ebx,eax
507 shr ebx,5
508 add ebx,5
509 mov DWORD [240+edx],ebx
; Start writing at schedule + 16 + 16*count: the decryption schedule is filled
; from its end toward its start (the mangle helper steps edx down by 16).
510 shl ebx,4
511 lea edx,[16+ebx*1+edx]
; edi = 1 selects the decryption schedule.
512 mov edi,1
; ecx = ((bits >> 1) and 32) xor 32: 32 for 128- and 256-bit keys, 0 for 192.
513 mov ecx,eax
514 shr ecx,1
515 and ecx,32
516 xor ecx,32
; ebp = constant-pool address relative to the return address of the call below;
; __vpaes_schedule_core adds the return address back. The call must therefore
; be immediately followed by L$017pic_point.
517 lea ebp,[(L$_vpaes_consts+0x30-L$017pic_point)]
518 call __vpaes_schedule_core
519L$017pic_point:
; Drop the aligned frame, return 0.
520 mov esp,DWORD [48+esp]
521 xor eax,eax
522 pop edi
523 pop esi
524 pop ebx
525 pop ebp
526 ret
;-----------------------------------------------------------------------
; _vpaes_encrypt -- encrypt one 16-byte block.
; 32-bit stack-argument entry point (arguments read from [esp+20..28] after
; four register pushes).
;
; Args:  arg1 = pointer to the 16 input bytes   (unaligned load)
;        arg2 = pointer to the 16 output bytes  (unaligned store)
;        arg3 = pointer to the key schedule, passed to __vpaes_encrypt_core in edx
; Out:   none in eax (eax holds whatever __vpaes_encrypt_core left there)
; Saves: ebp, ebx, esi, edi (pushed/popped); esp restored from the frame
;-----------------------------------------------------------------------
527global _vpaes_encrypt
528align 16
529_vpaes_encrypt:
530L$_vpaes_encrypt_begin:
531 push ebp
532 push ebx
533 push esi
534 push edi
; ebp = constant-pool address relative to L$018pic_point; __vpaes_preheat adds
; its return address (= L$018pic_point) and preloads xmm6/xmm7.
535 lea ebp,[(L$_vpaes_consts+0x30-L$018pic_point)]
536 call __vpaes_preheat
537L$018pic_point:
538 mov esi,DWORD [20+esp]
; Build a 16-byte-aligned frame; the old esp is kept at [esp+48] of it.
539 lea ebx,[esp-56]
540 mov edi,DWORD [24+esp]
541 and ebx,-16
542 mov edx,DWORD [28+esp]
543 xchg ebx,esp
544 mov DWORD [48+esp],ebx
545 movdqu xmm0,[esi]
546 call __vpaes_encrypt_core
547 movdqu [edi],xmm0
548 mov esp,DWORD [48+esp]
549 pop edi
550 pop esi
551 pop ebx
552 pop ebp
553 ret
;-----------------------------------------------------------------------
; _vpaes_decrypt -- decrypt one 16-byte block.
; 32-bit stack-argument entry point (arguments read from [esp+20..28] after
; four register pushes).
;
; Args:  arg1 = pointer to the 16 input bytes   (unaligned load)
;        arg2 = pointer to the 16 output bytes  (unaligned store)
;        arg3 = pointer to the key schedule, passed to __vpaes_decrypt_core in edx
; Out:   none in eax (eax holds whatever __vpaes_decrypt_core left there)
; Saves: ebp, ebx, esi, edi (pushed/popped); esp restored from the frame
;-----------------------------------------------------------------------
554global _vpaes_decrypt
555align 16
556_vpaes_decrypt:
557L$_vpaes_decrypt_begin:
558 push ebp
559 push ebx
560 push esi
561 push edi
; ebp = constant-pool address relative to L$019pic_point; __vpaes_preheat adds
; its return address (= L$019pic_point) and preloads xmm6/xmm7.
562 lea ebp,[(L$_vpaes_consts+0x30-L$019pic_point)]
563 call __vpaes_preheat
564L$019pic_point:
565 mov esi,DWORD [20+esp]
; Build a 16-byte-aligned frame; the old esp is kept at [esp+48] of it.
566 lea ebx,[esp-56]
567 mov edi,DWORD [24+esp]
568 and ebx,-16
569 mov edx,DWORD [28+esp]
570 xchg ebx,esp
571 mov DWORD [48+esp],ebx
572 movdqu xmm0,[esi]
573 call __vpaes_decrypt_core
574 movdqu [edi],xmm0
575 mov esp,DWORD [48+esp]
576 pop edi
577 pop esi
578 pop ebx
579 pop ebp
580 ret
;-----------------------------------------------------------------------
; _vpaes_cbc_encrypt -- CBC-chained encryption or decryption of whole blocks.
; 32-bit stack-argument entry point (arguments read from [esp+20..40] after
; four register pushes).
;
; Args:  arg1 = input pointer
;        arg2 = output pointer
;        arg3 = length in bytes; if below 16 the function returns at once
;               without touching anything, otherwise floor(length/16) blocks
;               are processed and any trailing partial block is ignored
;        arg4 = key schedule pointer (handed to the core routines in edx)
;        arg5 = pointer to the 16-byte chaining value: read at start, and
;               overwritten on exit with the last ciphertext block
;        arg6 = 0 selects decryption, non-zero selects encryption
; Saves: ebp, ebx, esi, edi (pushed/popped); esp restored from the frame
;
; Aligned frame layout (esp after realignment):
;   [esp+0]  output - input pointer difference
;   [esp+4]  key schedule pointer
;   [esp+8]  chaining-value pointer
;   [esp+16] decrypt only: chaining value for the current block
;   [esp+32] decrypt only: copy of the current input block
;   [esp+48] original esp
;-----------------------------------------------------------------------
581global _vpaes_cbc_encrypt
582align 16
583_vpaes_cbc_encrypt:
584L$_vpaes_cbc_encrypt_begin:
585 push ebp
586 push ebx
587 push esi
588 push edi
589 mov esi,DWORD [20+esp]
590 mov edi,DWORD [24+esp]
591 mov eax,DWORD [28+esp]
592 mov edx,DWORD [32+esp]
; eax = length - 16; a borrow means fewer than 16 bytes were supplied.
593 sub eax,16
594 jc NEAR L$020cbc_abort
; Build the 16-byte-aligned frame described above.
595 lea ebx,[esp-56]
596 mov ebp,DWORD [36+esp]
597 and ebx,-16
598 mov ecx,DWORD [40+esp]
599 xchg ebx,esp
; xmm1 = incoming chaining value.
600 movdqu xmm1,[ebp]
; edi = output - input, so the output address is later formed as [esi+ebx].
601 sub edi,esi
602 mov DWORD [48+esp],ebx
603 mov DWORD [esp],edi
604 mov DWORD [4+esp],edx
605 mov DWORD [8+esp],ebp
; edi now counts the bytes remaining after the current block.
606 mov edi,eax
; ebp = constant-pool address relative to L$021pic_point; __vpaes_preheat adds
; its return address (= L$021pic_point) and preloads xmm6/xmm7.
607 lea ebp,[(L$_vpaes_consts+0x30-L$021pic_point)]
608 call __vpaes_preheat
609L$021pic_point:
610 cmp ecx,0
611 je NEAR L$022cbc_dec_loop
612 jmp NEAR L$023cbc_enc_loop
613align 16
; Encrypt: xor each input block with the previous ciphertext (xmm1), encrypt
; it, and keep the result as the next chaining value.
614L$023cbc_enc_loop:
615 movdqu xmm0,[esi]
616 pxor xmm0,xmm1
617 call __vpaes_encrypt_core
; The core clobbers ebx and advances edx, so both are reloaded from the frame.
618 mov ebx,DWORD [esp]
619 mov edx,DWORD [4+esp]
620 movdqa xmm1,xmm0
621 movdqu [esi*1+ebx],xmm0
622 lea esi,[16+esi]
; Continue while at least one more full block remains (no borrow).
623 sub edi,16
624 jnc NEAR L$023cbc_enc_loop
625 jmp NEAR L$024cbc_done
626align 16
; Decrypt: save the chaining value and the input block on the frame (the core
; clobbers xmm1), decrypt, xor with the chaining value, and make the saved
; input block the next chaining value.
627L$022cbc_dec_loop:
628 movdqu xmm0,[esi]
629 movdqa [16+esp],xmm1
630 movdqa [32+esp],xmm0
631 call __vpaes_decrypt_core
; The core clobbers ebx and advances edx, so both are reloaded from the frame.
632 mov ebx,DWORD [esp]
633 mov edx,DWORD [4+esp]
634 pxor xmm0,[16+esp]
635 movdqa xmm1,[32+esp]
636 movdqu [esi*1+ebx],xmm0
637 lea esi,[16+esi]
; Continue while at least one more full block remains (no borrow).
638 sub edi,16
639 jnc NEAR L$022cbc_dec_loop
; Write the final chaining value back through arg5 and drop the frame.
640L$024cbc_done:
641 mov ebx,DWORD [8+esp]
642 mov esp,DWORD [48+esp]
643 movdqu [ebx],xmm1
644L$020cbc_abort:
645 pop edi
646 pop esi
647 pop ebx
648 pop ebp
649 ret