1
0

fe25519_freeze.S 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. # qhasm: int64 rp
  2. # qhasm: input rp
  3. # qhasm: int64 r0
  4. # qhasm: int64 r1
  5. # qhasm: int64 r2
  6. # qhasm: int64 r3
  7. # qhasm: int64 r4
  8. # qhasm: int64 t
  9. # qhasm: int64 loop
  10. # qhasm: int64 two51minus1
  11. # qhasm: int64 two51minus19
  12. # qhasm: int64 caller1
  13. # qhasm: int64 caller2
  14. # qhasm: int64 caller3
  15. # qhasm: int64 caller4
  16. # qhasm: int64 caller5
  17. # qhasm: int64 caller6
  18. # qhasm: int64 caller7
  19. # qhasm: caller caller1
  20. # qhasm: caller caller2
  21. # qhasm: caller caller3
  22. # qhasm: caller caller4
  23. # qhasm: caller caller5
  24. # qhasm: caller caller6
  25. # qhasm: caller caller7
  26. # qhasm: stack64 caller1_stack
  27. # qhasm: stack64 caller2_stack
  28. # qhasm: stack64 caller3_stack
  29. # qhasm: stack64 caller4_stack
  30. # qhasm: stack64 caller5_stack
  31. # qhasm: stack64 caller6_stack
  32. # qhasm: stack64 caller7_stack
  # ---------------------------------------------------------------------
  # batch_fe25519_freeze(rp)   x86-64, GAS AT&T syntax.
  # The symbol is wrapped in CRYPTO_NAMESPACE and is also exported with a
  # leading underscore.
  #
  # In:    %rdi = rp, address of five 64-bit words r0..r4 (offsets 0..32),
  #        which are read, reduced and written back in place.
  # Out:   %rax = rp, %rdx = the final r0 (set by the two moves before ret).
  # Regs:  %rsi,%rdx,%rcx,%r8,%r9 = r0..r4; %rax = two51minus1 (loaded from
  #        batch_REDMASK51); %r10 = two51minus1 - 18; %r11 = frame size,
  #        then loop counter; %r12 = t (carry, then select mask).
  # Saved: %r12-%r15, %rbx, %rbp are spilled to the frame and reloaded; of
  #        these only %r12 is written by the code in between.
  # NOTE(review): the names two51minus1 / two51minus19 suggest that
  #        batch_REDMASK51 holds 2^51-1; the constant is defined elsewhere,
  #        so confirm it there.
  # ---------------------------------------------------------------------
  33. # qhasm: enter CRYPTO_NAMESPACE(batch_fe25519_freeze)
  34. .text
  35. .p2align 5
  36. .globl _CRYPTO_NAMESPACE(batch_fe25519_freeze)
  37. .globl CRYPTO_NAMESPACE(batch_fe25519_freeze)
  38. _CRYPTO_NAMESPACE(batch_fe25519_freeze):
  39. CRYPTO_NAMESPACE(batch_fe25519_freeze):
  # Reserve a frame: r11 = (rsp mod 32) + 64, so after the sub the stack
  # pointer is 32-byte aligned and at least 64 bytes below its entry value.
  40. mov %rsp,%r11
  41. and $31,%r11
  42. add $64,%r11
  43. sub %r11,%rsp
  # Slot 0 keeps the frame size held in r11, so the epilogue can undo the
  # sub above even though r11 is reused as the loop counter in between.
  44. # qhasm: caller1_stack = caller1
  45. # asm 1: movq <caller1=int64#9,>caller1_stack=stack64#1
  46. # asm 2: movq <caller1=%r11,>caller1_stack=0(%rsp)
  47. movq %r11,0(%rsp)
  48. # qhasm: caller2_stack = caller2
  49. # asm 1: movq <caller2=int64#10,>caller2_stack=stack64#2
  50. # asm 2: movq <caller2=%r12,>caller2_stack=8(%rsp)
  51. movq %r12,8(%rsp)
  52. # qhasm: caller3_stack = caller3
  53. # asm 1: movq <caller3=int64#11,>caller3_stack=stack64#3
  54. # asm 2: movq <caller3=%r13,>caller3_stack=16(%rsp)
  55. movq %r13,16(%rsp)
  56. # qhasm: caller4_stack = caller4
  57. # asm 1: movq <caller4=int64#12,>caller4_stack=stack64#4
  58. # asm 2: movq <caller4=%r14,>caller4_stack=24(%rsp)
  59. movq %r14,24(%rsp)
  60. # qhasm: caller5_stack = caller5
  61. # asm 1: movq <caller5=int64#13,>caller5_stack=stack64#5
  62. # asm 2: movq <caller5=%r15,>caller5_stack=32(%rsp)
  63. movq %r15,32(%rsp)
  64. # qhasm: caller6_stack = caller6
  65. # asm 1: movq <caller6=int64#14,>caller6_stack=stack64#6
  66. # asm 2: movq <caller6=%rbx,>caller6_stack=40(%rsp)
  67. movq %rbx,40(%rsp)
  68. # qhasm: caller7_stack = caller7
  69. # asm 1: movq <caller7=int64#15,>caller7_stack=stack64#7
  70. # asm 2: movq <caller7=%rbp,>caller7_stack=48(%rsp)
  71. movq %rbp,48(%rsp)
  # Load the five words of the operand from rp into r0..r4.
  72. # qhasm: r0 = *(uint64 *) (rp + 0)
  73. # asm 1: movq 0(<rp=int64#1),>r0=int64#2
  74. # asm 2: movq 0(<rp=%rdi),>r0=%rsi
  75. movq 0(%rdi),%rsi
  76. # qhasm: r1 = *(uint64 *) (rp + 8)
  77. # asm 1: movq 8(<rp=int64#1),>r1=int64#3
  78. # asm 2: movq 8(<rp=%rdi),>r1=%rdx
  79. movq 8(%rdi),%rdx
  80. # qhasm: r2 = *(uint64 *) (rp + 16)
  81. # asm 1: movq 16(<rp=int64#1),>r2=int64#4
  82. # asm 2: movq 16(<rp=%rdi),>r2=%rcx
  83. movq 16(%rdi),%rcx
  84. # qhasm: r3 = *(uint64 *) (rp + 24)
  85. # asm 1: movq 24(<rp=int64#1),>r3=int64#5
  86. # asm 2: movq 24(<rp=%rdi),>r3=%r8
  87. movq 24(%rdi),%r8
  88. # qhasm: r4 = *(uint64 *) (rp + 32)
  89. # asm 1: movq 32(<rp=int64#1),>r4=int64#6
  90. # asm 2: movq 32(<rp=%rdi),>r4=%r9
  91. movq 32(%rdi),%r9
  # The mask constant is fetched RIP-relative; two51minus19 is derived from
  # it by subtracting 18.
  92. # qhasm: two51minus1 = *(uint64 *) &CRYPTO_NAMESPACE(batch_REDMASK51)
  93. # asm 1: movq CRYPTO_NAMESPACE(batch_REDMASK51),>two51minus1=int64#7
  94. # asm 2: movq CRYPTO_NAMESPACE(batch_REDMASK51),>two51minus1=%rax
  95. movq CRYPTO_NAMESPACE(batch_REDMASK51)(%rip),%rax
  96. # qhasm: two51minus19 = two51minus1
  97. # asm 1: mov <two51minus1=int64#7,>two51minus19=int64#8
  98. # asm 2: mov <two51minus1=%rax,>two51minus19=%r10
  99. mov %rax,%r10
  100. # qhasm: two51minus19 -= 18
  101. # asm 1: sub $18,<two51minus19=int64#8
  102. # asm 2: sub $18,<two51minus19=%r10
  103. sub $18,%r10
  # Three carry passes. In each pass, for every word in turn, bits 51 and up
  # are added into the next word and the word is masked with two51minus1;
  # the bits shifted out of r4 are multiplied by 19 and added to r0.
  104. # qhasm: loop = 3
  105. # asm 1: mov $3,>loop=int64#9
  106. # asm 2: mov $3,>loop=%r11
  107. mov $3,%r11
  108. # qhasm: reduceloop:
  109. ._reduceloop:
  110. # qhasm: t = r0
  111. # asm 1: mov <r0=int64#2,>t=int64#10
  112. # asm 2: mov <r0=%rsi,>t=%r12
  113. mov %rsi,%r12
  114. # qhasm: (uint64) t >>= 51
  115. # asm 1: shr $51,<t=int64#10
  116. # asm 2: shr $51,<t=%r12
  117. shr $51,%r12
  118. # qhasm: r0 &= two51minus1
  119. # asm 1: and <two51minus1=int64#7,<r0=int64#2
  120. # asm 2: and <two51minus1=%rax,<r0=%rsi
  121. and %rax,%rsi
  122. # qhasm: r1 += t
  123. # asm 1: add <t=int64#10,<r1=int64#3
  124. # asm 2: add <t=%r12,<r1=%rdx
  125. add %r12,%rdx
  126. # qhasm: t = r1
  127. # asm 1: mov <r1=int64#3,>t=int64#10
  128. # asm 2: mov <r1=%rdx,>t=%r12
  129. mov %rdx,%r12
  130. # qhasm: (uint64) t >>= 51
  131. # asm 1: shr $51,<t=int64#10
  132. # asm 2: shr $51,<t=%r12
  133. shr $51,%r12
  134. # qhasm: r1 &= two51minus1
  135. # asm 1: and <two51minus1=int64#7,<r1=int64#3
  136. # asm 2: and <two51minus1=%rax,<r1=%rdx
  137. and %rax,%rdx
  138. # qhasm: r2 += t
  139. # asm 1: add <t=int64#10,<r2=int64#4
  140. # asm 2: add <t=%r12,<r2=%rcx
  141. add %r12,%rcx
  142. # qhasm: t = r2
  143. # asm 1: mov <r2=int64#4,>t=int64#10
  144. # asm 2: mov <r2=%rcx,>t=%r12
  145. mov %rcx,%r12
  146. # qhasm: (uint64) t >>= 51
  147. # asm 1: shr $51,<t=int64#10
  148. # asm 2: shr $51,<t=%r12
  149. shr $51,%r12
  150. # qhasm: r2 &= two51minus1
  151. # asm 1: and <two51minus1=int64#7,<r2=int64#4
  152. # asm 2: and <two51minus1=%rax,<r2=%rcx
  153. and %rax,%rcx
  154. # qhasm: r3 += t
  155. # asm 1: add <t=int64#10,<r3=int64#5
  156. # asm 2: add <t=%r12,<r3=%r8
  157. add %r12,%r8
  158. # qhasm: t = r3
  159. # asm 1: mov <r3=int64#5,>t=int64#10
  160. # asm 2: mov <r3=%r8,>t=%r12
  161. mov %r8,%r12
  162. # qhasm: (uint64) t >>= 51
  163. # asm 1: shr $51,<t=int64#10
  164. # asm 2: shr $51,<t=%r12
  165. shr $51,%r12
  166. # qhasm: r3 &= two51minus1
  167. # asm 1: and <two51minus1=int64#7,<r3=int64#5
  168. # asm 2: and <two51minus1=%rax,<r3=%r8
  169. and %rax,%r8
  170. # qhasm: r4 += t
  171. # asm 1: add <t=int64#10,<r4=int64#6
  172. # asm 2: add <t=%r12,<r4=%r9
  173. add %r12,%r9
  174. # qhasm: t = r4
  175. # asm 1: mov <r4=int64#6,>t=int64#10
  176. # asm 2: mov <r4=%r9,>t=%r12
  177. mov %r9,%r12
  178. # qhasm: (uint64) t >>= 51
  179. # asm 1: shr $51,<t=int64#10
  180. # asm 2: shr $51,<t=%r12
  181. shr $51,%r12
  182. # qhasm: r4 &= two51minus1
  183. # asm 1: and <two51minus1=int64#7,<r4=int64#6
  184. # asm 2: and <two51minus1=%rax,<r4=%r9
  185. and %rax,%r9
  # Wrap-around: the carry out of the top word re-enters r0 scaled by 19.
  186. # qhasm: t *= 19
  187. # asm 1: imulq $19,<t=int64#10,>t=int64#10
  188. # asm 2: imulq $19,<t=%r12,>t=%r12
  189. imulq $19,%r12,%r12
  190. # qhasm: r0 += t
  191. # asm 1: add <t=int64#10,<r0=int64#2
  192. # asm 2: add <t=%r12,<r0=%rsi
  193. add %r12,%rsi
  # The sub sets the flags that ja tests: the branch is taken while the
  # decremented counter is still non-zero, so the body runs three times and
  # loop is 0 when control falls through.
  194. # qhasm: unsigned>? loop -= 1
  195. # asm 1: sub $1,<loop=int64#9
  196. # asm 2: sub $1,<loop=%r11
  197. sub $1,%r11
  198. # comment:fp stack unchanged by jump
  199. # qhasm: goto reduceloop if unsigned>
  200. ja ._reduceloop
  # Branch-free decision whether to subtract: t starts at 1 and is replaced
  # by loop (0 at this point) when r0 < two51minus19 (signed compare) or
  # when any of r1..r4 differs from two51minus1.
  201. # qhasm: t = 1
  202. # asm 1: mov $1,>t=int64#10
  203. # asm 2: mov $1,>t=%r12
  204. mov $1,%r12
  205. # qhasm: signed<? r0 - two51minus19
  206. # asm 1: cmp <two51minus19=int64#8,<r0=int64#2
  207. # asm 2: cmp <two51minus19=%r10,<r0=%rsi
  208. cmp %r10,%rsi
  209. # qhasm: t = loop if signed<
  210. # asm 1: cmovl <loop=int64#9,<t=int64#10
  211. # asm 2: cmovl <loop=%r11,<t=%r12
  212. cmovl %r11,%r12
  213. # qhasm: =? r1 - two51minus1
  214. # asm 1: cmp <two51minus1=int64#7,<r1=int64#3
  215. # asm 2: cmp <two51minus1=%rax,<r1=%rdx
  216. cmp %rax,%rdx
  217. # qhasm: t = loop if !=
  218. # asm 1: cmovne <loop=int64#9,<t=int64#10
  219. # asm 2: cmovne <loop=%r11,<t=%r12
  220. cmovne %r11,%r12
  221. # qhasm: =? r2 - two51minus1
  222. # asm 1: cmp <two51minus1=int64#7,<r2=int64#4
  223. # asm 2: cmp <two51minus1=%rax,<r2=%rcx
  224. cmp %rax,%rcx
  225. # qhasm: t = loop if !=
  226. # asm 1: cmovne <loop=int64#9,<t=int64#10
  227. # asm 2: cmovne <loop=%r11,<t=%r12
  228. cmovne %r11,%r12
  229. # qhasm: =? r3 - two51minus1
  230. # asm 1: cmp <two51minus1=int64#7,<r3=int64#5
  231. # asm 2: cmp <two51minus1=%rax,<r3=%r8
  232. cmp %rax,%r8
  233. # qhasm: t = loop if !=
  234. # asm 1: cmovne <loop=int64#9,<t=int64#10
  235. # asm 2: cmovne <loop=%r11,<t=%r12
  236. cmovne %r11,%r12
  237. # qhasm: =? r4 - two51minus1
  238. # asm 1: cmp <two51minus1=int64#7,<r4=int64#6
  239. # asm 2: cmp <two51minus1=%rax,<r4=%r9
  240. cmp %rax,%r9
  241. # qhasm: t = loop if !=
  242. # asm 1: cmovne <loop=int64#9,<t=int64#10
  243. # asm 2: cmovne <loop=%r11,<t=%r12
  244. cmovne %r11,%r12
  # neg turns t = 1 into an all-ones mask and leaves t = 0 as zero. The two
  # constants are kept or cleared by that mask, so the subtractions below
  # either remove (two51minus19, two51minus1 x4) or subtract zero.
  245. # qhasm: t = -t
  246. # asm 1: neg <t=int64#10
  247. # asm 2: neg <t=%r12
  248. neg %r12
  249. # qhasm: two51minus1 &= t
  250. # asm 1: and <t=int64#10,<two51minus1=int64#7
  251. # asm 2: and <t=%r12,<two51minus1=%rax
  252. and %r12,%rax
  253. # qhasm: two51minus19 &= t
  254. # asm 1: and <t=int64#10,<two51minus19=int64#8
  255. # asm 2: and <t=%r12,<two51minus19=%r10
  256. and %r12,%r10
  257. # qhasm: r0 -= two51minus19
  258. # asm 1: sub <two51minus19=int64#8,<r0=int64#2
  259. # asm 2: sub <two51minus19=%r10,<r0=%rsi
  260. sub %r10,%rsi
  261. # qhasm: r1 -= two51minus1
  262. # asm 1: sub <two51minus1=int64#7,<r1=int64#3
  263. # asm 2: sub <two51minus1=%rax,<r1=%rdx
  264. sub %rax,%rdx
  265. # qhasm: r2 -= two51minus1
  266. # asm 1: sub <two51minus1=int64#7,<r2=int64#4
  267. # asm 2: sub <two51minus1=%rax,<r2=%rcx
  268. sub %rax,%rcx
  269. # qhasm: r3 -= two51minus1
  270. # asm 1: sub <two51minus1=int64#7,<r3=int64#5
  271. # asm 2: sub <two51minus1=%rax,<r3=%r8
  272. sub %rax,%r8
  273. # qhasm: r4 -= two51minus1
  274. # asm 1: sub <two51minus1=int64#7,<r4=int64#6
  275. # asm 2: sub <two51minus1=%rax,<r4=%r9
  276. sub %rax,%r9
  # Store the five result words back over the input at rp.
  277. # qhasm: *(uint64 *)(rp + 0) = r0
  278. # asm 1: movq <r0=int64#2,0(<rp=int64#1)
  279. # asm 2: movq <r0=%rsi,0(<rp=%rdi)
  280. movq %rsi,0(%rdi)
  281. # qhasm: *(uint64 *)(rp + 8) = r1
  282. # asm 1: movq <r1=int64#3,8(<rp=int64#1)
  283. # asm 2: movq <r1=%rdx,8(<rp=%rdi)
  284. movq %rdx,8(%rdi)
  285. # qhasm: *(uint64 *)(rp + 16) = r2
  286. # asm 1: movq <r2=int64#4,16(<rp=int64#1)
  287. # asm 2: movq <r2=%rcx,16(<rp=%rdi)
  288. movq %rcx,16(%rdi)
  289. # qhasm: *(uint64 *)(rp + 24) = r3
  290. # asm 1: movq <r3=int64#5,24(<rp=int64#1)
  291. # asm 2: movq <r3=%r8,24(<rp=%rdi)
  292. movq %r8,24(%rdi)
  293. # qhasm: *(uint64 *)(rp + 32) = r4
  294. # asm 1: movq <r4=int64#6,32(<rp=int64#1)
  295. # asm 2: movq <r4=%r9,32(<rp=%rdi)
  296. movq %r9,32(%rdi)
  # Reload the spilled registers. The first load puts the frame size back
  # into r11, replacing the loop counter, ready for the add below.
  297. # qhasm: caller1 = caller1_stack
  298. # asm 1: movq <caller1_stack=stack64#1,>caller1=int64#9
  299. # asm 2: movq <caller1_stack=0(%rsp),>caller1=%r11
  300. movq 0(%rsp),%r11
  301. # qhasm: caller2 = caller2_stack
  302. # asm 1: movq <caller2_stack=stack64#2,>caller2=int64#10
  303. # asm 2: movq <caller2_stack=8(%rsp),>caller2=%r12
  304. movq 8(%rsp),%r12
  305. # qhasm: caller3 = caller3_stack
  306. # asm 1: movq <caller3_stack=stack64#3,>caller3=int64#11
  307. # asm 2: movq <caller3_stack=16(%rsp),>caller3=%r13
  308. movq 16(%rsp),%r13
  309. # qhasm: caller4 = caller4_stack
  310. # asm 1: movq <caller4_stack=stack64#4,>caller4=int64#12
  311. # asm 2: movq <caller4_stack=24(%rsp),>caller4=%r14
  312. movq 24(%rsp),%r14
  313. # qhasm: caller5 = caller5_stack
  314. # asm 1: movq <caller5_stack=stack64#5,>caller5=int64#13
  315. # asm 2: movq <caller5_stack=32(%rsp),>caller5=%r15
  316. movq 32(%rsp),%r15
  317. # qhasm: caller6 = caller6_stack
  318. # asm 1: movq <caller6_stack=stack64#6,>caller6=int64#14
  319. # asm 2: movq <caller6_stack=40(%rsp),>caller6=%rbx
  320. movq 40(%rsp),%rbx
  321. # qhasm: caller7 = caller7_stack
  322. # asm 1: movq <caller7_stack=stack64#7,>caller7=int64#15
  323. # asm 2: movq <caller7_stack=48(%rsp),>caller7=%rbp
  324. movq 48(%rsp),%rbp
  # Release the frame reserved at entry, then leave rp in rax and the
  # final r0 in rdx before returning.
  325. # qhasm: leave
  326. add %r11,%rsp
  327. mov %rdi,%rax
  328. mov %rsi,%rdx
  329. ret