# fe25519_mul.S
  # batch_fe25519_mul(rp, xp, yp)
  #
  # Syntax: AT&T (GNU as), passed through the C preprocessor.
  # ABI:    System V AMD64.
  #
  # Purpose
  #   Multiplies the five-limb value at xp by the five-limb value at yp and
  #   stores a partially carried five-limb result at rp. Limbs are 64-bit
  #   words in radix 2^51 (the carry steps shift by 51, and by 13 = 64 - 51).
  #   Products whose limb indices sum to 5 or more are folded back with a
  #   factor of 19, which is reduction modulo 2^255 - 19.
  #
  # Inputs
  #   rdi = rp, destination, 5 x 64-bit words
  #   rsi = xp, first operand, 5 x 64-bit words (named x0..x4 below)
  #   rdx = yp, second operand, 5 x 64-bit words (named y0..y4 below)
  #
  # Outputs
  #   0..32(rp) = result limbs r0..r4. r1..r4 are masked with REDMASK51.
  #   r0 is masked and then receives the final folded carry, so it can
  #   exceed the mask by that carry.
  #   rax = rp, rdx = mask value (both set by the generated epilogue).
  #
  # Clobbers
  #   rax, rcx, rdx, rsi, r8, r9, r10, r11, flags.
  #   r12, r13, r14, r15, rbx and rbp are saved on entry and restored on exit.
  #
  # Stack frame (offsets from the adjusted rsp)
  #    0  frame size kept in r11, needed to undo the adjustment
  #    8  saved r12          16  saved r13          24  saved r14
  #   32  saved r15          40  saved rbx          48  saved rbp
  #   56  copy of rp (written below, never read back in this routine)
  #   64  precomputed 19*x3  72  precomputed 19*x4
  #
  # NOTE(review): assumes the input limbs are small enough that 19*x_i fits
  # in 64 bits and that each 128-bit column sum does not overflow; nothing
  # in this routine checks that. Confirm against the callers.
  # NOTE(review): batch_REDMASK51 is defined elsewhere; presumably 2^51 - 1.

.text
.p2align 5
.globl _CRYPTO_NAMESPACE(batch_fe25519_mul)
.globl CRYPTO_NAMESPACE(batch_fe25519_mul)
_CRYPTO_NAMESPACE(batch_fe25519_mul):
CRYPTO_NAMESPACE(batch_fe25519_mul):

        # Reserve (rsp mod 32) + 96 bytes; rsp ends up 32-byte aligned.
        mov     %rsp,%r11
        and     $31,%r11
        add     $96,%r11
        sub     %r11,%rsp

        # Spill the frame size and the callee-saved registers used as
        # accumulators below.
        movq    %r11,0(%rsp)
        movq    %r12,8(%rsp)
        movq    %r13,16(%rsp)
        movq    %r14,24(%rsp)
        movq    %r15,32(%rsp)
        movq    %rbx,40(%rsp)
        movq    %rbp,48(%rsp)

        # Generated spill of rp. rdi is never overwritten and this slot is
        # never reloaded, so the store is dead; kept to leave the generated
        # instruction stream untouched.
        movq    %rdi,56(%rsp)

        # mulq writes rdx, so yp moves to rcx for the rest of the routine.
        mov     %rdx,%rcx

        # Column accumulators, each a 128-bit (high:low) pair:
        #   r0 = r9:r8    r1 = r11:r10   r2 = r13:r12
        #   r3 = r15:r14  r4 = rbp:rbx

        # r0  = (19*x3) * y2, and keep 19*x3 for later
        movq    24(%rsi),%rdx
        imulq   $19,%rdx,%rax
        movq    %rax,64(%rsp)
        mulq    16(%rcx)
        mov     %rax,%r8
        mov     %rdx,%r9

        # r0 += (19*x4) * y1, and keep 19*x4 for later
        movq    32(%rsi),%rdx
        imulq   $19,%rdx,%rax
        movq    %rax,72(%rsp)
        mulq    8(%rcx)
        add     %rax,%r8
        adc     %rdx,%r9

        # r0 += x0 * y0
        movq    0(%rsi),%rax
        mulq    0(%rcx)
        add     %rax,%r8
        adc     %rdx,%r9

        # r1  = x0 * y1
        movq    0(%rsi),%rax
        mulq    8(%rcx)
        mov     %rax,%r10
        mov     %rdx,%r11

        # r2  = x0 * y2
        movq    0(%rsi),%rax
        mulq    16(%rcx)
        mov     %rax,%r12
        mov     %rdx,%r13

        # r3  = x0 * y3
        movq    0(%rsi),%rax
        mulq    24(%rcx)
        mov     %rax,%r14
        mov     %rdx,%r15

        # r4  = x0 * y4
        movq    0(%rsi),%rax
        mulq    32(%rcx)
        mov     %rax,%rbx
        mov     %rdx,%rbp

        # r1 += x1 * y0
        movq    8(%rsi),%rax
        mulq    0(%rcx)
        add     %rax,%r10
        adc     %rdx,%r11

        # r2 += x1 * y1
        movq    8(%rsi),%rax
        mulq    8(%rcx)
        add     %rax,%r12
        adc     %rdx,%r13

        # r3 += x1 * y2
        movq    8(%rsi),%rax
        mulq    16(%rcx)
        add     %rax,%r14
        adc     %rdx,%r15

        # r4 += x1 * y3
        movq    8(%rsi),%rax
        mulq    24(%rcx)
        add     %rax,%rbx
        adc     %rdx,%rbp

        # r0 += (19*x1) * y4
        movq    8(%rsi),%rdx
        imulq   $19,%rdx,%rax
        mulq    32(%rcx)
        add     %rax,%r8
        adc     %rdx,%r9

        # r2 += x2 * y0
        movq    16(%rsi),%rax
        mulq    0(%rcx)
        add     %rax,%r12
        adc     %rdx,%r13

        # r3 += x2 * y1
        movq    16(%rsi),%rax
        mulq    8(%rcx)
        add     %rax,%r14
        adc     %rdx,%r15

        # r4 += x2 * y2
        movq    16(%rsi),%rax
        mulq    16(%rcx)
        add     %rax,%rbx
        adc     %rdx,%rbp

        # r0 += (19*x2) * y3
        movq    16(%rsi),%rdx
        imulq   $19,%rdx,%rax
        mulq    24(%rcx)
        add     %rax,%r8
        adc     %rdx,%r9

        # r1 += (19*x2) * y4   (19*x2 is recomputed, not cached)
        movq    16(%rsi),%rdx
        imulq   $19,%rdx,%rax
        mulq    32(%rcx)
        add     %rax,%r10
        adc     %rdx,%r11

        # r3 += x3 * y0
        movq    24(%rsi),%rax
        mulq    0(%rcx)
        add     %rax,%r14
        adc     %rdx,%r15

        # r4 += x3 * y1
        movq    24(%rsi),%rax
        mulq    8(%rcx)
        add     %rax,%rbx
        adc     %rdx,%rbp

        # r1 += (19*x3) * y3
        movq    64(%rsp),%rax
        mulq    24(%rcx)
        add     %rax,%r10
        adc     %rdx,%r11

        # r2 += (19*x3) * y4
        movq    64(%rsp),%rax
        mulq    32(%rcx)
        add     %rax,%r12
        adc     %rdx,%r13

        # r4 += x4 * y0
        movq    32(%rsi),%rax
        mulq    0(%rcx)
        add     %rax,%rbx
        adc     %rdx,%rbp

        # r1 += (19*x4) * y2
        movq    72(%rsp),%rax
        mulq    16(%rcx)
        add     %rax,%r10
        adc     %rdx,%r11

        # r2 += (19*x4) * y3
        movq    72(%rsp),%rax
        mulq    24(%rcx)
        add     %rax,%r12
        adc     %rdx,%r13

        # r3 += (19*x4) * y4
        movq    72(%rsp),%rax
        mulq    32(%rcx)
        add     %rax,%r14
        adc     %rdx,%r15

        # xp is no longer needed; rsi now holds the limb mask, loaded
        # RIP-relative.
        movq    CRYPTO_NAMESPACE(batch_REDMASK51)(%rip),%rsi

        # Split every 128-bit column at bit 51. shld leaves (column >> 51)
        # in the high register, the low register is masked, and the shifted
        # part is added into the next column up.
        shld    $13,%r8,%r9
        and     %rsi,%r8

        shld    $13,%r10,%r11
        and     %rsi,%r10
        add     %r9,%r10

        shld    $13,%r12,%r13
        and     %rsi,%r12
        add     %r11,%r12

        shld    $13,%r14,%r15
        and     %rsi,%r14
        add     %r13,%r14

        shld    $13,%rbx,%rbp
        and     %rsi,%rbx
        add     %r15,%rbx

        # The part shifted out of r4 wraps around into r0 with factor 19.
        imulq   $19,%rbp,%rdx
        add     %rdx,%r8

        # Sequential carry pass r0 -> r1 -> r2 -> r3 -> r4, with rdx as the
        # running carry. Limbs move to new registers on the way:
        #   r1 -> rcx, r2 -> r9, r3 -> rax, r4 -> r10 (r0 stays in r8).
        mov     %r8,%rdx
        shr     $51,%rdx
        add     %r10,%rdx
        mov     %rdx,%rcx
        shr     $51,%rdx
        and     %rsi,%r8

        add     %r12,%rdx
        mov     %rdx,%r9
        shr     $51,%rdx
        and     %rsi,%rcx

        add     %r14,%rdx
        mov     %rdx,%rax
        shr     $51,%rdx
        and     %rsi,%r9

        add     %rbx,%rdx
        mov     %rdx,%r10
        shr     $51,%rdx
        and     %rsi,%rax

        # Carry out of r4 wraps into r0 with factor 19. r0 is not masked
        # again after this add.
        imulq   $19,%rdx,%rdx
        add     %rdx,%r8
        and     %rsi,%r10

        # Store r0..r4 to rp.
        movq    %r8,0(%rdi)
        movq    %rcx,8(%rdi)
        movq    %r9,16(%rdi)
        movq    %rax,24(%rdi)
        movq    %r10,32(%rdi)

        # Reload the frame size and the callee-saved registers.
        movq    0(%rsp),%r11
        movq    8(%rsp),%r12
        movq    16(%rsp),%r13
        movq    24(%rsp),%r14
        movq    32(%rsp),%r15
        movq    40(%rsp),%rbx
        movq    48(%rsp),%rbp

        # Release the frame; the generated epilogue then copies rdi and rsi
        # into rax and rdx.
        add     %r11,%rsp
        mov     %rdi,%rax
        mov     %rsi,%rdx
        ret