1
0

fe25519_square.S 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749
  1. # qhasm: int64 rp
  2. # qhasm: int64 xp
  3. # qhasm: input rp
  4. # qhasm: input xp
  5. # qhasm: int64 r0
  6. # qhasm: int64 r1
  7. # qhasm: int64 r2
  8. # qhasm: int64 r3
  9. # qhasm: int64 r4
  10. # qhasm: int64 c1
  11. # qhasm: int64 c2
  12. # qhasm: int64 c3
  13. # qhasm: int64 c4
  14. # qhasm: int64 c5
  15. # qhasm: int64 c6
  16. # qhasm: int64 c7
  17. # qhasm: caller c1
  18. # qhasm: caller c2
  19. # qhasm: caller c3
  20. # qhasm: caller c4
  21. # qhasm: caller c5
  22. # qhasm: caller c6
  23. # qhasm: caller c7
  24. # qhasm: stack64 c1_stack
  25. # qhasm: stack64 c2_stack
  26. # qhasm: stack64 c3_stack
  27. # qhasm: stack64 c4_stack
  28. # qhasm: stack64 c5_stack
  29. # qhasm: stack64 c6_stack
  30. # qhasm: stack64 c7_stack
  31. # qhasm: stack64 x119_stack
  32. # qhasm: stack64 x219_stack
  33. # qhasm: stack64 x319_stack
  34. # qhasm: stack64 x419_stack
  35. # qhasm: int64 squarer01
  36. # qhasm: int64 squarer11
  37. # qhasm: int64 squarer21
  38. # qhasm: int64 squarer31
  39. # qhasm: int64 squarer41
  40. # qhasm: int64 squarerax
  41. # qhasm: int64 squarerdx
  42. # qhasm: int64 squaret
  43. # qhasm: int64 squareredmask
  # ----------------------------------------------------------------------
  # batch_fe25519_square(rp, xp) -- qhasm-generated, x86-64, AT&T syntax.
  # Inputs (per the register assignments in the asm comments below):
  #   rdi = rp, five 64-bit words are stored at rp+0 .. rp+32
  #   rsi = xp, five 64-bit words are read from xp+0 .. xp+32
  # On return: rax = rp, rdx = the word loaded from batch_REDMASK51
  #   (this comes from the fixed qhasm leave sequence at the bottom).
  # Modified: rax, rcx, rdx, rsi, r8-r11, flags. r12-r15, rbx and rbp are
  #   spilled to the local frame on entry and reloaded before returning.
  # NOTE(review): presumably this squares a field element modulo 2^255-19
  #   held as five 51-bit limbs (suggested by the name, the shifts by 13
  #   and 51, and the factors 19 and 38) -- confirm against the C callers.
  # ----------------------------------------------------------------------
  44. # qhasm: enter CRYPTO_NAMESPACE(batch_fe25519_square)
  45. .text
  46. .p2align 5
  # Both an underscore-prefixed and a plain symbol are exported for the same
  # entry point (presumably to cover object formats with and without a
  # leading underscore on C symbols -- confirm with the build setup).
  47. .globl _CRYPTO_NAMESPACE(batch_fe25519_square)
  48. .globl CRYPTO_NAMESPACE(batch_fe25519_square)
  49. _CRYPTO_NAMESPACE(batch_fe25519_square):
  50. CRYPTO_NAMESPACE(batch_fe25519_square):
  # Frame setup: r11 = (rsp & 31) + 64 is subtracted from rsp, which leaves
  # rsp 32-byte aligned with at least 64 bytes of local frame. r11 (named c1
  # by qhasm) is stored in slot 0 so the epilogue can undo the adjustment.
  51. mov %rsp,%r11
  52. and $31,%r11
  53. add $64,%r11
  54. sub %r11,%rsp
  # Spill the frame adjustment and the registers reused as temporaries.
  55. # qhasm: c1_stack = c1
  56. # asm 1: movq <c1=int64#9,>c1_stack=stack64#1
  57. # asm 2: movq <c1=%r11,>c1_stack=0(%rsp)
  58. movq %r11,0(%rsp)
  59. # qhasm: c2_stack = c2
  60. # asm 1: movq <c2=int64#10,>c2_stack=stack64#2
  61. # asm 2: movq <c2=%r12,>c2_stack=8(%rsp)
  62. movq %r12,8(%rsp)
  63. # qhasm: c3_stack = c3
  64. # asm 1: movq <c3=int64#11,>c3_stack=stack64#3
  65. # asm 2: movq <c3=%r13,>c3_stack=16(%rsp)
  66. movq %r13,16(%rsp)
  67. # qhasm: c4_stack = c4
  68. # asm 1: movq <c4=int64#12,>c4_stack=stack64#4
  69. # asm 2: movq <c4=%r14,>c4_stack=24(%rsp)
  70. movq %r14,24(%rsp)
  71. # qhasm: c5_stack = c5
  72. # asm 1: movq <c5=int64#13,>c5_stack=stack64#5
  73. # asm 2: movq <c5=%r15,>c5_stack=32(%rsp)
  74. movq %r15,32(%rsp)
  75. # qhasm: c6_stack = c6
  76. # asm 1: movq <c6=int64#14,>c6_stack=stack64#6
  77. # asm 2: movq <c6=%rbx,>c6_stack=40(%rsp)
  78. movq %rbx,40(%rsp)
  79. # qhasm: c7_stack = c7
  80. # asm 1: movq <c7=int64#15,>c7_stack=stack64#7
  81. # asm 2: movq <c7=%rbp,>c7_stack=48(%rsp)
  82. movq %rbp,48(%rsp)
  # ----------------------------------------------------------------------
  # Product phase. Each mulq leaves a 128-bit product in rdx:rax. Column k
  # is kept in the register pair squarerk1:rk (high:low); the first term of
  # a column is moved in, later terms are added with add/adc.
  # Writing x0..x4 for the words at xp+0 .. xp+32, the columns are:
  #   column 0 = x0*x0   + 38*x1*x4 + 38*x2*x3
  #   column 1 = 2*x0*x1 + 38*x2*x4 + 19*x3*x3
  #   column 2 = 2*x0*x2 + x1*x1    + 38*x3*x4
  #   column 3 = 2*x0*x3 + 2*x1*x2  + 19*x4*x4
  #   column 4 = 2*x0*x4 + 2*x1*x3  + x2*x2
  # The doubling (shl) and the scaling by 19 or 38 (imulq) are done on a
  # single 64-bit word before the mulq, so the formulas above hold only when
  # those steps and the 128-bit sums do not overflow.
  # NOTE(review): assumes the input words are small enough for that
  # (presumably close to 51 bits) -- TODO confirm the bounds callers keep.
  # ----------------------------------------------------------------------
  # Column 0, first term: x0*x0.
  83. # qhasm: squarerax = *(uint64 *)(xp + 0)
  84. # asm 1: movq 0(<xp=int64#2),>squarerax=int64#7
  85. # asm 2: movq 0(<xp=%rsi),>squarerax=%rax
  86. movq 0(%rsi),%rax
  87. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 0)
  88. # asm 1: mulq 0(<xp=int64#2)
  89. # asm 2: mulq 0(<xp=%rsi)
  90. mulq 0(%rsi)
  91. # qhasm: r0 = squarerax
  92. # asm 1: mov <squarerax=int64#7,>r0=int64#4
  93. # asm 2: mov <squarerax=%rax,>r0=%rcx
  94. mov %rax,%rcx
  95. # qhasm: squarer01 = squarerdx
  96. # asm 1: mov <squarerdx=int64#3,>squarer01=int64#5
  97. # asm 2: mov <squarerdx=%rdx,>squarer01=%r8
  98. mov %rdx,%r8
  # Column 1, first term: (2*x0)*x1.
  99. # qhasm: squarerax = *(uint64 *)(xp + 0)
  100. # asm 1: movq 0(<xp=int64#2),>squarerax=int64#7
  101. # asm 2: movq 0(<xp=%rsi),>squarerax=%rax
  102. movq 0(%rsi),%rax
  103. # qhasm: squarerax <<= 1
  104. # asm 1: shl $1,<squarerax=int64#7
  105. # asm 2: shl $1,<squarerax=%rax
  106. shl $1,%rax
  107. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 8)
  108. # asm 1: mulq 8(<xp=int64#2)
  109. # asm 2: mulq 8(<xp=%rsi)
  110. mulq 8(%rsi)
  111. # qhasm: r1 = squarerax
  112. # asm 1: mov <squarerax=int64#7,>r1=int64#6
  113. # asm 2: mov <squarerax=%rax,>r1=%r9
  114. mov %rax,%r9
  115. # qhasm: squarer11 = squarerdx
  116. # asm 1: mov <squarerdx=int64#3,>squarer11=int64#8
  117. # asm 2: mov <squarerdx=%rdx,>squarer11=%r10
  118. mov %rdx,%r10
  # Column 2, first term: (2*x0)*x2. From here on r11 is reused as r2; the
  # frame adjustment it held is safe in stack slot 0.
  119. # qhasm: squarerax = *(uint64 *)(xp + 0)
  120. # asm 1: movq 0(<xp=int64#2),>squarerax=int64#7
  121. # asm 2: movq 0(<xp=%rsi),>squarerax=%rax
  122. movq 0(%rsi),%rax
  123. # qhasm: squarerax <<= 1
  124. # asm 1: shl $1,<squarerax=int64#7
  125. # asm 2: shl $1,<squarerax=%rax
  126. shl $1,%rax
  127. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 16)
  128. # asm 1: mulq 16(<xp=int64#2)
  129. # asm 2: mulq 16(<xp=%rsi)
  130. mulq 16(%rsi)
  131. # qhasm: r2 = squarerax
  132. # asm 1: mov <squarerax=int64#7,>r2=int64#9
  133. # asm 2: mov <squarerax=%rax,>r2=%r11
  134. mov %rax,%r11
  135. # qhasm: squarer21 = squarerdx
  136. # asm 1: mov <squarerdx=int64#3,>squarer21=int64#10
  137. # asm 2: mov <squarerdx=%rdx,>squarer21=%r12
  138. mov %rdx,%r12
  # Column 3, first term: (2*x0)*x3.
  139. # qhasm: squarerax = *(uint64 *)(xp + 0)
  140. # asm 1: movq 0(<xp=int64#2),>squarerax=int64#7
  141. # asm 2: movq 0(<xp=%rsi),>squarerax=%rax
  142. movq 0(%rsi),%rax
  143. # qhasm: squarerax <<= 1
  144. # asm 1: shl $1,<squarerax=int64#7
  145. # asm 2: shl $1,<squarerax=%rax
  146. shl $1,%rax
  147. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 24)
  148. # asm 1: mulq 24(<xp=int64#2)
  149. # asm 2: mulq 24(<xp=%rsi)
  150. mulq 24(%rsi)
  151. # qhasm: r3 = squarerax
  152. # asm 1: mov <squarerax=int64#7,>r3=int64#11
  153. # asm 2: mov <squarerax=%rax,>r3=%r13
  154. mov %rax,%r13
  155. # qhasm: squarer31 = squarerdx
  156. # asm 1: mov <squarerdx=int64#3,>squarer31=int64#12
  157. # asm 2: mov <squarerdx=%rdx,>squarer31=%r14
  158. mov %rdx,%r14
  # Column 4, first term: (2*x0)*x4.
  159. # qhasm: squarerax = *(uint64 *)(xp + 0)
  160. # asm 1: movq 0(<xp=int64#2),>squarerax=int64#7
  161. # asm 2: movq 0(<xp=%rsi),>squarerax=%rax
  162. movq 0(%rsi),%rax
  163. # qhasm: squarerax <<= 1
  164. # asm 1: shl $1,<squarerax=int64#7
  165. # asm 2: shl $1,<squarerax=%rax
  166. shl $1,%rax
  167. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 32)
  168. # asm 1: mulq 32(<xp=int64#2)
  169. # asm 2: mulq 32(<xp=%rsi)
  170. mulq 32(%rsi)
  171. # qhasm: r4 = squarerax
  172. # asm 1: mov <squarerax=int64#7,>r4=int64#13
  173. # asm 2: mov <squarerax=%rax,>r4=%r15
  174. mov %rax,%r15
  175. # qhasm: squarer41 = squarerdx
  176. # asm 1: mov <squarerdx=int64#3,>squarer41=int64#14
  177. # asm 2: mov <squarerdx=%rdx,>squarer41=%rbx
  178. mov %rdx,%rbx
  # Column 2 += x1*x1.
  179. # qhasm: squarerax = *(uint64 *)(xp + 8)
  180. # asm 1: movq 8(<xp=int64#2),>squarerax=int64#7
  181. # asm 2: movq 8(<xp=%rsi),>squarerax=%rax
  182. movq 8(%rsi),%rax
  183. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 8)
  184. # asm 1: mulq 8(<xp=int64#2)
  185. # asm 2: mulq 8(<xp=%rsi)
  186. mulq 8(%rsi)
  187. # qhasm: carry? r2 += squarerax
  188. # asm 1: add <squarerax=int64#7,<r2=int64#9
  189. # asm 2: add <squarerax=%rax,<r2=%r11
  190. add %rax,%r11
  191. # qhasm: squarer21 += squarerdx + carry
  192. # asm 1: adc <squarerdx=int64#3,<squarer21=int64#10
  193. # asm 2: adc <squarerdx=%rdx,<squarer21=%r12
  194. adc %rdx,%r12
  # Column 3 += (2*x1)*x2.
  195. # qhasm: squarerax = *(uint64 *)(xp + 8)
  196. # asm 1: movq 8(<xp=int64#2),>squarerax=int64#7
  197. # asm 2: movq 8(<xp=%rsi),>squarerax=%rax
  198. movq 8(%rsi),%rax
  199. # qhasm: squarerax <<= 1
  200. # asm 1: shl $1,<squarerax=int64#7
  201. # asm 2: shl $1,<squarerax=%rax
  202. shl $1,%rax
  203. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 16)
  204. # asm 1: mulq 16(<xp=int64#2)
  205. # asm 2: mulq 16(<xp=%rsi)
  206. mulq 16(%rsi)
  207. # qhasm: carry? r3 += squarerax
  208. # asm 1: add <squarerax=int64#7,<r3=int64#11
  209. # asm 2: add <squarerax=%rax,<r3=%r13
  210. add %rax,%r13
  211. # qhasm: squarer31 += squarerdx + carry
  212. # asm 1: adc <squarerdx=int64#3,<squarer31=int64#12
  213. # asm 2: adc <squarerdx=%rdx,<squarer31=%r14
  214. adc %rdx,%r14
  # Column 4 += (2*x1)*x3.
  215. # qhasm: squarerax = *(uint64 *)(xp + 8)
  216. # asm 1: movq 8(<xp=int64#2),>squarerax=int64#7
  217. # asm 2: movq 8(<xp=%rsi),>squarerax=%rax
  218. movq 8(%rsi),%rax
  219. # qhasm: squarerax <<= 1
  220. # asm 1: shl $1,<squarerax=int64#7
  221. # asm 2: shl $1,<squarerax=%rax
  222. shl $1,%rax
  223. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 24)
  224. # asm 1: mulq 24(<xp=int64#2)
  225. # asm 2: mulq 24(<xp=%rsi)
  226. mulq 24(%rsi)
  227. # qhasm: carry? r4 += squarerax
  228. # asm 1: add <squarerax=int64#7,<r4=int64#13
  229. # asm 2: add <squarerax=%rax,<r4=%r15
  230. add %rax,%r15
  231. # qhasm: squarer41 += squarerdx + carry
  232. # asm 1: adc <squarerdx=int64#3,<squarer41=int64#14
  233. # asm 2: adc <squarerdx=%rdx,<squarer41=%rbx
  234. adc %rdx,%rbx
  # Column 0 += (38*x1)*x4. The word is loaded into rdx only as a staging
  # register for the three-operand imulq, whose result goes to rax.
  # NOTE(review): 38 = 2*19, presumably the doubled cross term combined with
  # the wrap-around factor 19 of the field -- confirm.
  235. # qhasm: squarerax = *(uint64 *)(xp + 8)
  236. # asm 1: movq 8(<xp=int64#2),>squarerax=int64#3
  237. # asm 2: movq 8(<xp=%rsi),>squarerax=%rdx
  238. movq 8(%rsi),%rdx
  239. # qhasm: squarerax *= 38
  240. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  241. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  242. imulq $38,%rdx,%rax
  243. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 32)
  244. # asm 1: mulq 32(<xp=int64#2)
  245. # asm 2: mulq 32(<xp=%rsi)
  246. mulq 32(%rsi)
  247. # qhasm: carry? r0 += squarerax
  248. # asm 1: add <squarerax=int64#7,<r0=int64#4
  249. # asm 2: add <squarerax=%rax,<r0=%rcx
  250. add %rax,%rcx
  251. # qhasm: squarer01 += squarerdx + carry
  252. # asm 1: adc <squarerdx=int64#3,<squarer01=int64#5
  253. # asm 2: adc <squarerdx=%rdx,<squarer01=%r8
  254. adc %rdx,%r8
  # Column 4 += x2*x2.
  255. # qhasm: squarerax = *(uint64 *)(xp + 16)
  256. # asm 1: movq 16(<xp=int64#2),>squarerax=int64#7
  257. # asm 2: movq 16(<xp=%rsi),>squarerax=%rax
  258. movq 16(%rsi),%rax
  259. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 16)
  260. # asm 1: mulq 16(<xp=int64#2)
  261. # asm 2: mulq 16(<xp=%rsi)
  262. mulq 16(%rsi)
  263. # qhasm: carry? r4 += squarerax
  264. # asm 1: add <squarerax=int64#7,<r4=int64#13
  265. # asm 2: add <squarerax=%rax,<r4=%r15
  266. add %rax,%r15
  267. # qhasm: squarer41 += squarerdx + carry
  268. # asm 1: adc <squarerdx=int64#3,<squarer41=int64#14
  269. # asm 2: adc <squarerdx=%rdx,<squarer41=%rbx
  270. adc %rdx,%rbx
  # Column 0 += (38*x2)*x3.
  271. # qhasm: squarerax = *(uint64 *)(xp + 16)
  272. # asm 1: movq 16(<xp=int64#2),>squarerax=int64#3
  273. # asm 2: movq 16(<xp=%rsi),>squarerax=%rdx
  274. movq 16(%rsi),%rdx
  275. # qhasm: squarerax *= 38
  276. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  277. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  278. imulq $38,%rdx,%rax
  279. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 24)
  280. # asm 1: mulq 24(<xp=int64#2)
  281. # asm 2: mulq 24(<xp=%rsi)
  282. mulq 24(%rsi)
  283. # qhasm: carry? r0 += squarerax
  284. # asm 1: add <squarerax=int64#7,<r0=int64#4
  285. # asm 2: add <squarerax=%rax,<r0=%rcx
  286. add %rax,%rcx
  287. # qhasm: squarer01 += squarerdx + carry
  288. # asm 1: adc <squarerdx=int64#3,<squarer01=int64#5
  289. # asm 2: adc <squarerdx=%rdx,<squarer01=%r8
  290. adc %rdx,%r8
  # Column 1 += (38*x2)*x4.
  291. # qhasm: squarerax = *(uint64 *)(xp + 16)
  292. # asm 1: movq 16(<xp=int64#2),>squarerax=int64#3
  293. # asm 2: movq 16(<xp=%rsi),>squarerax=%rdx
  294. movq 16(%rsi),%rdx
  295. # qhasm: squarerax *= 38
  296. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  297. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  298. imulq $38,%rdx,%rax
  299. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 32)
  300. # asm 1: mulq 32(<xp=int64#2)
  301. # asm 2: mulq 32(<xp=%rsi)
  302. mulq 32(%rsi)
  303. # qhasm: carry? r1 += squarerax
  304. # asm 1: add <squarerax=int64#7,<r1=int64#6
  305. # asm 2: add <squarerax=%rax,<r1=%r9
  306. add %rax,%r9
  307. # qhasm: squarer11 += squarerdx + carry
  308. # asm 1: adc <squarerdx=int64#3,<squarer11=int64#8
  309. # asm 2: adc <squarerdx=%rdx,<squarer11=%r10
  310. adc %rdx,%r10
  # Column 1 += (19*x3)*x3. A square term, so the factor is 19 rather than 38.
  311. # qhasm: squarerax = *(uint64 *)(xp + 24)
  312. # asm 1: movq 24(<xp=int64#2),>squarerax=int64#3
  313. # asm 2: movq 24(<xp=%rsi),>squarerax=%rdx
  314. movq 24(%rsi),%rdx
  315. # qhasm: squarerax *= 19
  316. # asm 1: imulq $19,<squarerax=int64#3,>squarerax=int64#7
  317. # asm 2: imulq $19,<squarerax=%rdx,>squarerax=%rax
  318. imulq $19,%rdx,%rax
  319. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 24)
  320. # asm 1: mulq 24(<xp=int64#2)
  321. # asm 2: mulq 24(<xp=%rsi)
  322. mulq 24(%rsi)
  323. # qhasm: carry? r1 += squarerax
  324. # asm 1: add <squarerax=int64#7,<r1=int64#6
  325. # asm 2: add <squarerax=%rax,<r1=%r9
  326. add %rax,%r9
  327. # qhasm: squarer11 += squarerdx + carry
  328. # asm 1: adc <squarerdx=int64#3,<squarer11=int64#8
  329. # asm 2: adc <squarerdx=%rdx,<squarer11=%r10
  330. adc %rdx,%r10
  # Column 2 += (38*x3)*x4.
  331. # qhasm: squarerax = *(uint64 *)(xp + 24)
  332. # asm 1: movq 24(<xp=int64#2),>squarerax=int64#3
  333. # asm 2: movq 24(<xp=%rsi),>squarerax=%rdx
  334. movq 24(%rsi),%rdx
  335. # qhasm: squarerax *= 38
  336. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  337. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  338. imulq $38,%rdx,%rax
  339. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 32)
  340. # asm 1: mulq 32(<xp=int64#2)
  341. # asm 2: mulq 32(<xp=%rsi)
  342. mulq 32(%rsi)
  343. # qhasm: carry? r2 += squarerax
  344. # asm 1: add <squarerax=int64#7,<r2=int64#9
  345. # asm 2: add <squarerax=%rax,<r2=%r11
  346. add %rax,%r11
  347. # qhasm: squarer21 += squarerdx + carry
  348. # asm 1: adc <squarerdx=int64#3,<squarer21=int64#10
  349. # asm 2: adc <squarerdx=%rdx,<squarer21=%r12
  350. adc %rdx,%r12
  # Column 3 += (19*x4)*x4. This is the last read through xp.
  351. # qhasm: squarerax = *(uint64 *)(xp + 32)
  352. # asm 1: movq 32(<xp=int64#2),>squarerax=int64#3
  353. # asm 2: movq 32(<xp=%rsi),>squarerax=%rdx
  354. movq 32(%rsi),%rdx
  355. # qhasm: squarerax *= 19
  356. # asm 1: imulq $19,<squarerax=int64#3,>squarerax=int64#7
  357. # asm 2: imulq $19,<squarerax=%rdx,>squarerax=%rax
  358. imulq $19,%rdx,%rax
  359. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 32)
  360. # asm 1: mulq 32(<xp=int64#2)
  361. # asm 2: mulq 32(<xp=%rsi)
  362. mulq 32(%rsi)
  363. # qhasm: carry? r3 += squarerax
  364. # asm 1: add <squarerax=int64#7,<r3=int64#11
  365. # asm 2: add <squarerax=%rax,<r3=%r13
  366. add %rax,%r13
  367. # qhasm: squarer31 += squarerdx + carry
  368. # asm 1: adc <squarerdx=int64#3,<squarer31=int64#12
  369. # asm 2: adc <squarerdx=%rdx,<squarer31=%r14
  370. adc %rdx,%r14
  # ----------------------------------------------------------------------
  # Reduction phase. rsi is reused for the mask: it receives the 64-bit
  # word at batch_REDMASK51, loaded RIP-relative. The constant is defined
  # outside this file.
  # NOTE(review): presumably the mask is 2^51 - 1 -- confirm at its definition.
  # For every column k, shld $13 turns squarerk1 into
  # (squarerk1 << 13) | (rk >> 51), that is bits 51 and up of the 128-bit
  # column (64 - 51 = 13), and the following and keeps the masked bits of
  # rk. The extracted upper part of column k is then added to column k+1.
  # ----------------------------------------------------------------------
  371. # qhasm: squareredmask = *(uint64 *) &CRYPTO_NAMESPACE(batch_REDMASK51)
  372. # asm 1: movq CRYPTO_NAMESPACE(batch_REDMASK51),>squareredmask=int64#2
  373. # asm 2: movq CRYPTO_NAMESPACE(batch_REDMASK51),>squareredmask=%rsi
  374. movq CRYPTO_NAMESPACE(batch_REDMASK51)(%rip),%rsi
  375. # qhasm: squarer01 = (squarer01.r0) << 13
  376. # asm 1: shld $13,<r0=int64#4,<squarer01=int64#5
  377. # asm 2: shld $13,<r0=%rcx,<squarer01=%r8
  378. shld $13,%rcx,%r8
  379. # qhasm: r0 &= squareredmask
  380. # asm 1: and <squareredmask=int64#2,<r0=int64#4
  381. # asm 2: and <squareredmask=%rsi,<r0=%rcx
  382. and %rsi,%rcx
  383. # qhasm: squarer11 = (squarer11.r1) << 13
  384. # asm 1: shld $13,<r1=int64#6,<squarer11=int64#8
  385. # asm 2: shld $13,<r1=%r9,<squarer11=%r10
  386. shld $13,%r9,%r10
  387. # qhasm: r1 &= squareredmask
  388. # asm 1: and <squareredmask=int64#2,<r1=int64#6
  389. # asm 2: and <squareredmask=%rsi,<r1=%r9
  390. and %rsi,%r9
  391. # qhasm: r1 += squarer01
  392. # asm 1: add <squarer01=int64#5,<r1=int64#6
  393. # asm 2: add <squarer01=%r8,<r1=%r9
  394. add %r8,%r9
  395. # qhasm: squarer21 = (squarer21.r2) << 13
  396. # asm 1: shld $13,<r2=int64#9,<squarer21=int64#10
  397. # asm 2: shld $13,<r2=%r11,<squarer21=%r12
  398. shld $13,%r11,%r12
  399. # qhasm: r2 &= squareredmask
  400. # asm 1: and <squareredmask=int64#2,<r2=int64#9
  401. # asm 2: and <squareredmask=%rsi,<r2=%r11
  402. and %rsi,%r11
  403. # qhasm: r2 += squarer11
  404. # asm 1: add <squarer11=int64#8,<r2=int64#9
  405. # asm 2: add <squarer11=%r10,<r2=%r11
  406. add %r10,%r11
  407. # qhasm: squarer31 = (squarer31.r3) << 13
  408. # asm 1: shld $13,<r3=int64#11,<squarer31=int64#12
  409. # asm 2: shld $13,<r3=%r13,<squarer31=%r14
  410. shld $13,%r13,%r14
  411. # qhasm: r3 &= squareredmask
  412. # asm 1: and <squareredmask=int64#2,<r3=int64#11
  413. # asm 2: and <squareredmask=%rsi,<r3=%r13
  414. and %rsi,%r13
  415. # qhasm: r3 += squarer21
  416. # asm 1: add <squarer21=int64#10,<r3=int64#11
  417. # asm 2: add <squarer21=%r12,<r3=%r13
  418. add %r12,%r13
  419. # qhasm: squarer41 = (squarer41.r4) << 13
  420. # asm 1: shld $13,<r4=int64#13,<squarer41=int64#14
  421. # asm 2: shld $13,<r4=%r15,<squarer41=%rbx
  422. shld $13,%r15,%rbx
  423. # qhasm: r4 &= squareredmask
  424. # asm 1: and <squareredmask=int64#2,<r4=int64#13
  425. # asm 2: and <squareredmask=%rsi,<r4=%r15
  426. and %rsi,%r15
  427. # qhasm: r4 += squarer31
  428. # asm 1: add <squarer31=int64#12,<r4=int64#13
  429. # asm 2: add <squarer31=%r14,<r4=%r15
  430. add %r14,%r15
  # The upper part of column 4 has no column 5 to go to: it is multiplied by
  # 19 and added back into column 0.
  431. # qhasm: squarer41 = squarer41 * 19
  432. # asm 1: imulq $19,<squarer41=int64#14,>squarer41=int64#3
  433. # asm 2: imulq $19,<squarer41=%rbx,>squarer41=%rdx
  434. imulq $19,%rbx,%rdx
  435. # qhasm: r0 += squarer41
  436. # asm 1: add <squarer41=int64#3,<r0=int64#4
  437. # asm 2: add <squarer41=%rdx,<r0=%rcx
  438. add %rdx,%rcx
  # ----------------------------------------------------------------------
  # Sequential carry pass. squaret takes bits 51 and up of one word and is
  # added to the next word; each word is masked once its carry has been
  # taken. Along the way r1..r4 move to other registers: r1 to r8, r2 to r9,
  # r3 to rax, r4 to r10 (r0 stays in rcx).
  # ----------------------------------------------------------------------
  439. # qhasm: squaret = r0
  440. # asm 1: mov <r0=int64#4,>squaret=int64#3
  441. # asm 2: mov <r0=%rcx,>squaret=%rdx
  442. mov %rcx,%rdx
  443. # qhasm: (uint64) squaret >>= 51
  444. # asm 1: shr $51,<squaret=int64#3
  445. # asm 2: shr $51,<squaret=%rdx
  446. shr $51,%rdx
  447. # qhasm: squaret += r1
  448. # asm 1: add <r1=int64#6,<squaret=int64#3
  449. # asm 2: add <r1=%r9,<squaret=%rdx
  450. add %r9,%rdx
  451. # qhasm: r0 &= squareredmask
  452. # asm 1: and <squareredmask=int64#2,<r0=int64#4
  453. # asm 2: and <squareredmask=%rsi,<r0=%rcx
  454. and %rsi,%rcx
  455. # qhasm: r1 = squaret
  456. # asm 1: mov <squaret=int64#3,>r1=int64#5
  457. # asm 2: mov <squaret=%rdx,>r1=%r8
  458. mov %rdx,%r8
  459. # qhasm: (uint64) squaret >>= 51
  460. # asm 1: shr $51,<squaret=int64#3
  461. # asm 2: shr $51,<squaret=%rdx
  462. shr $51,%rdx
  463. # qhasm: squaret += r2
  464. # asm 1: add <r2=int64#9,<squaret=int64#3
  465. # asm 2: add <r2=%r11,<squaret=%rdx
  466. add %r11,%rdx
  467. # qhasm: r1 &= squareredmask
  468. # asm 1: and <squareredmask=int64#2,<r1=int64#5
  469. # asm 2: and <squareredmask=%rsi,<r1=%r8
  470. and %rsi,%r8
  471. # qhasm: r2 = squaret
  472. # asm 1: mov <squaret=int64#3,>r2=int64#6
  473. # asm 2: mov <squaret=%rdx,>r2=%r9
  474. mov %rdx,%r9
  475. # qhasm: (uint64) squaret >>= 51
  476. # asm 1: shr $51,<squaret=int64#3
  477. # asm 2: shr $51,<squaret=%rdx
  478. shr $51,%rdx
  479. # qhasm: squaret += r3
  480. # asm 1: add <r3=int64#11,<squaret=int64#3
  481. # asm 2: add <r3=%r13,<squaret=%rdx
  482. add %r13,%rdx
  483. # qhasm: r2 &= squareredmask
  484. # asm 1: and <squareredmask=int64#2,<r2=int64#6
  485. # asm 2: and <squareredmask=%rsi,<r2=%r9
  486. and %rsi,%r9
  487. # qhasm: r3 = squaret
  488. # asm 1: mov <squaret=int64#3,>r3=int64#7
  489. # asm 2: mov <squaret=%rdx,>r3=%rax
  490. mov %rdx,%rax
  491. # qhasm: (uint64) squaret >>= 51
  492. # asm 1: shr $51,<squaret=int64#3
  493. # asm 2: shr $51,<squaret=%rdx
  494. shr $51,%rdx
  495. # qhasm: squaret += r4
  496. # asm 1: add <r4=int64#13,<squaret=int64#3
  497. # asm 2: add <r4=%r15,<squaret=%rdx
  498. add %r15,%rdx
  499. # qhasm: r3 &= squareredmask
  500. # asm 1: and <squareredmask=int64#2,<r3=int64#7
  501. # asm 2: and <squareredmask=%rsi,<r3=%rax
  502. and %rsi,%rax
  503. # qhasm: r4 = squaret
  504. # asm 1: mov <squaret=int64#3,>r4=int64#8
  505. # asm 2: mov <squaret=%rdx,>r4=%r10
  506. mov %rdx,%r10
  # The carry out of r4 is multiplied by 19 and added to r0. r0 is not
  # masked again after this add, so the stored word 0 can be slightly larger
  # than the mask.
  507. # qhasm: (uint64) squaret >>= 51
  508. # asm 1: shr $51,<squaret=int64#3
  509. # asm 2: shr $51,<squaret=%rdx
  510. shr $51,%rdx
  511. # qhasm: squaret *= 19
  512. # asm 1: imulq $19,<squaret=int64#3,>squaret=int64#3
  513. # asm 2: imulq $19,<squaret=%rdx,>squaret=%rdx
  514. imulq $19,%rdx,%rdx
  515. # qhasm: r0 += squaret
  516. # asm 1: add <squaret=int64#3,<r0=int64#4
  517. # asm 2: add <squaret=%rdx,<r0=%rcx
  518. add %rdx,%rcx
  519. # qhasm: r4 &= squareredmask
  520. # asm 1: and <squareredmask=int64#2,<r4=int64#8
  521. # asm 2: and <squareredmask=%rsi,<r4=%r10
  522. and %rsi,%r10
  # Store the five result words at rp+0 .. rp+32.
  523. # qhasm: *(uint64 *)(rp + 0) = r0
  524. # asm 1: movq <r0=int64#4,0(<rp=int64#1)
  525. # asm 2: movq <r0=%rcx,0(<rp=%rdi)
  526. movq %rcx,0(%rdi)
  527. # qhasm: *(uint64 *)(rp + 8) = r1
  528. # asm 1: movq <r1=int64#5,8(<rp=int64#1)
  529. # asm 2: movq <r1=%r8,8(<rp=%rdi)
  530. movq %r8,8(%rdi)
  531. # qhasm: *(uint64 *)(rp + 16) = r2
  532. # asm 1: movq <r2=int64#6,16(<rp=int64#1)
  533. # asm 2: movq <r2=%r9,16(<rp=%rdi)
  534. movq %r9,16(%rdi)
  535. # qhasm: *(uint64 *)(rp + 24) = r3
  536. # asm 1: movq <r3=int64#7,24(<rp=int64#1)
  537. # asm 2: movq <r3=%rax,24(<rp=%rdi)
  538. movq %rax,24(%rdi)
  539. # qhasm: *(uint64 *)(rp + 32) = r4
  540. # asm 1: movq <r4=int64#8,32(<rp=int64#1)
  541. # asm 2: movq <r4=%r10,32(<rp=%rdi)
  542. movq %r10,32(%rdi)
  # Epilogue: reload everything spilled on entry. r11 was reused as r2
  # above, so the frame adjustment must come back from slot 0 before rsp
  # can be restored.
  543. # qhasm: c1 =c1_stack
  544. # asm 1: movq <c1_stack=stack64#1,>c1=int64#9
  545. # asm 2: movq <c1_stack=0(%rsp),>c1=%r11
  546. movq 0(%rsp),%r11
  547. # qhasm: c2 =c2_stack
  548. # asm 1: movq <c2_stack=stack64#2,>c2=int64#10
  549. # asm 2: movq <c2_stack=8(%rsp),>c2=%r12
  550. movq 8(%rsp),%r12
  551. # qhasm: c3 =c3_stack
  552. # asm 1: movq <c3_stack=stack64#3,>c3=int64#11
  553. # asm 2: movq <c3_stack=16(%rsp),>c3=%r13
  554. movq 16(%rsp),%r13
  555. # qhasm: c4 =c4_stack
  556. # asm 1: movq <c4_stack=stack64#4,>c4=int64#12
  557. # asm 2: movq <c4_stack=24(%rsp),>c4=%r14
  558. movq 24(%rsp),%r14
  559. # qhasm: c5 =c5_stack
  560. # asm 1: movq <c5_stack=stack64#5,>c5=int64#13
  561. # asm 2: movq <c5_stack=32(%rsp),>c5=%r15
  562. movq 32(%rsp),%r15
  563. # qhasm: c6 =c6_stack
  564. # asm 1: movq <c6_stack=stack64#6,>c6=int64#14
  565. # asm 2: movq <c6_stack=40(%rsp),>c6=%rbx
  566. movq 40(%rsp),%rbx
  567. # qhasm: c7 =c7_stack
  568. # asm 1: movq <c7_stack=stack64#7,>c7=int64#15
  569. # asm 2: movq <c7_stack=48(%rsp),>c7=%rbp
  570. movq 48(%rsp),%rbp
  # Undo the frame adjustment, then copy rdi to rax and rsi to rdx. At this
  # point rdi still holds rp and rsi holds the reduction mask, so those are
  # the values left in rax and rdx on return.
  571. # qhasm: leave
  572. add %r11,%rsp
  573. mov %rdi,%rax
  574. mov %rsi,%rdx
  575. ret