1
0

ge25519_dbl_p1p1.S 84 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155
  1. # qhasm: int64 rp
  2. # qhasm: int64 pp
  3. # qhasm: input rp
  4. # qhasm: input pp
  5. # qhasm: int64 a0
  6. # qhasm: int64 a1
  7. # qhasm: int64 a2
  8. # qhasm: int64 a3
  9. # qhasm: int64 a4
  10. # qhasm: stack64 a0_stack
  11. # qhasm: stack64 a1_stack
  12. # qhasm: stack64 a2_stack
  13. # qhasm: stack64 a3_stack
  14. # qhasm: stack64 a4_stack
  15. # qhasm: int64 b0
  16. # qhasm: int64 b1
  17. # qhasm: int64 b2
  18. # qhasm: int64 b3
  19. # qhasm: int64 b4
  20. # qhasm: stack64 b0_stack
  21. # qhasm: stack64 b1_stack
  22. # qhasm: stack64 b2_stack
  23. # qhasm: stack64 b3_stack
  24. # qhasm: stack64 b4_stack
  25. # qhasm: int64 c0
  26. # qhasm: int64 c1
  27. # qhasm: int64 c2
  28. # qhasm: int64 c3
  29. # qhasm: int64 c4
  30. # qhasm: stack64 c0_stack
  31. # qhasm: stack64 c1_stack
  32. # qhasm: stack64 c2_stack
  33. # qhasm: stack64 c3_stack
  34. # qhasm: stack64 c4_stack
  35. # qhasm: int64 d0
  36. # qhasm: int64 d1
  37. # qhasm: int64 d2
  38. # qhasm: int64 d3
  39. # qhasm: int64 d4
  40. # qhasm: stack64 d0_stack
  41. # qhasm: stack64 d1_stack
  42. # qhasm: stack64 d2_stack
  43. # qhasm: stack64 d3_stack
  44. # qhasm: stack64 d4_stack
  45. # qhasm: int64 e0
  46. # qhasm: int64 e1
  47. # qhasm: int64 e2
  48. # qhasm: int64 e3
  49. # qhasm: int64 e4
  50. # qhasm: stack64 e0_stack
  51. # qhasm: stack64 e1_stack
  52. # qhasm: stack64 e2_stack
  53. # qhasm: stack64 e3_stack
  54. # qhasm: stack64 e4_stack
  55. # qhasm: int64 rx0
  56. # qhasm: int64 rx1
  57. # qhasm: int64 rx2
  58. # qhasm: int64 rx3
  59. # qhasm: int64 rx4
  60. # qhasm: stack64 rx0_stack
  61. # qhasm: stack64 rx1_stack
  62. # qhasm: stack64 rx2_stack
  63. # qhasm: stack64 rx3_stack
  64. # qhasm: stack64 rx4_stack
  65. # qhasm: int64 ry0
  66. # qhasm: int64 ry1
  67. # qhasm: int64 ry2
  68. # qhasm: int64 ry3
  69. # qhasm: int64 ry4
  70. # qhasm: int64 rz0
  71. # qhasm: int64 rz1
  72. # qhasm: int64 rz2
  73. # qhasm: int64 rz3
  74. # qhasm: int64 rz4
  75. # qhasm: int64 rt0
  76. # qhasm: int64 rt1
  77. # qhasm: int64 rt2
  78. # qhasm: int64 rt3
  79. # qhasm: int64 rt4
  80. # qhasm: int64 mulr01
  81. # qhasm: int64 mulr11
  82. # qhasm: int64 mulr21
  83. # qhasm: int64 mulr31
  84. # qhasm: int64 mulr41
  85. # qhasm: int64 mulrax
  86. # qhasm: int64 mulrdx
  87. # qhasm: int64 mult
  88. # qhasm: int64 mulredmask
  89. # qhasm: stack64 mulx219_stack
  90. # qhasm: stack64 mulx319_stack
  91. # qhasm: stack64 mulx419_stack
  92. # qhasm: int64 squarer01
  93. # qhasm: int64 squarer11
  94. # qhasm: int64 squarer21
  95. # qhasm: int64 squarer31
  96. # qhasm: int64 squarer41
  97. # qhasm: int64 squarerax
  98. # qhasm: int64 squarerdx
  99. # qhasm: int64 squaret
  100. # qhasm: int64 squareredmask
  101. # qhasm: int64 caller1
  102. # qhasm: int64 caller2
  103. # qhasm: int64 caller3
  104. # qhasm: int64 caller4
  105. # qhasm: int64 caller5
  106. # qhasm: int64 caller6
  107. # qhasm: int64 caller7
  108. # qhasm: caller caller1
  109. # qhasm: caller caller2
  110. # qhasm: caller caller3
  111. # qhasm: caller caller4
  112. # qhasm: caller caller5
  113. # qhasm: caller caller6
  114. # qhasm: caller caller7
  115. # qhasm: stack64 caller1_stack
  116. # qhasm: stack64 caller2_stack
  117. # qhasm: stack64 caller3_stack
  118. # qhasm: stack64 caller4_stack
  119. # qhasm: stack64 caller5_stack
  120. # qhasm: stack64 caller6_stack
  121. # qhasm: stack64 caller7_stack
  122. # qhasm: enter CRYPTO_NAMESPACE(batch_ge25519_dbl_p1p1)
  123. .text
  124. .p2align 5
  125. .globl _CRYPTO_NAMESPACE(batch_ge25519_dbl_p1p1)
  126. .globl CRYPTO_NAMESPACE(batch_ge25519_dbl_p1p1)
  127. _CRYPTO_NAMESPACE(batch_ge25519_dbl_p1p1):
  128. CRYPTO_NAMESPACE(batch_ge25519_dbl_p1p1):
  129. mov %rsp,%r11
  130. and $31,%r11
  131. add $224,%r11
  132. sub %r11,%rsp
  133. # qhasm: caller1_stack = caller1
  134. # asm 1: movq <caller1=int64#9,>caller1_stack=stack64#1
  135. # asm 2: movq <caller1=%r11,>caller1_stack=0(%rsp)
  136. movq %r11,0(%rsp)
  137. # qhasm: caller2_stack = caller2
  138. # asm 1: movq <caller2=int64#10,>caller2_stack=stack64#2
  139. # asm 2: movq <caller2=%r12,>caller2_stack=8(%rsp)
  140. movq %r12,8(%rsp)
  141. # qhasm: caller3_stack = caller3
  142. # asm 1: movq <caller3=int64#11,>caller3_stack=stack64#3
  143. # asm 2: movq <caller3=%r13,>caller3_stack=16(%rsp)
  144. movq %r13,16(%rsp)
  145. # qhasm: caller4_stack = caller4
  146. # asm 1: movq <caller4=int64#12,>caller4_stack=stack64#4
  147. # asm 2: movq <caller4=%r14,>caller4_stack=24(%rsp)
  148. movq %r14,24(%rsp)
  149. # qhasm: caller5_stack = caller5
  150. # asm 1: movq <caller5=int64#13,>caller5_stack=stack64#5
  151. # asm 2: movq <caller5=%r15,>caller5_stack=32(%rsp)
  152. movq %r15,32(%rsp)
  153. # qhasm: caller6_stack = caller6
  154. # asm 1: movq <caller6=int64#14,>caller6_stack=stack64#6
  155. # asm 2: movq <caller6=%rbx,>caller6_stack=40(%rsp)
  156. movq %rbx,40(%rsp)
  157. # qhasm: caller7_stack = caller7
  158. # asm 1: movq <caller7=int64#15,>caller7_stack=stack64#7
  159. # asm 2: movq <caller7=%rbp,>caller7_stack=48(%rsp)
  160. movq %rbp,48(%rsp)
  161. # qhasm: squarerax = *(uint64 *)(pp + 0)
  162. # asm 1: movq 0(<pp=int64#2),>squarerax=int64#7
  163. # asm 2: movq 0(<pp=%rsi),>squarerax=%rax
  164. movq 0(%rsi),%rax
  165. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 0)
  166. # asm 1: mulq 0(<pp=int64#2)
  167. # asm 2: mulq 0(<pp=%rsi)
  168. mulq 0(%rsi)
  169. # qhasm: a0 = squarerax
  170. # asm 1: mov <squarerax=int64#7,>a0=int64#4
  171. # asm 2: mov <squarerax=%rax,>a0=%rcx
  172. mov %rax,%rcx
  173. # qhasm: squarer01 = squarerdx
  174. # asm 1: mov <squarerdx=int64#3,>squarer01=int64#5
  175. # asm 2: mov <squarerdx=%rdx,>squarer01=%r8
  176. mov %rdx,%r8
  177. # qhasm: squarerax = *(uint64 *)(pp + 0)
  178. # asm 1: movq 0(<pp=int64#2),>squarerax=int64#7
  179. # asm 2: movq 0(<pp=%rsi),>squarerax=%rax
  180. movq 0(%rsi),%rax
  181. # qhasm: squarerax <<= 1
  182. # asm 1: shl $1,<squarerax=int64#7
  183. # asm 2: shl $1,<squarerax=%rax
  184. shl $1,%rax
  185. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 8)
  186. # asm 1: mulq 8(<pp=int64#2)
  187. # asm 2: mulq 8(<pp=%rsi)
  188. mulq 8(%rsi)
  189. # qhasm: a1 = squarerax
  190. # asm 1: mov <squarerax=int64#7,>a1=int64#6
  191. # asm 2: mov <squarerax=%rax,>a1=%r9
  192. mov %rax,%r9
  193. # qhasm: squarer11 = squarerdx
  194. # asm 1: mov <squarerdx=int64#3,>squarer11=int64#8
  195. # asm 2: mov <squarerdx=%rdx,>squarer11=%r10
  196. mov %rdx,%r10
  197. # qhasm: squarerax = *(uint64 *)(pp + 0)
  198. # asm 1: movq 0(<pp=int64#2),>squarerax=int64#7
  199. # asm 2: movq 0(<pp=%rsi),>squarerax=%rax
  200. movq 0(%rsi),%rax
  201. # qhasm: squarerax <<= 1
  202. # asm 1: shl $1,<squarerax=int64#7
  203. # asm 2: shl $1,<squarerax=%rax
  204. shl $1,%rax
  205. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 16)
  206. # asm 1: mulq 16(<pp=int64#2)
  207. # asm 2: mulq 16(<pp=%rsi)
  208. mulq 16(%rsi)
  209. # qhasm: a2 = squarerax
  210. # asm 1: mov <squarerax=int64#7,>a2=int64#9
  211. # asm 2: mov <squarerax=%rax,>a2=%r11
  212. mov %rax,%r11
  213. # qhasm: squarer21 = squarerdx
  214. # asm 1: mov <squarerdx=int64#3,>squarer21=int64#10
  215. # asm 2: mov <squarerdx=%rdx,>squarer21=%r12
  216. mov %rdx,%r12
  217. # qhasm: squarerax = *(uint64 *)(pp + 0)
  218. # asm 1: movq 0(<pp=int64#2),>squarerax=int64#7
  219. # asm 2: movq 0(<pp=%rsi),>squarerax=%rax
  220. movq 0(%rsi),%rax
  221. # qhasm: squarerax <<= 1
  222. # asm 1: shl $1,<squarerax=int64#7
  223. # asm 2: shl $1,<squarerax=%rax
  224. shl $1,%rax
  225. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 24)
  226. # asm 1: mulq 24(<pp=int64#2)
  227. # asm 2: mulq 24(<pp=%rsi)
  228. mulq 24(%rsi)
  229. # qhasm: a3 = squarerax
  230. # asm 1: mov <squarerax=int64#7,>a3=int64#11
  231. # asm 2: mov <squarerax=%rax,>a3=%r13
  232. mov %rax,%r13
  233. # qhasm: squarer31 = squarerdx
  234. # asm 1: mov <squarerdx=int64#3,>squarer31=int64#12
  235. # asm 2: mov <squarerdx=%rdx,>squarer31=%r14
  236. mov %rdx,%r14
  237. # qhasm: squarerax = *(uint64 *)(pp + 0)
  238. # asm 1: movq 0(<pp=int64#2),>squarerax=int64#7
  239. # asm 2: movq 0(<pp=%rsi),>squarerax=%rax
  240. movq 0(%rsi),%rax
  241. # qhasm: squarerax <<= 1
  242. # asm 1: shl $1,<squarerax=int64#7
  243. # asm 2: shl $1,<squarerax=%rax
  244. shl $1,%rax
  245. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 32)
  246. # asm 1: mulq 32(<pp=int64#2)
  247. # asm 2: mulq 32(<pp=%rsi)
  248. mulq 32(%rsi)
  249. # qhasm: a4 = squarerax
  250. # asm 1: mov <squarerax=int64#7,>a4=int64#13
  251. # asm 2: mov <squarerax=%rax,>a4=%r15
  252. mov %rax,%r15
  253. # qhasm: squarer41 = squarerdx
  254. # asm 1: mov <squarerdx=int64#3,>squarer41=int64#14
  255. # asm 2: mov <squarerdx=%rdx,>squarer41=%rbx
  256. mov %rdx,%rbx
  257. # qhasm: squarerax = *(uint64 *)(pp + 8)
  258. # asm 1: movq 8(<pp=int64#2),>squarerax=int64#7
  259. # asm 2: movq 8(<pp=%rsi),>squarerax=%rax
  260. movq 8(%rsi),%rax
  261. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 8)
  262. # asm 1: mulq 8(<pp=int64#2)
  263. # asm 2: mulq 8(<pp=%rsi)
  264. mulq 8(%rsi)
  265. # qhasm: carry? a2 += squarerax
  266. # asm 1: add <squarerax=int64#7,<a2=int64#9
  267. # asm 2: add <squarerax=%rax,<a2=%r11
  268. add %rax,%r11
  269. # qhasm: squarer21 += squarerdx + carry
  270. # asm 1: adc <squarerdx=int64#3,<squarer21=int64#10
  271. # asm 2: adc <squarerdx=%rdx,<squarer21=%r12
  272. adc %rdx,%r12
  273. # qhasm: squarerax = *(uint64 *)(pp + 8)
  274. # asm 1: movq 8(<pp=int64#2),>squarerax=int64#7
  275. # asm 2: movq 8(<pp=%rsi),>squarerax=%rax
  276. movq 8(%rsi),%rax
  277. # qhasm: squarerax <<= 1
  278. # asm 1: shl $1,<squarerax=int64#7
  279. # asm 2: shl $1,<squarerax=%rax
  280. shl $1,%rax
  281. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 16)
  282. # asm 1: mulq 16(<pp=int64#2)
  283. # asm 2: mulq 16(<pp=%rsi)
  284. mulq 16(%rsi)
  285. # qhasm: carry? a3 += squarerax
  286. # asm 1: add <squarerax=int64#7,<a3=int64#11
  287. # asm 2: add <squarerax=%rax,<a3=%r13
  288. add %rax,%r13
  289. # qhasm: squarer31 += squarerdx + carry
  290. # asm 1: adc <squarerdx=int64#3,<squarer31=int64#12
  291. # asm 2: adc <squarerdx=%rdx,<squarer31=%r14
  292. adc %rdx,%r14
  293. # qhasm: squarerax = *(uint64 *)(pp + 8)
  294. # asm 1: movq 8(<pp=int64#2),>squarerax=int64#7
  295. # asm 2: movq 8(<pp=%rsi),>squarerax=%rax
  296. movq 8(%rsi),%rax
  297. # qhasm: squarerax <<= 1
  298. # asm 1: shl $1,<squarerax=int64#7
  299. # asm 2: shl $1,<squarerax=%rax
  300. shl $1,%rax
  301. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 24)
  302. # asm 1: mulq 24(<pp=int64#2)
  303. # asm 2: mulq 24(<pp=%rsi)
  304. mulq 24(%rsi)
  305. # qhasm: carry? a4 += squarerax
  306. # asm 1: add <squarerax=int64#7,<a4=int64#13
  307. # asm 2: add <squarerax=%rax,<a4=%r15
  308. add %rax,%r15
  309. # qhasm: squarer41 += squarerdx + carry
  310. # asm 1: adc <squarerdx=int64#3,<squarer41=int64#14
  311. # asm 2: adc <squarerdx=%rdx,<squarer41=%rbx
  312. adc %rdx,%rbx
  313. # qhasm: squarerax = *(uint64 *)(pp + 8)
  314. # asm 1: movq 8(<pp=int64#2),>squarerax=int64#3
  315. # asm 2: movq 8(<pp=%rsi),>squarerax=%rdx
  316. movq 8(%rsi),%rdx
  317. # qhasm: squarerax *= 38
  318. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  319. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  320. imulq $38,%rdx,%rax
  321. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 32)
  322. # asm 1: mulq 32(<pp=int64#2)
  323. # asm 2: mulq 32(<pp=%rsi)
  324. mulq 32(%rsi)
  325. # qhasm: carry? a0 += squarerax
  326. # asm 1: add <squarerax=int64#7,<a0=int64#4
  327. # asm 2: add <squarerax=%rax,<a0=%rcx
  328. add %rax,%rcx
  329. # qhasm: squarer01 += squarerdx + carry
  330. # asm 1: adc <squarerdx=int64#3,<squarer01=int64#5
  331. # asm 2: adc <squarerdx=%rdx,<squarer01=%r8
  332. adc %rdx,%r8
  333. # qhasm: squarerax = *(uint64 *)(pp + 16)
  334. # asm 1: movq 16(<pp=int64#2),>squarerax=int64#7
  335. # asm 2: movq 16(<pp=%rsi),>squarerax=%rax
  336. movq 16(%rsi),%rax
  337. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 16)
  338. # asm 1: mulq 16(<pp=int64#2)
  339. # asm 2: mulq 16(<pp=%rsi)
  340. mulq 16(%rsi)
  341. # qhasm: carry? a4 += squarerax
  342. # asm 1: add <squarerax=int64#7,<a4=int64#13
  343. # asm 2: add <squarerax=%rax,<a4=%r15
  344. add %rax,%r15
  345. # qhasm: squarer41 += squarerdx + carry
  346. # asm 1: adc <squarerdx=int64#3,<squarer41=int64#14
  347. # asm 2: adc <squarerdx=%rdx,<squarer41=%rbx
  348. adc %rdx,%rbx
  349. # qhasm: squarerax = *(uint64 *)(pp + 16)
  350. # asm 1: movq 16(<pp=int64#2),>squarerax=int64#3
  351. # asm 2: movq 16(<pp=%rsi),>squarerax=%rdx
  352. movq 16(%rsi),%rdx
  353. # qhasm: squarerax *= 38
  354. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  355. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  356. imulq $38,%rdx,%rax
  357. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 24)
  358. # asm 1: mulq 24(<pp=int64#2)
  359. # asm 2: mulq 24(<pp=%rsi)
  360. mulq 24(%rsi)
  361. # qhasm: carry? a0 += squarerax
  362. # asm 1: add <squarerax=int64#7,<a0=int64#4
  363. # asm 2: add <squarerax=%rax,<a0=%rcx
  364. add %rax,%rcx
  365. # qhasm: squarer01 += squarerdx + carry
  366. # asm 1: adc <squarerdx=int64#3,<squarer01=int64#5
  367. # asm 2: adc <squarerdx=%rdx,<squarer01=%r8
  368. adc %rdx,%r8
  369. # qhasm: squarerax = *(uint64 *)(pp + 16)
  370. # asm 1: movq 16(<pp=int64#2),>squarerax=int64#3
  371. # asm 2: movq 16(<pp=%rsi),>squarerax=%rdx
  372. movq 16(%rsi),%rdx
  373. # qhasm: squarerax *= 38
  374. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  375. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  376. imulq $38,%rdx,%rax
  377. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 32)
  378. # asm 1: mulq 32(<pp=int64#2)
  379. # asm 2: mulq 32(<pp=%rsi)
  380. mulq 32(%rsi)
  381. # qhasm: carry? a1 += squarerax
  382. # asm 1: add <squarerax=int64#7,<a1=int64#6
  383. # asm 2: add <squarerax=%rax,<a1=%r9
  384. add %rax,%r9
  385. # qhasm: squarer11 += squarerdx + carry
  386. # asm 1: adc <squarerdx=int64#3,<squarer11=int64#8
  387. # asm 2: adc <squarerdx=%rdx,<squarer11=%r10
  388. adc %rdx,%r10
  389. # qhasm: squarerax = *(uint64 *)(pp + 24)
  390. # asm 1: movq 24(<pp=int64#2),>squarerax=int64#3
  391. # asm 2: movq 24(<pp=%rsi),>squarerax=%rdx
  392. movq 24(%rsi),%rdx
  393. # qhasm: squarerax *= 19
  394. # asm 1: imulq $19,<squarerax=int64#3,>squarerax=int64#7
  395. # asm 2: imulq $19,<squarerax=%rdx,>squarerax=%rax
  396. imulq $19,%rdx,%rax
  397. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 24)
  398. # asm 1: mulq 24(<pp=int64#2)
  399. # asm 2: mulq 24(<pp=%rsi)
  400. mulq 24(%rsi)
  401. # qhasm: carry? a1 += squarerax
  402. # asm 1: add <squarerax=int64#7,<a1=int64#6
  403. # asm 2: add <squarerax=%rax,<a1=%r9
  404. add %rax,%r9
  405. # qhasm: squarer11 += squarerdx + carry
  406. # asm 1: adc <squarerdx=int64#3,<squarer11=int64#8
  407. # asm 2: adc <squarerdx=%rdx,<squarer11=%r10
  408. adc %rdx,%r10
  409. # qhasm: squarerax = *(uint64 *)(pp + 24)
  410. # asm 1: movq 24(<pp=int64#2),>squarerax=int64#3
  411. # asm 2: movq 24(<pp=%rsi),>squarerax=%rdx
  412. movq 24(%rsi),%rdx
  413. # qhasm: squarerax *= 38
  414. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  415. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  416. imulq $38,%rdx,%rax
  417. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 32)
  418. # asm 1: mulq 32(<pp=int64#2)
  419. # asm 2: mulq 32(<pp=%rsi)
  420. mulq 32(%rsi)
  421. # qhasm: carry? a2 += squarerax
  422. # asm 1: add <squarerax=int64#7,<a2=int64#9
  423. # asm 2: add <squarerax=%rax,<a2=%r11
  424. add %rax,%r11
  425. # qhasm: squarer21 += squarerdx + carry
  426. # asm 1: adc <squarerdx=int64#3,<squarer21=int64#10
  427. # asm 2: adc <squarerdx=%rdx,<squarer21=%r12
  428. adc %rdx,%r12
  429. # qhasm: squarerax = *(uint64 *)(pp + 32)
  430. # asm 1: movq 32(<pp=int64#2),>squarerax=int64#3
  431. # asm 2: movq 32(<pp=%rsi),>squarerax=%rdx
  432. movq 32(%rsi),%rdx
  433. # qhasm: squarerax *= 19
  434. # asm 1: imulq $19,<squarerax=int64#3,>squarerax=int64#7
  435. # asm 2: imulq $19,<squarerax=%rdx,>squarerax=%rax
  436. imulq $19,%rdx,%rax
  437. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 32)
  438. # asm 1: mulq 32(<pp=int64#2)
  439. # asm 2: mulq 32(<pp=%rsi)
  440. mulq 32(%rsi)
  441. # qhasm: carry? a3 += squarerax
  442. # asm 1: add <squarerax=int64#7,<a3=int64#11
  443. # asm 2: add <squarerax=%rax,<a3=%r13
  444. add %rax,%r13
  445. # qhasm: squarer31 += squarerdx + carry
  446. # asm 1: adc <squarerdx=int64#3,<squarer31=int64#12
  447. # asm 2: adc <squarerdx=%rdx,<squarer31=%r14
  448. adc %rdx,%r14
  449. # qhasm: squareredmask = *(uint64 *) &CRYPTO_NAMESPACE(batch_REDMASK51)
  450. # asm 1: movq CRYPTO_NAMESPACE(batch_REDMASK51),>squareredmask=int64#3
  451. # asm 2: movq CRYPTO_NAMESPACE(batch_REDMASK51),>squareredmask=%rdx
  452. movq CRYPTO_NAMESPACE(batch_REDMASK51)(%rip),%rdx
  453. # qhasm: squarer01 = (squarer01.a0) << 13
  454. # asm 1: shld $13,<a0=int64#4,<squarer01=int64#5
  455. # asm 2: shld $13,<a0=%rcx,<squarer01=%r8
  456. shld $13,%rcx,%r8
  457. # qhasm: a0 &= squareredmask
  458. # asm 1: and <squareredmask=int64#3,<a0=int64#4
  459. # asm 2: and <squareredmask=%rdx,<a0=%rcx
  460. and %rdx,%rcx
  461. # qhasm: squarer11 = (squarer11.a1) << 13
  462. # asm 1: shld $13,<a1=int64#6,<squarer11=int64#8
  463. # asm 2: shld $13,<a1=%r9,<squarer11=%r10
  464. shld $13,%r9,%r10
  465. # qhasm: a1 &= squareredmask
  466. # asm 1: and <squareredmask=int64#3,<a1=int64#6
  467. # asm 2: and <squareredmask=%rdx,<a1=%r9
  468. and %rdx,%r9
  469. # qhasm: a1 += squarer01
  470. # asm 1: add <squarer01=int64#5,<a1=int64#6
  471. # asm 2: add <squarer01=%r8,<a1=%r9
  472. add %r8,%r9
  473. # qhasm: squarer21 = (squarer21.a2) << 13
  474. # asm 1: shld $13,<a2=int64#9,<squarer21=int64#10
  475. # asm 2: shld $13,<a2=%r11,<squarer21=%r12
  476. shld $13,%r11,%r12
  477. # qhasm: a2 &= squareredmask
  478. # asm 1: and <squareredmask=int64#3,<a2=int64#9
  479. # asm 2: and <squareredmask=%rdx,<a2=%r11
  480. and %rdx,%r11
  481. # qhasm: a2 += squarer11
  482. # asm 1: add <squarer11=int64#8,<a2=int64#9
  483. # asm 2: add <squarer11=%r10,<a2=%r11
  484. add %r10,%r11
  485. # qhasm: squarer31 = (squarer31.a3) << 13
  486. # asm 1: shld $13,<a3=int64#11,<squarer31=int64#12
  487. # asm 2: shld $13,<a3=%r13,<squarer31=%r14
  488. shld $13,%r13,%r14
  489. # qhasm: a3 &= squareredmask
  490. # asm 1: and <squareredmask=int64#3,<a3=int64#11
  491. # asm 2: and <squareredmask=%rdx,<a3=%r13
  492. and %rdx,%r13
  493. # qhasm: a3 += squarer21
  494. # asm 1: add <squarer21=int64#10,<a3=int64#11
  495. # asm 2: add <squarer21=%r12,<a3=%r13
  496. add %r12,%r13
  497. # qhasm: squarer41 = (squarer41.a4) << 13
  498. # asm 1: shld $13,<a4=int64#13,<squarer41=int64#14
  499. # asm 2: shld $13,<a4=%r15,<squarer41=%rbx
  500. shld $13,%r15,%rbx
  501. # qhasm: a4 &= squareredmask
  502. # asm 1: and <squareredmask=int64#3,<a4=int64#13
  503. # asm 2: and <squareredmask=%rdx,<a4=%r15
  504. and %rdx,%r15
  505. # qhasm: a4 += squarer31
  506. # asm 1: add <squarer31=int64#12,<a4=int64#13
  507. # asm 2: add <squarer31=%r14,<a4=%r15
  508. add %r14,%r15
  509. # qhasm: squarer41 = squarer41 * 19
  510. # asm 1: imulq $19,<squarer41=int64#14,>squarer41=int64#5
  511. # asm 2: imulq $19,<squarer41=%rbx,>squarer41=%r8
  512. imulq $19,%rbx,%r8
  513. # qhasm: a0 += squarer41
  514. # asm 1: add <squarer41=int64#5,<a0=int64#4
  515. # asm 2: add <squarer41=%r8,<a0=%rcx
  516. add %r8,%rcx
  517. # qhasm: squaret = a0
  518. # asm 1: mov <a0=int64#4,>squaret=int64#5
  519. # asm 2: mov <a0=%rcx,>squaret=%r8
  520. mov %rcx,%r8
  521. # qhasm: (uint64) squaret >>= 51
  522. # asm 1: shr $51,<squaret=int64#5
  523. # asm 2: shr $51,<squaret=%r8
  524. shr $51,%r8
  525. # qhasm: squaret += a1
  526. # asm 1: add <a1=int64#6,<squaret=int64#5
  527. # asm 2: add <a1=%r9,<squaret=%r8
  528. add %r9,%r8
  529. # qhasm: a0 &= squareredmask
  530. # asm 1: and <squareredmask=int64#3,<a0=int64#4
  531. # asm 2: and <squareredmask=%rdx,<a0=%rcx
  532. and %rdx,%rcx
  533. # qhasm: a1 = squaret
  534. # asm 1: mov <squaret=int64#5,>a1=int64#6
  535. # asm 2: mov <squaret=%r8,>a1=%r9
  536. mov %r8,%r9
  537. # qhasm: (uint64) squaret >>= 51
  538. # asm 1: shr $51,<squaret=int64#5
  539. # asm 2: shr $51,<squaret=%r8
  540. shr $51,%r8
  541. # qhasm: squaret += a2
  542. # asm 1: add <a2=int64#9,<squaret=int64#5
  543. # asm 2: add <a2=%r11,<squaret=%r8
  544. add %r11,%r8
  545. # qhasm: a1 &= squareredmask
  546. # asm 1: and <squareredmask=int64#3,<a1=int64#6
  547. # asm 2: and <squareredmask=%rdx,<a1=%r9
  548. and %rdx,%r9
  549. # qhasm: a2 = squaret
  550. # asm 1: mov <squaret=int64#5,>a2=int64#7
  551. # asm 2: mov <squaret=%r8,>a2=%rax
  552. mov %r8,%rax
  553. # qhasm: (uint64) squaret >>= 51
  554. # asm 1: shr $51,<squaret=int64#5
  555. # asm 2: shr $51,<squaret=%r8
  556. shr $51,%r8
  557. # qhasm: squaret += a3
  558. # asm 1: add <a3=int64#11,<squaret=int64#5
  559. # asm 2: add <a3=%r13,<squaret=%r8
  560. add %r13,%r8
  561. # qhasm: a2 &= squareredmask
  562. # asm 1: and <squareredmask=int64#3,<a2=int64#7
  563. # asm 2: and <squareredmask=%rdx,<a2=%rax
  564. and %rdx,%rax
  565. # qhasm: a3 = squaret
  566. # asm 1: mov <squaret=int64#5,>a3=int64#8
  567. # asm 2: mov <squaret=%r8,>a3=%r10
  568. mov %r8,%r10
  569. # qhasm: (uint64) squaret >>= 51
  570. # asm 1: shr $51,<squaret=int64#5
  571. # asm 2: shr $51,<squaret=%r8
  572. shr $51,%r8
  573. # qhasm: squaret += a4
  574. # asm 1: add <a4=int64#13,<squaret=int64#5
  575. # asm 2: add <a4=%r15,<squaret=%r8
  576. add %r15,%r8
  577. # qhasm: a3 &= squareredmask
  578. # asm 1: and <squareredmask=int64#3,<a3=int64#8
  579. # asm 2: and <squareredmask=%rdx,<a3=%r10
  580. and %rdx,%r10
  581. # qhasm: a4 = squaret
  582. # asm 1: mov <squaret=int64#5,>a4=int64#9
  583. # asm 2: mov <squaret=%r8,>a4=%r11
  584. mov %r8,%r11
  585. # qhasm: (uint64) squaret >>= 51
  586. # asm 1: shr $51,<squaret=int64#5
  587. # asm 2: shr $51,<squaret=%r8
  588. shr $51,%r8
  589. # qhasm: squaret *= 19
  590. # asm 1: imulq $19,<squaret=int64#5,>squaret=int64#5
  591. # asm 2: imulq $19,<squaret=%r8,>squaret=%r8
  592. imulq $19,%r8,%r8
  593. # qhasm: a0 += squaret
  594. # asm 1: add <squaret=int64#5,<a0=int64#4
  595. # asm 2: add <squaret=%r8,<a0=%rcx
  596. add %r8,%rcx
  597. # qhasm: a4 &= squareredmask
  598. # asm 1: and <squareredmask=int64#3,<a4=int64#9
  599. # asm 2: and <squareredmask=%rdx,<a4=%r11
  600. and %rdx,%r11
  601. # qhasm: a0_stack = a0
  602. # asm 1: movq <a0=int64#4,>a0_stack=stack64#8
  603. # asm 2: movq <a0=%rcx,>a0_stack=56(%rsp)
  604. movq %rcx,56(%rsp)
  605. # qhasm: a1_stack = a1
  606. # asm 1: movq <a1=int64#6,>a1_stack=stack64#9
  607. # asm 2: movq <a1=%r9,>a1_stack=64(%rsp)
  608. movq %r9,64(%rsp)
  609. # qhasm: a2_stack = a2
  610. # asm 1: movq <a2=int64#7,>a2_stack=stack64#10
  611. # asm 2: movq <a2=%rax,>a2_stack=72(%rsp)
  612. movq %rax,72(%rsp)
  613. # qhasm: a3_stack = a3
  614. # asm 1: movq <a3=int64#8,>a3_stack=stack64#11
  615. # asm 2: movq <a3=%r10,>a3_stack=80(%rsp)
  616. movq %r10,80(%rsp)
  617. # qhasm: a4_stack = a4
  618. # asm 1: movq <a4=int64#9,>a4_stack=stack64#12
  619. # asm 2: movq <a4=%r11,>a4_stack=88(%rsp)
  620. movq %r11,88(%rsp)
  # ---- Squaring of the five 64-bit words at pp+40 .. pp+72: products that involve word 0 ----
  # Notation for the notes in this section: y0..y4 = the words at pp+40, +48, +56, +64, +72.
  # Each mulq leaves a 128-bit product in rdx:rax, which initialises one accumulator pair
  # (low word bK, high word squarerK1):
  #   b0/squarer01 = rcx/r8,  b1/squarer11 = r9/r10,  b2/squarer21 = r11/r12,
  #   b3/squarer31 = r13/r14, b4/squarer41 = r15/rbx.
  # NOTE(review): pp+40 is presumably the second 5-limb field element of the input point;
  # confirm against the caller's structure layout.
  # b0/squarer01 = y0 * y0
  621. # qhasm: squarerax = *(uint64 *)(pp + 40)
  622. # asm 1: movq 40(<pp=int64#2),>squarerax=int64#7
  623. # asm 2: movq 40(<pp=%rsi),>squarerax=%rax
  624. movq 40(%rsi),%rax
  625. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 40)
  626. # asm 1: mulq 40(<pp=int64#2)
  627. # asm 2: mulq 40(<pp=%rsi)
  628. mulq 40(%rsi)
  629. # qhasm: b0 = squarerax
  630. # asm 1: mov <squarerax=int64#7,>b0=int64#4
  631. # asm 2: mov <squarerax=%rax,>b0=%rcx
  632. mov %rax,%rcx
  633. # qhasm: squarer01 = squarerdx
  634. # asm 1: mov <squarerdx=int64#3,>squarer01=int64#5
  635. # asm 2: mov <squarerdx=%rdx,>squarer01=%r8
  636. mov %rdx,%r8
  # b1/squarer11 = (2*y0) * y1. The word is doubled with shl before the multiply so the
  # symmetric cross term is counted twice with a single mulq.
  # NOTE(review): assumes 2*y0 does not overflow 64 bits; confirm the limb bounds callers guarantee.
  637. # qhasm: squarerax = *(uint64 *)(pp + 40)
  638. # asm 1: movq 40(<pp=int64#2),>squarerax=int64#7
  639. # asm 2: movq 40(<pp=%rsi),>squarerax=%rax
  640. movq 40(%rsi),%rax
  641. # qhasm: squarerax <<= 1
  642. # asm 1: shl $1,<squarerax=int64#7
  643. # asm 2: shl $1,<squarerax=%rax
  644. shl $1,%rax
  645. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 48)
  646. # asm 1: mulq 48(<pp=int64#2)
  647. # asm 2: mulq 48(<pp=%rsi)
  648. mulq 48(%rsi)
  649. # qhasm: b1 = squarerax
  650. # asm 1: mov <squarerax=int64#7,>b1=int64#6
  651. # asm 2: mov <squarerax=%rax,>b1=%r9
  652. mov %rax,%r9
  653. # qhasm: squarer11 = squarerdx
  654. # asm 1: mov <squarerdx=int64#3,>squarer11=int64#8
  655. # asm 2: mov <squarerdx=%rdx,>squarer11=%r10
  656. mov %rdx,%r10
  # b2/squarer21 = (2*y0) * y2
  657. # qhasm: squarerax = *(uint64 *)(pp + 40)
  658. # asm 1: movq 40(<pp=int64#2),>squarerax=int64#7
  659. # asm 2: movq 40(<pp=%rsi),>squarerax=%rax
  660. movq 40(%rsi),%rax
  661. # qhasm: squarerax <<= 1
  662. # asm 1: shl $1,<squarerax=int64#7
  663. # asm 2: shl $1,<squarerax=%rax
  664. shl $1,%rax
  665. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 56)
  666. # asm 1: mulq 56(<pp=int64#2)
  667. # asm 2: mulq 56(<pp=%rsi)
  668. mulq 56(%rsi)
  669. # qhasm: b2 = squarerax
  670. # asm 1: mov <squarerax=int64#7,>b2=int64#9
  671. # asm 2: mov <squarerax=%rax,>b2=%r11
  672. mov %rax,%r11
  673. # qhasm: squarer21 = squarerdx
  674. # asm 1: mov <squarerdx=int64#3,>squarer21=int64#10
  675. # asm 2: mov <squarerdx=%rdx,>squarer21=%r12
  676. mov %rdx,%r12
  # b3/squarer31 = (2*y0) * y3
  677. # qhasm: squarerax = *(uint64 *)(pp + 40)
  678. # asm 1: movq 40(<pp=int64#2),>squarerax=int64#7
  679. # asm 2: movq 40(<pp=%rsi),>squarerax=%rax
  680. movq 40(%rsi),%rax
  681. # qhasm: squarerax <<= 1
  682. # asm 1: shl $1,<squarerax=int64#7
  683. # asm 2: shl $1,<squarerax=%rax
  684. shl $1,%rax
  685. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 64)
  686. # asm 1: mulq 64(<pp=int64#2)
  687. # asm 2: mulq 64(<pp=%rsi)
  688. mulq 64(%rsi)
  689. # qhasm: b3 = squarerax
  690. # asm 1: mov <squarerax=int64#7,>b3=int64#11
  691. # asm 2: mov <squarerax=%rax,>b3=%r13
  692. mov %rax,%r13
  693. # qhasm: squarer31 = squarerdx
  694. # asm 1: mov <squarerdx=int64#3,>squarer31=int64#12
  695. # asm 2: mov <squarerdx=%rdx,>squarer31=%r14
  696. mov %rdx,%r14
  # b4/squarer41 = (2*y0) * y4
  697. # qhasm: squarerax = *(uint64 *)(pp + 40)
  698. # asm 1: movq 40(<pp=int64#2),>squarerax=int64#7
  699. # asm 2: movq 40(<pp=%rsi),>squarerax=%rax
  700. movq 40(%rsi),%rax
  701. # qhasm: squarerax <<= 1
  702. # asm 1: shl $1,<squarerax=int64#7
  703. # asm 2: shl $1,<squarerax=%rax
  704. shl $1,%rax
  705. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 72)
  706. # asm 1: mulq 72(<pp=int64#2)
  707. # asm 2: mulq 72(<pp=%rsi)
  708. mulq 72(%rsi)
  709. # qhasm: b4 = squarerax
  710. # asm 1: mov <squarerax=int64#7,>b4=int64#13
  711. # asm 2: mov <squarerax=%rax,>b4=%r15
  712. mov %rax,%r15
  713. # qhasm: squarer41 = squarerdx
  714. # asm 1: mov <squarerdx=int64#3,>squarer41=int64#14
  715. # asm 2: mov <squarerdx=%rdx,>squarer41=%rbx
  716. mov %rdx,%rbx
  # ---- Squaring of the words at pp+40 .. pp+72: products that involve word 1 (pp+48) ----
  # Notation: y0..y4 = the words at pp+40, +48, +56, +64, +72.
  # Each 128-bit product (rdx:rax) is added into an accumulator pair with add/adc, so a carry
  # out of the low word bK propagates into the high word squarerK1.
  # b2/squarer21 (r11/r12) += y1 * y1
  717. # qhasm: squarerax = *(uint64 *)(pp + 48)
  718. # asm 1: movq 48(<pp=int64#2),>squarerax=int64#7
  719. # asm 2: movq 48(<pp=%rsi),>squarerax=%rax
  720. movq 48(%rsi),%rax
  721. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 48)
  722. # asm 1: mulq 48(<pp=int64#2)
  723. # asm 2: mulq 48(<pp=%rsi)
  724. mulq 48(%rsi)
  725. # qhasm: carry? b2 += squarerax
  726. # asm 1: add <squarerax=int64#7,<b2=int64#9
  727. # asm 2: add <squarerax=%rax,<b2=%r11
  728. add %rax,%r11
  729. # qhasm: squarer21 += squarerdx + carry
  730. # asm 1: adc <squarerdx=int64#3,<squarer21=int64#10
  731. # asm 2: adc <squarerdx=%rdx,<squarer21=%r12
  732. adc %rdx,%r12
  # b3/squarer31 (r13/r14) += (2*y1) * y2 -- doubled cross term
  733. # qhasm: squarerax = *(uint64 *)(pp + 48)
  734. # asm 1: movq 48(<pp=int64#2),>squarerax=int64#7
  735. # asm 2: movq 48(<pp=%rsi),>squarerax=%rax
  736. movq 48(%rsi),%rax
  737. # qhasm: squarerax <<= 1
  738. # asm 1: shl $1,<squarerax=int64#7
  739. # asm 2: shl $1,<squarerax=%rax
  740. shl $1,%rax
  741. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 56)
  742. # asm 1: mulq 56(<pp=int64#2)
  743. # asm 2: mulq 56(<pp=%rsi)
  744. mulq 56(%rsi)
  745. # qhasm: carry? b3 += squarerax
  746. # asm 1: add <squarerax=int64#7,<b3=int64#11
  747. # asm 2: add <squarerax=%rax,<b3=%r13
  748. add %rax,%r13
  749. # qhasm: squarer31 += squarerdx + carry
  750. # asm 1: adc <squarerdx=int64#3,<squarer31=int64#12
  751. # asm 2: adc <squarerdx=%rdx,<squarer31=%r14
  752. adc %rdx,%r14
  # b4/squarer41 (r15/rbx) += (2*y1) * y3 -- doubled cross term
  753. # qhasm: squarerax = *(uint64 *)(pp + 48)
  754. # asm 1: movq 48(<pp=int64#2),>squarerax=int64#7
  755. # asm 2: movq 48(<pp=%rsi),>squarerax=%rax
  756. movq 48(%rsi),%rax
  757. # qhasm: squarerax <<= 1
  758. # asm 1: shl $1,<squarerax=int64#7
  759. # asm 2: shl $1,<squarerax=%rax
  760. shl $1,%rax
  761. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 64)
  762. # asm 1: mulq 64(<pp=int64#2)
  763. # asm 2: mulq 64(<pp=%rsi)
  764. mulq 64(%rsi)
  765. # qhasm: carry? b4 += squarerax
  766. # asm 1: add <squarerax=int64#7,<b4=int64#13
  767. # asm 2: add <squarerax=%rax,<b4=%r15
  768. add %rax,%r15
  769. # qhasm: squarer41 += squarerdx + carry
  770. # asm 1: adc <squarerdx=int64#3,<squarer41=int64#14
  771. # asm 2: adc <squarerdx=%rdx,<squarer41=%rbx
  772. adc %rdx,%rbx
  # b0/squarer01 (rcx/r8) += (38*y1) * y4. rdx is used as the load scratch because the
  # following mulq overwrites rdx anyway.
  # NOTE(review): 38 = 2*19 -- presumably the cross-term doubling combined with the factor 19
  # that wraps a product past the fifth limb back to limb 0 (reduction modulo 2^255-19);
  # confirm against the field definition. Also assumes 38*y1 fits in 64 bits.
  773. # qhasm: squarerax = *(uint64 *)(pp + 48)
  774. # asm 1: movq 48(<pp=int64#2),>squarerax=int64#3
  775. # asm 2: movq 48(<pp=%rsi),>squarerax=%rdx
  776. movq 48(%rsi),%rdx
  777. # qhasm: squarerax *= 38
  778. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  779. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  780. imulq $38,%rdx,%rax
  781. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 72)
  782. # asm 1: mulq 72(<pp=int64#2)
  783. # asm 2: mulq 72(<pp=%rsi)
  784. mulq 72(%rsi)
  785. # qhasm: carry? b0 += squarerax
  786. # asm 1: add <squarerax=int64#7,<b0=int64#4
  787. # asm 2: add <squarerax=%rax,<b0=%rcx
  788. add %rax,%rcx
  789. # qhasm: squarer01 += squarerdx + carry
  790. # asm 1: adc <squarerdx=int64#3,<squarer01=int64#5
  791. # asm 2: adc <squarerdx=%rdx,<squarer01=%r8
  792. adc %rdx,%r8
  # ---- Squaring of the words at pp+40 .. pp+72: products that involve word 2 (pp+56) ----
  # Notation: y0..y4 = the words at pp+40, +48, +56, +64, +72.
  # Products are accumulated with add/adc into the (low bK, high squarerK1) register pairs.
  # b4/squarer41 (r15/rbx) += y2 * y2
  793. # qhasm: squarerax = *(uint64 *)(pp + 56)
  794. # asm 1: movq 56(<pp=int64#2),>squarerax=int64#7
  795. # asm 2: movq 56(<pp=%rsi),>squarerax=%rax
  796. movq 56(%rsi),%rax
  797. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 56)
  798. # asm 1: mulq 56(<pp=int64#2)
  799. # asm 2: mulq 56(<pp=%rsi)
  800. mulq 56(%rsi)
  801. # qhasm: carry? b4 += squarerax
  802. # asm 1: add <squarerax=int64#7,<b4=int64#13
  803. # asm 2: add <squarerax=%rax,<b4=%r15
  804. add %rax,%r15
  805. # qhasm: squarer41 += squarerdx + carry
  806. # asm 1: adc <squarerdx=int64#3,<squarer41=int64#14
  807. # asm 2: adc <squarerdx=%rdx,<squarer41=%rbx
  808. adc %rdx,%rbx
  # b0/squarer01 (rcx/r8) += (38*y2) * y3
  # NOTE(review): 38 = 2*19 -- presumably cross-term doubling times the wrap-around factor 19
  # for reduction modulo 2^255-19; confirm. Assumes 38*y2 fits in 64 bits.
  809. # qhasm: squarerax = *(uint64 *)(pp + 56)
  810. # asm 1: movq 56(<pp=int64#2),>squarerax=int64#3
  811. # asm 2: movq 56(<pp=%rsi),>squarerax=%rdx
  812. movq 56(%rsi),%rdx
  813. # qhasm: squarerax *= 38
  814. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  815. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  816. imulq $38,%rdx,%rax
  817. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 64)
  818. # asm 1: mulq 64(<pp=int64#2)
  819. # asm 2: mulq 64(<pp=%rsi)
  820. mulq 64(%rsi)
  821. # qhasm: carry? b0 += squarerax
  822. # asm 1: add <squarerax=int64#7,<b0=int64#4
  823. # asm 2: add <squarerax=%rax,<b0=%rcx
  824. add %rax,%rcx
  825. # qhasm: squarer01 += squarerdx + carry
  826. # asm 1: adc <squarerdx=int64#3,<squarer01=int64#5
  827. # asm 2: adc <squarerdx=%rdx,<squarer01=%r8
  828. adc %rdx,%r8
  # b1/squarer11 (r9/r10) += (38*y2) * y4
  829. # qhasm: squarerax = *(uint64 *)(pp + 56)
  830. # asm 1: movq 56(<pp=int64#2),>squarerax=int64#3
  831. # asm 2: movq 56(<pp=%rsi),>squarerax=%rdx
  832. movq 56(%rsi),%rdx
  833. # qhasm: squarerax *= 38
  834. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  835. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  836. imulq $38,%rdx,%rax
  837. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 72)
  838. # asm 1: mulq 72(<pp=int64#2)
  839. # asm 2: mulq 72(<pp=%rsi)
  840. mulq 72(%rsi)
  841. # qhasm: carry? b1 += squarerax
  842. # asm 1: add <squarerax=int64#7,<b1=int64#6
  843. # asm 2: add <squarerax=%rax,<b1=%r9
  844. add %rax,%r9
  845. # qhasm: squarer11 += squarerdx + carry
  846. # asm 1: adc <squarerdx=int64#3,<squarer11=int64#8
  847. # asm 2: adc <squarerdx=%rdx,<squarer11=%r10
  848. adc %rdx,%r10
  # ---- Squaring of the words at pp+40 .. pp+72: remaining products of words 3 and 4 ----
  # Notation: y0..y4 = the words at pp+40, +48, +56, +64, +72.
  # Products are accumulated with add/adc into the (low bK, high squarerK1) register pairs.
  # b1/squarer11 (r9/r10) += (19*y3) * y3
  # NOTE(review): the factor 19 (not doubled, since this is a square term) is presumably the
  # wrap-around multiplier for reduction modulo 2^255-19; confirm. Assumes 19*y3 fits in 64 bits.
  849. # qhasm: squarerax = *(uint64 *)(pp + 64)
  850. # asm 1: movq 64(<pp=int64#2),>squarerax=int64#3
  851. # asm 2: movq 64(<pp=%rsi),>squarerax=%rdx
  852. movq 64(%rsi),%rdx
  853. # qhasm: squarerax *= 19
  854. # asm 1: imulq $19,<squarerax=int64#3,>squarerax=int64#7
  855. # asm 2: imulq $19,<squarerax=%rdx,>squarerax=%rax
  856. imulq $19,%rdx,%rax
  857. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 64)
  858. # asm 1: mulq 64(<pp=int64#2)
  859. # asm 2: mulq 64(<pp=%rsi)
  860. mulq 64(%rsi)
  861. # qhasm: carry? b1 += squarerax
  862. # asm 1: add <squarerax=int64#7,<b1=int64#6
  863. # asm 2: add <squarerax=%rax,<b1=%r9
  864. add %rax,%r9
  865. # qhasm: squarer11 += squarerdx + carry
  866. # asm 1: adc <squarerdx=int64#3,<squarer11=int64#8
  867. # asm 2: adc <squarerdx=%rdx,<squarer11=%r10
  868. adc %rdx,%r10
  # b2/squarer21 (r11/r12) += (38*y3) * y4 -- doubled cross term with the factor 19 folded in
  869. # qhasm: squarerax = *(uint64 *)(pp + 64)
  870. # asm 1: movq 64(<pp=int64#2),>squarerax=int64#3
  871. # asm 2: movq 64(<pp=%rsi),>squarerax=%rdx
  872. movq 64(%rsi),%rdx
  873. # qhasm: squarerax *= 38
  874. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  875. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  876. imulq $38,%rdx,%rax
  877. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 72)
  878. # asm 1: mulq 72(<pp=int64#2)
  879. # asm 2: mulq 72(<pp=%rsi)
  880. mulq 72(%rsi)
  881. # qhasm: carry? b2 += squarerax
  882. # asm 1: add <squarerax=int64#7,<b2=int64#9
  883. # asm 2: add <squarerax=%rax,<b2=%r11
  884. add %rax,%r11
  885. # qhasm: squarer21 += squarerdx + carry
  886. # asm 1: adc <squarerdx=int64#3,<squarer21=int64#10
  887. # asm 2: adc <squarerdx=%rdx,<squarer21=%r12
  888. adc %rdx,%r12
  # b3/squarer31 (r13/r14) += (19*y4) * y4
  889. # qhasm: squarerax = *(uint64 *)(pp + 72)
  890. # asm 1: movq 72(<pp=int64#2),>squarerax=int64#3
  891. # asm 2: movq 72(<pp=%rsi),>squarerax=%rdx
  892. movq 72(%rsi),%rdx
  893. # qhasm: squarerax *= 19
  894. # asm 1: imulq $19,<squarerax=int64#3,>squarerax=int64#7
  895. # asm 2: imulq $19,<squarerax=%rdx,>squarerax=%rax
  896. imulq $19,%rdx,%rax
  897. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 72)
  898. # asm 1: mulq 72(<pp=int64#2)
  899. # asm 2: mulq 72(<pp=%rsi)
  900. mulq 72(%rsi)
  901. # qhasm: carry? b3 += squarerax
  902. # asm 1: add <squarerax=int64#7,<b3=int64#11
  903. # asm 2: add <squarerax=%rax,<b3=%r13
  904. add %rax,%r13
  905. # qhasm: squarer31 += squarerdx + carry
  906. # asm 1: adc <squarerdx=int64#3,<squarer31=int64#12
  907. # asm 2: adc <squarerdx=%rdx,<squarer31=%r14
  908. adc %rdx,%r14
  # ---- Split each 128-bit accumulator (high squarerK1, low bK) at bit 51 ----
  # For every pair, shld $13 leaves squarerK1 = (squarerK1 << 13) | (bK >> 51), i.e. the
  # accumulator bits from bit 51 upward (64 - 51 = 13), and the following 'and' keeps the
  # masked low part in bK. The extracted upper part of limb K is then added into limb K+1.
  # NOTE(review): REDMASK51 is defined outside this view; presumably 2^51-1 -- confirm.
  # The load below is RIP-relative in the emitted instruction, although the generated
  # '# asm' annotations do not show the (%rip) base.
  909. # qhasm: squareredmask = *(uint64 *) &CRYPTO_NAMESPACE(batch_REDMASK51)
  910. # asm 1: movq CRYPTO_NAMESPACE(batch_REDMASK51),>squareredmask=int64#3
  911. # asm 2: movq CRYPTO_NAMESPACE(batch_REDMASK51),>squareredmask=%rdx
  912. movq CRYPTO_NAMESPACE(batch_REDMASK51)(%rip),%rdx
  # limb 0: r8 = upper part of the b0 accumulator, rcx = masked b0
  913. # qhasm: squarer01 = (squarer01.b0) << 13
  914. # asm 1: shld $13,<b0=int64#4,<squarer01=int64#5
  915. # asm 2: shld $13,<b0=%rcx,<squarer01=%r8
  916. shld $13,%rcx,%r8
  917. # qhasm: b0 &= squareredmask
  918. # asm 1: and <squareredmask=int64#3,<b0=int64#4
  919. # asm 2: and <squareredmask=%rdx,<b0=%rcx
  920. and %rdx,%rcx
  # limb 1: split, then absorb the upper part of limb 0
  921. # qhasm: squarer11 = (squarer11.b1) << 13
  922. # asm 1: shld $13,<b1=int64#6,<squarer11=int64#8
  923. # asm 2: shld $13,<b1=%r9,<squarer11=%r10
  924. shld $13,%r9,%r10
  925. # qhasm: b1 &= squareredmask
  926. # asm 1: and <squareredmask=int64#3,<b1=int64#6
  927. # asm 2: and <squareredmask=%rdx,<b1=%r9
  928. and %rdx,%r9
  929. # qhasm: b1 += squarer01
  930. # asm 1: add <squarer01=int64#5,<b1=int64#6
  931. # asm 2: add <squarer01=%r8,<b1=%r9
  932. add %r8,%r9
  # limb 2: split, then absorb the upper part of limb 1
  933. # qhasm: squarer21 = (squarer21.b2) << 13
  934. # asm 1: shld $13,<b2=int64#9,<squarer21=int64#10
  935. # asm 2: shld $13,<b2=%r11,<squarer21=%r12
  936. shld $13,%r11,%r12
  937. # qhasm: b2 &= squareredmask
  938. # asm 1: and <squareredmask=int64#3,<b2=int64#9
  939. # asm 2: and <squareredmask=%rdx,<b2=%r11
  940. and %rdx,%r11
  941. # qhasm: b2 += squarer11
  942. # asm 1: add <squarer11=int64#8,<b2=int64#9
  943. # asm 2: add <squarer11=%r10,<b2=%r11
  944. add %r10,%r11
  # limb 3: split, then absorb the upper part of limb 2
  945. # qhasm: squarer31 = (squarer31.b3) << 13
  946. # asm 1: shld $13,<b3=int64#11,<squarer31=int64#12
  947. # asm 2: shld $13,<b3=%r13,<squarer31=%r14
  948. shld $13,%r13,%r14
  949. # qhasm: b3 &= squareredmask
  950. # asm 1: and <squareredmask=int64#3,<b3=int64#11
  951. # asm 2: and <squareredmask=%rdx,<b3=%r13
  952. and %rdx,%r13
  953. # qhasm: b3 += squarer21
  954. # asm 1: add <squarer21=int64#10,<b3=int64#11
  955. # asm 2: add <squarer21=%r12,<b3=%r13
  956. add %r12,%r13
  # limb 4: split, then absorb the upper part of limb 3; the upper part of limb 4 stays in
  # rbx (squarer41) for the code that follows
  957. # qhasm: squarer41 = (squarer41.b4) << 13
  958. # asm 1: shld $13,<b4=int64#13,<squarer41=int64#14
  959. # asm 2: shld $13,<b4=%r15,<squarer41=%rbx
  960. shld $13,%r15,%rbx
  961. # qhasm: b4 &= squareredmask
  962. # asm 1: and <squareredmask=int64#3,<b4=int64#13
  963. # asm 2: and <squareredmask=%rdx,<b4=%r15
  964. and %rdx,%r15
  965. # qhasm: b4 += squarer31
  966. # asm 1: add <squarer31=int64#12,<b4=int64#13
  967. # asm 2: add <squarer31=%r14,<b4=%r15
  968. add %r14,%r15
  # ---- Sequential carry pass over limbs b0..b4 ----
  # On entry: rdx holds squareredmask, rbx holds squarer41 (the overflow of limb 4), and
  # b0..b4 are in rcx, r9, r11, r13, r15. squaret (r8) carries "limb >> 51" from each limb
  # into the next; each limb is then masked with squareredmask.
  # Note the register moves: b2, b3 and b4 end up in rax, r10 and r11 respectively.
  # NOTE(review): multiplying the top overflow by 19 before adding it to limb 0 is consistent
  # with reduction modulo 2^255-19 at 51 bits per limb -- confirm against the field definition.
  # Fold the limb-4 overflow back into limb 0: b0 += 19 * squarer41
  969. # qhasm: squarer41 = squarer41 * 19
  970. # asm 1: imulq $19,<squarer41=int64#14,>squarer41=int64#5
  971. # asm 2: imulq $19,<squarer41=%rbx,>squarer41=%r8
  972. imulq $19,%rbx,%r8
  973. # qhasm: b0 += squarer41
  974. # asm 1: add <squarer41=int64#5,<b0=int64#4
  975. # asm 2: add <squarer41=%r8,<b0=%rcx
  976. add %r8,%rcx
  # carry b0 -> b1
  977. # qhasm: squaret = b0
  978. # asm 1: mov <b0=int64#4,>squaret=int64#5
  979. # asm 2: mov <b0=%rcx,>squaret=%r8
  980. mov %rcx,%r8
  981. # qhasm: (uint64) squaret >>= 51
  982. # asm 1: shr $51,<squaret=int64#5
  983. # asm 2: shr $51,<squaret=%r8
  984. shr $51,%r8
  985. # qhasm: squaret += b1
  986. # asm 1: add <b1=int64#6,<squaret=int64#5
  987. # asm 2: add <b1=%r9,<squaret=%r8
  988. add %r9,%r8
  989. # qhasm: b0 &= squareredmask
  990. # asm 1: and <squareredmask=int64#3,<b0=int64#4
  991. # asm 2: and <squareredmask=%rdx,<b0=%rcx
  992. and %rdx,%rcx
  # carry b1 -> b2
  993. # qhasm: b1 = squaret
  994. # asm 1: mov <squaret=int64#5,>b1=int64#6
  995. # asm 2: mov <squaret=%r8,>b1=%r9
  996. mov %r8,%r9
  997. # qhasm: (uint64) squaret >>= 51
  998. # asm 1: shr $51,<squaret=int64#5
  999. # asm 2: shr $51,<squaret=%r8
  1000. shr $51,%r8
  1001. # qhasm: squaret += b2
  1002. # asm 1: add <b2=int64#9,<squaret=int64#5
  1003. # asm 2: add <b2=%r11,<squaret=%r8
  1004. add %r11,%r8
  1005. # qhasm: b1 &= squareredmask
  1006. # asm 1: and <squareredmask=int64#3,<b1=int64#6
  1007. # asm 2: and <squareredmask=%rdx,<b1=%r9
  1008. and %rdx,%r9
  # carry b2 -> b3 (b2 now lives in rax)
  1009. # qhasm: b2 = squaret
  1010. # asm 1: mov <squaret=int64#5,>b2=int64#7
  1011. # asm 2: mov <squaret=%r8,>b2=%rax
  1012. mov %r8,%rax
  1013. # qhasm: (uint64) squaret >>= 51
  1014. # asm 1: shr $51,<squaret=int64#5
  1015. # asm 2: shr $51,<squaret=%r8
  1016. shr $51,%r8
  1017. # qhasm: squaret += b3
  1018. # asm 1: add <b3=int64#11,<squaret=int64#5
  1019. # asm 2: add <b3=%r13,<squaret=%r8
  1020. add %r13,%r8
  1021. # qhasm: b2 &= squareredmask
  1022. # asm 1: and <squareredmask=int64#3,<b2=int64#7
  1023. # asm 2: and <squareredmask=%rdx,<b2=%rax
  1024. and %rdx,%rax
  # carry b3 -> b4 (b3 now lives in r10)
  1025. # qhasm: b3 = squaret
  1026. # asm 1: mov <squaret=int64#5,>b3=int64#8
  1027. # asm 2: mov <squaret=%r8,>b3=%r10
  1028. mov %r8,%r10
  1029. # qhasm: (uint64) squaret >>= 51
  1030. # asm 1: shr $51,<squaret=int64#5
  1031. # asm 2: shr $51,<squaret=%r8
  1032. shr $51,%r8
  1033. # qhasm: squaret += b4
  1034. # asm 1: add <b4=int64#13,<squaret=int64#5
  1035. # asm 2: add <b4=%r15,<squaret=%r8
  1036. add %r15,%r8
  1037. # qhasm: b3 &= squareredmask
  1038. # asm 1: and <squareredmask=int64#3,<b3=int64#8
  1039. # asm 2: and <squareredmask=%rdx,<b3=%r10
  1040. and %rdx,%r10
  # carry b4 -> b0 with wrap-around: b0 += 19 * (b4 >> 51) (b4 now lives in r11).
  # b0 is not masked again after this add within this section.
  1041. # qhasm: b4 = squaret
  1042. # asm 1: mov <squaret=int64#5,>b4=int64#9
  1043. # asm 2: mov <squaret=%r8,>b4=%r11
  1044. mov %r8,%r11
  1045. # qhasm: (uint64) squaret >>= 51
  1046. # asm 1: shr $51,<squaret=int64#5
  1047. # asm 2: shr $51,<squaret=%r8
  1048. shr $51,%r8
  1049. # qhasm: squaret *= 19
  1050. # asm 1: imulq $19,<squaret=int64#5,>squaret=int64#5
  1051. # asm 2: imulq $19,<squaret=%r8,>squaret=%r8
  1052. imulq $19,%r8,%r8
  1053. # qhasm: b0 += squaret
  1054. # asm 1: add <squaret=int64#5,<b0=int64#4
  1055. # asm 2: add <squaret=%r8,<b0=%rcx
  1056. add %r8,%rcx
  1057. # qhasm: b4 &= squareredmask
  1058. # asm 1: and <squareredmask=int64#3,<b4=int64#9
  1059. # asm 2: and <squareredmask=%rdx,<b4=%r11
  1060. and %rdx,%r11
  # ---- Spill the five limbs b0..b4 to the stack ----
  # b0..b4 are taken from rcx, r9, rax, r10, r11 and written to the consecutive 8-byte slots
  # 96(%rsp) .. 128(%rsp) (b0_stack .. b4_stack), freeing the registers for the code that follows.
  1061. # qhasm: b0_stack = b0
  1062. # asm 1: movq <b0=int64#4,>b0_stack=stack64#13
  1063. # asm 2: movq <b0=%rcx,>b0_stack=96(%rsp)
  1064. movq %rcx,96(%rsp)
  1065. # qhasm: b1_stack = b1
  1066. # asm 1: movq <b1=int64#6,>b1_stack=stack64#14
  1067. # asm 2: movq <b1=%r9,>b1_stack=104(%rsp)
  1068. movq %r9,104(%rsp)
  1069. # qhasm: b2_stack = b2
  1070. # asm 1: movq <b2=int64#7,>b2_stack=stack64#15
  1071. # asm 2: movq <b2=%rax,>b2_stack=112(%rsp)
  1072. movq %rax,112(%rsp)
  1073. # qhasm: b3_stack = b3
  1074. # asm 1: movq <b3=int64#8,>b3_stack=stack64#16
  1075. # asm 2: movq <b3=%r10,>b3_stack=120(%rsp)
  1076. movq %r10,120(%rsp)
  1077. # qhasm: b4_stack = b4
  1078. # asm 1: movq <b4=int64#9,>b4_stack=stack64#17
  1079. # asm 2: movq <b4=%r11,>b4_stack=128(%rsp)
  1080. movq %r11,128(%rsp)
  1081. # qhasm: squarerax = *(uint64 *)(pp + 80)
  1082. # asm 1: movq 80(<pp=int64#2),>squarerax=int64#7
  1083. # asm 2: movq 80(<pp=%rsi),>squarerax=%rax
  1084. movq 80(%rsi),%rax
  1085. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 80)
  1086. # asm 1: mulq 80(<pp=int64#2)
  1087. # asm 2: mulq 80(<pp=%rsi)
  1088. mulq 80(%rsi)
  1089. # qhasm: c0 = squarerax
  1090. # asm 1: mov <squarerax=int64#7,>c0=int64#4
  1091. # asm 2: mov <squarerax=%rax,>c0=%rcx
  1092. mov %rax,%rcx
  1093. # qhasm: squarer01 = squarerdx
  1094. # asm 1: mov <squarerdx=int64#3,>squarer01=int64#5
  1095. # asm 2: mov <squarerdx=%rdx,>squarer01=%r8
  1096. mov %rdx,%r8
  1097. # qhasm: squarerax = *(uint64 *)(pp + 80)
  1098. # asm 1: movq 80(<pp=int64#2),>squarerax=int64#7
  1099. # asm 2: movq 80(<pp=%rsi),>squarerax=%rax
  1100. movq 80(%rsi),%rax
  1101. # qhasm: squarerax <<= 1
  1102. # asm 1: shl $1,<squarerax=int64#7
  1103. # asm 2: shl $1,<squarerax=%rax
  1104. shl $1,%rax
  1105. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 88)
  1106. # asm 1: mulq 88(<pp=int64#2)
  1107. # asm 2: mulq 88(<pp=%rsi)
  1108. mulq 88(%rsi)
  1109. # qhasm: c1 = squarerax
  1110. # asm 1: mov <squarerax=int64#7,>c1=int64#6
  1111. # asm 2: mov <squarerax=%rax,>c1=%r9
  1112. mov %rax,%r9
  1113. # qhasm: squarer11 = squarerdx
  1114. # asm 1: mov <squarerdx=int64#3,>squarer11=int64#8
  1115. # asm 2: mov <squarerdx=%rdx,>squarer11=%r10
  1116. mov %rdx,%r10
  1117. # qhasm: squarerax = *(uint64 *)(pp + 80)
  1118. # asm 1: movq 80(<pp=int64#2),>squarerax=int64#7
  1119. # asm 2: movq 80(<pp=%rsi),>squarerax=%rax
  1120. movq 80(%rsi),%rax
  1121. # qhasm: squarerax <<= 1
  1122. # asm 1: shl $1,<squarerax=int64#7
  1123. # asm 2: shl $1,<squarerax=%rax
  1124. shl $1,%rax
  1125. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 96)
  1126. # asm 1: mulq 96(<pp=int64#2)
  1127. # asm 2: mulq 96(<pp=%rsi)
  1128. mulq 96(%rsi)
  1129. # qhasm: c2 = squarerax
  1130. # asm 1: mov <squarerax=int64#7,>c2=int64#9
  1131. # asm 2: mov <squarerax=%rax,>c2=%r11
  1132. mov %rax,%r11
  1133. # qhasm: squarer21 = squarerdx
  1134. # asm 1: mov <squarerdx=int64#3,>squarer21=int64#10
  1135. # asm 2: mov <squarerdx=%rdx,>squarer21=%r12
  1136. mov %rdx,%r12
  1137. # qhasm: squarerax = *(uint64 *)(pp + 80)
  1138. # asm 1: movq 80(<pp=int64#2),>squarerax=int64#7
  1139. # asm 2: movq 80(<pp=%rsi),>squarerax=%rax
  1140. movq 80(%rsi),%rax
  1141. # qhasm: squarerax <<= 1
  1142. # asm 1: shl $1,<squarerax=int64#7
  1143. # asm 2: shl $1,<squarerax=%rax
  1144. shl $1,%rax
  1145. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 104)
  1146. # asm 1: mulq 104(<pp=int64#2)
  1147. # asm 2: mulq 104(<pp=%rsi)
  1148. mulq 104(%rsi)
  1149. # qhasm: c3 = squarerax
  1150. # asm 1: mov <squarerax=int64#7,>c3=int64#11
  1151. # asm 2: mov <squarerax=%rax,>c3=%r13
  1152. mov %rax,%r13
  1153. # qhasm: squarer31 = squarerdx
  1154. # asm 1: mov <squarerdx=int64#3,>squarer31=int64#12
  1155. # asm 2: mov <squarerdx=%rdx,>squarer31=%r14
  1156. mov %rdx,%r14
  1157. # qhasm: squarerax = *(uint64 *)(pp + 80)
  1158. # asm 1: movq 80(<pp=int64#2),>squarerax=int64#7
  1159. # asm 2: movq 80(<pp=%rsi),>squarerax=%rax
  1160. movq 80(%rsi),%rax
  1161. # qhasm: squarerax <<= 1
  1162. # asm 1: shl $1,<squarerax=int64#7
  1163. # asm 2: shl $1,<squarerax=%rax
  1164. shl $1,%rax
  1165. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 112)
  1166. # asm 1: mulq 112(<pp=int64#2)
  1167. # asm 2: mulq 112(<pp=%rsi)
  1168. mulq 112(%rsi)
  1169. # qhasm: c4 = squarerax
  1170. # asm 1: mov <squarerax=int64#7,>c4=int64#13
  1171. # asm 2: mov <squarerax=%rax,>c4=%r15
  1172. mov %rax,%r15
  1173. # qhasm: squarer41 = squarerdx
  1174. # asm 1: mov <squarerdx=int64#3,>squarer41=int64#14
  1175. # asm 2: mov <squarerdx=%rdx,>squarer41=%rbx
  1176. mov %rdx,%rbx
  1177. # qhasm: squarerax = *(uint64 *)(pp + 88)
  1178. # asm 1: movq 88(<pp=int64#2),>squarerax=int64#7
  1179. # asm 2: movq 88(<pp=%rsi),>squarerax=%rax
  1180. movq 88(%rsi),%rax
  1181. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 88)
  1182. # asm 1: mulq 88(<pp=int64#2)
  1183. # asm 2: mulq 88(<pp=%rsi)
  1184. mulq 88(%rsi)
  1185. # qhasm: carry? c2 += squarerax
  1186. # asm 1: add <squarerax=int64#7,<c2=int64#9
  1187. # asm 2: add <squarerax=%rax,<c2=%r11
  1188. add %rax,%r11
  1189. # qhasm: squarer21 += squarerdx + carry
  1190. # asm 1: adc <squarerdx=int64#3,<squarer21=int64#10
  1191. # asm 2: adc <squarerdx=%rdx,<squarer21=%r12
  1192. adc %rdx,%r12
  1193. # qhasm: squarerax = *(uint64 *)(pp + 88)
  1194. # asm 1: movq 88(<pp=int64#2),>squarerax=int64#7
  1195. # asm 2: movq 88(<pp=%rsi),>squarerax=%rax
  1196. movq 88(%rsi),%rax
  1197. # qhasm: squarerax <<= 1
  1198. # asm 1: shl $1,<squarerax=int64#7
  1199. # asm 2: shl $1,<squarerax=%rax
  1200. shl $1,%rax
  1201. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 96)
  1202. # asm 1: mulq 96(<pp=int64#2)
  1203. # asm 2: mulq 96(<pp=%rsi)
  1204. mulq 96(%rsi)
  1205. # qhasm: carry? c3 += squarerax
  1206. # asm 1: add <squarerax=int64#7,<c3=int64#11
  1207. # asm 2: add <squarerax=%rax,<c3=%r13
  1208. add %rax,%r13
  1209. # qhasm: squarer31 += squarerdx + carry
  1210. # asm 1: adc <squarerdx=int64#3,<squarer31=int64#12
  1211. # asm 2: adc <squarerdx=%rdx,<squarer31=%r14
  1212. adc %rdx,%r14
  1213. # qhasm: squarerax = *(uint64 *)(pp + 88)
  1214. # asm 1: movq 88(<pp=int64#2),>squarerax=int64#7
  1215. # asm 2: movq 88(<pp=%rsi),>squarerax=%rax
  1216. movq 88(%rsi),%rax
  1217. # qhasm: squarerax <<= 1
  1218. # asm 1: shl $1,<squarerax=int64#7
  1219. # asm 2: shl $1,<squarerax=%rax
  1220. shl $1,%rax
  1221. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 104)
  1222. # asm 1: mulq 104(<pp=int64#2)
  1223. # asm 2: mulq 104(<pp=%rsi)
  1224. mulq 104(%rsi)
  1225. # qhasm: carry? c4 += squarerax
  1226. # asm 1: add <squarerax=int64#7,<c4=int64#13
  1227. # asm 2: add <squarerax=%rax,<c4=%r15
  1228. add %rax,%r15
  1229. # qhasm: squarer41 += squarerdx + carry
  1230. # asm 1: adc <squarerdx=int64#3,<squarer41=int64#14
  1231. # asm 2: adc <squarerdx=%rdx,<squarer41=%rbx
  1232. adc %rdx,%rbx
  1233. # qhasm: squarerax = *(uint64 *)(pp + 88)
  1234. # asm 1: movq 88(<pp=int64#2),>squarerax=int64#3
  1235. # asm 2: movq 88(<pp=%rsi),>squarerax=%rdx
  1236. movq 88(%rsi),%rdx
  1237. # qhasm: squarerax *= 38
  1238. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  1239. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  1240. imulq $38,%rdx,%rax
  1241. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 112)
  1242. # asm 1: mulq 112(<pp=int64#2)
  1243. # asm 2: mulq 112(<pp=%rsi)
  1244. mulq 112(%rsi)
  1245. # qhasm: carry? c0 += squarerax
  1246. # asm 1: add <squarerax=int64#7,<c0=int64#4
  1247. # asm 2: add <squarerax=%rax,<c0=%rcx
  1248. add %rax,%rcx
  1249. # qhasm: squarer01 += squarerdx + carry
  1250. # asm 1: adc <squarerdx=int64#3,<squarer01=int64#5
  1251. # asm 2: adc <squarerdx=%rdx,<squarer01=%r8
  1252. adc %rdx,%r8
  1253. # qhasm: squarerax = *(uint64 *)(pp + 96)
  1254. # asm 1: movq 96(<pp=int64#2),>squarerax=int64#7
  1255. # asm 2: movq 96(<pp=%rsi),>squarerax=%rax
  1256. movq 96(%rsi),%rax
  1257. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 96)
  1258. # asm 1: mulq 96(<pp=int64#2)
  1259. # asm 2: mulq 96(<pp=%rsi)
  1260. mulq 96(%rsi)
  1261. # qhasm: carry? c4 += squarerax
  1262. # asm 1: add <squarerax=int64#7,<c4=int64#13
  1263. # asm 2: add <squarerax=%rax,<c4=%r15
  1264. add %rax,%r15
  1265. # qhasm: squarer41 += squarerdx + carry
  1266. # asm 1: adc <squarerdx=int64#3,<squarer41=int64#14
  1267. # asm 2: adc <squarerdx=%rdx,<squarer41=%rbx
  1268. adc %rdx,%rbx
  1269. # qhasm: squarerax = *(uint64 *)(pp + 96)
  1270. # asm 1: movq 96(<pp=int64#2),>squarerax=int64#3
  1271. # asm 2: movq 96(<pp=%rsi),>squarerax=%rdx
  1272. movq 96(%rsi),%rdx
  1273. # qhasm: squarerax *= 38
  1274. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  1275. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  1276. imulq $38,%rdx,%rax
  1277. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 104)
  1278. # asm 1: mulq 104(<pp=int64#2)
  1279. # asm 2: mulq 104(<pp=%rsi)
  1280. mulq 104(%rsi)
  1281. # qhasm: carry? c0 += squarerax
  1282. # asm 1: add <squarerax=int64#7,<c0=int64#4
  1283. # asm 2: add <squarerax=%rax,<c0=%rcx
  1284. add %rax,%rcx
  1285. # qhasm: squarer01 += squarerdx + carry
  1286. # asm 1: adc <squarerdx=int64#3,<squarer01=int64#5
  1287. # asm 2: adc <squarerdx=%rdx,<squarer01=%r8
  1288. adc %rdx,%r8
  1289. # qhasm: squarerax = *(uint64 *)(pp + 96)
  1290. # asm 1: movq 96(<pp=int64#2),>squarerax=int64#3
  1291. # asm 2: movq 96(<pp=%rsi),>squarerax=%rdx
  1292. movq 96(%rsi),%rdx
  1293. # qhasm: squarerax *= 38
  1294. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  1295. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  1296. imulq $38,%rdx,%rax
  1297. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 112)
  1298. # asm 1: mulq 112(<pp=int64#2)
  1299. # asm 2: mulq 112(<pp=%rsi)
  1300. mulq 112(%rsi)
  1301. # qhasm: carry? c1 += squarerax
  1302. # asm 1: add <squarerax=int64#7,<c1=int64#6
  1303. # asm 2: add <squarerax=%rax,<c1=%r9
  1304. add %rax,%r9
  1305. # qhasm: squarer11 += squarerdx + carry
  1306. # asm 1: adc <squarerdx=int64#3,<squarer11=int64#8
  1307. # asm 2: adc <squarerdx=%rdx,<squarer11=%r10
  1308. adc %rdx,%r10
  1309. # qhasm: squarerax = *(uint64 *)(pp + 104)
  1310. # asm 1: movq 104(<pp=int64#2),>squarerax=int64#3
  1311. # asm 2: movq 104(<pp=%rsi),>squarerax=%rdx
  1312. movq 104(%rsi),%rdx
  1313. # qhasm: squarerax *= 19
  1314. # asm 1: imulq $19,<squarerax=int64#3,>squarerax=int64#7
  1315. # asm 2: imulq $19,<squarerax=%rdx,>squarerax=%rax
  1316. imulq $19,%rdx,%rax
  1317. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 104)
  1318. # asm 1: mulq 104(<pp=int64#2)
  1319. # asm 2: mulq 104(<pp=%rsi)
  1320. mulq 104(%rsi)
  1321. # qhasm: carry? c1 += squarerax
  1322. # asm 1: add <squarerax=int64#7,<c1=int64#6
  1323. # asm 2: add <squarerax=%rax,<c1=%r9
  1324. add %rax,%r9
  1325. # qhasm: squarer11 += squarerdx + carry
  1326. # asm 1: adc <squarerdx=int64#3,<squarer11=int64#8
  1327. # asm 2: adc <squarerdx=%rdx,<squarer11=%r10
  1328. adc %rdx,%r10
  1329. # qhasm: squarerax = *(uint64 *)(pp + 104)
  1330. # asm 1: movq 104(<pp=int64#2),>squarerax=int64#3
  1331. # asm 2: movq 104(<pp=%rsi),>squarerax=%rdx
  1332. movq 104(%rsi),%rdx
  1333. # qhasm: squarerax *= 38
  1334. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  1335. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  1336. imulq $38,%rdx,%rax
  1337. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 112)
  1338. # asm 1: mulq 112(<pp=int64#2)
  1339. # asm 2: mulq 112(<pp=%rsi)
  1340. mulq 112(%rsi)
  1341. # qhasm: carry? c2 += squarerax
  1342. # asm 1: add <squarerax=int64#7,<c2=int64#9
  1343. # asm 2: add <squarerax=%rax,<c2=%r11
  1344. add %rax,%r11
  1345. # qhasm: squarer21 += squarerdx + carry
  1346. # asm 1: adc <squarerdx=int64#3,<squarer21=int64#10
  1347. # asm 2: adc <squarerdx=%rdx,<squarer21=%r12
  1348. adc %rdx,%r12
  1349. # qhasm: squarerax = *(uint64 *)(pp + 112)
  1350. # asm 1: movq 112(<pp=int64#2),>squarerax=int64#3
  1351. # asm 2: movq 112(<pp=%rsi),>squarerax=%rdx
  1352. movq 112(%rsi),%rdx
  1353. # qhasm: squarerax *= 19
  1354. # asm 1: imulq $19,<squarerax=int64#3,>squarerax=int64#7
  1355. # asm 2: imulq $19,<squarerax=%rdx,>squarerax=%rax
  1356. imulq $19,%rdx,%rax
  1357. # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(pp + 112)
  1358. # asm 1: mulq 112(<pp=int64#2)
  1359. # asm 2: mulq 112(<pp=%rsi)
  1360. mulq 112(%rsi)
  1361. # qhasm: carry? c3 += squarerax
  1362. # asm 1: add <squarerax=int64#7,<c3=int64#11
  1363. # asm 2: add <squarerax=%rax,<c3=%r13
  1364. add %rax,%r13
  1365. # qhasm: squarer31 += squarerdx + carry
  1366. # asm 1: adc <squarerdx=int64#3,<squarer31=int64#12
  1367. # asm 2: adc <squarerdx=%rdx,<squarer31=%r14
  1368. adc %rdx,%r14
  # Fold the upper part of each (squarerN1, cN) register pair into the next limb.
  # For every pair: shld $13 shifts squarerN1 left by 13 and fills the vacated
  # low bits with the top 13 bits of cN, so squarerN1 ends up holding the pair's
  # value shifted right by 51 (truncated to 64 bits). cN is then masked with
  # squareredmask, and the extracted high part is added to limb N+1.
  # The high part of the last pair (squarer41) is multiplied by 19 and added to c0.
  # NOTE(review): squareredmask is loaded from batch_REDMASK51, presumably
  # 2^51 - 1; the constant's value is not visible here -- confirm.
  # NOTE(review): the factor 19 presumably reflects reduction modulo 2^255 - 19
  # -- confirm against the field definition.
  # The emitted load is RIP-relative, although the "asm 1"/"asm 2" annotations
  # show the bare symbol.
  1369. # qhasm: squareredmask = *(uint64 *) &CRYPTO_NAMESPACE(batch_REDMASK51)
  1370. # asm 1: movq CRYPTO_NAMESPACE(batch_REDMASK51),>squareredmask=int64#3
  1371. # asm 2: movq CRYPTO_NAMESPACE(batch_REDMASK51),>squareredmask=%rdx
  1372. movq CRYPTO_NAMESPACE(batch_REDMASK51)(%rip),%rdx
  1373. # qhasm: squarer01 = (squarer01.c0) << 13
  1374. # asm 1: shld $13,<c0=int64#4,<squarer01=int64#5
  1375. # asm 2: shld $13,<c0=%rcx,<squarer01=%r8
  1376. shld $13,%rcx,%r8
  1377. # qhasm: c0 &= squareredmask
  1378. # asm 1: and <squareredmask=int64#3,<c0=int64#4
  1379. # asm 2: and <squareredmask=%rdx,<c0=%rcx
  1380. and %rdx,%rcx
  1381. # qhasm: squarer11 = (squarer11.c1) << 13
  1382. # asm 1: shld $13,<c1=int64#6,<squarer11=int64#8
  1383. # asm 2: shld $13,<c1=%r9,<squarer11=%r10
  1384. shld $13,%r9,%r10
  1385. # qhasm: c1 &= squareredmask
  1386. # asm 1: and <squareredmask=int64#3,<c1=int64#6
  1387. # asm 2: and <squareredmask=%rdx,<c1=%r9
  1388. and %rdx,%r9
  1389. # qhasm: c1 += squarer01
  1390. # asm 1: add <squarer01=int64#5,<c1=int64#6
  1391. # asm 2: add <squarer01=%r8,<c1=%r9
  1392. add %r8,%r9
  1393. # qhasm: squarer21 = (squarer21.c2) << 13
  1394. # asm 1: shld $13,<c2=int64#9,<squarer21=int64#10
  1395. # asm 2: shld $13,<c2=%r11,<squarer21=%r12
  1396. shld $13,%r11,%r12
  1397. # qhasm: c2 &= squareredmask
  1398. # asm 1: and <squareredmask=int64#3,<c2=int64#9
  1399. # asm 2: and <squareredmask=%rdx,<c2=%r11
  1400. and %rdx,%r11
  1401. # qhasm: c2 += squarer11
  1402. # asm 1: add <squarer11=int64#8,<c2=int64#9
  1403. # asm 2: add <squarer11=%r10,<c2=%r11
  1404. add %r10,%r11
  1405. # qhasm: squarer31 = (squarer31.c3) << 13
  1406. # asm 1: shld $13,<c3=int64#11,<squarer31=int64#12
  1407. # asm 2: shld $13,<c3=%r13,<squarer31=%r14
  1408. shld $13,%r13,%r14
  1409. # qhasm: c3 &= squareredmask
  1410. # asm 1: and <squareredmask=int64#3,<c3=int64#11
  1411. # asm 2: and <squareredmask=%rdx,<c3=%r13
  1412. and %rdx,%r13
  1413. # qhasm: c3 += squarer21
  1414. # asm 1: add <squarer21=int64#10,<c3=int64#11
  1415. # asm 2: add <squarer21=%r12,<c3=%r13
  1416. add %r12,%r13
  1417. # qhasm: squarer41 = (squarer41.c4) << 13
  1418. # asm 1: shld $13,<c4=int64#13,<squarer41=int64#14
  1419. # asm 2: shld $13,<c4=%r15,<squarer41=%rbx
  1420. shld $13,%r15,%rbx
  1421. # qhasm: c4 &= squareredmask
  1422. # asm 1: and <squareredmask=int64#3,<c4=int64#13
  1423. # asm 2: and <squareredmask=%rdx,<c4=%r15
  1424. and %rdx,%r15
  1425. # qhasm: c4 += squarer31
  1426. # asm 1: add <squarer31=int64#12,<c4=int64#13
  1427. # asm 2: add <squarer31=%r14,<c4=%r15
  1428. add %r14,%r15
  # Wrap-around: the top pair's high part re-enters at limb 0, scaled by 19.
  # The product is written to %r8 (the register squarer01 used above).
  1429. # qhasm: squarer41 = squarer41 * 19
  1430. # asm 1: imulq $19,<squarer41=int64#14,>squarer41=int64#5
  1431. # asm 2: imulq $19,<squarer41=%rbx,>squarer41=%r8
  1432. imulq $19,%rbx,%r8
  1433. # qhasm: c0 += squarer41
  1434. # asm 1: add <squarer41=int64#5,<c0=int64#4
  1435. # asm 2: add <squarer41=%r8,<c0=%rcx
  1436. add %r8,%rcx
  # Sequential carry pass over limbs c0..c4, using squaret (%r8) as the carry.
  # At each step: squaret = (previous limb >> 51) + next limb; the previous limb
  # is masked with squareredmask (%rdx, expected to be loaded already); the next
  # limb takes the un-masked sum and is masked one step later.
  # The carry out of c4 is multiplied by 19 and added to c0; c0 is not masked
  # again after that final add within this block.
  # Register moves made here: c2 now lives in %rax, c3 in %r10, c4 in %r11
  # (c0 stays in %rcx, c1 in %r9).
  1437. # qhasm: squaret = c0
  1438. # asm 1: mov <c0=int64#4,>squaret=int64#5
  1439. # asm 2: mov <c0=%rcx,>squaret=%r8
  1440. mov %rcx,%r8
  1441. # qhasm: (uint64) squaret >>= 51
  1442. # asm 1: shr $51,<squaret=int64#5
  1443. # asm 2: shr $51,<squaret=%r8
  1444. shr $51,%r8
  1445. # qhasm: squaret += c1
  1446. # asm 1: add <c1=int64#6,<squaret=int64#5
  1447. # asm 2: add <c1=%r9,<squaret=%r8
  1448. add %r9,%r8
  1449. # qhasm: c0 &= squareredmask
  1450. # asm 1: and <squareredmask=int64#3,<c0=int64#4
  1451. # asm 2: and <squareredmask=%rdx,<c0=%rcx
  1452. and %rdx,%rcx
  1453. # qhasm: c1 = squaret
  1454. # asm 1: mov <squaret=int64#5,>c1=int64#6
  1455. # asm 2: mov <squaret=%r8,>c1=%r9
  1456. mov %r8,%r9
  1457. # qhasm: (uint64) squaret >>= 51
  1458. # asm 1: shr $51,<squaret=int64#5
  1459. # asm 2: shr $51,<squaret=%r8
  1460. shr $51,%r8
  1461. # qhasm: squaret += c2
  1462. # asm 1: add <c2=int64#9,<squaret=int64#5
  1463. # asm 2: add <c2=%r11,<squaret=%r8
  1464. add %r11,%r8
  1465. # qhasm: c1 &= squareredmask
  1466. # asm 1: and <squareredmask=int64#3,<c1=int64#6
  1467. # asm 2: and <squareredmask=%rdx,<c1=%r9
  1468. and %rdx,%r9
  1469. # qhasm: c2 = squaret
  1470. # asm 1: mov <squaret=int64#5,>c2=int64#7
  1471. # asm 2: mov <squaret=%r8,>c2=%rax
  1472. mov %r8,%rax
  1473. # qhasm: (uint64) squaret >>= 51
  1474. # asm 1: shr $51,<squaret=int64#5
  1475. # asm 2: shr $51,<squaret=%r8
  1476. shr $51,%r8
  1477. # qhasm: squaret += c3
  1478. # asm 1: add <c3=int64#11,<squaret=int64#5
  1479. # asm 2: add <c3=%r13,<squaret=%r8
  1480. add %r13,%r8
  1481. # qhasm: c2 &= squareredmask
  1482. # asm 1: and <squareredmask=int64#3,<c2=int64#7
  1483. # asm 2: and <squareredmask=%rdx,<c2=%rax
  1484. and %rdx,%rax
  1485. # qhasm: c3 = squaret
  1486. # asm 1: mov <squaret=int64#5,>c3=int64#8
  1487. # asm 2: mov <squaret=%r8,>c3=%r10
  1488. mov %r8,%r10
  1489. # qhasm: (uint64) squaret >>= 51
  1490. # asm 1: shr $51,<squaret=int64#5
  1491. # asm 2: shr $51,<squaret=%r8
  1492. shr $51,%r8
  1493. # qhasm: squaret += c4
  1494. # asm 1: add <c4=int64#13,<squaret=int64#5
  1495. # asm 2: add <c4=%r15,<squaret=%r8
  1496. add %r15,%r8
  1497. # qhasm: c3 &= squareredmask
  1498. # asm 1: and <squareredmask=int64#3,<c3=int64#8
  1499. # asm 2: and <squareredmask=%rdx,<c3=%r10
  1500. and %rdx,%r10
  1501. # qhasm: c4 = squaret
  1502. # asm 1: mov <squaret=int64#5,>c4=int64#9
  1503. # asm 2: mov <squaret=%r8,>c4=%r11
  1504. mov %r8,%r11
  # Carry out of the top limb wraps to limb 0 with weight 19.
  1505. # qhasm: (uint64) squaret >>= 51
  1506. # asm 1: shr $51,<squaret=int64#5
  1507. # asm 2: shr $51,<squaret=%r8
  1508. shr $51,%r8
  1509. # qhasm: squaret *= 19
  1510. # asm 1: imulq $19,<squaret=int64#5,>squaret=int64#5
  1511. # asm 2: imulq $19,<squaret=%r8,>squaret=%r8
  1512. imulq $19,%r8,%r8
  1513. # qhasm: c0 += squaret
  1514. # asm 1: add <squaret=int64#5,<c0=int64#4
  1515. # asm 2: add <squaret=%r8,<c0=%rcx
  1516. add %r8,%rcx
  1517. # qhasm: c4 &= squareredmask
  1518. # asm 1: and <squareredmask=int64#3,<c4=int64#9
  1519. # asm 2: and <squareredmask=%rdx,<c4=%r11
  1520. and %rdx,%r11
  # Double each limb c0..c4 in place (limb-wise, no carry between limbs), then
  # spill the doubled limbs to the stack as c0_stack..c4_stack at 136..168(%rsp).
  # Limb registers at this point: c0=%rcx, c1=%r9, c2=%rax, c3=%r10, c4=%r11.
  1521. # qhasm: c0 += c0
  1522. # asm 1: add <c0=int64#4,<c0=int64#4
  1523. # asm 2: add <c0=%rcx,<c0=%rcx
  1524. add %rcx,%rcx
  1525. # qhasm: c1 += c1
  1526. # asm 1: add <c1=int64#6,<c1=int64#6
  1527. # asm 2: add <c1=%r9,<c1=%r9
  1528. add %r9,%r9
  1529. # qhasm: c2 += c2
  1530. # asm 1: add <c2=int64#7,<c2=int64#7
  1531. # asm 2: add <c2=%rax,<c2=%rax
  1532. add %rax,%rax
  1533. # qhasm: c3 += c3
  1534. # asm 1: add <c3=int64#8,<c3=int64#8
  1535. # asm 2: add <c3=%r10,<c3=%r10
  1536. add %r10,%r10
  1537. # qhasm: c4 += c4
  1538. # asm 1: add <c4=int64#9,<c4=int64#9
  1539. # asm 2: add <c4=%r11,<c4=%r11
  1540. add %r11,%r11
  # Spill the doubled limbs; the registers are reused by the code that follows.
  1541. # qhasm: c0_stack = c0
  1542. # asm 1: movq <c0=int64#4,>c0_stack=stack64#18
  1543. # asm 2: movq <c0=%rcx,>c0_stack=136(%rsp)
  1544. movq %rcx,136(%rsp)
  1545. # qhasm: c1_stack = c1
  1546. # asm 1: movq <c1=int64#6,>c1_stack=stack64#19
  1547. # asm 2: movq <c1=%r9,>c1_stack=144(%rsp)
  1548. movq %r9,144(%rsp)
  1549. # qhasm: c2_stack = c2
  1550. # asm 1: movq <c2=int64#7,>c2_stack=stack64#20
  1551. # asm 2: movq <c2=%rax,>c2_stack=152(%rsp)
  1552. movq %rax,152(%rsp)
  1553. # qhasm: c3_stack = c3
  1554. # asm 1: movq <c3=int64#8,>c3_stack=stack64#21
  1555. # asm 2: movq <c3=%r10,>c3_stack=160(%rsp)
  1556. movq %r10,160(%rsp)
  1557. # qhasm: c4_stack = c4
  1558. # asm 1: movq <c4=int64#9,>c4_stack=stack64#22
  1559. # asm 2: movq <c4=%r11,>c4_stack=168(%rsp)
  1560. movq %r11,168(%rsp)
  # Compute, limb by limb with no carry between limbs:
  #   d = K - a_stack      (a_stack at 56..88(%rsp))
  #   e = K - b_stack      (b_stack at 96..128(%rsp))
  # where K has limb 0 = batch_2P0 and limbs 1..4 = batch_2P1234.
  # d is then stored to 56..88(%rsp): d_stack uses the same slots as a_stack
  # (stack64#8..#12), so a_stack is overwritten. e is stored to 176..208(%rsp).
  # d0..d4 are left in %rdx,%rcx,%r8,%r9,%rax when the block ends.
  # NOTE(review): batch_2P0/batch_2P1234 are presumably the limbs of 2*p, added
  # so the per-limb subtraction cannot go negative; the constant values are not
  # visible here -- confirm.
  # The constant loads are RIP-relative, although the "asm 1"/"asm 2"
  # annotations show the bare symbol.
  1561. # qhasm: d0 = *(uint64 *)&CRYPTO_NAMESPACE(batch_2P0)
  1562. # asm 1: movq CRYPTO_NAMESPACE(batch_2P0),>d0=int64#3
  1563. # asm 2: movq CRYPTO_NAMESPACE(batch_2P0),>d0=%rdx
  1564. movq CRYPTO_NAMESPACE(batch_2P0)(%rip),%rdx
  1565. # qhasm: d1 = *(uint64 *)&CRYPTO_NAMESPACE(batch_2P1234)
  1566. # asm 1: movq CRYPTO_NAMESPACE(batch_2P1234),>d1=int64#4
  1567. # asm 2: movq CRYPTO_NAMESPACE(batch_2P1234),>d1=%rcx
  1568. movq CRYPTO_NAMESPACE(batch_2P1234)(%rip),%rcx
  1569. # qhasm: d2 = *(uint64 *)&CRYPTO_NAMESPACE(batch_2P1234)
  1570. # asm 1: movq CRYPTO_NAMESPACE(batch_2P1234),>d2=int64#5
  1571. # asm 2: movq CRYPTO_NAMESPACE(batch_2P1234),>d2=%r8
  1572. movq CRYPTO_NAMESPACE(batch_2P1234)(%rip),%r8
  1573. # qhasm: d3 = *(uint64 *)&CRYPTO_NAMESPACE(batch_2P1234)
  1574. # asm 1: movq CRYPTO_NAMESPACE(batch_2P1234),>d3=int64#6
  1575. # asm 2: movq CRYPTO_NAMESPACE(batch_2P1234),>d3=%r9
  1576. movq CRYPTO_NAMESPACE(batch_2P1234)(%rip),%r9
  1577. # qhasm: d4 = *(uint64 *)&CRYPTO_NAMESPACE(batch_2P1234)
  1578. # asm 1: movq CRYPTO_NAMESPACE(batch_2P1234),>d4=int64#7
  1579. # asm 2: movq CRYPTO_NAMESPACE(batch_2P1234),>d4=%rax
  1580. movq CRYPTO_NAMESPACE(batch_2P1234)(%rip),%rax
  # e starts as a register copy of the same constant limbs.
  1581. # qhasm: e0 = d0
  1582. # asm 1: mov <d0=int64#3,>e0=int64#8
  1583. # asm 2: mov <d0=%rdx,>e0=%r10
  1584. mov %rdx,%r10
  1585. # qhasm: e1 = d1
  1586. # asm 1: mov <d1=int64#4,>e1=int64#9
  1587. # asm 2: mov <d1=%rcx,>e1=%r11
  1588. mov %rcx,%r11
  1589. # qhasm: e2 = d2
  1590. # asm 1: mov <d2=int64#5,>e2=int64#10
  1591. # asm 2: mov <d2=%r8,>e2=%r12
  1592. mov %r8,%r12
  1593. # qhasm: e3 = d3
  1594. # asm 1: mov <d3=int64#6,>e3=int64#11
  1595. # asm 2: mov <d3=%r9,>e3=%r13
  1596. mov %r9,%r13
  1597. # qhasm: e4 = d4
  1598. # asm 1: mov <d4=int64#7,>e4=int64#12
  1599. # asm 2: mov <d4=%rax,>e4=%r14
  1600. mov %rax,%r14
  # d = K - a
  1601. # qhasm: d0 -= a0_stack
  1602. # asm 1: subq <a0_stack=stack64#8,<d0=int64#3
  1603. # asm 2: subq <a0_stack=56(%rsp),<d0=%rdx
  1604. subq 56(%rsp),%rdx
  1605. # qhasm: d1 -= a1_stack
  1606. # asm 1: subq <a1_stack=stack64#9,<d1=int64#4
  1607. # asm 2: subq <a1_stack=64(%rsp),<d1=%rcx
  1608. subq 64(%rsp),%rcx
  1609. # qhasm: d2 -= a2_stack
  1610. # asm 1: subq <a2_stack=stack64#10,<d2=int64#5
  1611. # asm 2: subq <a2_stack=72(%rsp),<d2=%r8
  1612. subq 72(%rsp),%r8
  1613. # qhasm: d3 -= a3_stack
  1614. # asm 1: subq <a3_stack=stack64#11,<d3=int64#6
  1615. # asm 2: subq <a3_stack=80(%rsp),<d3=%r9
  1616. subq 80(%rsp),%r9
  1617. # qhasm: d4 -= a4_stack
  1618. # asm 1: subq <a4_stack=stack64#12,<d4=int64#7
  1619. # asm 2: subq <a4_stack=88(%rsp),<d4=%rax
  1620. subq 88(%rsp),%rax
  # e = K - b
  1621. # qhasm: e0 -= b0_stack
  1622. # asm 1: subq <b0_stack=stack64#13,<e0=int64#8
  1623. # asm 2: subq <b0_stack=96(%rsp),<e0=%r10
  1624. subq 96(%rsp),%r10
  1625. # qhasm: e1 -= b1_stack
  1626. # asm 1: subq <b1_stack=stack64#14,<e1=int64#9
  1627. # asm 2: subq <b1_stack=104(%rsp),<e1=%r11
  1628. subq 104(%rsp),%r11
  1629. # qhasm: e2 -= b2_stack
  1630. # asm 1: subq <b2_stack=stack64#15,<e2=int64#10
  1631. # asm 2: subq <b2_stack=112(%rsp),<e2=%r12
  1632. subq 112(%rsp),%r12
  1633. # qhasm: e3 -= b3_stack
  1634. # asm 1: subq <b3_stack=stack64#16,<e3=int64#11
  1635. # asm 2: subq <b3_stack=120(%rsp),<e3=%r13
  1636. subq 120(%rsp),%r13
  1637. # qhasm: e4 -= b4_stack
  1638. # asm 1: subq <b4_stack=stack64#17,<e4=int64#12
  1639. # asm 2: subq <b4_stack=128(%rsp),<e4=%r14
  1640. subq 128(%rsp),%r14
  # Spill d over the a_stack slots (a is not read again in this block).
  1641. # qhasm: d0_stack = d0
  1642. # asm 1: movq <d0=int64#3,>d0_stack=stack64#8
  1643. # asm 2: movq <d0=%rdx,>d0_stack=56(%rsp)
  1644. movq %rdx,56(%rsp)
  1645. # qhasm: d1_stack = d1
  1646. # asm 1: movq <d1=int64#4,>d1_stack=stack64#9
  1647. # asm 2: movq <d1=%rcx,>d1_stack=64(%rsp)
  1648. movq %rcx,64(%rsp)
  1649. # qhasm: d2_stack = d2
  1650. # asm 1: movq <d2=int64#5,>d2_stack=stack64#10
  1651. # asm 2: movq <d2=%r8,>d2_stack=72(%rsp)
  1652. movq %r8,72(%rsp)
  1653. # qhasm: d3_stack = d3
  1654. # asm 1: movq <d3=int64#6,>d3_stack=stack64#11
  1655. # asm 2: movq <d3=%r9,>d3_stack=80(%rsp)
  1656. movq %r9,80(%rsp)
  1657. # qhasm: d4_stack = d4
  1658. # asm 1: movq <d4=int64#7,>d4_stack=stack64#12
  1659. # asm 2: movq <d4=%rax,>d4_stack=88(%rsp)
  1660. movq %rax,88(%rsp)
  # Spill e to fresh slots 176..208(%rsp).
  1661. # qhasm: e0_stack = e0
  1662. # asm 1: movq <e0=int64#8,>e0_stack=stack64#23
  1663. # asm 2: movq <e0=%r10,>e0_stack=176(%rsp)
  1664. movq %r10,176(%rsp)
  1665. # qhasm: e1_stack = e1
  1666. # asm 1: movq <e1=int64#9,>e1_stack=stack64#24
  1667. # asm 2: movq <e1=%r11,>e1_stack=184(%rsp)
  1668. movq %r11,184(%rsp)
  1669. # qhasm: e2_stack = e2
  1670. # asm 1: movq <e2=int64#10,>e2_stack=stack64#25
  1671. # asm 2: movq <e2=%r12,>e2_stack=192(%rsp)
  1672. movq %r12,192(%rsp)
  1673. # qhasm: e3_stack = e3
  1674. # asm 1: movq <e3=int64#11,>e3_stack=stack64#26
  1675. # asm 2: movq <e3=%r13,>e3_stack=200(%rsp)
  1676. movq %r13,200(%rsp)
  1677. # qhasm: e4_stack = e4
  1678. # asm 1: movq <e4=int64#12,>e4_stack=stack64#27
  1679. # asm 2: movq <e4=%r14,>e4_stack=208(%rsp)
  1680. movq %r14,208(%rsp)
  # rz = d + b_stack, limb by limb (no carry between limbs), written to the five
  # 64-bit words at rp+40..rp+72.
  # d0..d4 are read from %rdx,%rcx,%r8,%r9,%rax and left unmodified; rz0..rz4
  # are built in %r10..%r14 and still hold the sum after the stores.
  1681. # qhasm: rz0 = d0
  1682. # asm 1: mov <d0=int64#3,>rz0=int64#8
  1683. # asm 2: mov <d0=%rdx,>rz0=%r10
  1684. mov %rdx,%r10
  1685. # qhasm: rz1 = d1
  1686. # asm 1: mov <d1=int64#4,>rz1=int64#9
  1687. # asm 2: mov <d1=%rcx,>rz1=%r11
  1688. mov %rcx,%r11
  1689. # qhasm: rz2 = d2
  1690. # asm 1: mov <d2=int64#5,>rz2=int64#10
  1691. # asm 2: mov <d2=%r8,>rz2=%r12
  1692. mov %r8,%r12
  1693. # qhasm: rz3 = d3
  1694. # asm 1: mov <d3=int64#6,>rz3=int64#11
  1695. # asm 2: mov <d3=%r9,>rz3=%r13
  1696. mov %r9,%r13
  1697. # qhasm: rz4 = d4
  1698. # asm 1: mov <d4=int64#7,>rz4=int64#12
  1699. # asm 2: mov <d4=%rax,>rz4=%r14
  1700. mov %rax,%r14
  # rz += b (b_stack lives at 96..128(%rsp))
  1701. # qhasm: rz0 += b0_stack
  1702. # asm 1: addq <b0_stack=stack64#13,<rz0=int64#8
  1703. # asm 2: addq <b0_stack=96(%rsp),<rz0=%r10
  1704. addq 96(%rsp),%r10
  1705. # qhasm: rz1 += b1_stack
  1706. # asm 1: addq <b1_stack=stack64#14,<rz1=int64#9
  1707. # asm 2: addq <b1_stack=104(%rsp),<rz1=%r11
  1708. addq 104(%rsp),%r11
  1709. # qhasm: rz2 += b2_stack
  1710. # asm 1: addq <b2_stack=stack64#15,<rz2=int64#10
  1711. # asm 2: addq <b2_stack=112(%rsp),<rz2=%r12
  1712. addq 112(%rsp),%r12
  1713. # qhasm: rz3 += b3_stack
  1714. # asm 1: addq <b3_stack=stack64#16,<rz3=int64#11
  1715. # asm 2: addq <b3_stack=120(%rsp),<rz3=%r13
  1716. addq 120(%rsp),%r13
  1717. # qhasm: rz4 += b4_stack
  1718. # asm 1: addq <b4_stack=stack64#17,<rz4=int64#12
  1719. # asm 2: addq <b4_stack=128(%rsp),<rz4=%r14
  1720. addq 128(%rsp),%r14
  # Store the sum into the output structure at rp+40..rp+72.
  1721. # qhasm: *(uint64 *) (rp + 40) = rz0
  1722. # asm 1: movq <rz0=int64#8,40(<rp=int64#1)
  1723. # asm 2: movq <rz0=%r10,40(<rp=%rdi)
  1724. movq %r10,40(%rdi)
  1725. # qhasm: *(uint64 *) (rp + 48) = rz1
  1726. # asm 1: movq <rz1=int64#9,48(<rp=int64#1)
  1727. # asm 2: movq <rz1=%r11,48(<rp=%rdi)
  1728. movq %r11,48(%rdi)
  1729. # qhasm: *(uint64 *) (rp + 56) = rz2
  1730. # asm 1: movq <rz2=int64#10,56(<rp=int64#1)
  1731. # asm 2: movq <rz2=%r12,56(<rp=%rdi)
  1732. movq %r12,56(%rdi)
  1733. # qhasm: *(uint64 *) (rp + 64) = rz3
  1734. # asm 1: movq <rz3=int64#11,64(<rp=int64#1)
  1735. # asm 2: movq <rz3=%r13,64(<rp=%rdi)
  1736. movq %r13,64(%rdi)
  1737. # qhasm: *(uint64 *) (rp + 72) = rz4
  1738. # asm 1: movq <rz4=int64#12,72(<rp=int64#1)
  1739. # asm 2: movq <rz4=%r14,72(<rp=%rdi)
  1740. movq %r14,72(%rdi)
  # d = d + K - b_stack, limb by limb (no carry between limbs), written to the
  # five 64-bit words at rp+80..rp+112. K has limb 0 = batch_2P0 and
  # limbs 1..4 = batch_2P1234. d0..d4 live in %rdx,%rcx,%r8,%r9,%rax and are
  # updated in place.
  # NOTE(review): the constant is presumably a multiple of the field prime,
  # added before the subtraction so no limb goes negative; the constant values
  # are not visible here -- confirm.
  # The constant operands are RIP-relative, although the "asm 1"/"asm 2"
  # annotations show the bare symbol.
  1741. # qhasm: d0 += *(uint64 *)&CRYPTO_NAMESPACE(batch_2P0)
  1742. # asm 1: add CRYPTO_NAMESPACE(batch_2P0),<d0=int64#3
  1743. # asm 2: add CRYPTO_NAMESPACE(batch_2P0),<d0=%rdx
  1744. add CRYPTO_NAMESPACE(batch_2P0)(%rip),%rdx
  1745. # qhasm: d1 += *(uint64 *)&CRYPTO_NAMESPACE(batch_2P1234)
  1746. # asm 1: add CRYPTO_NAMESPACE(batch_2P1234),<d1=int64#4
  1747. # asm 2: add CRYPTO_NAMESPACE(batch_2P1234),<d1=%rcx
  1748. add CRYPTO_NAMESPACE(batch_2P1234)(%rip),%rcx
  1749. # qhasm: d2 += *(uint64 *)&CRYPTO_NAMESPACE(batch_2P1234)
  1750. # asm 1: add CRYPTO_NAMESPACE(batch_2P1234),<d2=int64#5
  1751. # asm 2: add CRYPTO_NAMESPACE(batch_2P1234),<d2=%r8
  1752. add CRYPTO_NAMESPACE(batch_2P1234)(%rip),%r8
  1753. # qhasm: d3 += *(uint64 *)&CRYPTO_NAMESPACE(batch_2P1234)
  1754. # asm 1: add CRYPTO_NAMESPACE(batch_2P1234),<d3=int64#6
  1755. # asm 2: add CRYPTO_NAMESPACE(batch_2P1234),<d3=%r9
  1756. add CRYPTO_NAMESPACE(batch_2P1234)(%rip),%r9
  1757. # qhasm: d4 += *(uint64 *)&CRYPTO_NAMESPACE(batch_2P1234)
  1758. # asm 1: add CRYPTO_NAMESPACE(batch_2P1234),<d4=int64#7
  1759. # asm 2: add CRYPTO_NAMESPACE(batch_2P1234),<d4=%rax
  1760. add CRYPTO_NAMESPACE(batch_2P1234)(%rip),%rax
  # d -= b (b_stack lives at 96..128(%rsp))
  1761. # qhasm: d0 -= b0_stack
  1762. # asm 1: subq <b0_stack=stack64#13,<d0=int64#3
  1763. # asm 2: subq <b0_stack=96(%rsp),<d0=%rdx
  1764. subq 96(%rsp),%rdx
  1765. # qhasm: d1 -= b1_stack
  1766. # asm 1: subq <b1_stack=stack64#14,<d1=int64#4
  1767. # asm 2: subq <b1_stack=104(%rsp),<d1=%rcx
  1768. subq 104(%rsp),%rcx
  1769. # qhasm: d2 -= b2_stack
  1770. # asm 1: subq <b2_stack=stack64#15,<d2=int64#5
  1771. # asm 2: subq <b2_stack=112(%rsp),<d2=%r8
  1772. subq 112(%rsp),%r8
  1773. # qhasm: d3 -= b3_stack
  1774. # asm 1: subq <b3_stack=stack64#16,<d3=int64#6
  1775. # asm 2: subq <b3_stack=120(%rsp),<d3=%r9
  1776. subq 120(%rsp),%r9
  1777. # qhasm: d4 -= b4_stack
  1778. # asm 1: subq <b4_stack=stack64#17,<d4=int64#7
  1779. # asm 2: subq <b4_stack=128(%rsp),<d4=%rax
  1780. subq 128(%rsp),%rax
  # Store the result into the output structure at rp+80..rp+112.
  1781. # qhasm: *(uint64 *)(rp + 80) = d0
  1782. # asm 1: movq <d0=int64#3,80(<rp=int64#1)
  1783. # asm 2: movq <d0=%rdx,80(<rp=%rdi)
  1784. movq %rdx,80(%rdi)
  1785. # qhasm: *(uint64 *)(rp + 88) = d1
  1786. # asm 1: movq <d1=int64#4,88(<rp=int64#1)
  1787. # asm 2: movq <d1=%rcx,88(<rp=%rdi)
  1788. movq %rcx,88(%rdi)
  1789. # qhasm: *(uint64 *)(rp + 96) = d2
  1790. # asm 1: movq <d2=int64#5,96(<rp=int64#1)
  1791. # asm 2: movq <d2=%r8,96(<rp=%rdi)
  1792. movq %r8,96(%rdi)
  1793. # qhasm: *(uint64 *)(rp + 104) = d3
  1794. # asm 1: movq <d3=int64#6,104(<rp=int64#1)
  1795. # asm 2: movq <d3=%r9,104(<rp=%rdi)
  1796. movq %r9,104(%rdi)
  1797. # qhasm: *(uint64 *)(rp + 112) = d4
  1798. # asm 1: movq <d4=int64#7,112(<rp=int64#1)
  1799. # asm 2: movq <d4=%rax,112(<rp=%rdi)
  1800. movq %rax,112(%rdi)
  # rz = rz + K4 - c_stack, limb by limb (no carry between limbs), written to
  # the five 64-bit words at rp+120..rp+152. K4 has limb 0 = batch_4P0 and
  # limbs 1..4 = batch_4P1234; c_stack is read from 136..168(%rsp).
  # rz0..rz4 live in %r10..%r14 and are updated in place.
  # NOTE(review): batch_4P0/batch_4P1234 are presumably the limbs of 4*p, added
  # so the per-limb subtraction cannot go negative; the constant values are not
  # visible here -- confirm.
  # The constant operands are RIP-relative, although the "asm 1"/"asm 2"
  # annotations show the bare symbol.
  1801. # qhasm: rz0 += *(uint64 *)&CRYPTO_NAMESPACE(batch_4P0)
  1802. # asm 1: add CRYPTO_NAMESPACE(batch_4P0),<rz0=int64#8
  1803. # asm 2: add CRYPTO_NAMESPACE(batch_4P0),<rz0=%r10
  1804. add CRYPTO_NAMESPACE(batch_4P0)(%rip),%r10
  1805. # qhasm: rz1 += *(uint64 *)&CRYPTO_NAMESPACE(batch_4P1234)
  1806. # asm 1: add CRYPTO_NAMESPACE(batch_4P1234),<rz1=int64#9
  1807. # asm 2: add CRYPTO_NAMESPACE(batch_4P1234),<rz1=%r11
  1808. add CRYPTO_NAMESPACE(batch_4P1234)(%rip),%r11
  1809. # qhasm: rz2 += *(uint64 *)&CRYPTO_NAMESPACE(batch_4P1234)
  1810. # asm 1: add CRYPTO_NAMESPACE(batch_4P1234),<rz2=int64#10
  1811. # asm 2: add CRYPTO_NAMESPACE(batch_4P1234),<rz2=%r12
  1812. add CRYPTO_NAMESPACE(batch_4P1234)(%rip),%r12
  1813. # qhasm: rz3 += *(uint64 *)&CRYPTO_NAMESPACE(batch_4P1234)
  1814. # asm 1: add CRYPTO_NAMESPACE(batch_4P1234),<rz3=int64#11
  1815. # asm 2: add CRYPTO_NAMESPACE(batch_4P1234),<rz3=%r13
  1816. add CRYPTO_NAMESPACE(batch_4P1234)(%rip),%r13
  1817. # qhasm: rz4 += *(uint64 *)&CRYPTO_NAMESPACE(batch_4P1234)
  1818. # asm 1: add CRYPTO_NAMESPACE(batch_4P1234),<rz4=int64#12
  1819. # asm 2: add CRYPTO_NAMESPACE(batch_4P1234),<rz4=%r14
  1820. add CRYPTO_NAMESPACE(batch_4P1234)(%rip),%r14
  # rz -= c
  1821. # qhasm: rz0 -= c0_stack
  1822. # asm 1: subq <c0_stack=stack64#18,<rz0=int64#8
  1823. # asm 2: subq <c0_stack=136(%rsp),<rz0=%r10
  1824. subq 136(%rsp),%r10
  1825. # qhasm: rz1 -= c1_stack
  1826. # asm 1: subq <c1_stack=stack64#19,<rz1=int64#9
  1827. # asm 2: subq <c1_stack=144(%rsp),<rz1=%r11
  1828. subq 144(%rsp),%r11
  1829. # qhasm: rz2 -= c2_stack
  1830. # asm 1: subq <c2_stack=stack64#20,<rz2=int64#10
  1831. # asm 2: subq <c2_stack=152(%rsp),<rz2=%r12
  1832. subq 152(%rsp),%r12
  1833. # qhasm: rz3 -= c3_stack
  1834. # asm 1: subq <c3_stack=stack64#21,<rz3=int64#11
  1835. # asm 2: subq <c3_stack=160(%rsp),<rz3=%r13
  1836. subq 160(%rsp),%r13
  1837. # qhasm: rz4 -= c4_stack
  1838. # asm 1: subq <c4_stack=stack64#22,<rz4=int64#12
  1839. # asm 2: subq <c4_stack=168(%rsp),<rz4=%r14
  1840. subq 168(%rsp),%r14
  # Store the result into the output structure at rp+120..rp+152.
  1841. # qhasm: *(uint64 *) (rp + 120) = rz0
  1842. # asm 1: movq <rz0=int64#8,120(<rp=int64#1)
  1843. # asm 2: movq <rz0=%r10,120(<rp=%rdi)
  1844. movq %r10,120(%rdi)
  1845. # qhasm: *(uint64 *) (rp + 128) = rz1
  1846. # asm 1: movq <rz1=int64#9,128(<rp=int64#1)
  1847. # asm 2: movq <rz1=%r11,128(<rp=%rdi)
  1848. movq %r11,128(%rdi)
  1849. # qhasm: *(uint64 *) (rp + 136) = rz2
  1850. # asm 1: movq <rz2=int64#10,136(<rp=int64#1)
  1851. # asm 2: movq <rz2=%r12,136(<rp=%rdi)
  1852. movq %r12,136(%rdi)
  1853. # qhasm: *(uint64 *) (rp + 144) = rz3
  1854. # asm 1: movq <rz3=int64#11,144(<rp=int64#1)
  1855. # asm 2: movq <rz3=%r13,144(<rp=%rdi)
  1856. movq %r13,144(%rdi)
  1857. # qhasm: *(uint64 *) (rp + 152) = rz4
  1858. # asm 1: movq <rz4=int64#12,152(<rp=int64#1)
  1859. # asm 2: movq <rz4=%r14,152(<rp=%rdi)
  1860. movq %r14,152(%rdi)
  # rx = (five 64-bit words at pp+0..pp+32) + (five 64-bit words at pp+40..pp+72),
  # added limb by limb with no carry between limbs, then spilled to
  # rx0_stack..rx4_stack at 96..128(%rsp).
  # These are stack slots stack64#13..#17, the same slots earlier code in this
  # routine addresses as b0_stack..b4_stack, so that data is overwritten here.
  # NOTE(review): pp+0.. and pp+40.. are presumably the first two coordinates
  # (X and Y) of the input point; the structure layout is not visible here --
  # confirm.
  1861. # qhasm: rx0 = *(uint64 *)(pp + 0)
  1862. # asm 1: movq 0(<pp=int64#2),>rx0=int64#3
  1863. # asm 2: movq 0(<pp=%rsi),>rx0=%rdx
  1864. movq 0(%rsi),%rdx
  1865. # qhasm: rx1 = *(uint64 *)(pp + 8)
  1866. # asm 1: movq 8(<pp=int64#2),>rx1=int64#4
  1867. # asm 2: movq 8(<pp=%rsi),>rx1=%rcx
  1868. movq 8(%rsi),%rcx
  1869. # qhasm: rx2 = *(uint64 *)(pp + 16)
  1870. # asm 1: movq 16(<pp=int64#2),>rx2=int64#5
  1871. # asm 2: movq 16(<pp=%rsi),>rx2=%r8
  1872. movq 16(%rsi),%r8
  1873. # qhasm: rx3 = *(uint64 *)(pp + 24)
  1874. # asm 1: movq 24(<pp=int64#2),>rx3=int64#6
  1875. # asm 2: movq 24(<pp=%rsi),>rx3=%r9
  1876. movq 24(%rsi),%r9
  1877. # qhasm: rx4 = *(uint64 *)(pp + 32)
  1878. # asm 1: movq 32(<pp=int64#2),>rx4=int64#7
  1879. # asm 2: movq 32(<pp=%rsi),>rx4=%rax
  1880. movq 32(%rsi),%rax
  # Add the second operand straight from memory.
  1881. # qhasm: rx0 += *(uint64 *)(pp + 40)
  1882. # asm 1: addq 40(<pp=int64#2),<rx0=int64#3
  1883. # asm 2: addq 40(<pp=%rsi),<rx0=%rdx
  1884. addq 40(%rsi),%rdx
  1885. # qhasm: rx1 += *(uint64 *)(pp + 48)
  1886. # asm 1: addq 48(<pp=int64#2),<rx1=int64#4
  1887. # asm 2: addq 48(<pp=%rsi),<rx1=%rcx
  1888. addq 48(%rsi),%rcx
  1889. # qhasm: rx2 += *(uint64 *)(pp + 56)
  1890. # asm 1: addq 56(<pp=int64#2),<rx2=int64#5
  1891. # asm 2: addq 56(<pp=%rsi),<rx2=%r8
  1892. addq 56(%rsi),%r8
  1893. # qhasm: rx3 += *(uint64 *)(pp + 64)
  1894. # asm 1: addq 64(<pp=int64#2),<rx3=int64#6
  1895. # asm 2: addq 64(<pp=%rsi),<rx3=%r9
  1896. addq 64(%rsi),%r9
  1897. # qhasm: rx4 += *(uint64 *)(pp + 72)
  1898. # asm 1: addq 72(<pp=int64#2),<rx4=int64#7
  1899. # asm 2: addq 72(<pp=%rsi),<rx4=%rax
  1900. addq 72(%rsi),%rax
  # Spill the sums so later code can use them as memory operands.
  1901. # qhasm: rx0_stack = rx0
  1902. # asm 1: movq <rx0=int64#3,>rx0_stack=stack64#13
  1903. # asm 2: movq <rx0=%rdx,>rx0_stack=96(%rsp)
  1904. movq %rdx,96(%rsp)
  1905. # qhasm: rx1_stack = rx1
  1906. # asm 1: movq <rx1=int64#4,>rx1_stack=stack64#14
  1907. # asm 2: movq <rx1=%rcx,>rx1_stack=104(%rsp)
  1908. movq %rcx,104(%rsp)
  1909. # qhasm: rx2_stack = rx2
  1910. # asm 1: movq <rx2=int64#5,>rx2_stack=stack64#15
  1911. # asm 2: movq <rx2=%r8,>rx2_stack=112(%rsp)
  1912. movq %r8,112(%rsp)
  1913. # qhasm: rx3_stack = rx3
  1914. # asm 1: movq <rx3=int64#6,>rx3_stack=stack64#16
  1915. # asm 2: movq <rx3=%r9,>rx3_stack=120(%rsp)
  1916. movq %r9,120(%rsp)
  1917. # qhasm: rx4_stack = rx4
  1918. # asm 1: movq <rx4=int64#7,>rx4_stack=stack64#17
  1919. # asm 2: movq <rx4=%rax,>rx4_stack=128(%rsp)
  1920. movq %rax,128(%rsp)
  1921. # qhasm: squarerax = rx0_stack
  1922. # asm 1: movq <rx0_stack=stack64#13,>squarerax=int64#7
  1923. # asm 2: movq <rx0_stack=96(%rsp),>squarerax=%rax
  1924. movq 96(%rsp),%rax
  1925. # qhasm: (uint128) squarerdx squarerax = squarerax * rx0_stack
  1926. # asm 1: mulq <rx0_stack=stack64#13
  1927. # asm 2: mulq <rx0_stack=96(%rsp)
  1928. mulq 96(%rsp)
  1929. # qhasm: rx0 = squarerax
  1930. # asm 1: mov <squarerax=int64#7,>rx0=int64#2
  1931. # asm 2: mov <squarerax=%rax,>rx0=%rsi
  1932. mov %rax,%rsi
  1933. # qhasm: squarer01 = squarerdx
  1934. # asm 1: mov <squarerdx=int64#3,>squarer01=int64#4
  1935. # asm 2: mov <squarerdx=%rdx,>squarer01=%rcx
  1936. mov %rdx,%rcx
  1937. # qhasm: squarerax = rx0_stack
  1938. # asm 1: movq <rx0_stack=stack64#13,>squarerax=int64#7
  1939. # asm 2: movq <rx0_stack=96(%rsp),>squarerax=%rax
  1940. movq 96(%rsp),%rax
  1941. # qhasm: squarerax <<= 1
  1942. # asm 1: shl $1,<squarerax=int64#7
  1943. # asm 2: shl $1,<squarerax=%rax
  1944. shl $1,%rax
  1945. # qhasm: (uint128) squarerdx squarerax = squarerax * rx1_stack
  1946. # asm 1: mulq <rx1_stack=stack64#14
  1947. # asm 2: mulq <rx1_stack=104(%rsp)
  1948. mulq 104(%rsp)
  1949. # qhasm: rx1 = squarerax
  1950. # asm 1: mov <squarerax=int64#7,>rx1=int64#5
  1951. # asm 2: mov <squarerax=%rax,>rx1=%r8
  1952. mov %rax,%r8
  1953. # qhasm: squarer11 = squarerdx
  1954. # asm 1: mov <squarerdx=int64#3,>squarer11=int64#6
  1955. # asm 2: mov <squarerdx=%rdx,>squarer11=%r9
  1956. mov %rdx,%r9
  1957. # qhasm: squarerax = rx0_stack
  1958. # asm 1: movq <rx0_stack=stack64#13,>squarerax=int64#7
  1959. # asm 2: movq <rx0_stack=96(%rsp),>squarerax=%rax
  1960. movq 96(%rsp),%rax
  1961. # qhasm: squarerax <<= 1
  1962. # asm 1: shl $1,<squarerax=int64#7
  1963. # asm 2: shl $1,<squarerax=%rax
  1964. shl $1,%rax
  1965. # qhasm: (uint128) squarerdx squarerax = squarerax * rx2_stack
  1966. # asm 1: mulq <rx2_stack=stack64#15
  1967. # asm 2: mulq <rx2_stack=112(%rsp)
  1968. mulq 112(%rsp)
  1969. # qhasm: rx2 = squarerax
  1970. # asm 1: mov <squarerax=int64#7,>rx2=int64#8
  1971. # asm 2: mov <squarerax=%rax,>rx2=%r10
  1972. mov %rax,%r10
  1973. # qhasm: squarer21 = squarerdx
  1974. # asm 1: mov <squarerdx=int64#3,>squarer21=int64#9
  1975. # asm 2: mov <squarerdx=%rdx,>squarer21=%r11
  1976. mov %rdx,%r11
  1977. # qhasm: squarerax = rx0_stack
  1978. # asm 1: movq <rx0_stack=stack64#13,>squarerax=int64#7
  1979. # asm 2: movq <rx0_stack=96(%rsp),>squarerax=%rax
  1980. movq 96(%rsp),%rax
  1981. # qhasm: squarerax <<= 1
  1982. # asm 1: shl $1,<squarerax=int64#7
  1983. # asm 2: shl $1,<squarerax=%rax
  1984. shl $1,%rax
  1985. # qhasm: (uint128) squarerdx squarerax = squarerax * rx3_stack
  1986. # asm 1: mulq <rx3_stack=stack64#16
  1987. # asm 2: mulq <rx3_stack=120(%rsp)
  1988. mulq 120(%rsp)
  1989. # qhasm: rx3 = squarerax
  1990. # asm 1: mov <squarerax=int64#7,>rx3=int64#10
  1991. # asm 2: mov <squarerax=%rax,>rx3=%r12
  1992. mov %rax,%r12
  1993. # qhasm: squarer31 = squarerdx
  1994. # asm 1: mov <squarerdx=int64#3,>squarer31=int64#11
  1995. # asm 2: mov <squarerdx=%rdx,>squarer31=%r13
  1996. mov %rdx,%r13
  1997. # qhasm: squarerax = rx0_stack
  1998. # asm 1: movq <rx0_stack=stack64#13,>squarerax=int64#7
  1999. # asm 2: movq <rx0_stack=96(%rsp),>squarerax=%rax
  2000. movq 96(%rsp),%rax
  2001. # qhasm: squarerax <<= 1
  2002. # asm 1: shl $1,<squarerax=int64#7
  2003. # asm 2: shl $1,<squarerax=%rax
  2004. shl $1,%rax
  2005. # qhasm: (uint128) squarerdx squarerax = squarerax * rx4_stack
  2006. # asm 1: mulq <rx4_stack=stack64#17
  2007. # asm 2: mulq <rx4_stack=128(%rsp)
  2008. mulq 128(%rsp)
  2009. # qhasm: rx4 = squarerax
  2010. # asm 1: mov <squarerax=int64#7,>rx4=int64#12
  2011. # asm 2: mov <squarerax=%rax,>rx4=%r14
  2012. mov %rax,%r14
  2013. # qhasm: squarer41 = squarerdx
  2014. # asm 1: mov <squarerdx=int64#3,>squarer41=int64#13
  2015. # asm 2: mov <squarerdx=%rdx,>squarer41=%r15
  2016. mov %rdx,%r15
  2017. # qhasm: squarerax = rx1_stack
  2018. # asm 1: movq <rx1_stack=stack64#14,>squarerax=int64#7
  2019. # asm 2: movq <rx1_stack=104(%rsp),>squarerax=%rax
  2020. movq 104(%rsp),%rax
  2021. # qhasm: (uint128) squarerdx squarerax = squarerax * rx1_stack
  2022. # asm 1: mulq <rx1_stack=stack64#14
  2023. # asm 2: mulq <rx1_stack=104(%rsp)
  2024. mulq 104(%rsp)
  2025. # qhasm: carry? rx2 += squarerax
  2026. # asm 1: add <squarerax=int64#7,<rx2=int64#8
  2027. # asm 2: add <squarerax=%rax,<rx2=%r10
  2028. add %rax,%r10
  2029. # qhasm: squarer21 += squarerdx + carry
  2030. # asm 1: adc <squarerdx=int64#3,<squarer21=int64#9
  2031. # asm 2: adc <squarerdx=%rdx,<squarer21=%r11
  2032. adc %rdx,%r11
  2033. # qhasm: squarerax = rx1_stack
  2034. # asm 1: movq <rx1_stack=stack64#14,>squarerax=int64#7
  2035. # asm 2: movq <rx1_stack=104(%rsp),>squarerax=%rax
  2036. movq 104(%rsp),%rax
  2037. # qhasm: squarerax <<= 1
  2038. # asm 1: shl $1,<squarerax=int64#7
  2039. # asm 2: shl $1,<squarerax=%rax
  2040. shl $1,%rax
  2041. # qhasm: (uint128) squarerdx squarerax = squarerax * rx2_stack
  2042. # asm 1: mulq <rx2_stack=stack64#15
  2043. # asm 2: mulq <rx2_stack=112(%rsp)
  2044. mulq 112(%rsp)
  2045. # qhasm: carry? rx3 += squarerax
  2046. # asm 1: add <squarerax=int64#7,<rx3=int64#10
  2047. # asm 2: add <squarerax=%rax,<rx3=%r12
  2048. add %rax,%r12
  2049. # qhasm: squarer31 += squarerdx + carry
  2050. # asm 1: adc <squarerdx=int64#3,<squarer31=int64#11
  2051. # asm 2: adc <squarerdx=%rdx,<squarer31=%r13
  2052. adc %rdx,%r13
  2053. # qhasm: squarerax = rx1_stack
  2054. # asm 1: movq <rx1_stack=stack64#14,>squarerax=int64#7
  2055. # asm 2: movq <rx1_stack=104(%rsp),>squarerax=%rax
  2056. movq 104(%rsp),%rax
  2057. # qhasm: squarerax <<= 1
  2058. # asm 1: shl $1,<squarerax=int64#7
  2059. # asm 2: shl $1,<squarerax=%rax
  2060. shl $1,%rax
  2061. # qhasm: (uint128) squarerdx squarerax = squarerax * rx3_stack
  2062. # asm 1: mulq <rx3_stack=stack64#16
  2063. # asm 2: mulq <rx3_stack=120(%rsp)
  2064. mulq 120(%rsp)
  2065. # qhasm: carry? rx4 += squarerax
  2066. # asm 1: add <squarerax=int64#7,<rx4=int64#12
  2067. # asm 2: add <squarerax=%rax,<rx4=%r14
  2068. add %rax,%r14
  2069. # qhasm: squarer41 += squarerdx + carry
  2070. # asm 1: adc <squarerdx=int64#3,<squarer41=int64#13
  2071. # asm 2: adc <squarerdx=%rdx,<squarer41=%r15
  2072. adc %rdx,%r15
  2073. # qhasm: squarerax = rx1_stack
  2074. # asm 1: movq <rx1_stack=stack64#14,>squarerax=int64#3
  2075. # asm 2: movq <rx1_stack=104(%rsp),>squarerax=%rdx
  2076. movq 104(%rsp),%rdx
  2077. # qhasm: squarerax *= 38
  2078. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  2079. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  2080. imulq $38,%rdx,%rax
  2081. # qhasm: (uint128) squarerdx squarerax = squarerax * rx4_stack
  2082. # asm 1: mulq <rx4_stack=stack64#17
  2083. # asm 2: mulq <rx4_stack=128(%rsp)
  2084. mulq 128(%rsp)
  2085. # qhasm: carry? rx0 += squarerax
  2086. # asm 1: add <squarerax=int64#7,<rx0=int64#2
  2087. # asm 2: add <squarerax=%rax,<rx0=%rsi
  2088. add %rax,%rsi
  2089. # qhasm: squarer01 += squarerdx + carry
  2090. # asm 1: adc <squarerdx=int64#3,<squarer01=int64#4
  2091. # asm 2: adc <squarerdx=%rdx,<squarer01=%rcx
  2092. adc %rdx,%rcx
  2093. # qhasm: squarerax = rx2_stack
  2094. # asm 1: movq <rx2_stack=stack64#15,>squarerax=int64#7
  2095. # asm 2: movq <rx2_stack=112(%rsp),>squarerax=%rax
  2096. movq 112(%rsp),%rax
  2097. # qhasm: (uint128) squarerdx squarerax = squarerax * rx2_stack
  2098. # asm 1: mulq <rx2_stack=stack64#15
  2099. # asm 2: mulq <rx2_stack=112(%rsp)
  2100. mulq 112(%rsp)
  2101. # qhasm: carry? rx4 += squarerax
  2102. # asm 1: add <squarerax=int64#7,<rx4=int64#12
  2103. # asm 2: add <squarerax=%rax,<rx4=%r14
  2104. add %rax,%r14
  2105. # qhasm: squarer41 += squarerdx + carry
  2106. # asm 1: adc <squarerdx=int64#3,<squarer41=int64#13
  2107. # asm 2: adc <squarerdx=%rdx,<squarer41=%r15
  2108. adc %rdx,%r15
  2109. # qhasm: squarerax = rx2_stack
  2110. # asm 1: movq <rx2_stack=stack64#15,>squarerax=int64#3
  2111. # asm 2: movq <rx2_stack=112(%rsp),>squarerax=%rdx
  2112. movq 112(%rsp),%rdx
  2113. # qhasm: squarerax *= 38
  2114. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  2115. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  2116. imulq $38,%rdx,%rax
  2117. # qhasm: (uint128) squarerdx squarerax = squarerax * rx3_stack
  2118. # asm 1: mulq <rx3_stack=stack64#16
  2119. # asm 2: mulq <rx3_stack=120(%rsp)
  2120. mulq 120(%rsp)
  2121. # qhasm: carry? rx0 += squarerax
  2122. # asm 1: add <squarerax=int64#7,<rx0=int64#2
  2123. # asm 2: add <squarerax=%rax,<rx0=%rsi
  2124. add %rax,%rsi
  2125. # qhasm: squarer01 += squarerdx + carry
  2126. # asm 1: adc <squarerdx=int64#3,<squarer01=int64#4
  2127. # asm 2: adc <squarerdx=%rdx,<squarer01=%rcx
  2128. adc %rdx,%rcx
  2129. # qhasm: squarerax = rx2_stack
  2130. # asm 1: movq <rx2_stack=stack64#15,>squarerax=int64#3
  2131. # asm 2: movq <rx2_stack=112(%rsp),>squarerax=%rdx
  2132. movq 112(%rsp),%rdx
  2133. # qhasm: squarerax *= 38
  2134. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  2135. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  2136. imulq $38,%rdx,%rax
  2137. # qhasm: (uint128) squarerdx squarerax = squarerax * rx4_stack
  2138. # asm 1: mulq <rx4_stack=stack64#17
  2139. # asm 2: mulq <rx4_stack=128(%rsp)
  2140. mulq 128(%rsp)
  2141. # qhasm: carry? rx1 += squarerax
  2142. # asm 1: add <squarerax=int64#7,<rx1=int64#5
  2143. # asm 2: add <squarerax=%rax,<rx1=%r8
  2144. add %rax,%r8
  2145. # qhasm: squarer11 += squarerdx + carry
  2146. # asm 1: adc <squarerdx=int64#3,<squarer11=int64#6
  2147. # asm 2: adc <squarerdx=%rdx,<squarer11=%r9
  2148. adc %rdx,%r9
  2149. # qhasm: squarerax = rx3_stack
  2150. # asm 1: movq <rx3_stack=stack64#16,>squarerax=int64#3
  2151. # asm 2: movq <rx3_stack=120(%rsp),>squarerax=%rdx
  2152. movq 120(%rsp),%rdx
  2153. # qhasm: squarerax *= 19
  2154. # asm 1: imulq $19,<squarerax=int64#3,>squarerax=int64#7
  2155. # asm 2: imulq $19,<squarerax=%rdx,>squarerax=%rax
  2156. imulq $19,%rdx,%rax
  2157. # qhasm: (uint128) squarerdx squarerax = squarerax * rx3_stack
  2158. # asm 1: mulq <rx3_stack=stack64#16
  2159. # asm 2: mulq <rx3_stack=120(%rsp)
  2160. mulq 120(%rsp)
  2161. # qhasm: carry? rx1 += squarerax
  2162. # asm 1: add <squarerax=int64#7,<rx1=int64#5
  2163. # asm 2: add <squarerax=%rax,<rx1=%r8
  2164. add %rax,%r8
  2165. # qhasm: squarer11 += squarerdx + carry
  2166. # asm 1: adc <squarerdx=int64#3,<squarer11=int64#6
  2167. # asm 2: adc <squarerdx=%rdx,<squarer11=%r9
  2168. adc %rdx,%r9
  2169. # qhasm: squarerax = rx3_stack
  2170. # asm 1: movq <rx3_stack=stack64#16,>squarerax=int64#3
  2171. # asm 2: movq <rx3_stack=120(%rsp),>squarerax=%rdx
  2172. movq 120(%rsp),%rdx
  2173. # qhasm: squarerax *= 38
  2174. # asm 1: imulq $38,<squarerax=int64#3,>squarerax=int64#7
  2175. # asm 2: imulq $38,<squarerax=%rdx,>squarerax=%rax
  2176. imulq $38,%rdx,%rax
  2177. # qhasm: (uint128) squarerdx squarerax = squarerax * rx4_stack
  2178. # asm 1: mulq <rx4_stack=stack64#17
  2179. # asm 2: mulq <rx4_stack=128(%rsp)
  2180. mulq 128(%rsp)
  2181. # qhasm: carry? rx2 += squarerax
  2182. # asm 1: add <squarerax=int64#7,<rx2=int64#8
  2183. # asm 2: add <squarerax=%rax,<rx2=%r10
  2184. add %rax,%r10
  2185. # qhasm: squarer21 += squarerdx + carry
  2186. # asm 1: adc <squarerdx=int64#3,<squarer21=int64#9
  2187. # asm 2: adc <squarerdx=%rdx,<squarer21=%r11
  2188. adc %rdx,%r11
  2189. # qhasm: squarerax = rx4_stack
  2190. # asm 1: movq <rx4_stack=stack64#17,>squarerax=int64#3
  2191. # asm 2: movq <rx4_stack=128(%rsp),>squarerax=%rdx
  2192. movq 128(%rsp),%rdx
  2193. # qhasm: squarerax *= 19
  2194. # asm 1: imulq $19,<squarerax=int64#3,>squarerax=int64#7
  2195. # asm 2: imulq $19,<squarerax=%rdx,>squarerax=%rax
  2196. imulq $19,%rdx,%rax
  2197. # qhasm: (uint128) squarerdx squarerax = squarerax * rx4_stack
  2198. # asm 1: mulq <rx4_stack=stack64#17
  2199. # asm 2: mulq <rx4_stack=128(%rsp)
  2200. mulq 128(%rsp)
  2201. # qhasm: carry? rx3 += squarerax
  2202. # asm 1: add <squarerax=int64#7,<rx3=int64#10
  2203. # asm 2: add <squarerax=%rax,<rx3=%r12
  2204. add %rax,%r12
  2205. # qhasm: squarer31 += squarerdx + carry
  2206. # asm 1: adc <squarerdx=int64#3,<squarer31=int64#11
  2207. # asm 2: adc <squarerdx=%rdx,<squarer31=%r13
  2208. adc %rdx,%r13
  2209. # qhasm: squareredmask = *(uint64 *) &CRYPTO_NAMESPACE(batch_REDMASK51)
  2210. # asm 1: movq CRYPTO_NAMESPACE(batch_REDMASK51),>squareredmask=int64#3
  2211. # asm 2: movq CRYPTO_NAMESPACE(batch_REDMASK51),>squareredmask=%rdx
  2212. movq CRYPTO_NAMESPACE(batch_REDMASK51)(%rip),%rdx
  2213. # qhasm: squarer01 = (squarer01.rx0) << 13
  2214. # asm 1: shld $13,<rx0=int64#2,<squarer01=int64#4
  2215. # asm 2: shld $13,<rx0=%rsi,<squarer01=%rcx
  2216. shld $13,%rsi,%rcx
  2217. # qhasm: rx0 &= squareredmask
  2218. # asm 1: and <squareredmask=int64#3,<rx0=int64#2
  2219. # asm 2: and <squareredmask=%rdx,<rx0=%rsi
  2220. and %rdx,%rsi
  2221. # qhasm: squarer11 = (squarer11.rx1) << 13
  2222. # asm 1: shld $13,<rx1=int64#5,<squarer11=int64#6
  2223. # asm 2: shld $13,<rx1=%r8,<squarer11=%r9
  2224. shld $13,%r8,%r9
  2225. # qhasm: rx1 &= squareredmask
  2226. # asm 1: and <squareredmask=int64#3,<rx1=int64#5
  2227. # asm 2: and <squareredmask=%rdx,<rx1=%r8
  2228. and %rdx,%r8
  2229. # qhasm: rx1 += squarer01
  2230. # asm 1: add <squarer01=int64#4,<rx1=int64#5
  2231. # asm 2: add <squarer01=%rcx,<rx1=%r8
  2232. add %rcx,%r8
  2233. # qhasm: squarer21 = (squarer21.rx2) << 13
  2234. # asm 1: shld $13,<rx2=int64#8,<squarer21=int64#9
  2235. # asm 2: shld $13,<rx2=%r10,<squarer21=%r11
  2236. shld $13,%r10,%r11
  2237. # qhasm: rx2 &= squareredmask
  2238. # asm 1: and <squareredmask=int64#3,<rx2=int64#8
  2239. # asm 2: and <squareredmask=%rdx,<rx2=%r10
  2240. and %rdx,%r10
  2241. # qhasm: rx2 += squarer11
  2242. # asm 1: add <squarer11=int64#6,<rx2=int64#8
  2243. # asm 2: add <squarer11=%r9,<rx2=%r10
  2244. add %r9,%r10
  2245. # qhasm: squarer31 = (squarer31.rx3) << 13
  2246. # asm 1: shld $13,<rx3=int64#10,<squarer31=int64#11
  2247. # asm 2: shld $13,<rx3=%r12,<squarer31=%r13
  2248. shld $13,%r12,%r13
  2249. # qhasm: rx3 &= squareredmask
  2250. # asm 1: and <squareredmask=int64#3,<rx3=int64#10
  2251. # asm 2: and <squareredmask=%rdx,<rx3=%r12
  2252. and %rdx,%r12
  2253. # qhasm: rx3 += squarer21
  2254. # asm 1: add <squarer21=int64#9,<rx3=int64#10
  2255. # asm 2: add <squarer21=%r11,<rx3=%r12
  2256. add %r11,%r12
  2257. # qhasm: squarer41 = (squarer41.rx4) << 13
  2258. # asm 1: shld $13,<rx4=int64#12,<squarer41=int64#13
  2259. # asm 2: shld $13,<rx4=%r14,<squarer41=%r15
  2260. shld $13,%r14,%r15
  2261. # qhasm: rx4 &= squareredmask
  2262. # asm 1: and <squareredmask=int64#3,<rx4=int64#12
  2263. # asm 2: and <squareredmask=%rdx,<rx4=%r14
  2264. and %rdx,%r14
  2265. # qhasm: rx4 += squarer31
  2266. # asm 1: add <squarer31=int64#11,<rx4=int64#12
  2267. # asm 2: add <squarer31=%r13,<rx4=%r14
  2268. add %r13,%r14
  2269. # qhasm: squarer41 = squarer41 * 19
  2270. # asm 1: imulq $19,<squarer41=int64#13,>squarer41=int64#4
  2271. # asm 2: imulq $19,<squarer41=%r15,>squarer41=%rcx
  2272. imulq $19,%r15,%rcx
  2273. # qhasm: rx0 += squarer41
  2274. # asm 1: add <squarer41=int64#4,<rx0=int64#2
  2275. # asm 2: add <squarer41=%rcx,<rx0=%rsi
  2276. add %rcx,%rsi
  2277. # qhasm: squaret = rx0
  2278. # asm 1: mov <rx0=int64#2,>squaret=int64#4
  2279. # asm 2: mov <rx0=%rsi,>squaret=%rcx
  2280. mov %rsi,%rcx
  2281. # qhasm: (uint64) squaret >>= 51
  2282. # asm 1: shr $51,<squaret=int64#4
  2283. # asm 2: shr $51,<squaret=%rcx
  2284. shr $51,%rcx
  2285. # qhasm: squaret += rx1
  2286. # asm 1: add <rx1=int64#5,<squaret=int64#4
  2287. # asm 2: add <rx1=%r8,<squaret=%rcx
  2288. add %r8,%rcx
  2289. # qhasm: rx0 &= squareredmask
  2290. # asm 1: and <squareredmask=int64#3,<rx0=int64#2
  2291. # asm 2: and <squareredmask=%rdx,<rx0=%rsi
  2292. and %rdx,%rsi
  2293. # qhasm: rx1 = squaret
  2294. # asm 1: mov <squaret=int64#4,>rx1=int64#5
  2295. # asm 2: mov <squaret=%rcx,>rx1=%r8
  2296. mov %rcx,%r8
  2297. # qhasm: (uint64) squaret >>= 51
  2298. # asm 1: shr $51,<squaret=int64#4
  2299. # asm 2: shr $51,<squaret=%rcx
  2300. shr $51,%rcx
  2301. # qhasm: squaret += rx2
  2302. # asm 1: add <rx2=int64#8,<squaret=int64#4
  2303. # asm 2: add <rx2=%r10,<squaret=%rcx
  2304. add %r10,%rcx
  2305. # qhasm: rx1 &= squareredmask
  2306. # asm 1: and <squareredmask=int64#3,<rx1=int64#5
  2307. # asm 2: and <squareredmask=%rdx,<rx1=%r8
  2308. and %rdx,%r8
  2309. # qhasm: rx2 = squaret
  2310. # asm 1: mov <squaret=int64#4,>rx2=int64#6
  2311. # asm 2: mov <squaret=%rcx,>rx2=%r9
  2312. mov %rcx,%r9
  2313. # qhasm: (uint64) squaret >>= 51
  2314. # asm 1: shr $51,<squaret=int64#4
  2315. # asm 2: shr $51,<squaret=%rcx
  2316. shr $51,%rcx
  2317. # qhasm: squaret += rx3
  2318. # asm 1: add <rx3=int64#10,<squaret=int64#4
  2319. # asm 2: add <rx3=%r12,<squaret=%rcx
  2320. add %r12,%rcx
  2321. # qhasm: rx2 &= squareredmask
  2322. # asm 1: and <squareredmask=int64#3,<rx2=int64#6
  2323. # asm 2: and <squareredmask=%rdx,<rx2=%r9
  2324. and %rdx,%r9
  2325. # qhasm: rx3 = squaret
  2326. # asm 1: mov <squaret=int64#4,>rx3=int64#7
  2327. # asm 2: mov <squaret=%rcx,>rx3=%rax
  2328. mov %rcx,%rax
  2329. # qhasm: (uint64) squaret >>= 51
  2330. # asm 1: shr $51,<squaret=int64#4
  2331. # asm 2: shr $51,<squaret=%rcx
  2332. shr $51,%rcx
  2333. # qhasm: squaret += rx4
  2334. # asm 1: add <rx4=int64#12,<squaret=int64#4
  2335. # asm 2: add <rx4=%r14,<squaret=%rcx
  2336. add %r14,%rcx
  2337. # qhasm: rx3 &= squareredmask
  2338. # asm 1: and <squareredmask=int64#3,<rx3=int64#7
  2339. # asm 2: and <squareredmask=%rdx,<rx3=%rax
  2340. and %rdx,%rax
  2341. # qhasm: rx4 = squaret
  2342. # asm 1: mov <squaret=int64#4,>rx4=int64#8
  2343. # asm 2: mov <squaret=%rcx,>rx4=%r10
  2344. mov %rcx,%r10
  2345. # qhasm: (uint64) squaret >>= 51
  2346. # asm 1: shr $51,<squaret=int64#4
  2347. # asm 2: shr $51,<squaret=%rcx
  2348. shr $51,%rcx
  2349. # qhasm: squaret *= 19
  2350. # asm 1: imulq $19,<squaret=int64#4,>squaret=int64#4
  2351. # asm 2: imulq $19,<squaret=%rcx,>squaret=%rcx
  2352. imulq $19,%rcx,%rcx
  2353. # qhasm: rx0 += squaret
  2354. # asm 1: add <squaret=int64#4,<rx0=int64#2
  2355. # asm 2: add <squaret=%rcx,<rx0=%rsi
  2356. add %rcx,%rsi
  2357. # qhasm: rx4 &= squareredmask
  2358. # asm 1: and <squareredmask=int64#3,<rx4=int64#8
  2359. # asm 2: and <squareredmask=%rdx,<rx4=%r10
  2360. and %rdx,%r10
  2361. # qhasm: rx0 += d0_stack
  2362. # asm 1: addq <d0_stack=stack64#8,<rx0=int64#2
  2363. # asm 2: addq <d0_stack=56(%rsp),<rx0=%rsi
  2364. addq 56(%rsp),%rsi
  2365. # qhasm: rx1 += d1_stack
  2366. # asm 1: addq <d1_stack=stack64#9,<rx1=int64#5
  2367. # asm 2: addq <d1_stack=64(%rsp),<rx1=%r8
  2368. addq 64(%rsp),%r8
  2369. # qhasm: rx2 += d2_stack
  2370. # asm 1: addq <d2_stack=stack64#10,<rx2=int64#6
  2371. # asm 2: addq <d2_stack=72(%rsp),<rx2=%r9
  2372. addq 72(%rsp),%r9
  2373. # qhasm: rx3 += d3_stack
  2374. # asm 1: addq <d3_stack=stack64#11,<rx3=int64#7
  2375. # asm 2: addq <d3_stack=80(%rsp),<rx3=%rax
  2376. addq 80(%rsp),%rax
  2377. # qhasm: rx4 += d4_stack
  2378. # asm 1: addq <d4_stack=stack64#12,<rx4=int64#8
  2379. # asm 2: addq <d4_stack=88(%rsp),<rx4=%r10
  2380. addq 88(%rsp),%r10
  2381. # qhasm: rx0 += e0_stack
  2382. # asm 1: addq <e0_stack=stack64#23,<rx0=int64#2
  2383. # asm 2: addq <e0_stack=176(%rsp),<rx0=%rsi
  2384. addq 176(%rsp),%rsi
  2385. # qhasm: rx1 += e1_stack
  2386. # asm 1: addq <e1_stack=stack64#24,<rx1=int64#5
  2387. # asm 2: addq <e1_stack=184(%rsp),<rx1=%r8
  2388. addq 184(%rsp),%r8
  2389. # qhasm: rx2 += e2_stack
  2390. # asm 1: addq <e2_stack=stack64#25,<rx2=int64#6
  2391. # asm 2: addq <e2_stack=192(%rsp),<rx2=%r9
  2392. addq 192(%rsp),%r9
  2393. # qhasm: rx3 += e3_stack
  2394. # asm 1: addq <e3_stack=stack64#26,<rx3=int64#7
  2395. # asm 2: addq <e3_stack=200(%rsp),<rx3=%rax
  2396. addq 200(%rsp),%rax
  2397. # qhasm: rx4 += e4_stack
  2398. # asm 1: addq <e4_stack=stack64#27,<rx4=int64#8
  2399. # asm 2: addq <e4_stack=208(%rsp),<rx4=%r10
  2400. addq 208(%rsp),%r10
  2401. # qhasm: *(uint64 *)(rp + 0) = rx0
  2402. # asm 1: movq <rx0=int64#2,0(<rp=int64#1)
  2403. # asm 2: movq <rx0=%rsi,0(<rp=%rdi)
  2404. movq %rsi,0(%rdi)
  2405. # qhasm: *(uint64 *)(rp + 8) = rx1
  2406. # asm 1: movq <rx1=int64#5,8(<rp=int64#1)
  2407. # asm 2: movq <rx1=%r8,8(<rp=%rdi)
  2408. movq %r8,8(%rdi)
  2409. # qhasm: *(uint64 *)(rp + 16) = rx2
  2410. # asm 1: movq <rx2=int64#6,16(<rp=int64#1)
  2411. # asm 2: movq <rx2=%r9,16(<rp=%rdi)
  2412. movq %r9,16(%rdi)
  2413. # qhasm: *(uint64 *)(rp + 24) = rx3
  2414. # asm 1: movq <rx3=int64#7,24(<rp=int64#1)
  2415. # asm 2: movq <rx3=%rax,24(<rp=%rdi)
  2416. movq %rax,24(%rdi)
  2417. # qhasm: *(uint64 *)(rp + 32) = rx4
  2418. # asm 1: movq <rx4=int64#8,32(<rp=int64#1)
  2419. # asm 2: movq <rx4=%r10,32(<rp=%rdi)
  2420. movq %r10,32(%rdi)
  2421. # qhasm: caller1 = caller1_stack
  2422. # asm 1: movq <caller1_stack=stack64#1,>caller1=int64#9
  2423. # asm 2: movq <caller1_stack=0(%rsp),>caller1=%r11
  2424. movq 0(%rsp),%r11
  2425. # qhasm: caller2 = caller2_stack
  2426. # asm 1: movq <caller2_stack=stack64#2,>caller2=int64#10
  2427. # asm 2: movq <caller2_stack=8(%rsp),>caller2=%r12
  2428. movq 8(%rsp),%r12
  2429. # qhasm: caller3 = caller3_stack
  2430. # asm 1: movq <caller3_stack=stack64#3,>caller3=int64#11
  2431. # asm 2: movq <caller3_stack=16(%rsp),>caller3=%r13
  2432. movq 16(%rsp),%r13
  2433. # qhasm: caller4 = caller4_stack
  2434. # asm 1: movq <caller4_stack=stack64#4,>caller4=int64#12
  2435. # asm 2: movq <caller4_stack=24(%rsp),>caller4=%r14
  2436. movq 24(%rsp),%r14
  2437. # qhasm: caller5 = caller5_stack
  2438. # asm 1: movq <caller5_stack=stack64#5,>caller5=int64#13
  2439. # asm 2: movq <caller5_stack=32(%rsp),>caller5=%r15
  2440. movq 32(%rsp),%r15
  2441. # qhasm: caller6 = caller6_stack
  2442. # asm 1: movq <caller6_stack=stack64#6,>caller6=int64#14
  2443. # asm 2: movq <caller6_stack=40(%rsp),>caller6=%rbx
  2444. movq 40(%rsp),%rbx
  2445. # qhasm: caller7 = caller7_stack
  2446. # asm 1: movq <caller7_stack=stack64#7,>caller7=int64#15
  2447. # asm 2: movq <caller7_stack=48(%rsp),>caller7=%rbp
  2448. movq 48(%rsp),%rbp
  2449. # qhasm: leave
  2450. add %r11,%rsp
  2451. mov %rdi,%rax
  2452. mov %rsi,%rdx
  2453. ret