# ----------------------------------------------------------------------
# bn_mul_mont -- word-serial Montgomery multiplication, s390x 64-bit.
#
# Presumed C prototype (inferred from the register and stack usage
# below -- confirm against the C caller):
#	int bn_mul_mont(uint64_t *rp, const uint64_t *ap,
#			const uint64_t *bp, const uint64_t *np,
#			const uint64_t *n0, int num);
#
# Inputs:
#	%r2		rp, result vector, num words
#	%r3		ap, multiplicand, num words
#	%r4		bp, multiplier, num words
#	%r5		np, modulus, num words
#	%r6		pointer to n0; a single word is loaded from it
#	164(%r15)	num, read as a signed 32-bit int
# Vectors are indexed from the least significant word (carries move
# from index 0 upwards).  The low word of tp[0]+np[0]*m1 is never
# stored, so n0 is expected to satisfy np[0]*n0 == -1 mod 2^64.
#
# Returns in %r2:
#	0	num*8 < 16 or num*8 > 96; rp is not written
#	1	rp[0..num-1] holds the reduced product
#
# Stack:
#	rp and %r3-%r15 are parked at 2*8(%r15) and up in the frame the
#	caller provides.  The routine then lowers %r15 by 160+8+num*8.
#	The scratch vector tp sits at 160(%r15) and is num+1 words long;
#	the extra word holds the topmost carry.
#
# Register roles inside the loops:
#	%r0	m1 = tp[0]*n0 (low 64 bits)
#	%r1	num-1 (inner loop trip count)
#	%r2	bp[i]
#	%r7	j as a byte offset
#	%r8:%r9	  ap[j]*bp[i] (high:low)
#	%r10:%r11 np[j]*m1    (high:low)
#	%r12	running high word of the ap*bp chain
#	%r13	running high word of the np*m1 chain
#	%r14	loop counter (return address is reloaded by lmg on exit)
#
# Clobbers %r0, %r1, %r3, %r4, %r5 and the condition code.
# %r6-%r15 are reloaded from the save area before returning.
# ----------------------------------------------------------------------
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
bn_mul_mont:
	lgf	%r1,164(%r15)	# pull num (sign-extended 32-bit load)
	sla	%r1,3	# num *= 8, %r1 now enumerates bytes
	la	%r4,0(%r1,%r4)	# %r4 = &bp[num]; stmg below parks it as end marker

	stg	%r2,2*8(%r15)	# park rp, %r2 is about to be reused

	cghi	%r1,16		#
	lghi	%r2,0		# does not touch the condition code
	blr	%r14		# return 0 when num*8 < 16
	cghi	%r1,96		#
	bhr	%r14		# return 0 when num*8 > 96
	stmg	%r3,%r15,3*8(%r15)	# park ap, &bp[num], np, n0 pointer, %r7-%r15

	lghi	%r2,-160-8	# leave room for carry bit
	lcgr	%r7,%r1		# -num*8
	lgr	%r0,%r15	# keep the incoming sp for the back chain
	la	%r2,0(%r2,%r15)
	la	%r15,0(%r7,%r2)	# alloca: sp -= 160+8+num*8
	stg	%r0,0(%r15)	# back chain

	sra	%r1,3		# restore num (words)
	la	%r4,0(%r7,%r4)	# restore bp
	ahi	%r1,-1		# adjust num for inner loop (j runs 1..num-1)
	lg	%r6,0(%r6)	# pull n0


	# ---- first pass, i=0: tp = (ap*bp[0] + np*m1) >> 64 ----
	lg	%r2,0(%r4)	# bp[0]

	lg	%r9,0(%r3)	# ap[0]

	mlgr	%r8,%r2	# %r8:%r9 = ap[0]*bp[0]
	lgr	%r12,%r8	# high word carries into j=1

	lgr	%r0,%r9	# "tp[0]"*n0
	msgr	%r0,%r6	# m1, low 64 bits only

	lg	%r11,0(%r5)	# np[0]

	mlgr	%r10,%r0	# %r10:%r11 = np[0]*m1
	algr	%r11,%r9	# +="tp[0]", sum is dropped and only the carry is used
	lghi	%r13,0
	alcgr	%r13,%r10	# high word plus carry, carries into j=1

	la	%r7,8		# j=1
	lr	%r14,%r1	# trip count = num-1

.align	16
.L1st:
	lg	%r9,0(%r7,%r3)	# ap[j]

	mlgr	%r8,%r2	# ap[j]*bp[0]
	algr	%r9,%r12	# += high word from j-1
	lghi	%r12,0
	alcgr	%r12,%r8	# new high word of the ap*bp chain

	lg	%r11,0(%r7,%r5)	# np[j]

	mlgr	%r10,%r0	# np[j]*m1
	algr	%r11,%r13	# += high word from j-1
	lghi	%r13,0
	alcgr	%r10,%r13	# fold that carry into the high word
	algr	%r11,%r9	# +="tp[j]"
	alcgr	%r13,%r10	# new high word of the np*m1 chain

	stg	%r11,160-8(%r7,%r15)	# tp[j-1]=
	la	%r7,8(%r7)	# j++
	brct	%r14,.L1st

	algr	%r13,%r12	# join both chains
	lghi	%r12,0
	alcgr	%r12,%r12	# upmost overflow bit
	stg	%r13,160-8(%r7,%r15)	# tp[num-1]=
	stg	%r12,160(%r7,%r15)	# tp[num]= overflow bit
	la	%r4,8(%r4)	# bp++

	# ---- remaining passes: tp = (tp + ap*bp[i] + np*m1) >> 64 ----
.Louter:
	lg	%r2,0(%r4)	# bp[i]

	lg	%r9,0(%r3)	# ap[0]

	mlgr	%r8,%r2	# ap[0]*bp[i]
	alg	%r9,160(%r15)	# +=tp[0]
	lghi	%r12,0
	alcgr	%r12,%r8	# high word carries into j=1

	lgr	%r0,%r9
	msgr	%r0,%r6	# tp[0]*n0

	lg	%r11,0(%r5)	# np[0]

	mlgr	%r10,%r0	# np[0]*m1
	algr	%r11,%r9	# +="tp[0]", sum is dropped and only the carry is used
	lghi	%r13,0
	alcgr	%r13,%r10	# high word plus carry, carries into j=1

	la	%r7,8		# j=1
	lr	%r14,%r1	# trip count = num-1

.align	16
.Linner:
	lg	%r9,0(%r7,%r3)	# ap[j]

	mlgr	%r8,%r2	# ap[j]*bp[i]
	algr	%r9,%r12	# += high word from j-1
	lghi	%r12,0
	alcgr	%r8,%r12	# fold that carry into the high word
	alg	%r9,160(%r7,%r15)	# +=tp[j]
	alcgr	%r12,%r8	# new high word of the ap*bp chain

	lg	%r11,0(%r7,%r5)	# np[j]

	mlgr	%r10,%r0	# np[j]*m1
	algr	%r11,%r13	# += high word from j-1
	lghi	%r13,0
	alcgr	%r10,%r13	# fold that carry into the high word
	algr	%r11,%r9	# +="tp[j]"
	alcgr	%r13,%r10	# new high word of the np*m1 chain

	stg	%r11,160-8(%r7,%r15)	# tp[j-1]=
	la	%r7,8(%r7)	# j++
	brct	%r14,.Linner

	algr	%r13,%r12	# join both chains
	lghi	%r12,0
	alcgr	%r12,%r12
	alg	%r13,160(%r7,%r15)	# accumulate previous upmost overflow bit
	lghi	%r8,0
	alcgr	%r12,%r8	# new upmost overflow bit
	stg	%r13,160-8(%r7,%r15)	# tp[num-1]=
	stg	%r12,160(%r7,%r15)	# tp[num]= overflow bit

	la	%r4,8(%r4)	# bp++
	clg	%r4,200(%r7,%r15)	# compare to &bp[num] (parked %r4, j == num*8)
	jne	.Louter

	# ---- final reduction: rp = tp-np, then undo it if that borrowed ----
	lg	%r2,184(%r7,%r15)	# reincarnate rp (parked %r2)
	la	%r3,160(%r15)	# %r3 = tp
	ahi	%r1,1		# restore num, incidentally clears "borrow"

	la	%r7,0
	lr	%r14,%r1
.Lsub:	lg	%r9,0(%r7,%r3)	# tp[j]
	lg	%r11,0(%r7,%r5)	# np[j]

	slbgr	%r9,%r11	# tp[j]-np[j]-borrow; nothing else in the loop sets CC
	stg	%r9,0(%r7,%r2)	# rp[j]=
	la	%r7,8(%r7)
	brct	%r14,.Lsub
	lghi	%r8,0
	slbgr	%r12,%r8	# handle upmost carry: %r12 = overflow bit - borrow
	lghi	%r13,-1
	xgr	%r13,%r12	# %r13 = ~%r12; %r12 selects tp, %r13 selects tp-np

	la	%r7,0
	lgr	%r14,%r1
.Lcopy:	lg	%r8,160(%r7,%r15)	# conditional copy
	lg	%r9,0(%r7,%r2)
	ngr	%r8,%r12	# tp[j] & mask
	ngr	%r9,%r13	# rp[j] & ~mask
	ogr	%r9,%r8

	stg	%r7,160(%r7,%r15)	# zap tp: overwrite tp[j] with j
	stg	%r9,0(%r7,%r2)	# rp[j]=
	la	%r7,8(%r7)
	brct	%r14,.Lcopy

	la	%r1,216(%r7,%r15)	# address of parked %r6 (j == num*8 here)
	lmg	%r6,%r15,0(%r1)	# reload %r6-%r15, which also pops the frame
	lghi	%r2,1		# signal "processed"
	br	%r14
.size	bn_mul_mont,.-bn_mul_mont
.string	"Montgomery Multiplication for s390x, CRYPTOGAMS by <appro@openssl.org>"