1@**: EMULib Emulation Library *********************************
2@**:                                                         **
3@**:                     LibARM-BPP32.asm                    **
4@**:                                                         **
5@**: This file contains optimized ARM assembler functions    **
6@**: used to copy and process 32bpp images on ARM-based      **
7@**: platforms such as Symbian/S60, Symbian/UIQ, and Maemo.  **
8@**:                                                         **
9@**: Copyright (C) Marat Fayzullin 2005-2009                 **
10@**:     You are not allowed to distribute this software     **
11@**:     commercially. Please, notify me, if you make any    **
12@**:     changes to this file.                               **
13@*************************************************************:
14	.text
15
16
17	.global	TELEVIZE0_32
18	.global	TELEVIZE1_32
19	.global	C256T120_32
20	.global	C256T160_32
21	.global	C256T176_32
22	.global	C256T208_32
23	.global	C256T240_32
24	.global	C256T256_32
25	.global	C240T320_32
26	.global	C256T320_32
27	.global	C256T352_32
28	.global	C256T416_32
29	.global	C256T512_32
30	.global	C256T768_32
31
@** M_LOADCONSTS32 ***********************************************
@** Builds the two per-channel masks that M_MERGE32 expects:
@**   r12 = 0x007F7F7F  (low seven bits of each colour byte)
@**   r14 = 0x00808080  (top bit of each colour byte)
@** Overwrites r12 and r14 (r14 is the link register, so a caller
@** that needs to return must have saved it). Flags are untouched.
@*****************************************************************
	.macro M_LOADCONSTS32
	mov	r12,#0x7F0000
	mov	r14,#0x800000
	orr	r12,r12,#0x007F00
	orr	r14,r14,#0x008000
	orr	r12,r12,#0x00007F
	orr	r14,r14,#0x000080
	.endm
44
@** M_MERGE32 ****************************************************
@** \dst = per-channel average of the 24/32bpp pixels \src1 and
@** \src2. Requires r12 = 0x007F7F7F and r14 = 0x00808080 (see
@** M_LOADCONSTS32). The low seven bits and the top bit of every
@** colour byte are summed separately so that no carry can leak
@** into the neighbouring channel; bits 24-31 of \dst end up zero.
@**
@** Destroys \src1 (left holding \src1 AND r14) and \tmp.
@** \dst is written before the inputs are fully consumed, so it
@** must be a different register from \src1 and \src2; \tmp must
@** be distinct from all three. Flags are untouched.
@*****************************************************************
	.macro M_MERGE32 dst,src1,src2,tmp
	@ Low seven bits of each channel: halve the sum, drop strays
	and	\dst,\src2,r12
	and	\tmp,\src1,r12
	add	\tmp,\dst,\tmp
	@ Top bit of each channel, summed on its own
	and	\src1,\src1,r14
	and	\dst,\src2,r14
	and	\tmp,r12,\tmp,lsr #1
	add	\dst,\src1,\dst
	@ Recombine the two halves
	add	\dst,\tmp,\dst,lsr #1
	.endm
58
@** C256T120_32 **************************************************
@** Shrinks a run of 32-bit pixels by averaging adjacent pairs:
@** every pass reads 8 words from [r1] and writes 4 to [r0].
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 8 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@*****************************************************************
C256T120_32:
	stmfd	sp!,{r4-r12,lr}
	M_LOADCONSTS32
.L120_loop:
	@ DST[0] = MERGE(0,1)
	ldmia	r1!,{r4,r5}
	M_MERGE32 r3,r4,r5,r11
	@ DST[1,2,3] = MERGE(2,3),MERGE(4,5),MERGE(6,7)
	ldmia	r1!,{r5-r10}
	M_MERGE32 r4,r5,r6,r11
	M_MERGE32 r5,r7,r8,r11
	M_MERGE32 r6,r9,r10,r11
	stmia	r0!,{r3-r6}
	subs	r2,r2,#8
	bhi	.L120_loop
	ldmfd	sp!,{r4-r12,pc}
71
@** C256T160_32 **************************************************
@** Shrinks a run of 32-bit pixels 16 -> 10: every pass reads 16
@** words from [r1] and writes 10 to [r0], averaging selected
@** neighbouring pairs and copying the rest.
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 16 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@*****************************************************************
C256T160_32:
	stmfd	sp!,{r4-r12,lr}
	M_LOADCONSTS32
.L160_loop:
	@ DST[0,1,2,3] = MERGE(0,1),SRC[2],MERGE(3,4),SRC[5]
	ldmia	r1!,{r3,r4,r6-r8,r10}
	M_MERGE32 r5,r3,r4,r11
	M_MERGE32 r9,r7,r8,r11
	stmia	r0!,{r5,r6,r9,r10}
	@ DST[4,5,6] = MERGE(6,7),MERGE(8,9),SRC[10]
	ldmia	r1!,{r3,r4,r6,r7,r9}
	M_MERGE32 r5,r3,r4,r11
	M_MERGE32 r8,r6,r7,r11
	stmia	r0!,{r5,r8,r9}
	@ DST[7,8,9] = MERGE(11,12),SRC[13],MERGE(14,15)
	ldmia	r1!,{r3,r4,r6-r8}
	M_MERGE32 r5,r3,r4,r11
	M_MERGE32 r9,r7,r8,r11
	stmia	r0!,{r5,r6,r9}
	subs	r2,r2,#16
	bhi	.L160_loop
	ldmfd	sp!,{r4-r12,pc}
89
@** C256T176_32 **************************************************
@** Shrinks a run of 32-bit pixels 16 -> 11: every pass reads 16
@** words from [r1] and writes 11 to [r0], averaging selected
@** neighbouring pairs and copying the rest.
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 16 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@*****************************************************************
C256T176_32:
	stmfd	sp!,{r4-r12,lr}
	M_LOADCONSTS32
.L176_loop:
	@ DST[0,1,2,3] = MERGE(0,1),SRC[2],SRC[3],MERGE(4,5)
	ldmia	r1!,{r3,r4,r6-r9}
	M_MERGE32 r5,r3,r4,r11
	M_MERGE32 r10,r8,r9,r11
	stmia	r0!,{r5-r7,r10}
	@ DST[4,5,6,7] = SRC[6],SRC[7],MERGE(8,9),SRC[10]
	@ (SRC[11] and SRC[12] stay in r9/r10 for the next group)
	ldmia	r1!,{r3-r6,r8-r10}
	M_MERGE32 r7,r5,r6,r11
	stmia	r0!,{r3,r4,r7,r8}
	@ DST[8,9,10] = MERGE(11,12),SRC[13],MERGE(14,15)
	M_MERGE32 r3,r9,r10,r11
	ldmia	r1!,{r4-r6}
	M_MERGE32 r7,r5,r6,r11
	stmia	r0!,{r3,r4,r7}
	subs	r2,r2,#16
	bhi	.L176_loop
	ldmfd	sp!,{r4-r12,pc}
106
@** C256T208_32 **************************************************
@** Shrinks a run of 32-bit pixels 16 -> 13: every pass reads 16
@** words from [r1] and writes 13 to [r0]; three neighbouring
@** pairs are averaged, the other ten pixels are copied.
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 16 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@*****************************************************************
C256T208_32:
	stmfd	sp!,{r4-r12,lr}
	M_LOADCONSTS32
.L208_loop:
	@ DST[0-3] = SRC[0-3]
	ldmia	r1!,{r3-r10}
	stmia	r0!,{r3-r6}
	@ DST[4,5,6] = MERGE(4,5),SRC[6],SRC[7]
	M_MERGE32 r3,r7,r8,r11
	stmia	r0!,{r3,r9,r10}
	@ DST[7] = SRC[8]
	ldmia	r1!,{r3-r10}
	str	r3,[r0],#4
	@ DST[8,9,10,11] = MERGE(9,10),SRC[11],SRC[12],SRC[13]
	M_MERGE32 r3,r4,r5,r11
	stmia	r0!,{r3,r6-r8}
	@ DST[12] = MERGE(14,15)
	M_MERGE32 r3,r9,r10,r11
	str	r3,[r0],#4
	subs	r2,r2,#16
	bhi	.L208_loop
	ldmfd	sp!,{r4-r12,pc}
122
@** C256T240_32 **************************************************
@** Shrinks a run of 32-bit pixels 16 -> 15: every pass reads 16
@** words from [r1] and writes 15 to [r0]; only SRC[8] and SRC[9]
@** are averaged into one pixel, everything else is copied.
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 16 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@*****************************************************************
C256T240_32:
	stmfd	sp!,{r4-r12,lr}
	M_LOADCONSTS32
.L240_loop:
	@ DST[0-7] = SRC[0-7]
	ldmia	r1!,{r3-r10}
	stmia	r0!,{r3-r10}
	@ DST[8] = MERGE(8,9)
	ldmia	r1!,{r5,r6}
	M_MERGE32 r4,r5,r6,r11
	@ DST[9-14] = SRC[10-15]
	ldmia	r1!,{r5-r10}
	stmia	r0!,{r4-r10}
	subs	r2,r2,#16
	bhi	.L240_loop
	ldmfd	sp!,{r4-r12,pc}
134
@** C256T256_32 **************************************************
@** Straight copy of 32-bit pixels, 8 words per pass, from [r1]
@** to [r0].
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 8 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r10 are saved and restored; lr is never modified, so the
@** routine returns by copying lr into pc.
@*****************************************************************
C256T256_32:
	stmfd	sp!,{r4-r10}
.L256_loop:
	ldmia	r1!,{r3-r10}
	stmia	r0!,{r3-r10}
	subs	r2,r2,#8
	bhi	.L256_loop
	ldmfd	sp!,{r4-r10}
	mov	pc,lr
142
@** C256T320_32 **************************************************
@** Stretches a run of 32-bit pixels 8 -> 10: every pass reads 8
@** words from [r1] and writes 10 to [r0], inserting two averaged
@** pixels between existing neighbours.
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 8 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@*****************************************************************
C256T320_32:
	stmfd	sp!,{r4-r12,lr}
	M_LOADCONSTS32
.L320_loop:
	ldmia	r1!,{r3,r5-r8,r10}
	@ DST[0] = SRC[0]; once stored, r3 may be destroyed by the merge
	str	r3,[r0],#4
	M_MERGE32 r4,r3,r5,r11
	@ Merge a copy of SRC[4] so that r8 itself survives for the store
	mov	r3,r8
	M_MERGE32 r9,r3,r10,r11
	@ DST[1-7] = MERGE(0,1),SRC[1-4],MERGE(4,5),SRC[5]
	stmia	r0!,{r4-r10}
	@ DST[8-9] = SRC[6-7]
	ldmia	r1!,{r3,r4}
	stmia	r0!,{r3,r4}
	subs	r2,r2,#8
	bhi	.L320_loop
	ldmfd	sp!,{r4-r12,pc}
156
@** C240T320_32 **************************************************
@** Stretches a run of 32-bit pixels 6 -> 8: every pass reads 6
@** words from [r1] and writes 8 to [r0]:
@**   DST[0-7] = SRC[0-1],MERGE(1,2),SRC[2-4],MERGE(4,5),SRC[5]
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 6 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@**
@** M_MERGE32 destroys its src1 operand (it is left ANDed with
@** 0x00808080). SRC[1] (r4) and SRC[4] (r8) are also written to
@** the destination unchanged, so they must never be passed as
@** src1 directly. SRC[0] is stored first, which frees r3 to hold
@** a disposable copy for each merge.
@*****************************************************************
C240T320_32:
	stmfd	sp!,{r4-r12,lr}
	M_LOADCONSTS32
L240T320:
	ldmia	r1!,{r3,r4,r6,r7,r8,r10}
	str	r3,[r0],#4			@ DST[0] = SRC[0]; r3 is now free
	mov	r3,r4				@ disposable copy of SRC[1]
	M_MERGE32 r5,r3,r6,r11
	mov	r3,r8				@ disposable copy of SRC[4]
	M_MERGE32 r9,r3,r10,r11
	stmia	r0!,{r4,r5,r6,r7,r8,r9,r10}	@ DST[1-7] = SRC[1],MERGE(1,2),SRC[2-4],MERGE(4,5),SRC[5]
	subs	r2,r2,#6
	bhi	L240T320
	ldmfd	sp!,{r4-r12,pc}
166
@** C256T352_32 **************************************************
@** Stretches a run of 32-bit pixels 8 -> 11: every pass reads 8
@** words from [r1] and writes 11 to [r0], inserting three
@** averaged pixels between existing neighbours.
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 8 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@** Each pixel is stored before it is used as the (destroyed)
@** first operand of M_MERGE32.
@*****************************************************************
C256T352_32:
	stmfd	sp!,{r4-r12,lr}
	M_LOADCONSTS32
.L352_loop:
	ldmia	r1!,{r3,r5-r10}
	@ DST[0] = SRC[0]
	str	r3,[r0],#4
	@ DST[1,2,3,4] = MERGE(0,1),SRC[1],SRC[2],SRC[3]
	M_MERGE32 r4,r3,r5,r11
	stmia	r0!,{r4-r7}
	@ DST[5,6,7] = MERGE(3,4),SRC[4],SRC[5]
	M_MERGE32 r6,r7,r8,r11
	stmia	r0!,{r6,r8,r9}
	@ DST[8,9,10] = MERGE(5,6),SRC[6],SRC[7]
	@ (r11 is free after the last merge, so it receives SRC[7])
	M_MERGE32 r6,r9,r10,r11
	ldr	r11,[r1],#4
	stmia	r0!,{r6,r10,r11}
	subs	r2,r2,#8
	bhi	.L352_loop
	ldmfd	sp!,{r4-r12,pc}
181
@** C256T416_32 **************************************************
@** Stretches a run of 32-bit pixels 8 -> 13: every pass reads 8
@** words from [r1] and writes 13 to [r0], inserting five
@** averaged pixels between existing neighbours.
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 8 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@** Each pixel is stored before it is used as the (destroyed)
@** first operand of M_MERGE32.
@*****************************************************************
C256T416_32:
	stmfd	sp!,{r4-r12,lr}
	M_LOADCONSTS32
.L416_loop:
	ldmia	r1!,{r3,r5-r10}
	@ DST[0] = SRC[0]
	str	r3,[r0],#4
	@ DST[1,2] = MERGE(0,1),SRC[1]
	M_MERGE32 r4,r3,r5,r11
	stmia	r0!,{r4,r5}
	@ DST[3,4,5] = MERGE(1,2),SRC[2],SRC[3]
	M_MERGE32 r3,r5,r6,r11
	stmia	r0!,{r3,r6,r7}
	@ DST[6,7] = MERGE(3,4),SRC[4]
	M_MERGE32 r3,r7,r8,r11
	stmia	r0!,{r3,r8}
	@ DST[8,9,10] = MERGE(4,5),SRC[5],SRC[6]
	M_MERGE32 r3,r8,r9,r11
	stmia	r0!,{r3,r9,r10}
	@ DST[11,12] = MERGE(6,7),SRC[7]
	ldr	r4,[r1],#4
	M_MERGE32 r3,r10,r4,r11
	stmia	r0!,{r3,r4}
	subs	r2,r2,#8
	bhi	.L416_loop
	ldmfd	sp!,{r4-r12,pc}
200
@** C256T512_32 **************************************************
@** Doubles a run of 32-bit pixels: every pass reads 8 words from
@** [r1] and writes 16 to [r0], each source pixel twice in a row.
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 8 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@** STM writes registers in ascending register order, so the
@** duplicates are shuffled into ascending registers first.
@*****************************************************************
C256T512_32:
	stmfd	sp!,{r4-r12,lr}
.L512_loop:
	ldmia	r1!,{r3-r10}		@ r3..r10 = SRC[0..7]
	@ DST[0-5] = SRC[0],SRC[0],SRC[1],SRC[1],SRC[2],SRC[2]
	mov	ip,r5
	mov	lr,r5
	mov	r5,r4
	mov	r11,r4
	mov	r4,r3
	stmia	r0!,{r3-r5,r11,ip,lr}
	@ DST[6-15] = SRC[3],SRC[3],SRC[4],SRC[4],...,SRC[7],SRC[7]
	mov	r11,r10
	mov	ip,r10
	mov	r10,r9
	mov	r4,r6
	mov	r3,r6
	mov	r6,r7
	mov	r5,r7
	mov	r7,r8
	stmia	r0!,{r3-r12}
	subs	r2,r2,#8
	bhi	.L512_loop
	ldmfd	sp!,{r4-r12,pc}
221
@** C256T768_32 **************************************************
@** Triples a run of 32-bit pixels: every pass reads 8 words from
@** [r1] and writes 24 to [r0], each source pixel three times in
@** a row.
@** In:  r0 = destination, r1 = source (both post-incremented),
@**      r2 = count, reduced by 8 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r4-r12 and lr are saved on the stack and restored on return.
@** STM writes registers in ascending register order, so the
@** copies are shuffled into ascending registers first.
@*****************************************************************
C256T768_32:
	stmfd	sp!,{r4-r12,lr}
.L768_loop:
	ldmia	r1!,{r3-r10}		@ r3..r10 = SRC[0..7]
	@ DST[0-2] = SRC[0] x3
	mov	ip,r3
	mov	lr,r3
	stmia	r0!,{r3,ip,lr}
	@ DST[3-8] = SRC[1] x3, SRC[2] x3
	mov	r3,r4
	mov	r11,r5
	mov	ip,r5
	mov	lr,r5
	mov	r5,r4
	stmia	r0!,{r3-r5,r11,ip,lr}
	@ DST[9-14] = SRC[3] x3, SRC[4] x3
	mov	r3,r6
	mov	r4,r6
	mov	ip,r7
	mov	lr,r7
	stmia	r0!,{r3,r4,r6,r7,ip,lr}
	@ DST[15-23] = SRC[5] x3, SRC[6] x3, SRC[7] x3
	mov	r3,r8
	mov	r4,r8
	mov	r5,r8
	mov	r6,r9
	mov	r7,r9
	mov	ip,r10
	mov	lr,r10
	stmia	r0!,{r3-r7,r9,r10,ip,lr}
	subs	r2,r2,#8
	bhi	.L768_loop
	ldmfd	sp!,{r4-r12,pc}
249
@** TELEVIZE0_32 *************************************************
@** Darkens 32-bit pixels in place: from every pixel it subtracts
@** ((pixel >> 4) AND 0x000F0F0F), i.e. one sixteenth of each of
@** the three low bytes. Eight pixels are processed per pass.
@** In:  r0 = pixel buffer (read and rewritten, post-incremented),
@**      r1 = count, reduced by 8 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r2 and r3 are used as scratch; r4-r10 and lr are saved and
@** restored.
@*****************************************************************
TELEVIZE0_32:
	stmfd	sp!,{r4-r10,lr}
	mov	lr,#0x0F
	orr	lr,lr,lr,lsl #8		@ 0x00000F0F
	orr	lr,lr,lr,lsl #8		@ 0x000F0F0F
.Ltv0_loop:
	ldmia	r0,{r3-r10}		@ no writeback: the store below advances r0
	.irp	pix,r3,r4,r5,r6,r7,r8,r9,r10
	and	r2,lr,\pix,lsr #4
	sub	\pix,\pix,r2
	.endr
	subs	r1,r1,#8		@ STM leaves the flags alone for BHI
	stmia	r0!,{r3-r10}
	bhi	.Ltv0_loop
	ldmfd	sp!,{r4-r10,pc}
275
@** TELEVIZE1_32 *************************************************
@** Brightens 32-bit pixels in place: to every pixel it adds
@** (((pixel EOR 0xFFFFFFFE) >> 4) AND 0x000F0F0F), i.e. one
@** sixteenth of the distance of each of the three low bytes from
@** 0xFF. Eight pixels are processed per pass.
@** In:  r0 = pixel buffer (read and rewritten, post-incremented),
@**      r1 = count, reduced by 8 per pass; the loop runs at
@**           least once and repeats while the result is > 0.
@** r2 and r3 are used as scratch; r4-r11 and lr are saved and
@** restored.
@*****************************************************************
TELEVIZE1_32:
	stmfd	sp!,{r4-r11,lr}
	mov	lr,#0x0F
	orr	lr,lr,lr,lsl #8		@ 0x00000F0F
	orr	lr,lr,lr,lsl #8		@ 0x000F0F0F
	mvn	r11,#1			@ r11 = 0xFFFFFFFE
.Ltv1_loop:
	ldmia	r0,{r3-r10}		@ no writeback: the store below advances r0
	.irp	pix,r3,r4,r5,r6,r7,r8,r9,r10
	eor	r2,\pix,r11
	and	r2,lr,r2,lsr #4
	add	\pix,\pix,r2
	.endr
	subs	r1,r1,#8		@ STM leaves the flags alone for BHI
	stmia	r0!,{r3-r10}
	bhi	.Ltv1_loop
	ldmfd	sp!,{r4-r11,pc}
310
311
312