/****************************************************************************
**
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "qt_mips_asm_dsp_p.h"

LEAF_MIPS_DSPR2(qConvertRgb16To32_asm_mips_dspr2)
/*
 * a0 - dst (a8r8g8b8)
 * a1 - src (r5g6b5)
 * a2 - w
 */
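/*
 * For reference, each rgb565 pixel expands as in this scalar C sketch
 * (this is what the single-pixel tail after label 2 computes, widening
 * the 5/6-bit fields to 8 bits by the usual bit replication):
 *
 *   r8  = (r5 << 3) | (r5 >> 2);
 *   g8  = (g6 << 2) | (g6 >> 4);
 *   b8  = (b5 << 3) | (b5 >> 2);
 *   dst = 0xff000000 | (r8 << 16) | (g8 << 8) | b8;
 *
 * The main loop below applies the same expansion to two pixels at once,
 * held as packed halfwords and shifted with the paired-halfword (.ph)
 * DSP instructions.
 */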

    beqz              a2, 3f
     nop
    addiu             t1, a2, -1
    beqz              t1, 2f
     nop
    li                t4, 0x07e007e0
    li                t5, 0x001F001F
/* Convert two pixels at a time (2 x rgb565 -> 2 x rgb8888) */
1:
    lhu               t0, 0(a1)
    lhu               t1, 2(a1)
    addiu             a1, a1, 4
    addiu             a2, a2, -2

    sll               t6, t0, 16
    or                t6, t6, t1          /* t6 = R1 G1 B1 | R2 G2 B2 */
    lui               t3, 0xff00
    ori               t3, t3, 0xff00      /* t3 = FF 00 | FF 00 (in place) */
    shrl.ph           t7, t6, 11          /* t7 = 0 R1 | 0 R2 */
    and               t8, t6, t4          /* t8 = 0 G1 0 | 0 G2 0 */
    shra.ph           t9, t7, 2           /* t9 = 0 R1 | 0 R2   (lower) */
    shll.ph           t7, t7, 3           /* t7 = 0 R1 | 0 R2   (higher) */
    shll.ph           t8, t8, 5           /* t8 = G1 0 | G2 0   (higher) */
    or                t7, t7, t9          /* t7 = 0 R1 | 0 R2   (in place) */
    shrl.qb           t9, t8, 6           /* t9 = G1 0 | G2 0   (lower) */
    or                t3, t3, t7          /* t3 = FF R1 | FF R2 (in place) */
    or                t8, t8, t9          /* t8 = G1 0 | G2 0   (in place) */
    and               t6, t6, t5          /* t6 = 0 B1 | 0 B2 */
    shll.ph           t7, t6, 3           /* t7 = 0 B1 | 0 B2   (higher) */
    shra.ph           t9, t6, 2           /* t9 = 0 B1 | 0 B2   (lower) */
    or                t7, t7, t9          /* t7 = 0 B1 | 0 B2   (in place) */
    or                t8, t7, t8          /* t8 = G1 B1 | G2 B2 (in place) */
    precrq.ph.w       t2, t3, t8          /* t2 = FF R1 G1 B1   (in place) */
    precr_sra.ph.w    t3, t8, 0           /* t3 = FF R2 G2 B2   (in place) */

    sw                t2, 0(a0)
    sw                t3, 4(a0)

    addiu             t2, a2, -1
    bgtz              t2, 1b
     addiu            a0, a0, 8
2:
    beqz              a2, 3f
     nop
    lhu               t0, 0(a1)

/* Remaining pixel conversion (rgb565 -> rgb8888) */
    lui               t1, 0xff00
    sll               t2, t0, 0x3
    andi              t3, t2, 0xff
    ext               t2, t0, 0x2, 0x3
    or                t2, t3, t2
    or                t1, t1, t2

    sll               t2, t0, 0x5
    andi              t2, t2, 0xfc00
    srl               t3, t0, 0x1
    andi              t3, t3, 0x300
    or                t3, t2, t3
    or                t1, t1, t3

    andi              t2, t0, 0xf800
    srl               t3, t2, 0x5
    andi              t3, t3, 0xff00
    or                t2, t2, t3
    sll               t2, t2, 0x8
    or                t1, t1, t2

    sw                t1, 0(a0)
3:
    j                 ra
     nop

END(qConvertRgb16To32_asm_mips_dspr2)


#if defined(__MIPSEL) && __MIPSEL
# define PACK(r, s, t)  packrl.ph r, s, t
# define LDHI(r, o, b)  lwl r, o + 1 (b)
# define LDLO(r, o, b)  lwr r, o + 2 (b)
#else
# define PACK(r, s, t)  packrl.ph r, t, s
# define LDHI(r, o, b)  lwr r, o + 1 (b)
# define LDLO(r, o, b)  lwl r, o + 2 (b)
#endif
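
/*
 * These macros hide the endian differences of the unaligned-access
 * sequences used below: LDHI/LDLO pair lwl/lwr so that a 32-bit word
 * can be fetched from a halfword-aligned source address, and PACK
 * (packrl.ph) merges halfwords from two registers to realign pixel
 * pairs. The operand order flips with endianness so the same loop
 * body works on both little- and big-endian targets.
 */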


LEAF_MIPS_DSPR2(qt_blend_rgb16_on_rgb16_mips_dspr2_asm)
/*
 * a0 - dst (*r5g6b5)
 * a1 - src (const *r5g6b5)
 * a2 - len (unsigned int) - batch length
 * a3 - alpha (int)
 *
 * Register usage:
 *  t0-3 - Scratch registers
 *  t4   - Number of iterations to do in unrolled loops
 *  t5   - Inverse alpha
 *  t6   - Alpha >> 2
 *  t7   - Inverse alpha >> 2
 *  t8   - magic1 (0x07e007e0)
 *  t9   - magic2 (0xf81ff81f)
 *
 * NOTE:
 *   Cannot use DSP instructions for the multiplication of two 16-bit
 *   values: on overflow the result would always be rounded or saturated.
 */
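
/*
 * For reference, a scalar C sketch of the per-pixel blend implemented
 * below (the one-pixel paths compute exactly this; the unrolled loops
 * do the same on two packed pixels per word, with magic1/magic2 as the
 * green and red/blue masks):
 *
 *   a  = ((alpha * 255) >> 8) + 1;            adjusted alpha
 *   ia = (255 - ((alpha * 255) >> 8)) + 1;    adjusted inverse alpha
 *
 *   sg  = ((s & 0x07e0) *  a        >> 8) & 0x07e0;   src green
 *   srb = ((s & 0xf81f) * (a  >> 2) >> 6) & 0xf81f;   src red | blue
 *   dg  = ((d & 0x07e0) *  ia       >> 8) & 0x07e0;   dst green
 *   drb = ((d & 0xf81f) * (ia >> 2) >> 6) & 0xf81f;   dst red | blue
 *   *dst = (srb | sg) + (drb | dg);
 */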

    beqz  a2, 0f
     andi t0, a0, 0x3
    andi  t1, a1, 0x3
    /* Adjust alpha value, and calculate inverse alpha value */
    li    t5, 255
    or    t2, t0, t1      /* t2 = (dst & 0x3) | (src & 0x3) */
    sll   t8, a3, 8       /* t8 = alpha << 8 */
    subu  a3, t8, a3      /* alpha = alpha * 255 */
    li    t8, 0x07e007e0  /* magic1 */
    srl   a3, a3, 8       /* alpha >>= 8 */
    li    t9, 0xf81ff81f  /* magic2 */
    subu  t5, t5, a3      /* ialpha = 255 - alpha */
    addiu a3, a3, 1       /* alpha++ */
    addiu t5, t5, 1       /* ialpha++ */
    srl   t6, a3, 2       /* ashift = alpha >> 2 */

    beqz  t2, 4f /* both aligned */
     srl  t7, t5, 2       /* iashift = ialpha >> 2 */

    beqz  t1, 2f /* src aligned, dst unaligned */
     nop

    beqz  t0, 3f /* dst aligned, src unaligned */
     nop
    /*
     * Both src/dst are unaligned: read one halfword from each, then
     * fall through to continue with word-aligned operation.
     */
    lhu t1, 0 (a1)
    lhu t0, 0 (a0)
    addiu a2, a2, -1  /* len-- */
    andi  t2, t1, 0x07e0
    andi  t1, t1, 0xf81f
    mul   t2, t2, a3
    mul   t1, t1, t6
    andi  t3, t0, 0x07e0
    andi  t0, t0, 0xf81f
    mul   t3, t3, t5
    mul   t0, t0, t7
    addiu a1, a1,  2  /* src++ */
    srl   t2, t2, 8
    srl   t1, t1, 6
    andi  t2, t2, 0x07e0
    andi  t1, t1, 0xf81f
    or    t1, t1, t2
    srl   t3, t3, 8
    srl   t0, t0, 6
    andi  t3, t3, 0x07e0
    andi  t0, t0, 0xf81f
    or    t0, t0, t3
    addu  t0, t0, t1  /* src * alpha + dst * ialpha */
    sh    t0,  0 (a0)
    addiu a0, a0,  2  /* dst++ */

    /*
     * Both src/dst pointers are word-aligned; process eight
     * pixels at a time in an unrolled loop.
     */
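    /*
     * In each iteration below, every lw holds two packed rgb565 pixels;
     * magic1 (t8) isolates the two green fields and magic2 (t9) the two
     * red/blue fields. Each halfword is widened with ext/srl, multiplied
     * and shifted separately, and append re-packs the blended pair.
     */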
4:  beqz   a2, 0f
     srl   t4, a2, 3   /* t4 = len / 8 */
    beqz   t4, 5f
     andi  a2, a2, 0x7 /* len = len % 8 */
    SAVE_REGS_ON_STACK 12, s0, s1, s2, s3, s4, v0, v1

1:  lw  t1,  0 (a1)  /* [s0, s1] */
    lw  v1,  4 (a1)  /* [s2, s3] */
    lw  s1,  8 (a1)  /* [s4, s5] */
    lw  s3, 12 (a1)  /* [s6, s7] */

    lw  t0,  0 (a0)  /* [d0, d1] */
    lw  v0,  4 (a0)  /* [d2, d3] */
    lw  s0,  8 (a0)  /* [d4, d5] */
    lw  s2, 12 (a0)  /* [d6, d7] */

    pref 4, 16 (a1)
    pref 5, 16 (a0)

    and     t2, t1, t8
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, a3
    mul     t2, t2, a3
    and     t1, t1, t9
    ext     s4, t1, 0, 16
    mul     s4, s4, t6
    srl     t1, t1, 16
    mul     t1, t1, t6
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, v1, t8
    srl     t1, t1, 6
    append  t1, s4, 16
    and     t1, t1, t9
    or      t1, t1, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, a3
    mul     t3, t3, a3
    and     v1, v1, t9
    ext     s4, v1, 0, 16
    mul     s4, s4, t6
    srl     v1, v1, 16
    mul     v1, v1, t6
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, s1, t8
    srl     v1, v1, 6
    append  v1, s4, 16
    and     v1, v1, t9
    or      v1, v1, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, a3
    mul     t2, t2, a3
    and     s1, s1, t9
    ext     s4, s1, 0, 16
    mul     s4, s4, t6
    srl     s1, s1, 16
    mul     s1, s1, t6
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, s3, t8
    srl     s1, s1, 6
    append  s1, s4, 16
    and     s1, s1, t9
    or      s1, s1, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, a3
    mul     t3, t3, a3
    and     s3, s3, t9
    ext     s4, s3, 0, 16
    mul     s4, s4, t6
    srl     s3, s3, 16
    mul     s3, s3, t6
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, t0, t8
    srl     s3, s3, 6
    append  s3, s4, 16
    and     s3, s3, t9
    or      s3, s3, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, t5
    mul     t2, t2, t5
    and     t0, t0, t9
    ext     s4, t0, 0, 16
    mul     s4, s4, t7
    srl     t0, t0, 16
    mul     t0, t0, t7
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, v0, t8
    srl     t0, t0, 6
    append  t0, s4, 16
    and     t0, t0, t9
    or      t0, t0, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, t5
    mul     t3, t3, t5
    and     v0, v0, t9
    ext     s4, v0, 0, 16
    mul     s4, s4, t7
    srl     v0, v0, 16
    mul     v0, v0, t7
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, s0, t8
    srl     v0, v0, 6
    append  v0, s4, 16
    and     v0, v0, t9
    or      v0, v0, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, t5
    mul     t2, t2, t5
    and     s0, s0, t9
    ext     s4, s0, 0, 16
    mul     s4, s4, t7
    srl     s0, s0, 16
    mul     s0, s0, t7
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, s2, t8
    srl     s0, s0, 6
    append  s0, s4, 16
    and     s0, s0, t9
    or      s0, s0, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, t5
    mul     t3, t3, t5
    and     s2, s2, t9
    ext     s4, s2, 0, 16
    mul     s4, s4, t7
    srl     s2, s2, 16
    mul     s2, s2, t7
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    addu.ph t0, t0, t1  /* t0 = [S0 + D0, S1 + D1] */
    srl     s2, s2, 6
    append  s2, s4, 16
    and     s2, s2, t9
    or      s2, s2, t3
    addu.ph v0, v0, v1  /* v0 = [S2 + D2, S3 + D3] */
    addu.ph s0, s0, s1  /* s0 = [S4 + D4, S5 + D5] */
    addu.ph s2, s2, s3  /* s2 = [S6 + D6, S7 + D7] */

    sw      t0,  0 (a0) /* [SS0, SS1] */
    sw      v0,  4 (a0) /* [SS2, SS3] */
    sw      s0,  8 (a0) /* [SS4, SS5] */
    sw      s2, 12 (a0) /* [SS6, SS7] */

    addiu   t4, t4, -1   /* t4-- */
    addiu   a1, a1, 16   /* src += 8 */

    bnez    t4, 1b
     addiu  a0, a0, 16   /* dst += 8 */

    RESTORE_REGS_FROM_STACK 12, s0, s1, s2, s3, s4, v0, v1
    b 5f
     nop


    /* dst unaligned: do one pixel, then fall through to the src-unaligned case */
2:  lhu t1, 0 (a1)
    lhu t0, 0 (a0)
    addiu a2, a2, -1  /* len-- */
    andi  t2, t1, 0x07e0
    andi  t1, t1, 0xf81f
    mul   t2, t2, a3
    mul   t1, t1, t6
    andi  t3, t0, 0x07e0
    andi  t0, t0, 0xf81f
    mul   t3, t3, t5
    mul   t0, t0, t7
    addiu a1, a1,  2  /* src++ */
    srl   t2, t2, 8
    srl   t1, t1, 6
    andi  t2, t2, 0x07e0
    andi  t1, t1, 0xf81f
    or    t1, t1, t2
    srl   t3, t3, 8
    srl   t0, t0, 6
    andi  t3, t3, 0x07e0
    andi  t0, t0, 0xf81f
    or    t0, t0, t3
    addu  t0, t0, t1  /* src * alpha + dst * ialpha */
    sh    t0,  0 (a0)
    addiu a0, a0,  2  /* dst++ */

    /* src unaligned */
3:  beqz   a2, 0f
     srl   t4, a2, 3   /* t4 = len / 8 */
    beqz   t4, 5f
     andi  a2, a2, 0x7 /* len = len % 8 */
    SAVE_REGS_ON_STACK 12, s0, s1, s2, s3, s4, v0, v1

1:  lw     t0,  0 (a0) /* [d0, d1] */
    lw     v0,  4 (a0) /* [d2, d3] */
    lw     s0,  8 (a0) /* [d4, d5] */
    lw     s2, 12 (a0) /* [d6, d7] */

    LDHI  (t1,  0, a1) /* [s0, __] */
    lw     v1,  2 (a1) /* [s1, s2] */
    lw     s1,  6 (a1) /* [s3, s4] */
    lw     s3, 10 (a1) /* [s5, s6] */
    LDLO  (s4, 12, a1) /* [__, s7] */

    pref    4, 14 (a1)
    pref    5, 16 (a0)

    PACK  (t1, v1, t1) /* [s0, s1] */
    PACK  (v1, s1, v1) /* [s2, s3] */
    PACK  (s1, s3, s1) /* [s4, s5] */
    PACK  (s3, s4, s3) /* [s6, s7] */
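
    /*
     * After the PACK sequence the eight source pixels sit in t1, v1,
     * s1, s3 exactly as in the aligned loop above, so the remainder of
     * the iteration is identical to the word-aligned case.
     */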

    and     t2, t1, t8
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, a3
    mul     t2, t2, a3
    and     t1, t1, t9
    ext     s4, t1, 0, 16
    mul     s4, s4, t6
    srl     t1, t1, 16
    mul     t1, t1, t6
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, v1, t8
    srl     t1, t1, 6
    append  t1, s4, 16
    and     t1, t1, t9
    or      t1, t1, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, a3
    mul     t3, t3, a3
    and     v1, v1, t9
    ext     s4, v1, 0, 16
    mul     s4, s4, t6
    srl     v1, v1, 16
    mul     v1, v1, t6
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, s1, t8
    srl     v1, v1, 6
    append  v1, s4, 16
    and     v1, v1, t9
    or      v1, v1, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, a3
    mul     t2, t2, a3
    and     s1, s1, t9
    ext     s4, s1, 0, 16
    mul     s4, s4, t6
    srl     s1, s1, 16
    mul     s1, s1, t6
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, s3, t8
    srl     s1, s1, 6
    append  s1, s4, 16
    and     s1, s1, t9
    or      s1, s1, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, a3
    mul     t3, t3, a3
    and     s3, s3, t9
    ext     s4, s3, 0, 16
    mul     s4, s4, t6
    srl     s3, s3, 16
    mul     s3, s3, t6
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, t0, t8
    srl     s3, s3, 6
    append  s3, s4, 16
    and     s3, s3, t9
    or      s3, s3, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, t5
    mul     t2, t2, t5
    and     t0, t0, t9
    ext     s4, t0, 0, 16
    mul     s4, s4, t7
    srl     t0, t0, 16
    mul     t0, t0, t7
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, v0, t8
    srl     t0, t0, 6
    append  t0, s4, 16
    and     t0, t0, t9
    or      t0, t0, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, t5
    mul     t3, t3, t5
    and     v0, v0, t9
    ext     s4, v0, 0, 16
    mul     s4, s4, t7
    srl     v0, v0, 16
    mul     v0, v0, t7
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    and     t2, s0, t8
    srl     v0, v0, 6
    append  v0, s4, 16
    and     v0, v0, t9
    or      v0, v0, t3
    ext     t3, t2, 0, 16
    srl     t2, t2, 16
    mul     t3, t3, t5
    mul     t2, t2, t5
    and     s0, s0, t9
    ext     s4, s0, 0, 16
    mul     s4, s4, t7
    srl     s0, s0, 16
    mul     s0, s0, t7
    srl     t3, t3, 8
    srl     t2, t2, 8
    append  t2, t3, 16
    and     t2, t2, t8
    srl     s4, s4, 6
    and     t3, s2, t8
    srl     s0, s0, 6
    append  s0, s4, 16
    and     s0, s0, t9
    or      s0, s0, t2
    ext     t2, t3, 0, 16
    srl     t3, t3, 16
    mul     t2, t2, t5
    mul     t3, t3, t5
    and     s2, s2, t9
    ext     s4, s2, 0, 16
    mul     s4, s4, t7
    srl     s2, s2, 16
    mul     s2, s2, t7
    srl     t2, t2, 8
    srl     t3, t3, 8
    append  t3, t2, 16
    and     t3, t3, t8
    srl     s4, s4, 6
    addu.ph t0, t0, t1  /* t0 = [S0 + D0, S1 + D1] */
    srl     s2, s2, 6
    append  s2, s4, 16
    and     s2, s2, t9
    or      s2, s2, t3
    addu.ph v0, v0, v1  /* v0 = [S2 + D2, S3 + D3] */
    addu.ph s0, s0, s1  /* s0 = [S4 + D4, S5 + D5] */
    addu.ph s2, s2, s3  /* s2 = [S6 + D6, S7 + D7] */

    sw      t0,  0 (a0) /* [SS0, SS1] */
    sw      v0,  4 (a0) /* [SS2, SS3] */
    sw      s0,  8 (a0) /* [SS4, SS5] */
    sw      s2, 12 (a0) /* [SS6, SS7] */

    addiu   t4, t4, -1   /* t4-- */
    addiu   a1, a1, 16   /* src += 8 */

    bnez    t4, 1b
     addiu  a0, a0, 16   /* dst += 8 */

    RESTORE_REGS_FROM_STACK 12, s0, s1, s2, s3, s4, v0, v1

5:  /* Process remaining pixels (len < 8), one at a time */
    beqz  a2, 0f
     nop

1:  lhu    t1, 0 (a1)
    lhu    t0, 0 (a0)
    addiu  a1, a1, 2   /* src++ */
    andi  t2, t1, 0x07e0
    andi  t1, t1, 0xf81f
    mul   t2, t2, a3
    mul   t1, t1, t6
    andi  t3, t0, 0x07e0
    andi  t0, t0, 0xf81f
    mul   t3, t3, t5
    mul   t0, t0, t7
    addiu a2, a2,  -1   /* len-- */
    srl   t2, t2, 8
    srl   t1, t1, 6
    andi  t2, t2, 0x07e0
    andi  t1, t1, 0xf81f
    or    t1, t1, t2
    srl   t3, t3, 8
    srl   t0, t0, 6
    andi  t3, t3, 0x07e0
    andi  t0, t0, 0xf81f
    or    t0, t0, t3

    addu   t0, t0, t1  /* src * alpha + dst * ialpha */
    sh     t0, 0 (a0)
    bnez   a2, 1b
     addiu a0, a0, 2   /* dst++ */

0:  jr ra
     nop

END(qt_blend_rgb16_on_rgb16_mips_dspr2_asm)

#undef PACK
#undef LDHI
#undef LDLO