3** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
4** Contact: https://www.qt.io/licensing/
6** This file is part of the QtGui module of the Qt Toolkit.
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
40#include "../painting/qt_mips_asm_dsp_p.h"
/*
 * In-place premultiplication of 32-bit ARGB pixels, processed line by line.
 *
 * NOTE(review): the symbol declaration (entry macro / label) of this routine
 * is not part of these lines - confirm that it directly precedes this code.
 *
 * Parameters (as used by the code below):
 *   a0 - pointer to the first pixel (32 bits per pixel, alpha in bits 31..24)
 *   a1 - numlines
 *   a2 - linelen, in pixels
 *   a3 - byte offset added to a0 after every line (srclineskip)
 *
 * For each pixel every colour channel C is replaced by
 *     (t + (t >> 8) + 0x80) >> 8      with t = A * C
 * which approximates A * C / 255 with rounding.  The alpha byte itself is
 * written back unchanged.
 *
 * Register usage:
 *   v0 - tail    (linelen % 8) == (linelen & 0x7)
 *   v1 - batches (linelen / 8) == (linelen >> 3)
 *   t0-t9, s0-s3 - temporary
 *
 * Branch delay slots are filled by hand; the instruction in a delay slot is
 * indented by one extra space.
 */
    SAVE_REGS_ON_STACK  0, s0, s1, s2, s3

    /* without this guard numlines == 0 would wrap to -1 and never terminate */
    beqz   a1, 0f       /* if (!numlines): return */
     nop

3:  srl    v1, a2, 3    /* v1 = linelen / 8 */
    addiu  a1, a1, -1   /* numlines-- */
    beqz   v1, 6f       /* if (!(linelen / 8)): go check the tail count */
     andi  v0, a2, 0x7  /* v0 = linelen % 8 */

    /* unrolled loop, handles (v1 = linelen / 8) batches of 8 pixels */
2:  addiu  v1, v1, -1
    pref    5, 0(a0)    /* cache-hint: store-streamed */
    pref    5, 32(a0)

    /* pixels 0..3 of the batch */
    lw     t0, 0(a0)
    lw     t1, 4(a0)
    lw     t2, 8(a0)
    lw     t3, 12(a0)
    srl    t4, t0, 24  /*  00|00|00|A1 */
    replv.ph t5, t4    /*  00|A1|00|A1 */
    srl    t6, t1, 24  /*  00|00|00|A2 */
    replv.ph t7, t6    /*  00|A2|00|A2 */
    muleu_s.ph.qbl t8, t0, t5 /* A1*A1|A1*R1 */
    muleu_s.ph.qbr t0, t0, t5 /* A1*G1|A1*B1 */
    muleu_s.ph.qbl t9, t1, t7 /* A2*A2|A2*R2 */
    muleu_s.ph.qbr t1, t1, t7 /* A2*G2|A2*B2 */
    srl    t5, t2, 24  /*  00|00|00|A3 (t5 is free again) */
    replv.ph s0, t5    /*  00|A3|00|A3 */
    srl    t7, t3, 24  /*  00|00|00|A4 (t7 is free again) */
    replv.ph s1, t7    /*  00|A4|00|A4 */
    muleu_s.ph.qbl s2, t2, s0 /* A3*A3|A3*R3 */
    muleu_s.ph.qbr t2, t2, s0 /* A3*G3|A3*B3 */
    muleu_s.ph.qbl s0, t3, s1 /* A4*A4|A4*R4 */
    muleu_s.ph.qbr t3, t3, s1 /* A4*G4|A4*B4 */
    /* t += t >> 8, for both halfwords of every product register */
    preceu.ph.qbla s1, t8
    preceu.ph.qbla s3, t0
    addu.ph t8, t8, s1
    addu.ph t0, t0, s3
    preceu.ph.qbla s1, t9
    preceu.ph.qbla s3, t1
    addu.ph t9, t9, s1
    addu.ph t1, t1, s3
    preceu.ph.qbla s1, s2
    preceu.ph.qbla s3, t2
    addu.ph s2, s2, s1
    addu.ph t2, t2, s3
    preceu.ph.qbla s1, s0
    preceu.ph.qbla s3, t3
    addu.ph s0, s0, s1
    addu.ph t3, t3, s3
    /* rounding shift; only the low byte of each halfword is used below */
    shra_r.ph t8, t8, 8 /*  xxAA1|xxRR1 */
    shra_r.ph t0, t0, 8 /*  xxGG1|xxBB1 */
    shra_r.ph t9, t9, 8
    shra_r.ph t1, t1, 8
    shra_r.ph s2, s2, 8
    shra_r.ph t2, t2, 8
    shra_r.ph s0, s0, 8
    shra_r.ph t3, t3, 8
    /* pack the four low bytes back into one word */
    precr.qb.ph t0, t8, t0
    precr.qb.ph t1, t9, t1
    precr.qb.ph t2, s2, t2
    precr.qb.ph t3, s0, t3
    /* original alpha (t4/t6/t5/t7) << 24 | low 24 bits of the packed word */
    append t4, t0, 24
    append t6, t1, 24
    append t5, t2, 24
    append t7, t3, 24
    sw    t4, 0(a0)
    sw    t6, 4(a0)
    sw    t5, 8(a0)
    sw    t7, 12(a0)

    /* pixels 4..7 of the batch, same sequence at offsets 16..28 */
    lw     t0, 16(a0)
    lw     t1, 20(a0)
    lw     t2, 24(a0)
    lw     t3, 28(a0)
    srl    t4, t0, 24  /*  00|00|00|A1 */
    replv.ph t5, t4    /*  00|A1|00|A1 */
    srl    t6, t1, 24  /*  00|00|00|A2 */
    replv.ph t7, t6    /*  00|A2|00|A2 */
    muleu_s.ph.qbl t8, t0, t5 /* A1*A1|A1*R1 */
    muleu_s.ph.qbr t0, t0, t5 /* A1*G1|A1*B1 */
    muleu_s.ph.qbl t9, t1, t7 /* A2*A2|A2*R2 */
    muleu_s.ph.qbr t1, t1, t7 /* A2*G2|A2*B2 */
    srl    t5, t2, 24  /*  00|00|00|A3 */
    replv.ph s0, t5    /*  00|A3|00|A3 */
    srl    t7, t3, 24  /*  00|00|00|A4 */
    replv.ph s1, t7    /*  00|A4|00|A4 */
    muleu_s.ph.qbl s2, t2, s0 /* A3*A3|A3*R3 */
    muleu_s.ph.qbr t2, t2, s0 /* A3*G3|A3*B3 */
    muleu_s.ph.qbl s0, t3, s1 /* A4*A4|A4*R4 */
    muleu_s.ph.qbr t3, t3, s1 /* A4*G4|A4*B4 */
    preceu.ph.qbla s1, t8
    preceu.ph.qbla s3, t0
    addu.ph t8, t8, s1
    addu.ph t0, t0, s3
    preceu.ph.qbla s1, t9
    preceu.ph.qbla s3, t1
    addu.ph t9, t9, s1
    addu.ph t1, t1, s3
    preceu.ph.qbla s1, s2
    preceu.ph.qbla s3, t2
    addu.ph s2, s2, s1
    addu.ph t2, t2, s3
    preceu.ph.qbla s1, s0
    preceu.ph.qbla s3, t3
    addu.ph s0, s0, s1
    addu.ph t3, t3, s3
    shra_r.ph t8, t8, 8 /*  xxAA1|xxRR1 */
    shra_r.ph t0, t0, 8 /*  xxGG1|xxBB1 */
    shra_r.ph t9, t9, 8
    shra_r.ph t1, t1, 8
    shra_r.ph s2, s2, 8
    shra_r.ph t2, t2, 8
    shra_r.ph s0, s0, 8
    shra_r.ph t3, t3, 8
    precr.qb.ph t0, t8, t0
    precr.qb.ph t1, t9, t1
    precr.qb.ph t2, s2, t2
    precr.qb.ph t3, s0, t3
    append t4, t0, 24
    append t6, t1, 24
    append t5, t2, 24
    append t7, t3, 24
    sw    t4, 16(a0)
    sw    t6, 20(a0)
    sw    t5, 24(a0)
    sw    t7, 28(a0)
    bgtz  v1, 2b       /* if (v1): unrolled loop */
     addiu a0, a0, 32  /* data += 8 pixels */

    /*
     * Reached both after the unrolled loop and directly when linelen < 8,
     * so an empty tail (including linelen == 0) never touches memory.
     */
6:  beqz   v0, 4f      /* if (!v0): skip tail loop */
     nop

    /* tail loop, handles the remaining (linelen % 8) pixels, one at a time */
1:  lw     t1, 0 (a0)
    addiu  v0, v0, -1  /* tail-- */
    srl    t2, t1, 24  /* t2 = alpha */
    replv.ph t3, t2    /*  00|A|00|A */
    muleu_s.ph.qbl t4, t1, t3 /* A*A|A*R */
    muleu_s.ph.qbr t1, t1, t3 /* A*G|A*B */
    preceu.ph.qbla t3, t4
    preceu.ph.qbla t5, t1
    addu.ph   t4, t4, t3
    addu.ph   t1, t1, t5
    shra_r.ph t4, t4, 8
    shra_r.ph t1, t1, 8
    precr.qb.ph t1, t4, t1
    append   t2, t1, 24 /* alpha << 24 | premultiplied R,G,B */
    sw     t2, 0(a0)
    bgtz   v0, 1b
     addiu a0, a0,  4  /* data++ */

4:  bnez   a1, 3b      /* if (numlines): next line */
     addu  a0, a0, a3  /* data += srclineskip */

0:  /* return */
    RESTORE_REGS_FROM_STACK  0, s0, s1, s2, s3

    jr ra
     nop
/*
 * Expands packed 24-bit R,G,B triplets into 32-bit words whose top byte is
 * forced to 0xFF.
 *
 * NOTE(review): the symbol declaration (entry macro / label) of this routine
 * is not part of these lines - confirm that it directly precedes this code.
 *
 * Parameters:
 *   a0 - dst *a8r8g8b8
 *   a1 - src *r8g8b8
 *   a2 - len (number of pixels)
 *
 * R G B r  g b R G  B r g b  R G B r  g b . . .  -- input
 * -------  -------  -------  -------  -------
 * F R G B  F r g b  F R G B  F r g b  F R G . .  -- output (F = 0xFF)
 *
 * Register usage:
 *   a2 - tail (len % 4) == (len & 0x3)
 *   t0 - batches (len / 4) == (len >> 2); reused as the pixel accumulator
 *        in the tail loop
 *   t7 - constant 0xFF000000 in the batch loop
 *   t1-t6, s0-s3, v0, v1 - temporary (s0-s3, v0, v1 are saved and restored
 *        around the batch loop)
 *
 * Branch delay slots are filled by hand; the instruction in a delay slot is
 * indented by one extra space.
 */

    srl   t0, a2, 2     /* batches = len / 4 */
    andi  a2, a2, 0x3   /* tail = len % 4 */

    beqz  t0, 5f        /* if !batches: tail */
     lui  t7, 0xff00    /* [FF 00 00 00] */
    SAVE_REGS_ON_STACK 8, s1, s2, s3, s0, v0, v1

    /* batch loop: 12 source bytes -> 4 destination words per iteration */
1:  pref    4, 0 (a1)   /* hint: load-streamed */
    pref    5, 0 (a0)   /* hint: store-streamed */
    addiu  t0, t0, -1   /* batches-- */

    lbu    t1,  0 (a1)  /* [__ __ __ R1] */
    lbu    t2,  1 (a1)  /* [__ __ __ G1] */
    lbu    t3,  2 (a1)  /* [__ __ __ B1] */

    lbu    t4,  3 (a1)  /* [__ __ __ r2] */
    lbu    t5,  4 (a1)  /* [__ __ __ g2] */
    lbu    t6,  5 (a1)  /* [__ __ __ b2] */

    lbu    s1,  6 (a1)  /* [__ __ __ R3] */
    lbu    s2,  7 (a1)  /* [__ __ __ G3] */
    lbu    s3,  8 (a1)  /* [__ __ __ B3] */

    lbu    s0,  9 (a1)  /* [__ __ __ r4] */
    lbu    v0, 10 (a1)  /* [__ __ __ g4] */
    lbu    v1, 11 (a1)  /* [__ __ __ b4] */

    append t1, t2, 8    /* [__ __ R1 G1] */
    append t4, t5, 8    /* [__ __ r2 g2] */
    append s1, s2, 8    /* [__ __ R3 G3] */
    append s0, v0, 8    /* [__ __ r4 g4] */
    append t1, t3, 8    /* [__ R1 G1 B1] */
    append t4, t6, 8    /* [__ r2 g2 b2] */
    append s1, s3, 8    /* [__ R3 G3 B3] */
    append s0, v1, 8    /* [__ r4 g4 b4] */
    or     t1, t1, t7   /* [FF R1 G1 B1] */
    or     t4, t4, t7   /* [FF r2 g2 b2] */
    or     s1, s1, t7   /* [FF R3 G3 B3] */
    or     s0, s0, t7   /* [FF r4 g4 b4] */

    sw     t1,  0 (a0)
    sw     t4,  4 (a0)
    sw     s1,  8 (a0)
    sw     s0, 12 (a0)

    addiu  a1, a1, 12   /* src += 4*3 bytes */
    bnez   t0, 1b       /* if batches: loop */
     addiu a0, a0, 16   /* dst += 4 pixels */

    RESTORE_REGS_FROM_STACK 8, s1, s2, s3, s0, v0, v1

    /* handle remaining "tail" (a2) items, one pixel per iteration */
5:  beqz   a2, 0f
     lui   t0, 0xff00   /* [FF __ __ __] */

1:  lbu    t1, 0 (a1)   /* [__ __ __ RR] */
    lbu    t2, 1 (a1)   /* [__ __ __ GG] */
    lbu    t3, 2 (a1)   /* [__ __ __ BB] */
    sll    t1, t1, 16   /* [__ RR __ __] */
    sll    t2, t2,  8   /* [__ __ GG __] */
    or     t0, t0, t1   /* [FF RR __ __] */
    or     t2, t2, t3   /* [__ __ GG BB] */
    addiu  a2, a2, -1   /* tail-- (addiu: never traps on overflow) */
    or     t0, t0, t2   /* [FF RR GG BB] */
    addiu  a1, a1,  3   /* src += 3      */
    sw     t0, 0 (a0)
    addiu  a0, a0,  4   /* dst++         */
    bnez   a2, 1b       /* if tail: loop */
     lui   t0, 0xff00   /* [FF __ __ __] re-arm accumulator for next pixel */

0:  jr ra
     nop