/*
mediastreamer2 library - modular sound and video processing and streaming
Copyright (C) 2006-2010  Belledonne Communications SARL (simon.morlat@linphone.org)

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

#ifdef __ELF__
#	define ELF
#else
#	define ELF @
#endif

#ifdef __clang__
#	define FUNC @.func
#	define ENDFUNC @.endfunc
#else
#	define FUNC .func
#	define ENDFUNC .endfunc
#endif

.macro require8 val=1
ELF	.eabi_attribute 24, \val
.endm

.macro preserve8 val=1
ELF	.eabi_attribute 25, \val
.endm

.macro function name
	.global \name
ELF	.hidden \name
ELF	.type \name, %function
	FUNC \name
\name:
.endm

	.section .rodata
	.align 8
ymult:	.word 9535, 9535, 9535, 9535
rvmult:	.word 13074, 13074, 13074, 13074
gbmult:	.word 6660, 6660, 6660, 6660
gumult:	.word 3203, 3203, 3203, 3203
bumult:	.word 16531, 16531, 16531, 16531

	.fpu neon
	.text

/* void ms_line_rgb2rgb565_4(const int16_t *r, const int16_t *g, const int16_t *b, uint16_t *dst, int width) */
function ms_line_rgb2rgb565_4
	push {r4}
	ldr r4, [sp, #4]	/* load width into r4 */
1:
	vld1.16 {d0}, [r0,:64]!
	vld1.16 {d1}, [r1,:64]!
	vld1.16 {d2}, [r2,:64]!
	vshr.u16 d0, d0, #3
	vshr.u16 d1, d1, #2
	vshr.u16 d2, d2, #3
	vsli.16 d2, d1, #5	/* insert g into d2 */
	vsli.16 d2, d0, #11	/* insert r into d2 */
	vst1.16 {d2}, [r3,:64]!
	subs r4, r4, #4
	bne 1b
	pop {r4}
	bx lr
ENDFUNC

/* void ms_line_rgb2rgb565_8(const int16_t *r, const int16_t *g, const int16_t *b, uint16_t *dst, int width) */
function ms_line_rgb2rgb565_8
	push {r4}
	ldr r4, [sp, #4]	/* load width into r4 */
1:
	vld1.16 {d0,d1}, [r0,:64]!
	vshr.u16 q0, q0, #3
	vld1.16 {d2,d3}, [r1,:64]!
	vshr.u16 q1, q1, #2
	vld1.16 {d4,d5}, [r2,:64]!
	vshr.u16 q2, q2, #3
	vsli.16 q2, q1, #5	/* insert g into q2 */
	vsli.16 q2, q0, #11	/* insert r into q2 */
	vst1.16 {q2}, [r3,:64]!
	subs r4, r4, #8
	bne 1b
	pop {r4}
	bx lr
ENDFUNC
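/*
 Reference model of the two packers above, in plain C. This is a
 documentation-only sketch, not assembled into the library; like the NEON
 code it assumes width is a multiple of 4 (resp. 8) and that each int16_t
 component holds an 8-bit value (it also assumes <stdint.h>):

 static void ms_line_rgb2rgb565_ref(const int16_t *r, const int16_t *g,
                                    const int16_t *b, uint16_t *dst, int width)
 {
     int i;
     for (i = 0; i < width; i++) {
         uint16_t red   = (uint16_t)r[i] >> 3;   // vshr.u16 #3: keep 5 bits
         uint16_t green = (uint16_t)g[i] >> 2;   // vshr.u16 #2: keep 6 bits
         uint16_t blue  = (uint16_t)b[i] >> 3;   // vshr.u16 #3: keep 5 bits
         // vsli.16 #5 then #11: insert green and red above blue
         dst[i] = (uint16_t)((red << 11) | (green << 5) | blue);
     }
 }
*/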
.macro load_pixels_4_2 d_reg1, d_reg2, src
	add r12, \src, #2	/* r12 points to the pixel right after \src */
	vld1.16 \d_reg1[0], [\src], r4	/* load the pixel at \src into \d_reg1, advance \src by the grid step in r4 */
	vld1.16 \d_reg1[1], [\src], r5
	vld1.16 \d_reg1[2], [\src], r6
	vld1.16 \d_reg1[3], [\src], r7
	vld1.16 \d_reg2[0], [r12], r4	/* same for the neighbouring pixels, into \d_reg2 */
	vld1.16 \d_reg2[1], [r12], r5
	vld1.16 \d_reg2[2], [r12], r6
	vld1.16 \d_reg2[3], [r12], r7
.endm

.macro filter_pixels_8 q_srcdst, q_src2
	vsub.s16 q9, \q_src2, \q_srcdst	/* q9 = x(n+1) - x(n) */
	vmul.s16 q10, q9, q8	/* q10 = coef * q9 */
	vsra.s16 \q_srcdst, q10, #7	/* x(n) += (coef * (x(n+1) - x(n))) >> 7 */
	vabs.s16 \q_srcdst, \q_srcdst	/* force the result to be non-negative */
.endm

/* void ms_line_scale_8(const uint32_t *grid, const int16_t **src, int16_t **dst, int dst_width, int16_t *filter); */
function ms_line_scale_8
	push {r4-r12,lr}	/* we use lr as a normal register here */
	ldr lr, [sp, #40]	/* r4-r12 + lr = 10 registers, 40 = 10*4: offset to retrieve the filter table */
	ldm r1, {r8,r9}	/* r8 = src[0], r9 = src[1] */
	ldr r1, [r1, #8]	/* r1 = src[2] */
	ldm r2, {r10,r11}	/* r10 = dst[0], r11 = dst[1] */
	ldr r2, [r2, #8]	/* r2 = dst[2] */
1:
	ldm r0!, {r4,r5,r6,r7}	/* load 4 entries of the grid into r4,r5,r6,r7 */
	load_pixels_4_2 d4, d10, r1
	load_pixels_4_2 d6, d12, r8
	load_pixels_4_2 d8, d14, r9
	ldm r0!, {r4,r5,r6,r7}	/* load 4 more entries of the grid into r4,r5,r6,r7 */
	load_pixels_4_2 d5, d11, r1
	load_pixels_4_2 d7, d13, r8
	load_pixels_4_2 d9, d15, r9
	/* x(n) = q2,q3,q4 ; x(n+1) = q5,q6,q7 */
	vld1.16 {q8}, [lr]!	/* load the filtering coefficients into q8 */
	/* we need to compute (coef * (x(n+1) - x(n))) >> 7 + x(n) */
	filter_pixels_8 q2, q5
	filter_pixels_8 q3, q6
	filter_pixels_8 q4, q7
	vst1.16 {q2}, [r2]!	/* write q2 (the 8 filtered pixels) to the memory pointed by r2 */
	vst1.16 {q3}, [r10]!
	vst1.16 {q4}, [r11]!
	subs r3, r3, #8	/* we have processed 8 pixels, decrement width */
	bne 1b
	pop {r4-r12,pc}
ENDFUNC

.macro load_pixels_4 d_reg, src
	vld1.16 \d_reg[0], [\src], r4	/* load the pixel at \src into \d_reg, advance \src by the grid step in r4 */
	vld1.16 \d_reg[1], [\src], r5
	vld1.16 \d_reg[2], [\src], r6
	vld1.16 \d_reg[3], [\src], r7
.endm

/* void ms_line_scale_simple_8(const uint32_t *grid, const uint16_t **src, uint16_t **dst, int dst_width); */
function ms_line_scale_simple_8
	push {r4-r11}
	ldr r8, [r1, #4]	/* r8 = src[1] */
	ldr r9, [r1, #8]	/* r9 = src[2] */
	ldr r1, [r1]	/* r1 = src[0] */
	ldr r10, [r2, #4]	/* r10 = dst[1] */
	ldr r11, [r2, #8]	/* r11 = dst[2] */
	ldr r2, [r2]	/* r2 = dst[0] */
1:
	ldrd r4, r5, [r0], #8	/* load 2 entries of the grid into r4,r5 */
	ldrd r6, r7, [r0], #8	/* load 2 entries of the grid into r6,r7 */
	load_pixels_4 d4, r1
	load_pixels_4 d6, r8
	load_pixels_4 d8, r9
	ldrd r4, r5, [r0], #8	/* load 2 more entries of the grid into r4,r5 */
	ldrd r6, r7, [r0], #8	/* load 2 more entries of the grid into r6,r7 */
	load_pixels_4 d5, r1
	load_pixels_4 d7, r8
	load_pixels_4 d9, r9
	vst1.16 {q2}, [r2]!	/* write q2 (the 8 selected pixels) to the memory pointed by r2 */
	vst1.16 {q3}, [r10]!
	vst1.16 {q4}, [r11]!
	subs r3, r3, #8	/* we have processed 8 pixels, decrement width */
	bne 1b
	pop {r4-r11}
	bx lr
ENDFUNC
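/*
 Reference model of the two scalers above, in plain C (a documentation-only
 sketch, not assembled into the library; assumes <stdint.h>). grid[] holds
 the byte increment from one selected source pixel to the next, filter[]
 holds the Q7 fractional weights, and src/dst each point to three plane
 pointers. ms_line_scale_8 interpolates linearly between the selected pixel
 x(n) and its right neighbour x(n+1); ms_line_scale_simple_8 simply picks
 x(n) (nearest neighbour):

 static void ms_line_scale_ref(const uint32_t *grid, const int16_t **src,
                               int16_t **dst, int dst_width,
                               const int16_t *filter)
 {
     int plane, n;
     for (plane = 0; plane < 3; plane++) {
         const uint8_t *s = (const uint8_t *)src[plane];
         for (n = 0; n < dst_width; n++) {
             int16_t xn   = *(const int16_t *)s;        // x(n)
             int16_t xn1  = *(const int16_t *)(s + 2);  // x(n+1)
             int16_t diff = (int16_t)(xn1 - xn);                // vsub.s16
             int16_t prod = (int16_t)(filter[n] * diff);        // vmul.s16, low 16 bits
             int16_t v    = (int16_t)(xn + (prod >> 7));        // vsra.s16 #7
             dst[plane][n] = (int16_t)(v < 0 ? -v : v);         // vabs.s16
             s += grid[n];   // jump to the next selected source pixel
         }
     }
 }

 The simple variant amounts to replacing the interpolation lines with
 dst[plane][n] = xn; and takes no filter argument.
*/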
.if 0
/* void line_yuv2rgb(uint8_t *y, uint8_t *u, uint8_t *v, int16_t *r, int16_t *g, int16_t *b, int n) */
function line_yuv2rgb
	push {r4-r7}
	ldr r6, [sp, #24]	/* load n into r6 */
	ldr r5, [sp, #20]	/* load b into r5 */
	ldr r4, [sp, #16]	/* load g into r4 */
	vld1.8 {d12}, [r0]!	/* load 8 y */
	vmovl.u8 q6, d12	/* widen them to 16 bits */
	vmovl.u16 q0, d12	/* widen the first 4 of them to 32 bits into q0 */
	vmovl.u16 q1, d13	/* widen the 4 others to 32 bits into q1 */
	vld1.32 {d12[0]}, [r1]!	/* load 4 u */
	vmovl.u8 q6, d12	/* widen them to 16 bits */
	vmovl.u16 q2, d12	/* widen the first 4 of them to 32 bits into q2 */
	vld1.32 {d12[0]}, [r2]!	/* load 4 v */
	vmovl.u8 q6, d12	/* widen them to 16 bits */
	vmovl.u16 q3, d12	/* widen the first 4 of them to 32 bits into q3 */
	/* at this stage we have y in q0 and q1, u in q2, and v in q3 */
	mov r7, #16
	vdup.32 q4, r7
	vsub.s32 q0, q0, q4	/* remove the bias from y */
	vsub.s32 q1, q1, q4	/* remove the bias from y */
	mov r7, #128
	vdup.32 q4, r7
	vsub.s32 q2, q2, q4	/* remove the bias from u */
	vsub.s32 q3, q3, q4	/* remove the bias from v */
	movrel r7, ymult
	vld1.32 {q4}, [r7]
	vmul.s32 q0, q0, q4	/* multiply y by 9535 */
	vmul.s32 q1, q1, q4	/* multiply y by 9535 */
	movrel r7, rvmult
	vld1.32 {q4}, [r7]
	/**/
	pop {r4-r7}
	bx lr
ENDFUNC
.endif
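/*
 The .rodata constants at the top of this file are the ITU-R BT.601
 YUV->RGB coefficients in Q13 fixed point (1.164*8192 ~ 9535,
 1.596*8192 ~ 13074, 0.813*8192 ~ 6660, 0.391*8192 ~ 3203,
 2.018*8192 ~ 16531). A plain-C sketch of the conversion the disabled
 function above begins to implement (documentation only; it omits the
 clamping of the results to [0,255] that a complete version would need,
 and assumes chroma at half horizontal resolution, as the 8 y / 4 u /
 4 v loads suggest):

 static void line_yuv2rgb_ref(const uint8_t *y, const uint8_t *u,
                              const uint8_t *v, int16_t *r, int16_t *g,
                              int16_t *b, int n)
 {
     int i;
     for (i = 0; i < n; i++) {
         int32_t yy = 9535 * (y[i] - 16);        // ymult, luma bias 16
         int32_t uu = u[i / 2] - 128;            // chroma bias 128
         int32_t vv = v[i / 2] - 128;
         r[i] = (int16_t)((yy + 13074 * vv) >> 13);             // rvmult
         g[i] = (int16_t)((yy - 6660 * vv - 3203 * uu) >> 13);  // gbmult, gumult
         b[i] = (int16_t)((yy + 16531 * uu) >> 13);             // bumult
     }
 }
*/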