1//
2//  MNNNV21ToRGBAUnit.S
3//  MNN
4//
5//  Created by MNN on 2018/12/28.
6//  Copyright © 2018, Alibaba Group Holding Limited
7//
8
9#ifdef __aarch64__
10
11#include "MNNAsmGlobal.h"
12
13.text
14.align 5
// void MNNNV21ToRGBAUnit(const unsigned char* source, unsigned char* dest, size_t count, const unsigned char* uv);
// Auto: x0:source, x1:dest, x2:count, x3:uv
//
// Converts luma bytes from source plus interleaved chroma byte pairs from uv
// into interleaved 4-byte R,G,B,A pixels at dest (A is always 255).
//
// One unit = one loop iteration = 16 pixels:
//   reads 16 bytes from source, 16 bytes from uv (8 pairs, each pair shared by
//   two adjacent pixels) and writes 64 bytes to dest.
// count is the number of such units; count == 0 returns without touching memory.
//
// Fixed-point math with 6 fractional bits, where for pixel pair i
//   V = uv[2i] - 128, U = uv[2i + 1] - 128:
//   R = clamp_u8((Y * 64 + 73 * V) >> 6)
//   G = clamp_u8((Y * 64 - 25 * U - 37 * V) >> 6)
//   B = clamp_u8((Y * 64 + 130 * U) >> 6)
//
// Clobbers: x0-x4, x12, v0-v7, v16-v26, v31, flags. No stack usage.
asm_function MNNNV21ToRGBAUnit

// Guard: the loop below tests the counter only after a full iteration.
cbz x2, LoopEnd

// v0.h[0..3] = {73, 25, 37, 130}: chroma coefficients scaled by 64
mov w4, #73
mov w12, #25
mov v0.h[0], w4
mov v0.h[1], w12
mov w4, #37
mov w12, #130
mov v0.h[2], w4
mov v0.h[3], w12

movi v31.8b, #128 // chroma bias
// Alpha lanes. st4 needs four consecutive registers, hence one copy per store.
movi v7.8b, #255
movi v26.8b, #255


LoopL1:
// De-interleave: v19 = even uv bytes (V), v20 = odd uv bytes (U)
ld2 {v19.8b, v20.8b}, [x3], #16
// De-interleave: v16 = even pixels Y, v17 = odd pixels Y
ld2 {v16.8b, v17.8b}, [x0], #16

// Widen to 16 bit and remove the bias (result is signed, -128..127)
usubl v18.8h, v20.8b, v31.8b // U
usubl v19.8h, v19.8b, v31.8b // V

//v1.8h-v3.8h: RGB offset
mul v1.8h, v19.8h, v0.h[0]// R offset = 73 * V
mul v2.8h, v18.8h, v0.h[1]// 25 * U
mul v3.8h, v18.8h, v0.h[3]// B offset = 130 * U
mla v2.8h, v19.8h, v0.h[2]// G offset = 25 * U + 37 * V (subtracted below)

// Y * 64
ushll v16.8h, v16.8b, #6
ushll v17.8h, v17.8b, #6

// Even pixels.
// R and G always fit in signed 16 bit. B can reach 255*64 + 130*127 = 32830,
// which would wrap to a negative value, so it uses a saturating add.
add v20.8h, v16.8h, v1.8h
sub v21.8h, v16.8h, v2.8h
sqadd v22.8h, v16.8h, v3.8h

// >> 6, then saturate signed 16 bit to unsigned 8 bit (negative -> 0, >255 -> 255)
sqshrun v20.8b, v20.8h, #6
sqshrun v21.8b, v21.8h, #6
sqshrun v22.8b, v22.8h, #6

// Odd pixels, same chroma offsets
add v23.8h, v17.8h, v1.8h
sub v24.8h, v17.8h, v2.8h
sqadd v25.8h, v17.8h, v3.8h

sqshrun v23.8b, v23.8h, #6
sqshrun v24.8b, v24.8h, #6
sqshrun v25.8b, v25.8h, #6

// Re-interleave even/odd pixels: zip1 -> pixels 0..7, zip2 -> pixels 8..15
zip1 v4.8b, v20.8b, v23.8b
zip2 v23.8b, v20.8b, v23.8b
zip1 v5.8b, v21.8b, v24.8b
zip2 v24.8b, v21.8b, v24.8b
zip1 v6.8b, v22.8b, v25.8b
zip2 v25.8b, v22.8b, v25.8b

// Store R,G,B,A interleaved, 8 pixels (32 bytes) per st4
st4 {v4.8b, v5.8b, v6.8b, v7.8b}, [x1], #32
st4 {v23.8b, v24.8b, v25.8b, v26.8b}, [x1], #32

subs x2, x2, #1
bne LoopL1

LoopEnd:
ret
87#endif
88