//
//  MNNNV21ToRGBAUnit.S
//  MNN
//
//  Created by MNN on 2018/12/28.
//  Copyright © 2018, Alibaba Group Holding Limited
//

#ifdef __aarch64__

#include "MNNAsmGlobal.h"

.text
.align 5
//-----------------------------------------------------------------------------
// void MNNNV21ToRGBAUnit(const unsigned char* source, unsigned char* dest,
//                        size_t count, const unsigned char* uv);
//
// Target: AArch64 (NEON), GNU/Clang assembler syntax.
//
// In:
//   x0 = source : luma bytes, 16 consumed per iteration
//   x1 = dest   : output, 64 bytes (16 four-byte pixels) written per iteration
//   x2 = count  : number of iterations, i.e. number of 16-pixel groups
//                 (0 is accepted and returns immediately)
//   x3 = uv     : interleaved chroma bytes, 16 consumed per iteration
//                 (8 pairs; each pair is shared by two adjacent luma samples)
//
// Fixed-point scheme (6 fractional bits: luma is shifted left by 6, results
// are shifted right by 6 when narrowing):
//   c0 = even chroma byte - 128, c1 = odd chroma byte - 128
//   R = (Y*64 +  73*c0)          >> 6
//   G = (Y*64 - (25*c1 + 37*c0)) >> 6
//   B = (Y*64 + 130*c1)          >> 6
//   A = 255
//   each channel clamped to [0, 255]
//
// Registers touched: x0-x4, x12, v0-v7, v16-v26, v30, v31, flags.
// No stack is used.
//-----------------------------------------------------------------------------
asm_function MNNNV21ToRGBAUnit

    // Guard: the loop below tests its counter at the bottom, so entering it
    // with count == 0 would wrap the counter and run off every buffer.
    cbz     x2, LoopL1End

    // v0.h[0..3] = {73, 25, 37, 130}: the conversion coefficients.
    mov     w4, #73
    movi    v31.8b, #128                    // chroma bias
    mov     w12, #25
    mov     v0.h[0], w4
    mov     v0.h[1], w12
    mov     w4, #37
    mov     w12, #130
    mov     v0.h[2], w4
    mov     v0.h[3], w12
    movi    v30.8h, #0                      // lower clamp bound
    movi    v7.8b, #255                     // alpha for the first st4
    movi    v26.8b, #255                    // alpha for the second st4

LoopL1:
    // De-interleave: v19 = even chroma bytes, v20 = odd chroma bytes;
    //                v16 = even luma samples, v17 = odd luma samples.
    ld2     {v19.8b, v20.8b}, [x3], #16
    ld2     {v16.8b, v17.8b}, [x0], #16

    // Remove the 128 bias and widen to 16 bit (results are signed).
    usubl   v18.8h, v20.8b, v31.8b          // c1
    usubl   v19.8h, v19.8b, v31.8b          // c0

    // v1.8h-v3.8h: per-channel chroma offsets, shared by both luma halves.
    mul     v1.8h, v19.8h, v0.h[0]          // R offset =  73*c0
    mul     v2.8h, v18.8h, v0.h[1]
    mul     v3.8h, v18.8h, v0.h[3]          // B offset = 130*c1
    mla     v2.8h, v19.8h, v0.h[2]          // G offset = 25*c1 + 37*c0 (subtracted below)

    // Luma scaled to the same 6-bit fixed point (max 255*64 = 16320).
    ushll   v16.8h, v16.8b, #6
    ushll   v17.8h, v17.8b, #6

    // ---- even pixels ----
    add     v20.8h, v16.8h, v1.8h
    sub     v21.8h, v16.8h, v2.8h
    // Saturating add: 16320 + 130*127 = 32830 does not fit in a signed
    // halfword; a plain add would wrap negative and be clamped to 0
    // instead of 255. R and G cannot exceed the signed range.
    sqadd   v22.8h, v16.8h, v3.8h

    // Clamp negatives to 0, then drop the fraction with unsigned saturation
    // to 255.
    smax    v20.8h, v20.8h, v30.8h
    smax    v21.8h, v21.8h, v30.8h
    smax    v22.8h, v22.8h, v30.8h

    uqshrn  v20.8b, v20.8h, #6
    uqshrn  v21.8b, v21.8h, #6
    uqshrn  v22.8b, v22.8h, #6

    // ---- odd pixels ----
    add     v23.8h, v17.8h, v1.8h
    sub     v24.8h, v17.8h, v2.8h
    sqadd   v25.8h, v17.8h, v3.8h           // saturating for the same reason as above

    smax    v23.8h, v23.8h, v30.8h
    smax    v24.8h, v24.8h, v30.8h
    smax    v25.8h, v25.8h, v30.8h

    uqshrn  v23.8b, v23.8h, #6
    uqshrn  v24.8b, v24.8h, #6
    uqshrn  v25.8b, v25.8h, #6

    // Re-interleave even/odd results back into pixel order:
    // zip1 -> pixels 0..7, zip2 -> pixels 8..15. Each zip2 overwrites its
    // own second operand, so it must come after the matching zip1.
    zip1    v4.8b, v20.8b, v23.8b
    zip2    v23.8b, v20.8b, v23.8b
    zip1    v5.8b, v21.8b, v24.8b
    zip2    v24.8b, v21.8b, v24.8b
    zip1    v6.8b, v22.8b, v25.8b
    zip2    v25.8b, v22.8b, v25.8b

    // st4 interleaves the four registers byte-wise: R,G,B,A per pixel.
    st4     {v4.8b, v5.8b, v6.8b, v7.8b}, [x1], #32
    st4     {v23.8b, v24.8b, v25.8b, v26.8b}, [x1], #32

    subs    x2, x2, #1
    bne     LoopL1

LoopL1End:
    ret
#endif