From f554daa098526e91c6440d29b1ddc213bd01ad0f Mon Sep 17 00:00:00 2001
From: Damiano Galassi <damiog@gmail.com>
Date: Tue, 26 Jan 2021 19:40:27 +0100
Subject: [PATCH] Revert "Add aarch64 support - Part 2"

This reverts commit ec7396adaa6afd2c8aab1918cfe4bb6e384740c3.
---
 build/aarch64-linux/crosscompile.cmake | 15 --
 build/aarch64-linux/make-Makefiles.bash | 4 -
 source/CMakeLists.txt | 38 +---
 source/common/CMakeLists.txt | 35 +--
 source/common/arm/asm-primitives.cpp | 291 ++++++++++++------------
 source/common/cpu.cpp | 4 -
 source/common/pixel.cpp | 9 -
 source/common/primitives.h | 11 -
 source/test/CMakeLists.txt | 16 +-
 source/test/testbench.cpp | 16 --
 source/test/testharness.h | 5 -
 11 files changed, 170 insertions(+), 274 deletions(-)
 delete mode 100644 build/aarch64-linux/crosscompile.cmake
 delete mode 100644 build/aarch64-linux/make-Makefiles.bash

diff --git a/build/aarch64-linux/crosscompile.cmake b/build/aarch64-linux/crosscompile.cmake
deleted file mode 100644
index 41c8217f2..000000000
--- a/build/aarch64-linux/crosscompile.cmake
+++ /dev/null
@@ -1,15 +0,0 @@
-# CMake toolchain file for cross compiling x265 for aarch64
-# This feature is only supported as experimental. Use with caution.
-# Please report bugs on bitbucket
-# Run cmake with: cmake -DCMAKE_TOOLCHAIN_FILE=crosscompile.cmake -G "Unix Makefiles" ../../source && ccmake ../../source
-
-set(CROSS_COMPILE_ARM 1)
-set(CMAKE_SYSTEM_NAME Linux)
-set(CMAKE_SYSTEM_PROCESSOR aarch64)
-
-# specify the cross compiler
-set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
-set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)
-
-# specify the target environment
-SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu)
diff --git a/build/aarch64-linux/make-Makefiles.bash b/build/aarch64-linux/make-Makefiles.bash
deleted file mode 100644
index c9582da0a..000000000
--- a/build/aarch64-linux/make-Makefiles.bash
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-# Run this from within a bash shell
-
-cmake -DCMAKE_TOOLCHAIN_FILE="crosscompile.cmake" -G "Unix Makefiles" ../../source && ccmake ../../source
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 95218f5dc..2ed5c24e3 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -40,7 +40,7 @@ SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")
 # System architecture detection
 string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC)
 set(X86_ALIASES x86 i386 i686 x86_64 amd64)
-set(ARM_ALIASES armv6l armv7l aarch64)
+set(ARM_ALIASES armv6l armv7l)
 list(FIND X86_ALIASES "${SYSPROC}" X86MATCH)
 list(FIND ARM_ALIASES "${SYSPROC}" ARMMATCH)
 set(POWER_ALIASES ppc64 ppc64le)
@@ -70,15 +70,9 @@ elseif(ARMMATCH GREATER "-1")
 else()
 set(CROSS_COMPILE_ARM 0)
 endif()
+ message(STATUS "Detected ARM target processor")
 set(ARM 1)
- if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
- message(STATUS "Detected ARM64 target processor")
- set(ARM64 1)
- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0)
- else()
- message(STATUS "Detected ARM target processor")
- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1)
- endif()
+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1)
 else()
 message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
 message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
@@ -239,24 +233,14 @@ if(GCC)
 endif()
 endif()
 if(ARM AND CROSS_COMPILE_ARM)
- if(ARM64)
- set(ARM_ARGS -fPIC)
- else()
- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC)
- endif()
- message(STATUS "cross compile arm")
+ set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC)
 elseif(ARM)
- if(ARM64)
- set(ARM_ARGS -fPIC)
+ find_package(Neon)
+ if(CPU_HAS_NEON)
+ set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC)
 add_definitions(-DHAVE_NEON)
 else()
- find_package(Neon)
- if(CPU_HAS_NEON)
- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC)
- add_definitions(-DHAVE_NEON)
- else()
- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
- endif()
+ set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
 endif()
 endif()
 add_definitions(${ARM_ARGS})
@@ -536,11 +520,7 @@ if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
 # compile ARM arch asm files here
 enable_language(ASM)
 foreach(ASM ${ARM_ASMS})
- if(ARM64)
- set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/aarch64/${ASM})
- else()
- set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM})
- endif()
+ set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM})
 list(APPEND ASM_SRCS ${ASM_SRC})
 list(APPEND ASM_OBJS ${ASM}.${SUFFIX})
 add_custom_command(
diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
index 12b643ad5..c34064b2f 100644
--- a/source/common/CMakeLists.txt
+++ b/source/common/CMakeLists.txt
@@ -14,7 +14,7 @@ if(EXTRA_LIB)
 endif(EXTRA_LIB)

 if(ENABLE_ASSEMBLY)
- set_source_files_properties(threading.cpp primitives.cpp pixel.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1)
+ set_source_files_properties(threading.cpp primitives.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1)
 list(APPEND VFLAGS "-DENABLE_ASSEMBLY=1")
 endif(ENABLE_ASSEMBLY)

@@ -84,33 +84,16 @@ if(ENABLE_ASSEMBLY AND X86)
 endif(ENABLE_ASSEMBLY AND X86)

 if(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM))
- if(ARM64)
- if(GCC AND (CMAKE_CXX_FLAGS_RELEASE MATCHES "-O3"))
- message(STATUS "Detected CXX compiler using -O3 optimization level")
- add_definitions(-DAUTO_VECTORIZE=1)
- endif()
- set(C_SRCS asm-primitives.cpp pixel.h ipfilter8.h)
-
- # add ARM assembly/intrinsic files here
- set(A_SRCS asm.S mc-a.S sad-a.S pixel-util.S ipfilter8.S)
- set(VEC_PRIMITIVES)
-
- set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
- foreach(SRC ${C_SRCS})
- set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
- endforeach()
- else()
- set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)
+ set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)

- # add ARM assembly/intrinsic files here
- set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S)
- set(VEC_PRIMITIVES)
+ # add ARM assembly/intrinsic files here
+ set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S)
+ set(VEC_PRIMITIVES)

- set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
- foreach(SRC ${C_SRCS})
- set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC})
- endforeach()
- endif()
+ set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
+ foreach(SRC ${C_SRCS})
+ set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC})
+ endforeach()
 source_group(Assembly FILES ${ASM_PRIMITIVES})
 endif(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM))

diff --git a/source/common/arm/asm-primitives.cpp b/source/common/arm/asm-primitives.cpp
index 7f11503f9..422217845 100644
--- a/source/common/arm/asm-primitives.cpp
+++ b/source/common/arm/asm-primitives.cpp
@@ -5,7 +5,6 @@
 * Praveen Kumar Tiwari <praveen@multicorewareinc.com>
 * Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com>
 * Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
- * Hongbin Liu<liuhongbin1@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -49,77 +48,77 @@ void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
 p.ssim_4x4x2_core = PFX(ssim_4x4x2_core_neon);

 // addAvg
- p.pu[LUMA_4x4].addAvg[NONALIGNED] = PFX(addAvg_4x4_neon);
- p.pu[LUMA_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon);
- p.pu[LUMA_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon);
- p.pu[LUMA_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon);
- p.pu[LUMA_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon);
- p.pu[LUMA_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon);
- p.pu[LUMA_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon);
- p.pu[LUMA_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon);
- p.pu[LUMA_16x4].addAvg[NONALIGNED] = PFX(addAvg_16x4_neon);
- p.pu[LUMA_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon);
- p.pu[LUMA_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon);
- p.pu[LUMA_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon);
- p.pu[LUMA_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon);
- p.pu[LUMA_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon);
- p.pu[LUMA_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon);
- p.pu[LUMA_32x8].addAvg[NONALIGNED] = PFX(addAvg_32x8_neon);
- p.pu[LUMA_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon);
- p.pu[LUMA_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon);
- p.pu[LUMA_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon);
- p.pu[LUMA_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon);
- p.pu[LUMA_48x64].addAvg[NONALIGNED] = PFX(addAvg_48x64_neon);
- p.pu[LUMA_64x16].addAvg[NONALIGNED] = PFX(addAvg_64x16_neon);
- p.pu[LUMA_64x32].addAvg[NONALIGNED] = PFX(addAvg_64x32_neon);
- p.pu[LUMA_64x48].addAvg[NONALIGNED] = PFX(addAvg_64x48_neon);
- p.pu[LUMA_64x64].addAvg[NONALIGNED] = PFX(addAvg_64x64_neon);
+ p.pu[LUMA_4x4].addAvg = PFX(addAvg_4x4_neon);
+ p.pu[LUMA_4x8].addAvg = PFX(addAvg_4x8_neon);
+ p.pu[LUMA_4x16].addAvg = PFX(addAvg_4x16_neon);
+ p.pu[LUMA_8x4].addAvg = PFX(addAvg_8x4_neon);
+ p.pu[LUMA_8x8].addAvg = PFX(addAvg_8x8_neon);
+ p.pu[LUMA_8x16].addAvg = PFX(addAvg_8x16_neon);
+ p.pu[LUMA_8x32].addAvg = PFX(addAvg_8x32_neon);
+ p.pu[LUMA_12x16].addAvg = PFX(addAvg_12x16_neon);
+ p.pu[LUMA_16x4].addAvg = PFX(addAvg_16x4_neon);
+ p.pu[LUMA_16x8].addAvg = PFX(addAvg_16x8_neon);
+ p.pu[LUMA_16x12].addAvg = PFX(addAvg_16x12_neon);
+ p.pu[LUMA_16x16].addAvg = PFX(addAvg_16x16_neon);
+ p.pu[LUMA_16x32].addAvg = PFX(addAvg_16x32_neon);
+ p.pu[LUMA_16x64].addAvg = PFX(addAvg_16x64_neon);
+ p.pu[LUMA_24x32].addAvg = PFX(addAvg_24x32_neon);
+ p.pu[LUMA_32x8].addAvg = PFX(addAvg_32x8_neon);
+ p.pu[LUMA_32x16].addAvg = PFX(addAvg_32x16_neon);
+ p.pu[LUMA_32x24].addAvg = PFX(addAvg_32x24_neon);
+ p.pu[LUMA_32x32].addAvg = PFX(addAvg_32x32_neon);
+ p.pu[LUMA_32x64].addAvg = PFX(addAvg_32x64_neon);
+ p.pu[LUMA_48x64].addAvg = PFX(addAvg_48x64_neon);
+ p.pu[LUMA_64x16].addAvg = PFX(addAvg_64x16_neon);
+ p.pu[LUMA_64x32].addAvg = PFX(addAvg_64x32_neon);
+ p.pu[LUMA_64x48].addAvg = PFX(addAvg_64x48_neon);
+ p.pu[LUMA_64x64].addAvg = PFX(addAvg_64x64_neon);

 // chroma addAvg
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg[NONALIGNED] = PFX(addAvg_4x2_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg[NONALIGNED] = PFX(addAvg_4x4_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg[NONALIGNED] = PFX(addAvg_6x8_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg[NONALIGNED] = PFX(addAvg_8x2_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg[NONALIGNED] = PFX(addAvg_8x6_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg[NONALIGNED] = PFX(addAvg_16x4_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg[NONALIGNED] = PFX(addAvg_32x8_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon);
- p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon);
-
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg[NONALIGNED] = PFX(addAvg_4x32_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg[NONALIGNED] = PFX(addAvg_6x16_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg[NONALIGNED] = PFX(addAvg_8x12_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg[NONALIGNED] = PFX(addAvg_8x64_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg[NONALIGNED] = PFX(addAvg_12x32_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg[NONALIGNED] = PFX(addAvg_16x24_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg[NONALIGNED] = PFX(addAvg_24x64_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg[NONALIGNED] = PFX(addAvg_32x48_neon);
- p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg = PFX(addAvg_4x2_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg = PFX(addAvg_4x4_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg = PFX(addAvg_4x8_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg = PFX(addAvg_4x16_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg = PFX(addAvg_6x8_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg = PFX(addAvg_8x2_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg = PFX(addAvg_8x4_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg = PFX(addAvg_8x6_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg = PFX(addAvg_8x8_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg = PFX(addAvg_8x16_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg = PFX(addAvg_8x32_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg = PFX(addAvg_12x16_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg = PFX(addAvg_16x4_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg = PFX(addAvg_16x8_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg = PFX(addAvg_16x12_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg = PFX(addAvg_16x16_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg = PFX(addAvg_16x32_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg = PFX(addAvg_24x32_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg = PFX(addAvg_32x8_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg = PFX(addAvg_32x16_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg = PFX(addAvg_32x24_neon);
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg = PFX(addAvg_32x32_neon);
+
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg = PFX(addAvg_4x8_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg = PFX(addAvg_4x16_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg = PFX(addAvg_4x32_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg = PFX(addAvg_6x16_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg = PFX(addAvg_8x4_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg = PFX(addAvg_8x8_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg = PFX(addAvg_8x12_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg = PFX(addAvg_8x16_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg = PFX(addAvg_8x32_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg = PFX(addAvg_8x64_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg = PFX(addAvg_12x32_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg = PFX(addAvg_16x8_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg = PFX(addAvg_16x16_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg = PFX(addAvg_16x24_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg = PFX(addAvg_16x32_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg = PFX(addAvg_16x64_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg = PFX(addAvg_24x64_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg = PFX(addAvg_32x16_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg = PFX(addAvg_32x32_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg = PFX(addAvg_32x48_neon);
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = PFX(addAvg_32x64_neon);

 // quant
 p.quant = PFX(quant_neon);
@@ -403,7 +402,7 @@ void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
 p.scale2D_64to32 = PFX(scale2D_64to32_neon);

 // scale1D_128to64
- p.scale1D_128to64[NONALIGNED] = PFX(scale1D_128to64_neon);
+ p.scale1D_128to64 = PFX(scale1D_128to64_neon);

 // copy_count
 p.cu[BLOCK_4x4].copy_cnt = PFX(copy_cnt_4_neon);
@@ -412,37 +411,37 @@ void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
 p.cu[BLOCK_32x32].copy_cnt = PFX(copy_cnt_32_neon);

 // filterPixelToShort
- p.pu[LUMA_4x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x4_neon);
- p.pu[LUMA_4x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x8_neon);
- p.pu[LUMA_4x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x16_neon);
- p.pu[LUMA_8x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x4_neon);
- p.pu[LUMA_8x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x8_neon);
- p.pu[LUMA_8x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x16_neon);
- p.pu[LUMA_8x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x32_neon);
- p.pu[LUMA_12x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_12x16_neon);
- p.pu[LUMA_16x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x4_neon);
- p.pu[LUMA_16x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x8_neon);
- p.pu[LUMA_16x12].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x12_neon);
- p.pu[LUMA_16x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x16_neon);
- p.pu[LUMA_16x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x32_neon);
- p.pu[LUMA_16x64].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x64_neon);
- p.pu[LUMA_24x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_24x32_neon);
- p.pu[LUMA_32x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_32x8_neon);
- p.pu[LUMA_32x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_32x16_neon);
- p.pu[LUMA_32x24].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_32x24_neon);
- p.pu[LUMA_32x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_32x32_neon);
- p.pu[LUMA_32x64].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_32x64_neon);
- p.pu[LUMA_48x64].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_48x64_neon);
- p.pu[LUMA_64x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_64x16_neon);
- p.pu[LUMA_64x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_64x32_neon);
- p.pu[LUMA_64x48].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_64x48_neon);
- p.pu[LUMA_64x64].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_64x64_neon);
+ p.pu[LUMA_4x4].convert_p2s = PFX(filterPixelToShort_4x4_neon);
+ p.pu[LUMA_4x8].convert_p2s = PFX(filterPixelToShort_4x8_neon);
+ p.pu[LUMA_4x16].convert_p2s = PFX(filterPixelToShort_4x16_neon);
+ p.pu[LUMA_8x4].convert_p2s = PFX(filterPixelToShort_8x4_neon);
+ p.pu[LUMA_8x8].convert_p2s = PFX(filterPixelToShort_8x8_neon);
+ p.pu[LUMA_8x16].convert_p2s = PFX(filterPixelToShort_8x16_neon);
+ p.pu[LUMA_8x32].convert_p2s = PFX(filterPixelToShort_8x32_neon);
+ p.pu[LUMA_12x16].convert_p2s = PFX(filterPixelToShort_12x16_neon);
+ p.pu[LUMA_16x4].convert_p2s = PFX(filterPixelToShort_16x4_neon);
+ p.pu[LUMA_16x8].convert_p2s = PFX(filterPixelToShort_16x8_neon);
+ p.pu[LUMA_16x12].convert_p2s = PFX(filterPixelToShort_16x12_neon);
+ p.pu[LUMA_16x16].convert_p2s = PFX(filterPixelToShort_16x16_neon);
+ p.pu[LUMA_16x32].convert_p2s = PFX(filterPixelToShort_16x32_neon);
+ p.pu[LUMA_16x64].convert_p2s = PFX(filterPixelToShort_16x64_neon);
+ p.pu[LUMA_24x32].convert_p2s = PFX(filterPixelToShort_24x32_neon);
+ p.pu[LUMA_32x8].convert_p2s = PFX(filterPixelToShort_32x8_neon);
+ p.pu[LUMA_32x16].convert_p2s = PFX(filterPixelToShort_32x16_neon);
+ p.pu[LUMA_32x24].convert_p2s = PFX(filterPixelToShort_32x24_neon);
+ p.pu[LUMA_32x32].convert_p2s = PFX(filterPixelToShort_32x32_neon);
+ p.pu[LUMA_32x64].convert_p2s = PFX(filterPixelToShort_32x64_neon);
+ p.pu[LUMA_48x64].convert_p2s = PFX(filterPixelToShort_48x64_neon);
+ p.pu[LUMA_64x16].convert_p2s = PFX(filterPixelToShort_64x16_neon);
+ p.pu[LUMA_64x32].convert_p2s = PFX(filterPixelToShort_64x32_neon);
+ p.pu[LUMA_64x48].convert_p2s = PFX(filterPixelToShort_64x48_neon);
+ p.pu[LUMA_64x64].convert_p2s = PFX(filterPixelToShort_64x64_neon);

 // Block_fill
- p.cu[BLOCK_4x4].blockfill_s[NONALIGNED] = PFX(blockfill_s_4x4_neon);
- p.cu[BLOCK_8x8].blockfill_s[NONALIGNED] = PFX(blockfill_s_8x8_neon);
- p.cu[BLOCK_16x16].blockfill_s[NONALIGNED] = PFX(blockfill_s_16x16_neon);
- p.cu[BLOCK_32x32].blockfill_s[NONALIGNED] = PFX(blockfill_s_32x32_neon);
+ p.cu[BLOCK_4x4].blockfill_s = PFX(blockfill_s_4x4_neon);
+ p.cu[BLOCK_8x8].blockfill_s = PFX(blockfill_s_8x8_neon);
+ p.cu[BLOCK_16x16].blockfill_s = PFX(blockfill_s_16x16_neon);
+ p.cu[BLOCK_32x32].blockfill_s = PFX(blockfill_s_32x32_neon);

 // Blockcopy_ss
 p.cu[BLOCK_4x4].copy_ss = PFX(blockcopy_ss_4x4_neon);
@@ -496,21 +495,21 @@ void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
 p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].copy_sp = PFX(blockcopy_sp_32x64_neon);

 // pixel_add_ps
- p.cu[BLOCK_4x4].add_ps[NONALIGNED] = PFX(pixel_add_ps_4x4_neon);
- p.cu[BLOCK_8x8].add_ps[NONALIGNED] = PFX(pixel_add_ps_8x8_neon);
- p.cu[BLOCK_16x16].add_ps[NONALIGNED] = PFX(pixel_add_ps_16x16_neon);
- p.cu[BLOCK_32x32].add_ps[NONALIGNED] = PFX(pixel_add_ps_32x32_neon);
- p.cu[BLOCK_64x64].add_ps[NONALIGNED] = PFX(pixel_add_ps_64x64_neon);
+ p.cu[BLOCK_4x4].add_ps = PFX(pixel_add_ps_4x4_neon);
+ p.cu[BLOCK_8x8].add_ps = PFX(pixel_add_ps_8x8_neon);
+ p.cu[BLOCK_16x16].add_ps = PFX(pixel_add_ps_16x16_neon);
+ p.cu[BLOCK_32x32].add_ps = PFX(pixel_add_ps_32x32_neon);
+ p.cu[BLOCK_64x64].add_ps = PFX(pixel_add_ps_64x64_neon);

 // chroma add_ps
- p.chroma[X265_CSP_I420].cu[BLOCK_420_4x4].add_ps[NONALIGNED] = PFX(pixel_add_ps_4x4_neon);
- p.chroma[X265_CSP_I420].cu[BLOCK_420_8x8].add_ps[NONALIGNED] = PFX(pixel_add_ps_8x8_neon);
- p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].add_ps[NONALIGNED] = PFX(pixel_add_ps_16x16_neon);
- p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].add_ps[NONALIGNED] = PFX(pixel_add_ps_32x32_neon);
- p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].add_ps[NONALIGNED] = PFX(pixel_add_ps_4x8_neon);
- p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].add_ps[NONALIGNED] = PFX(pixel_add_ps_8x16_neon);
- p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].add_ps[NONALIGNED] = PFX(pixel_add_ps_16x32_neon);
- p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].add_ps[NONALIGNED] = PFX(pixel_add_ps_32x64_neon);
+ p.chroma[X265_CSP_I420].cu[BLOCK_420_4x4].add_ps = PFX(pixel_add_ps_4x4_neon);
+ p.chroma[X265_CSP_I420].cu[BLOCK_420_8x8].add_ps = PFX(pixel_add_ps_8x8_neon);
+ p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].add_ps = PFX(pixel_add_ps_16x16_neon);
+ p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].add_ps = PFX(pixel_add_ps_32x32_neon);
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].add_ps = PFX(pixel_add_ps_4x8_neon);
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].add_ps = PFX(pixel_add_ps_8x16_neon);
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].add_ps = PFX(pixel_add_ps_16x32_neon);
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].add_ps = PFX(pixel_add_ps_32x64_neon);

 // cpy2Dto1D_shr
 p.cu[BLOCK_4x4].cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_4x4_neon);
@@ -519,10 +518,10 @@ void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
 p.cu[BLOCK_32x32].cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_32x32_neon);

 // ssd_s
- p.cu[BLOCK_4x4].ssd_s[NONALIGNED] = PFX(pixel_ssd_s_4x4_neon);
- p.cu[BLOCK_8x8].ssd_s[NONALIGNED] = PFX(pixel_ssd_s_8x8_neon);
- p.cu[BLOCK_16x16].ssd_s[NONALIGNED] = PFX(pixel_ssd_s_16x16_neon);
- p.cu[BLOCK_32x32].ssd_s[NONALIGNED] = PFX(pixel_ssd_s_32x32_neon);
+ p.cu[BLOCK_4x4].ssd_s = PFX(pixel_ssd_s_4x4_neon);
+ p.cu[BLOCK_8x8].ssd_s = PFX(pixel_ssd_s_8x8_neon);
+ p.cu[BLOCK_16x16].ssd_s = PFX(pixel_ssd_s_16x16_neon);
+ p.cu[BLOCK_32x32].ssd_s = PFX(pixel_ssd_s_32x32_neon);

 // sse_ss
 p.cu[BLOCK_4x4].sse_ss = PFX(pixel_sse_ss_4x4_neon);
@@ -549,10 +548,10 @@ void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
 p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sub_ps = PFX(pixel_sub_ps_32x64_neon);

 // calc_Residual
- p.cu[BLOCK_4x4].calcresidual[NONALIGNED] = PFX(getResidual4_neon);
- p.cu[BLOCK_8x8].calcresidual[NONALIGNED] = PFX(getResidual8_neon);
- p.cu[BLOCK_16x16].calcresidual[NONALIGNED] = PFX(getResidual16_neon);
- p.cu[BLOCK_32x32].calcresidual[NONALIGNED] = PFX(getResidual32_neon);
+ p.cu[BLOCK_4x4].calcresidual = PFX(getResidual4_neon);
+ p.cu[BLOCK_8x8].calcresidual = PFX(getResidual8_neon);
+ p.cu[BLOCK_16x16].calcresidual = PFX(getResidual16_neon);
+ p.cu[BLOCK_32x32].calcresidual = PFX(getResidual32_neon);

 // sse_pp
 p.cu[BLOCK_4x4].sse_pp = PFX(pixel_sse_pp_4x4_neon);
@@ -723,31 +722,31 @@ void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
 p.pu[LUMA_64x64].sad_x4 = PFX(sad_x4_64x64_neon);

 // pixel_avg_pp
- p.pu[LUMA_4x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x4_neon);
- p.pu[LUMA_4x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x8_neon);
- p.pu[LUMA_4x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x16_neon);
- p.pu[LUMA_8x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x4_neon);
- p.pu[LUMA_8x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x8_neon);
- p.pu[LUMA_8x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x16_neon);
- p.pu[LUMA_8x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x32_neon);
- p.pu[LUMA_12x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_12x16_neon);
- p.pu[LUMA_16x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x4_neon);
- p.pu[LUMA_16x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x8_neon);
- p.pu[LUMA_16x12].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x12_neon);
- p.pu[LUMA_16x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x16_neon);
- p.pu[LUMA_16x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x32_neon);
- p.pu[LUMA_16x64].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x64_neon);
- p.pu[LUMA_24x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_24x32_neon);
- p.pu[LUMA_32x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_32x8_neon);
- p.pu[LUMA_32x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_32x16_neon);
- p.pu[LUMA_32x24].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_32x24_neon);
- p.pu[LUMA_32x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_32x32_neon);
- p.pu[LUMA_32x64].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_32x64_neon);
- p.pu[LUMA_48x64].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_48x64_neon);
- p.pu[LUMA_64x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_64x16_neon);
- p.pu[LUMA_64x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_64x32_neon);
- p.pu[LUMA_64x48].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_64x48_neon);
- p.pu[LUMA_64x64].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_64x64_neon);
+ p.pu[LUMA_4x4].pixelavg_pp = PFX(pixel_avg_pp_4x4_neon);
+ p.pu[LUMA_4x8].pixelavg_pp = PFX(pixel_avg_pp_4x8_neon);
+ p.pu[LUMA_4x16].pixelavg_pp = PFX(pixel_avg_pp_4x16_neon);
+ p.pu[LUMA_8x4].pixelavg_pp = PFX(pixel_avg_pp_8x4_neon);
+ p.pu[LUMA_8x8].pixelavg_pp = PFX(pixel_avg_pp_8x8_neon);
+ p.pu[LUMA_8x16].pixelavg_pp = PFX(pixel_avg_pp_8x16_neon);
+ p.pu[LUMA_8x32].pixelavg_pp = PFX(pixel_avg_pp_8x32_neon);
+ p.pu[LUMA_12x16].pixelavg_pp = PFX(pixel_avg_pp_12x16_neon);
+ p.pu[LUMA_16x4].pixelavg_pp = PFX(pixel_avg_pp_16x4_neon);
+ p.pu[LUMA_16x8].pixelavg_pp = PFX(pixel_avg_pp_16x8_neon);
+ p.pu[LUMA_16x12].pixelavg_pp = PFX(pixel_avg_pp_16x12_neon);
+ p.pu[LUMA_16x16].pixelavg_pp = PFX(pixel_avg_pp_16x16_neon);
+ p.pu[LUMA_16x32].pixelavg_pp = PFX(pixel_avg_pp_16x32_neon);
+ p.pu[LUMA_16x64].pixelavg_pp = PFX(pixel_avg_pp_16x64_neon);
+ p.pu[LUMA_24x32].pixelavg_pp = PFX(pixel_avg_pp_24x32_neon);
+ p.pu[LUMA_32x8].pixelavg_pp = PFX(pixel_avg_pp_32x8_neon);
+ p.pu[LUMA_32x16].pixelavg_pp = PFX(pixel_avg_pp_32x16_neon);
+ p.pu[LUMA_32x24].pixelavg_pp = PFX(pixel_avg_pp_32x24_neon);
+ p.pu[LUMA_32x32].pixelavg_pp = PFX(pixel_avg_pp_32x32_neon);
+ p.pu[LUMA_32x64].pixelavg_pp = PFX(pixel_avg_pp_32x64_neon);
+ p.pu[LUMA_48x64].pixelavg_pp = PFX(pixel_avg_pp_48x64_neon);
+ p.pu[LUMA_64x16].pixelavg_pp = PFX(pixel_avg_pp_64x16_neon);
+ p.pu[LUMA_64x32].pixelavg_pp = PFX(pixel_avg_pp_64x32_neon);
+ p.pu[LUMA_64x48].pixelavg_pp = PFX(pixel_avg_pp_64x48_neon);
+ p.pu[LUMA_64x64].pixelavg_pp = PFX(pixel_avg_pp_64x64_neon);

 // planecopy
 p.planecopy_cp = PFX(pixel_planecopy_cp_neon);
diff --git a/source/common/cpu.cpp b/source/common/cpu.cpp
index 2eacfe4a9..26c82ea50 100644
--- a/source/common/cpu.cpp
+++ b/source/common/cpu.cpp
@@ -5,8 +5,6 @@
 * Laurent Aimar <fenrir@via.ecp.fr>
 * Fiona Glaser <fiona@x264.com>
 * Steve Borho <steve@borho.org>
- * Hongbin Liu <liuhongbin1@huawei.com>
- * Yimeng Su <yimeng.su@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -369,8 +367,6 @@ uint32_t cpu_detect(bool benableavx512)
 flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0;
 #endif
 // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
-#elif X265_ARCH_ARM64
- flags |= X265_CPU_NEON;
 #endif // if HAVE_ARMV6
 return flags;
 }
diff --git a/source/common/pixel.cpp b/source/common/pixel.cpp
index e4f890cd5..99b84449c 100644
--- a/source/common/pixel.cpp
+++ b/source/common/pixel.cpp
@@ -5,7 +5,6 @@
 * Mandar Gurav <mandar@multicorewareinc.com>
 * Mahesh Pittala <mahesh@multicorewareinc.com>
 * Min Chen <min.chen@multicorewareinc.com>
- * Hongbin Liu<liuhongbin1@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -266,10 +265,6 @@ int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t s
 {
 int satd = 0;

-#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
- pixelcmp_t satd_4x4 = x265_pixel_satd_4x4_neon;
-#endif
-
 for (int row = 0; row < h; row += 4)
 for (int col = 0; col < w; col += 4)
 satd += satd_4x4(pix1 + row * stride_pix1 + col, stride_pix1,
@@ -284,10 +279,6 @@ int satd8(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t s
 {
 int satd = 0;

-#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
- pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon;
-#endif
-
 for (int row = 0; row < h; row += 4)
 for (int col = 0; col < w; col += 8)
 satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1,
diff --git a/source/common/primitives.h b/source/common/primitives.h
index 0b52f84de..5c64952fb 100644
--- a/source/common/primitives.h
+++ b/source/common/primitives.h
@@ -8,8 +8,6 @@
 * Rajesh Paulraj <rajesh@multicorewareinc.com>
 * Praveen Kumar Tiwari <praveen@multicorewareinc.com>
 * Min Chen <chenm003@163.com>
- * Hongbin Liu<liuhongbin1@huawei.com>
- * Yimeng Su <yimeng.su@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -469,9 +467,6 @@ void setupCPrimitives(EncoderPrimitives &p);
 void setupInstrinsicPrimitives(EncoderPrimitives &p, int cpuMask);
 void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask);
 void setupAliasPrimitives(EncoderPrimitives &p);
-#if X265_ARCH_ARM64
-void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask);
-#endif
 #if HAVE_ALTIVEC
 void setupPixelPrimitives_altivec(EncoderPrimitives &p);
 void setupDCTPrimitives_altivec(EncoderPrimitives &p);
@@ -486,10 +481,4 @@ extern const char* PFX(version_str);
 extern const char* PFX(build_info_str);
 #endif

-#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
-extern "C" {
-#include "aarch64/pixel-util.h"
-}
-#endif
-
 #endif // ifndef X265_PRIMITIVES_H
diff --git a/source/test/CMakeLists.txt b/source/test/CMakeLists.txt
index 9abaf31ff..260195f53 100644
--- a/source/test/CMakeLists.txt
+++ b/source/test/CMakeLists.txt
@@ -23,15 +23,13 @@ endif(X86)

 # add ARM assembly files
 if(ARM OR CROSS_COMPILE_ARM)
- if(NOT ARM64)
- enable_language(ASM)
- set(NASM_SRC checkasm-arm.S)
- add_custom_command(
- OUTPUT checkasm-arm.obj
- COMMAND ${CMAKE_CXX_COMPILER}
- ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj
- DEPENDS checkasm-arm.S)
- endif()
+ enable_language(ASM)
+ set(NASM_SRC checkasm-arm.S)
+ add_custom_command(
+ OUTPUT checkasm-arm.obj
+ COMMAND ${CMAKE_CXX_COMPILER}
+ ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj
+ DEPENDS checkasm-arm.S)
 endif(ARM OR CROSS_COMPILE_ARM)

 # add PowerPC assembly files
diff --git a/source/test/testbench.cpp b/source/test/testbench.cpp
index 8db8c0c25..ac14f9710 100644
--- a/source/test/testbench.cpp
+++ b/source/test/testbench.cpp
@@ -5,7 +5,6 @@
 * Mandar Gurav <mandar@multicorewareinc.com>
 * Mahesh Pittala <mahesh@multicorewareinc.com>
 * Min Chen <chenm003@163.com>
- * Yimeng Su <yimeng.su@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -209,14 +208,6 @@ int main(int argc, char *argv[])
 EncoderPrimitives asmprim;
 memset(&asmprim, 0, sizeof(asmprim));
 setupAssemblyPrimitives(asmprim, test_arch[i].flag);
-
-#if X265_ARCH_ARM64
- /* Temporary workaround because luma_vsp assembly primitive has not been completed
- * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive.
- * Otherwise, segment fault occurs. */
- setupAliasCPrimitives(cprim, asmprim, test_arch[i].flag);
-#endif
-
 setupAliasPrimitives(asmprim);
 memcpy(&primitives, &asmprim, sizeof(EncoderPrimitives));
 for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++)
@@ -241,13 +232,6 @@ int main(int argc, char *argv[])
 #endif
 setupAssemblyPrimitives(optprim, cpuid);

-#if X265_ARCH_ARM64
- /* Temporary workaround because luma_vsp assembly primitive has not been completed
- * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive.
- * Otherwise, segment fault occurs. */
- setupAliasCPrimitives(cprim, optprim, cpuid);
-#endif
-
 /* Note that we do not setup aliases for performance tests, that would be
 * redundant. The testbench only verifies they are correctly aliased */

diff --git a/source/test/testharness.h b/source/test/testharness.h
index 6e680953f..771551583 100644
--- a/source/test/testharness.h
+++ b/source/test/testharness.h
@@ -3,7 +3,6 @@
 *
 * Authors: Steve Borho <steve@borho.org>
 * Min Chen <chenm003@163.com>
- * Yimeng Su <yimeng.su@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -82,15 +81,11 @@ static inline uint32_t __rdtsc(void)
 #if X265_ARCH_X86
 asm volatile("rdtsc" : "=a" (a) ::"edx");
 #elif X265_ARCH_ARM
-#if X265_ARCH_ARM64
- asm volatile("mrs %0, cntvct_el0" : "=r"(a));
-#else
 // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch
 // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));

 // TO-DO: replace clock() function with appropriate ARM cpu instructions
 a = clock();
-#endif
 #endif
 return a;
 }
--
2.24.3 (Apple Git-128)
