{
  Free Pascal version of the Hermes pixel conversion library.
  Copyright (C) 2012, 2013 Nikolay Nikolov (nickysn@users.sourceforge.net)

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version
  with the following modification:

  As a special exception, the copyright holders of this library give you
  permission to link this library with independent modules to produce an
  executable, regardless of the license terms of these independent modules,and
  to copy and distribute the resulting executable under terms of your choice,
  provided that you also meet, for each linked independent module, the terms
  and conditions of the license of that module. An independent module is a
  module which is not derived from or based on this library. If you modify
  this library, you may extend this exception to your version of the library,
  but you are not obligated to do so. If you do not wish to do so, delete this
  exception statement from your version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
}

{$ASMMODE intel}

{ -------------------------------------------------------------------------

                             NORMAL CONVERTERS

  ------------------------------------------------------------------------- }

{ Converts 8-bit indexed pixels to 32-bit pixels through a lookup table.

  For every source byte b the 32-bit value iface^.lookup[b] is written to the
  destination. Rows are processed one at a time:
    1. single pixels are written until the destination pointer is 16-byte
       aligned (or the row is exhausted);
    2. groups of four pixels are looked up, packed into one XMM register and
       written with a single 16-byte store;
    3. the remaining 0..3 pixels are written one at a time.
  After each row iface^.s_add / iface^.d_add are added to the source and
  destination pointers.

  All stores are non-temporal (movnti / movntdq); an sfence is issued once
  all rows are done.

  Fields read from iface^: s_pixels, d_pixels, lookup, s_width, s_height,
  s_add, d_add.
  Side effect: iface^.s_height is used as the row counter and is 0 on return
  whenever at least one row was converted.

  Nothing is done when s_width or s_height is not positive. }
procedure ConvertX86_64_index8_32(iface: PHermesConverterInterface); cdecl;
label
  preloop_start, preloop_skip, loop_start, loop_pre_remainder, loop_remainder, done;
var
  source, dest: PUint8;
  lookup: PUint32;
  s_width: int64;
begin
  source := iface^.s_pixels;
  dest := iface^.d_pixels;
  lookup := iface^.lookup;
  s_width := iface^.s_width;

  { An empty row has nothing to convert. A zero height must be rejected as
    well: the row loop below decrements before testing for zero, so it would
    otherwise run past the end of both buffers.
    NOTE(review): if s_height is declared unsigned, the height test reduces
    to "= 0". }
  if (s_width <= 0) or (iface^.s_height <= 0) then
    exit;

  repeat
    asm
      mov rsi, [source]
      mov rdi, [dest]
      mov rbx, [lookup]
      mov rcx, [s_width]

      { Phase 1: one pixel at a time until rdi is 16-byte aligned. If the
        destination is not even 4-byte aligned this phase handles the whole
        row. }
preloop_start:
      test rdi, 15
      jz preloop_skip

      movzx rax, byte [rsi]
      mov edx, dword [rbx + rax * 4]
      movnti [rdi], edx
      inc rsi
      add rdi, 4
      sub rcx, 1
      jz done
      jmp preloop_start

      { Split the remaining count: r8 = pixels left over (count mod 4),
        rcx = number of 4-pixel groups. }
preloop_skip:
      mov r8, rcx
      and r8, 3
      shr rcx, 2
      test rcx, rcx
      jz loop_pre_remainder

      { Phase 2: four lookups per iteration, merged into xmm0 and written
        with one aligned 16-byte non-temporal store. }
      align 16
loop_start:
      movzx rax, byte [rsi]
      movzx r9, byte [rsi + 1]
      movzx r10, byte [rsi + 2]
      movzx r11, byte [rsi + 3]

      movd xmm0, dword [rbx + rax * 4]
      movd xmm1, dword [rbx + r9 * 4]
      movd xmm2, dword [rbx + r10 * 4]
      movd xmm3, dword [rbx + r11 * 4]
      punpckldq xmm0, xmm1
      punpckldq xmm2, xmm3
      punpcklqdq xmm0, xmm2

      movntdq [rdi], xmm0
      add rsi, 4
      add rdi, 16
      { Full 64-bit decrement: the counter was set up in rcx, so a 32-bit
        "sub ecx" would discard its upper half. }
      sub rcx, 1
      jnz loop_start

      { Phase 3: the 0..3 pixels that did not fill a group of four. }
loop_pre_remainder:
      mov rcx, r8
      test rcx, rcx
      jz done
loop_remainder:
      movzx rax, byte [rsi]
      mov edx, dword [rbx + rax * 4]
      movnti [rdi], edx
      inc rsi
      add rdi, 4
      sub rcx, 1
      jnz loop_remainder

      { Hand the advanced pointers back to the Pascal locals. }
done:
      mov [source], rsi
      mov [dest], rdi
    { Tell the compiler which integer registers the block overwrites so that
      callee-saved ones (rbx everywhere; rsi and rdi on Win64) are preserved
      for the caller. xmm0..xmm3 are scratch registers in both the System V
      and the Microsoft x64 calling conventions. }
    end ['rax', 'rbx', 'rcx', 'rdx', 'rsi', 'rdi', 'r8', 'r9', 'r10', 'r11'];

    Inc(source, iface^.s_add);
    Inc(dest, iface^.d_add);
    Dec(iface^.s_height);
  until iface^.s_height = 0;

  { Non-temporal stores are weakly ordered; fence them before returning. }
  asm
    sfence
  end;
end;