1 /* FFdecsa -- fast decsa algorithm
2 *
3 * Copyright (C) 2003-2004 fatih89r
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20
21 #include <sys/types.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25
/* Fallback definition of NULL, used only if no included header provided one. */
#ifndef NULL
#define NULL 0
#endif

/*
 * Debug tracing switch.
 * Uncomment the DEBUG define below (or define DEBUG on the compiler command
 * line) to make DBG(a) expand to its argument; otherwise DBG(a) expands to
 * nothing, so the wrapped statement is removed at preprocessing time.
 */
//#define DEBUG
#ifdef DEBUG
#define DBG(a) a
#else
#define DBG(a)
#endif
36
//// parallelization stuff, large speed differences are possible
// possible choices
//
// PARALLEL_MODE (expected to be defined by the build, it is not set in this
// file) must be equal to one of the constants below; the value selects which
// parallel_*.h implementation header is included further down.
// The values look like "width in bits * 10 + variant index".
#define PARALLEL_32_4CHAR 320
#define PARALLEL_32_4CHARA 321
#define PARALLEL_32_INT 322
#define PARALLEL_64_8CHAR 640
#define PARALLEL_64_8CHARA 641
#define PARALLEL_64_2INT 642
#define PARALLEL_64_LONG 643
#define PARALLEL_64_MMX 644
#define PARALLEL_128_16CHAR 1280
#define PARALLEL_128_16CHARA 1281
#define PARALLEL_128_4INT 1282
#define PARALLEL_128_2LONG 1283
#define PARALLEL_128_2MMX 1284
#define PARALLEL_128_SSE 1285
#define PARALLEL_128_SSE2 1286
54
#include "parallel_generic.h"
//// conditionals
// Pick the implementation header matching PARALLEL_MODE.
// An undefined PARALLEL_MODE evaluates to 0 inside #if, matches none of the
// branches and ends in the #error at the bottom.
//
// FUNC(x) appends a per-mode suffix to the names of the externally visible
// functions defined later in this file.
// NOTE(review): in this file FUNC is defined only for PARALLEL_32_INT,
// PARALLEL_64_MMX and PARALLEL_128_SSE2; unless the other parallel_*.h
// headers define it themselves, the remaining modes will not compile --
// confirm against the headers / build system.
#if PARALLEL_MODE==PARALLEL_32_4CHAR
#include "parallel_032_4char.h"
#elif PARALLEL_MODE==PARALLEL_32_4CHARA
#include "parallel_032_4charA.h"
#elif PARALLEL_MODE==PARALLEL_32_INT
#include "parallel_032_int.h"
#define FUNC(x) (x ## _32int)
#elif PARALLEL_MODE==PARALLEL_64_8CHAR
#include "parallel_064_8char.h"
#elif PARALLEL_MODE==PARALLEL_64_8CHARA
#include "parallel_064_8charA.h"
#elif PARALLEL_MODE==PARALLEL_64_2INT
#include "parallel_064_2int.h"
#elif PARALLEL_MODE==PARALLEL_64_LONG
#include "parallel_064_long.h"
#elif PARALLEL_MODE==PARALLEL_64_MMX
#include "parallel_064_mmx.h"
#define FUNC(x) (x ## _64mmx)
#elif PARALLEL_MODE==PARALLEL_128_16CHAR
#include "parallel_128_16char.h"
#elif PARALLEL_MODE==PARALLEL_128_16CHARA
#include "parallel_128_16charA.h"
#elif PARALLEL_MODE==PARALLEL_128_4INT
#include "parallel_128_4int.h"
#elif PARALLEL_MODE==PARALLEL_128_2LONG
#include "parallel_128_2long.h"
#elif PARALLEL_MODE==PARALLEL_128_2MMX
#include "parallel_128_2mmx.h"
#elif PARALLEL_MODE==PARALLEL_128_SSE
#include "parallel_128_sse.h"
#elif PARALLEL_MODE==PARALLEL_128_SSE2
#include "parallel_128_sse2.h"
#define FUNC(x) (x ## _128sse2)
#else
#error "unknown/undefined parallel mode"
#endif
93
94
95 // stuff depending on conditionals
96
// GROUP_PARALLELISM is not defined in this file; it is expected to come from
// the parallel_*.h header selected above. The macros below derive from it.
#define BYTES_PER_GROUP (GROUP_PARALLELISM/8)
#define BYPG BYTES_PER_GROUP
#define BITS_PER_GROUP GROUP_PARALLELISM
#define BIPG BITS_PER_GROUP

// Allocation hooks: default to the C library unless already overridden.
#ifndef MALLOC
#define MALLOC(X) malloc(X)
#endif
#ifndef FREE
#define FREE(X) free(X)
#endif
// MEMALIGN prefixes declarations of group/batch buffers. It defaults to
// nothing here; presumably a selected header may predefine it as an
// alignment attribute -- TODO confirm.
#ifndef MEMALIGN
#define MEMALIGN
#endif
111
112 //// debug tool
113
#ifdef DEBUG
/*
 * Debug helper: hex-dump `len` bytes starting at `p` to stderr.
 *
 * Every output line starts with `string` and the offset of its first byte
 * and holds at most `linelen` bytes; extra spaces are inserted every 4 and
 * every 8 bytes to ease reading.
 *
 * Compiled only when DEBUG is defined, because the DBG(dump_mem(...)) call
 * sites in this file exist only in that configuration (with a plain "#if 0"
 * a DEBUG build would reference an undefined function).
 *
 * string  : label printed at the start of every line.
 * p       : first byte to dump.
 * len     : number of bytes to dump.
 * linelen : bytes per output line; values <= 0 are replaced by `len`
 *           (or 1 when `len` is not positive) so that the modulo below can
 *           never divide by zero. Callers pass g_residue[] values here,
 *           which can be 0.
 */
static void dump_mem(const char *string, const unsigned char *p, int len, int linelen){
  int i;

  if(linelen<=0) linelen=(len>0)?len:1;

  for(i=0;i<len;i++){
    if(i%linelen==0){
      if(i) fprintf(stderr,"\n");
      fprintf(stderr,"%s %08x:",string,(unsigned int)i);
    }
    else{
      if(i%8==0) fprintf(stderr," ");
      if(i%4==0) fprintf(stderr," ");
    }
    fprintf(stderr," %02x",p[i]);
  }
  /* always terminate the last line, even when it is only partially filled */
  fprintf(stderr,"\n");
}
#endif
129
130 //////////////////////////////////////////////////////////////////////////////////
131
/*
 * One control word together with everything precomputed from it.
 * All members are filled in by schedule_key().
 */
struct csa_key_t{
  unsigned char ck[8];          // the 8-byte control word itself
// used by stream
  int iA[8];  // iA[0] is for A1, iA[7] is for A8 (one nibble each, from ck[0..3])
  int iB[8];  // iB[0] is for B1, iB[7] is for B8 (one nibble each, from ck[4..7])
// used by stream (group)
  // each entry is FF1() when the corresponding key bit is set, FF0() otherwise
  MEMALIGN group ck_g[8][8]; // [byte][bit:0=LSB,7=MSB]
  MEMALIGN group iA_g[8][4]; // [0 for A1][0 for LSB]
  MEMALIGN group iB_g[8][4]; // [0 for B1][0 for LSB]
// used by block
  unsigned char kk[56];         // expanded block key, see key_schedule_block()
// used by block (group)
  MEMALIGN batch kkmulti[56]; // many times the same byte in every batch
};
146
/*
 * Key pair handed around as the opaque `void *keys` of the external
 * interface: decrypt_packets() uses `even` for packets whose scrambling
 * control bits are 0x80 and `odd` for 0xc0.
 */
struct csa_keys_t{
  struct csa_key_t even;
  struct csa_key_t odd;
};
151
152 //-----stream cypher
153
154 //-----key schedule for stream decypher
/*
 * Key schedule for the stream cipher: split the control word into nibbles.
 *
 * ck : [In]  ck[0]-ck[7], the 8-byte key.
 * iA : [Out] iA[0]-iA[7], nibbles of ck[0]..ck[3], high nibble first.
 * iB : [Out] iB[0]-iB[7], nibbles of ck[4]..ck[7], high nibble first.
 */
static void key_schedule_stream(
  unsigned char *ck,
  int *iA,
  int *iB)
{
  int pair;

  for(pair=0;pair<4;pair++){
    const unsigned char a_src=ck[pair];
    const unsigned char b_src=ck[pair+4];

    iA[2*pair]  =(a_src>>4)&0xf;
    iA[2*pair+1]= a_src    &0xf;
    iB[2*pair]  =(b_src>>4)&0xf;
    iB[2*pair+1]= b_src    &0xf;
  }
}
177
178 //----- stream main function
179
180 #define STREAM_INIT
181 #include "stream.c"
182 #undef STREAM_INIT
183
184 #define STREAM_NORMAL
185 #include "stream.c"
186 #undef STREAM_NORMAL
187
188
189 //-----block decypher
190
191 //-----key schedule for block decypher
192
/*
 * Key schedule for the block cipher.
 *
 * Starting from the control word (round 6), each earlier round key is the
 * previous one with its 64 bits shuffled through key_perm. Round key i
 * (0..6), with every byte XORed with i, becomes kk[8*i .. 8*i+7].
 *
 * ck : [In]  ck[0]-ck[7], the 8-byte key.
 * kk : [Out] kk[0]-kk[55], the 56-byte expanded key.
 */
static void key_schedule_block(
  unsigned char *ck,
  unsigned char *kk)
{
  /* key_perm[s] is the 1-based destination of source bit s; bits are
     numbered MSB-first inside each byte. The table is a permutation of
     1..64, so every destination bit is written exactly once per round. */
  static const unsigned char key_perm[0x40] = {
    0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40,
    0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29,
    0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11,
    0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37,
  };

  unsigned char round_key[7][8];
  int round,src,pos;

  /* round 6 is the control word itself */
  memcpy(round_key[6],ck,8);

  /* rounds 5..0: permute the bits of the following round */
  for(round=5;round>=0;round--){
    const unsigned char *from=round_key[round+1];
    unsigned char *to=round_key[round];

    memset(to,0,8);
    for(src=0;src<64;src++){
      if((from[src>>3]>>(7-(src&7)))&1){
        const int dst=key_perm[src]-1;
        to[dst>>3]|=(unsigned char)(1u<<(7-(dst&7)));
      }
    }
  }

  /* xor every byte with its round number to give kk */
  for(round=0;round<7;round++){
    for(pos=0;pos<8;pos++){
      kk[round*8+pos]=(unsigned char)(round_key[round][pos]^round);
    }
  }
}
247
248 //-----block utils
249
trasp_N_8(unsigned char * in,unsigned char * out,int count)250 static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){
251 int *ri=(int *)in;
252 int *ibi=(int *)out;
253 int j,i,k,g;
254 // copy and first step
255 for(g=0;g<count;g++){
256 ri[g]=ibi[2*g];
257 ri[GROUP_PARALLELISM+g]=ibi[2*g+1];
258 }
259 //dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
260 // now 01230123
261 #define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
262 for(j=0;j<8;j+=4){
263 for(i=0;i<2;i++){
264 for(k=0;k<INTS_PER_ROW;k++){
265 unsigned int t,b;
266 t=ri[INTS_PER_ROW*(j+i)+k];
267 b=ri[INTS_PER_ROW*(j+i+2)+k];
268 ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
269 ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
270 }
271 }
272 }
273 //dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
274 // now 01010101
275 for(j=0;j<8;j+=2){
276 for(i=0;i<1;i++){
277 for(k=0;k<INTS_PER_ROW;k++){
278 unsigned int t,b;
279 t=ri[INTS_PER_ROW*(j+i)+k];
280 b=ri[INTS_PER_ROW*(j+i+1)+k];
281 ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
282 ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
283 }
284 }
285 }
286 //dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
287 // now 00000000
288 }
289
trasp_8_N(unsigned char * in,unsigned char * out,int count)290 static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){
291 int *ri=(int *)in;
292 int *bdi=(int *)out;
293 int j,i,k,g;
294 #define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
295 //dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
296 // now 00000000
297 for(j=0;j<8;j+=2){
298 for(i=0;i<1;i++){
299 for(k=0;k<INTS_PER_ROW;k++){
300 unsigned int t,b;
301 t=ri[INTS_PER_ROW*(j+i)+k];
302 b=ri[INTS_PER_ROW*(j+i+1)+k];
303 ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
304 ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
305 }
306 }
307 }
308 //dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
309 // now 01010101
310 for(j=0;j<8;j+=4){
311 for(i=0;i<2;i++){
312 for(k=0;k<INTS_PER_ROW;k++){
313 unsigned int t,b;
314 t=ri[INTS_PER_ROW*(j+i)+k];
315 b=ri[INTS_PER_ROW*(j+i+2)+k];
316 ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
317 ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
318 }
319 }
320 }
321 //dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
322 // now 01230123
323 for(g=0;g<count;g++){
324 bdi[2*g]=ri[g];
325 bdi[2*g+1]=ri[GROUP_PARALLELISM+g];
326 }
327 }
328
329 //-----block main function
330
331 // block group
/*
 * Run the 56-step block decipher on up to GROUP_PARALLELISM 8-byte blocks
 * at once.
 *
 * The eight block "registers" are kept as eight rows of GROUP_PARALLELISM
 * bytes inside r[]. Instead of shifting the registers after every step, the
 * row offset roff moves back by one row ("virtual shift"), which is why r[]
 * has room for 8+56 rows.
 *
 * Only `count` blocks are loaded from ib and stored to bd, but every step
 * always processes all GROUP_PARALLELISM lanes.
 */
static void block_decypher_group (
  batch *kkmulti,       // [In]  kkmulti[0]-kkmulti[55] 56 batches | Key schedule (each batch has repeated equal bytes).
  unsigned char *ib,    // [In]  (ib0,ib1,...ib7)...x32 32*8 bytes | Initialization vector.
  unsigned char *bd,    // [Out] (bd0,bd1,...bd7)...x32 32*8 bytes | Block decipher.
  int count)            // [In]  number of blocks actually present in ib / wanted in bd
{
  // int is faster than unsigned char. apparently not
  static const unsigned char block_sbox[0x100] = {
    0x3A,0xEA,0x68,0xFE,0x33,0xE9,0x88,0x1A, 0x83,0xCF,0xE1,0x7F,0xBA,0xE2,0x38,0x12,
    0xE8,0x27,0x61,0x95,0x0C,0x36,0xE5,0x70, 0xA2,0x06,0x82,0x7C,0x17,0xA3,0x26,0x49,
    0xBE,0x7A,0x6D,0x47,0xC1,0x51,0x8F,0xF3, 0xCC,0x5B,0x67,0xBD,0xCD,0x18,0x08,0xC9,
    0xFF,0x69,0xEF,0x03,0x4E,0x48,0x4A,0x84, 0x3F,0xB4,0x10,0x04,0xDC,0xF5,0x5C,0xC6,
    0x16,0xAB,0xAC,0x4C,0xF1,0x6A,0x2F,0x3C, 0x3B,0xD4,0xD5,0x94,0xD0,0xC4,0x63,0x62,
    0x71,0xA1,0xF9,0x4F,0x2E,0xAA,0xC5,0x56, 0xE3,0x39,0x93,0xCE,0x65,0x64,0xE4,0x58,
    0x6C,0x19,0x42,0x79,0xDD,0xEE,0x96,0xF6, 0x8A,0xEC,0x1E,0x85,0x53,0x45,0xDE,0xBB,
    0x7E,0x0A,0x9A,0x13,0x2A,0x9D,0xC2,0x5E, 0x5A,0x1F,0x32,0x35,0x9C,0xA8,0x73,0x30,

    0x29,0x3D,0xE7,0x92,0x87,0x1B,0x2B,0x4B, 0xA5,0x57,0x97,0x40,0x15,0xE6,0xBC,0x0E,
    0xEB,0xC3,0x34,0x2D,0xB8,0x44,0x25,0xA4, 0x1C,0xC7,0x23,0xED,0x90,0x6E,0x50,0x00,
    0x99,0x9E,0x4D,0xD9,0xDA,0x8D,0x6F,0x5F, 0x3E,0xD7,0x21,0x74,0x86,0xDF,0x6B,0x05,
    0x8E,0x5D,0x37,0x11,0xD2,0x28,0x75,0xD6, 0xA7,0x77,0x24,0xBF,0xF0,0xB0,0x02,0xB7,
    0xF8,0xFC,0x81,0x09,0xB1,0x01,0x76,0x91, 0x7D,0x0F,0xC8,0xA0,0xF2,0xCB,0x78,0x60,
    0xD1,0xF7,0xE0,0xB5,0x98,0x22,0xB3,0x20, 0x1D,0xA6,0xDB,0x7B,0x59,0x9F,0xAE,0x31,
    0xFB,0xD3,0xB6,0xCA,0x43,0x72,0x07,0xF4, 0xD8,0x41,0x14,0x55,0x0D,0x54,0x8B,0xB9,
    0xAD,0x46,0x0B,0xAF,0x80,0x52,0x2C,0xFA, 0x8C,0x89,0x66,0xFD,0xB2,0xA9,0x9B,0xC0,
  };
  MEMALIGN unsigned char r[GROUP_PARALLELISM*(8+56)];  /* 56 because we will move back in memory while looping */
  MEMALIGN unsigned char sbox_in[GROUP_PARALLELISM],sbox_out[GROUP_PARALLELISM],perm_out[GROUP_PARALLELISM];
  int roff;
  int i,g,count_all=GROUP_PARALLELISM;

  // the registers start in the last 8 rows of r[]; clear them so that the
  // lanes not loaded from ib (count..GROUP_PARALLELISM-1) hold defined data
  roff=GROUP_PARALLELISM*56;
  memset(r + roff, 0, sizeof(r) - roff);

  // FASTTRASP1 is defined right here, so the byte-by-byte loop below is
  // compiled out and kept only as a reference for what trasp_N_8() does
#define FASTTRASP1
#ifndef FASTTRASP1
  for(g=0;g<count;g++){
    // Init registers
    int j;
    for(j=0;j<8;j++){
      r[roff+GROUP_PARALLELISM*j+g]=ib[8*g+j];
    }
  }
#else
  // note the argument order: first the destination rows, then the source blocks
  trasp_N_8((unsigned char *)&r[roff],(unsigned char *)ib,count);
#endif
  //dump_mem("OLD r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);

  // loop over kk[55]..kk[0]
  for(i=55;i>=0;i--){
    {
      // sbox input = key byte of this step XOR register row 6, one batch at a time
      MEMALIGN batch tkkmulti=kkmulti[i];
      batch *si=(batch *)sbox_in;
      batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6);
      for(g=0;g<count_all/BYTES_PER_BATCH;g++){
        si[g]=B_FFXOR(tkkmulti,r6_N[g]);              //FIXME: introduce FASTBATCH?
      }
    }

    // table lookup, this works on only one byte at a time
    // most difficult part of all
    // - can't be parallelized
    // - can't be synthesized through boolean terms (8 input bits are too many)
    for(g=0;g<count_all;g++){
      sbox_out[g]=block_sbox[sbox_in[g]];
    }

    // bit permutation
    // every sbox output byte gets its bits moved around: the bits selected
    // by each mask are shifted within their byte and the pieces are OR-ed
    // together (mask/shift semantics assumed from the macro names
    // B_FFN_ALL_xx / B_FFSH8L / B_FFSH8R -- TODO confirm in parallel_*.h)
    {
      unsigned char *po=(unsigned char *)perm_out;
      unsigned char *so=(unsigned char *)sbox_out;
//dump_mem("pre perm ",(unsigned char *)so,GROUP_PARALLELISM,GROUP_PARALLELISM);
      for(g=0;g<count_all;g+=BYTES_PER_BATCH){
        MEMALIGN batch in,out;
        in=*(batch *)&so[g];

        out=B_FFOR(
            B_FFOR(
            B_FFOR(
            B_FFOR(
            B_FFOR(
                   B_FFSH8L(B_FFAND(in,B_FFN_ALL_29()),1),
                   B_FFSH8L(B_FFAND(in,B_FFN_ALL_02()),6)),
                   B_FFSH8L(B_FFAND(in,B_FFN_ALL_04()),3)),
                   B_FFSH8R(B_FFAND(in,B_FFN_ALL_10()),2)),
                   B_FFSH8R(B_FFAND(in,B_FFN_ALL_40()),6)),
                   B_FFSH8R(B_FFAND(in,B_FFN_ALL_80()),4));

        *(batch *)&po[g]=out;
      }
//dump_mem("post perm",(unsigned char *)po,GROUP_PARALLELISM,GROUP_PARALLELISM);
    }

    roff-=GROUP_PARALLELISM; /* virtual shift of registers */

    // register update; the disabled "one by one" variant spells out, byte
    // by byte, what the XOR_BEST_BY / XOREQ_BEST_BY variant does per span
#if 0
/* one by one */
    for(g=0;g<count_all;g++){
      r[roff+GROUP_PARALLELISM*0+g]=r[roff+GROUP_PARALLELISM*8+g]^sbox_out[g];
      r[roff+GROUP_PARALLELISM*6+g]^=perm_out[g];
      r[roff+GROUP_PARALLELISM*4+g]^=r[roff+GROUP_PARALLELISM*0+g];
      r[roff+GROUP_PARALLELISM*3+g]^=r[roff+GROUP_PARALLELISM*0+g];
      r[roff+GROUP_PARALLELISM*2+g]^=r[roff+GROUP_PARALLELISM*0+g];
    }
#else
    for(g=0;g<count_all;g+=BEST_SPAN){
      XOR_BEST_BY(&r[roff+GROUP_PARALLELISM*0+g],&r[roff+GROUP_PARALLELISM*8+g],&sbox_out[g]);
      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*6+g],&perm_out[g]);
      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*4+g],&r[roff+GROUP_PARALLELISM*0+g]);
      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*3+g],&r[roff+GROUP_PARALLELISM*0+g]);
      XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*2+g],&r[roff+GROUP_PARALLELISM*0+g]);
    }
#endif
  }

  // after 56 steps roff is 0 again: the result sits in the first 8 rows.
  // FASTTRASP2 is defined right here, so the loop below is compiled out and
  // kept only as a reference for what trasp_8_N() does
#define FASTTRASP2
#ifndef FASTTRASP2
  for(g=0;g<count;g++){
    // Copy results
    int j;
    for(j=0;j<8;j++){
      bd[8*g+j]=r[roff+GROUP_PARALLELISM*j+g];
    }
  }
#else
  trasp_8_N((unsigned char *)&r[roff],(unsigned char *)bd,count);
#endif
}
460
461 //-----------------------------------EXTERNAL INTERFACE
462
463
464 //-----set control words
465
schedule_key(struct csa_key_t * key,const unsigned char * pk)466 static void schedule_key(struct csa_key_t *key, const unsigned char *pk){
467 // could be made faster, but is not run often
468 int bi,by;
469 int i,j;
470 // key
471 memcpy(key->ck,pk,8);
472 // precalculations for stream
473 key_schedule_stream(key->ck,key->iA,key->iB);
474 for(by=0;by<8;by++){
475 for(bi=0;bi<8;bi++){
476 key->ck_g[by][bi]=(key->ck[by]&(1<<bi))?FF1():FF0();
477 }
478 }
479 for(by=0;by<8;by++){
480 for(bi=0;bi<4;bi++){
481 key->iA_g[by][bi]=(key->iA[by]&(1<<bi))?FF1():FF0();
482 key->iB_g[by][bi]=(key->iB[by]&(1<<bi))?FF1():FF0();
483 }
484 }
485 // precalculations for block
486 key_schedule_block(key->ck,key->kk);
487 for(i=0;i<56;i++){
488 for(j=0;j<BYTES_PER_BATCH;j++){
489 *(((unsigned char *)&key->kkmulti[i])+j)=key->kk[i];
490 }
491 }
492 }
493
494 extern void FUNC(set_control_words)(void *keys, const unsigned char *ev, const unsigned char *od);
495
FUNC(set_control_words)496 void FUNC(set_control_words)(void *keys, const unsigned char *ev, const unsigned char *od)
497 {
498 schedule_key(&((struct csa_keys_t *)keys)->even,ev);
499 schedule_key(&((struct csa_keys_t *)keys)->odd,od);
500 }
501
502 extern void FUNC(set_even_control_word)(void *keys, const unsigned char *pk);
503
FUNC(set_even_control_word)504 void FUNC(set_even_control_word)(void *keys, const unsigned char *pk)
505 {
506 schedule_key(&((struct csa_keys_t *)keys)->even,pk);
507 }
508
509 extern void FUNC(set_odd_control_word)(void *keys, const unsigned char *pk);
510
FUNC(set_odd_control_word)511 void FUNC(set_odd_control_word)(void *keys, const unsigned char *pk){
512 schedule_key(&((struct csa_keys_t *)keys)->odd,pk);
513 }
514
515 //-----get internal parallelism
516
517 extern int FUNC(get_internal_parallelism)(void);
518
FUNC(get_internal_parallelism)519 int FUNC(get_internal_parallelism)(void)
520 {
521 return GROUP_PARALLELISM;
522 }
523
524 //-----get suggested cluster size
525
526 extern int FUNC(get_suggested_cluster_size)(void);
527
FUNC(get_suggested_cluster_size)528 int FUNC(get_suggested_cluster_size)(void)
529 {
530 int r;
531 r=GROUP_PARALLELISM+GROUP_PARALLELISM/10;
532 if(r<GROUP_PARALLELISM+5) r=GROUP_PARALLELISM+5;
533 return r;
534 }
535
536 //-----key structure
537
538 extern void *FUNC(get_key_struct)(void);
FUNC(get_key_struct)539 void *FUNC(get_key_struct)(void)
540 {
541 struct csa_keys_t *keys=(struct csa_keys_t *)MALLOC(sizeof(struct csa_keys_t));
542 if(keys) {
543 static const unsigned char pk[8] = { 0,0,0,0,0,0,0,0 };
544 FUNC(set_control_words)(keys,pk,pk);
545 }
546 return keys;
547 }
548
549 extern void FUNC(free_key_struct)(void *keys);
FUNC(free_key_struct)550 void FUNC(free_key_struct)(void *keys)
551 {
552 return FREE(keys);
553 }
554
555
556
557 //-----get control words
#if 0
/*
 * Disabled: copy the currently installed even and odd control words
 * (8 bytes each) out of `keys`.
 */
void get_control_words(void *keys, unsigned char *even, unsigned char *odd){
  struct csa_keys_t *pair=(struct csa_keys_t *)keys;

  memcpy(even,&pair->even.ck,8);
  memcpy(odd,&pair->odd.ck,8);
}
#endif
564
565 //----- decrypt
566
567 extern int FUNC(decrypt_packets)(void *keys, unsigned char **cluster);
FUNC(decrypt_packets)568 int FUNC(decrypt_packets)(void *keys, unsigned char **cluster)
569 {
570 // statistics, currently unused
571 int stat_no_scramble=0;
572 int stat_reserved=0;
573 int stat_decrypted[2]={0,0};
574 int stat_decrypted_mini=0;
575 unsigned char **clst;
576 unsigned char **clst2;
577 int grouped;
578 int group_ev_od;
579 int advanced;
580 int can_advance;
581 unsigned char *g_pkt[GROUP_PARALLELISM];
582 int g_len[GROUP_PARALLELISM];
583 int g_offset[GROUP_PARALLELISM];
584 int g_n[GROUP_PARALLELISM];
585 int g_residue[GROUP_PARALLELISM];
586 unsigned char *pkt;
587 int xc0,ev_od,len,offset,n,residue;
588 struct csa_key_t* k;
589 int i,j,iter,g;
590 int t23,tsmall;
591 int alive[24];
592 //icc craziness int pad1=0; //////////align! FIXME
593 unsigned char *encp[GROUP_PARALLELISM];
594 MEMALIGN unsigned char stream_in[GROUP_PARALLELISM*8];
595 MEMALIGN unsigned char stream_out[GROUP_PARALLELISM*8];
596 MEMALIGN unsigned char ib[GROUP_PARALLELISM*8];
597 MEMALIGN unsigned char block_out[GROUP_PARALLELISM*8];
598 struct stream_regs regs;
599
600 //icc craziness i=(int)&pad1;//////////align!!! FIXME
601
602 // build a list of packets to be processed
603 clst=cluster;
604 grouped=0;
605 advanced=0;
606 can_advance=1;
607 group_ev_od=-1; // silence incorrect compiler warning
608 pkt=*clst;
609 do{ // find a new packet
610 if(grouped==GROUP_PARALLELISM){
611 // full
612 break;
613 }
614 if(pkt==NULL){
615 // no more ranges
616 break;
617 }
618 if(pkt>=*(clst+1)){
619 // out of this range, try next
620 clst++;clst++;
621 pkt=*clst;
622 continue;
623 }
624
625 do{ // handle this packet
626 xc0=pkt[3]&0xc0;
627 DBG(fprintf(stderr," exam pkt=%p, xc0=%02x, can_adv=%i\n",pkt,xc0,can_advance));
628 if(xc0==0x00){
629 DBG(fprintf(stderr,"skip clear pkt %p (can_advance is %i)\n",pkt,can_advance));
630 advanced+=can_advance;
631 stat_no_scramble++;
632 break;
633 }
634 if(xc0==0x40){
635 DBG(fprintf(stderr,"skip reserved pkt %p (can_advance is %i)\n",pkt,can_advance));
636 advanced+=can_advance;
637 stat_reserved++;
638 break;
639 }
640 if(xc0==0x80||xc0==0xc0){ // encrypted
641 ev_od=(xc0&0x40)>>6; // 0 even, 1 odd
642 if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd)
643 if(group_ev_od==ev_od){ // could be added to group
644 pkt[3]&=0x3f; // consider it decrypted now
645 if(pkt[3]&0x20){ // incomplete packet
646 offset=4+pkt[4]+1;
647 len=188-offset;
648 n=len>>3;
649 residue=len-(n<<3);
650 if(n==0){ // decrypted==encrypted!
651 DBG(fprintf(stderr,"DECRYPTED MINI! (can_advance is %i)\n",can_advance));
652 advanced+=can_advance;
653 stat_decrypted_mini++;
654 break; // this doesn't need more processing
655 }
656 }else{
657 len=184;
658 offset=4;
659 n=23;
660 residue=0;
661 }
662 g_pkt[grouped]=pkt;
663 g_len[grouped]=len;
664 g_offset[grouped]=offset;
665 g_n[grouped]=n;
666 g_residue[grouped]=residue;
667 DBG(fprintf(stderr,"%2i: eo=%i pkt=%p len=%03i n=%2i residue=%i\n",grouped,ev_od,pkt,len,n,residue));
668 grouped++;
669 advanced+=can_advance;
670 stat_decrypted[ev_od]++;
671 }
672 else{
673 can_advance=0;
674 DBG(fprintf(stderr,"skip pkt %p and can_advance set to 0\n",pkt));
675 break; // skip and go on
676 }
677 }
678 } while(0);
679
680 if(can_advance){
681 // move range start forward
682 *clst+=188;
683 }
684 // next packet, if there is one
685 pkt+=188;
686 } while(1);
687 DBG(fprintf(stderr,"-- result: grouped %i pkts, advanced %i pkts\n",grouped,advanced));
688
689 // delete empty ranges and compact list
690 clst2=cluster;
691 for(clst=cluster;*clst!=NULL;clst+=2){
692 // if not empty
693 if(*clst<*(clst+1)){
694 // it will remain
695 *clst2=*clst;
696 *(clst2+1)=*(clst+1);
697 clst2+=2;
698 }
699 }
700 *clst2=NULL;
701
702 if(grouped==0){
703 // no processing needed
704 return advanced;
705 }
706
707 // sort them, longest payload first
708 // we expect many n=23 packets and a few n<23
709 DBG(fprintf(stderr,"PRESORTING\n"));
710 for(i=0;i<grouped;i++){
711 DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
712 }
713 // grouped is always <= GROUP_PARALLELISM
714
715 #define g_swap(a,b) \
716 pkt=g_pkt[a]; \
717 g_pkt[a]=g_pkt[b]; \
718 g_pkt[b]=pkt; \
719 \
720 len=g_len[a]; \
721 g_len[a]=g_len[b]; \
722 g_len[b]=len; \
723 \
724 offset=g_offset[a]; \
725 g_offset[a]=g_offset[b]; \
726 g_offset[b]=offset; \
727 \
728 n=g_n[a]; \
729 g_n[a]=g_n[b]; \
730 g_n[b]=n; \
731 \
732 residue=g_residue[a]; \
733 g_residue[a]=g_residue[b]; \
734 g_residue[b]=residue;
735
736 // step 1: move n=23 packets before small packets
737 t23=0;
738 tsmall=grouped-1;
739 for(;;){
740 for(;t23<grouped;t23++){
741 if(g_n[t23]!=23) break;
742 }
743 DBG(fprintf(stderr,"t23 after for =%i\n",t23));
744
745 for(;tsmall>=0;tsmall--){
746 if(g_n[tsmall]==23) break;
747 }
748 DBG(fprintf(stderr,"tsmall after for =%i\n",tsmall));
749
750 if(tsmall-t23<1) break;
751
752 DBG(fprintf(stderr,"swap t23=%i,tsmall=%i\n",t23,tsmall));
753
754 g_swap(t23,tsmall);
755
756 t23++;
757 tsmall--;
758 DBG(fprintf(stderr,"new t23=%i,tsmall=%i\n\n",t23,tsmall));
759 }
760 DBG(fprintf(stderr,"packets with n=23, t23=%i grouped=%i\n",t23,grouped));
761 DBG(fprintf(stderr,"MIDSORTING\n"));
762 for(i=0;i<grouped;i++){
763 DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
764 }
765
766 // step 2: sort small packets in decreasing order of n (bubble sort is enough)
767 for(i=t23;i<grouped;i++){
768 for(j=i+1;j<grouped;j++){
769 if(g_n[j]>g_n[i]){
770 g_swap(i,j);
771 }
772 }
773 }
774 DBG(fprintf(stderr,"POSTSORTING\n"));
775 for(i=0;i<grouped;i++){
776 DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
777 }
778
779 // we need to know how many packets need 23 iterations, how many 22...
780 for(i=0;i<=23;i++){
781 alive[i]=0;
782 }
783 // count
784 alive[23-1]=t23;
785 for(i=t23;i<grouped;i++){
786 alive[g_n[i]-1]++;
787 }
788 // integrate
789 for(i=22;i>=0;i--){
790 alive[i]+=alive[i+1];
791 }
792 DBG(fprintf(stderr,"ALIVE\n"));
793 for(i=0;i<=23;i++){
794 DBG(fprintf(stderr,"alive%2i=%i\n",i,alive[i]));
795 }
796
797 // choose key
798 if(group_ev_od==0){
799 k=&((struct csa_keys_t *)keys)->even;
800 }
801 else{
802 k=&((struct csa_keys_t *)keys)->odd;
803 }
804
805 //INIT
806 //#define INITIALIZE_UNUSED_INPUT
807 #ifdef INITIALIZE_UNUSED_INPUT
808 // unnecessary zeroing.
809 // without this, we operate on uninitialized memory
810 // when grouped<GROUP_PARALLELISM, but it's not a problem,
811 // as final results will be discarded.
812 // random data makes debugging sessions difficult.
813 for(j=0;j<GROUP_PARALLELISM*8;j++) stream_in[j]=0;
814 DBG(fprintf(stderr,"--- WARNING: you could gain speed by not initializing unused memory ---\n"));
815 #else
816 DBG(fprintf(stderr,"--- WARNING: DEBUGGING IS MORE DIFFICULT WHEN PROCESSING RANDOM DATA CHANGING AT EVERY RUN! ---\n"));
817 #endif
818
819 for(g=0;g<grouped;g++){
820 encp[g]=g_pkt[g];
821 DBG(fprintf(stderr,"header[%i]=%p (%02x)\n",g,encp[g],*(encp[g])));
822 encp[g]+=g_offset[g]; // skip header
823 FFTABLEIN(stream_in,g,encp[g]);
824 }
825 //dump_mem("stream_in",stream_in,GROUP_PARALLELISM*8,BYPG);
826
827
828 // ITER 0
829 DBG(fprintf(stderr,">>>>>ITER 0\n"));
830 iter=0;
831 stream_cypher_group_init(®s,k->iA_g,k->iB_g,stream_in);
832 // fill first ib
833 for(g=0;g<alive[iter];g++){
834 COPY_8_BY(ib+8*g,encp[g]);
835 }
836 DBG(dump_mem("IB ",ib,8*alive[iter],8));
837 // ITER 1..N-1
838 for (iter=1;iter<23&&alive[iter-1]>0;iter++){
839 DBG(fprintf(stderr,">>>>>ITER %i\n",iter));
840 // alive and just dead packets: calc block
841 block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
842 DBG(dump_mem("BLO_ib ",block_out,8*alive[iter-1],8));
843 // all packets (dead too): calc stream
844 stream_cypher_group_normal(®s,stream_out);
845 //dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG);
846
847 // alive packets: calc ib
848 for(g=0;g<alive[iter];g++){
849 FFTABLEOUT(ib+8*g,stream_out,g);
850 DBG(dump_mem("stream_out_ib ",ib+8*g,8,8));
851 // XOREQ8BY gcc bug? 2x4 ok, 8 ko UPDATE: result ok but speed 1-2% slower (!!!???)
852 #if 1
853 XOREQ_4_BY(ib+8*g,encp[g]+8);
854 XOREQ_4_BY(ib+8*g+4,encp[g]+8+4);
855 #else
856 XOREQ_8_BY(ib+8*g,encp[g]+8);
857 #endif
858 DBG(dump_mem("after_stream_xor_ib ",ib+8*g,8,8));
859 }
860 // alive packets: decrypt data
861 for(g=0;g<alive[iter];g++){
862 DBG(dump_mem("before_ib_decrypt_data ",encp[g],8,8));
863 XOR_8_BY(encp[g],ib+8*g,block_out+8*g);
864 DBG(dump_mem("after_ib_decrypt_data ",encp[g],8,8));
865 }
866 // just dead packets: write decrypted data
867 for(g=alive[iter];g<alive[iter-1];g++){
868 DBG(dump_mem("jd_before_ib_decrypt_data ",encp[g],8,8));
869 COPY_8_BY(encp[g],block_out+8*g);
870 DBG(dump_mem("jd_after_ib_decrypt_data ",encp[g],8,8));
871 }
872 // just dead packets: decrypt residue
873 for(g=alive[iter];g<alive[iter-1];g++){
874 DBG(dump_mem("jd_before_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
875 FFTABLEOUTXORNBY(g_residue[g],encp[g]+8,stream_out,g);
876 DBG(dump_mem("jd_after_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
877 }
878 // alive packets: pointers++
879 for(g=0;g<alive[iter];g++) encp[g]+=8;
880 };
881 // ITER N
882 DBG(fprintf(stderr,">>>>>ITER 23\n"));
883 iter=23;
884 // calc block
885 block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
886 DBG(dump_mem("23BLO_ib ",block_out,8*alive[iter-1],8));
887 // just dead packets: write decrypted data
888 for(g=alive[iter];g<alive[iter-1];g++){
889 DBG(dump_mem("23jd_before_ib_decrypt_data ",encp[g],8,8));
890 COPY_8_BY(encp[g],block_out+8*g);
891 DBG(dump_mem("23jd_after_ib_decrypt_data ",encp[g],8,8));
892 }
893 // no residue possible
894 // so do nothing
895
896 DBG(fprintf(stderr,"returning advanced=%i\n",advanced));
897
898 M_EMPTY(); // restore CPU multimedia state
899
900 return advanced;
901 }
902