1 // Bench.cpp
2
3 #include "StdAfx.h"
4
5 #include <stdio.h>
6
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif
11
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #ifdef USE_POSIX_TIME2
15 #include <sys/time.h>
16 #endif
17 #endif
18
19 #ifdef _WIN32
20 #define USE_ALLOCA
21 #endif
22
23 #ifdef USE_ALLOCA
24 #ifdef _WIN32
25 #include <stdlib.h>
26 #else
27 #include <stdlib.h>
28 #endif
29 #endif
30
31 #include "../../../../C/7zCrc.h"
32 #include "../../../../C/CpuArch.h"
33
34 #ifndef _7ZIP_ST
35 #include "../../../Windows/Synchronization.h"
36 #include "../../../Windows/Thread.h"
37 #endif
38
39 #if defined(_WIN32) || defined(UNIX_USE_WIN_FILE)
40 #define USE_WIN_FILE
41 #endif
42
43 #ifdef USE_WIN_FILE
44 #include "../../../Windows/FileIO.h"
45 #endif
46
47
48 #include "../../../Common/IntToString.h"
49 #include "../../../Common/MyBuffer2.h"
50 #include "../../../Common/StringConvert.h"
51 #include "../../../Common/StringToInt.h"
52
53 #include "../../Common/MethodProps.h"
54 #include "../../Common/StreamUtils.h"
55
56 #include "Bench.h"
57
58 using namespace NWindows;
59
60 static const UInt32 k_LZMA = 0x030101;
61
62 static const UInt64 kComplexInCommands = (UInt64)1 <<
63 #ifdef UNDER_CE
64 31;
65 #else
66 34;
67 #endif
68
69 static const UInt32 kComplexInSeconds = 4;
70
SetComplexCommands(UInt32 complexInSeconds,bool isSpecifiedFreq,UInt64 cpuFreq,UInt64 & complexInCommands)71 static void SetComplexCommands(UInt32 complexInSeconds,
72 bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
73 {
74 complexInCommands = kComplexInCommands;
75 const UInt64 kMinFreq = (UInt64)1000000 * 4;
76 const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
77 if (cpuFreq < kMinFreq && !isSpecifiedFreq)
78 cpuFreq = kMinFreq;
79 if (cpuFreq < kMaxFreq || isSpecifiedFreq)
80 {
81 if (complexInSeconds != 0)
82 complexInCommands = complexInSeconds * cpuFreq;
83 else
84 complexInCommands = cpuFreq >> 2;
85 }
86 }
87
88 static const unsigned kNumHashDictBits = 17;
89 static const UInt32 kFilterUnpackSize = (48 << 10);
90
91 static const unsigned kOldLzmaDictBits = 30;
92
93 static const UInt32 kAdditionalSize = (1 << 16);
94 static const UInt32 kCompressedAdditionalSize = (1 << 10);
95 static const UInt32 kMaxLzmaPropSize = 5;
96
97
98
// Calls (_buffer_)->Alloc(_size_) and makes the enclosing function return
// E_OUTOFMEMORY if (_buffer_)->IsAllocated() is false afterwards.
//
// Both statements are wrapped in one compound statement, so the macro acts as
// a single statement when it is the body of an unbraced `if`. Previously only
// the Alloc() call was guarded and the failure check ran unconditionally.
// The macro may be written with or without a trailing semicolon; both forms
// are used in this file.
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
  { \
    (_buffer_)->Alloc(_size_); \
    if (!(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; \
  }
102
103
104 class CBaseRandomGenerator
105 {
106 UInt32 A1;
107 UInt32 A2;
108 UInt32 Salt;
109 public:
CBaseRandomGenerator(UInt32 salt=0)110 CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); }
Init()111 void Init() { A1 = 362436069; A2 = 521288629;}
GetRnd()112 UInt32 GetRnd()
113 {
114 return Salt ^
115 (
116 ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
117 ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) )
118 );
119 }
120 };
121
122
123 class CBenchRandomGenerator: public CAlignedBuffer
124 {
GetVal(UInt32 & res,unsigned numBits)125 static UInt32 GetVal(UInt32 &res, unsigned numBits)
126 {
127 UInt32 val = res & (((UInt32)1 << numBits) - 1);
128 res >>= numBits;
129 return val;
130 }
131
GetLen(UInt32 & r)132 static UInt32 GetLen(UInt32 &r)
133 {
134 UInt32 len = GetVal(r, 2);
135 return GetVal(r, 1 + len);
136 }
137
138 public:
139
GenerateSimpleRandom(UInt32 salt)140 void GenerateSimpleRandom(UInt32 salt)
141 {
142 CBaseRandomGenerator rg(salt);
143 const size_t bufSize = Size();
144 Byte *buf = (Byte *)*this;
145 for (size_t i = 0; i < bufSize; i++)
146 buf[i] = (Byte)rg.GetRnd();
147 }
148
GenerateLz(unsigned dictBits,UInt32 salt)149 void GenerateLz(unsigned dictBits, UInt32 salt)
150 {
151 CBaseRandomGenerator rg(salt);
152 UInt32 pos = 0;
153 UInt32 rep0 = 1;
154 const size_t bufSize = Size();
155 Byte *buf = (Byte *)*this;
156 unsigned posBits = 1;
157
158 while (pos < bufSize)
159 {
160 UInt32 r = rg.GetRnd();
161 if (GetVal(r, 1) == 0 || pos < 1024)
162 buf[pos++] = (Byte)(r & 0xFF);
163 else
164 {
165 UInt32 len;
166 len = 1 + GetLen(r);
167
168 if (GetVal(r, 3) != 0)
169 {
170 len += GetLen(r);
171
172 while (((UInt32)1 << posBits) < pos)
173 posBits++;
174
175 unsigned numBitsMax = dictBits;
176 if (numBitsMax > posBits)
177 numBitsMax = posBits;
178
179 const unsigned kAddBits = 6;
180 unsigned numLogBits = 5;
181 if (numBitsMax <= (1 << 4) - 1 + kAddBits)
182 numLogBits = 4;
183
184 for (;;)
185 {
186 UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
187 r = rg.GetRnd();
188 if (ppp > numBitsMax)
189 continue;
190 rep0 = GetVal(r, ppp);
191 if (rep0 < pos)
192 break;
193 r = rg.GetRnd();
194 }
195 rep0++;
196 }
197
198 {
199 UInt32 rem = (UInt32)bufSize - pos;
200 if (len > rem)
201 len = rem;
202 }
203 Byte *dest = buf + pos;
204 const Byte *src = dest - rep0;
205 pos += len;
206 for (UInt32 i = 0; i < len; i++)
207 *dest++ = *src++;
208 }
209 }
210 }
211 };
212
213
214 class CBenchmarkInStream:
215 public ISequentialInStream,
216 public CMyUnknownImp
217 {
218 const Byte *Data;
219 size_t Pos;
220 size_t Size;
221 public:
222 MY_UNKNOWN_IMP
Init(const Byte * data,size_t size)223 void Init(const Byte *data, size_t size)
224 {
225 Data = data;
226 Size = size;
227 Pos = 0;
228 }
229 STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
230 };
231
Read(void * data,UInt32 size,UInt32 * processedSize)232 STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
233 {
234 size_t remain = Size - Pos;
235 UInt32 kMaxBlockSize = (1 << 20);
236 if (size > kMaxBlockSize)
237 size = kMaxBlockSize;
238 if (size > remain)
239 size = (UInt32)remain;
240 for (UInt32 i = 0; i < size; i++)
241 ((Byte *)data)[i] = Data[Pos + i];
242 Pos += size;
243 if (processedSize)
244 *processedSize = size;
245 return S_OK;
246 }
247
248 class CBenchmarkOutStream:
249 public ISequentialOutStream,
250 public CAlignedBuffer,
251 public CMyUnknownImp
252 {
253 // bool _overflow;
254 public:
255 size_t Pos;
256 bool RealCopy;
257 bool CalcCrc;
258 UInt32 Crc;
259
260 // CBenchmarkOutStream(): _overflow(false) {}
Init(bool realCopy,bool calcCrc)261 void Init(bool realCopy, bool calcCrc)
262 {
263 Crc = CRC_INIT_VAL;
264 RealCopy = realCopy;
265 CalcCrc = calcCrc;
266 // _overflow = false;
267 Pos = 0;
268 }
269
270 // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
271
272 MY_UNKNOWN_IMP
273 STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
274 };
275
Write(const void * data,UInt32 size,UInt32 * processedSize)276 STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
277 {
278 size_t curSize = Size() - Pos;
279 if (curSize > size)
280 curSize = size;
281 if (curSize != 0)
282 {
283 if (RealCopy)
284 memcpy(((Byte *)*this) + Pos, data, curSize);
285 if (CalcCrc)
286 Crc = CrcUpdate(Crc, data, curSize);
287 Pos += curSize;
288 }
289 if (processedSize)
290 *processedSize = (UInt32)curSize;
291 if (curSize != size)
292 {
293 // _overflow = true;
294 return E_FAIL;
295 }
296 return S_OK;
297 }
298
299 class CCrcOutStream:
300 public ISequentialOutStream,
301 public CMyUnknownImp
302 {
303 public:
304 bool CalcCrc;
305 UInt32 Crc;
306 MY_UNKNOWN_IMP
307
CCrcOutStream()308 CCrcOutStream(): CalcCrc(true) {};
Init()309 void Init() { Crc = CRC_INIT_VAL; }
310 STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
311 };
312
Write(const void * data,UInt32 size,UInt32 * processedSize)313 STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
314 {
315 if (CalcCrc)
316 Crc = CrcUpdate(Crc, data, size);
317 if (processedSize)
318 *processedSize = size;
319 return S_OK;
320 }
321
GetTimeCount()322 static UInt64 GetTimeCount()
323 {
324 #ifdef USE_POSIX_TIME
325 #ifdef USE_POSIX_TIME2
326 timeval v;
327 if (gettimeofday(&v, 0) == 0)
328 return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
329 return (UInt64)time(NULL) * 1000000;
330 #else
331 return time(NULL);
332 #endif
333 #else
334 /*
335 LARGE_INTEGER value;
336 if (::QueryPerformanceCounter(&value))
337 return value.QuadPart;
338 */
339 return GetTickCount();
340 #endif
341 }
342
GetFreq()343 static UInt64 GetFreq()
344 {
345 #ifdef USE_POSIX_TIME
346 #ifdef USE_POSIX_TIME2
347 return 1000000;
348 #else
349 return 1;
350 #endif
351 #else
352 /*
353 LARGE_INTEGER value;
354 if (::QueryPerformanceFrequency(&value))
355 return value.QuadPart;
356 */
357 return 1000;
358 #endif
359 }
360
361 #ifdef USE_POSIX_TIME
362
363 struct CUserTime
364 {
365 UInt64 Sum;
366 clock_t Prev;
367
InitCUserTime368 void Init()
369 {
370 Prev = clock();
371 Sum = 0;
372 }
373
GetUserTimeCUserTime374 UInt64 GetUserTime()
375 {
376 clock_t v = clock();
377 Sum += v - Prev;
378 Prev = v;
379 return Sum;
380 }
381 };
382
383 #else
384
GetTime64(const FILETIME & t)385 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
GetWinUserTime()386 UInt64 GetWinUserTime()
387 {
388 FILETIME creationTime, exitTime, kernelTime, userTime;
389 if (
390 #ifdef UNDER_CE
391 ::GetThreadTimes(::GetCurrentThread()
392 #else
393 ::GetProcessTimes(::GetCurrentProcess()
394 #endif
395 , &creationTime, &exitTime, &kernelTime, &userTime) != 0)
396 return GetTime64(userTime) + GetTime64(kernelTime);
397 return (UInt64)GetTickCount() * 10000;
398 }
399
400 struct CUserTime
401 {
402 UInt64 StartTime;
403
InitCUserTime404 void Init() { StartTime = GetWinUserTime(); }
GetUserTimeCUserTime405 UInt64 GetUserTime() { return GetWinUserTime() - StartTime; }
406 };
407
408 #endif
409
GetUserFreq()410 static UInt64 GetUserFreq()
411 {
412 #ifdef USE_POSIX_TIME
413 return CLOCKS_PER_SEC;
414 #else
415 return 10000000;
416 #endif
417 }
418
419 class CBenchProgressStatus
420 {
421 #ifndef _7ZIP_ST
422 NSynchronization::CCriticalSection CS;
423 #endif
424 public:
425 HRESULT Res;
426 bool EncodeMode;
SetResult(HRESULT res)427 void SetResult(HRESULT res)
428 {
429 #ifndef _7ZIP_ST
430 NSynchronization::CCriticalSectionLock lock(CS);
431 #endif
432 Res = res;
433 }
GetResult()434 HRESULT GetResult()
435 {
436 #ifndef _7ZIP_ST
437 NSynchronization::CCriticalSectionLock lock(CS);
438 #endif
439 return Res;
440 }
441 };
442
443 struct CBenchInfoCalc
444 {
445 CBenchInfo BenchInfo;
446 CUserTime UserTime;
447
448 void SetStartTime();
449 void SetFinishTime(CBenchInfo &dest);
450 };
451
SetStartTime()452 void CBenchInfoCalc::SetStartTime()
453 {
454 BenchInfo.GlobalFreq = GetFreq();
455 BenchInfo.UserFreq = GetUserFreq();
456 BenchInfo.GlobalTime = ::GetTimeCount();
457 BenchInfo.UserTime = 0;
458 UserTime.Init();
459 }
460
SetFinishTime(CBenchInfo & dest)461 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
462 {
463 dest = BenchInfo;
464 dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
465 dest.UserTime = UserTime.GetUserTime();
466 }
467
468 class CBenchProgressInfo:
469 public ICompressProgressInfo,
470 public CMyUnknownImp,
471 public CBenchInfoCalc
472 {
473 public:
474 CBenchProgressStatus *Status;
475 IBenchCallback *Callback;
476
CBenchProgressInfo()477 CBenchProgressInfo(): Callback(NULL) {}
478 MY_UNKNOWN_IMP
479 STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
480 };
481
SetRatioInfo(const UInt64 * inSize,const UInt64 * outSize)482 STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
483 {
484 HRESULT res = Status->GetResult();
485 if (res != S_OK)
486 return res;
487 if (!Callback)
488 return res;
489 CBenchInfo info;
490 SetFinishTime(info);
491 if (Status->EncodeMode)
492 {
493 info.UnpackSize = BenchInfo.UnpackSize + *inSize;
494 info.PackSize = BenchInfo.PackSize + *outSize;
495 res = Callback->SetEncodeResult(info, false);
496 }
497 else
498 {
499 info.PackSize = BenchInfo.PackSize + *inSize;
500 info.UnpackSize = BenchInfo.UnpackSize + *outSize;
501 res = Callback->SetDecodeResult(info, false);
502 }
503 if (res != S_OK)
504 Status->SetResult(res);
505 return res;
506 }
507
508 static const unsigned kSubBits = 8;
509
GetLogSize(UInt32 size)510 static UInt32 GetLogSize(UInt32 size)
511 {
512 for (unsigned i = kSubBits; i < 32; i++)
513 for (UInt32 j = 0; j < (1 << kSubBits); j++)
514 if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
515 return (i << kSubBits) + j;
516 return (32 << kSubBits);
517 }
518
NormalizeVals(UInt64 & v1,UInt64 & v2)519 static void NormalizeVals(UInt64 &v1, UInt64 &v2)
520 {
521 while (v1 > 1000000)
522 {
523 v1 >>= 1;
524 v2 >>= 1;
525 }
526 }
527
GetUsage() const528 UInt64 CBenchInfo::GetUsage() const
529 {
530 UInt64 userTime = UserTime;
531 UInt64 userFreq = UserFreq;
532 UInt64 globalTime = GlobalTime;
533 UInt64 globalFreq = GlobalFreq;
534 NormalizeVals(userTime, userFreq);
535 NormalizeVals(globalFreq, globalTime);
536 if (userFreq == 0)
537 userFreq = 1;
538 if (globalTime == 0)
539 globalTime = 1;
540 return userTime * globalFreq * 1000000 / userFreq / globalTime;
541 }
542
GetRatingPerUsage(UInt64 rating) const543 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
544 {
545 UInt64 userTime = UserTime;
546 UInt64 userFreq = UserFreq;
547 UInt64 globalTime = GlobalTime;
548 UInt64 globalFreq = GlobalFreq;
549 NormalizeVals(userFreq, userTime);
550 NormalizeVals(globalTime, globalFreq);
551 if (globalFreq == 0)
552 globalFreq = 1;
553 if (userTime == 0)
554 userTime = 1;
555 return userFreq * globalTime / globalFreq * rating / userTime;
556 }
557
MyMultDiv64(UInt64 value,UInt64 elapsedTime,UInt64 freq)558 static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
559 {
560 UInt64 elTime = elapsedTime;
561 NormalizeVals(freq, elTime);
562 if (elTime == 0)
563 elTime = 1;
564 return value * freq / elTime;
565 }
566
GetSpeed(UInt64 numCommands) const567 UInt64 CBenchInfo::GetSpeed(UInt64 numCommands) const
568 {
569 return MyMultDiv64(numCommands, GlobalTime, GlobalFreq);
570 }
571
572 struct CBenchProps
573 {
574 bool LzmaRatingMode;
575
576 UInt32 EncComplex;
577 UInt32 DecComplexCompr;
578 UInt32 DecComplexUnc;
579
CBenchPropsCBenchProps580 CBenchProps(): LzmaRatingMode(false) {}
581 void SetLzmaCompexity();
582
GeComprCommandsCBenchProps583 UInt64 GeComprCommands(UInt64 unpackSize)
584 {
585 return unpackSize * EncComplex;
586 }
587
GeDecomprCommandsCBenchProps588 UInt64 GeDecomprCommands(UInt64 packSize, UInt64 unpackSize)
589 {
590 return (packSize * DecComplexCompr + unpackSize * DecComplexUnc);
591 }
592
593 UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size);
594 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations);
595 };
596
SetLzmaCompexity()597 void CBenchProps::SetLzmaCompexity()
598 {
599 EncComplex = 1200;
600 DecComplexUnc = 4;
601 DecComplexCompr = 190;
602 LzmaRatingMode = true;
603 }
604
GetCompressRating(UInt32 dictSize,UInt64 elapsedTime,UInt64 freq,UInt64 size)605 UInt64 CBenchProps::GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
606 {
607 if (dictSize < (1 << kBenchMinDicLogSize))
608 dictSize = (1 << kBenchMinDicLogSize);
609 UInt64 encComplex = EncComplex;
610 if (LzmaRatingMode)
611 {
612 UInt64 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits);
613 encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
614 }
615 UInt64 numCommands = (UInt64)size * encComplex;
616 return MyMultDiv64(numCommands, elapsedTime, freq);
617 }
618
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt64 numIterations)619 UInt64 CBenchProps::GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
620 {
621 UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations;
622 return MyMultDiv64(numCommands, elapsedTime, freq);
623 }
624
GetCompressRating(UInt32 dictSize,UInt64 elapsedTime,UInt64 freq,UInt64 size)625 UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
626 {
627 CBenchProps props;
628 props.SetLzmaCompexity();
629 return props.GetCompressRating(dictSize, elapsedTime, freq, size);
630 }
631
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt64 numIterations)632 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
633 {
634 CBenchProps props;
635 props.SetLzmaCompexity();
636 return props.GetDecompressRating(elapsedTime, freq, outSize, inSize, numIterations);
637 }
638
639
640
641
642 #ifndef _7ZIP_ST
643 struct CBenchSyncCommon
644 {
645 bool ExitMode;
646 NSynchronization::CManualResetEvent StartEvent;
647
CBenchSyncCommonCBenchSyncCommon648 CBenchSyncCommon(): ExitMode(false) {}
649 };
650 #endif
651
652
653 struct CEncoderInfo;
654
655 struct CEncoderInfo
656 {
657 #ifndef _7ZIP_ST
658 NWindows::CThread thread[2];
659 NSynchronization::CManualResetEvent ReadyEvent;
660 UInt32 NumDecoderSubThreads;
661 CBenchSyncCommon *Common;
662 #endif
663
664 CMyComPtr<ICompressCoder> _encoder;
665 CMyComPtr<ICompressFilter> _encoderFilter;
666 CBenchProgressInfo *progressInfoSpec[2];
667 CMyComPtr<ICompressProgressInfo> progressInfo[2];
668 UInt64 NumIterations;
669
670 UInt32 Salt;
671
672 #ifdef USE_ALLOCA
673 size_t AllocaSize;
674 #endif
675
676 Byte _key[32];
677 Byte _iv[16];
678 Byte _psw[16];
679 bool CheckCrc_Enc;
680 bool CheckCrc_Dec;
681
682 struct CDecoderInfo
683 {
684 CEncoderInfo *Encoder;
685 UInt32 DecoderIndex;
686 bool CallbackMode;
687
688 #ifdef USE_ALLOCA
689 size_t AllocaSize;
690 #endif
691 };
692 CDecoderInfo decodersInfo[2];
693
694 CMyComPtr<ICompressCoder> _decoders[2];
695 CMyComPtr<ICompressFilter> _decoderFilter;
696
697 HRESULT Results[2];
698 CBenchmarkOutStream *outStreamSpec;
699 CMyComPtr<ISequentialOutStream> outStream;
700 IBenchCallback *callback;
701 IBenchPrintCallback *printCallback;
702 UInt32 crc;
703 size_t kBufferSize;
704 size_t compressedSize;
705 const Byte *uncompressedDataPtr;
706
707 const Byte *fileData;
708 CBenchRandomGenerator rg;
709
710 CAlignedBuffer rgCopy; // it must be 16-byte aligned !!!
711 CBenchmarkOutStream *propStreamSpec;
712 CMyComPtr<ISequentialOutStream> propStream;
713
714 unsigned generateDictBits;
715 COneMethodInfo _method;
716
717 // for decode
718 size_t _uncompressedDataSize;
719
720 HRESULT Generate();
721 HRESULT Encode();
722 HRESULT Decode(UInt32 decoderIndex);
723
CEncoderInfoCEncoderInfo724 CEncoderInfo():
725 #ifndef _7ZIP_ST
726 Common(NULL),
727 #endif
728 Salt(0),
729 fileData(NULL),
730 CheckCrc_Enc(true),
731 CheckCrc_Dec(true),
732 outStreamSpec(NULL), callback(NULL), printCallback(NULL), propStreamSpec(NULL) {}
733
734 #ifndef _7ZIP_ST
735
EncodeThreadFunctionCEncoderInfo736 static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
737 {
738 HRESULT res;
739 CEncoderInfo *encoder = (CEncoderInfo *)param;
740 try
741 {
742 #ifdef USE_ALLOCA
743 alloca(encoder->AllocaSize);
744 #endif
745
746 res = encoder->Encode();
747 }
748 catch(...)
749 {
750 res = E_FAIL;
751 }
752 encoder->Results[0] = res;
753 if (res != S_OK)
754 encoder->progressInfoSpec[0]->Status->SetResult(res);
755 encoder->ReadyEvent.Set();
756 return 0;
757 }
758
DecodeThreadFunctionCEncoderInfo759 static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
760 {
761 CDecoderInfo *decoder = (CDecoderInfo *)param;
762
763 #ifdef USE_ALLOCA
764 alloca(decoder->AllocaSize);
765 #endif
766
767 CEncoderInfo *encoder = decoder->Encoder;
768 encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
769 return 0;
770 }
771
CreateEncoderThreadCEncoderInfo772 HRESULT CreateEncoderThread()
773 {
774 WRes res = 0;
775 if (!ReadyEvent.IsCreated())
776 res = ReadyEvent.Create();
777 if (res == 0)
778 res = thread[0].Create(EncodeThreadFunction, this);
779 return HRESULT_FROM_WIN32(res);
780 }
781
CreateDecoderThreadCEncoderInfo782 HRESULT CreateDecoderThread(unsigned index, bool callbackMode
783 #ifdef USE_ALLOCA
784 , size_t allocaSize
785 #endif
786 )
787 {
788 CDecoderInfo &decoder = decodersInfo[index];
789 decoder.DecoderIndex = index;
790 decoder.Encoder = this;
791
792 #ifdef USE_ALLOCA
793 decoder.AllocaSize = allocaSize;
794 #endif
795
796 decoder.CallbackMode = callbackMode;
797 return thread[index].Create(DecodeThreadFunction, &decoder);
798 }
799
800 #endif
801 };
802
803
Generate()804 HRESULT CEncoderInfo::Generate()
805 {
806 const COneMethodInfo &method = _method;
807
808 // we need extra space, if input data is already compressed
809 const size_t kCompressedBufferSize =
810 kCompressedAdditionalSize +
811 kBufferSize + kBufferSize / 16;
812 // kBufferSize / 2;
813
814 if (kCompressedBufferSize < kBufferSize)
815 return E_FAIL;
816
817 uncompressedDataPtr = fileData;
818
819 if (!fileData)
820 {
821 ALLOC_WITH_HRESULT(&rg, kBufferSize);
822
823 // DWORD ttt = GetTickCount();
824 if (generateDictBits == 0)
825 rg.GenerateSimpleRandom(Salt);
826 else
827 rg.GenerateLz(generateDictBits, Salt);
828 // printf("\n%d\n ", GetTickCount() - ttt);
829
830 crc = CrcCalc((const Byte *)rg, rg.Size());
831 uncompressedDataPtr = (const Byte *)rg;
832 }
833
834 if (_encoderFilter)
835 {
836 ALLOC_WITH_HRESULT(&rgCopy, kBufferSize);
837 }
838
839
840 if (!outStream)
841 {
842 outStreamSpec = new CBenchmarkOutStream;
843 outStream = outStreamSpec;
844 }
845
846 ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize)
847
848 if (!propStream)
849 {
850 propStreamSpec = new CBenchmarkOutStream;
851 propStream = propStreamSpec;
852 }
853 ALLOC_WITH_HRESULT(propStreamSpec, kMaxLzmaPropSize);
854 propStreamSpec->Init(true, false);
855
856
857 CMyComPtr<IUnknown> coder;
858 if (_encoderFilter)
859 coder = _encoderFilter;
860 else
861 coder = _encoder;
862 {
863 CMyComPtr<ICompressSetCoderProperties> scp;
864 coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
865 if (scp)
866 {
867 UInt64 reduceSize = kBufferSize;
868 RINOK(method.SetCoderProps(scp, &reduceSize));
869 }
870 else
871 {
872 if (method.AreThereNonOptionalProps())
873 return E_INVALIDARG;
874 }
875
876 CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
877 coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
878 if (writeCoderProps)
879 {
880 RINOK(writeCoderProps->WriteCoderProperties(propStream));
881 }
882
883 {
884 CMyComPtr<ICryptoSetPassword> sp;
885 coder.QueryInterface(IID_ICryptoSetPassword, &sp);
886 if (sp)
887 {
888 RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
889
890 // we must call encoding one time to calculate password key for key cache.
891 // it must be after WriteCoderProperties!
892 Byte temp[16];
893 memset(temp, 0, sizeof(temp));
894
895 if (_encoderFilter)
896 {
897 _encoderFilter->Init();
898 _encoderFilter->Filter(temp, sizeof(temp));
899 }
900 else
901 {
902 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
903 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
904 inStreamSpec->Init(temp, sizeof(temp));
905
906 CCrcOutStream *crcStreamSpec = new CCrcOutStream;
907 CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
908 crcStreamSpec->Init();
909
910 RINOK(_encoder->Code(inStream, crcStream, 0, 0, NULL));
911 }
912 }
913 }
914 }
915
916 return S_OK;
917 }
918
919
My_FilterBench(ICompressFilter * filter,Byte * data,size_t size)920 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size)
921 {
922 while (size != 0)
923 {
924 UInt32 cur = (UInt32)1 << 31;
925 if (cur > size)
926 cur = (UInt32)size;
927 UInt32 processed = filter->Filter(data, cur);
928 data += processed;
929 // if (processed > size) (in AES filter), we must fill last block with zeros.
930 // but it is not important for benchmark. So we just copy that data without filtering.
931 if (processed > size || processed == 0)
932 break;
933 size -= processed;
934 }
935 }
936
937
Encode()938 HRESULT CEncoderInfo::Encode()
939 {
940 RINOK(Generate());
941
942 #ifndef _7ZIP_ST
943 if (Common)
944 {
945 Results[0] = S_OK;
946 WRes wres = ReadyEvent.Set();
947 if (wres == 0)
948 wres = Common->StartEvent.Lock();
949 if (wres != 0)
950 return HRESULT_FROM_WIN32(wres);
951 if (Common->ExitMode)
952 return S_OK;
953 }
954 else
955 #endif
956 {
957 CBenchProgressInfo *bpi = progressInfoSpec[0];
958 bpi->SetStartTime();
959 }
960
961
962 CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
963 bi.UnpackSize = 0;
964 bi.PackSize = 0;
965 CMyComPtr<ICryptoProperties> cp;
966 CMyComPtr<IUnknown> coder;
967 if (_encoderFilter)
968 coder = _encoderFilter;
969 else
970 coder = _encoder;
971 coder.QueryInterface(IID_ICryptoProperties, &cp);
972 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
973 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
974 UInt64 prev = 0;
975
976 UInt32 crcPrev = 0;
977
978 if (cp)
979 {
980 RINOK(cp->SetKey(_key, sizeof(_key)));
981 RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
982 }
983
984 for (UInt64 i = 0; i < NumIterations; i++)
985 {
986 if (printCallback && bi.UnpackSize - prev > (1 << 20))
987 {
988 RINOK(printCallback->CheckBreak());
989 prev = bi.UnpackSize;
990 }
991
992 bool isLast = (i == NumIterations - 1);
993 bool calcCrc = ((isLast || (i & 0x7F) == 0 || CheckCrc_Enc) && NumIterations != 1);
994 outStreamSpec->Init(isLast, calcCrc);
995
996 if (_encoderFilter)
997 {
998 memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize);
999 _encoderFilter->Init();
1000 My_FilterBench(_encoderFilter, (Byte *)rgCopy, kBufferSize);
1001 RINOK(WriteStream(outStream, (const Byte *)rgCopy, kBufferSize));
1002 }
1003 else
1004 {
1005 inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
1006 RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]));
1007 }
1008
1009 // outStreamSpec->Print();
1010
1011 UInt32 crcNew = CRC_GET_DIGEST(outStreamSpec->Crc);
1012 if (i == 0)
1013 crcPrev = crcNew;
1014 else if (calcCrc && crcPrev != crcNew)
1015 return E_FAIL;
1016
1017 compressedSize = outStreamSpec->Pos;
1018 bi.UnpackSize += kBufferSize;
1019 bi.PackSize += compressedSize;
1020 }
1021
1022 _encoder.Release();
1023 _encoderFilter.Release();
1024 return S_OK;
1025 }
1026
1027
Decode(UInt32 decoderIndex)1028 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
1029 {
1030 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1031 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1032 CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
1033 CMyComPtr<IUnknown> coder;
1034 if (_decoderFilter)
1035 {
1036 if (decoderIndex != 0)
1037 return E_FAIL;
1038 coder = _decoderFilter;
1039 }
1040 else
1041 coder = decoder;
1042
1043 CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
1044 coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
1045 if (!setDecProps && propStreamSpec->Pos != 0)
1046 return E_FAIL;
1047
1048 CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
1049 CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
1050
1051 CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
1052 pi->BenchInfo.UnpackSize = 0;
1053 pi->BenchInfo.PackSize = 0;
1054
1055 #ifndef _7ZIP_ST
1056 {
1057 CMyComPtr<ICompressSetCoderMt> setCoderMt;
1058 coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
1059 if (setCoderMt)
1060 {
1061 RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads));
1062 }
1063 }
1064 #endif
1065
1066 CMyComPtr<ICompressSetCoderProperties> scp;
1067 coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1068 if (scp)
1069 {
1070 UInt64 reduceSize = _uncompressedDataSize;
1071 RINOK(_method.SetCoderProps(scp, &reduceSize));
1072 }
1073
1074 CMyComPtr<ICryptoProperties> cp;
1075 coder.QueryInterface(IID_ICryptoProperties, &cp);
1076
1077 if (setDecProps)
1078 {
1079 RINOK(setDecProps->SetDecoderProperties2((const Byte *)*propStreamSpec, (UInt32)propStreamSpec->Pos));
1080 }
1081
1082 {
1083 CMyComPtr<ICryptoSetPassword> sp;
1084 coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1085 if (sp)
1086 {
1087 RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
1088 }
1089 }
1090
1091 UInt64 prev = 0;
1092
1093 if (cp)
1094 {
1095 RINOK(cp->SetKey(_key, sizeof(_key)));
1096 RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
1097 }
1098
1099 for (UInt64 i = 0; i < NumIterations; i++)
1100 {
1101 if (printCallback && pi->BenchInfo.UnpackSize - prev > (1 << 20))
1102 {
1103 RINOK(printCallback->CheckBreak());
1104 prev = pi->BenchInfo.UnpackSize;
1105 }
1106
1107 inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);
1108 crcOutStreamSpec->Init();
1109
1110 UInt64 outSize = kBufferSize;
1111 crcOutStreamSpec->CalcCrc = ((i & 0x7F) == 0 || CheckCrc_Dec);
1112
1113 if (_decoderFilter)
1114 {
1115 if (compressedSize > rgCopy.Size())
1116 return E_FAIL;
1117 memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
1118 _decoderFilter->Init();
1119 My_FilterBench(_decoderFilter, (Byte *)rgCopy, compressedSize);
1120 RINOK(WriteStream(crcOutStream, (const Byte *)rgCopy, compressedSize));
1121 }
1122 else
1123 {
1124 RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
1125 }
1126
1127 if (crcOutStreamSpec->CalcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
1128 return S_FALSE;
1129 pi->BenchInfo.UnpackSize += kBufferSize;
1130 pi->BenchInfo.PackSize += compressedSize;
1131 }
1132
1133 decoder.Release();
1134 _decoderFilter.Release();
1135 return S_OK;
1136 }
1137
1138
1139 static const UInt32 kNumThreadsMax = (1 << 12);
1140
1141 struct CBenchEncoders
1142 {
1143 CEncoderInfo *encoders;
CBenchEncodersCBenchEncoders1144 CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; }
~CBenchEncodersCBenchEncoders1145 ~CBenchEncoders() { delete []encoders; }
1146 };
1147
1148
GetNumIterations(UInt64 numCommands,UInt64 complexInCommands)1149 static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1150 {
1151 if (numCommands < (1 << 4))
1152 numCommands = (1 << 4);
1153 UInt64 res = complexInCommands / numCommands;
1154 return (res == 0 ? 1 : res);
1155 }
1156
1157
1158
1159 #ifndef _7ZIP_ST
1160
1161 // ---------- CBenchThreadsFlusher ----------
1162
1163 struct CBenchThreadsFlusher
1164 {
1165 CBenchEncoders *EncodersSpec;
1166 CBenchSyncCommon Common;
1167 unsigned NumThreads;
1168 bool NeedClose;
1169
CBenchThreadsFlusherCBenchThreadsFlusher1170 CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {}
1171
~CBenchThreadsFlusherCBenchThreadsFlusher1172 ~CBenchThreadsFlusher()
1173 {
1174 StartAndWait(true);
1175 }
1176
1177 WRes StartAndWait(bool exitMode = false);
1178 };
1179
1180
StartAndWait(bool exitMode)1181 WRes CBenchThreadsFlusher::StartAndWait(bool exitMode)
1182 {
1183 if (!NeedClose)
1184 return 0;
1185
1186 Common.ExitMode = exitMode;
1187 WRes res = Common.StartEvent.Set();
1188
1189 for (unsigned i = 0; i < NumThreads; i++)
1190 {
1191 NWindows::CThread &t = EncodersSpec->encoders[i].thread[0];
1192 if (t.IsCreated())
1193 {
1194 WRes res2 = t.Wait();
1195 if (res2 == 0)
1196 res2 = t.Close();
1197 if (res == S_OK)
1198 res = res2;
1199 }
1200 }
1201 NeedClose = false;
1202 return res;
1203 }
1204
1205 #endif
1206
1207
1208
/*
  MethodBench runs the benchmark for one codec in two phases: encode, then decode.

  Parameters:
    complexInCommands    - passed to GetNumIterations() to derive the number of
                           iterations of each phase.
    oldLzmaBenchMode,
    numThreads           - named (and used) only when _7ZIP_ST is not defined.
                           numThreads becomes the number of encoder objects;
                           for LZMA in old mode it is halved and each encoder
                           gets 2 decoders when both numThreads and the LZMA
                           thread count are larger than 1.
    method2              - method description; a local copy is modified and used.
    uncompressedDataSize - stored as the data / buffer size of every encoder.
    fileData             - optional input data; when not NULL its CRC is
                           calculated and both are handed to every encoder.
    generateDictBits     - copied to every encoder.
    printCallback        - may be NULL here (it is checked before CheckBreak()).
    callback             - receives SetEncodeResult() / SetDecodeResult();
                           dereferenced without a NULL check.
    benchProps           - complexity values; dereferenced without a NULL check.

  Returns:
    E_NOTIMPL     - the method was not found or a coder object was not created.
    E_INVALIDARG  - the codec does not have exactly 1 stream.
    other errors  - from coder creation, threads, events, callbacks or the
                    encode / decode code itself.
    S_OK          - both phases finished and were reported.
*/
static HRESULT MethodBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    bool
      #ifndef _7ZIP_ST
        oldLzmaBenchMode
      #endif
    ,
    UInt32
      #ifndef _7ZIP_ST
        numThreads
      #endif
    ,
    const COneMethodInfo &method2,
    size_t uncompressedDataSize,
    const Byte *fileData,
    unsigned generateDictBits,

    IBenchPrintCallback *printCallback,
    IBenchCallback *callback,
    CBenchProps *benchProps)
{
  COneMethodInfo method = method2;
  UInt64 methodId;
  UInt32 numStreams;
  int codecIndex = FindMethod_Index(
      EXTERNAL_CODECS_LOC_VARS
      method.MethodName, true,
      methodId, numStreams);
  if (codecIndex < 0)
    return E_NOTIMPL;
  if (numStreams != 1)
    return E_INVALIDARG;

  UInt32 numEncoderThreads = 1;
  UInt32 numSubDecoderThreads = 1;

  #ifndef _7ZIP_ST
    numEncoderThreads = numThreads;

    if (oldLzmaBenchMode && methodId == k_LZMA)
    {
      // pin LZMA to 1 thread if only 1 thread is requested and
      // the method has no thread count of its own
      if (numThreads == 1 && method.Get_NumThreads() < 0)
        method.AddProp_NumThreads(1);
      const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
      if (numThreads > 1 && numLzmaThreads > 1)
      {
        // half as many encoders, and 2 decoders for each encoder
        numEncoderThreads = numThreads / 2;
        numSubDecoderThreads = 2;
      }
    }

    bool mtEncMode = (numEncoderThreads > 1);
  #endif

  CBenchEncoders encodersSpec(numEncoderThreads);
  CEncoderInfo *encoders = encodersSpec.encoders;

  UInt32 i;

  // ---------- create coder objects ----------

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    // only the first encoder gets the result callback
    encoder.callback = (i == 0) ? callback : 0;
    encoder.printCallback = printCallback;

    {
      CCreatedCoder cod;
      RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS codecIndex, true, encoder._encoderFilter, cod));
      encoder._encoder = cod.Coder;
      if (!encoder._encoder && !encoder._encoderFilter)
        return E_NOTIMPL;
    }

    // CRC checking is enabled only if the complexity values are larger than 30
    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30 ;
    encoder.CheckCrc_Dec = (benchProps->DecComplexCompr + benchProps->DecComplexUnc) > 30 ;

    memset(encoder._iv, 0, sizeof(encoder._iv));
    memset(encoder._key, 0, sizeof(encoder._key));
    memset(encoder._psw, 0, sizeof(encoder._psw));

    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
    {
      CCreatedCoder cod;
      CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
      RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod));
      decoder = cod.Coder;
      if (!encoder._decoderFilter && !decoder)
        return E_NOTIMPL;
    }
  }

  // crc stays 0 if there is no external data
  UInt32 crc = 0;
  if (fileData)
    crc = CrcCalc(fileData, uncompressedDataSize);

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder._method = method;
    encoder.generateDictBits = generateDictBits;
    encoder._uncompressedDataSize = uncompressedDataSize;
    encoder.kBufferSize = uncompressedDataSize;
    encoder.fileData = fileData;
    encoder.crc = crc;
  }

  // ---------- Encode ----------

  // one status object is shared by all progress objects
  CBenchProgressStatus status;
  status.Res = S_OK;
  status.EncodeMode = true;

  #ifndef _7ZIP_ST
  // encoderFlusher is declared before the threads are created, so its
  // destructor flushes them (in exit mode) on every return below
  CBenchThreadsFlusher encoderFlusher;
  if (mtEncMode)
  {
    WRes wres = encoderFlusher.Common.StartEvent.Create();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    encoderFlusher.NumThreads = numEncoderThreads;
    encoderFlusher.EncodersSpec = &encodersSpec;
    encoderFlusher.NeedClose = true;
  }
  #endif

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.NumIterations = GetNumIterations(benchProps->GeComprCommands(uncompressedDataSize), complexInCommands);
    encoder.Salt = g_CrcTable[i & 0xFF];
    encoder.Salt ^= (g_CrcTable[(i >> 8) & 0xFF] << 3);
    // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
    // printf(" %8x", encoder.Salt);

    for (int j = 0; j < 2; j++)
    {
      CBenchProgressInfo *spec = new CBenchProgressInfo;
      encoder.progressInfoSpec[j] = spec;
      encoder.progressInfo[j] = spec;
      spec->Status = &status;
    }

    if (i == 0)
    {
      // only the first progress object of the first encoder reports to callback
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numEncoderThreads;
    }

    #ifndef _7ZIP_ST
    if (mtEncMode)
    {
      #ifdef USE_ALLOCA
      // different value for each thread; the value is limited to 0x7FF
      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif

      encoder.Common = &encoderFlusher.Common;
      RINOK(encoder.CreateEncoderThread())
    }
    #endif
  }

  if (printCallback)
  {
    RINOK(printCallback->CheckBreak());
  }

  #ifndef _7ZIP_ST
  if (mtEncMode)
  {
    // wait until every encoder thread has signalled ReadyEvent
    // and check the result it has stored to Results[0]
    for (i = 0; i < numEncoderThreads; i++)
    {
      CEncoderInfo &encoder = encoders[i];
      WRes wres = encoder.ReadyEvent.Lock();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      RINOK(encoder.Results[0]);
    }

    // start time is taken after all threads are ready
    CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
    bpi->SetStartTime();

    // the wait error is returned only if status.Res holds no error;
    // otherwise status.Res is returned by RINOK(status.Res) below
    WRes wres = encoderFlusher.StartAndWait();
    if (status.Res == 0 && wres != 0)
      return HRESULT_FROM_WIN32(wres);
  }
  else
  #endif
  {
    RINOK(encoders[0].Encode());
  }

  RINOK(status.Res);

  CBenchInfo info;

  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = encoders[0].NumIterations;

  // sizes are summed over all encoders
  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
    // printf("\n%7d\n", encoder.compressedSize);
  }

  RINOK(callback->SetEncodeResult(info, true));




  // ---------- Decode ----------

  status.Res = S_OK;
  status.EncodeMode = false;

  UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];

    if (i == 0)
    {
      // the iteration count is derived from the sizes of the first encoder;
      // all other encoders reuse that count
      encoder.NumIterations = GetNumIterations(benchProps->GeDecomprCommands(encoder.compressedSize, encoder.kBufferSize), complexInCommands);
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numDecoderThreads;
      bpi->SetStartTime();
    }
    else
      encoder.NumIterations = encoders[0].NumIterations;

    #ifndef _7ZIP_ST
    {
      // (numSubThreads <= 0) is replaced by 1
      int numSubThreads = method.Get_NumThreads();
      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : numSubThreads;
    }
    if (numDecoderThreads > 1)
    {
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
            #ifdef USE_ALLOCA
            , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
            #endif
            );
        // NOTE(review): this return happens before the Wait() loop below, so
        // decoder threads created in previous iterations are not waited for
        // in this function - confirm that CBenchEncoders / CThread cleanup handles it
        RINOK(res);
      }
    }
    else
    #endif
    {
      RINOK(encoder.Decode(0));
    }
  }

  #ifndef _7ZIP_ST
  // wait for all decoder threads; if several threads have failed,
  // the result of the last failed one is returned
  HRESULT res = S_OK;
  if (numDecoderThreads > 1)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        CEncoderInfo &encoder = encoders[i];
        encoder.thread[j].Wait();
        if (encoder.Results[j] != S_OK)
          res = encoder.Results[j];
      }
  RINOK(res);
  #endif

  RINOK(status.Res);
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);

  #ifndef _7ZIP_ST
  #ifdef UNDER_CE
  // UNDER_CE: user and kernel times of all decoder threads are added to info.UserTime
  if (numDecoderThreads > 1)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        FILETIME creationTime, exitTime, kernelTime, userTime;
        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
      }
  #endif
  #endif

  info.UnpackSize = 0;
  info.PackSize = 0;
  // each encoder has numSubDecoderThreads decoders
  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  // the same info is reported twice: with (final == false) and then with (final == true)
  RINOK(callback->SetDecodeResult(info, false));
  RINOK(callback->SetDecodeResult(info, true));

  return S_OK;
}
1514
1515
/*
  Returns the memory estimate (in bytes) used by GetBenchMemoryUsage() for one
  LZMA encoder with the given dictionary size.
    multiThread : adds (6 << 20) bytes to the result.
    dictionary  : dictionary size in bytes.
  A hash size value is derived from (dictionary - 1): low bits are filled,
  the value is halved, forced to be at least 0xFFFF, halved once more if it is
  larger than (1 << 24), and finally incremented.
*/
static inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
{
  UInt32 hashSize = dictionary - 1;
  // propagate high bits down with shifts 1, 2, 4 and 8 (no shift by 16)
  for (unsigned shift = 1; shift <= 8; shift <<= 1)
    hashSize |= (hashSize >> shift);
  hashSize = (hashSize >> 1) | 0xFFFF;
  if (hashSize > ((UInt32)1 << 24))
    hashSize >>= 1;
  hashSize++;

  const UInt64 dict64 = dictionary;
  // (hashSize + (1 << 16)) is calculated in 32 bits before it is widened
  const UInt64 tablesSize = ((UInt64)(UInt32)(hashSize + ((UInt32)1 << 16)) + dict64 * 2) * 4;
  const UInt64 windowSize = dict64 * 3 / 2;
  const UInt64 baseSize = (UInt64)1 << 20;
  const UInt64 mtSize = multiThread ? ((UInt64)6 << 20) : 0;

  return tablesSize + windowSize + baseSize + mtSize;
}
1531
GetBenchMemoryUsage(UInt32 numThreads,UInt32 dictionary,bool totalBench)1532 UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary, bool totalBench)
1533 {
1534 const UInt32 kBufferSize = dictionary;
1535 const UInt32 kCompressedBufferSize = kBufferSize; // / 2;
1536 bool lzmaMt = (totalBench || numThreads > 1);
1537 UInt32 numBigThreads = numThreads;
1538 if (!totalBench && lzmaMt)
1539 numBigThreads /= 2;
1540 return ((UInt64)kBufferSize + kCompressedBufferSize +
1541 GetLZMAUsage(lzmaMt, dictionary) + (2 << 20)) * numBigThreads;
1542 }
1543
CrcBig(const void * data,UInt32 size,UInt64 numIterations,const UInt32 * checkSum,IHasher * hf,IBenchPrintCallback * callback)1544 static HRESULT CrcBig(const void *data, UInt32 size, UInt64 numIterations,
1545 const UInt32 *checkSum, IHasher *hf,
1546 IBenchPrintCallback *callback)
1547 {
1548 Byte hash[64];
1549 UInt64 i;
1550 for (i = 0; i < sizeof(hash); i++)
1551 hash[i] = 0;
1552 for (i = 0; i < numIterations; i++)
1553 {
1554 if (callback && (i & 0xFF) == 0)
1555 {
1556 RINOK(callback->CheckBreak());
1557 }
1558 hf->Init();
1559 hf->Update(data, size);
1560 hf->Final(hash);
1561 UInt32 hashSize = hf->GetDigestSize();
1562 if (hashSize > sizeof(hash))
1563 return S_FALSE;
1564 UInt32 sum = 0;
1565 for (UInt32 j = 0; j < hashSize; j += 4)
1566 sum ^= GetUi32(hash + j);
1567 if (checkSum && sum != *checkSum)
1568 {
1569 return S_FALSE;
1570 }
1571 }
1572 return S_OK;
1573 }
1574
// Start value and operand for the CountCpuFreq() loops.
// It is a writable global instead of a literal; presumably that keeps the
// compiler from evaluating the loop at compile time - TODO confirm.
UInt32 g_BenchCpuFreqTemp = 1;

// YY1 is 2 operations on (sum). Each next macro repeats the previous one
// 4 times, so YY7 is 64 copies of YY1 = 128 operations.
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
// number of operations in one YY7 block; FreqBench() uses it as the complexity value
static const UInt32 kNumFreqCommands = 128;

EXTERN_C_BEGIN

/*
  Executes the YY7 block (num) times.
    sum : start value of the accumulator.
    num : number of passes.
    val : operand that is added to and XORed with the accumulator.
  Returns the final accumulator value. The callers keep that value, so the
  operations have an observable result.
*/
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
  for (UInt32 i = 0; i < num; i++)
  {
    YY7
  }
  return sum;
}

EXTERN_C_END
1595
1596
1597 #ifndef _7ZIP_ST
1598
1599 struct CFreqInfo
1600 {
1601 NWindows::CThread Thread;
1602 IBenchPrintCallback *Callback;
1603 HRESULT CallbackRes;
1604 UInt32 ValRes;
1605 UInt32 Size;
1606 UInt64 NumIterations;
1607
WaitCFreqInfo1608 void Wait()
1609 {
1610 Thread.Wait();
1611 Thread.Close();
1612 }
1613 };
1614
FreqThreadFunction(void * param)1615 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
1616 {
1617 CFreqInfo *p = (CFreqInfo *)param;
1618
1619 UInt32 sum = g_BenchCpuFreqTemp;
1620 for (UInt64 k = p->NumIterations; k > 0; k--)
1621 {
1622 p->CallbackRes = p->Callback->CheckBreak();
1623 if (p->CallbackRes != S_OK)
1624 return 0;
1625 sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
1626 }
1627 p->ValRes = sum;
1628 return 0;
1629 }
1630
1631 struct CFreqThreads
1632 {
1633 CFreqInfo *Items;
1634 UInt32 NumThreads;
1635
CFreqThreadsCFreqThreads1636 CFreqThreads(): Items(NULL), NumThreads(0) {}
WaitAllCFreqThreads1637 void WaitAll()
1638 {
1639 for (UInt32 i = 0; i < NumThreads; i++)
1640 Items[i].Wait();
1641 NumThreads = 0;
1642 }
~CFreqThreadsCFreqThreads1643 ~CFreqThreads()
1644 {
1645 WaitAll();
1646 delete []Items;
1647 }
1648 };
1649
1650 struct CCrcInfo
1651 {
1652 NWindows::CThread Thread;
1653 IBenchPrintCallback *Callback;
1654 HRESULT CallbackRes;
1655
1656 const Byte *Data;
1657 UInt32 Size;
1658 UInt64 NumIterations;
1659 bool CheckSumDefined;
1660 UInt32 CheckSum;
1661 CMyComPtr<IHasher> Hasher;
1662 HRESULT Res;
1663
1664 #ifdef USE_ALLOCA
1665 size_t AllocaSize;
1666 #endif
1667
WaitCCrcInfo1668 void Wait()
1669 {
1670 Thread.Wait();
1671 Thread.Close();
1672 }
1673 };
1674
CrcThreadFunction(void * param)1675 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
1676 {
1677 CCrcInfo *p = (CCrcInfo *)param;
1678
1679 #ifdef USE_ALLOCA
1680 alloca(p->AllocaSize);
1681 #endif
1682
1683 p->Res = CrcBig(p->Data, p->Size, p->NumIterations,
1684 p->CheckSumDefined ? &p->CheckSum : NULL, p->Hasher,
1685 p->Callback);
1686 return 0;
1687 }
1688
1689 struct CCrcThreads
1690 {
1691 CCrcInfo *Items;
1692 UInt32 NumThreads;
1693
CCrcThreadsCCrcThreads1694 CCrcThreads(): Items(NULL), NumThreads(0) {}
WaitAllCCrcThreads1695 void WaitAll()
1696 {
1697 for (UInt32 i = 0; i < NumThreads; i++)
1698 Items[i].Wait();
1699 NumThreads = 0;
1700 }
~CCrcThreadsCCrcThreads1701 ~CCrcThreads()
1702 {
1703 WaitAll();
1704 delete []Items;
1705 }
1706 };
1707
1708 #endif
1709
CrcCalc1(const Byte * buf,size_t size)1710 static UInt32 CrcCalc1(const Byte *buf, size_t size)
1711 {
1712 UInt32 crc = CRC_INIT_VAL;;
1713 for (size_t i = 0; i < size; i++)
1714 crc = CRC_UPDATE_BYTE(crc, buf[i]);
1715 return CRC_GET_DIGEST(crc);
1716 }
1717
RandGen(Byte * buf,size_t size,CBaseRandomGenerator & RG)1718 static void RandGen(Byte *buf, size_t size, CBaseRandomGenerator &RG)
1719 {
1720 for (size_t i = 0; i < size; i++)
1721 buf[i] = (Byte)RG.GetRnd();
1722 }
1723
RandGenCrc(Byte * buf,size_t size,CBaseRandomGenerator & RG)1724 static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG)
1725 {
1726 RandGen(buf, size, RG);
1727 return CrcCalc1(buf, size);
1728 }
1729
CrcInternalTest()1730 bool CrcInternalTest()
1731 {
1732 CAlignedBuffer buffer;
1733 const size_t kBufferSize0 = (1 << 8);
1734 const size_t kBufferSize1 = (1 << 10);
1735 const unsigned kCheckSize = (1 << 5);
1736 buffer.Alloc(kBufferSize0 + kBufferSize1);
1737 if (!buffer.IsAllocated())
1738 return false;
1739 Byte *buf = (Byte *)buffer;
1740 size_t i;
1741 for (i = 0; i < kBufferSize0; i++)
1742 buf[i] = (Byte)i;
1743 UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
1744 if (crc1 != 0x29058C73)
1745 return false;
1746 CBaseRandomGenerator RG;
1747 RandGen(buf + kBufferSize0, kBufferSize1, RG);
1748 for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
1749 for (unsigned j = 0; j < kCheckSize; j++)
1750 if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
1751 return false;
1752 return true;
1753 }
1754
// One row of the method table that TotalBench() walks through.
struct CBenchMethod
{
  unsigned Weight;        // TotalBench() copies it to EncodeWeight and DecodeWeight of the callback
  unsigned DictBits;      // passed to MethodBench() as generateDictBits;
                          // 0 : TotalBench() uses kFilterUnpackSize, unless the size is forced
  UInt32 EncComplex;      // copied to BenchProps.EncComplex
  UInt32 DecComplexCompr; // copied to BenchProps.DecComplexCompr
  UInt32 DecComplexUnc;   // copied to BenchProps.DecComplexUnc
  const char *Name;       // method string; it is printed and parsed with ParseMethodFromPROPVARIANT()
};

// Columns: Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name
static const CBenchMethod g_Bench[] =
{
  { 40, 17, 357, 145, 20, "LZMA:x1" },
  { 80, 24, 1220, 145, 20, "LZMA:x5:mt1" },
  { 80, 24, 1220, 145, 20, "LZMA:x5:mt2" },

  { 10, 16, 124, 40, 14, "Deflate:x1" },
  { 20, 16, 376, 40, 14, "Deflate:x5" },
  { 10, 16, 1082, 40, 14, "Deflate:x7" },
  { 10, 17, 422, 40, 14, "Deflate64:x5" },

  { 10, 15, 590, 69, 69, "BZip2:x1" },
  { 20, 19, 815, 122, 122, "BZip2:x5" },
  { 10, 19, 815, 122, 122, "BZip2:x5:mt2" },
  { 10, 19, 2530, 122, 122, "BZip2:x7" },

  { 10, 18, 1010, 0, 1150, "PPMD:x1" },
  { 10, 22, 1655, 0, 1830, "PPMD:x5" },

  // rows with (DictBits == 0)
  { 2, 0, 6, 0, 6, "Delta:4" },
  { 2, 0, 4, 0, 4, "BCJ" },

  { 10, 0, 24, 0, 24, "AES256CBC:1" },
  { 2, 0, 8, 0, 2, "AES256CBC:2" }
};
1790
// One row of the hash method table.
// NOTE(review): the code that reads g_Hash is not in this part of the file;
// presumably the fields are passed to CrcBench() as
// (benchWeight, complexity, checkSum, method name) - confirm against the caller.
struct CBenchHash
{
  unsigned Weight;
  UInt32 Complex;
  UInt32 CheckSum;
  const char *Name;
};

// Columns: Weight, Complex, CheckSum, Name
// The three CRC32 rows have the same CheckSum value.
static const CBenchHash g_Hash[] =
{
  { 1, 1820, 0x8F8FEDAB, "CRC32:1" },
  { 10, 558, 0x8F8FEDAB, "CRC32:4" },
  { 10, 339, 0x8F8FEDAB, "CRC32:8" },
  { 10, 512, 0xDF1C17CC, "CRC64" },
  { 10, 5100, 0x2D79FF2E, "SHA256" },
  { 10, 2340, 0x4C25132B, "SHA1" },
  { 2, 5500, 0xE084E913, "BLAKE2sp" }
};
1809
/*
  Weighted totals of benchmark results.
  The pointer overload of PrintResults() adds (value * weight) to the three
  sums and (weight) to NumIterations2; PrintTotals() divides the sums by
  NumIterations2.
*/
struct CTotalBenchRes
{
  // UInt64 NumIterations1; // for Usage
  UInt64 NumIterations2; // for Rating / RPU

  UInt64 Rating;
  UInt64 Usage;
  UInt64 RPU;

  // Clears all totals.
  void Init()
  {
    // NumIterations1 = 0;
    NumIterations2 = 0;
    Rating = 0;
    Usage = 0;
    RPU = 0;
  }

  // Sets each field to the sum of the same fields of r1 and r2.
  // All sums are calculated before any field is written,
  // so r1 or r2 may be this object.
  void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2)
  {
    const UInt64 rating = r1.Rating + r2.Rating;
    const UInt64 usage = r1.Usage + r2.Usage;
    const UInt64 rpu = r1.RPU + r2.RPU;
    // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
    const UInt64 numIterations2 = r1.NumIterations2 + r2.NumIterations2;

    Rating = rating;
    Usage = usage;
    RPU = rpu;
    NumIterations2 = numIterations2;
  }
};
1830
PrintNumber(IBenchPrintCallback & f,UInt64 value,unsigned size)1831 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
1832 {
1833 char s[128];
1834 unsigned startPos = (unsigned)sizeof(s) - 32;
1835 memset(s, ' ', startPos);
1836 ConvertUInt64ToString(value, s + startPos);
1837 // if (withSpace)
1838 {
1839 startPos--;
1840 size++;
1841 }
1842 unsigned len = (unsigned)strlen(s + startPos);
1843 if (size > len)
1844 {
1845 startPos -= (size - len);
1846 if (startPos < 0)
1847 startPos = 0;
1848 }
1849 f.Print(s + startPos);
1850 }
1851
// Widths of the columns of the benchmark table.
// PrintNumber() prints one additional space before each number field.
static const unsigned kFieldSize_Name = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed = 9;
static const unsigned kFieldSize_Usage = 5;
static const unsigned kFieldSize_RU = 6;
static const unsigned kFieldSize_Rating = 6;
static const unsigned kFieldSize_EU = 5;
static const unsigned kFieldSize_Effec = 5;

// 4 and 2 are presumably the additional spaces of the 4 and 2 number fields - TODO confirm
static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
1863
1864
PrintRating(IBenchPrintCallback & f,UInt64 rating,unsigned size)1865 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
1866 {
1867 PrintNumber(f, (rating + 500000) / 1000000, size);
1868 }
1869
1870
PrintPercents(IBenchPrintCallback & f,UInt64 val,UInt64 divider,unsigned size)1871 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
1872 {
1873 PrintNumber(f, (val * 100 + divider / 2) / divider, size);
1874 }
1875
PrintChars(IBenchPrintCallback & f,char c,unsigned size)1876 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
1877 {
1878 char s[256];
1879 memset(s, (Byte)c, size);
1880 s[size] = 0;
1881 f.Print(s);
1882 }
1883
// Prints (size) space characters: PrintChars() with ' '.
static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
{
  PrintChars(f, ' ', size);
}
1888
PrintResults(IBenchPrintCallback & f,UInt64 usage,UInt64 rpu,UInt64 rating,bool showFreq,UInt64 cpuFreq)1889 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
1890 {
1891 PrintNumber(f, (usage + 5000) / 10000, kFieldSize_Usage);
1892 PrintRating(f, rpu, kFieldSize_RU);
1893 PrintRating(f, rating, kFieldSize_Rating);
1894 if (showFreq)
1895 {
1896 if (cpuFreq == 0)
1897 PrintSpaces(f, kFieldSize_EUAndEffec);
1898 else
1899 {
1900 UInt64 ddd = cpuFreq * usage / 100;
1901 if (ddd == 0)
1902 ddd = 1;
1903 PrintPercents(f, (rating * 10000), ddd, kFieldSize_EU);
1904 PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
1905 }
1906 }
1907 }
1908
PrintResults(IBenchPrintCallback * f,const CBenchInfo & info,unsigned weight,UInt64 rating,bool showFreq,UInt64 cpuFreq,CTotalBenchRes * res)1909 static void PrintResults(IBenchPrintCallback *f,
1910 const CBenchInfo &info,
1911 unsigned weight,
1912 UInt64 rating,
1913 bool showFreq, UInt64 cpuFreq,
1914 CTotalBenchRes *res)
1915 {
1916 UInt64 speed = info.GetSpeed(info.UnpackSize * info.NumIterations);
1917 if (f)
1918 {
1919 if (speed != 0)
1920 PrintNumber(*f, speed / 1024, kFieldSize_Speed);
1921 else
1922 PrintSpaces(*f, 1 + kFieldSize_Speed);
1923 }
1924 UInt64 usage = info.GetUsage();
1925 UInt64 rpu = info.GetRatingPerUsage(rating);
1926 if (f)
1927 {
1928 PrintResults(*f, usage, rpu, rating, showFreq, cpuFreq);
1929 }
1930
1931 if (res)
1932 {
1933 // res->NumIterations1++;
1934 res->NumIterations2 += weight;
1935 res->RPU += (rpu * weight);
1936 res->Rating += (rating * weight);
1937 res->Usage += (usage * weight);
1938 }
1939 }
1940
PrintTotals(IBenchPrintCallback & f,bool showFreq,UInt64 cpuFreq,const CTotalBenchRes & res)1941 static void PrintTotals(IBenchPrintCallback &f, bool showFreq, UInt64 cpuFreq, const CTotalBenchRes &res)
1942 {
1943 PrintSpaces(f, 1 + kFieldSize_Speed);
1944 // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
1945 UInt64 numIterations2 = res.NumIterations2; if (numIterations2 == 0) numIterations2 = 1;
1946 PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
1947 }
1948
1949
PrintHex(AString & s,UInt64 v)1950 static void PrintHex(AString &s, UInt64 v)
1951 {
1952 char temp[32];
1953 ConvertUInt64ToHex(v, temp);
1954 s += temp;
1955 }
1956
GetProcessThreadsInfo(const NSystem::CProcessAffinity & ti)1957 AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
1958 {
1959 AString s;
1960 // s.Add_UInt32(ti.numProcessThreads);
1961 if (ti.processAffinityMask != ti.systemAffinityMask)
1962 {
1963 // if (ti.numProcessThreads != ti.numSysThreads)
1964 {
1965 s += " / ";
1966 s.Add_UInt32(ti.GetNumSystemThreads());
1967 }
1968 s += " : ";
1969 PrintHex(s, ti.processAffinityMask);
1970 s += " / ";
1971 PrintHex(s, ti.systemAffinityMask);
1972 }
1973 return s;
1974 }
1975
1976
PrintSize(AString & s,UInt64 v)1977 static void PrintSize(AString &s, UInt64 v)
1978 {
1979 char c = 0;
1980 if ((v & 0x3FF) == 0) { v >>= 10; c = 'K';
1981 if ((v & 0x3FF) == 0) { v >>= 10; c = 'M';
1982 if ((v & 0x3FF) == 0) { v >>= 10; c = 'G';
1983 if ((v & 0x3FF) == 0) { v >>= 10; c = 'T';
1984 }}}}
1985 else
1986 {
1987 PrintHex(s, v);
1988 return;
1989 }
1990 char temp[32];
1991 ConvertUInt64ToString(v, temp);
1992 s += temp;
1993 if (c)
1994 s += c;
1995 }
1996
1997
1998 #ifdef _7ZIP_LARGE_PAGES
1999
2000 extern bool g_LargePagesMode;
2001
2002 extern "C"
2003 {
2004 extern SIZE_T g_LargePageSize;
2005 }
2006
Add_LargePages_String(AString & s)2007 void Add_LargePages_String(AString &s)
2008 {
2009 if (g_LargePagesMode || g_LargePageSize != 0)
2010 {
2011 s += " (LP-";
2012 PrintSize(s, g_LargePageSize);
2013 #ifdef MY_CPU_X86_OR_AMD64
2014 if (CPU_IsSupported_PageGB())
2015 s += "-1G";
2016 #endif
2017 if (!g_LargePagesMode)
2018 s += "-NA";
2019 s += ")";
2020 }
2021 }
2022
2023 #endif
2024
2025
2026
PrintRequirements(IBenchPrintCallback & f,const char * sizeString,bool size_Defined,UInt64 size,const char * threadsString,UInt32 numThreads)2027 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
2028 bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
2029 {
2030 f.Print("RAM ");
2031 f.Print(sizeString);
2032 if (size_Defined)
2033 PrintNumber(f, (size >> 20), 6);
2034 else
2035 f.Print(" ?");
2036 f.Print(" MB");
2037
2038 #ifdef _7ZIP_LARGE_PAGES
2039 {
2040 AString s;
2041 Add_LargePages_String(s);
2042 f.Print(s);
2043 }
2044 #endif
2045
2046 f.Print(", # ");
2047 f.Print(threadsString);
2048 PrintNumber(f, numThreads, 3);
2049 }
2050
2051
2052
2053 struct CBenchCallbackToPrint: public IBenchCallback
2054 {
2055 CBenchProps BenchProps;
2056 CTotalBenchRes EncodeRes;
2057 CTotalBenchRes DecodeRes;
2058 IBenchPrintCallback *_file;
2059 UInt32 DictSize;
2060
2061 bool Use2Columns;
2062 unsigned NameFieldSize;
2063
2064 bool ShowFreq;
2065 UInt64 CpuFreq;
2066
2067 unsigned EncodeWeight;
2068 unsigned DecodeWeight;
2069
CBenchCallbackToPrintCBenchCallbackToPrint2070 CBenchCallbackToPrint():
2071 Use2Columns(false),
2072 NameFieldSize(0),
2073 ShowFreq(false),
2074 CpuFreq(0),
2075 EncodeWeight(1),
2076 DecodeWeight(1)
2077 {}
2078
InitCBenchCallbackToPrint2079 void Init() { EncodeRes.Init(); DecodeRes.Init(); }
2080 void Print(const char *s);
2081 void NewLine();
2082
2083 HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
2084 HRESULT SetEncodeResult(const CBenchInfo &info, bool final);
2085 HRESULT SetDecodeResult(const CBenchInfo &info, bool final);
2086 };
2087
SetFreq(bool showFreq,UInt64 cpuFreq)2088 HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
2089 {
2090 ShowFreq = showFreq;
2091 CpuFreq = cpuFreq;
2092 return S_OK;
2093 }
2094
SetEncodeResult(const CBenchInfo & info,bool final)2095 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
2096 {
2097 RINOK(_file->CheckBreak());
2098 if (final)
2099 {
2100 UInt64 rating = BenchProps.GetCompressRating(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
2101 PrintResults(_file, info,
2102 EncodeWeight, rating,
2103 ShowFreq, CpuFreq, &EncodeRes);
2104 if (!Use2Columns)
2105 _file->NewLine();
2106 }
2107 return S_OK;
2108 }
2109
2110 static const char * const kSep = " | ";
2111
SetDecodeResult(const CBenchInfo & info,bool final)2112 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
2113 {
2114 RINOK(_file->CheckBreak());
2115 if (final)
2116 {
2117 UInt64 rating = BenchProps.GetDecompressRating(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
2118 if (Use2Columns)
2119 _file->Print(kSep);
2120 else
2121 PrintSpaces(*_file, NameFieldSize);
2122 CBenchInfo info2 = info;
2123 info2.UnpackSize *= info2.NumIterations;
2124 info2.PackSize *= info2.NumIterations;
2125 info2.NumIterations = 1;
2126 PrintResults(_file, info2,
2127 DecodeWeight, rating,
2128 ShowFreq, CpuFreq, &DecodeRes);
2129 }
2130 return S_OK;
2131 }
2132
// Passes (s) to _file->Print(). (_file) is used without a NULL check.
void CBenchCallbackToPrint::Print(const char *s)
{
  _file->Print(s);
}
2137
// Calls _file->NewLine(). (_file) is used without a NULL check.
void CBenchCallbackToPrint::NewLine()
{
  _file->NewLine();
}
2142
PrintLeft(IBenchPrintCallback & f,const char * s,unsigned size)2143 void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
2144 {
2145 f.Print(s);
2146 int numSpaces = size - MyStringLen(s);
2147 if (numSpaces > 0)
2148 PrintSpaces(f, numSpaces);
2149 }
2150
PrintRight(IBenchPrintCallback & f,const char * s,unsigned size)2151 void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
2152 {
2153 int numSpaces = size - MyStringLen(s);
2154 if (numSpaces > 0)
2155 PrintSpaces(f, numSpaces);
2156 f.Print(s);
2157 }
2158
TotalBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,bool forceUnpackSize,size_t unpackSize,const Byte * fileData,IBenchPrintCallback * printCallback,CBenchCallbackToPrint * callback)2159 static HRESULT TotalBench(
2160 DECL_EXTERNAL_CODECS_LOC_VARS
2161 UInt64 complexInCommands,
2162 UInt32 numThreads,
2163 bool forceUnpackSize,
2164 size_t unpackSize,
2165 const Byte *fileData,
2166 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
2167 {
2168 for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
2169 {
2170 const CBenchMethod &bench = g_Bench[i];
2171 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
2172 callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
2173 callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
2174 callback->BenchProps.EncComplex = bench.EncComplex;
2175
2176 COneMethodInfo method;
2177 NCOM::CPropVariant propVariant;
2178 propVariant = bench.Name;
2179 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
2180
2181 size_t unpackSize2 = unpackSize;
2182 if (!forceUnpackSize && bench.DictBits == 0)
2183 unpackSize2 = kFilterUnpackSize;
2184
2185 callback->EncodeWeight = bench.Weight;
2186 callback->DecodeWeight = bench.Weight;
2187
2188 HRESULT res = MethodBench(
2189 EXTERNAL_CODECS_LOC_VARS
2190 complexInCommands,
2191 false, numThreads, method,
2192 unpackSize2, fileData,
2193 bench.DictBits,
2194 printCallback, callback, &callback->BenchProps);
2195
2196 if (res == E_NOTIMPL)
2197 {
2198 // callback->Print(" ---");
2199 // we need additional empty line as line for decompression results
2200 if (!callback->Use2Columns)
2201 callback->NewLine();
2202 }
2203 else
2204 {
2205 RINOK(res);
2206 }
2207
2208 callback->NewLine();
2209 }
2210 return S_OK;
2211 }
2212
2213
/*
  FreqBench measures the speed of the CountCpuFreq() code in 1 or more threads.

  Parameters:
    complexInCommands - total number of commands per thread; it is divided by
                        (kNumFreqCommands * bufferSize) to get the number of iterations.
    numThreads        - 0 is replaced by 1; it is always 1 if _7ZIP_ST is defined.
    _file             - output and CheckBreak() callback.
    showFreq          - passed to PrintResults().
    specifiedFreq     - if not 0 (and showFreq is set), it is passed to
                        PrintResults() instead of the measured frequency.
    cpuFreq           - (out) rating / numThreads; it is written only if (_file != NULL),
                        otherwise it stays 0.
    res               - (out) accumulator value of the single-thread path.
                        It stays 0 in the multi-thread path: ValRes of the
                        threads is not added here.

  Returns an error of thread creation or of CheckBreak(), or S_OK.

  NOTE(review): (_file) is checked for NULL before the results are printed,
  but the single-thread loop and FreqThreadFunction() call CheckBreak()
  without a check - confirm that callers never pass NULL.
*/
static HRESULT FreqBench(
    UInt64 complexInCommands,
    UInt32 numThreads,
    IBenchPrintCallback *_file,
    bool showFreq,
    UInt64 specifiedFreq,
    UInt64 &cpuFreq,
    UInt32 &res)
{
  res = 0;
  cpuFreq = 0;

  UInt32 bufferSize = 1 << 20;
  UInt32 complexity = kNumFreqCommands;
  if (numThreads == 0)
    numThreads = 1;

  #ifdef _7ZIP_ST
  numThreads = 1;
  #endif

  // bsize is a non-zero divider
  UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
  UInt64 numIterations = complexInCommands / complexity / bsize;
  if (numIterations == 0)
    numIterations = 1;

  CBenchInfoCalc progressInfoSpec;

  #ifndef _7ZIP_ST
  CFreqThreads threads;
  if (numThreads > 1)
  {
    threads.Items = new CFreqInfo[numThreads];
    UInt32 i;
    // all items are prepared before the start time is taken
    for (i = 0; i < numThreads; i++)
    {
      CFreqInfo &info = threads.Items[i];
      info.Callback = _file;
      info.CallbackRes = S_OK;
      info.NumIterations = numIterations;
      info.Size = bufferSize;
    }
    progressInfoSpec.SetStartTime();
    for (i = 0; i < numThreads; i++)
    {
      CFreqInfo &info = threads.Items[i];
      RINOK(info.Thread.Create(FreqThreadFunction, &info));
      // NumThreads counts only created threads, so the destructor of (threads)
      // waits for exactly these threads if RINOK returns above
      threads.NumThreads++;
    }
    threads.WaitAll();
    for (i = 0; i < numThreads; i++)
    {
      RINOK(threads.Items[i].CallbackRes);
    }
  }
  else
  #endif
  {
    progressInfoSpec.SetStartTime();
    UInt32 sum = g_BenchCpuFreqTemp;
    for (UInt64 k = numIterations; k > 0; k--)
    {
      RINOK(_file->CheckBreak());
      sum = CountCpuFreq(sum, bufferSize, g_BenchCpuFreqTemp);
    }
    res += sum;
  }

  CBenchInfo info;
  progressInfoSpec.SetFinishTime(info);

  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = 1;

  if (_file)
  {
    {
      // number of commands executed by all threads
      UInt64 numCommands = (UInt64)numIterations * bufferSize * numThreads * complexity;
      UInt64 rating = info.GetSpeed(numCommands);
      cpuFreq = rating / numThreads;
      PrintResults(_file, info,
          0, // weight
          rating,
          showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : cpuFreq) : 0, NULL);
    }
    RINOK(_file->CheckBreak());
  }

  return S_OK;
}
2305
2306
2307
CrcBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,UInt32 bufferSize,UInt64 & speed,UInt32 complexity,unsigned benchWeight,const UInt32 * checkSum,const COneMethodInfo & method,IBenchPrintCallback * _file,CTotalBenchRes * encodeRes,bool showFreq,UInt64 cpuFreq)2308 static HRESULT CrcBench(
2309 DECL_EXTERNAL_CODECS_LOC_VARS
2310 UInt64 complexInCommands,
2311 UInt32 numThreads, UInt32 bufferSize,
2312 UInt64 &speed,
2313 UInt32 complexity, unsigned benchWeight,
2314 const UInt32 *checkSum,
2315 const COneMethodInfo &method,
2316 IBenchPrintCallback *_file,
2317 CTotalBenchRes *encodeRes,
2318 bool showFreq, UInt64 cpuFreq)
2319 {
2320 if (numThreads == 0)
2321 numThreads = 1;
2322
2323 #ifdef _7ZIP_ST
2324 numThreads = 1;
2325 #endif
2326
2327 const AString &methodName = method.MethodName;
2328 // methodName.RemoveChar(L'-');
2329 CMethodId hashID;
2330 if (!FindHashMethod(
2331 EXTERNAL_CODECS_LOC_VARS
2332 methodName, hashID))
2333 return E_NOTIMPL;
2334
2335 CAlignedBuffer buffer;
2336 size_t totalSize = (size_t)bufferSize * numThreads;
2337 if (totalSize / numThreads != bufferSize)
2338 return E_OUTOFMEMORY;
2339 ALLOC_WITH_HRESULT(&buffer, totalSize)
2340
2341 Byte *buf = (Byte *)buffer;
2342 CBaseRandomGenerator RG;
2343 UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
2344 UInt64 numIterations = complexInCommands * 256 / complexity / bsize;
2345 if (numIterations == 0)
2346 numIterations = 1;
2347
2348 CBenchInfoCalc progressInfoSpec;
2349
2350 #ifndef _7ZIP_ST
2351 CCrcThreads threads;
2352 if (numThreads > 1)
2353 {
2354 threads.Items = new CCrcInfo[numThreads];
2355
2356 UInt32 i;
2357 for (i = 0; i < numThreads; i++)
2358 {
2359 CCrcInfo &info = threads.Items[i];
2360 AString name;
2361 RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, info.Hasher));
2362 if (!info.Hasher)
2363 return E_NOTIMPL;
2364 CMyComPtr<ICompressSetCoderProperties> scp;
2365 info.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
2366 if (scp)
2367 {
2368 UInt64 reduceSize = 1;
2369 RINOK(method.SetCoderProps(scp, &reduceSize));
2370 }
2371
2372 Byte *data = buf + (size_t)bufferSize * i;
2373 info.Callback = _file;
2374 info.Data = data;
2375 info.NumIterations = numIterations;
2376 info.Size = bufferSize;
2377 /* info.Crc = */ RandGenCrc(data, bufferSize, RG);
2378 info.CheckSumDefined = false;
2379 if (checkSum)
2380 {
2381 info.CheckSum = *checkSum;
2382 info.CheckSumDefined = (checkSum && (i == 0));
2383 }
2384
2385 #ifdef USE_ALLOCA
2386 info.AllocaSize = (i * 16 * 21) & 0x7FF;
2387 #endif
2388 }
2389
2390 progressInfoSpec.SetStartTime();
2391
2392 for (i = 0; i < numThreads; i++)
2393 {
2394 CCrcInfo &info = threads.Items[i];
2395 RINOK(info.Thread.Create(CrcThreadFunction, &info));
2396 threads.NumThreads++;
2397 }
2398 threads.WaitAll();
2399 for (i = 0; i < numThreads; i++)
2400 {
2401 RINOK(threads.Items[i].Res);
2402 }
2403 }
2404 else
2405 #endif
2406 {
2407 /* UInt32 crc = */ RandGenCrc(buf, bufferSize, RG);
2408 progressInfoSpec.SetStartTime();
2409 CMyComPtr<IHasher> hasher;
2410 AString name;
2411 RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher));
2412 if (!hasher)
2413 return E_NOTIMPL;
2414 CMyComPtr<ICompressSetCoderProperties> scp;
2415 hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
2416 if (scp)
2417 {
2418 UInt64 reduceSize = 1;
2419 RINOK(method.SetCoderProps(scp, &reduceSize));
2420 }
2421 RINOK(CrcBig(buf, bufferSize, numIterations, checkSum, hasher, _file));
2422 }
2423
2424 CBenchInfo info;
2425 progressInfoSpec.SetFinishTime(info);
2426
2427 UInt64 unpSize = numIterations * bufferSize;
2428 UInt64 unpSizeThreads = unpSize * numThreads;
2429 info.UnpackSize = unpSizeThreads;
2430 info.PackSize = unpSizeThreads;
2431 info.NumIterations = 1;
2432
2433 if (_file)
2434 {
2435 {
2436 UInt64 numCommands = unpSizeThreads * complexity / 256;
2437 UInt64 rating = info.GetSpeed(numCommands);
2438 PrintResults(_file, info,
2439 benchWeight, rating,
2440 showFreq, cpuFreq, encodeRes);
2441 }
2442 RINOK(_file->CheckBreak());
2443 }
2444
2445 speed = info.GetSpeed(unpSizeThreads);
2446
2447 return S_OK;
2448 }
2449
TotalBench_Hash(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,UInt32 bufSize,IBenchPrintCallback * printCallback,CBenchCallbackToPrint * callback,CTotalBenchRes * encodeRes,bool showFreq,UInt64 cpuFreq)2450 static HRESULT TotalBench_Hash(
2451 DECL_EXTERNAL_CODECS_LOC_VARS
2452 UInt64 complexInCommands,
2453 UInt32 numThreads, UInt32 bufSize,
2454 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
2455 CTotalBenchRes *encodeRes,
2456 bool showFreq, UInt64 cpuFreq)
2457 {
2458 for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
2459 {
2460 const CBenchHash &bench = g_Hash[i];
2461 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
2462 // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
2463 // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
2464 // callback->BenchProps.EncComplex = bench.EncComplex;
2465
2466 COneMethodInfo method;
2467 NCOM::CPropVariant propVariant;
2468 propVariant = bench.Name;
2469 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
2470
2471 UInt64 speed;
2472 HRESULT res = CrcBench(
2473 EXTERNAL_CODECS_LOC_VARS
2474 complexInCommands,
2475 numThreads, bufSize,
2476 speed,
2477 bench.Complex, bench.Weight,
2478 &bench.CheckSum, method,
2479 printCallback, encodeRes, showFreq, cpuFreq);
2480 if (res == E_NOTIMPL)
2481 {
2482 // callback->Print(" ---");
2483 }
2484 else
2485 {
2486 RINOK(res);
2487 }
2488 callback->NewLine();
2489 }
2490 return S_OK;
2491 }
2492
2493 struct CTempValues
2494 {
2495 UInt64 *Values;
CTempValuesCTempValues2496 CTempValues(UInt32 num) { Values = new UInt64[num]; }
~CTempValuesCTempValues2497 ~CTempValues() { delete []Values; }
2498 };
2499
ParseNumberString(const UString & s,NCOM::CPropVariant & prop)2500 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
2501 {
2502 const wchar_t *end;
2503 UInt64 result = ConvertStringToUInt64(s, &end);
2504 if (*end != 0 || s.IsEmpty())
2505 prop = s;
2506 else if (result <= (UInt32)0xFFFFFFFF)
2507 prop = (UInt32)result;
2508 else
2509 prop = result;
2510 }
2511
GetNumThreadsNext(unsigned i,UInt32 numThreads)2512 static UInt32 GetNumThreadsNext(unsigned i, UInt32 numThreads)
2513 {
2514 if (i < 2)
2515 return i + 1;
2516 i -= 1;
2517 UInt32 num = (UInt32)(2 + (i & 1)) << (i >> 1);
2518 return (num <= numThreads) ? num : numThreads;
2519 }
2520
AreSameMethodNames(const char * fullName,const char * shortName)2521 static bool AreSameMethodNames(const char *fullName, const char *shortName)
2522 {
2523 return StringsAreEqualNoCase_Ascii(fullName, shortName);
2524 }
2525
2526
2527 #ifdef MY_CPU_X86_OR_AMD64
2528
PrintCpuChars(AString & s,UInt32 v)2529 static void PrintCpuChars(AString &s, UInt32 v)
2530 {
2531 for (int j = 0; j < 4; j++)
2532 {
2533 Byte b = (Byte)(v & 0xFF);
2534 v >>= 8;
2535 if (b == 0)
2536 break;
2537 s += (char)b;
2538 }
2539 }
2540
x86cpuid_to_String(const Cx86cpuid & c,AString & s)2541 static void x86cpuid_to_String(const Cx86cpuid &c, AString &s)
2542 {
2543 s.Empty();
2544
2545 UInt32 maxFunc2 = 0;
2546 UInt32 t[3];
2547
2548 MyCPUID(0x80000000, &maxFunc2, &t[0], &t[1], &t[2]);
2549
2550 bool fullNameIsAvail = (maxFunc2 >= 0x80000004);
2551
2552 if (!fullNameIsAvail)
2553 {
2554 for (int i = 0; i < 3; i++)
2555 PrintCpuChars(s, c.vendor[i]);
2556 }
2557 else
2558 {
2559 for (int i = 0; i < 3; i++)
2560 {
2561 UInt32 d[4] = { 0 };
2562 MyCPUID(0x80000002 + i, &d[0], &d[1], &d[2], &d[3]);
2563 for (int j = 0; j < 4; j++)
2564 PrintCpuChars(s, d[j]);
2565 }
2566 }
2567
2568 s.Add_Space_if_NotEmpty();
2569 {
2570 char temp[32];
2571 ConvertUInt32ToHex(c.ver, temp);
2572 s += '(';
2573 s += temp;
2574 s += ')';
2575 }
2576 }
2577
2578 #endif
2579
2580
2581
// Display names of processor architectures. The array index is the numeric
// architecture id (SysInfo_To_String looks up si.wProcessorArchitecture here).
static const char * const k_PROCESSOR_ARCHITECTURE[] =
{
  "x86", // "INTEL"
  "MIPS",
  "ALPHA",
  "PPC",
  "SHX",
  "ARM",
  "IA64",
  "ALPHA64",
  "MSIL",
  "x64", // "AMD64"
  "IA32_ON_WIN64",
  "NEUTRAL",
  "ARM64",
  "ARM32_ON_WIN64"
};

// Architecture ids (indexes into k_PROCESSOR_ARCHITECTURE) of the two x86 flavours.
#define MY__PROCESSOR_ARCHITECTURE_INTEL 0
#define MY__PROCESSOR_ARCHITECTURE_AMD64 9


// Processor type values that are compared with si.dwProcessorType.
#define MY__PROCESSOR_INTEL_PENTIUM 586
#define MY__PROCESSOR_AMD_X8664 8664
2606
2607 /*
2608 static const CUInt32PCharPair k_PROCESSOR[] =
2609 {
2610 { 2200, "IA64" },
2611 { 8664, "x64" }
2612 };
2613
2614 #define PROCESSOR_INTEL_386 386
2615 #define PROCESSOR_INTEL_486 486
2616 #define PROCESSOR_INTEL_PENTIUM 586
2617 #define PROCESSOR_INTEL_860 860
2618 #define PROCESSOR_INTEL_IA64 2200
2619 #define PROCESSOR_AMD_X8664 8664
2620 #define PROCESSOR_MIPS_R2000 2000
2621 #define PROCESSOR_MIPS_R3000 3000
2622 #define PROCESSOR_MIPS_R4000 4000
2623 #define PROCESSOR_ALPHA_21064 21064
2624 #define PROCESSOR_PPC_601 601
2625 #define PROCESSOR_PPC_603 603
2626 #define PROCESSOR_PPC_604 604
2627 #define PROCESSOR_PPC_620 620
2628 #define PROCESSOR_HITACHI_SH3 10003
2629 #define PROCESSOR_HITACHI_SH3E 10004
2630 #define PROCESSOR_HITACHI_SH4 10005
2631 #define PROCESSOR_MOTOROLA_821 821
2632 #define PROCESSOR_SHx_SH3 103
2633 #define PROCESSOR_SHx_SH4 104
2634 #define PROCESSOR_STRONGARM 2577 // 0xA11
2635 #define PROCESSOR_ARM720 1824 // 0x720
2636 #define PROCESSOR_ARM820 2080 // 0x820
2637 #define PROCESSOR_ARM920 2336 // 0x920
2638 #define PROCESSOR_ARM_7TDMI 70001
2639 #define PROCESSOR_OPTIL 18767 // 0x494f
2640 */
2641
2642 #ifdef _WIN32
2643
// Display names of processor features. The array index is the feature number
// that GetCpuFeatures passes to IsProcessorFeaturePresent().
static const char * const k_PF[] =
{
  "FP_ERRATA",
  "FP_EMU",
  "CMPXCHG",
  "MMX",
  "PPC_MOVEMEM_64BIT",
  "ALPHA_BYTE",
  "SSE",
  "3DNOW",
  "RDTSC",
  "PAE",
  "SSE2",
  "SSE_DAZ",
  "NX",
  "SSE3",
  "CMPXCHG16B",
  "CMP8XCHG16",
  "CHANNELS",
  "XSAVE",
  "ARM_VFP_32",
  "ARM_NEON",
  "L2AT",
  "VIRT_FIRMWARE",
  "RDWRFSGSBASE",
  "FASTFAIL",
  "ARM_DIVIDE",
  "ARM_64BIT_LOADSTORE_ATOMIC",
  "ARM_EXTERNAL_CACHE",
  "ARM_FMAC",
  "RDRAND",
  "ARM_V8",
  "ARM_V8_CRYPTO",
  "ARM_V8_CRC32",
  "RDTSCP"
};
2680
2681 #endif
2682
2683
2684
2685
PrintPage(AString & s,UInt32 v)2686 static void PrintPage(AString &s, UInt32 v)
2687 {
2688 if ((v & 0x3FF) == 0)
2689 {
2690 s.Add_UInt32(v >> 10);
2691 s += "K";
2692 }
2693 else
2694 s.Add_UInt32(v >> 10);
2695 }
2696
TypeToString2(const char * const table[],unsigned num,UInt32 value)2697 static AString TypeToString2(const char * const table[], unsigned num, UInt32 value)
2698 {
2699 char sz[16];
2700 const char *p = NULL;
2701 if (value < num)
2702 p = table[value];
2703 if (!p)
2704 {
2705 ConvertUInt32ToString(value, sz);
2706 p = sz;
2707 }
2708 return (AString)p;
2709 }
2710
2711 #ifdef _WIN32
2712
SysInfo_To_String(AString & s,const SYSTEM_INFO & si)2713 static void SysInfo_To_String(AString &s, const SYSTEM_INFO &si)
2714 {
2715 s += TypeToString2(k_PROCESSOR_ARCHITECTURE, ARRAY_SIZE(k_PROCESSOR_ARCHITECTURE), si.wProcessorArchitecture);
2716
2717 if (!( si.wProcessorArchitecture == MY__PROCESSOR_ARCHITECTURE_INTEL && si.dwProcessorType == MY__PROCESSOR_INTEL_PENTIUM
2718 || si.wProcessorArchitecture == MY__PROCESSOR_ARCHITECTURE_AMD64 && si.dwProcessorType == MY__PROCESSOR_AMD_X8664))
2719 {
2720 s += " ";
2721 // s += TypePairToString(k_PROCESSOR, ARRAY_SIZE(k_PROCESSOR), si.dwProcessorType);
2722 s.Add_UInt32(si.dwProcessorType);
2723 }
2724 s += " ";
2725 PrintHex(s, si.wProcessorLevel);
2726 s += ".";
2727 PrintHex(s, si.wProcessorRevision);
2728 if ((UInt64)si.dwActiveProcessorMask + 1 != ((UInt64)1 << si.dwNumberOfProcessors))
2729 if ((UInt64)si.dwActiveProcessorMask + 1 != 0 || si.dwNumberOfProcessors != sizeof(UInt64) * 8)
2730 {
2731 s += " act:";
2732 PrintHex(s, si.dwActiveProcessorMask);
2733 }
2734 s += " cpus:";
2735 s.Add_UInt32(si.dwNumberOfProcessors);
2736 if (si.dwPageSize != 1 << 12)
2737 {
2738 s += " page:";
2739 PrintPage(s, si.dwPageSize);
2740 }
2741 if (si.dwAllocationGranularity != 1 << 16)
2742 {
2743 s += " gran:";
2744 PrintPage(s, si.dwAllocationGranularity);
2745 }
2746 s += " ";
2747
2748 DWORD_PTR minAdd = (DWORD_PTR)si.lpMinimumApplicationAddress;
2749 UInt64 maxSize = (UInt64)(DWORD_PTR)si.lpMaximumApplicationAddress + 1;
2750 const UInt32 kReserveSize = ((UInt32)1 << 16);
2751 if (minAdd != kReserveSize)
2752 {
2753 PrintSize(s, minAdd);
2754 s += "-";
2755 }
2756 else
2757 {
2758 if ((maxSize & (kReserveSize - 1)) == 0)
2759 maxSize += kReserveSize;
2760 }
2761 PrintSize(s, maxSize);
2762 }
2763
2764 #ifndef _WIN64
2765 typedef VOID (WINAPI *Func_GetNativeSystemInfo)(LPSYSTEM_INFO lpSystemInfo);
2766 #endif
2767
2768 #endif
2769
GetSysInfo(AString & s1,AString & s2)2770 void GetSysInfo(AString &s1, AString &s2)
2771 {
2772 s1.Empty();
2773 s2.Empty();
2774
2775 #ifdef _WIN32
2776 SYSTEM_INFO si;
2777 GetSystemInfo(&si);
2778 {
2779 SysInfo_To_String(s1, si);
2780 // s += " : ";
2781 }
2782
2783 #if !defined(_WIN64) && !defined(UNDER_CE)
2784 Func_GetNativeSystemInfo fn_GetNativeSystemInfo = (Func_GetNativeSystemInfo)GetProcAddress(
2785 GetModuleHandleA("kernel32.dll"), "GetNativeSystemInfo");
2786 if (fn_GetNativeSystemInfo)
2787 {
2788 SYSTEM_INFO si2;
2789 fn_GetNativeSystemInfo(&si2);
2790 // if (memcmp(&si, &si2, sizeof(si)) != 0)
2791 {
2792 // s += " - ";
2793 SysInfo_To_String(s2, si2);
2794 }
2795 }
2796 #endif
2797 #endif
2798 }
2799
2800
GetCpuName(AString & s)2801 void GetCpuName(AString &s)
2802 {
2803 s.Empty();
2804
2805 #ifdef MY_CPU_X86_OR_AMD64
2806 {
2807 Cx86cpuid cpuid;
2808 if (x86cpuid_CheckAndRead(&cpuid))
2809 {
2810 AString s2;
2811 x86cpuid_to_String(cpuid, s2);
2812 s += s2;
2813 }
2814 else
2815 {
2816 #ifdef MY_CPU_AMD64
2817 s += "x64";
2818 #else
2819 s += "x86";
2820 #endif
2821 }
2822 }
2823 #else
2824
2825 #ifdef MY_CPU_LE
2826 s += "LE";
2827 #elif defined(MY_CPU_BE)
2828 s += "BE";
2829 #endif
2830
2831 #endif
2832
2833 #ifdef _7ZIP_LARGE_PAGES
2834 Add_LargePages_String(s);
2835 #endif
2836 }
2837
2838
GetCpuFeatures(AString & s)2839 void GetCpuFeatures(AString &s)
2840 {
2841 s.Empty();
2842
2843 #ifdef _WIN32
2844 const unsigned kNumFeatures_Extra = 32; // we check also for unknown features
2845 const unsigned kNumFeatures = ARRAY_SIZE(k_PF) + kNumFeatures_Extra;
2846 for (unsigned i = 0; i < kNumFeatures; i++)
2847 {
2848 if (IsProcessorFeaturePresent(i))
2849 {
2850 s.Add_Space_if_NotEmpty();
2851 s += TypeToString2(k_PF, ARRAY_SIZE(k_PF), i);
2852 }
2853 }
2854 #endif
2855 }
2856
2857
2858 #ifdef _WIN32
2859 #ifndef UNDER_CE
2860
2861 typedef void (WINAPI * Func_RtlGetVersion) (OSVERSIONINFOEXW *);
2862
My_RtlGetVersion(OSVERSIONINFOEXW * vi)2863 static BOOL My_RtlGetVersion(OSVERSIONINFOEXW *vi)
2864 {
2865 HMODULE ntdll = ::GetModuleHandleW(L"ntdll.dll");
2866 if (!ntdll)
2867 return FALSE;
2868 Func_RtlGetVersion func = (Func_RtlGetVersion)GetProcAddress(ntdll, "RtlGetVersion");
2869 if (!func)
2870 return FALSE;
2871 func(vi);
2872 return TRUE;
2873 }
2874
2875 #endif
2876 #endif
2877
2878
Bench(DECL_EXTERNAL_CODECS_LOC_VARS IBenchPrintCallback * printCallback,IBenchCallback * benchCallback,const CObjectVector<CProperty> & props,UInt32 numIterations,bool multiDict)2879 HRESULT Bench(
2880 DECL_EXTERNAL_CODECS_LOC_VARS
2881 IBenchPrintCallback *printCallback,
2882 IBenchCallback *benchCallback,
2883 // IBenchFreqCallback *freqCallback,
2884 const CObjectVector<CProperty> &props,
2885 UInt32 numIterations,
2886 bool multiDict)
2887 {
2888 if (!CrcInternalTest())
2889 return E_FAIL;
2890
2891 UInt32 numCPUs = 1;
2892 UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
2893
2894 NSystem::CProcessAffinity threadsInfo;
2895 threadsInfo.InitST();
2896
2897 #ifndef _7ZIP_ST
2898
2899 if (threadsInfo.Get() && threadsInfo.processAffinityMask != 0)
2900 numCPUs = threadsInfo.GetNumProcessThreads();
2901 else
2902 numCPUs = NSystem::GetNumberOfProcessors();
2903
2904 #endif
2905
2906 bool ramSize_Defined = NSystem::GetRamSize(ramSize);
2907
2908 UInt32 numThreadsSpecified = numCPUs;
2909
2910 UInt32 testTime = kComplexInSeconds;
2911
2912 UInt64 specifiedFreq = 0;
2913
2914 bool multiThreadTests = false;
2915
2916 COneMethodInfo method;
2917
2918 CAlignedBuffer fileDataBuffer;
2919
2920 {
2921 unsigned i;
2922 for (i = 0; i < props.Size(); i++)
2923 {
2924 const CProperty &property = props[i];
2925 UString name (property.Name);
2926 name.MakeLower_Ascii();
2927
2928 if (name.IsEqualTo("file"))
2929 {
2930 if (property.Value.IsEmpty())
2931 return E_INVALIDARG;
2932
2933 #ifdef USE_WIN_FILE
2934
2935 NFile::NIO::CInFile file;
2936 if (!file.Open(us2fs(property.Value)))
2937 return E_INVALIDARG;
2938 UInt64 len;
2939 if (!file.GetLength(len))
2940 return E_FAIL;
2941 if (len >= ((UInt32)1 << 31) || len == 0)
2942 return E_INVALIDARG;
2943 ALLOC_WITH_HRESULT(&fileDataBuffer, (size_t)len);
2944 UInt32 processedSize;
2945 file.Read((Byte *)fileDataBuffer, (UInt32)len, processedSize);
2946 if (processedSize != len)
2947 return E_FAIL;
2948 if (printCallback)
2949 {
2950 printCallback->Print("file size =");
2951 PrintNumber(*printCallback, len, 0);
2952 printCallback->NewLine();
2953 }
2954 continue;
2955
2956 #else
2957
2958 return E_NOTIMPL;
2959
2960 #endif
2961 }
2962
2963 NCOM::CPropVariant propVariant;
2964 if (!property.Value.IsEmpty())
2965 ParseNumberString(property.Value, propVariant);
2966
2967 if (name.IsEqualTo("time"))
2968 {
2969 RINOK(ParsePropToUInt32(UString(), propVariant, testTime));
2970 continue;
2971 }
2972
2973 if (name.IsEqualTo("freq"))
2974 {
2975 UInt32 freq32 = 0;
2976 RINOK(ParsePropToUInt32(UString(), propVariant, freq32));
2977 if (freq32 == 0)
2978 return E_INVALIDARG;
2979 specifiedFreq = (UInt64)freq32 * 1000000;
2980
2981 if (printCallback)
2982 {
2983 printCallback->Print("freq=");
2984 PrintNumber(*printCallback, freq32, 0);
2985 printCallback->NewLine();
2986 }
2987
2988 continue;
2989 }
2990
2991 if (name.IsPrefixedBy_Ascii_NoCase("mt"))
2992 {
2993 UString s = name.Ptr(2);
2994 if (s.IsEqualTo("*")
2995 || s.IsEmpty() && propVariant.vt == VT_BSTR && StringsAreEqual_Ascii(propVariant.bstrVal, "*"))
2996 {
2997 multiThreadTests = true;
2998 continue;
2999 }
3000 #ifndef _7ZIP_ST
3001 RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified));
3002 #endif
3003 continue;
3004 }
3005
3006 RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant));
3007 }
3008 }
3009
3010 if (printCallback)
3011 {
3012 #ifdef _WIN32
3013 #ifndef UNDER_CE
3014 {
3015 AString s;
3016 // OSVERSIONINFO vi;
3017 OSVERSIONINFOEXW vi;
3018 vi.dwOSVersionInfoSize = sizeof(vi);
3019 // if (::GetVersionEx(&vi))
3020 if (My_RtlGetVersion(&vi))
3021 {
3022 s += "Windows";
3023 if (vi.dwPlatformId != VER_PLATFORM_WIN32_NT)
3024 s.Add_UInt32(vi.dwPlatformId);
3025 s += " "; s.Add_UInt32(vi.dwMajorVersion);
3026 s += "."; s.Add_UInt32(vi.dwMinorVersion);
3027 s += " "; s.Add_UInt32(vi.dwBuildNumber);
3028 // s += " "; s += GetAnsiString(vi.szCSDVersion);
3029 }
3030 printCallback->Print(s);
3031 printCallback->NewLine();
3032 }
3033 #endif
3034 #endif
3035
3036 {
3037 AString s1, s2;
3038 GetSysInfo(s1, s2);
3039 if (!s1.IsEmpty() || !s2.IsEmpty())
3040 {
3041 printCallback->Print(s1);
3042 if (s1 != s2 && !s2.IsEmpty())
3043 {
3044 printCallback->Print(" - ");
3045 printCallback->Print(s2);
3046 }
3047 printCallback->NewLine();
3048 }
3049 }
3050 {
3051 AString s;
3052 GetCpuFeatures(s);
3053 if (!s.IsEmpty())
3054 {
3055 printCallback->Print(s);
3056 printCallback->NewLine();
3057 }
3058 }
3059 {
3060 AString s;
3061 GetCpuName(s);
3062 if (!s.IsEmpty())
3063 {
3064 printCallback->Print(s);
3065 printCallback->NewLine();
3066 }
3067 }
3068 }
3069
3070 if (printCallback)
3071 {
3072 printCallback->Print("CPU Freq:");
3073 }
3074
3075 UInt64 complexInCommands = kComplexInCommands;
3076
3077 if (printCallback /* || freqCallback */)
3078 {
3079 UInt64 numMilCommands = 1 << 6;
3080 if (specifiedFreq != 0)
3081 {
3082 while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3083 numMilCommands >>= 1;
3084 }
3085
3086 for (int jj = 0;; jj++)
3087 {
3088 if (printCallback)
3089 RINOK(printCallback->CheckBreak());
3090
3091 UInt64 start = ::GetTimeCount();
3092 UInt32 sum = (UInt32)start;
3093 sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
3094 if (sum == 0xF1541213)
3095 if (printCallback)
3096 printCallback->Print("");
3097 const UInt64 realDelta = ::GetTimeCount() - start;
3098 start = realDelta;
3099 if (start == 0)
3100 start = 1;
3101 UInt64 freq = GetFreq();
3102 // mips is constant in some compilers
3103 const UInt64 mipsVal = numMilCommands * freq / start;
3104 if (printCallback)
3105 {
3106 if (realDelta == 0)
3107 {
3108 printCallback->Print(" -");
3109 }
3110 else
3111 {
3112 // PrintNumber(*printCallback, start, 0);
3113 PrintNumber(*printCallback, mipsVal, 5);
3114 }
3115 }
3116 /*
3117 if (freqCallback)
3118 freqCallback->AddCpuFreq(mipsVal);
3119 */
3120
3121 if (jj >= 3)
3122 {
3123 SetComplexCommands(testTime, false, mipsVal * 1000000, complexInCommands);
3124 if (jj >= 8 || start >= freq)
3125 break;
3126 // break; // change it
3127 numMilCommands <<= 1;
3128 }
3129 }
3130 }
3131
3132 if (printCallback)
3133 {
3134 printCallback->NewLine();
3135 printCallback->NewLine();
3136 PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
3137 printCallback->Print(GetProcessThreadsInfo(threadsInfo));
3138 printCallback->NewLine();
3139 }
3140
3141 if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
3142 return E_INVALIDARG;
3143
3144 UInt32 dict;
3145 bool dictIsDefined = method.Get_DicSize(dict);
3146
3147 if (method.MethodName.IsEmpty())
3148 method.MethodName = "LZMA";
3149
3150 if (benchCallback)
3151 {
3152 CBenchProps benchProps;
3153 benchProps.SetLzmaCompexity();
3154 UInt32 dictSize = method.Get_Lzma_DicSize();
3155 UInt32 uncompressedDataSize = kAdditionalSize + dictSize;
3156 return MethodBench(
3157 EXTERNAL_CODECS_LOC_VARS
3158 complexInCommands,
3159 true, numThreadsSpecified,
3160 method,
3161 uncompressedDataSize, (const Byte *)fileDataBuffer,
3162 kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
3163 }
3164
3165 AString methodName (method.MethodName);
3166 if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
3167 methodName = "crc32";
3168 method.MethodName = methodName;
3169 CMethodId hashID;
3170
3171 if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID))
3172 {
3173 if (!printCallback)
3174 return S_FALSE;
3175 IBenchPrintCallback &f = *printCallback;
3176 if (!dictIsDefined)
3177 dict = (1 << 24);
3178
3179
3180 // methhodName.RemoveChar(L'-');
3181 UInt32 complexity = 10000;
3182 const UInt32 *checkSum = NULL;
3183 {
3184 unsigned i;
3185 for (i = 0; i < ARRAY_SIZE(g_Hash); i++)
3186 {
3187 const CBenchHash &h = g_Hash[i];
3188 AString benchMethod (h.Name);
3189 AString benchProps;
3190 int propPos = benchMethod.Find(':');
3191 if (propPos >= 0)
3192 {
3193 benchProps = benchMethod.Ptr(propPos + 1);
3194 benchMethod.DeleteFrom(propPos);
3195 }
3196
3197 if (AreSameMethodNames(benchMethod, methodName))
3198 {
3199 if (benchProps.IsEmpty()
3200 || benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps == "8" && method.PropsString.IsEmpty()
3201 || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
3202 {
3203 complexity = h.Complex;
3204 checkSum = &h.CheckSum;
3205 if (method.PropsString.IsEqualTo_Ascii_NoCase(benchProps))
3206 break;
3207 }
3208 }
3209 }
3210 if (i == ARRAY_SIZE(g_Hash))
3211 return E_NOTIMPL;
3212 }
3213
3214 f.NewLine();
3215 f.Print("Size");
3216 const unsigned kFieldSize_CrcSpeed = 6;
3217 unsigned numThreadsTests = 0;
3218 for (;;)
3219 {
3220 UInt32 t = GetNumThreadsNext(numThreadsTests, numThreadsSpecified);
3221 PrintNumber(f, t, kFieldSize_CrcSpeed);
3222 numThreadsTests++;
3223 if (t >= numThreadsSpecified)
3224 break;
3225 }
3226 f.NewLine();
3227 f.NewLine();
3228 CTempValues speedTotals(numThreadsTests);
3229 {
3230 for (unsigned ti = 0; ti < numThreadsTests; ti++)
3231 speedTotals.Values[ti] = 0;
3232 }
3233
3234 UInt64 numSteps = 0;
3235 for (UInt32 i = 0; i < numIterations; i++)
3236 {
3237 for (unsigned pow = 10; pow < 32; pow++)
3238 {
3239 UInt32 bufSize = (UInt32)1 << pow;
3240 if (bufSize > dict)
3241 break;
3242 char s[16];
3243 ConvertUInt32ToString(pow, s);
3244 unsigned pos = MyStringLen(s);
3245 s[pos++] = ':';
3246 s[pos++] = ' ';
3247 s[pos] = 0;
3248 f.Print(s);
3249
3250 for (unsigned ti = 0; ti < numThreadsTests; ti++)
3251 {
3252 RINOK(f.CheckBreak());
3253 UInt32 t = GetNumThreadsNext(ti, numThreadsSpecified);
3254 UInt64 speed = 0;
3255 RINOK(CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
3256 t, bufSize, speed,
3257 complexity,
3258 1, // benchWeight,
3259 (pow == kNumHashDictBits) ? checkSum : NULL, method, NULL, NULL, false, 0));
3260 PrintNumber(f, (speed >> 20), kFieldSize_CrcSpeed);
3261 speedTotals.Values[ti] += speed;
3262 }
3263 f.NewLine();
3264 numSteps++;
3265 }
3266 }
3267 if (numSteps != 0)
3268 {
3269 f.NewLine();
3270 f.Print("Avg:");
3271 for (unsigned ti = 0; ti < numThreadsTests; ti++)
3272 {
3273 PrintNumber(f, ((speedTotals.Values[ti] / numSteps) >> 20), kFieldSize_CrcSpeed);
3274 }
3275 f.NewLine();
3276 }
3277 return S_OK;
3278 }
3279
3280 bool use2Columns = false;
3281
3282 bool totalBenchMode = (method.MethodName.IsEqualTo_Ascii_NoCase("*"));
3283 bool onlyHashBench = false;
3284 if (method.MethodName.IsEqualTo_Ascii_NoCase("hash"))
3285 {
3286 onlyHashBench = true;
3287 totalBenchMode = true;
3288 }
3289
3290 // ---------- Threads loop ----------
3291 for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
3292 {
3293
3294 UInt32 numThreads = numThreadsSpecified;
3295
3296 if (!multiThreadTests)
3297 {
3298 if (threadsPassIndex != 0)
3299 break;
3300 }
3301 else
3302 {
3303 numThreads = 1;
3304 if (threadsPassIndex != 0)
3305 {
3306 if (numCPUs < 2)
3307 break;
3308 numThreads = numCPUs;
3309 if (threadsPassIndex == 1)
3310 {
3311 if (numCPUs >= 4)
3312 numThreads = numCPUs / 2;
3313 }
3314 else if (numCPUs < 4)
3315 break;
3316 }
3317 }
3318
3319 CBenchCallbackToPrint callback;
3320 callback.Init();
3321 callback._file = printCallback;
3322
3323 IBenchPrintCallback &f = *printCallback;
3324
3325 if (threadsPassIndex > 0)
3326 {
3327 f.NewLine();
3328 f.NewLine();
3329 }
3330
3331 if (!dictIsDefined)
3332 {
3333 const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
3334 unsigned dicSizeLog = dicSizeLog_Main;
3335
3336 #ifdef UNDER_CE
3337 dicSizeLog = (UInt64)1 << 20;
3338 #endif
3339
3340 if (ramSize_Defined)
3341 for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
3342 if (GetBenchMemoryUsage(numThreads, ((UInt32)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
3343 break;
3344
3345 dict = (UInt32)1 << dicSizeLog;
3346
3347 if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
3348 {
3349 f.Print("Dictionary reduced to: ");
3350 PrintNumber(f, dicSizeLog, 1);
3351 f.NewLine();
3352 }
3353 }
3354
3355 PrintRequirements(f, "usage:", true, GetBenchMemoryUsage(numThreads, dict, totalBenchMode), "Benchmark threads: ", numThreads);
3356 f.NewLine();
3357
3358 f.NewLine();
3359
3360 if (totalBenchMode)
3361 {
3362 callback.NameFieldSize = kFieldSize_Name;
3363 use2Columns = false;
3364 }
3365 else
3366 {
3367 callback.NameFieldSize = kFieldSize_SmallName;
3368 use2Columns = true;
3369 }
3370 callback.Use2Columns = use2Columns;
3371
3372 bool showFreq = false;
3373 UInt64 cpuFreq = 0;
3374
3375 if (totalBenchMode)
3376 {
3377 showFreq = true;
3378 }
3379
3380 unsigned fileldSize = kFieldSize_TotalSize;
3381 if (showFreq)
3382 fileldSize += kFieldSize_EUAndEffec;
3383
3384 if (use2Columns)
3385 {
3386 PrintSpaces(f, callback.NameFieldSize);
3387 PrintRight(f, "Compressing", fileldSize);
3388 f.Print(kSep);
3389 PrintRight(f, "Decompressing", fileldSize);
3390 }
3391 f.NewLine();
3392 PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
3393
3394 int j;
3395
3396 for (j = 0; j < 2; j++)
3397 {
3398 PrintRight(f, "Speed", kFieldSize_Speed + 1);
3399 PrintRight(f, "Usage", kFieldSize_Usage + 1);
3400 PrintRight(f, "R/U", kFieldSize_RU + 1);
3401 PrintRight(f, "Rating", kFieldSize_Rating + 1);
3402 if (showFreq)
3403 {
3404 PrintRight(f, "E/U", kFieldSize_EU + 1);
3405 PrintRight(f, "Effec", kFieldSize_Effec + 1);
3406 }
3407 if (!use2Columns)
3408 break;
3409 if (j == 0)
3410 f.Print(kSep);
3411 }
3412
3413 f.NewLine();
3414 PrintSpaces(f, callback.NameFieldSize);
3415
3416 for (j = 0; j < 2; j++)
3417 {
3418 PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
3419 PrintRight(f, "%", kFieldSize_Usage + 1);
3420 PrintRight(f, "MIPS", kFieldSize_RU + 1);
3421 PrintRight(f, "MIPS", kFieldSize_Rating + 1);
3422 if (showFreq)
3423 {
3424 PrintRight(f, "%", kFieldSize_EU + 1);
3425 PrintRight(f, "%", kFieldSize_Effec + 1);
3426 }
3427 if (!use2Columns)
3428 break;
3429 if (j == 0)
3430 f.Print(kSep);
3431 }
3432
3433 f.NewLine();
3434 f.NewLine();
3435
3436 if (specifiedFreq != 0)
3437 cpuFreq = specifiedFreq;
3438
3439
3440 if (totalBenchMode)
3441 {
3442 for (UInt32 i = 0; i < numIterations; i++)
3443 {
3444 if (i != 0)
3445 printCallback->NewLine();
3446 HRESULT res;
3447
3448 const unsigned kNumCpuTests = 3;
3449 for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
3450 {
3451 PrintLeft(f, "CPU", kFieldSize_Name);
3452 UInt32 resVal;
3453 RINOK(FreqBench(complexInCommands, numThreads, printCallback,
3454 (freqTest == kNumCpuTests - 1 || specifiedFreq != 0), // showFreq
3455 specifiedFreq,
3456 cpuFreq, resVal));
3457 callback.NewLine();
3458
3459 if (specifiedFreq != 0)
3460 cpuFreq = specifiedFreq;
3461
3462 if (freqTest == kNumCpuTests - 1)
3463 SetComplexCommands(testTime, specifiedFreq != 0, cpuFreq, complexInCommands);
3464 }
3465 callback.NewLine();
3466
3467 callback.SetFreq(true, cpuFreq);
3468
3469 if (!onlyHashBench)
3470 {
3471 res = TotalBench(EXTERNAL_CODECS_LOC_VARS
3472 complexInCommands, numThreads,
3473 dictIsDefined || fileDataBuffer.IsAllocated(), // forceUnpackSize
3474 fileDataBuffer.IsAllocated() ? fileDataBuffer.Size() : dict,
3475 (const Byte *)fileDataBuffer,
3476 printCallback, &callback);
3477 RINOK(res);
3478 }
3479
3480 res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads,
3481 1 << kNumHashDictBits, printCallback, &callback, &callback.EncodeRes, true, cpuFreq);
3482 RINOK(res);
3483
3484 callback.NewLine();
3485 {
3486 PrintLeft(f, "CPU", kFieldSize_Name);
3487 UInt32 resVal;
3488 UInt64 cpuFreqLastTemp = cpuFreq;
3489 RINOK(FreqBench(complexInCommands, numThreads, printCallback,
3490 specifiedFreq != 0, // showFreq
3491 specifiedFreq,
3492 cpuFreqLastTemp, resVal));
3493 callback.NewLine();
3494 }
3495 }
3496 }
3497 else
3498 {
3499 bool needSetComplexity = true;
3500 if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
3501 {
3502 unsigned i;
3503 for (i = 0; i < ARRAY_SIZE(g_Bench); i++)
3504 {
3505 const CBenchMethod &h = g_Bench[i];
3506 AString benchMethod (h.Name);
3507 AString benchProps;
3508 int propPos = benchMethod.Find(':');
3509 if (propPos >= 0)
3510 {
3511 benchProps = benchMethod.Ptr(propPos + 1);
3512 benchMethod.DeleteFrom(propPos);
3513 }
3514
3515 if (AreSameMethodNames(benchMethod, methodName))
3516 {
3517 if (benchProps.IsEmpty()
3518 || benchProps == "x5" && method.PropsString.IsEmpty()
3519 || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
3520 {
3521 callback.BenchProps.EncComplex = h.EncComplex;
3522 callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
3523 callback.BenchProps.DecComplexUnc = h.DecComplexUnc;;
3524 needSetComplexity = false;
3525 break;
3526 }
3527 }
3528 }
3529 if (i == ARRAY_SIZE(g_Bench))
3530 return E_NOTIMPL;
3531 }
3532 if (needSetComplexity)
3533 callback.BenchProps.SetLzmaCompexity();
3534
3535 for (unsigned i = 0; i < numIterations; i++)
3536 {
3537 const unsigned kStartDicLog = 22;
3538 unsigned pow = (dict < ((UInt32)1 << kStartDicLog)) ? kBenchMinDicLogSize : kStartDicLog;
3539 if (!multiDict)
3540 pow = 31;
3541 while (((UInt32)1 << pow) > dict && pow > 0)
3542 pow--;
3543 for (; ((UInt32)1 << pow) <= dict; pow++)
3544 {
3545 char s[16];
3546 ConvertUInt32ToString(pow, s);
3547 unsigned pos = MyStringLen(s);
3548 s[pos++] = ':';
3549 s[pos] = 0;
3550 PrintLeft(f, s, kFieldSize_SmallName);
3551 callback.DictSize = (UInt32)1 << pow;
3552
3553 COneMethodInfo method2 = method;
3554
3555 if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
3556 {
3557 // We add dictionary size property.
3558 // method2 can have two different dictionary size properties.
3559 // And last property is main.
3560 NCOM::CPropVariant propVariant = (UInt32)pow;
3561 RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant));
3562 }
3563
3564 size_t uncompressedDataSize;
3565 if (fileDataBuffer.IsAllocated())
3566 {
3567 uncompressedDataSize = fileDataBuffer.Size();
3568 }
3569 else
3570 {
3571 uncompressedDataSize = callback.DictSize;
3572 if (uncompressedDataSize >= (1 << 18))
3573 uncompressedDataSize += kAdditionalSize;
3574 }
3575
3576 HRESULT res = MethodBench(
3577 EXTERNAL_CODECS_LOC_VARS
3578 complexInCommands,
3579 true, numThreads,
3580 method2,
3581 uncompressedDataSize, (const Byte *)fileDataBuffer,
3582 kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
3583 f.NewLine();
3584 RINOK(res);
3585 if (!multiDict)
3586 break;
3587 }
3588 }
3589 }
3590
3591 PrintChars(f, '-', callback.NameFieldSize + fileldSize);
3592
3593 if (use2Columns)
3594 {
3595 f.Print(kSep);
3596 PrintChars(f, '-', fileldSize);
3597 }
3598
3599 f.NewLine();
3600
3601 if (use2Columns)
3602 {
3603 PrintLeft(f, "Avr:", callback.NameFieldSize);
3604 PrintTotals(f, showFreq, cpuFreq, callback.EncodeRes);
3605 f.Print(kSep);
3606 PrintTotals(f, showFreq, cpuFreq, callback.DecodeRes);
3607 f.NewLine();
3608 }
3609
3610 PrintLeft(f, "Tot:", callback.NameFieldSize);
3611 CTotalBenchRes midRes;
3612 midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
3613 PrintTotals(f, showFreq, cpuFreq, midRes);
3614 f.NewLine();
3615
3616 }
3617 return S_OK;
3618 }
3619