1 // Bench.cpp
2 
3 #include "StdAfx.h"
4 
5 #include <stdio.h>
6 
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif
11 
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #ifdef USE_POSIX_TIME2
15 #include <sys/time.h>
16 #endif
17 #endif
18 
19 #ifdef _WIN32
20 #define USE_ALLOCA
21 #endif
22 
23 #ifdef USE_ALLOCA
24 #ifdef _WIN32
25 #include <stdlib.h>
26 #else
27 #include <stdlib.h>
28 #endif
29 #endif
30 
31 #include "../../../../C/7zCrc.h"
32 #include "../../../../C/CpuArch.h"
33 
34 #ifndef _7ZIP_ST
35 #include "../../../Windows/Synchronization.h"
36 #include "../../../Windows/Thread.h"
37 #endif
38 
39 #if defined(_WIN32) || defined(UNIX_USE_WIN_FILE)
40 #define USE_WIN_FILE
41 #endif
42 
43 #ifdef USE_WIN_FILE
44 #include "../../../Windows/FileIO.h"
45 #endif
46 
47 
48 #include "../../../Common/IntToString.h"
49 #include "../../../Common/MyBuffer2.h"
50 #include "../../../Common/StringConvert.h"
51 #include "../../../Common/StringToInt.h"
52 
53 #include "../../Common/MethodProps.h"
54 #include "../../Common/StreamUtils.h"
55 
56 #include "Bench.h"
57 
58 using namespace NWindows;
59 
// Numeric identifier associated with LZMA.
// NOTE(review): presumably the codec method ID; its use is not visible in this part of the file.
static const UInt32 k_LZMA = 0x030101;

// Default complexity budget assigned by SetComplexCommands():
// 2^31 when UNDER_CE is defined, 2^34 otherwise.
static const UInt64 kComplexInCommands = (UInt64)1 <<
  #ifdef UNDER_CE
    31;
  #else
    34;
  #endif

// Default benchmark duration, in seconds.
// NOTE(review): presumably the default complexInSeconds argument of SetComplexCommands() - confirm against callers.
static const UInt32 kComplexInSeconds = 4;
70 
SetComplexCommands(UInt32 complexInSeconds,bool isSpecifiedFreq,UInt64 cpuFreq,UInt64 & complexInCommands)71 static void SetComplexCommands(UInt32 complexInSeconds,
72     bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
73 {
74   complexInCommands = kComplexInCommands;
75   const UInt64 kMinFreq = (UInt64)1000000 * 4;
76   const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
77   if (cpuFreq < kMinFreq && !isSpecifiedFreq)
78     cpuFreq = kMinFreq;
79   if (cpuFreq < kMaxFreq || isSpecifiedFreq)
80   {
81     if (complexInSeconds != 0)
82       complexInCommands = complexInSeconds * cpuFreq;
83     else
84       complexInCommands = cpuFreq >> 2;
85   }
86 }
87 
// NOTE(review): the next three constants are not referenced in the visible
// part of the file; their meaning is inferred from the names only.
static const unsigned kNumHashDictBits = 17;
static const UInt32 kFilterUnpackSize = (48 << 10);  // 48 KiB

static const unsigned kOldLzmaDictBits = 30;

static const UInt32 kAdditionalSize = (1 << 16);
// Fixed slack added to the compressed-output buffer in CEncoderInfo::Generate().
static const UInt32 kCompressedAdditionalSize = (1 << 10);
// Size of the buffer that receives the coder properties in CEncoderInfo::Generate().
static const UInt32 kMaxLzmaPropSize = 5;
96 
97 
98 
// Calls (_buffer_)->Alloc(_size_) and makes the ENCLOSING function return
// E_OUTOFMEMORY when the buffer then reports that it is not allocated.
// NOTE: this expands to two statements (a call followed by an `if`), so it
// must only be used where a full statement sequence is allowed - never as the
// unbraced body of an if/else/loop.
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
  (_buffer_)->Alloc(_size_); \
  if (!(_buffer_)->IsAllocated()) return E_OUTOFMEMORY;
102 
103 
104 class CBaseRandomGenerator
105 {
106   UInt32 A1;
107   UInt32 A2;
108   UInt32 Salt;
109 public:
CBaseRandomGenerator(UInt32 salt=0)110   CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); }
Init()111   void Init() { A1 = 362436069; A2 = 521288629;}
GetRnd()112   UInt32 GetRnd()
113   {
114     return Salt ^
115     (
116       ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
117       ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) )
118     );
119   }
120 };
121 
122 
123 class CBenchRandomGenerator: public CAlignedBuffer
124 {
GetVal(UInt32 & res,unsigned numBits)125   static UInt32 GetVal(UInt32 &res, unsigned numBits)
126   {
127     UInt32 val = res & (((UInt32)1 << numBits) - 1);
128     res >>= numBits;
129     return val;
130   }
131 
GetLen(UInt32 & r)132   static UInt32 GetLen(UInt32 &r)
133   {
134     UInt32 len = GetVal(r, 2);
135     return GetVal(r, 1 + len);
136   }
137 
138 public:
139 
GenerateSimpleRandom(UInt32 salt)140   void GenerateSimpleRandom(UInt32 salt)
141   {
142     CBaseRandomGenerator rg(salt);
143     const size_t bufSize = Size();
144     Byte *buf = (Byte *)*this;
145     for (size_t i = 0; i < bufSize; i++)
146       buf[i] = (Byte)rg.GetRnd();
147   }
148 
GenerateLz(unsigned dictBits,UInt32 salt)149   void GenerateLz(unsigned dictBits, UInt32 salt)
150   {
151     CBaseRandomGenerator rg(salt);
152     UInt32 pos = 0;
153     UInt32 rep0 = 1;
154     const size_t bufSize = Size();
155     Byte *buf = (Byte *)*this;
156     unsigned posBits = 1;
157 
158     while (pos < bufSize)
159     {
160       UInt32 r = rg.GetRnd();
161       if (GetVal(r, 1) == 0 || pos < 1024)
162         buf[pos++] = (Byte)(r & 0xFF);
163       else
164       {
165         UInt32 len;
166         len = 1 + GetLen(r);
167 
168         if (GetVal(r, 3) != 0)
169         {
170           len += GetLen(r);
171 
172           while (((UInt32)1 << posBits) < pos)
173             posBits++;
174 
175           unsigned numBitsMax = dictBits;
176           if (numBitsMax > posBits)
177             numBitsMax = posBits;
178 
179           const unsigned kAddBits = 6;
180           unsigned numLogBits = 5;
181           if (numBitsMax <= (1 << 4) - 1 + kAddBits)
182             numLogBits = 4;
183 
184           for (;;)
185           {
186             UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
187             r = rg.GetRnd();
188             if (ppp > numBitsMax)
189               continue;
190             rep0 = GetVal(r, ppp);
191             if (rep0 < pos)
192               break;
193             r = rg.GetRnd();
194           }
195           rep0++;
196         }
197 
198         {
199           UInt32 rem = (UInt32)bufSize - pos;
200           if (len > rem)
201             len = rem;
202         }
203         Byte *dest = buf + pos;
204         const Byte *src = dest - rep0;
205         pos += len;
206         for (UInt32 i = 0; i < len; i++)
207           *dest++ = *src++;
208       }
209     }
210   }
211 };
212 
213 
214 class CBenchmarkInStream:
215   public ISequentialInStream,
216   public CMyUnknownImp
217 {
218   const Byte *Data;
219   size_t Pos;
220   size_t Size;
221 public:
222   MY_UNKNOWN_IMP
Init(const Byte * data,size_t size)223   void Init(const Byte *data, size_t size)
224   {
225     Data = data;
226     Size = size;
227     Pos = 0;
228   }
229   STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
230 };
231 
Read(void * data,UInt32 size,UInt32 * processedSize)232 STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
233 {
234   size_t remain = Size - Pos;
235   UInt32 kMaxBlockSize = (1 << 20);
236   if (size > kMaxBlockSize)
237     size = kMaxBlockSize;
238   if (size > remain)
239     size = (UInt32)remain;
240   for (UInt32 i = 0; i < size; i++)
241     ((Byte *)data)[i] = Data[Pos + i];
242   Pos += size;
243   if (processedSize)
244     *processedSize = size;
245   return S_OK;
246 }
247 
248 class CBenchmarkOutStream:
249   public ISequentialOutStream,
250   public CAlignedBuffer,
251   public CMyUnknownImp
252 {
253   // bool _overflow;
254 public:
255   size_t Pos;
256   bool RealCopy;
257   bool CalcCrc;
258   UInt32 Crc;
259 
260   // CBenchmarkOutStream(): _overflow(false) {}
Init(bool realCopy,bool calcCrc)261   void Init(bool realCopy, bool calcCrc)
262   {
263     Crc = CRC_INIT_VAL;
264     RealCopy = realCopy;
265     CalcCrc = calcCrc;
266     // _overflow = false;
267     Pos = 0;
268   }
269 
270   // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
271 
272   MY_UNKNOWN_IMP
273   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
274 };
275 
Write(const void * data,UInt32 size,UInt32 * processedSize)276 STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
277 {
278   size_t curSize = Size() - Pos;
279   if (curSize > size)
280     curSize = size;
281   if (curSize != 0)
282   {
283     if (RealCopy)
284       memcpy(((Byte *)*this) + Pos, data, curSize);
285     if (CalcCrc)
286       Crc = CrcUpdate(Crc, data, curSize);
287     Pos += curSize;
288   }
289   if (processedSize)
290     *processedSize = (UInt32)curSize;
291   if (curSize != size)
292   {
293     // _overflow = true;
294     return E_FAIL;
295   }
296   return S_OK;
297 }
298 
299 class CCrcOutStream:
300   public ISequentialOutStream,
301   public CMyUnknownImp
302 {
303 public:
304   bool CalcCrc;
305   UInt32 Crc;
306   MY_UNKNOWN_IMP
307 
CCrcOutStream()308   CCrcOutStream(): CalcCrc(true) {};
Init()309   void Init() { Crc = CRC_INIT_VAL; }
310   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
311 };
312 
Write(const void * data,UInt32 size,UInt32 * processedSize)313 STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
314 {
315   if (CalcCrc)
316     Crc = CrcUpdate(Crc, data, size);
317   if (processedSize)
318     *processedSize = size;
319   return S_OK;
320 }
321 
GetTimeCount()322 static UInt64 GetTimeCount()
323 {
324   #ifdef USE_POSIX_TIME
325   #ifdef USE_POSIX_TIME2
326   timeval v;
327   if (gettimeofday(&v, 0) == 0)
328     return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
329   return (UInt64)time(NULL) * 1000000;
330   #else
331   return time(NULL);
332   #endif
333   #else
334   /*
335   LARGE_INTEGER value;
336   if (::QueryPerformanceCounter(&value))
337     return value.QuadPart;
338   */
339   return GetTickCount();
340   #endif
341 }
342 
GetFreq()343 static UInt64 GetFreq()
344 {
345   #ifdef USE_POSIX_TIME
346   #ifdef USE_POSIX_TIME2
347   return 1000000;
348   #else
349   return 1;
350   #endif
351   #else
352   /*
353   LARGE_INTEGER value;
354   if (::QueryPerformanceFrequency(&value))
355     return value.QuadPart;
356   */
357   return 1000;
358   #endif
359 }
360 
361 #ifdef USE_POSIX_TIME
362 
363 struct CUserTime
364 {
365   UInt64 Sum;
366   clock_t Prev;
367 
InitCUserTime368   void Init()
369   {
370     Prev = clock();
371     Sum = 0;
372   }
373 
GetUserTimeCUserTime374   UInt64 GetUserTime()
375   {
376     clock_t v = clock();
377     Sum += v - Prev;
378     Prev = v;
379     return Sum;
380   }
381 };
382 
383 #else
384 
GetTime64(const FILETIME & t)385 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
GetWinUserTime()386 UInt64 GetWinUserTime()
387 {
388   FILETIME creationTime, exitTime, kernelTime, userTime;
389   if (
390   #ifdef UNDER_CE
391     ::GetThreadTimes(::GetCurrentThread()
392   #else
393     ::GetProcessTimes(::GetCurrentProcess()
394   #endif
395     , &creationTime, &exitTime, &kernelTime, &userTime) != 0)
396     return GetTime64(userTime) + GetTime64(kernelTime);
397   return (UInt64)GetTickCount() * 10000;
398 }
399 
400 struct CUserTime
401 {
402   UInt64 StartTime;
403 
InitCUserTime404   void Init() { StartTime = GetWinUserTime(); }
GetUserTimeCUserTime405   UInt64 GetUserTime() { return GetWinUserTime() - StartTime; }
406 };
407 
408 #endif
409 
GetUserFreq()410 static UInt64 GetUserFreq()
411 {
412   #ifdef USE_POSIX_TIME
413   return CLOCKS_PER_SEC;
414   #else
415   return 10000000;
416   #endif
417 }
418 
419 class CBenchProgressStatus
420 {
421   #ifndef _7ZIP_ST
422   NSynchronization::CCriticalSection CS;
423   #endif
424 public:
425   HRESULT Res;
426   bool EncodeMode;
SetResult(HRESULT res)427   void SetResult(HRESULT res)
428   {
429     #ifndef _7ZIP_ST
430     NSynchronization::CCriticalSectionLock lock(CS);
431     #endif
432     Res = res;
433   }
GetResult()434   HRESULT GetResult()
435   {
436     #ifndef _7ZIP_ST
437     NSynchronization::CCriticalSectionLock lock(CS);
438     #endif
439     return Res;
440   }
441 };
442 
// Pairs a CBenchInfo record with the timer needed to fill in its time fields.
struct CBenchInfoCalc
{
  CBenchInfo BenchInfo;  // frequencies, start time and accumulated sizes
  CUserTime UserTime;    // processor-time timer started by SetStartTime()

  // Records timer frequencies and the starting wall-clock time; restarts UserTime.
  void SetStartTime();
  // Copies BenchInfo into (dest) with GlobalTime / UserTime replaced by the
  // time elapsed since SetStartTime().
  void SetFinishTime(CBenchInfo &dest);
};
451 
SetStartTime()452 void CBenchInfoCalc::SetStartTime()
453 {
454   BenchInfo.GlobalFreq = GetFreq();
455   BenchInfo.UserFreq = GetUserFreq();
456   BenchInfo.GlobalTime = ::GetTimeCount();
457   BenchInfo.UserTime = 0;
458   UserTime.Init();
459 }
460 
SetFinishTime(CBenchInfo & dest)461 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
462 {
463   dest = BenchInfo;
464   dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
465   dest.UserTime = UserTime.GetUserTime();
466 }
467 
// Progress sink handed to coders during the benchmark. Each SetRatioInfo()
// call is turned into an intermediate CBenchInfo report for Callback.
class CBenchProgressInfo:
  public ICompressProgressInfo,
  public CMyUnknownImp,
  public CBenchInfoCalc
{
public:
  // Shared result/mode holder. Not initialized by the constructor; it is
  // dereferenced by SetRatioInfo(), so it must be assigned before use.
  CBenchProgressStatus *Status;
  // Receiver of intermediate results; may stay NULL, in which case
  // SetRatioInfo() reports nothing.
  IBenchCallback *Callback;

  CBenchProgressInfo(): Callback(NULL) {}
  MY_UNKNOWN_IMP
  STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
};
481 
SetRatioInfo(const UInt64 * inSize,const UInt64 * outSize)482 STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
483 {
484   HRESULT res = Status->GetResult();
485   if (res != S_OK)
486     return res;
487   if (!Callback)
488     return res;
489   CBenchInfo info;
490   SetFinishTime(info);
491   if (Status->EncodeMode)
492   {
493     info.UnpackSize = BenchInfo.UnpackSize + *inSize;
494     info.PackSize = BenchInfo.PackSize + *outSize;
495     res = Callback->SetEncodeResult(info, false);
496   }
497   else
498   {
499     info.PackSize = BenchInfo.PackSize + *inSize;
500     info.UnpackSize = BenchInfo.UnpackSize + *outSize;
501     res = Callback->SetDecodeResult(info, false);
502   }
503   if (res != S_OK)
504     Status->SetResult(res);
505   return res;
506 }
507 
508 static const unsigned kSubBits = 8;
509 
GetLogSize(UInt32 size)510 static UInt32 GetLogSize(UInt32 size)
511 {
512   for (unsigned i = kSubBits; i < 32; i++)
513     for (UInt32 j = 0; j < (1 << kSubBits); j++)
514       if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
515         return (i << kSubBits) + j;
516   return (32 << kSubBits);
517 }
518 
NormalizeVals(UInt64 & v1,UInt64 & v2)519 static void NormalizeVals(UInt64 &v1, UInt64 &v2)
520 {
521   while (v1 > 1000000)
522   {
523     v1 >>= 1;
524     v2 >>= 1;
525   }
526 }
527 
GetUsage() const528 UInt64 CBenchInfo::GetUsage() const
529 {
530   UInt64 userTime = UserTime;
531   UInt64 userFreq = UserFreq;
532   UInt64 globalTime = GlobalTime;
533   UInt64 globalFreq = GlobalFreq;
534   NormalizeVals(userTime, userFreq);
535   NormalizeVals(globalFreq, globalTime);
536   if (userFreq == 0)
537     userFreq = 1;
538   if (globalTime == 0)
539     globalTime = 1;
540   return userTime * globalFreq * 1000000 / userFreq / globalTime;
541 }
542 
GetRatingPerUsage(UInt64 rating) const543 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
544 {
545   UInt64 userTime = UserTime;
546   UInt64 userFreq = UserFreq;
547   UInt64 globalTime = GlobalTime;
548   UInt64 globalFreq = GlobalFreq;
549   NormalizeVals(userFreq, userTime);
550   NormalizeVals(globalTime, globalFreq);
551   if (globalFreq == 0)
552     globalFreq = 1;
553   if (userTime == 0)
554     userTime = 1;
555   return userFreq * globalTime / globalFreq * rating / userTime;
556 }
557 
MyMultDiv64(UInt64 value,UInt64 elapsedTime,UInt64 freq)558 static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
559 {
560   UInt64 elTime = elapsedTime;
561   NormalizeVals(freq, elTime);
562   if (elTime == 0)
563     elTime = 1;
564   return value * freq / elTime;
565 }
566 
GetSpeed(UInt64 numCommands) const567 UInt64 CBenchInfo::GetSpeed(UInt64 numCommands) const
568 {
569   return MyMultDiv64(numCommands, GlobalTime, GlobalFreq);
570 }
571 
572 struct CBenchProps
573 {
574   bool LzmaRatingMode;
575 
576   UInt32 EncComplex;
577   UInt32 DecComplexCompr;
578   UInt32 DecComplexUnc;
579 
CBenchPropsCBenchProps580   CBenchProps(): LzmaRatingMode(false) {}
581   void SetLzmaCompexity();
582 
GeComprCommandsCBenchProps583   UInt64 GeComprCommands(UInt64 unpackSize)
584   {
585     return unpackSize * EncComplex;
586   }
587 
GeDecomprCommandsCBenchProps588   UInt64 GeDecomprCommands(UInt64 packSize, UInt64 unpackSize)
589   {
590     return (packSize * DecComplexCompr + unpackSize * DecComplexUnc);
591   }
592 
593   UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size);
594   UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations);
595 };
596 
SetLzmaCompexity()597 void CBenchProps::SetLzmaCompexity()
598 {
599   EncComplex = 1200;
600   DecComplexUnc = 4;
601   DecComplexCompr = 190;
602   LzmaRatingMode = true;
603 }
604 
GetCompressRating(UInt32 dictSize,UInt64 elapsedTime,UInt64 freq,UInt64 size)605 UInt64 CBenchProps::GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
606 {
607   if (dictSize < (1 << kBenchMinDicLogSize))
608     dictSize = (1 << kBenchMinDicLogSize);
609   UInt64 encComplex = EncComplex;
610   if (LzmaRatingMode)
611   {
612     UInt64 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits);
613     encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
614   }
615   UInt64 numCommands = (UInt64)size * encComplex;
616   return MyMultDiv64(numCommands, elapsedTime, freq);
617 }
618 
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt64 numIterations)619 UInt64 CBenchProps::GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
620 {
621   UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations;
622   return MyMultDiv64(numCommands, elapsedTime, freq);
623 }
624 
GetCompressRating(UInt32 dictSize,UInt64 elapsedTime,UInt64 freq,UInt64 size)625 UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
626 {
627   CBenchProps props;
628   props.SetLzmaCompexity();
629   return props.GetCompressRating(dictSize, elapsedTime, freq, size);
630 }
631 
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt64 numIterations)632 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
633 {
634   CBenchProps props;
635   props.SetLzmaCompexity();
636   return props.GetDecompressRating(elapsedTime, freq, outSize, inSize, numIterations);
637 }
638 
639 
640 
641 
642 #ifndef _7ZIP_ST
// State shared by all encoder threads of one benchmark run.
struct CBenchSyncCommon
{
  // Checked by CEncoderInfo::Encode() right after StartEvent is signaled:
  // when true, the thread returns S_OK without doing any encoding.
  bool ExitMode;
  // Encoder threads block on this event after their data has been generated.
  NSynchronization::CManualResetEvent StartEvent;

  CBenchSyncCommon(): ExitMode(false) {}
};
650 #endif
651 
652 
653 struct CEncoderInfo;
654 
655 struct CEncoderInfo
656 {
657   #ifndef _7ZIP_ST
658   NWindows::CThread thread[2];
659   NSynchronization::CManualResetEvent ReadyEvent;
660   UInt32 NumDecoderSubThreads;
661   CBenchSyncCommon *Common;
662   #endif
663 
664   CMyComPtr<ICompressCoder> _encoder;
665   CMyComPtr<ICompressFilter> _encoderFilter;
666   CBenchProgressInfo *progressInfoSpec[2];
667   CMyComPtr<ICompressProgressInfo> progressInfo[2];
668   UInt64 NumIterations;
669 
670   UInt32 Salt;
671 
672   #ifdef USE_ALLOCA
673   size_t AllocaSize;
674   #endif
675 
676   Byte _key[32];
677   Byte _iv[16];
678   Byte _psw[16];
679   bool CheckCrc_Enc;
680   bool CheckCrc_Dec;
681 
682   struct CDecoderInfo
683   {
684     CEncoderInfo *Encoder;
685     UInt32 DecoderIndex;
686     bool CallbackMode;
687 
688     #ifdef USE_ALLOCA
689     size_t AllocaSize;
690     #endif
691   };
692   CDecoderInfo decodersInfo[2];
693 
694   CMyComPtr<ICompressCoder> _decoders[2];
695   CMyComPtr<ICompressFilter> _decoderFilter;
696 
697   HRESULT Results[2];
698   CBenchmarkOutStream *outStreamSpec;
699   CMyComPtr<ISequentialOutStream> outStream;
700   IBenchCallback *callback;
701   IBenchPrintCallback *printCallback;
702   UInt32 crc;
703   size_t kBufferSize;
704   size_t compressedSize;
705   const Byte *uncompressedDataPtr;
706 
707   const Byte *fileData;
708   CBenchRandomGenerator rg;
709 
710   CAlignedBuffer rgCopy; // it must be 16-byte aligned !!!
711   CBenchmarkOutStream *propStreamSpec;
712   CMyComPtr<ISequentialOutStream> propStream;
713 
714   unsigned generateDictBits;
715   COneMethodInfo _method;
716 
717   // for decode
718   size_t _uncompressedDataSize;
719 
720   HRESULT Generate();
721   HRESULT Encode();
722   HRESULT Decode(UInt32 decoderIndex);
723 
CEncoderInfoCEncoderInfo724   CEncoderInfo():
725     #ifndef _7ZIP_ST
726     Common(NULL),
727     #endif
728     Salt(0),
729     fileData(NULL),
730     CheckCrc_Enc(true),
731     CheckCrc_Dec(true),
732     outStreamSpec(NULL), callback(NULL), printCallback(NULL), propStreamSpec(NULL) {}
733 
734   #ifndef _7ZIP_ST
735 
EncodeThreadFunctionCEncoderInfo736   static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
737   {
738     HRESULT res;
739     CEncoderInfo *encoder = (CEncoderInfo *)param;
740     try
741     {
742       #ifdef USE_ALLOCA
743       alloca(encoder->AllocaSize);
744       #endif
745 
746       res = encoder->Encode();
747     }
748     catch(...)
749     {
750       res = E_FAIL;
751     }
752     encoder->Results[0] = res;
753     if (res != S_OK)
754       encoder->progressInfoSpec[0]->Status->SetResult(res);
755     encoder->ReadyEvent.Set();
756     return 0;
757   }
758 
DecodeThreadFunctionCEncoderInfo759   static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
760   {
761     CDecoderInfo *decoder = (CDecoderInfo *)param;
762 
763     #ifdef USE_ALLOCA
764     alloca(decoder->AllocaSize);
765     #endif
766 
767     CEncoderInfo *encoder = decoder->Encoder;
768     encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
769     return 0;
770   }
771 
CreateEncoderThreadCEncoderInfo772   HRESULT CreateEncoderThread()
773   {
774     WRes res = 0;
775     if (!ReadyEvent.IsCreated())
776       res = ReadyEvent.Create();
777     if (res == 0)
778       res = thread[0].Create(EncodeThreadFunction, this);
779     return HRESULT_FROM_WIN32(res);
780   }
781 
CreateDecoderThreadCEncoderInfo782   HRESULT CreateDecoderThread(unsigned index, bool callbackMode
783       #ifdef USE_ALLOCA
784       , size_t allocaSize
785       #endif
786       )
787   {
788     CDecoderInfo &decoder = decodersInfo[index];
789     decoder.DecoderIndex = index;
790     decoder.Encoder = this;
791 
792     #ifdef USE_ALLOCA
793     decoder.AllocaSize = allocaSize;
794     #endif
795 
796     decoder.CallbackMode = callbackMode;
797     return thread[index].Create(DecodeThreadFunction, &decoder);
798   }
799 
800   #endif
801 };
802 
803 
// Prepares everything Encode() needs:
//   - selects or generates the uncompressed input and, for generated data,
//     computes its CRC;
//   - allocates the filter work buffer, the compressed-output buffer and the
//     coder-properties buffer;
//   - applies the method properties to the coder, stores the properties the
//     coder writes, and, for crypto coders, sets the password and runs one
//     tiny encoding pass.
// Returns E_OUTOFMEMORY on allocation failure, E_FAIL on size overflow,
// E_INVALIDARG when required properties cannot be applied, or the error of a
// failing coder call.
HRESULT CEncoderInfo::Generate()
{
  const COneMethodInfo &method = _method;

  // we need extra space, if input data is already compressed
  const size_t kCompressedBufferSize =
      kCompressedAdditionalSize +
      kBufferSize + kBufferSize / 16;
      // kBufferSize / 2;

  // detects wrap-around of the size_t sum above
  if (kCompressedBufferSize < kBufferSize)
    return E_FAIL;

  uncompressedDataPtr = fileData;

  // No external data was supplied: generate the input in (rg).
  if (!fileData)
  {
    ALLOC_WITH_HRESULT(&rg, kBufferSize);

    // DWORD ttt = GetTickCount();
    if (generateDictBits == 0)
      rg.GenerateSimpleRandom(Salt);
    else
      rg.GenerateLz(generateDictBits, Salt);
    // printf("\n%d\n            ", GetTickCount() - ttt);

    crc = CrcCalc((const Byte *)rg, rg.Size());
    uncompressedDataPtr = (const Byte *)rg;
  }

  // Filters work in place, so they get a private copy buffer.
  if (_encoderFilter)
  {
    ALLOC_WITH_HRESULT(&rgCopy, kBufferSize);
  }


  // The stream objects are created once and reused by later calls.
  if (!outStream)
  {
    outStreamSpec = new CBenchmarkOutStream;
    outStream = outStreamSpec;
  }

  ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize)

  if (!propStream)
  {
    propStreamSpec = new CBenchmarkOutStream;
    propStream = propStreamSpec;
  }
  ALLOC_WITH_HRESULT(propStreamSpec, kMaxLzmaPropSize);
  // real copy, no CRC: the written properties are read back in Decode()
  propStreamSpec->Init(true, false);


  CMyComPtr<IUnknown> coder;
  if (_encoderFilter)
    coder = _encoderFilter;
  else
    coder = _encoder;
  {
    CMyComPtr<ICompressSetCoderProperties> scp;
    coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
    if (scp)
    {
      UInt64 reduceSize = kBufferSize;
      RINOK(method.SetCoderProps(scp, &reduceSize));
    }
    else
    {
      // the coder cannot take properties, so non-optional ones are an error
      if (method.AreThereNonOptionalProps())
        return E_INVALIDARG;
    }

    CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
    coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
    if (writeCoderProps)
    {
      RINOK(writeCoderProps->WriteCoderProperties(propStream));
    }

    {
      CMyComPtr<ICryptoSetPassword> sp;
      coder.QueryInterface(IID_ICryptoSetPassword, &sp);
      if (sp)
      {
        RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));

        // we must call encoding one time to calculate password key for key cache.
        // it must be after WriteCoderProperties!
        Byte temp[16];
        memset(temp, 0, sizeof(temp));

        if (_encoderFilter)
        {
          _encoderFilter->Init();
          _encoderFilter->Filter(temp, sizeof(temp));
        }
        else
        {
          CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
          CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
          inStreamSpec->Init(temp, sizeof(temp));

          CCrcOutStream *crcStreamSpec = new CCrcOutStream;
          CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
          crcStreamSpec->Init();

          RINOK(_encoder->Code(inStream, crcStream, 0, 0, NULL));
        }
      }
    }
  }

  return S_OK;
}
918 
919 
My_FilterBench(ICompressFilter * filter,Byte * data,size_t size)920 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size)
921 {
922   while (size != 0)
923   {
924     UInt32 cur = (UInt32)1 << 31;
925     if (cur > size)
926       cur = (UInt32)size;
927     UInt32 processed = filter->Filter(data, cur);
928     data += processed;
929     // if (processed > size) (in AES filter), we must fill last block with zeros.
930     // but it is not important for benchmark. So we just copy that data without filtering.
931     if (processed > size || processed == 0)
932       break;
933     size -= processed;
934   }
935 }
936 
937 
// Runs the encoding benchmark: prepares data with Generate(), then encodes
// (or filters) the same input NumIterations times into outStreamSpec while
// accumulating sizes in progressInfoSpec[0]->BenchInfo.
// Returns the first error from Generate(), the synchronization objects, the
// break check or the coder; E_FAIL when a checked pass produces a CRC that
// differs from the first pass; S_OK otherwise. On success the encoder and
// the encoder filter are released.
HRESULT CEncoderInfo::Encode()
{
  RINOK(Generate());

  #ifndef _7ZIP_ST
  if (Common)
  {
    // Synchronized start: report readiness, then wait for the common start
    // signal. The start time is not set on this path.
    Results[0] = S_OK;
    WRes wres = ReadyEvent.Set();
    if (wres == 0)
      wres = Common->StartEvent.Lock();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    if (Common->ExitMode)
      return S_OK;
  }
  else
  #endif
  {
    CBenchProgressInfo *bpi = progressInfoSpec[0];
    bpi->SetStartTime();
  }


  CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
  bi.UnpackSize = 0;
  bi.PackSize = 0;
  CMyComPtr<ICryptoProperties> cp;
  CMyComPtr<IUnknown> coder;
  if (_encoderFilter)
    coder = _encoderFilter;
  else
    coder = _encoder;
  coder.QueryInterface(IID_ICryptoProperties, &cp);
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
  // value of bi.UnpackSize at the last break check
  UInt64 prev = 0;

  // CRC of the output of the first pass
  UInt32 crcPrev = 0;

  if (cp)
  {
    RINOK(cp->SetKey(_key, sizeof(_key)));
    RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
  }

  for (UInt64 i = 0; i < NumIterations; i++)
  {
    // poll for a user break after more than 1 MiB of new input
    if (printCallback && bi.UnpackSize - prev > (1 << 20))
    {
      RINOK(printCallback->CheckBreak());
      prev = bi.UnpackSize;
    }

    // Only the last pass stores its output (it is the input of Decode()).
    // The CRC is calculated on the last pass, on every 128th pass, or on all
    // passes if CheckCrc_Enc is set - but never for a single-pass run.
    bool isLast = (i == NumIterations - 1);
    bool calcCrc = ((isLast || (i & 0x7F) == 0 || CheckCrc_Enc) && NumIterations != 1);
    outStreamSpec->Init(isLast, calcCrc);

    if (_encoderFilter)
    {
      // filters work in place: filter a fresh copy of the input
      memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize);
      _encoderFilter->Init();
      My_FilterBench(_encoderFilter, (Byte *)rgCopy, kBufferSize);
      RINOK(WriteStream(outStream, (const Byte *)rgCopy, kBufferSize));
    }
    else
    {
      inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
      RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]));
    }

    // outStreamSpec->Print();

    // every checked pass must reproduce the output CRC of the first pass
    UInt32 crcNew = CRC_GET_DIGEST(outStreamSpec->Crc);
    if (i == 0)
      crcPrev = crcNew;
    else if (calcCrc && crcPrev != crcNew)
      return E_FAIL;

    compressedSize = outStreamSpec->Pos;
    bi.UnpackSize += kBufferSize;
    bi.PackSize += compressedSize;
  }

  _encoder.Release();
  _encoderFilter.Release();
  return S_OK;
}
1026 
1027 
// Runs the decoding benchmark for decoder slot (decoderIndex): decodes (or
// filters) the data produced by Encode() NumIterations times, sending the
// output to a CRC-only stream and accumulating sizes in
// progressInfoSpec[decoderIndex]->BenchInfo.
// Returns E_FAIL for an unsupported configuration, S_FALSE when a checked
// pass produces a CRC different from (crc), the first error from a coder or
// from the break check, and S_OK otherwise. On success the decoder and the
// decoder filter are released.
HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
{
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
  CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
  CMyComPtr<IUnknown> coder;
  if (_decoderFilter)
  {
    // a filter can only be run in slot 0
    if (decoderIndex != 0)
      return E_FAIL;
    coder = _decoderFilter;
  }
  else
    coder = decoder;

  // properties were written by the encoder, so the decoder must accept them
  CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
  coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
  if (!setDecProps && propStreamSpec->Pos != 0)
    return E_FAIL;

  CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
  CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;

  CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
  pi->BenchInfo.UnpackSize = 0;
  pi->BenchInfo.PackSize = 0;

  #ifndef _7ZIP_ST
  {
    CMyComPtr<ICompressSetCoderMt> setCoderMt;
    coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
    if (setCoderMt)
    {
      RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads));
    }
  }
  #endif

  CMyComPtr<ICompressSetCoderProperties> scp;
  coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
  if (scp)
  {
    UInt64 reduceSize = _uncompressedDataSize;
    RINOK(_method.SetCoderProps(scp, &reduceSize));
  }

  CMyComPtr<ICryptoProperties> cp;
  coder.QueryInterface(IID_ICryptoProperties, &cp);

  if (setDecProps)
  {
    RINOK(setDecProps->SetDecoderProperties2((const Byte *)*propStreamSpec, (UInt32)propStreamSpec->Pos));
  }

  {
    CMyComPtr<ICryptoSetPassword> sp;
    coder.QueryInterface(IID_ICryptoSetPassword, &sp);
    if (sp)
    {
      RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
    }
  }

  // value of pi->BenchInfo.UnpackSize at the last break check
  UInt64 prev = 0;

  if (cp)
  {
    RINOK(cp->SetKey(_key, sizeof(_key)));
    RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
  }

  for (UInt64 i = 0; i < NumIterations; i++)
  {
    // poll for a user break after more than 1 MiB of new output
    if (printCallback && pi->BenchInfo.UnpackSize - prev > (1 << 20))
    {
      RINOK(printCallback->CheckBreak());
      prev = pi->BenchInfo.UnpackSize;
    }

    inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);
    crcOutStreamSpec->Init();

    UInt64 outSize = kBufferSize;
    // the CRC is checked on every 128th pass, or on all passes if CheckCrc_Dec is set
    crcOutStreamSpec->CalcCrc = ((i & 0x7F) == 0 || CheckCrc_Dec);

    if (_decoderFilter)
    {
      if (compressedSize > rgCopy.Size())
        return E_FAIL;
      // filters work in place: filter a fresh copy of the encoded data
      memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
      _decoderFilter->Init();
      My_FilterBench(_decoderFilter, (Byte *)rgCopy, compressedSize);
      RINOK(WriteStream(crcOutStream, (const Byte *)rgCopy, compressedSize));
    }
    else
    {
      RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
    }

    // decoded data must match the CRC of the original uncompressed data
    if (crcOutStreamSpec->CalcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
      return S_FALSE;
    pi->BenchInfo.UnpackSize += kBufferSize;
    pi->BenchInfo.PackSize += compressedSize;
  }

  decoder.Release();
  _decoderFilter.Release();
  return S_OK;
}
1137 
1138 
// Thread-count limit constant: (1 << 12) == 4096.
// NOTE(review): not referenced in this part of the file; presumably it caps
// the number of benchmark threads - confirm at the use sites.
static const UInt32 kNumThreadsMax = (1 << 12);
1140 
1141 struct CBenchEncoders
1142 {
1143   CEncoderInfo *encoders;
CBenchEncodersCBenchEncoders1144   CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; }
~CBenchEncodersCBenchEncoders1145   ~CBenchEncoders() { delete []encoders; }
1146 };
1147 
1148 
GetNumIterations(UInt64 numCommands,UInt64 complexInCommands)1149 static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1150 {
1151   if (numCommands < (1 << 4))
1152     numCommands = (1 << 4);
1153   UInt64 res = complexInCommands / numCommands;
1154   return (res == 0 ? 1 : res);
1155 }
1156 
1157 
1158 
1159 #ifndef _7ZIP_ST
1160 
1161 // ---------- CBenchThreadsFlusher ----------
1162 
1163 struct CBenchThreadsFlusher
1164 {
1165   CBenchEncoders *EncodersSpec;
1166   CBenchSyncCommon Common;
1167   unsigned NumThreads;
1168   bool NeedClose;
1169 
CBenchThreadsFlusherCBenchThreadsFlusher1170   CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {}
1171 
~CBenchThreadsFlusherCBenchThreadsFlusher1172   ~CBenchThreadsFlusher()
1173   {
1174     StartAndWait(true);
1175   }
1176 
1177   WRes StartAndWait(bool exitMode = false);
1178 };
1179 
1180 
StartAndWait(bool exitMode)1181 WRes CBenchThreadsFlusher::StartAndWait(bool exitMode)
1182 {
1183   if (!NeedClose)
1184     return 0;
1185 
1186   Common.ExitMode = exitMode;
1187   WRes res = Common.StartEvent.Set();
1188 
1189   for (unsigned i = 0; i < NumThreads; i++)
1190   {
1191     NWindows::CThread &t = EncodersSpec->encoders[i].thread[0];
1192     if (t.IsCreated())
1193     {
1194       WRes res2 = t.Wait();
1195       if (res2 == 0)
1196         res2 = t.Close();
1197       if (res == S_OK)
1198         res = res2;
1199     }
1200   }
1201   NeedClose = false;
1202   return res;
1203 }
1204 
1205 #endif
1206 
1207 
1208 
// Runs the encode benchmark and then the decode benchmark for one method.
//
//   complexInCommands    - work budget passed to GetNumIterations()
//   oldLzmaBenchMode     - (named only in multithreaded builds) for the k_LZMA
//                          method it enables the "half encoders, two
//                          sub-decoders per encoder" thread layout below
//   numThreads           - (named only in multithreaded builds) requested
//                          number of threads
//   method2              - method description; copied, the copy may get an
//                          extra NumThreads property
//   uncompressedDataSize - stored as kBufferSize / _uncompressedDataSize of
//                          every encoder
//   fileData             - optional input data; if not NULL its CRC is
//                          computed here and handed to every encoder
//   generateDictBits     - stored in every encoder
//   printCallback        - may be NULL; CheckBreak() is polled through it
//   callback             - receives SetEncodeResult() / SetDecodeResult();
//                          used without a NULL check
//   benchProps           - complexity properties; used without a NULL check
//
// Returns E_NOTIMPL if the method (or a coder for it) cannot be found,
// E_INVALIDARG if the method does not have exactly one stream, or the first
// error reported by the coders, threads or callbacks.
static HRESULT MethodBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    bool
      #ifndef _7ZIP_ST
        oldLzmaBenchMode
      #endif
    ,
    UInt32
      #ifndef _7ZIP_ST
        numThreads
      #endif
    ,
    const COneMethodInfo &method2,
    size_t uncompressedDataSize,
    const Byte *fileData,
    unsigned generateDictBits,

    IBenchPrintCallback *printCallback,
    IBenchCallback *callback,
    CBenchProps *benchProps)
{
  COneMethodInfo method = method2;
  UInt64 methodId;
  UInt32 numStreams;
  int codecIndex = FindMethod_Index(
      EXTERNAL_CODECS_LOC_VARS
      method.MethodName, true,
      methodId, numStreams);
  if (codecIndex < 0)
    return E_NOTIMPL;
  if (numStreams != 1)
    return E_INVALIDARG;

  // single-threaded defaults; adjusted below in multithreaded builds
  UInt32 numEncoderThreads = 1;
  UInt32 numSubDecoderThreads = 1;

  #ifndef _7ZIP_ST
    numEncoderThreads = numThreads;

    if (oldLzmaBenchMode && methodId == k_LZMA)
    {
      // with one benchmark thread and no explicit thread count in the method,
      // the method is pinned to 1 thread
      if (numThreads == 1 && method.Get_NumThreads() < 0)
        method.AddProp_NumThreads(1);
      const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
      // multithreaded LZMA: use half as many encoder items and
      // two decoders per encoder item
      if (numThreads > 1 && numLzmaThreads > 1)
      {
        numEncoderThreads = numThreads / 2;
        numSubDecoderThreads = 2;
      }
    }

  bool mtEncMode = (numEncoderThreads > 1);
  #endif

  CBenchEncoders encodersSpec(numEncoderThreads);
  CEncoderInfo *encoders = encodersSpec.encoders;

  UInt32 i;

  // create the encoder and the decoder(s) of every encoder item
  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    // only the first item reports to (callback)
    encoder.callback = (i == 0) ? callback : 0;
    encoder.printCallback = printCallback;

    {
      CCreatedCoder cod;
      RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS codecIndex, true, encoder._encoderFilter, cod));
      encoder._encoder = cod.Coder;
      if (!encoder._encoder && !encoder._encoderFilter)
        return E_NOTIMPL;
    }

    // CRC checking is enabled only when the configured complexity is above 30
    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30 ;
    encoder.CheckCrc_Dec = (benchProps->DecComplexCompr + benchProps->DecComplexUnc) > 30 ;

    memset(encoder._iv, 0, sizeof(encoder._iv));
    memset(encoder._key, 0, sizeof(encoder._key));
    memset(encoder._psw, 0, sizeof(encoder._psw));

    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
    {
      CCreatedCoder cod;
      CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
      RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod));
      decoder = cod.Coder;
      if (!encoder._decoderFilter && !decoder)
        return E_NOTIMPL;
    }
  }

  // crc stays 0 when no file data is supplied
  UInt32 crc = 0;
  if (fileData)
    crc = CrcCalc(fileData, uncompressedDataSize);

  // all encoder items get the same method, sizes, input data and CRC
  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder._method = method;
    encoder.generateDictBits = generateDictBits;
    encoder._uncompressedDataSize = uncompressedDataSize;
    encoder.kBufferSize = uncompressedDataSize;
    encoder.fileData = fileData;
    encoder.crc = crc;
  }

  CBenchProgressStatus status;
  status.Res = S_OK;
  status.EncodeMode = true;

  #ifndef _7ZIP_ST
  // declared after (encodersSpec), so on any return below its destructor
  // (which calls StartAndWait(true)) runs before the encoders are deleted
  CBenchThreadsFlusher encoderFlusher;
  if (mtEncMode)
  {
    WRes wres = encoderFlusher.Common.StartEvent.Create();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    encoderFlusher.NumThreads = numEncoderThreads;
    encoderFlusher.EncodersSpec = &encodersSpec;
    encoderFlusher.NeedClose = true;
  }
  #endif

  // per-item setup for the encode phase; in MT mode this also creates the threads
  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.NumIterations = GetNumIterations(benchProps->GeComprCommands(uncompressedDataSize), complexInCommands);
    encoder.Salt = g_CrcTable[i & 0xFF];
    encoder.Salt ^= (g_CrcTable[(i >> 8) & 0xFF] << 3);
    // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
    // printf(" %8x", encoder.Salt);

    // two progress objects per item, both attached to the shared (status)
    for (int j = 0; j < 2; j++)
    {
      CBenchProgressInfo *spec = new CBenchProgressInfo;
      encoder.progressInfoSpec[j] = spec;
      encoder.progressInfo[j] = spec;
      spec->Status = &status;
    }

    if (i == 0)
    {
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numEncoderThreads;
    }

    #ifndef _7ZIP_ST
    if (mtEncMode)
    {
      #ifdef USE_ALLOCA
      // different stack offset for each thread (value is in the range 0 .. 0x7FF)
      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif

      encoder.Common = &encoderFlusher.Common;
      RINOK(encoder.CreateEncoderThread())
    }
    #endif
  }

  if (printCallback)
  {
    RINOK(printCallback->CheckBreak());
  }

  #ifndef _7ZIP_ST
  if (mtEncMode)
  {
    // wait until every encoder thread signals ReadyEvent and check its
    // Results[0]
    for (i = 0; i < numEncoderThreads; i++)
    {
      CEncoderInfo &encoder = encoders[i];
      WRes wres = encoder.ReadyEvent.Lock();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      RINOK(encoder.Results[0]);
    }

    // the timer starts only after all threads are ready
    CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
    bpi->SetStartTime();

    // release all threads and wait for them; a wait error is reported
    // only if the benchmark itself did not fail
    WRes wres = encoderFlusher.StartAndWait();
    if (status.Res == 0 && wres != 0)
      return HRESULT_FROM_WIN32(wres);
  }
  else
  #endif
  {
    RINOK(encoders[0].Encode());
  }

  RINOK(status.Res);

  CBenchInfo info;

  // totals for the encode phase: sizes are summed over all encoder items
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
    // printf("\n%7d\n", encoder.compressedSize);
  }

  RINOK(callback->SetEncodeResult(info, true));




  // ---------- Decode ----------

  status.Res = S_OK;
  status.EncodeMode = false;

  UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];

    if (i == 0)
    {
      // the iteration count is computed from the first item's compressed size
      // and is reused for all other items; the decode timer starts here
      encoder.NumIterations = GetNumIterations(benchProps->GeDecomprCommands(encoder.compressedSize, encoder.kBufferSize), complexInCommands);
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numDecoderThreads;
      bpi->SetStartTime();
    }
    else
      encoder.NumIterations = encoders[0].NumIterations;

    #ifndef _7ZIP_ST
    {
      // non-positive thread count in the method means 1 decoder sub-thread
      int numSubThreads = method.Get_NumThreads();
      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : numSubThreads;
    }
    if (numDecoderThreads > 1)
    {
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        // second argument is true only for the very first decoder thread
        HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
            #ifdef USE_ALLOCA
            , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
            #endif
            );
        RINOK(res);
      }
    }
    else
    #endif
    {
      // single decoder: decode in the calling thread
      RINOK(encoder.Decode(0));
    }
  }

  #ifndef _7ZIP_ST
  // wait for all decoder threads; if several of them failed,
  // the result of the last failed one is returned
  HRESULT res = S_OK;
  if (numDecoderThreads > 1)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        CEncoderInfo &encoder = encoders[i];
        encoder.thread[j].Wait();
        if (encoder.Results[j] != S_OK)
          res = encoder.Results[j];
      }
  RINOK(res);
  #endif

  RINOK(status.Res);
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);

  #ifndef _7ZIP_ST
  #ifdef UNDER_CE
  // UNDER_CE builds only: add the user and kernel times of every decoder
  // thread to info.UserTime
  if (numDecoderThreads > 1)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        FILETIME creationTime, exitTime, kernelTime, userTime;
        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
      }
  #endif
  #endif

  // totals for the decode phase
  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  // the decode result is reported twice: first as non-final, then as final
  RINOK(callback->SetDecodeResult(info, false));
  RINOK(callback->SetDecodeResult(info, true));

  return S_OK;
}
1514 
1515 
GetLZMAUsage(bool multiThread,UInt32 dictionary)1516 static inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
1517 {
1518   UInt32 hs = dictionary - 1;
1519   hs |= (hs >> 1);
1520   hs |= (hs >> 2);
1521   hs |= (hs >> 4);
1522   hs |= (hs >> 8);
1523   hs >>= 1;
1524   hs |= 0xFFFF;
1525   if (hs > (1 << 24))
1526     hs >>= 1;
1527   hs++;
1528   return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 +
1529       (1 << 20) + (multiThread ? (6 << 20) : 0);
1530 }
1531 
GetBenchMemoryUsage(UInt32 numThreads,UInt32 dictionary,bool totalBench)1532 UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary, bool totalBench)
1533 {
1534   const UInt32 kBufferSize = dictionary;
1535   const UInt32 kCompressedBufferSize = kBufferSize; // / 2;
1536   bool lzmaMt = (totalBench || numThreads > 1);
1537   UInt32 numBigThreads = numThreads;
1538   if (!totalBench && lzmaMt)
1539     numBigThreads /= 2;
1540   return ((UInt64)kBufferSize + kCompressedBufferSize +
1541     GetLZMAUsage(lzmaMt, dictionary) + (2 << 20)) * numBigThreads;
1542 }
1543 
CrcBig(const void * data,UInt32 size,UInt64 numIterations,const UInt32 * checkSum,IHasher * hf,IBenchPrintCallback * callback)1544 static HRESULT CrcBig(const void *data, UInt32 size, UInt64 numIterations,
1545     const UInt32 *checkSum, IHasher *hf,
1546     IBenchPrintCallback *callback)
1547 {
1548   Byte hash[64];
1549   UInt64 i;
1550   for (i = 0; i < sizeof(hash); i++)
1551     hash[i] = 0;
1552   for (i = 0; i < numIterations; i++)
1553   {
1554     if (callback && (i & 0xFF) == 0)
1555     {
1556       RINOK(callback->CheckBreak());
1557     }
1558     hf->Init();
1559     hf->Update(data, size);
1560     hf->Final(hash);
1561     UInt32 hashSize = hf->GetDigestSize();
1562     if (hashSize > sizeof(hash))
1563       return S_FALSE;
1564     UInt32 sum = 0;
1565     for (UInt32 j = 0; j < hashSize; j += 4)
1566       sum ^= GetUi32(hash + j);
1567     if (checkSum && sum != *checkSum)
1568     {
1569       return S_FALSE;
1570     }
1571   }
1572   return S_OK;
1573 }
1574 
// Seed and operand for the CPU frequency loop (see FreqThreadFunction()).
// NOTE(review): presumably a non-const global so that the compiler cannot
// fold the loop at compile time - confirm.
UInt32 g_BenchCpuFreqTemp = 1;

// YY1 is one pair of dependent operations on (sum);
// YY3 = 4 x YY1, YY5 = 16 x YY1, YY7 = 64 x YY1.
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
// 64 x YY1 with 2 operations each == 128 operations per loop iteration.
static const UInt32 kNumFreqCommands = 128;

EXTERN_C_BEGIN

// Applies the YY7 sequence to (sum) (num) times, using (val) as the operand,
// and returns the resulting (sum).
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
  for (UInt32 i = 0; i < num; i++)
  {
    YY7
  }
  return sum;
}

EXTERN_C_END
1595 
1596 
1597 #ifndef _7ZIP_ST
1598 
// State of one CPU frequency thread (see FreqThreadFunction()).
struct CFreqInfo
{
  NWindows::CThread Thread;
  IBenchPrintCallback *Callback; // CheckBreak() is called before each iteration
  HRESULT CallbackRes;           // result of the last CheckBreak() call
  UInt32 ValRes;                 // final sum; written only if all iterations completed
  UInt32 Size;                   // passed as (num) to CountCpuFreq()
  UInt64 NumIterations;          // number of CountCpuFreq() calls

  // Waits for the thread and then closes it.
  void Wait()
  {
    Thread.Wait();
    Thread.Close();
  }
};
1614 
FreqThreadFunction(void * param)1615 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
1616 {
1617   CFreqInfo *p = (CFreqInfo *)param;
1618 
1619   UInt32 sum = g_BenchCpuFreqTemp;
1620   for (UInt64 k = p->NumIterations; k > 0; k--)
1621   {
1622     p->CallbackRes = p->Callback->CheckBreak();
1623     if (p->CallbackRes != S_OK)
1624       return 0;
1625     sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
1626   }
1627   p->ValRes = sum;
1628   return 0;
1629 }
1630 
1631 struct CFreqThreads
1632 {
1633   CFreqInfo *Items;
1634   UInt32 NumThreads;
1635 
CFreqThreadsCFreqThreads1636   CFreqThreads(): Items(NULL), NumThreads(0) {}
WaitAllCFreqThreads1637   void WaitAll()
1638   {
1639     for (UInt32 i = 0; i < NumThreads; i++)
1640       Items[i].Wait();
1641     NumThreads = 0;
1642   }
~CFreqThreadsCFreqThreads1643   ~CFreqThreads()
1644   {
1645     WaitAll();
1646     delete []Items;
1647   }
1648 };
1649 
// State of one hash benchmark thread (see CrcThreadFunction()).
struct CCrcInfo
{
  NWindows::CThread Thread;
  IBenchPrintCallback *Callback; // passed to CrcBig() for CheckBreak() polling
  // NOTE(review): CallbackRes is not written by CrcThreadFunction() -
  // confirm whether it is still used elsewhere.
  HRESULT CallbackRes;

  const Byte *Data;      // data to hash
  UInt32 Size;           // size of Data in bytes
  UInt64 NumIterations;  // number of hashing passes
  bool CheckSumDefined;  // if true, CheckSum is passed to CrcBig() for verification
  UInt32 CheckSum;
  CMyComPtr<IHasher> Hasher;
  HRESULT Res;           // result of CrcBig(), written by the thread

  #ifdef USE_ALLOCA
  size_t AllocaSize;     // number of bytes the thread reserves with alloca() at start
  #endif

  // Waits for the thread and then closes it.
  void Wait()
  {
    Thread.Wait();
    Thread.Close();
  }
};
1674 
CrcThreadFunction(void * param)1675 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
1676 {
1677   CCrcInfo *p = (CCrcInfo *)param;
1678 
1679   #ifdef USE_ALLOCA
1680   alloca(p->AllocaSize);
1681   #endif
1682 
1683   p->Res = CrcBig(p->Data, p->Size, p->NumIterations,
1684       p->CheckSumDefined ? &p->CheckSum : NULL, p->Hasher,
1685       p->Callback);
1686   return 0;
1687 }
1688 
1689 struct CCrcThreads
1690 {
1691   CCrcInfo *Items;
1692   UInt32 NumThreads;
1693 
CCrcThreadsCCrcThreads1694   CCrcThreads(): Items(NULL), NumThreads(0) {}
WaitAllCCrcThreads1695   void WaitAll()
1696   {
1697     for (UInt32 i = 0; i < NumThreads; i++)
1698       Items[i].Wait();
1699     NumThreads = 0;
1700   }
~CCrcThreadsCCrcThreads1701   ~CCrcThreads()
1702   {
1703     WaitAll();
1704     delete []Items;
1705   }
1706 };
1707 
1708 #endif
1709 
CrcCalc1(const Byte * buf,size_t size)1710 static UInt32 CrcCalc1(const Byte *buf, size_t size)
1711 {
1712   UInt32 crc = CRC_INIT_VAL;;
1713   for (size_t i = 0; i < size; i++)
1714     crc = CRC_UPDATE_BYTE(crc, buf[i]);
1715   return CRC_GET_DIGEST(crc);
1716 }
1717 
RandGen(Byte * buf,size_t size,CBaseRandomGenerator & RG)1718 static void RandGen(Byte *buf, size_t size, CBaseRandomGenerator &RG)
1719 {
1720   for (size_t i = 0; i < size; i++)
1721     buf[i] = (Byte)RG.GetRnd();
1722 }
1723 
// Fills (buf, size) with bytes from (RG) and returns the CrcCalc1() value
// of the generated data.
static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG)
{
  RandGen(buf, size, RG);
  return CrcCalc1(buf, size);
}
1729 
CrcInternalTest()1730 bool CrcInternalTest()
1731 {
1732   CAlignedBuffer buffer;
1733   const size_t kBufferSize0 = (1 << 8);
1734   const size_t kBufferSize1 = (1 << 10);
1735   const unsigned kCheckSize = (1 << 5);
1736   buffer.Alloc(kBufferSize0 + kBufferSize1);
1737   if (!buffer.IsAllocated())
1738     return false;
1739   Byte *buf = (Byte *)buffer;
1740   size_t i;
1741   for (i = 0; i < kBufferSize0; i++)
1742     buf[i] = (Byte)i;
1743   UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
1744   if (crc1 != 0x29058C73)
1745     return false;
1746   CBaseRandomGenerator RG;
1747   RandGen(buf + kBufferSize0, kBufferSize1, RG);
1748   for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
1749     for (unsigned j = 0; j < kCheckSize; j++)
1750       if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
1751         return false;
1752   return true;
1753 }
1754 
// One row of the g_Bench table: a method that TotalBench() runs.
struct CBenchMethod
{
  unsigned Weight;        // used as both EncodeWeight and DecodeWeight in TotalBench()
  unsigned DictBits;      // passed to MethodBench() as generateDictBits; 0 makes
                          // TotalBench() use kFilterUnpackSize unless the size is forced
  UInt32 EncComplex;      // copied to CBenchProps::EncComplex
  UInt32 DecComplexCompr; // copied to CBenchProps::DecComplexCompr
  UInt32 DecComplexUnc;   // copied to CBenchProps::DecComplexUnc
  const char *Name;       // method string parsed with ParseMethodFromPROPVARIANT()
};
1764 
// Methods run by TotalBench(), in this order.
// Columns: Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name.
static const CBenchMethod g_Bench[] =
{
  { 40, 17,  357,  145,   20, "LZMA:x1" },
  { 80, 24, 1220,  145,   20, "LZMA:x5:mt1" },
  { 80, 24, 1220,  145,   20, "LZMA:x5:mt2" },

  { 10, 16,  124,   40,   14, "Deflate:x1" },
  { 20, 16,  376,   40,   14, "Deflate:x5" },
  { 10, 16, 1082,   40,   14, "Deflate:x7" },
  { 10, 17,  422,   40,   14, "Deflate64:x5" },

  { 10, 15,  590,   69,   69, "BZip2:x1" },
  { 20, 19,  815,  122,  122, "BZip2:x5" },
  { 10, 19,  815,  122,  122, "BZip2:x5:mt2" },
  { 10, 19, 2530,  122,  122, "BZip2:x7" },

  { 10, 18, 1010,    0, 1150, "PPMD:x1" },
  { 10, 22, 1655,    0, 1830, "PPMD:x5" },

  // rows with DictBits == 0
  {  2,  0,    6,    0,    6, "Delta:4" },
  {  2,  0,    4,    0,    4, "BCJ" },

  { 10,  0,   24,    0,   24, "AES256CBC:1" },
  {  2,  0,    8,    0,    2, "AES256CBC:2" }
};
1790 
// One row of the g_Hash table.
// NOTE(review): the code that consumes these fields is outside this part of
// the file; the field notes below follow the field names - confirm.
struct CBenchHash
{
  unsigned Weight;   // presumably the weight of the row in the totals
  UInt32 Complex;    // presumably the complexity value used for rating
  UInt32 CheckSum;   // presumably the expected folded digest (see CrcBig())
  const char *Name;  // hash method name
};
1798 
// Hash methods table. Columns: Weight, Complex, CheckSum, Name.
// The three CRC32 rows share one CheckSum value.
static const CBenchHash g_Hash[] =
{
  {  1,  1820, 0x8F8FEDAB, "CRC32:1" },
  { 10,   558, 0x8F8FEDAB, "CRC32:4" },
  { 10,   339, 0x8F8FEDAB, "CRC32:8" },
  { 10,   512, 0xDF1C17CC, "CRC64" },
  { 10,  5100, 0x2D79FF2E, "SHA256" },
  { 10,  2340, 0x4C25132B, "SHA1" },
  {  2,  5500, 0xE084E913, "BLAKE2sp" }
};
1809 
1810 struct CTotalBenchRes
1811 {
1812   // UInt64 NumIterations1; // for Usage
1813   UInt64 NumIterations2; // for Rating / RPU
1814 
1815   UInt64 Rating;
1816   UInt64 Usage;
1817   UInt64 RPU;
1818 
InitCTotalBenchRes1819   void Init() { /* NumIterations1 = 0; */ NumIterations2 = 0; Rating = 0; Usage = 0; RPU = 0; }
1820 
SetSumCTotalBenchRes1821   void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2)
1822   {
1823     Rating = (r1.Rating + r2.Rating);
1824     Usage = (r1.Usage + r2.Usage);
1825     RPU = (r1.RPU + r2.RPU);
1826     // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
1827     NumIterations2 = (r1.NumIterations2 + r2.NumIterations2);
1828   }
1829 };
1830 
PrintNumber(IBenchPrintCallback & f,UInt64 value,unsigned size)1831 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
1832 {
1833   char s[128];
1834   unsigned startPos = (unsigned)sizeof(s) - 32;
1835   memset(s, ' ', startPos);
1836   ConvertUInt64ToString(value, s + startPos);
1837   // if (withSpace)
1838   {
1839     startPos--;
1840     size++;
1841   }
1842   unsigned len = (unsigned)strlen(s + startPos);
1843   if (size > len)
1844   {
1845     startPos -= (size - len);
1846     if (startPos < 0)
1847       startPos = 0;
1848   }
1849   f.Print(s + startPos);
1850 }
1851 
// Column widths (in characters) of the benchmark output.
// Note: PrintNumber() prints one extra leading space in addition to the
// field width that it is given.
static const unsigned kFieldSize_Name = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed = 9;
static const unsigned kFieldSize_Usage = 5;
static const unsigned kFieldSize_RU = 6;
static const unsigned kFieldSize_Rating = 6;
static const unsigned kFieldSize_EU = 5;
static const unsigned kFieldSize_Effec = 5;

// Combined widths. NOTE(review): the constants 4 and 2 presumably account for
// the one-space separators of the 4 (resp. 2) summed fields - confirm.
static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
1863 
1864 
PrintRating(IBenchPrintCallback & f,UInt64 rating,unsigned size)1865 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
1866 {
1867   PrintNumber(f, (rating + 500000) / 1000000, size);
1868 }
1869 
1870 
PrintPercents(IBenchPrintCallback & f,UInt64 val,UInt64 divider,unsigned size)1871 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
1872 {
1873   PrintNumber(f, (val * 100 + divider / 2) / divider, size);
1874 }
1875 
PrintChars(IBenchPrintCallback & f,char c,unsigned size)1876 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
1877 {
1878   char s[256];
1879   memset(s, (Byte)c, size);
1880   s[size] = 0;
1881   f.Print(s);
1882 }
1883 
// Prints (size) space characters via PrintChars().
static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
{
  PrintChars(f, ' ', size);
}
1888 
PrintResults(IBenchPrintCallback & f,UInt64 usage,UInt64 rpu,UInt64 rating,bool showFreq,UInt64 cpuFreq)1889 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
1890 {
1891   PrintNumber(f, (usage + 5000) / 10000, kFieldSize_Usage);
1892   PrintRating(f, rpu, kFieldSize_RU);
1893   PrintRating(f, rating, kFieldSize_Rating);
1894   if (showFreq)
1895   {
1896     if (cpuFreq == 0)
1897       PrintSpaces(f, kFieldSize_EUAndEffec);
1898     else
1899     {
1900       UInt64 ddd = cpuFreq * usage / 100;
1901       if (ddd == 0)
1902         ddd = 1;
1903       PrintPercents(f, (rating * 10000), ddd, kFieldSize_EU);
1904       PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
1905     }
1906   }
1907 }
1908 
PrintResults(IBenchPrintCallback * f,const CBenchInfo & info,unsigned weight,UInt64 rating,bool showFreq,UInt64 cpuFreq,CTotalBenchRes * res)1909 static void PrintResults(IBenchPrintCallback *f,
1910     const CBenchInfo &info,
1911     unsigned weight,
1912     UInt64 rating,
1913     bool showFreq, UInt64 cpuFreq,
1914     CTotalBenchRes *res)
1915 {
1916   UInt64 speed = info.GetSpeed(info.UnpackSize * info.NumIterations);
1917   if (f)
1918   {
1919     if (speed != 0)
1920       PrintNumber(*f, speed / 1024, kFieldSize_Speed);
1921     else
1922       PrintSpaces(*f, 1 + kFieldSize_Speed);
1923   }
1924   UInt64 usage = info.GetUsage();
1925   UInt64 rpu = info.GetRatingPerUsage(rating);
1926   if (f)
1927   {
1928     PrintResults(*f, usage, rpu, rating, showFreq, cpuFreq);
1929   }
1930 
1931   if (res)
1932   {
1933     // res->NumIterations1++;
1934     res->NumIterations2 += weight;
1935     res->RPU += (rpu * weight);
1936     res->Rating += (rating * weight);
1937     res->Usage += (usage * weight);
1938   }
1939 }
1940 
PrintTotals(IBenchPrintCallback & f,bool showFreq,UInt64 cpuFreq,const CTotalBenchRes & res)1941 static void PrintTotals(IBenchPrintCallback &f, bool showFreq, UInt64 cpuFreq, const CTotalBenchRes &res)
1942 {
1943   PrintSpaces(f, 1 + kFieldSize_Speed);
1944   // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
1945   UInt64 numIterations2 = res.NumIterations2; if (numIterations2 == 0) numIterations2 = 1;
1946   PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
1947 }
1948 
1949 
PrintHex(AString & s,UInt64 v)1950 static void PrintHex(AString &s, UInt64 v)
1951 {
1952   char temp[32];
1953   ConvertUInt64ToHex(v, temp);
1954   s += temp;
1955 }
1956 
GetProcessThreadsInfo(const NSystem::CProcessAffinity & ti)1957 AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
1958 {
1959   AString s;
1960   // s.Add_UInt32(ti.numProcessThreads);
1961   if (ti.processAffinityMask != ti.systemAffinityMask)
1962   {
1963     // if (ti.numProcessThreads != ti.numSysThreads)
1964     {
1965       s += " / ";
1966       s.Add_UInt32(ti.GetNumSystemThreads());
1967     }
1968     s += " : ";
1969     PrintHex(s, ti.processAffinityMask);
1970     s += " / ";
1971     PrintHex(s, ti.systemAffinityMask);
1972   }
1973   return s;
1974 }
1975 
1976 
PrintSize(AString & s,UInt64 v)1977 static void PrintSize(AString &s, UInt64 v)
1978 {
1979   char c = 0;
1980   if ((v & 0x3FF) == 0) { v >>= 10; c = 'K';
1981   if ((v & 0x3FF) == 0) { v >>= 10; c = 'M';
1982   if ((v & 0x3FF) == 0) { v >>= 10; c = 'G';
1983   if ((v & 0x3FF) == 0) { v >>= 10; c = 'T';
1984   }}}}
1985   else
1986   {
1987     PrintHex(s, v);
1988     return;
1989   }
1990   char temp[32];
1991   ConvertUInt64ToString(v, temp);
1992   s += temp;
1993   if (c)
1994     s += c;
1995 }
1996 
1997 
1998 #ifdef _7ZIP_LARGE_PAGES
1999 
2000 extern bool g_LargePagesMode;
2001 
2002 extern "C"
2003 {
2004   extern SIZE_T g_LargePageSize;
2005 }
2006 
Add_LargePages_String(AString & s)2007 void Add_LargePages_String(AString &s)
2008 {
2009   if (g_LargePagesMode || g_LargePageSize != 0)
2010   {
2011     s += " (LP-";
2012     PrintSize(s, g_LargePageSize);
2013     #ifdef MY_CPU_X86_OR_AMD64
2014     if (CPU_IsSupported_PageGB())
2015       s += "-1G";
2016     #endif
2017     if (!g_LargePagesMode)
2018       s += "-NA";
2019     s += ")";
2020   }
2021 }
2022 
2023 #endif
2024 
2025 
2026 
PrintRequirements(IBenchPrintCallback & f,const char * sizeString,bool size_Defined,UInt64 size,const char * threadsString,UInt32 numThreads)2027 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
2028     bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
2029 {
2030   f.Print("RAM ");
2031   f.Print(sizeString);
2032   if (size_Defined)
2033     PrintNumber(f, (size >> 20), 6);
2034   else
2035     f.Print("      ?");
2036   f.Print(" MB");
2037 
2038   #ifdef _7ZIP_LARGE_PAGES
2039   {
2040     AString s;
2041     Add_LargePages_String(s);
2042     f.Print(s);
2043   }
2044   #endif
2045 
2046   f.Print(",  # ");
2047   f.Print(threadsString);
2048   PrintNumber(f, numThreads, 3);
2049 }
2050 
2051 
2052 
2053 struct CBenchCallbackToPrint: public IBenchCallback
2054 {
2055   CBenchProps BenchProps;
2056   CTotalBenchRes EncodeRes;
2057   CTotalBenchRes DecodeRes;
2058   IBenchPrintCallback *_file;
2059   UInt32 DictSize;
2060 
2061   bool Use2Columns;
2062   unsigned NameFieldSize;
2063 
2064   bool ShowFreq;
2065   UInt64 CpuFreq;
2066 
2067   unsigned EncodeWeight;
2068   unsigned DecodeWeight;
2069 
CBenchCallbackToPrintCBenchCallbackToPrint2070   CBenchCallbackToPrint():
2071       Use2Columns(false),
2072       NameFieldSize(0),
2073       ShowFreq(false),
2074       CpuFreq(0),
2075       EncodeWeight(1),
2076       DecodeWeight(1)
2077       {}
2078 
InitCBenchCallbackToPrint2079   void Init() { EncodeRes.Init(); DecodeRes.Init(); }
2080   void Print(const char *s);
2081   void NewLine();
2082 
2083   HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
2084   HRESULT SetEncodeResult(const CBenchInfo &info, bool final);
2085   HRESULT SetDecodeResult(const CBenchInfo &info, bool final);
2086 };
2087 
// Stores the frequency settings that are passed to PrintResults() later.
// Always returns S_OK.
HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
{
  ShowFreq = showFreq;
  CpuFreq = cpuFreq;
  return S_OK;
}
2094 
SetEncodeResult(const CBenchInfo & info,bool final)2095 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
2096 {
2097   RINOK(_file->CheckBreak());
2098   if (final)
2099   {
2100     UInt64 rating = BenchProps.GetCompressRating(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
2101     PrintResults(_file, info,
2102         EncodeWeight, rating,
2103         ShowFreq, CpuFreq, &EncodeRes);
2104     if (!Use2Columns)
2105       _file->NewLine();
2106   }
2107   return S_OK;
2108 }
2109 
// Separator printed before the decode results in two-column mode
// (see CBenchCallbackToPrint::SetDecodeResult()).
static const char * const kSep = "  | ";
2111 
SetDecodeResult(const CBenchInfo & info,bool final)2112 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
2113 {
2114   RINOK(_file->CheckBreak());
2115   if (final)
2116   {
2117     UInt64 rating = BenchProps.GetDecompressRating(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
2118     if (Use2Columns)
2119       _file->Print(kSep);
2120     else
2121       PrintSpaces(*_file, NameFieldSize);
2122     CBenchInfo info2 = info;
2123     info2.UnpackSize *= info2.NumIterations;
2124     info2.PackSize *= info2.NumIterations;
2125     info2.NumIterations = 1;
2126     PrintResults(_file, info2,
2127         DecodeWeight, rating,
2128         ShowFreq, CpuFreq, &DecodeRes);
2129   }
2130   return S_OK;
2131 }
2132 
// Forwards the string (s) to _file->Print().
void CBenchCallbackToPrint::Print(const char *s)
{
  _file->Print(s);
}
2137 
// Forwards to _file->NewLine().
void CBenchCallbackToPrint::NewLine()
{
  _file->NewLine();
}
2142 
PrintLeft(IBenchPrintCallback & f,const char * s,unsigned size)2143 void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
2144 {
2145   f.Print(s);
2146   int numSpaces = size - MyStringLen(s);
2147   if (numSpaces > 0)
2148     PrintSpaces(f, numSpaces);
2149 }
2150 
PrintRight(IBenchPrintCallback & f,const char * s,unsigned size)2151 void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
2152 {
2153   int numSpaces = size - MyStringLen(s);
2154   if (numSpaces > 0)
2155     PrintSpaces(f, numSpaces);
2156   f.Print(s);
2157 }
2158 
TotalBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,bool forceUnpackSize,size_t unpackSize,const Byte * fileData,IBenchPrintCallback * printCallback,CBenchCallbackToPrint * callback)2159 static HRESULT TotalBench(
2160     DECL_EXTERNAL_CODECS_LOC_VARS
2161     UInt64 complexInCommands,
2162     UInt32 numThreads,
2163     bool forceUnpackSize,
2164     size_t unpackSize,
2165     const Byte *fileData,
2166     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
2167 {
2168   for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
2169   {
2170     const CBenchMethod &bench = g_Bench[i];
2171     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
2172     callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
2173     callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
2174     callback->BenchProps.EncComplex = bench.EncComplex;
2175 
2176     COneMethodInfo method;
2177     NCOM::CPropVariant propVariant;
2178     propVariant = bench.Name;
2179     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
2180 
2181     size_t unpackSize2 = unpackSize;
2182     if (!forceUnpackSize && bench.DictBits == 0)
2183       unpackSize2 = kFilterUnpackSize;
2184 
2185     callback->EncodeWeight = bench.Weight;
2186     callback->DecodeWeight = bench.Weight;
2187 
2188     HRESULT res = MethodBench(
2189         EXTERNAL_CODECS_LOC_VARS
2190         complexInCommands,
2191         false, numThreads, method,
2192         unpackSize2, fileData,
2193         bench.DictBits,
2194         printCallback, callback, &callback->BenchProps);
2195 
2196     if (res == E_NOTIMPL)
2197     {
2198       // callback->Print(" ---");
2199       // we need additional empty line as line for decompression results
2200       if (!callback->Use2Columns)
2201         callback->NewLine();
2202     }
2203     else
2204     {
2205       RINOK(res);
2206     }
2207 
2208     callback->NewLine();
2209   }
2210   return S_OK;
2211 }
2212 
2213 
FreqBench(UInt64 complexInCommands,UInt32 numThreads,IBenchPrintCallback * _file,bool showFreq,UInt64 specifiedFreq,UInt64 & cpuFreq,UInt32 & res)2214 static HRESULT FreqBench(
2215     UInt64 complexInCommands,
2216     UInt32 numThreads,
2217     IBenchPrintCallback *_file,
2218     bool showFreq,
2219     UInt64 specifiedFreq,
2220     UInt64 &cpuFreq,
2221     UInt32 &res)
2222 {
2223   res = 0;
2224   cpuFreq = 0;
2225 
2226   UInt32 bufferSize = 1 << 20;
2227   UInt32 complexity = kNumFreqCommands;
2228   if (numThreads == 0)
2229     numThreads = 1;
2230 
2231   #ifdef _7ZIP_ST
2232   numThreads = 1;
2233   #endif
2234 
2235   UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
2236   UInt64 numIterations = complexInCommands / complexity / bsize;
2237   if (numIterations == 0)
2238     numIterations = 1;
2239 
2240   CBenchInfoCalc progressInfoSpec;
2241 
2242   #ifndef _7ZIP_ST
2243   CFreqThreads threads;
2244   if (numThreads > 1)
2245   {
2246     threads.Items = new CFreqInfo[numThreads];
2247     UInt32 i;
2248     for (i = 0; i < numThreads; i++)
2249     {
2250       CFreqInfo &info = threads.Items[i];
2251       info.Callback = _file;
2252       info.CallbackRes = S_OK;
2253       info.NumIterations = numIterations;
2254       info.Size = bufferSize;
2255     }
2256     progressInfoSpec.SetStartTime();
2257     for (i = 0; i < numThreads; i++)
2258     {
2259       CFreqInfo &info = threads.Items[i];
2260       RINOK(info.Thread.Create(FreqThreadFunction, &info));
2261       threads.NumThreads++;
2262     }
2263     threads.WaitAll();
2264     for (i = 0; i < numThreads; i++)
2265     {
2266       RINOK(threads.Items[i].CallbackRes);
2267     }
2268   }
2269   else
2270   #endif
2271   {
2272     progressInfoSpec.SetStartTime();
2273     UInt32 sum = g_BenchCpuFreqTemp;
2274     for (UInt64 k = numIterations; k > 0; k--)
2275     {
2276       RINOK(_file->CheckBreak());
2277       sum = CountCpuFreq(sum, bufferSize, g_BenchCpuFreqTemp);
2278     }
2279     res += sum;
2280   }
2281 
2282   CBenchInfo info;
2283   progressInfoSpec.SetFinishTime(info);
2284 
2285   info.UnpackSize = 0;
2286   info.PackSize = 0;
2287   info.NumIterations = 1;
2288 
2289   if (_file)
2290   {
2291     {
2292       UInt64 numCommands = (UInt64)numIterations * bufferSize * numThreads * complexity;
2293       UInt64 rating = info.GetSpeed(numCommands);
2294       cpuFreq = rating / numThreads;
2295       PrintResults(_file, info,
2296           0, // weight
2297           rating,
2298           showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : cpuFreq) : 0, NULL);
2299     }
2300     RINOK(_file->CheckBreak());
2301   }
2302 
2303   return S_OK;
2304 }
2305 
2306 
2307 
CrcBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,UInt32 bufferSize,UInt64 & speed,UInt32 complexity,unsigned benchWeight,const UInt32 * checkSum,const COneMethodInfo & method,IBenchPrintCallback * _file,CTotalBenchRes * encodeRes,bool showFreq,UInt64 cpuFreq)2308 static HRESULT CrcBench(
2309     DECL_EXTERNAL_CODECS_LOC_VARS
2310     UInt64 complexInCommands,
2311     UInt32 numThreads, UInt32 bufferSize,
2312     UInt64 &speed,
2313     UInt32 complexity, unsigned benchWeight,
2314     const UInt32 *checkSum,
2315     const COneMethodInfo &method,
2316     IBenchPrintCallback *_file,
2317     CTotalBenchRes *encodeRes,
2318     bool showFreq, UInt64 cpuFreq)
2319 {
2320   if (numThreads == 0)
2321     numThreads = 1;
2322 
2323   #ifdef _7ZIP_ST
2324   numThreads = 1;
2325   #endif
2326 
2327   const AString &methodName = method.MethodName;
2328   // methodName.RemoveChar(L'-');
2329   CMethodId hashID;
2330   if (!FindHashMethod(
2331       EXTERNAL_CODECS_LOC_VARS
2332       methodName, hashID))
2333     return E_NOTIMPL;
2334 
2335   CAlignedBuffer buffer;
2336   size_t totalSize = (size_t)bufferSize * numThreads;
2337   if (totalSize / numThreads != bufferSize)
2338     return E_OUTOFMEMORY;
2339   ALLOC_WITH_HRESULT(&buffer, totalSize)
2340 
2341   Byte *buf = (Byte *)buffer;
2342   CBaseRandomGenerator RG;
2343   UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
2344   UInt64 numIterations = complexInCommands * 256 / complexity / bsize;
2345   if (numIterations == 0)
2346     numIterations = 1;
2347 
2348   CBenchInfoCalc progressInfoSpec;
2349 
2350   #ifndef _7ZIP_ST
2351   CCrcThreads threads;
2352   if (numThreads > 1)
2353   {
2354     threads.Items = new CCrcInfo[numThreads];
2355 
2356     UInt32 i;
2357     for (i = 0; i < numThreads; i++)
2358     {
2359       CCrcInfo &info = threads.Items[i];
2360       AString name;
2361       RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, info.Hasher));
2362       if (!info.Hasher)
2363         return E_NOTIMPL;
2364       CMyComPtr<ICompressSetCoderProperties> scp;
2365       info.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
2366       if (scp)
2367       {
2368         UInt64 reduceSize = 1;
2369         RINOK(method.SetCoderProps(scp, &reduceSize));
2370       }
2371 
2372       Byte *data = buf + (size_t)bufferSize * i;
2373       info.Callback = _file;
2374       info.Data = data;
2375       info.NumIterations = numIterations;
2376       info.Size = bufferSize;
2377       /* info.Crc = */ RandGenCrc(data, bufferSize, RG);
2378       info.CheckSumDefined = false;
2379       if (checkSum)
2380       {
2381         info.CheckSum = *checkSum;
2382         info.CheckSumDefined = (checkSum && (i == 0));
2383       }
2384 
2385       #ifdef USE_ALLOCA
2386       info.AllocaSize = (i * 16 * 21) & 0x7FF;
2387       #endif
2388     }
2389 
2390     progressInfoSpec.SetStartTime();
2391 
2392     for (i = 0; i < numThreads; i++)
2393     {
2394       CCrcInfo &info = threads.Items[i];
2395       RINOK(info.Thread.Create(CrcThreadFunction, &info));
2396       threads.NumThreads++;
2397     }
2398     threads.WaitAll();
2399     for (i = 0; i < numThreads; i++)
2400     {
2401       RINOK(threads.Items[i].Res);
2402     }
2403   }
2404   else
2405   #endif
2406   {
2407     /* UInt32 crc = */ RandGenCrc(buf, bufferSize, RG);
2408     progressInfoSpec.SetStartTime();
2409     CMyComPtr<IHasher> hasher;
2410     AString name;
2411     RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher));
2412     if (!hasher)
2413       return E_NOTIMPL;
2414     CMyComPtr<ICompressSetCoderProperties> scp;
2415     hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
2416     if (scp)
2417     {
2418       UInt64 reduceSize = 1;
2419       RINOK(method.SetCoderProps(scp, &reduceSize));
2420     }
2421     RINOK(CrcBig(buf, bufferSize, numIterations, checkSum, hasher, _file));
2422   }
2423 
2424   CBenchInfo info;
2425   progressInfoSpec.SetFinishTime(info);
2426 
2427   UInt64 unpSize = numIterations * bufferSize;
2428   UInt64 unpSizeThreads = unpSize * numThreads;
2429   info.UnpackSize = unpSizeThreads;
2430   info.PackSize = unpSizeThreads;
2431   info.NumIterations = 1;
2432 
2433   if (_file)
2434   {
2435     {
2436       UInt64 numCommands = unpSizeThreads * complexity / 256;
2437       UInt64 rating = info.GetSpeed(numCommands);
2438       PrintResults(_file, info,
2439           benchWeight, rating,
2440           showFreq, cpuFreq, encodeRes);
2441     }
2442     RINOK(_file->CheckBreak());
2443   }
2444 
2445   speed = info.GetSpeed(unpSizeThreads);
2446 
2447   return S_OK;
2448 }
2449 
TotalBench_Hash(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,UInt32 bufSize,IBenchPrintCallback * printCallback,CBenchCallbackToPrint * callback,CTotalBenchRes * encodeRes,bool showFreq,UInt64 cpuFreq)2450 static HRESULT TotalBench_Hash(
2451     DECL_EXTERNAL_CODECS_LOC_VARS
2452     UInt64 complexInCommands,
2453     UInt32 numThreads, UInt32 bufSize,
2454     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
2455     CTotalBenchRes *encodeRes,
2456     bool showFreq, UInt64 cpuFreq)
2457 {
2458   for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
2459   {
2460     const CBenchHash &bench = g_Hash[i];
2461     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
2462     // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
2463     // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
2464     // callback->BenchProps.EncComplex = bench.EncComplex;
2465 
2466     COneMethodInfo method;
2467     NCOM::CPropVariant propVariant;
2468     propVariant = bench.Name;
2469     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
2470 
2471     UInt64 speed;
2472     HRESULT res = CrcBench(
2473         EXTERNAL_CODECS_LOC_VARS
2474         complexInCommands,
2475         numThreads, bufSize,
2476         speed,
2477         bench.Complex, bench.Weight,
2478         &bench.CheckSum, method,
2479         printCallback, encodeRes, showFreq, cpuFreq);
2480     if (res == E_NOTIMPL)
2481     {
2482       // callback->Print(" ---");
2483     }
2484     else
2485     {
2486       RINOK(res);
2487     }
2488     callback->NewLine();
2489   }
2490   return S_OK;
2491 }
2492 
// Owner of a heap array of (num) UInt64 values.
// The array is allocated in the constructor (elements are not initialized)
// and released in the destructor.
struct CTempValues
{
  UInt64 *Values;
  CTempValues(UInt32 num): Values(new UInt64[num]) {}
  ~CTempValues() { delete []Values; }
private:
  // Copying is disabled: a copy would share Values with the source object,
  // and the array would be deleted twice.
  CTempValues(const CTempValues &);
  CTempValues &operator=(const CTempValues &);
};
2499 
ParseNumberString(const UString & s,NCOM::CPropVariant & prop)2500 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
2501 {
2502   const wchar_t *end;
2503   UInt64 result = ConvertStringToUInt64(s, &end);
2504   if (*end != 0 || s.IsEmpty())
2505     prop = s;
2506   else if (result <= (UInt32)0xFFFFFFFF)
2507     prop = (UInt32)result;
2508   else
2509     prop = result;
2510 }
2511 
// Returns the thread count for step i of the thread-count progression
// 1, 2, 3, 4, 6, 8, 12, 16, ...
// Steps 0 and 1 return 1 and 2 without looking at numThreads;
// every later step is limited to numThreads.
static UInt32 GetNumThreadsNext(unsigned i, UInt32 numThreads)
{
  if (i <= 1)
    return i + 1;

  const unsigned step = i - 1;
  // odd steps use the factor 3, even steps use the factor 2;
  // the power of two grows by one every two steps
  const UInt32 factor = (step & 1) ? 3 : 2;
  const UInt32 candidate = factor << (step / 2);
  return (candidate > numThreads) ? numThreads : candidate;
}
2520 
// Returns the result of StringsAreEqualNoCase_Ascii() for the two method names.
static bool AreSameMethodNames(const char *fullName, const char *shortName)
{
  return StringsAreEqualNoCase_Ascii(fullName, shortName);
}
2525 
2526 
2527 #ifdef MY_CPU_X86_OR_AMD64
2528 
PrintCpuChars(AString & s,UInt32 v)2529 static void PrintCpuChars(AString &s, UInt32 v)
2530 {
2531   for (int j = 0; j < 4; j++)
2532   {
2533     Byte b = (Byte)(v & 0xFF);
2534     v >>= 8;
2535     if (b == 0)
2536       break;
2537     s += (char)b;
2538   }
2539 }
2540 
x86cpuid_to_String(const Cx86cpuid & c,AString & s)2541 static void x86cpuid_to_String(const Cx86cpuid &c, AString &s)
2542 {
2543   s.Empty();
2544 
2545   UInt32 maxFunc2 = 0;
2546   UInt32 t[3];
2547 
2548   MyCPUID(0x80000000, &maxFunc2, &t[0], &t[1], &t[2]);
2549 
2550   bool fullNameIsAvail = (maxFunc2 >= 0x80000004);
2551 
2552   if (!fullNameIsAvail)
2553   {
2554     for (int i = 0; i < 3; i++)
2555       PrintCpuChars(s, c.vendor[i]);
2556   }
2557   else
2558   {
2559     for (int i = 0; i < 3; i++)
2560     {
2561       UInt32 d[4] = { 0 };
2562       MyCPUID(0x80000002 + i, &d[0], &d[1], &d[2], &d[3]);
2563       for (int j = 0; j < 4; j++)
2564         PrintCpuChars(s, d[j]);
2565     }
2566   }
2567 
2568   s.Add_Space_if_NotEmpty();
2569   {
2570     char temp[32];
2571     ConvertUInt32ToHex(c.ver, temp);
2572     s += '(';
2573     s += temp;
2574     s += ')';
2575   }
2576 }
2577 
2578 #endif
2579 
2580 
2581 
// Display names of processor architectures.
// SysInfo_To_String indexes this table with SYSTEM_INFO::wProcessorArchitecture;
// values outside the table are printed as numbers by TypeToString2.
static const char * const k_PROCESSOR_ARCHITECTURE[] =
{
    "x86" // "INTEL"
  , "MIPS"
  , "ALPHA"
  , "PPC"
  , "SHX"
  , "ARM"
  , "IA64"
  , "ALPHA64"
  , "MSIL"
  , "x64" // "AMD64"
  , "IA32_ON_WIN64"
  , "NEUTRAL"
  , "ARM64"
  , "ARM32_ON_WIN64"
};

// Architecture values (indexes of "x86" and "x64" in the table above).
#define MY__PROCESSOR_ARCHITECTURE_INTEL 0
#define MY__PROCESSOR_ARCHITECTURE_AMD64 9


// Processor type values. SysInfo_To_String does not print dwProcessorType
// for the pairs (INTEL, 586) and (AMD64, 8664).
#define MY__PROCESSOR_INTEL_PENTIUM  586
#define MY__PROCESSOR_AMD_X8664      8664
2606 
2607 /*
2608 static const CUInt32PCharPair k_PROCESSOR[] =
2609 {
2610   { 2200, "IA64" },
2611   { 8664, "x64" }
2612 };
2613 
2614 #define PROCESSOR_INTEL_386      386
2615 #define PROCESSOR_INTEL_486      486
2616 #define PROCESSOR_INTEL_PENTIUM  586
2617 #define PROCESSOR_INTEL_860      860
2618 #define PROCESSOR_INTEL_IA64     2200
2619 #define PROCESSOR_AMD_X8664      8664
2620 #define PROCESSOR_MIPS_R2000     2000
2621 #define PROCESSOR_MIPS_R3000     3000
2622 #define PROCESSOR_MIPS_R4000     4000
2623 #define PROCESSOR_ALPHA_21064    21064
2624 #define PROCESSOR_PPC_601        601
2625 #define PROCESSOR_PPC_603        603
2626 #define PROCESSOR_PPC_604        604
2627 #define PROCESSOR_PPC_620        620
2628 #define PROCESSOR_HITACHI_SH3    10003
2629 #define PROCESSOR_HITACHI_SH3E   10004
2630 #define PROCESSOR_HITACHI_SH4    10005
2631 #define PROCESSOR_MOTOROLA_821   821
2632 #define PROCESSOR_SHx_SH3        103
2633 #define PROCESSOR_SHx_SH4        104
2634 #define PROCESSOR_STRONGARM      2577    // 0xA11
2635 #define PROCESSOR_ARM720         1824    // 0x720
2636 #define PROCESSOR_ARM820         2080    // 0x820
2637 #define PROCESSOR_ARM920         2336    // 0x920
2638 #define PROCESSOR_ARM_7TDMI      70001
2639 #define PROCESSOR_OPTIL          18767   // 0x494f
2640 */
2641 
2642 #ifdef _WIN32
2643 
// Display names of processor features.
// GetCpuFeatures indexes this table with the feature id that it passes to
// IsProcessorFeaturePresent(); ids beyond the table are printed as numbers.
static const char * const k_PF[] =
{
    "FP_ERRATA"
  , "FP_EMU"
  , "CMPXCHG"
  , "MMX"
  , "PPC_MOVEMEM_64BIT"
  , "ALPHA_BYTE"
  , "SSE"
  , "3DNOW"
  , "RDTSC"
  , "PAE"
  , "SSE2"
  , "SSE_DAZ"
  , "NX"
  , "SSE3"
  , "CMPXCHG16B"
  , "CMP8XCHG16"
  , "CHANNELS"
  , "XSAVE"
  , "ARM_VFP_32"
  , "ARM_NEON"
  , "L2AT"
  , "VIRT_FIRMWARE"
  , "RDWRFSGSBASE"
  , "FASTFAIL"
  , "ARM_DIVIDE"
  , "ARM_64BIT_LOADSTORE_ATOMIC"
  , "ARM_EXTERNAL_CACHE"
  , "ARM_FMAC"
  , "RDRAND"
  , "ARM_V8"
  , "ARM_V8_CRYPTO"
  , "ARM_V8_CRC32"
  , "RDTSCP"
};
2680 
2681 #endif
2682 
2683 
2684 
2685 
PrintPage(AString & s,UInt32 v)2686 static void PrintPage(AString &s, UInt32 v)
2687 {
2688   if ((v & 0x3FF) == 0)
2689   {
2690     s.Add_UInt32(v >> 10);
2691     s += "K";
2692   }
2693   else
2694     s.Add_UInt32(v >> 10);
2695 }
2696 
TypeToString2(const char * const table[],unsigned num,UInt32 value)2697 static AString TypeToString2(const char * const table[], unsigned num, UInt32 value)
2698 {
2699   char sz[16];
2700   const char *p = NULL;
2701   if (value < num)
2702     p = table[value];
2703   if (!p)
2704   {
2705     ConvertUInt32ToString(value, sz);
2706     p = sz;
2707   }
2708   return (AString)p;
2709 }
2710 
2711 #ifdef _WIN32
2712 
SysInfo_To_String(AString & s,const SYSTEM_INFO & si)2713 static void SysInfo_To_String(AString &s, const SYSTEM_INFO &si)
2714 {
2715   s += TypeToString2(k_PROCESSOR_ARCHITECTURE, ARRAY_SIZE(k_PROCESSOR_ARCHITECTURE), si.wProcessorArchitecture);
2716 
2717   if (!(   si.wProcessorArchitecture == MY__PROCESSOR_ARCHITECTURE_INTEL && si.dwProcessorType == MY__PROCESSOR_INTEL_PENTIUM
2718       || si.wProcessorArchitecture == MY__PROCESSOR_ARCHITECTURE_AMD64 && si.dwProcessorType == MY__PROCESSOR_AMD_X8664))
2719   {
2720     s += " ";
2721     // s += TypePairToString(k_PROCESSOR, ARRAY_SIZE(k_PROCESSOR), si.dwProcessorType);
2722     s.Add_UInt32(si.dwProcessorType);
2723   }
2724   s += " ";
2725   PrintHex(s, si.wProcessorLevel);
2726   s += ".";
2727   PrintHex(s, si.wProcessorRevision);
2728   if ((UInt64)si.dwActiveProcessorMask + 1 != ((UInt64)1 << si.dwNumberOfProcessors))
2729   if ((UInt64)si.dwActiveProcessorMask + 1 != 0 || si.dwNumberOfProcessors != sizeof(UInt64) * 8)
2730   {
2731     s += " act:";
2732     PrintHex(s, si.dwActiveProcessorMask);
2733   }
2734   s += " cpus:";
2735   s.Add_UInt32(si.dwNumberOfProcessors);
2736   if (si.dwPageSize != 1 << 12)
2737   {
2738     s += " page:";
2739     PrintPage(s, si.dwPageSize);
2740   }
2741   if (si.dwAllocationGranularity != 1 << 16)
2742   {
2743     s += " gran:";
2744     PrintPage(s, si.dwAllocationGranularity);
2745   }
2746   s += " ";
2747 
2748   DWORD_PTR minAdd = (DWORD_PTR)si.lpMinimumApplicationAddress;
2749   UInt64 maxSize = (UInt64)(DWORD_PTR)si.lpMaximumApplicationAddress + 1;
2750   const UInt32 kReserveSize = ((UInt32)1 << 16);
2751   if (minAdd != kReserveSize)
2752   {
2753     PrintSize(s, minAdd);
2754     s += "-";
2755   }
2756   else
2757   {
2758     if ((maxSize & (kReserveSize - 1)) == 0)
2759       maxSize += kReserveSize;
2760   }
2761   PrintSize(s, maxSize);
2762 }
2763 
#ifndef _WIN64
// Pointer type for the "GetNativeSystemInfo" function that GetSysInfo
// looks up in kernel32.dll with GetProcAddress().
typedef VOID (WINAPI *Func_GetNativeSystemInfo)(LPSYSTEM_INFO lpSystemInfo);
#endif
2767 
2768 #endif
2769 
GetSysInfo(AString & s1,AString & s2)2770 void GetSysInfo(AString &s1, AString &s2)
2771 {
2772   s1.Empty();
2773   s2.Empty();
2774 
2775   #ifdef _WIN32
2776     SYSTEM_INFO si;
2777     GetSystemInfo(&si);
2778     {
2779       SysInfo_To_String(s1, si);
2780       // s += " : ";
2781     }
2782 
2783     #if !defined(_WIN64) && !defined(UNDER_CE)
2784     Func_GetNativeSystemInfo fn_GetNativeSystemInfo = (Func_GetNativeSystemInfo)GetProcAddress(
2785         GetModuleHandleA("kernel32.dll"), "GetNativeSystemInfo");
2786     if (fn_GetNativeSystemInfo)
2787     {
2788       SYSTEM_INFO si2;
2789       fn_GetNativeSystemInfo(&si2);
2790       // if (memcmp(&si, &si2, sizeof(si)) != 0)
2791       {
2792         // s += " - ";
2793         SysInfo_To_String(s2, si2);
2794       }
2795     }
2796     #endif
2797   #endif
2798 }
2799 
2800 
GetCpuName(AString & s)2801 void GetCpuName(AString &s)
2802 {
2803   s.Empty();
2804 
2805   #ifdef MY_CPU_X86_OR_AMD64
2806   {
2807     Cx86cpuid cpuid;
2808     if (x86cpuid_CheckAndRead(&cpuid))
2809     {
2810       AString s2;
2811       x86cpuid_to_String(cpuid, s2);
2812       s += s2;
2813     }
2814     else
2815     {
2816     #ifdef MY_CPU_AMD64
2817     s += "x64";
2818     #else
2819     s += "x86";
2820     #endif
2821     }
2822   }
2823   #else
2824 
2825     #ifdef MY_CPU_LE
2826       s += "LE";
2827     #elif defined(MY_CPU_BE)
2828       s += "BE";
2829     #endif
2830 
2831   #endif
2832 
2833   #ifdef _7ZIP_LARGE_PAGES
2834   Add_LargePages_String(s);
2835   #endif
2836 }
2837 
2838 
GetCpuFeatures(AString & s)2839 void GetCpuFeatures(AString &s)
2840 {
2841   s.Empty();
2842 
2843   #ifdef _WIN32
2844   const unsigned kNumFeatures_Extra = 32; // we check also for unknown features
2845   const unsigned kNumFeatures = ARRAY_SIZE(k_PF) + kNumFeatures_Extra;
2846   for (unsigned i = 0; i < kNumFeatures; i++)
2847   {
2848     if (IsProcessorFeaturePresent(i))
2849     {
2850       s.Add_Space_if_NotEmpty();
2851       s += TypeToString2(k_PF, ARRAY_SIZE(k_PF), i);
2852     }
2853   }
2854   #endif
2855 }
2856 
2857 
2858 #ifdef _WIN32
2859 #ifndef UNDER_CE
2860 
2861 typedef void (WINAPI * Func_RtlGetVersion) (OSVERSIONINFOEXW *);
2862 
My_RtlGetVersion(OSVERSIONINFOEXW * vi)2863 static BOOL My_RtlGetVersion(OSVERSIONINFOEXW *vi)
2864 {
2865   HMODULE ntdll = ::GetModuleHandleW(L"ntdll.dll");
2866   if (!ntdll)
2867     return FALSE;
2868   Func_RtlGetVersion func = (Func_RtlGetVersion)GetProcAddress(ntdll, "RtlGetVersion");
2869   if (!func)
2870     return FALSE;
2871   func(vi);
2872   return TRUE;
2873 }
2874 
2875 #endif
2876 #endif
2877 
2878 
Bench(DECL_EXTERNAL_CODECS_LOC_VARS IBenchPrintCallback * printCallback,IBenchCallback * benchCallback,const CObjectVector<CProperty> & props,UInt32 numIterations,bool multiDict)2879 HRESULT Bench(
2880     DECL_EXTERNAL_CODECS_LOC_VARS
2881     IBenchPrintCallback *printCallback,
2882     IBenchCallback *benchCallback,
2883     // IBenchFreqCallback *freqCallback,
2884     const CObjectVector<CProperty> &props,
2885     UInt32 numIterations,
2886     bool multiDict)
2887 {
2888   if (!CrcInternalTest())
2889     return E_FAIL;
2890 
2891   UInt32 numCPUs = 1;
2892   UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
2893 
2894   NSystem::CProcessAffinity threadsInfo;
2895   threadsInfo.InitST();
2896 
2897   #ifndef _7ZIP_ST
2898 
2899   if (threadsInfo.Get() && threadsInfo.processAffinityMask != 0)
2900     numCPUs = threadsInfo.GetNumProcessThreads();
2901   else
2902     numCPUs = NSystem::GetNumberOfProcessors();
2903 
2904   #endif
2905 
2906   bool ramSize_Defined = NSystem::GetRamSize(ramSize);
2907 
2908   UInt32 numThreadsSpecified = numCPUs;
2909 
2910   UInt32 testTime = kComplexInSeconds;
2911 
2912   UInt64 specifiedFreq = 0;
2913 
2914   bool multiThreadTests = false;
2915 
2916   COneMethodInfo method;
2917 
2918   CAlignedBuffer fileDataBuffer;
2919 
2920   {
2921   unsigned i;
2922   for (i = 0; i < props.Size(); i++)
2923   {
2924     const CProperty &property = props[i];
2925     UString name (property.Name);
2926     name.MakeLower_Ascii();
2927 
2928     if (name.IsEqualTo("file"))
2929     {
2930       if (property.Value.IsEmpty())
2931         return E_INVALIDARG;
2932 
2933       #ifdef USE_WIN_FILE
2934 
2935       NFile::NIO::CInFile file;
2936       if (!file.Open(us2fs(property.Value)))
2937         return E_INVALIDARG;
2938       UInt64 len;
2939       if (!file.GetLength(len))
2940         return E_FAIL;
2941       if (len >= ((UInt32)1 << 31) || len == 0)
2942         return E_INVALIDARG;
2943       ALLOC_WITH_HRESULT(&fileDataBuffer, (size_t)len);
2944       UInt32 processedSize;
2945       file.Read((Byte *)fileDataBuffer, (UInt32)len, processedSize);
2946       if (processedSize != len)
2947         return E_FAIL;
2948       if (printCallback)
2949       {
2950         printCallback->Print("file size =");
2951         PrintNumber(*printCallback, len, 0);
2952         printCallback->NewLine();
2953       }
2954       continue;
2955 
2956       #else
2957 
2958       return E_NOTIMPL;
2959 
2960       #endif
2961     }
2962 
2963     NCOM::CPropVariant propVariant;
2964     if (!property.Value.IsEmpty())
2965       ParseNumberString(property.Value, propVariant);
2966 
2967     if (name.IsEqualTo("time"))
2968     {
2969       RINOK(ParsePropToUInt32(UString(), propVariant, testTime));
2970       continue;
2971     }
2972 
2973     if (name.IsEqualTo("freq"))
2974     {
2975       UInt32 freq32 = 0;
2976       RINOK(ParsePropToUInt32(UString(), propVariant, freq32));
2977       if (freq32 == 0)
2978         return E_INVALIDARG;
2979       specifiedFreq = (UInt64)freq32 * 1000000;
2980 
2981       if (printCallback)
2982       {
2983         printCallback->Print("freq=");
2984         PrintNumber(*printCallback, freq32, 0);
2985         printCallback->NewLine();
2986       }
2987 
2988       continue;
2989     }
2990 
2991     if (name.IsPrefixedBy_Ascii_NoCase("mt"))
2992     {
2993       UString s = name.Ptr(2);
2994       if (s.IsEqualTo("*")
2995           || s.IsEmpty() && propVariant.vt == VT_BSTR && StringsAreEqual_Ascii(propVariant.bstrVal, "*"))
2996       {
2997         multiThreadTests = true;
2998         continue;
2999       }
3000       #ifndef _7ZIP_ST
3001       RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified));
3002       #endif
3003       continue;
3004     }
3005 
3006     RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant));
3007   }
3008   }
3009 
3010   if (printCallback)
3011   {
3012     #ifdef _WIN32
3013     #ifndef UNDER_CE
3014     {
3015       AString s;
3016       // OSVERSIONINFO vi;
3017       OSVERSIONINFOEXW vi;
3018       vi.dwOSVersionInfoSize = sizeof(vi);
3019       // if (::GetVersionEx(&vi))
3020       if (My_RtlGetVersion(&vi))
3021       {
3022         s += "Windows";
3023         if (vi.dwPlatformId != VER_PLATFORM_WIN32_NT)
3024           s.Add_UInt32(vi.dwPlatformId);
3025         s += " "; s.Add_UInt32(vi.dwMajorVersion);
3026         s += "."; s.Add_UInt32(vi.dwMinorVersion);
3027         s += " "; s.Add_UInt32(vi.dwBuildNumber);
3028         // s += " "; s += GetAnsiString(vi.szCSDVersion);
3029       }
3030       printCallback->Print(s);
3031       printCallback->NewLine();
3032     }
3033     #endif
3034     #endif
3035 
3036     {
3037       AString s1, s2;
3038       GetSysInfo(s1, s2);
3039       if (!s1.IsEmpty() || !s2.IsEmpty())
3040       {
3041         printCallback->Print(s1);
3042         if (s1 != s2 && !s2.IsEmpty())
3043         {
3044           printCallback->Print(" - ");
3045           printCallback->Print(s2);
3046         }
3047         printCallback->NewLine();
3048       }
3049     }
3050     {
3051       AString s;
3052       GetCpuFeatures(s);
3053       if (!s.IsEmpty())
3054       {
3055         printCallback->Print(s);
3056         printCallback->NewLine();
3057       }
3058     }
3059     {
3060       AString s;
3061       GetCpuName(s);
3062       if (!s.IsEmpty())
3063       {
3064         printCallback->Print(s);
3065         printCallback->NewLine();
3066       }
3067     }
3068   }
3069 
3070   if (printCallback)
3071   {
3072     printCallback->Print("CPU Freq:");
3073   }
3074 
3075   UInt64 complexInCommands = kComplexInCommands;
3076 
3077   if (printCallback /* || freqCallback */)
3078   {
3079     UInt64 numMilCommands = 1 << 6;
3080     if (specifiedFreq != 0)
3081     {
3082       while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3083         numMilCommands >>= 1;
3084     }
3085 
3086     for (int jj = 0;; jj++)
3087     {
3088       if (printCallback)
3089         RINOK(printCallback->CheckBreak());
3090 
3091       UInt64 start = ::GetTimeCount();
3092       UInt32 sum = (UInt32)start;
3093       sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
3094       if (sum == 0xF1541213)
3095         if (printCallback)
3096           printCallback->Print("");
3097       const UInt64 realDelta = ::GetTimeCount() - start;
3098       start = realDelta;
3099       if (start == 0)
3100         start = 1;
3101       UInt64 freq = GetFreq();
3102       // mips is constant in some compilers
3103       const UInt64 mipsVal = numMilCommands * freq / start;
3104       if (printCallback)
3105       {
3106         if (realDelta == 0)
3107         {
3108           printCallback->Print(" -");
3109         }
3110         else
3111         {
3112           // PrintNumber(*printCallback, start, 0);
3113           PrintNumber(*printCallback, mipsVal, 5);
3114         }
3115       }
3116       /*
3117       if (freqCallback)
3118         freqCallback->AddCpuFreq(mipsVal);
3119       */
3120 
3121       if (jj >= 3)
3122       {
3123         SetComplexCommands(testTime, false, mipsVal * 1000000, complexInCommands);
3124         if (jj >= 8 || start >= freq)
3125           break;
3126         // break; // change it
3127         numMilCommands <<= 1;
3128       }
3129     }
3130   }
3131 
3132   if (printCallback)
3133   {
3134     printCallback->NewLine();
3135     printCallback->NewLine();
3136     PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
3137     printCallback->Print(GetProcessThreadsInfo(threadsInfo));
3138     printCallback->NewLine();
3139   }
3140 
3141   if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
3142     return E_INVALIDARG;
3143 
3144   UInt32 dict;
3145   bool dictIsDefined = method.Get_DicSize(dict);
3146 
3147   if (method.MethodName.IsEmpty())
3148     method.MethodName = "LZMA";
3149 
3150   if (benchCallback)
3151   {
3152     CBenchProps benchProps;
3153     benchProps.SetLzmaCompexity();
3154     UInt32 dictSize = method.Get_Lzma_DicSize();
3155     UInt32 uncompressedDataSize = kAdditionalSize + dictSize;
3156     return MethodBench(
3157         EXTERNAL_CODECS_LOC_VARS
3158         complexInCommands,
3159         true, numThreadsSpecified,
3160         method,
3161         uncompressedDataSize, (const Byte *)fileDataBuffer,
3162         kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
3163   }
3164 
3165   AString methodName (method.MethodName);
3166   if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
3167     methodName = "crc32";
3168   method.MethodName = methodName;
3169   CMethodId hashID;
3170 
3171   if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID))
3172   {
3173     if (!printCallback)
3174       return S_FALSE;
3175     IBenchPrintCallback &f = *printCallback;
3176     if (!dictIsDefined)
3177       dict = (1 << 24);
3178 
3179 
3180     // methhodName.RemoveChar(L'-');
3181     UInt32 complexity = 10000;
3182     const UInt32 *checkSum = NULL;
3183     {
3184       unsigned i;
3185       for (i = 0; i < ARRAY_SIZE(g_Hash); i++)
3186       {
3187         const CBenchHash &h = g_Hash[i];
3188         AString benchMethod (h.Name);
3189         AString benchProps;
3190         int propPos = benchMethod.Find(':');
3191         if (propPos >= 0)
3192         {
3193           benchProps = benchMethod.Ptr(propPos + 1);
3194           benchMethod.DeleteFrom(propPos);
3195         }
3196 
3197         if (AreSameMethodNames(benchMethod, methodName))
3198         {
3199           if (benchProps.IsEmpty()
3200               || benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps == "8" && method.PropsString.IsEmpty()
3201               || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
3202           {
3203             complexity = h.Complex;
3204             checkSum = &h.CheckSum;
3205             if (method.PropsString.IsEqualTo_Ascii_NoCase(benchProps))
3206               break;
3207           }
3208         }
3209       }
3210       if (i == ARRAY_SIZE(g_Hash))
3211         return E_NOTIMPL;
3212     }
3213 
3214     f.NewLine();
3215     f.Print("Size");
3216     const unsigned kFieldSize_CrcSpeed = 6;
3217     unsigned numThreadsTests = 0;
3218     for (;;)
3219     {
3220       UInt32 t = GetNumThreadsNext(numThreadsTests, numThreadsSpecified);
3221       PrintNumber(f, t, kFieldSize_CrcSpeed);
3222       numThreadsTests++;
3223       if (t >= numThreadsSpecified)
3224         break;
3225     }
3226     f.NewLine();
3227     f.NewLine();
3228     CTempValues speedTotals(numThreadsTests);
3229     {
3230       for (unsigned ti = 0; ti < numThreadsTests; ti++)
3231         speedTotals.Values[ti] = 0;
3232     }
3233 
3234     UInt64 numSteps = 0;
3235     for (UInt32 i = 0; i < numIterations; i++)
3236     {
3237       for (unsigned pow = 10; pow < 32; pow++)
3238       {
3239         UInt32 bufSize = (UInt32)1 << pow;
3240         if (bufSize > dict)
3241           break;
3242         char s[16];
3243         ConvertUInt32ToString(pow, s);
3244         unsigned pos = MyStringLen(s);
3245         s[pos++] = ':';
3246         s[pos++] = ' ';
3247         s[pos] = 0;
3248         f.Print(s);
3249 
3250         for (unsigned ti = 0; ti < numThreadsTests; ti++)
3251         {
3252           RINOK(f.CheckBreak());
3253           UInt32 t = GetNumThreadsNext(ti, numThreadsSpecified);
3254           UInt64 speed = 0;
3255           RINOK(CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
3256               t, bufSize, speed,
3257               complexity,
3258               1, // benchWeight,
3259               (pow == kNumHashDictBits) ? checkSum : NULL, method, NULL, NULL, false, 0));
3260           PrintNumber(f, (speed >> 20), kFieldSize_CrcSpeed);
3261           speedTotals.Values[ti] += speed;
3262         }
3263         f.NewLine();
3264         numSteps++;
3265       }
3266     }
3267     if (numSteps != 0)
3268     {
3269       f.NewLine();
3270       f.Print("Avg:");
3271       for (unsigned ti = 0; ti < numThreadsTests; ti++)
3272       {
3273         PrintNumber(f, ((speedTotals.Values[ti] / numSteps) >> 20), kFieldSize_CrcSpeed);
3274       }
3275       f.NewLine();
3276     }
3277     return S_OK;
3278   }
3279 
3280   bool use2Columns = false;
3281 
3282   bool totalBenchMode = (method.MethodName.IsEqualTo_Ascii_NoCase("*"));
3283   bool onlyHashBench = false;
3284   if (method.MethodName.IsEqualTo_Ascii_NoCase("hash"))
3285   {
3286     onlyHashBench = true;
3287     totalBenchMode = true;
3288   }
3289 
3290   // ---------- Threads loop ----------
3291   for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
3292   {
3293 
3294   UInt32 numThreads = numThreadsSpecified;
3295 
3296   if (!multiThreadTests)
3297   {
3298     if (threadsPassIndex != 0)
3299       break;
3300   }
3301   else
3302   {
3303     numThreads = 1;
3304     if (threadsPassIndex != 0)
3305     {
3306       if (numCPUs < 2)
3307         break;
3308       numThreads = numCPUs;
3309       if (threadsPassIndex == 1)
3310       {
3311         if (numCPUs >= 4)
3312           numThreads = numCPUs / 2;
3313       }
3314       else if (numCPUs < 4)
3315         break;
3316     }
3317   }
3318 
3319   CBenchCallbackToPrint callback;
3320   callback.Init();
3321   callback._file = printCallback;
3322 
3323   IBenchPrintCallback &f = *printCallback;
3324 
3325   if (threadsPassIndex > 0)
3326   {
3327     f.NewLine();
3328     f.NewLine();
3329   }
3330 
3331   if (!dictIsDefined)
3332   {
3333     const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
3334     unsigned dicSizeLog = dicSizeLog_Main;
3335 
3336     #ifdef UNDER_CE
3337     dicSizeLog = (UInt64)1 << 20;
3338     #endif
3339 
3340     if (ramSize_Defined)
3341     for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
3342       if (GetBenchMemoryUsage(numThreads, ((UInt32)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
3343         break;
3344 
3345     dict = (UInt32)1 << dicSizeLog;
3346 
3347     if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
3348     {
3349       f.Print("Dictionary reduced to: ");
3350       PrintNumber(f, dicSizeLog, 1);
3351       f.NewLine();
3352     }
3353   }
3354 
3355   PrintRequirements(f, "usage:", true, GetBenchMemoryUsage(numThreads, dict, totalBenchMode), "Benchmark threads:   ", numThreads);
3356   f.NewLine();
3357 
3358   f.NewLine();
3359 
3360   if (totalBenchMode)
3361   {
3362     callback.NameFieldSize = kFieldSize_Name;
3363     use2Columns = false;
3364   }
3365   else
3366   {
3367     callback.NameFieldSize = kFieldSize_SmallName;
3368     use2Columns = true;
3369   }
3370   callback.Use2Columns = use2Columns;
3371 
3372   bool showFreq = false;
3373   UInt64 cpuFreq = 0;
3374 
3375   if (totalBenchMode)
3376   {
3377     showFreq = true;
3378   }
3379 
3380   unsigned fileldSize = kFieldSize_TotalSize;
3381   if (showFreq)
3382     fileldSize += kFieldSize_EUAndEffec;
3383 
3384   if (use2Columns)
3385   {
3386     PrintSpaces(f, callback.NameFieldSize);
3387     PrintRight(f, "Compressing", fileldSize);
3388     f.Print(kSep);
3389     PrintRight(f, "Decompressing", fileldSize);
3390   }
3391   f.NewLine();
3392   PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
3393 
3394   int j;
3395 
3396   for (j = 0; j < 2; j++)
3397   {
3398     PrintRight(f, "Speed", kFieldSize_Speed + 1);
3399     PrintRight(f, "Usage", kFieldSize_Usage + 1);
3400     PrintRight(f, "R/U", kFieldSize_RU + 1);
3401     PrintRight(f, "Rating", kFieldSize_Rating + 1);
3402     if (showFreq)
3403     {
3404       PrintRight(f, "E/U", kFieldSize_EU + 1);
3405       PrintRight(f, "Effec", kFieldSize_Effec + 1);
3406     }
3407     if (!use2Columns)
3408       break;
3409     if (j == 0)
3410       f.Print(kSep);
3411   }
3412 
3413   f.NewLine();
3414   PrintSpaces(f, callback.NameFieldSize);
3415 
3416   for (j = 0; j < 2; j++)
3417   {
3418     PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
3419     PrintRight(f, "%", kFieldSize_Usage + 1);
3420     PrintRight(f, "MIPS", kFieldSize_RU + 1);
3421     PrintRight(f, "MIPS", kFieldSize_Rating + 1);
3422     if (showFreq)
3423     {
3424       PrintRight(f, "%", kFieldSize_EU + 1);
3425       PrintRight(f, "%", kFieldSize_Effec + 1);
3426     }
3427     if (!use2Columns)
3428       break;
3429     if (j == 0)
3430       f.Print(kSep);
3431   }
3432 
3433   f.NewLine();
3434   f.NewLine();
3435 
3436   if (specifiedFreq != 0)
3437     cpuFreq = specifiedFreq;
3438 
3439 
3440   if (totalBenchMode)
3441   {
3442     for (UInt32 i = 0; i < numIterations; i++)
3443     {
3444       if (i != 0)
3445         printCallback->NewLine();
3446       HRESULT res;
3447 
3448       const unsigned kNumCpuTests = 3;
3449       for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
3450       {
3451         PrintLeft(f, "CPU", kFieldSize_Name);
3452         UInt32 resVal;
3453         RINOK(FreqBench(complexInCommands, numThreads, printCallback,
3454             (freqTest == kNumCpuTests - 1 || specifiedFreq != 0), // showFreq
3455             specifiedFreq,
3456             cpuFreq, resVal));
3457         callback.NewLine();
3458 
3459         if (specifiedFreq != 0)
3460           cpuFreq = specifiedFreq;
3461 
3462         if (freqTest == kNumCpuTests - 1)
3463           SetComplexCommands(testTime, specifiedFreq != 0, cpuFreq, complexInCommands);
3464       }
3465       callback.NewLine();
3466 
3467       callback.SetFreq(true, cpuFreq);
3468 
3469       if (!onlyHashBench)
3470       {
3471         res = TotalBench(EXTERNAL_CODECS_LOC_VARS
3472             complexInCommands, numThreads,
3473             dictIsDefined || fileDataBuffer.IsAllocated(), // forceUnpackSize
3474             fileDataBuffer.IsAllocated() ? fileDataBuffer.Size() : dict,
3475             (const Byte *)fileDataBuffer,
3476             printCallback, &callback);
3477         RINOK(res);
3478       }
3479 
3480       res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads,
3481           1 << kNumHashDictBits, printCallback, &callback, &callback.EncodeRes, true, cpuFreq);
3482       RINOK(res);
3483 
3484       callback.NewLine();
3485       {
3486         PrintLeft(f, "CPU", kFieldSize_Name);
3487         UInt32 resVal;
3488         UInt64 cpuFreqLastTemp = cpuFreq;
3489         RINOK(FreqBench(complexInCommands, numThreads, printCallback,
3490             specifiedFreq != 0, // showFreq
3491             specifiedFreq,
3492             cpuFreqLastTemp, resVal));
3493         callback.NewLine();
3494       }
3495     }
3496   }
3497   else
3498   {
3499     bool needSetComplexity = true;
3500     if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
3501     {
3502       unsigned i;
3503       for (i = 0; i < ARRAY_SIZE(g_Bench); i++)
3504       {
3505         const CBenchMethod &h = g_Bench[i];
3506         AString benchMethod (h.Name);
3507         AString benchProps;
3508         int propPos = benchMethod.Find(':');
3509         if (propPos >= 0)
3510         {
3511           benchProps = benchMethod.Ptr(propPos + 1);
3512           benchMethod.DeleteFrom(propPos);
3513         }
3514 
3515         if (AreSameMethodNames(benchMethod, methodName))
3516         {
3517           if (benchProps.IsEmpty()
3518               || benchProps == "x5" && method.PropsString.IsEmpty()
3519               || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
3520           {
3521             callback.BenchProps.EncComplex = h.EncComplex;
3522             callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
3523             callback.BenchProps.DecComplexUnc = h.DecComplexUnc;;
3524             needSetComplexity = false;
3525             break;
3526           }
3527         }
3528       }
3529       if (i == ARRAY_SIZE(g_Bench))
3530         return E_NOTIMPL;
3531     }
3532     if (needSetComplexity)
3533       callback.BenchProps.SetLzmaCompexity();
3534 
3535   for (unsigned i = 0; i < numIterations; i++)
3536   {
3537     const unsigned kStartDicLog = 22;
3538     unsigned pow = (dict < ((UInt32)1 << kStartDicLog)) ? kBenchMinDicLogSize : kStartDicLog;
3539     if (!multiDict)
3540       pow = 31;
3541     while (((UInt32)1 << pow) > dict && pow > 0)
3542       pow--;
3543     for (; ((UInt32)1 << pow) <= dict; pow++)
3544     {
3545       char s[16];
3546       ConvertUInt32ToString(pow, s);
3547       unsigned pos = MyStringLen(s);
3548       s[pos++] = ':';
3549       s[pos] = 0;
3550       PrintLeft(f, s, kFieldSize_SmallName);
3551       callback.DictSize = (UInt32)1 << pow;
3552 
3553       COneMethodInfo method2 = method;
3554 
3555       if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
3556       {
3557         // We add dictionary size property.
3558         // method2 can have two different dictionary size properties.
3559         // And last property is main.
3560         NCOM::CPropVariant propVariant = (UInt32)pow;
3561         RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant));
3562       }
3563 
3564       size_t uncompressedDataSize;
3565       if (fileDataBuffer.IsAllocated())
3566       {
3567         uncompressedDataSize = fileDataBuffer.Size();
3568       }
3569       else
3570       {
3571         uncompressedDataSize = callback.DictSize;
3572         if (uncompressedDataSize >= (1 << 18))
3573           uncompressedDataSize += kAdditionalSize;
3574       }
3575 
3576       HRESULT res = MethodBench(
3577           EXTERNAL_CODECS_LOC_VARS
3578           complexInCommands,
3579           true, numThreads,
3580           method2,
3581           uncompressedDataSize, (const Byte *)fileDataBuffer,
3582           kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
3583       f.NewLine();
3584       RINOK(res);
3585       if (!multiDict)
3586         break;
3587     }
3588   }
3589   }
3590 
3591   PrintChars(f, '-', callback.NameFieldSize + fileldSize);
3592 
3593   if (use2Columns)
3594   {
3595     f.Print(kSep);
3596     PrintChars(f, '-', fileldSize);
3597   }
3598 
3599   f.NewLine();
3600 
3601   if (use2Columns)
3602   {
3603     PrintLeft(f, "Avr:", callback.NameFieldSize);
3604     PrintTotals(f, showFreq, cpuFreq, callback.EncodeRes);
3605     f.Print(kSep);
3606     PrintTotals(f, showFreq, cpuFreq, callback.DecodeRes);
3607     f.NewLine();
3608   }
3609 
3610   PrintLeft(f, "Tot:", callback.NameFieldSize);
3611   CTotalBenchRes midRes;
3612   midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
3613   PrintTotals(f, showFreq, cpuFreq, midRes);
3614   f.NewLine();
3615 
3616   }
3617   return S_OK;
3618 }
3619