1 // @file parallel.cpp - This is an example demo file that demonstrates timing of
2 // Parallel operations using openmp
3 // @author TPOC: contact@palisade-crypto.org
4 //
5 // @copyright Copyright (c) 2019, New Jersey Institute of Technology (NJIT)
6 // All rights reserved.
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 // 1. Redistributions of source code must retain the above copyright notice,
10 // this list of conditions and the following disclaimer.
11 // 2. Redistributions in binary form must reproduce the above copyright notice,
12 // this list of conditions and the following disclaimer in the documentation
13 // and/or other materials provided with the distribution. THIS SOFTWARE IS
14 // PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
15 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
16 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
17 // EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
18 // INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 
25 #define PROFILE  // by defining this we activate the PROFILELOG() outputs
26 
27 #include <chrono>
28 #include <fstream>
29 #include <iostream>
30 #include <thread>
31 #include "palisadecore.h"
32 #include "time.h"
33 
34 // function to verify our generated array
35 void verify(float *foo, uint32_t array_size) {
36   // verify that the data was generated correctly.
37   bool goodflag = true;
38   for (size_t i = 1; i < array_size; ++i) {
39     if ((foo[i] - foo[i - 1]) != 1) {
40       goodflag = goodflag & false;
41     }
42   }
43   if (goodflag) {
44     std::cout << "verification succeeded" << std::endl;
45   } else {
46     std::cout << "verification failed" << std::endl;
47     for (size_t i = 0; i < array_size; ++i) {
48       std::cout << foo[i] << " ";
49     }
50     std::cout << std::endl;
51   }
52   return;
53 }
54 
55 int main(int argc, char *argv[]) {
56   // note if you set dbg_flag = true then all  the following DEBUG() statments
57   // print to stdout.
58   DEBUG_FLAG(true);
59 
60   lbcrypto::PalisadeParallelControls.Enable();
61 
62   uint32_t array_size = 1000;
63   DEBUGEXP(argc);
64   DEBUGEXP(argv[0]);
65 
66   if (argc < 2) {
67     std::cout << "running " << argv[0] << " with default array size of 1000"
68               << std::endl;
69   } else {
70     array_size = atoi(argv[1]);
71     if (array_size <= 0) {
72       std::cout << "error in argment " << argv[1]
73                 << " must be greater than zero " << std::endl;
74       exit(-1);
75     }
76   }
77 
78   // build the array and zero it out.
79   float *foo = new float[array_size];
80   for (size_t i = 0; i < array_size; i++) {
81     foo[i] = 0;
82   }
83 
84   TimeVar t_total;   // define timer variable for TIC() TOC() timing functions.
85   double timeTotal;  // holds the resulting time
86 
87   std::cout << "Parallel computation demo using " << omp_get_num_procs()
88             << " processors." << std::endl;
89   std::cout << "and maximum of " << omp_get_max_threads() << " threads."
90             << std::endl
91             << std::endl;
92   std::cout
93       << "to change # threads from the default, execute at the comamnd line "
94       << std::endl;
95   std::cout << " For the bash shell, enter:" << std::endl
96             << "export OMP_NUM_THREADS=<number of threads to use>" << std::endl
97             << "For the csh or tcsh shell, enter: " << std::endl
98             << " setenv OMP_NUM_THREADS <number of threads to use>"
99             << std::endl;
100   std::cout << " or use omp_set_num_threads() in your code." << std::endl
101             << std::endl;
102 
103   std::cout << "HINT: use export OMP_DISPLAY_ENV=TRUE to see all your settings"
104             << std::endl;
105 
106   int nthreads, tid;
107 // determine how many threads we will have.
108 #pragma omp parallel private(nthreads, tid)
109   {
110     /* Obtain thread number */
111     tid = omp_get_thread_num();
112 
113     /* Only master thread does this */
114     if (tid == 0) {
115       nthreads = omp_get_num_threads();
116       std::cout << "Confirmed Number of threads = " << nthreads << std::endl;
117     }
118   }
119 
120   // demonstrate debug functions (only active when dbg_flag = true)
121   std::cout << "demonstrating DEBUG()" << std::endl;
122   DEBUG("array_size = " << array_size);
123   DEBUGEXP(array_size);
124   DEBUGWHERE(array_size);
125 
126 #if !defined(NDEBUG)
127   dbg_flag = false;
128 #endif
129   // these three no longer report any value
130   DEBUG("array_size = " << array_size);
131   DEBUGEXP(array_size);
132   DEBUGWHERE(array_size);
133 
134   std::cout << std::endl;
135   // now run the parallel job
136 
137   TIC(t_total);  // set the timer.
138 
139   // define a parallel loop that takes 10 milliseconds to execute then performs
140   // a small task of filling in an array
141 #pragma omp parallel for
142   for (size_t i = 0; i < array_size; ++i) {
143     float tmp = i;
144     std::this_thread::sleep_for(std::chrono::milliseconds(10));
145     foo[i] = tmp;
146   }
147 
148   // read the timer to get the computation time in miliseconds
149   // look at debug.h to find other timers you can use
150 
151   timeTotal = TOC_MS(t_total);
152   PROFILELOG("Total time with internal delay: "
153              << "\t" << timeTotal << " ms");
154   verify(foo, array_size);
155   std::cout << std::endl;
156 
157   // repeat the parallel process without the internal delay
158   // clear out foo.
159   for (size_t i = 0; i < array_size; i++) {
160     foo[i] = 0;
161   }
162 
163   TIC(t_total);  // reset the timer.
164   // define a parallel loop that takes 10 milliseconds to execute then performs
165   // a small task of filling in an array
166 #pragma omp parallel for
167   for (size_t i = 0; i < array_size; ++i) {
168     float tmp = i;
169     foo[i] = tmp;
170   }
171 
172   // read the timer to get the computation time in micro seconds
173   timeTotal = TOC_US(t_total);
174   PROFILELOG("Total time without internal delay: "
175              << "\t" << timeTotal << " us");
176   verify(foo, array_size);
177 
178   return 0;
179 }
180