1 /*  _______________________________________________________________________
2 
3     Surfpack: A Software Library of Multidimensional Surface Fitting Methods
4     Copyright (c) 2006, Sandia National Laboratories.
5     This software is distributed under the GNU Lesser General Public License.
6     For more information, see the README file in the top Surfpack directory.
7     _______________________________________________________________________ */
8 
9 #ifdef HAVE_CONFIG_H
10 #include "surfpack_config.h"
11 #endif
12 
13 #include <vector>
14 #include <fstream>
15 #include <iostream>
16 #include <string>
17 #include <iterator>
18 
19 #include "LinearRegressionModelTest.h"
20 #include "LinearRegressionModel.h"
21 #include "surfpack.h"
22 #include "SurfPoint.h"
23 #include "SurfData.h"
24 #include "AxesBounds.h"
25 #include "SurfpackInterface.h"
26 #include "unittests.h"
27 #include "ModelFitness.h"
28 
29 using std::cout;
30 using std::endl;
31 using std::ifstream;
32 using std::ios;
33 using std::ofstream;
34 using std::string;
35 using std::vector;
36 using std::ostream_iterator;
37 using std::ostringstream;
38 using surfpack::shared_rng;
39 
40 const int TESTDIMS = 2;
41 const int GRIDSIZE = 50;
42 CPPUNIT_TEST_SUITE_REGISTRATION( LinearRegressionModelTest );
43 
44 
45 // BMA: These functions previously in ModelFitness.h, but not used there
46 template< typename T>
vecSubInPlace(std::vector<T> & sub,std::vector<T> & min)47 std::vector< T >& vecSubInPlace(std::vector< T >& sub, std::vector< T >& min)
48 {
49   assert(sub.size() == min.size());
50   typedef typename std::vector< T >::iterator VecIt;
51   VecIt subIt, minIt;
52   for (subIt = sub.begin(), minIt = min.begin();
53 	subIt != sub.end();
54 	++subIt, ++minIt) {
55     *subIt -= *minIt;
56   }
57   return sub;
58 }
59 
60 template< typename T>
vecSub(std::vector<T> & sub,std::vector<T> & min)61 std::vector< T > vecSub(std::vector< T >& sub, std::vector< T >& min)
62 {
63   assert(sub.size() == min.size());
64   std::vector< T > result(sub.size());
65   typedef typename std::vector< T >::iterator VecIt;
66   VecIt subIt, minIt, resIt;
67   for (subIt = sub.begin(), minIt = min.begin(), resIt = result.begin();
68 	subIt != sub.end();
69 	++subIt, ++minIt, ++resIt) {
70     *resIt = *subIt - *minIt;
71   }
72   return result;
73 }
74 
75 
generateDerivPlot(const std::string & plotname,const std::string & datafilename,int x,int y1,int y2)76 void generateDerivPlot(const std::string& plotname, const std::string& datafilename, int x, int y1, int y2)
77 {
78 
79   std::ofstream outfile("gnuplotscript",std::ios::out);
80   outfile << "set term png\n"
81 	     "set output \"" << plotname << ".png\"\n"
82 	     "set autoscale\n"
83              "plot  \"" << datafilename << ".spd\" using "
84 	  << x << ":"  << y1
85 	 << ", \"" << datafilename << ".spd\" using "
86 	  << x << ":"  << y2
87 	 << ", \"" << datafilename << "pt\" using 1:2 points 10\n" ;
88   outfile.close();
89   system("gnuplot ./gnuplotscript\n");
90   system("rm gnuplotscript");
91 
92 }
93 
setUp()94 void LinearRegressionModelTest::setUp()
95 {
96   // Create data set
97   ab = new AxesBounds(string("-2 2 | -2 2"));
98   vector< unsigned > grid_points(2);
99   grid_points[0] = grid_points[1] = GRIDSIZE;
100   sd = 0;
101   randsd = 0;
102   vector< string > test_functions;
103   // Points on grid
104   SurfpackInterface::CreateSample(sd,*ab,grid_points,test_functions);
105   // Random sample of points
106   SurfpackInterface::CreateSample(randsd,*ab,10,test_functions);
107 
108 }
109 
tearDown()110 void LinearRegressionModelTest::tearDown()
111 {
112   delete sd;
113   delete ab;
114   delete randsd;
115 }
116 
constructorTest()117 void LinearRegressionModelTest::constructorTest()
118 {
119   VecDbl cf(1,1.0);
120   LRMBasisSet bs;
121   bs.bases.push_back(VecUns());
122   LinearRegressionModel lrm(1,bs,cf);
123   CPPUNIT_ASSERT(matches(cf[0],lrm.coeffs[0]));
124   CPPUNIT_ASSERT(lrm.size() == 1);
125 
126 }
127 
unityBasisTest()128 void LinearRegressionModelTest::unityBasisTest()
129 {
130   VecUns list;
131   bs.bases.push_back(list);
132   VecDbl x(1,0.0);
133   VecUns vars(1,0);
134   CPPUNIT_ASSERT(matches(bs.eval(0,x),1.0));
135   CPPUNIT_ASSERT(matches(bs.deriv(0,x,vars),0.0));
136 }
137 
singleLinearTest()138 void LinearRegressionModelTest::singleLinearTest()
139 {
140 
141   VecUns list(1,0);
142   bs.bases.push_back(list);
143   VecDbl x(1,0.0);
144   VecUns vars(1,0);
145   // The term is x0
146   // At x0 = 0, eval = 0 and deriv = 1
147   CPPUNIT_ASSERT(matches(bs.eval(0,x),0.0));
148   CPPUNIT_ASSERT(matches(bs.deriv(0,x,vars),1.0));
149   x[0] = 1.0;
150   // At x0 = 1, eval = 1 and deriv = 1
151   CPPUNIT_ASSERT(matches(bs.eval(0,x),1.0));
152   CPPUNIT_ASSERT(matches(bs.deriv(0,x,vars),1.0));
153   x[0] = 2.0;
154   // At x0 = 2, eval = 2 and deriv = 1
155   CPPUNIT_ASSERT(matches(bs.eval(0,x),2.0));
156   CPPUNIT_ASSERT(matches(bs.deriv(0,x,vars),1.0));
157   // Now differentiate again with respect to x0
158   vars.push_back(0);
159   CPPUNIT_ASSERT(matches(bs.deriv(0,x,vars),0.0));
160 }
161 
singleQuadraticTest()162 void LinearRegressionModelTest::singleQuadraticTest()
163 {
164   VecUns list(2,0);
165   bs.bases.push_back(list);
166   VecDbl x(1,0.0);
167   VecUns vars(1,0);
168   // The term is x0^2
169   // At x0 = 0, eval = 0 and deriv = 0
170   CPPUNIT_ASSERT(matches(bs.eval(0,x),0.0));
171   CPPUNIT_ASSERT(matches(bs.deriv(0,x,vars),0.0));
172   x[0] = 1.0;
173   // At x0 = 1, eval = 1 and deriv = 2
174   CPPUNIT_ASSERT(matches(bs.eval(0,x),1.0));
175   CPPUNIT_ASSERT(matches(bs.deriv(0,x,vars),2.0));
176   x[0] = 2.0;
177   // At x0 = 2, eval = 4 and deriv = 4
178   CPPUNIT_ASSERT(matches(bs.eval(0,x),4.0));
179   CPPUNIT_ASSERT(matches(bs.deriv(0,x,vars),4.0));
180   // Now differentiate again with respect to x0
181   vars.push_back(0);
182   CPPUNIT_ASSERT(matches(bs.deriv(0,x,vars),2.0));
183   // Now differentiate again with respect to x0
184   vars.push_back(0);
185   CPPUNIT_ASSERT(matches(bs.deriv(0,x,vars),0.0));
186 }
187 
lineEvalTest()188 void LinearRegressionModelTest::lineEvalTest()
189 {
190   VecUns list;
191   bs.bases.push_back(list);
192   list.push_back(0);
193   bs.bases.push_back(list);
194   VecDbl cf(2,1.0);
195   LinearRegressionModel lrm(1,bs,cf); // lrm = x[0] + 1;
196   VecDbl x(1,1.0);
197   CPPUNIT_ASSERT(lrm.size() == 1);
198   CPPUNIT_ASSERT(matches(lrm(x),2.0));
199   VecDbl g = lrm.gradient(x);
200   CPPUNIT_ASSERT(matches(g[0],1.0));
201 
202   // Evaluate at x[0] = -3.0;
203   x[0] = -3.0;
204   CPPUNIT_ASSERT(matches(lrm(x),-2.0));
205   VecDbl g1 = lrm.gradient(x);
206   CPPUNIT_ASSERT(matches(g1[0],1.0));
207 
208   // Change function to 2*x[0] + 3;
209   lrm.coeffs[0] = 3.0;
210   lrm.coeffs[1] = 2.0;
211   // Evaluation = 2*-3 + 3;
212   CPPUNIT_ASSERT(matches(lrm(x),-3.0));
213   VecDbl g2 = lrm.gradient(x);
214   cout << "grad: " << g2[0] << endl;
215   CPPUNIT_ASSERT(matches(g2[0],2.0));
216 }
217 
quadratic2DTest()218 void LinearRegressionModelTest::quadratic2DTest()
219 {
220   VecUns list;
221   bs.bases.push_back(list); // Unity
222   list.push_back(0);
223   bs.bases.push_back(list); // x[0]
224   list.push_back(1);
225   bs.bases.push_back(list); // x[0]*x[1]
226   list[0] = 1;
227   bs.bases.push_back(list); // *x[1]*x[1]
228 
229   VecDbl cf(4,-1.0);
230   cf[1] = 2.0;
231   cf[2] = -3.0;
232   cf[3] = 4.0;
233   // lrm = -1 + 2.0*x[0] - 3.0*x[0]*x[1] + 4.0*x[1]^2;
234   LinearRegressionModel lrm(2,bs,cf);
235   VecDbl x(2,1.0);
236   // lrm(1,1) = -1+2(1)-3(1)(1)+4(1)(1) = 2
237   CPPUNIT_ASSERT(lrm.size() == 2);
238   CPPUNIT_ASSERT(matches(lrm(x),2.0));
239   VecDbl g = lrm.gradient(x);
240   // dlrm/dx0 = 2.0 - 3.0*x[1]
241   // dlrm/dx1 = -3.0*x[0]+8.0*x[1]
242   CPPUNIT_ASSERT(matches(g[0],-1.0));
243   CPPUNIT_ASSERT(matches(g[1],5.0));
244 
245    //Evaluate at (-3,-1)
246   x[0] = -3.0;
247   x[1] = -1.0;
248   // -1+2(-3)-3(-3)(-1)+4(-1)(-1) = -12
249   CPPUNIT_ASSERT(matches(lrm(x),-12.0));
250   VecDbl g1 = lrm.gradient(x);
251   CPPUNIT_ASSERT(matches(g1[0],5.0));
252   CPPUNIT_ASSERT(matches(g1[1],1.0));
253 }
254 
plotTest1()255 void LinearRegressionModelTest::plotTest1()
256 {
257 
258 
259   VecUns list;
260   bs.bases.push_back(list); // Unity
261   list.push_back(0);
262   bs.bases.push_back(list); // x[0]
263   list.push_back(1);
264   bs.bases.push_back(list); // x[0]*x[1]
265   list[0] = 1;
266   bs.bases.push_back(list); // *x[1]*x[1]
267 
268   VecDbl cf(4,-1.0);
269   cf[1] = 2.0;
270   cf[2] = -3.0;
271   cf[3] = 4.0;
272   // lrm = -1 + 2.0*x[0] - 3.0*x[0]*x[1] + 4.0*x[1]^2;
273   LinearRegressionModel lrm(2,bs,cf);
274 
275   // Prepare a basis set for a hyperplane model
276   LRMBasisSet hpbs; // hyperplane basis set
277   list.resize(1);
278   hpbs.bases.push_back(VecUns()); // Unity
279   for (unsigned i = 0; i < randsd->xSize(); i++) {
280     list[0] = i;
281     hpbs.bases.push_back(list);
282   }
283 
284 
285   SurfData& rsd = *randsd;
286   for (unsigned i = 0; i < rsd.size(); i++) {
287     VecDbl pt = rsd(i);
288     double resp = lrm(pt);
289     VecDbl grad = lrm.gradient(pt);
290     // solve for b in y = m1x1 + m2x2 + ... + b;
291     double b = resp;
292     for (unsigned j = 0; j < pt.size(); j++) {
293       b -= pt[j]*grad[j];
294     }
295 
296     // Now create a tangent hyperplane to the surface
297     VecDbl hyp_coeff = grad;
298     reverse(hyp_coeff.begin(),hyp_coeff.end());
299     hyp_coeff.push_back(b);
300     reverse(hyp_coeff.begin(),hyp_coeff.end());
301     LinearRegressionModel tang_plane(pt.size(),hpbs,hyp_coeff);
302 
303     // Now prepare a set of axes, so that we can plot a slice of both
304     // functions along each dimension
305     // This is a set of axes along which each dimension is fixed
306     vector<AxesBounds::Axis> axes(pt.size());
307     for (unsigned j = 0; j < pt.size(); j++) {
308       axes[j].min = axes[j].max = pt[j];
309     }
310     VecUns dims(pt.size(),1);
311 
312     for (unsigned dim = 0; dim < pt.size(); dim++) {
313       vector<AxesBounds::Axis> var_axes = axes;
314       var_axes[dim].min = -2.0;
315       var_axes[dim].max = 2.0;
316       AxesBounds var_ab(var_axes);
317       VecUns var_dims = dims;
318       var_dims[dim] = GRIDSIZE;
319       SurfData* test_data = 0;
320       vector<string> test_functions;
321       SurfpackInterface::CreateSample(test_data,var_ab,var_dims,test_functions);
322       SurfData& td = *test_data;
323       // Now evaluate both models on this data
324       ///\\todo Creat a base model class that can do operator()(SurfData&)
325       VecDbl lrm_resp(td.size());
326       VecDbl hyp_resp(td.size());
327       for (unsigned k = 0; k < td.size(); k++) {
328         lrm_resp[k] = lrm(td(k));
329         hyp_resp[k] = tang_plane(td(k));
330       }
331       unsigned y1 = td.addResponse(lrm_resp) + td.xSize() + 1; // gnuplot counts cols from 1
332       unsigned y2 = td.addResponse(hyp_resp) + td.xSize() + 1;
333       unsigned xdim = dim + 1;
334       string s1("deriv");
335       ostringstream os;
336       os << s1 << i << "_" << dim ;
337       string plotfilename = os.str();
338       string datafilename = plotfilename + ".spd";
339       string ptfilename = plotfilename + "pt";
340       std::ofstream ptfile(ptfilename.c_str(),ios::out);
341       ptfile << pt[dim] << " " << resp << endl;
342       ptfile.close();
343       td.write(datafilename);
344       generateDerivPlot(plotfilename,plotfilename,xdim,y1,y2);
345       cout << "Pt: " << i << " dim: " << dim << endl;
346       cout << lrm.asString() << tang_plane.asString() << endl;
347       delete test_data;
348     }
349 
350   }
351 }
352 
termPrinterTest()353 void LinearRegressionModelTest::termPrinterTest()
354 {
355   LinearRegressionModelFactory::CreateLRM(2,2);
356   VecUns u, v;
357   u.push_back(1);
358   u.push_back(10);
359   u.push_back(100);
360   v.push_back(0);
361   v.push_back(5);
362   v.push_back(30);
363   vecSub<unsigned>(u,v);
364   copy(u.begin(),u.end(),std::ostream_iterator<unsigned>(std::cout,"\n"));
365 }
366 
createModelTest()367 void LinearRegressionModelTest::createModelTest()
368 {
369   SurfData* sd = SurfpackInterface::CreateSample(string("-2 2 | -2 2"),
370     string("8 8"),string("sphere"));
371   LinearRegressionModelFactory lrmf;
372   SurfpackModel* lrm = lrmf.Create(*sd);
373   cout << lrm->asString() << endl;
374   return;
375   CPPUNIT_ASSERT(matches(dynamic_cast<LinearRegressionModel*>(lrm)->coeffs[0],0.0));
376   CPPUNIT_ASSERT(matches(dynamic_cast<LinearRegressionModel*>(lrm)->coeffs[1],0.0));
377   CPPUNIT_ASSERT(matches(dynamic_cast<LinearRegressionModel*>(lrm)->coeffs[2],1.0));
378   CPPUNIT_ASSERT(matches(dynamic_cast<LinearRegressionModel*>(lrm)->coeffs[3],0.0));
379   CPPUNIT_ASSERT(matches(dynamic_cast<LinearRegressionModel*>(lrm)->coeffs[4],0.0));
380   CPPUNIT_ASSERT(matches(dynamic_cast<LinearRegressionModel*>(lrm)->coeffs[5],1.0));
381   delete lrm;
382   delete sd;
383 }
384 
385 //extern "C" double gsl_ran_fdist_pdf(double,double,double);
386 //void LinearRegressionModelTest::FTest()
387 //{
388 //  // Make a set of data that is a full quadratic fit plus some random noise
389 //  SurfData* sd = SurfpackInterface::CreateSample(string("-2 2 | -2 2"),
390 //    string("8 8"),string("sphere"));
391 //  LRMBasisSet bs = LinearRegressionModelFactory::CreateLRM(2,2);
392 //  CPPUNIT_ASSERT(bs.size() == 6);
393 //  VecDbl coeffs = surfpack::toVec<double>(string("1 -2 3 -1 3 1"));
394 //  LinearRegressionModel lrmTemp(2,bs,coeffs);
395 //  VecDbl responses = lrmTemp(*sd);
396 //  // Add some random noise to the data
397 //  for (unsigned i = 0; i < responses.size(); i++) {
398 //    responses[i] += (shared_rng().rand()-0.5)/2.0;
399 //  }
400 //  unsigned new_index = sd->addResponse(responses);
401 //  sd->setDefaultIndex(new_index);
402 //  // Now fit a full model to this data
403 //  LinearRegressionModelFactory lrmf;
404 //  LinearRegressionModel* lrmFull = lrmf.Create(*sd);
405 //  cout << lrmFull->asString() << endl;
406 //  StandardFitness sf;
407 //  double ssr_full = sf(*lrmFull,*sd);
408 //  // Now remove a term to create a reduced model
409 //  bs.bases.erase(bs.bases.begin()+2);
410 //  VecDbl coeffs_reduced = LinearRegressionModel::lrmSolve(bs,ScaledSurfData(NonScaler(),*sd));
411 //  LinearRegressionModel lrmReduced(2,bs,coeffs_reduced);
412 //  double ssr_reduced = sf(lrmReduced,*sd);
413 //  cout << "Fitness Full: " << ssr_full << endl;
414 //  cout << "Fitness Reduced: " << ssr_reduced << endl;
415 //
416 //  // compute degrees of freedom
417 //  unsigned df_num = 1;
418 //  unsigned df_denom = sd->size() - coeffs.size();
419 //  double Fstat = (ssr_reduced - ssr_full)*df_denom/ssr_full;
420 //  double fpdf = gsl_ran_fdist_pdf(Fstat,df_num,df_denom);
421 //  cout << "Fstat: " << Fstat << " fpdf: " << fpdf << endl;
422 //  delete lrmFull;
423 //}
424 
425