1 /*! \file extract_matrix.h
2  *  \brief functions to extract parts of matrix
3  *
4  *  \version 1
5  *
6  *  \date Created: 25/06/11
7  *  \date Modifications: see cvs log
8  *
9  *  \author A. Lucas
10  *
11  * \note
12  *  as usually, matrix x[i,j] (n x p) is represented by a vector
13  *              x[i + j x n]  (i=0..n-1 ; j=0..p-1)
14  *
15  *  \note Licence: GPL (>= 2)
16  */
17 
18 
19 #ifndef EXTRACT_MATRIX_HEADER_GMP_R_
20 #define EXTRACT_MATRIX_HEADER_GMP_R_ 1
21 
22 #include <R.h>
23 #include <Rinternals.h>
24 #include <functional>
25 #include "bigvec_q.h"
26 #include "bigintegerR.h"
27 #include <algorithm>
28 
29 extern "C"
30 {
31 
32   /** \brief get subsets of a matrix */
33   SEXP matrix_get_at_z(SEXP A,SEXP INDI, SEXP INDJ);
34 
35   /** \brief set subsets of a matrix */
36   SEXP matrix_set_at_z(SEXP A,SEXP VAL ,SEXP INDI, SEXP INDJ);
37 
38   /** \brief get subsets of a matrix */
39   SEXP matrix_get_at_q(SEXP A, SEXP INDI, SEXP INDJ);
40 
41   /** \brief set subsets of a matrix */
42   SEXP matrix_set_at_q(SEXP A,SEXP VAL, SEXP INDI, SEXP INDJ);
43 
44 }
45 
46 
47 namespace extract_gmp_R
48 {
49 
50   /** \brief Change R indices (in function like A[IND]) from
51    * R (it can be logical/positive/negative) to a vector of size n
52    * containing boolean: true: we update line/row, flase : we do not
53    * change anything
54    *
55    * \return a vector of n boolean corresponding to values that must be affected.
56    *
57    */
58   std::vector<bool> indice_set_at (unsigned int n , SEXP & IND);
59 
60 
61 
62   /**
63    * \brief tranform a matrix from bigvec or bigvec_q format to
64    * vector<vector< big > > (a vector of columns)
65    *
66    * \note for bigvec: it does not take into account modulus.
67    *
68    */
toVecVec(T & A,std::vector<T * > & retour)69   template< class T> void  toVecVec(T& A, std::vector<T * > & retour )
70   {
71     //typename std::vector<T> retour;
72     unsigned int i;
73 
74     // case: vector
75     if(A.nrow < 0)
76       A.nrow = A.size();
77     else // check that size is a multiple of row
78 	if((A.size() / A.nrow) != static_cast<float>(A.size()) / static_cast<float>(A.nrow))
79 	    Rf_error("malformed matrix");
80 
81     retour.resize(A.size() / A.nrow);
82     for(i = 0; i < retour.size();  ++i)
83       {
84 	retour[i] = new T();
85 	retour[i]->value.resize(A.nrow);
86       }
87     // go !
88     for(i= 0 ; i < A.value.size(); ++i)
89       // retour[col    �   ]  [row        ]
90       (retour[i / A.nrow ])->value[ i % A.nrow].setValue(A.value[i]);
91 
92     //return(retour);
93 
94   }
95 
96 
97 
98 
clearVec(typename std::vector<T * > & vec)99   template< class T> void clearVec(typename std::vector<T*> & vec )
100     {
101       for (typename std::vector<T*>::const_iterator it = vec.begin();
102 	   it != vec.end();
103 	   ++it)
104 	delete *it;
105     }
106 
107 
108   /**
109    * \brief extract parts of a matrix
110    *
111    * \param A matrix (class bigvec or bigvec_q)
112    * \param INDI,INDJ indices: either "LOGICAL" (true/false) or
113    *  numbers:
114    *      - positives: we return row/col in INDI/INDJ
115    *      - negatives: we retun all except row/col in INDI/INJ
116    */
get_at(T & A,SEXP & INDI,SEXP & INDJ)117   template <class T> T get_at (T & A, SEXP& INDI, SEXP& INDJ)
118   {
119 
120     // result = A[indi,indj]
121     int oldnrow = A.nrow;
122     std::vector<int> indJ;
123 
124     typename std::vector<T*> matricetmp ;
125     typename std::vector<T*> matricetmp2;
126 
127     toVecVec(A,matricetmp);
128     typename std::vector<T*> copyAdress(matricetmp);
129 
130     // only pointers
131     typename std::vector<T*> * matrice = &matricetmp;
132     typename std::vector<T*> * matricework = &matricetmp2;
133 
134     T retour;
135 
136     unsigned int i,j, newnrow;
137     std::vector<int>::iterator it;
138 
139     // --------------------------
140     // PART 1:  COLUMNS
141     // --------------------------
142 
143     if(A.size()==0)
144       {
145 	clearVec<T>(copyAdress);
146 	return(retour);
147       }
148 
149     if(TYPEOF(INDJ) != NILSXP) {
150       indJ = bigintegerR::create_int(INDJ);
151 
152       if (TYPEOF(INDJ) == LGLSXP) // LOGICAL
153 	{
154 
155 	  // for all columns
156 	  unsigned int delta=0;
157 	  for (i = 0; i < (*matrice)[0]->size(); ++i)
158 	    {
159 	      if (! indJ[i+delta% indJ.size()])
160 		{
161 		  // remove columns i
162 		  matrice->erase(i+ matrice->begin());
163 		  --i; // indice in modified matrix
164 		  ++delta; // i+delta: old indice
165 		}
166 	    }
167 
168 	}
169       else //INDJ: numbers
170 	{
171 	  indJ.erase(std::remove(indJ.begin(), indJ.end(), 0L), indJ.end()); // remove all zeroes
172 
173 	  if (indJ.empty())
174 	    {
175 	      clearVec<T>(copyAdress);
176 	      return retour;
177 	    }
178 
179 	  // case: a[-b]
180 	  // negatives...
181 	  if(indJ[0] < 0)
182 	    {
183 	      // sort in reverse order
184 	      std::sort(indJ.begin(),indJ.end(),std::greater<int>() );
185 
186 	      // we should have indJ like -1 -3 -7 -7 -12 ...
187 
188 	      // remove consecutive  duplicates
189 	      it = std::unique(indJ.begin(),indJ.end());
190 	      //indJ.erase(it,indJ.end());
191 
192 	      if ( indJ.back() > 0)
193 		Rf_error("only 0's may mix with negative subscripts");
194 
195 
196 
197 	      it=indJ.begin();
198 	      unsigned int delta=0;
199 	      // for all columns
200 	      for (j = 0; j < matrice->size(); ++j)
201 		{
202 		  if(it == indJ.end())
203 		    break;
204 
205 		  if (*it == - static_cast<int>(j+1+delta) )
206 		    {
207 		      matrice->erase(j+ matrice->begin());
208 		      ++it;
209 		      ++delta;
210 		      --j;
211 		    }
212 
213 		}
214 
215 	    }
216 	  else
217 	    // case: positive values: 1;5;7;7;9;10...
218 	    {
219 	      // note : can have duplicates and indices are not sorted
220 
221 	      // allocate new matrix (all will be copied)
222 	      // number of columns
223 	      matricework->reserve(indJ.size());
224 
225 	      // for all [new] rows
226 	      for( it=indJ.begin(); it != indJ.end(); it++)
227 		{
228 		  if (*it < 0)
229 		    Rf_error("only 0's may mix with negative subscripts");
230 		  if( static_cast<unsigned int>(*it-1) < matrice->size() ) {
231 		      //printf("on sort %s",(*matrice)[(*it)-1][0].str(10).c_str());
232 		      matricework->push_back( (*matrice)[*it-1] );
233 		  } else {
234 		      Rf_error("column subscript out of bounds");
235 		  }
236 		}
237 
238 	      // change addresses
239 	      matrice = &matricetmp2;
240 	      matricework = &matricetmp;
241 
242 	    }//end of case: int+positive values
243 
244 	}
245       } // INDJ "exists"
246 
247     if(matrice->size()==0)
248       {
249 	clearVec<T>(copyAdress);
250 	return(retour);
251       }
252 
253     // --------------------------
254     // PART 2:  ROWS
255     // --------------------------
256     indJ.empty();
257     std::vector<int> indI = bigintegerR::create_int(INDI);
258     if(TYPEOF(INDI) != NILSXP) {
259 	if (TYPEOF(INDI) == LGLSXP) { // LOGICAL
260 	  // for all rows
261 	  unsigned int delta = 0;
262 	  for (i = 0; i < (*matrice)[0]->size(); ++i)
263 	    {
264 	      if (! indI[(i+delta)% indI.size()])
265 		{
266 		  // for all columns j delete row i
267 		  for (j = 0; j < matrice->size(); ++j)
268 		    (*matrice)[j]->value.erase(i+(*matrice)[j]->value.begin());
269 
270 		  //++newnrow;
271 		  --i; // i: new indice in modified matrix
272 		  ++delta; // i+delta = old indices
273 		}
274 	    }
275 
276 	}
277 	else { // INDI : numbers
278 	  // remove zeros:
279 	  indI.erase(std::remove(indI.begin(), indI.end(), 0L), indI.end());
280 
281 	  if (indI.empty())
282 	    {
283 	      clearVec<T>(copyAdress);
284 	      return retour;
285 	    }
286 
287 	  // case: a[-b]
288 	  // negatives...
289 	  if(indI[0] < 0)
290 	    {
291 	      std::sort(indI.begin(),indI.end(),std::greater<int>() );
292 	      // we should have indI like -1 -3 -7 -7 -12 ...
293 
294 	      // remove duplicates
295 	      std::unique(indI.begin(),indI.end());
296 	      //indI.erase(it,indI.end());
297 
298 	      if ( indI.back() > 0)
299 		Rf_error("only 0's may mix with negative subscripts");
300 
301 
302 
303 	      //newnrow = A.nrow;
304 	      it=indI.begin();
305 	      // for all rows
306 	      unsigned int delta = 0;
307 	      for (i = 0; i < (*matrice)[0]->size(); ++i)
308 		{
309 
310 		  if(it != indI.end() )
311 		    if (*it == - static_cast<int>(i+1+delta) )
312 		      {
313 			// for all columns j remove row i
314 			for (j = 0; j < matrice->size(); ++j)
315 			  {
316 			    (*matrice)[j]->value.erase(i+(*matrice)[j]->value.begin());
317 			  }
318 			//--newnrow;
319 			--i; // i: new indice in modified matrix
320 			++delta; // i+delta = old indices
321 			++it;
322 		      }
323 
324 		}
325 
326 	    }
327 	  else
328 	    {
329 	    // case: positive values: 1;5;7;7;9;10...
330 
331 	      // delete too large values or give error iff  INDJ is non-NULL
332 	      for(it = indI.begin(); it != indI.end(); ++it)
333 		{
334 		  if(*it > static_cast<int>((*matrice)[0]->size()))
335 		    {
336 		      if(oldnrow < 0) { // vector case: out-of-bound --> NA
337 			/* it = indI.erase(it); */
338 			/* --it; */
339 		      } else { // matrix case:
340 			Rf_error("subscript out of bounds");
341 		      }
342 		    }
343 		}
344 	      // note : can have duplicates and indices are not sorted
345 
346 	      //newnrow = indI.size();
347 
348 	      // allocate new matrix (all will be copied)
349 	      // number of columns
350 
351 	      matricework->resize( matrice->size());
352 	      for (typename std::vector<T*>::iterator it = matricework->begin();
353 		   it != matricework->end();
354 		   ++it)
355 		{
356 		  *it = new T();
357 		  copyAdress.push_back(*it);
358 		}
359 
360 	      // number of row
361 	      for (j = 0; j < matricework->size(); ++j)
362 		(*matricework)[j]->resize( indI.size() );
363 
364 
365 	      // for all [new] rows
366 	      for( i=0; i < indI.size(); ++i)
367 		{
368 		  if (indI[i] < 0)
369 		    Rf_error("only 0's may mix with negative subscripts");
370 		  if( static_cast<unsigned int>(indI[i]-1) < (*matrice)[0]->size() )
371 		    {
372 		      // for all columns
373 		      for (j = 0; j < matricework->size(); ++j)
374 			//newmat[i,j] = oldmat[ind[i],j]
375 			( (*matricework)[j])->value[i] = ((*matrice)[j])->value[indI[i]-1];
376 		    }
377 		  else
378 		    for (j = 0; j < matricework->size(); ++j)
379 		      ( (*matricework)[j])->value[i].setValue();
380 		}
381 
382 	      matrice = matricework; // change addresses
383 	    }//end of case: int+positive values
384 
385 	}
386       }
387 
388     // --------------------------
389     // PART 3:  COPY RESULT
390     // --------------------------
391 
392     newnrow = (*matrice)[0]->size();
393     retour.resize(matrice->size() * newnrow);
394     for(i=0; i< newnrow ; ++i)
395       for(j=0; j <  matrice->size() ; ++j)
396 	retour.value[i + j* newnrow ]  =  ((*matrice)[j])->value[i] ;
397 
398     retour.nrow = (oldnrow < 0) ? -1 : newnrow;
399 
400     clearVec<T>(copyAdress);
401     return(retour);
402 
403   }  // end get_at
404 
405 
406   /** \brief set a matrix: for R function src[idx,jdx] <- value
407    *
408    */
set_at(T & src,const T & value,SEXP & IDX,SEXP & JDX)409   template<class T> void set_at(T & src ,const T & value, SEXP & IDX, SEXP & JDX)
410   {
411     // case: vector
412     if(src.nrow < 0)
413       src.nrow = src.size();
414 
415     // check that size is a multiple of row
416     if((src.size() / src.nrow) != static_cast<float>(src.size()) / static_cast<float>(src.nrow))
417       Rf_error("malformed matrix");
418 
419     unsigned int ncol = src.size() / src.nrow; // number of col
420     std::vector<bool> vidx =  indice_set_at ( src.nrow, IDX);
421     std::vector<bool> vjdx =  indice_set_at ( ncol, JDX);
422 
423     unsigned int k=0;
424 
425     for(unsigned int j = 0 ; j < ncol; ++j)
426       {
427 	if(vjdx[j])
428 	  for(int i = 0 ; i < src.nrow; ++i)
429 	    if(vidx[i])
430 	      {
431 		src.set(i + j * src.nrow, value[k % value.size()] );
432 		++k;
433 	      }
434       }
435 
436     return;
437 
438   }//end set_at
439 
440 }// end namespace
441 
442 
443 
444 
445 
446 
447 #endif
448