1 //==============================================================================
2 //
3 //  This file is part of GPSTk, the GPS Toolkit.
4 //
5 //  The GPSTk is free software; you can redistribute it and/or modify
6 //  it under the terms of the GNU Lesser General Public License as published
7 //  by the Free Software Foundation; either version 3.0 of the License, or
8 //  any later version.
9 //
10 //  The GPSTk is distributed in the hope that it will be useful,
11 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 //  GNU Lesser General Public License for more details.
14 //
15 //  You should have received a copy of the GNU Lesser General Public
16 //  License along with GPSTk; if not, write to the Free Software Foundation,
17 //  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
18 //
19 //  This software was developed by Applied Research Laboratories at the
20 //  University of Texas at Austin.
21 //  Copyright 2004-2020, The Board of Regents of The University of Texas System
22 //
23 //==============================================================================
24 
25 //==============================================================================
26 //
27 //  This software was developed by Applied Research Laboratories at the
28 //  University of Texas at Austin, under contract to an agency or agencies
29 //  within the U.S. Department of Defense. The U.S. Government retains all
30 //  rights to use, duplicate, distribute, disclose, or release this software.
31 //
32 //  Pursuant to DoD Directive 523024
33 //
34 //  DISTRIBUTION STATEMENT A: This software has been approved for public
35 //                            release, distribution is unlimited.
36 //
37 //==============================================================================
38 
39 /**
40  * @file FileHunter.cpp
41  * Find all files matching a specification.
42  */
43 
44 #include "FileHunter.hpp"
45 #include "YDSTime.hpp"
46 #include "CivilTime.hpp"
47 #include "GPSWeekSecond.hpp"
48 
49 using namespace std;
50 using namespace gpstk;
51 using namespace gpstk::StringUtils;
52 
53 // headers for directory searching interface
54 #ifndef _WIN32
55 #include <unistd.h>
56 #include <dirent.h>
57 #include <limits.h>
58 #include <sys/stat.h>
59 
60 #else
61 #include <io.h>
62 #include <direct.h>
63 #define PATH_MAX _MAX_PATH
64 #endif
65 
66 namespace gpstk
67 {
68 
FileHunter(const string & filespec)69    FileHunter::FileHunter(const string& filespec)
70    {
71       try
72       {
73          init(filespec);
74       }
75       catch (FileHunterException& e)
76       {
77          GPSTK_RETHROW(e);
78       }
79    }
80 
81 
FileHunter(const FileSpec & filespec)82    FileHunter::FileHunter(const FileSpec& filespec)
83    {
84       try
85       {
86          init(filespec.getSpecString());
87       }
88       catch (FileHunterException& e)
89       {
90          GPSTK_RETHROW(e);
91       }
92    }
93 
94 
newHunt(const string & filespec)95    FileHunter& FileHunter::newHunt(const string& filespec)
96    {
97       try
98       {
99          init(filespec);
100       }
101       catch (FileHunterException& e)
102       {
103          GPSTK_RETHROW(e);
104       }
105       return *this;
106    }
107 
108 
setFilter(const FileSpec::FileSpecType fst,const vector<string> & filter)109    FileHunter& FileHunter::setFilter(const FileSpec::FileSpecType fst,
110                                      const vector<string>& filter)
111    {
112       std::string  fileSpecType;
113       if (filter.empty())
114       {
115          FileHunterException exc("FileHunter::setFilter with empty filter"
116                                  " would result in no matches");
117          GPSTK_THROW(exc);
118       }
119       try
120       {     // ensure a valid file spec type
121          fileSpecType = FileSpec::convertFileSpecType(fst);
122       }
123       catch (FileSpecException& fse)
124       {
125          FileHunterException fhe(fse.getText(), fse.getErrorId());
126          GPSTK_THROW(fhe);
127       }
128          // try to find the field in the fileSpecList.
129       vector<FileSpec>::iterator itr = fileSpecList.begin();
130       while (itr != fileSpecList.end())
131       {
132          if ((*itr).hasField(fst))
133             break;
134          itr++;
135       }
136          // found the field - add the filter.
137       if (itr != fileSpecList.end())
138       {
139          filterList.push_back(FilterPair(fst, filter));
140       }
141       else  // didn't find it - throw an exception
142       {
143          FileHunterException fhe("The FileSpec does not have a field: " +
144                                  fileSpecType);
145          GPSTK_THROW(fhe);
146       }
147       return *this;
148    }
149 
150 
find(const CommonTime & start,const CommonTime & end,const FileSpec::FileSpecSortType fsst,enum FileChunking chunk) const151    vector<string> FileHunter::find(const CommonTime& start,
152                                    const CommonTime& end,
153                                    const FileSpec::FileSpecSortType fsst,
154                                    enum FileChunking chunk) const
155    {
156          // ensure proper time order
157       if (end < start)
158       {
159          FileHunterException fhe("The times are specified incorrectly");
160          GPSTK_THROW(fhe);
161       }
162          // move start time back to a boundary defined by file chunking
163       CommonTime exStart;
164       switch (chunk)
165       {
166          case WEEK:
167          {
168             GPSWeekSecond tmp(start);
169             tmp.sow = 0.0;
170             exStart = tmp;
171             YDSTime yds(end);
172             break;
173          }
174          case DAY:
175          {
176             YDSTime tmp(start);
177             tmp.sod = 0.0;
178             exStart = tmp;
179             break;
180          }
181          case HOUR:
182          {
183             CivilTime tmp(start);
184             tmp.minute = 0;
185             tmp.second = 0.0;
186             exStart = tmp;
187             break;
188          }
189          case MINUTE:
190          {
191             CivilTime tmp(start);
192             tmp.second = 0.0;
193             exStart = tmp;
194             break;
195          }
196       }
197       exStart.setTimeSystem(start.getTimeSystem());
198 
199          // Set min and max years for progressive coarse time filtering
200       int minY, maxY;
201       YDSTime tmpStart(start);
202       YDSTime tmpEnd(end);
203       minY = tmpStart.year;
204       maxY = tmpEnd.year;
205 
206       vector<string> toReturn;
207          // Seed the return vector with an empty string which will be
208          // appended to with the root directory or drive, depending on
209          // your O/S.  This being empty is a termination condition for
210          // an inner loop.
211       toReturn.push_back(string());
212          // complete file spec string, i.e. full path,
213          // i.e. aggregation of fileSpecList for final time filtering.
214       string fileSpecStr;
215 
216       try
217       {
218          vector<FileSpec>::const_iterator fsIter = fileSpecList.begin();
219 
220 #ifdef _WIN32
221          if (fsIter != fileSpecList.end())
222          {
223                // If Windows, we should seed it with the drive spec
224             toReturn[0] = (*fsIter).getSpecString();
225             fileSpecStr = (*fsIter).getSpecString();
226             fsIter++;
227          }
228 #endif
229          while (fsIter != fileSpecList.end())
230          {
231             vector<string> toReturnTemp;
232             vector<FileSpec>::const_iterator next = fsIter;
233             next++;
234             bool expectDir = (next != fileSpecList.end());
235 
236             fileSpecStr += string(1, slash) + fsIter->getSpecString();
237             for (size_t i = 0; i < toReturn.size(); i++)
238             {
239                   // Search for the next entries
240                //cerr << "Dir = " << toReturn[i] << endl;
241                vector<string> newEntries =
242                      searchHelper(toReturn[i], *fsIter, expectDir);
243 
244                   // After getting the potential entries, filter
245                   // them based on the user criteria
246                filterHelper(newEntries, *fsIter);
247 
248                   // For each new entry, check the time (if possible)
249                   // then add it if it's in the correct time range.
250                   // this is why we need to enter an empty string to
251                   // seed toReturn
252                vector<string>::const_iterator entryIter = newEntries.begin();
253                for ( ; entryIter != newEntries.end(); entryIter++)
254                {
255                      // To avoid extra processing, immediately attempt
256                      // to filter-out new entries whose year is not
257                      // within the valid year range
258                   if (coarseTimeFilter(*entryIter, *fsIter, minY, maxY))
259                   {
260                      //cerr << "Filtered out entry: " << *entryIter << endl;
261                      continue;
262                   }
263                   string newPath = toReturn[i] + string(1,slash) + *entryIter;
264                   //cerr << "  " << newPath << endl;
265                   toReturnTemp.push_back(newPath);
266                }
267             }
268 
269             toReturn = toReturnTemp;
270 
271                // If toReturn is ever empty, there are no matches
272             if (toReturn.empty())
273                return toReturn;
274 
275             fsIter = next;
276          } // while (itr != fileSpecList.end())
277 
278             // Sort the list by the file spec of the last field
279          fsIter--;
280          (*fsIter).sortList(toReturn, fsst);
281       }
282       catch (gpstk::Exception& exc)
283       {
284          FileHunterException nexc(exc);
285          GPSTK_THROW(nexc);
286       }
287          // Filter by fully-determined time
288       vector<string> filtered;
289       try
290       {
291          FileSpec fullSpec(fileSpecStr);
292          for (unsigned i = 0; i < toReturn.size(); i++)
293          {
294             CommonTime fileTime = fullSpec.extractCommonTime(toReturn[i]);
295             if ((fileTime >= exStart) && (fileTime <= end))
296             {
297                filtered.push_back(toReturn[i]);
298             }
299          }
300       }
301       catch (gpstk::Exception& exc)
302       {
303          FileHunterException nexc(exc);
304          GPSTK_THROW(nexc);
305       }
306       return filtered;
307    }
308 
309 
init(const string & filespec)310    void FileHunter::init(const string& filespec)
311    {
312          // debug
313       try
314       {
315          if (filespec.empty())
316          {
317             FileHunterException exc("FileHunter: empty file spec is invalid");
318             GPSTK_THROW(exc);
319          }
320          fileSpecList.clear();
321          filterList.clear();
322 
323          string fs(filespec);
324 
325             // first, check if the file spec has a leading '/'.  if not
326             // prepend the current directory to it.
327 #ifndef _WIN32
328          if (fs[0] != slash)
329          {
330             char* cwd = getcwd(NULL, PATH_MAX);
331 
332             if (cwd == NULL)
333             {
334                FileHunterException fhe("Cannot get working directory");
335                GPSTK_THROW(fhe);
336             }
337             string wd(cwd);
338                // append a trailing slash if needed
339             if (wd[wd.size()-1] != slash)
340                wd += std::string(1,slash);
341             fs.insert(0, wd);
342             free(cwd);
343          }
344             // Append a closing slash so the breakdown algorithm has a
345             // means to terminate.
346          if (fs[fs.size()-1] != '/') fs += std::string(1,'/');
347 #else
348             // If Windows, then check for leading drive name.
349             // If not leading drivename, then prepend current working directory.
350          if (fs[1]!=':')
351          {
352             char* cwdW = _getcwd(NULL, PATH_MAX);
353             if (cwdW == NULL)
354             {
355                FileHunterException fhe("Cannot get working directory");
356                GPSTK_THROW(fhe);
357             }
358             string wdW(cwdW);
359 
360                // append a trailing slash if needed
361             if (wdW[wdW.size()-1] != '\\')
362                wdW += std::string(1,'\\');
363             fs.insert(0, wdW);
364             free(cwdW);
365          }
366             // Append a closing slash so the breakdown algorithm has a
367             // means to terminate.
368          if (fs[fs.size()-1] != '\\') fs += std::string(1,'\\');
369 #endif
370 
371             // break down the filespec directory by directory into the
372             // storage vector
373          while (!fs.empty())
374          {
375 #ifndef _WIN32
376             if (fs[0] != slash)
377             {
378                FileHunterException fhe("Unexpected character: " +
379                                        fs.substr(0,1));
380                GPSTK_THROW(fhe);
381             }
382             else
383             {
384                // erase the leading slash
385                fs.erase(0, 1);
386             }
387             string::size_type slashpos = fs.find(slash);
388             FileSpec tempfs(fs.substr(0, slashpos));
389 
390                // debug
391             //printf("FileHunter.init():  fs, slashpos, tempfs = '%s', %d, '%s'.\n",
392             //   fs.c_str(),(int)slashpos,tempfs.getSpecString().c_str());
393 
394             if (slashpos != string::npos)
395             {
396                fileSpecList.push_back(tempfs);
397             }
398             fs.erase(0, slashpos);
399 #else
400                // for Windows erase the leading backslash, if present
401             if (fs[0] == '\\') fs.erase(0,1);
402             string::size_type slashpos;
403             slashpos = fs.find('\\');
404             FileSpec tempfs(fs.substr(0, slashpos));
405 
406             if (slashpos!=string::npos) fileSpecList.push_back(tempfs);
407             fs.erase(0, slashpos);
408 #endif
409          }
410       }
411       catch (FileHunterException &e)
412       {
413          GPSTK_RETHROW(e);
414       }
415       catch (FileSpecException &e)
416       {
417          FileHunterException fhe(e);
418          fhe.addText("Error in the file spec");
419          GPSTK_THROW(fhe);
420       }
421       catch (Exception &e)
422       {
423          FileHunterException fhe(e);
424          GPSTK_THROW(fhe);
425       }
426       catch (std::exception &e)
427       {
428          FileHunterException fhe("std::exception caught: " + string(e.what()));
429          GPSTK_THROW(fhe);
430       }
431       catch (...)
432       {
433          FileHunterException fhe("unknown exception caught");
434          GPSTK_THROW(fhe);
435       }
436    } // init
437 
438 
searchHelper(const string & directory,const FileSpec & fs,bool expectDir) const439    vector<string> FileHunter::searchHelper(const string& directory,
440                                            const FileSpec& fs,
441                                            bool expectDir) const
442    {
443       try
444       {
445          vector<string> toReturn;
446 
447             // generate a search string
448          string searchString = fs.createSearchString();
449 #ifndef _WIN32
450             // open the dir
451          DIR* theDir;
452 
453          //cerr << "In searchHelper() before opendir()" << endl;
454 
455             // The first clause is a special kludge for Cygwin
456             // referencing DOS drive structures
457          //if (searchString.compare("cygdrive")==0)
458          //{
459          //   std::string tempFS =  std::string(1,slash) + searchString;
460          //   theDir = opendir(tempFS.c_str());
461          //}
462          //else
463 
464          if (directory.empty())
465             theDir = opendir(std::string(1,slash).c_str());
466          else
467             theDir = opendir(directory.c_str());
468 
469          //cerr << "In searchHelper() after opendir()" < endl;
470 
471          if (theDir == NULL)
472          {
473             FileHunterException fhe("Cannot open directory: " + directory);
474             GPSTK_THROW(fhe);
475          }
476 
477             // get each dir/file entry and compare it to the search string
478          struct dirent* entry;
479 
480          while ( (entry = readdir(theDir)) != NULL)
481          {
482             string filename(entry->d_name);
483 
484             //cerr << "Testing '" << filename << "'" << endl;
485 
486             if ((filename.length() == searchString.length()) &&
487                 (filename != ".") && (filename != "..") &&
488                 isLike(filename, searchString, '*', '+', '?'))
489             {
490                   // Determine if entry is a directory
491                bool isDir = false;
492                if (entry->d_type == DT_DIR)
493                {
494                   isDir = true;
495                }
496                else if ( (entry->d_type == DT_UNKNOWN)
497                          || (entry->d_type == DT_LNK))
498                {
499                   string fullname(directory + slash + filename);
500                   struct stat statBuf;
501                   int rc = stat(fullname.c_str(), &statBuf);
502                   if (0 == rc)
503                   {
504                      if (S_ISDIR(statBuf.st_mode))
505                      {
506                         isDir = true;
507                      }
508                   }
509                }
510                if (expectDir == isDir)
511                {
512                   toReturn.push_back(filename);
513                }
514             }
515          }
516             // use filespec for extra verification?
517 
518             // cleanup
519          if (closedir(theDir) != 0)
520          {
521             FileHunterException fhe("Error closing directory: " +
522                                     directory);
523             GPSTK_THROW(fhe);
524          }
525 #endif
526 #ifdef _WIN32
527             // say 'hi' to old school MS io
528          char* cwd = _getcwd(NULL, PATH_MAX);
529          _chdir(directory.c_str());
530 
531          struct _finddata_t c_file;
532          long hFile;
533 
534          if ( (hFile = _findfirst( searchString.c_str(), &c_file )) != -1 )
535          {
536             std::string filename(c_file.name);
537             bool isDir = (c_file.attrib & _A_SUBDIR);
538             if ((filename != ".") && (filename != ".."))
539             {
540                if (expectDir == isDir)
541                {
542                   toReturn.push_back(filename);
543                }
544             }
545             while( _findnext( hFile, &c_file ) == 0 )
546             {
547                isDir = (c_file.attrib & _A_SUBDIR);
548                filename = std::string(c_file.name);
549                if ((filename != ".") && (filename != ".."))
550                {
551                   if (expectDir == isDir)
552                   {
553                      toReturn.push_back(filename);
554                   }
555                }
556             }
557          }
558          _findclose(hFile);
559          _chdir(cwd);
560 #endif
561          return toReturn;
562       }
563       catch (Exception& e)
564       {
565          FileHunterException fhe(e);
566          fhe.addText("Search failed");
567          GPSTK_THROW(fhe);
568       }
569       catch (std::exception& e)
570       {
571          FileHunterException fhe("std::exception caught: " + string(e.what()));
572          fhe.addText("Search failed");
573          GPSTK_THROW(fhe);
574       }
575       catch (...)
576       {
577          FileHunterException fhe("unknown exception");
578          fhe.addText("Search failed");
579          GPSTK_THROW(fhe);
580       }
581    }  // searchHelper()
582 
583 
filterHelper(vector<std::string> & fileList,const FileSpec & fs) const584    void FileHunter::filterHelper(vector<std::string>& fileList,
585                                  const FileSpec& fs) const
586   {
587          // go through the filterList.  If the filespec has
588          // any fields to filter, remove matches from fileList
589 
590          // for each element in the filter....
591       vector<FilterPair>::const_iterator filterItr = filterList.begin();
592       while (filterItr != filterList.end())
593       {
594          try
595          {
596                // if the file spec has that element...
597             if (fs.hasField((*filterItr).first))
598             {
599                   // then search through the file list and
600                   // remove any files that don't match the filter.
601                vector<string>::iterator fileListItr = fileList.begin();
602                while (fileListItr != fileList.end())
603                {
604                      // thisField holds the part of the file name
605                      // that we're searching for
606                   string thisField;
607                   thisField = fs.extractField(*fileListItr,
608                                               (*filterItr).first);
609                   vector<string>::const_iterator filterStringItr =
610                         (*filterItr).second.begin();
611 
612                      // the iterator searches each element of the filter
613                      // and compares it to thisField.  If there's a match
614                      // then keep it.  if there's no match, delete it.
615                   while (filterStringItr != (*filterItr).second.end())
616                   {
617                      if (thisField == rightJustify(*filterStringItr,
618                                                    thisField.size(),
619                                                    '0'))
620                      {
621                         break;
622                      }
623                      filterStringItr++;
624                   }
625 
626                   if (filterStringItr == (*filterItr).second.end())
627                      fileListItr = fileList.erase(fileListItr);
628                   else
629                      fileListItr++;
630                }
631             }
632          }
633          catch (FileSpecException& fse)
634          {
635             FileHunterException fhe(fse);
636             GPSTK_THROW(fhe)
637          }
638          filterItr++;
639       }
640    }  // filterHelper()
641 
642 
coarseTimeFilter(const string & filename,const FileSpec & fs,int minY,int maxY) const643    bool FileHunter::coarseTimeFilter(
644          const string& filename,
645          const FileSpec& fs,
646          int minY,
647          int maxY) const
648    {
649       try
650       {
651          TimeTag::IdToValue tags;
652          TimeTag::getInfo(filename, fs.getSpecString(), tags);
653          TimeTag::IdToValue::const_iterator tagIter = tags.begin();
654          for ( ; tagIter != tags.end(); tagIter++)
655          {
656             switch (tagIter->first)
657             {
658                case 'Y':
659                {
660                   long year = asInt(tagIter->second);
661                   return ((year < minY) || (year > maxY));
662                }
663                case 'y':
664                {
665                   long year = asInt(tagIter->second);
666                   if (year < 1970)
667                   {
668                      year += (year >= 69) ? 1900 : 2000;
669                   }
670                   return ((year < minY) || (year > maxY));
671                }
672                //case 'F':  // Full-week filtering might also be nice
673             }
674          }
675       }
676       catch (...)
677       { }
678       return false;
679    }
680 
681 
dump(ostream & o) const682    void FileHunter::dump(ostream& o) const
683    {
684       vector<FileSpec>::const_iterator itr = fileSpecList.begin();
685       while (itr != fileSpecList.end())
686       {
687          (*itr).dump(o);
688          itr++;
689       }
690    }
691 
692 } // namespace
693