1 //============================================================================== 2 // 3 // This file is part of GPSTk, the GPS Toolkit. 4 // 5 // The GPSTk is free software; you can redistribute it and/or modify 6 // it under the terms of the GNU Lesser General Public License as published 7 // by the Free Software Foundation; either version 3.0 of the License, or 8 // any later version. 9 // 10 // The GPSTk is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public 16 // License along with GPSTk; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA 18 // 19 // This software was developed by Applied Research Laboratories at the 20 // University of Texas at Austin. 21 // Copyright 2004-2020, The Board of Regents of The University of Texas System 22 // 23 //============================================================================== 24 25 //============================================================================== 26 // 27 // This software was developed by Applied Research Laboratories at the 28 // University of Texas at Austin, under contract to an agency or agencies 29 // within the U.S. Department of Defense. The U.S. Government retains all 30 // rights to use, duplicate, distribute, disclose, or release this software. 31 // 32 // Pursuant to DoD Directive 523024 33 // 34 // DISTRIBUTION STATEMENT A: This software has been approved for public 35 // release, distribution is unlimited. 36 // 37 //============================================================================== 38 39 /** 40 * @file FileHunter.cpp 41 * Find all files matching a specification. 42 */ 43 44 #include "FileHunter.hpp" 45 #include "YDSTime.hpp" 46 #include "CivilTime.hpp" 47 #include "GPSWeekSecond.hpp" 48 49 using namespace std; 50 using namespace gpstk; 51 using namespace gpstk::StringUtils; 52 53 // headers for directory searching interface 54 #ifndef _WIN32 55 #include <unistd.h> 56 #include <dirent.h> 57 #include <limits.h> 58 #include <sys/stat.h> 59 60 #else 61 #include <io.h> 62 #include <direct.h> 63 #define PATH_MAX _MAX_PATH 64 #endif 65 66 namespace gpstk 67 { 68 FileHunter(const string & filespec)69 FileHunter::FileHunter(const string& filespec) 70 { 71 try 72 { 73 init(filespec); 74 } 75 catch (FileHunterException& e) 76 { 77 GPSTK_RETHROW(e); 78 } 79 } 80 81 FileHunter(const FileSpec & filespec)82 FileHunter::FileHunter(const FileSpec& filespec) 83 { 84 try 85 { 86 init(filespec.getSpecString()); 87 } 88 catch (FileHunterException& e) 89 { 90 GPSTK_RETHROW(e); 91 } 92 } 93 94 newHunt(const string & filespec)95 FileHunter& FileHunter::newHunt(const string& filespec) 96 { 97 try 98 { 99 init(filespec); 100 } 101 catch (FileHunterException& e) 102 { 103 GPSTK_RETHROW(e); 104 } 105 return *this; 106 } 107 108 setFilter(const FileSpec::FileSpecType fst,const vector<string> & filter)109 FileHunter& FileHunter::setFilter(const FileSpec::FileSpecType fst, 110 const vector<string>& filter) 111 { 112 std::string fileSpecType; 113 if (filter.empty()) 114 { 115 FileHunterException exc("FileHunter::setFilter with empty filter" 116 " would result in no matches"); 117 GPSTK_THROW(exc); 118 } 119 try 120 { // ensure a valid file spec type 121 fileSpecType = FileSpec::convertFileSpecType(fst); 122 } 123 catch (FileSpecException& fse) 124 { 125 FileHunterException fhe(fse.getText(), fse.getErrorId()); 126 GPSTK_THROW(fhe); 127 } 128 // try to find the field in the fileSpecList. 129 vector<FileSpec>::iterator itr = fileSpecList.begin(); 130 while (itr != fileSpecList.end()) 131 { 132 if ((*itr).hasField(fst)) 133 break; 134 itr++; 135 } 136 // found the field - add the filter. 137 if (itr != fileSpecList.end()) 138 { 139 filterList.push_back(FilterPair(fst, filter)); 140 } 141 else // didn't find it - throw an exception 142 { 143 FileHunterException fhe("The FileSpec does not have a field: " + 144 fileSpecType); 145 GPSTK_THROW(fhe); 146 } 147 return *this; 148 } 149 150 find(const CommonTime & start,const CommonTime & end,const FileSpec::FileSpecSortType fsst,enum FileChunking chunk) const151 vector<string> FileHunter::find(const CommonTime& start, 152 const CommonTime& end, 153 const FileSpec::FileSpecSortType fsst, 154 enum FileChunking chunk) const 155 { 156 // ensure proper time order 157 if (end < start) 158 { 159 FileHunterException fhe("The times are specified incorrectly"); 160 GPSTK_THROW(fhe); 161 } 162 // move start time back to a boundary defined by file chunking 163 CommonTime exStart; 164 switch (chunk) 165 { 166 case WEEK: 167 { 168 GPSWeekSecond tmp(start); 169 tmp.sow = 0.0; 170 exStart = tmp; 171 YDSTime yds(end); 172 break; 173 } 174 case DAY: 175 { 176 YDSTime tmp(start); 177 tmp.sod = 0.0; 178 exStart = tmp; 179 break; 180 } 181 case HOUR: 182 { 183 CivilTime tmp(start); 184 tmp.minute = 0; 185 tmp.second = 0.0; 186 exStart = tmp; 187 break; 188 } 189 case MINUTE: 190 { 191 CivilTime tmp(start); 192 tmp.second = 0.0; 193 exStart = tmp; 194 break; 195 } 196 } 197 exStart.setTimeSystem(start.getTimeSystem()); 198 199 // Set min and max years for progressive coarse time filtering 200 int minY, maxY; 201 YDSTime tmpStart(start); 202 YDSTime tmpEnd(end); 203 minY = tmpStart.year; 204 maxY = tmpEnd.year; 205 206 vector<string> toReturn; 207 // Seed the return vector with an empty string which will be 208 // appended to with the root directory or drive, depending on 209 // your O/S. This being empty is a termination condition for 210 // an inner loop. 211 toReturn.push_back(string()); 212 // complete file spec string, i.e. full path, 213 // i.e. aggregation of fileSpecList for final time filtering. 214 string fileSpecStr; 215 216 try 217 { 218 vector<FileSpec>::const_iterator fsIter = fileSpecList.begin(); 219 220 #ifdef _WIN32 221 if (fsIter != fileSpecList.end()) 222 { 223 // If Windows, we should seed it with the drive spec 224 toReturn[0] = (*fsIter).getSpecString(); 225 fileSpecStr = (*fsIter).getSpecString(); 226 fsIter++; 227 } 228 #endif 229 while (fsIter != fileSpecList.end()) 230 { 231 vector<string> toReturnTemp; 232 vector<FileSpec>::const_iterator next = fsIter; 233 next++; 234 bool expectDir = (next != fileSpecList.end()); 235 236 fileSpecStr += string(1, slash) + fsIter->getSpecString(); 237 for (size_t i = 0; i < toReturn.size(); i++) 238 { 239 // Search for the next entries 240 //cerr << "Dir = " << toReturn[i] << endl; 241 vector<string> newEntries = 242 searchHelper(toReturn[i], *fsIter, expectDir); 243 244 // After getting the potential entries, filter 245 // them based on the user criteria 246 filterHelper(newEntries, *fsIter); 247 248 // For each new entry, check the time (if possible) 249 // then add it if it's in the correct time range. 250 // this is why we need to enter an empty string to 251 // seed toReturn 252 vector<string>::const_iterator entryIter = newEntries.begin(); 253 for ( ; entryIter != newEntries.end(); entryIter++) 254 { 255 // To avoid extra processing, immediately attempt 256 // to filter-out new entries whose year is not 257 // within the valid year range 258 if (coarseTimeFilter(*entryIter, *fsIter, minY, maxY)) 259 { 260 //cerr << "Filtered out entry: " << *entryIter << endl; 261 continue; 262 } 263 string newPath = toReturn[i] + string(1,slash) + *entryIter; 264 //cerr << " " << newPath << endl; 265 toReturnTemp.push_back(newPath); 266 } 267 } 268 269 toReturn = toReturnTemp; 270 271 // If toReturn is ever empty, there are no matches 272 if (toReturn.empty()) 273 return toReturn; 274 275 fsIter = next; 276 } // while (itr != fileSpecList.end()) 277 278 // Sort the list by the file spec of the last field 279 fsIter--; 280 (*fsIter).sortList(toReturn, fsst); 281 } 282 catch (gpstk::Exception& exc) 283 { 284 FileHunterException nexc(exc); 285 GPSTK_THROW(nexc); 286 } 287 // Filter by fully-determined time 288 vector<string> filtered; 289 try 290 { 291 FileSpec fullSpec(fileSpecStr); 292 for (unsigned i = 0; i < toReturn.size(); i++) 293 { 294 CommonTime fileTime = fullSpec.extractCommonTime(toReturn[i]); 295 if ((fileTime >= exStart) && (fileTime <= end)) 296 { 297 filtered.push_back(toReturn[i]); 298 } 299 } 300 } 301 catch (gpstk::Exception& exc) 302 { 303 FileHunterException nexc(exc); 304 GPSTK_THROW(nexc); 305 } 306 return filtered; 307 } 308 309 init(const string & filespec)310 void FileHunter::init(const string& filespec) 311 { 312 // debug 313 try 314 { 315 if (filespec.empty()) 316 { 317 FileHunterException exc("FileHunter: empty file spec is invalid"); 318 GPSTK_THROW(exc); 319 } 320 fileSpecList.clear(); 321 filterList.clear(); 322 323 string fs(filespec); 324 325 // first, check if the file spec has a leading '/'. if not 326 // prepend the current directory to it. 327 #ifndef _WIN32 328 if (fs[0] != slash) 329 { 330 char* cwd = getcwd(NULL, PATH_MAX); 331 332 if (cwd == NULL) 333 { 334 FileHunterException fhe("Cannot get working directory"); 335 GPSTK_THROW(fhe); 336 } 337 string wd(cwd); 338 // append a trailing slash if needed 339 if (wd[wd.size()-1] != slash) 340 wd += std::string(1,slash); 341 fs.insert(0, wd); 342 free(cwd); 343 } 344 // Append a closing slash so the breakdown algorithm has a 345 // means to terminate. 346 if (fs[fs.size()-1] != '/') fs += std::string(1,'/'); 347 #else 348 // If Windows, then check for leading drive name. 349 // If not leading drivename, then prepend current working directory. 350 if (fs[1]!=':') 351 { 352 char* cwdW = _getcwd(NULL, PATH_MAX); 353 if (cwdW == NULL) 354 { 355 FileHunterException fhe("Cannot get working directory"); 356 GPSTK_THROW(fhe); 357 } 358 string wdW(cwdW); 359 360 // append a trailing slash if needed 361 if (wdW[wdW.size()-1] != '\\') 362 wdW += std::string(1,'\\'); 363 fs.insert(0, wdW); 364 free(cwdW); 365 } 366 // Append a closing slash so the breakdown algorithm has a 367 // means to terminate. 368 if (fs[fs.size()-1] != '\\') fs += std::string(1,'\\'); 369 #endif 370 371 // break down the filespec directory by directory into the 372 // storage vector 373 while (!fs.empty()) 374 { 375 #ifndef _WIN32 376 if (fs[0] != slash) 377 { 378 FileHunterException fhe("Unexpected character: " + 379 fs.substr(0,1)); 380 GPSTK_THROW(fhe); 381 } 382 else 383 { 384 // erase the leading slash 385 fs.erase(0, 1); 386 } 387 string::size_type slashpos = fs.find(slash); 388 FileSpec tempfs(fs.substr(0, slashpos)); 389 390 // debug 391 //printf("FileHunter.init(): fs, slashpos, tempfs = '%s', %d, '%s'.\n", 392 // fs.c_str(),(int)slashpos,tempfs.getSpecString().c_str()); 393 394 if (slashpos != string::npos) 395 { 396 fileSpecList.push_back(tempfs); 397 } 398 fs.erase(0, slashpos); 399 #else 400 // for Windows erase the leading backslash, if present 401 if (fs[0] == '\\') fs.erase(0,1); 402 string::size_type slashpos; 403 slashpos = fs.find('\\'); 404 FileSpec tempfs(fs.substr(0, slashpos)); 405 406 if (slashpos!=string::npos) fileSpecList.push_back(tempfs); 407 fs.erase(0, slashpos); 408 #endif 409 } 410 } 411 catch (FileHunterException &e) 412 { 413 GPSTK_RETHROW(e); 414 } 415 catch (FileSpecException &e) 416 { 417 FileHunterException fhe(e); 418 fhe.addText("Error in the file spec"); 419 GPSTK_THROW(fhe); 420 } 421 catch (Exception &e) 422 { 423 FileHunterException fhe(e); 424 GPSTK_THROW(fhe); 425 } 426 catch (std::exception &e) 427 { 428 FileHunterException fhe("std::exception caught: " + string(e.what())); 429 GPSTK_THROW(fhe); 430 } 431 catch (...) 432 { 433 FileHunterException fhe("unknown exception caught"); 434 GPSTK_THROW(fhe); 435 } 436 } // init 437 438 searchHelper(const string & directory,const FileSpec & fs,bool expectDir) const439 vector<string> FileHunter::searchHelper(const string& directory, 440 const FileSpec& fs, 441 bool expectDir) const 442 { 443 try 444 { 445 vector<string> toReturn; 446 447 // generate a search string 448 string searchString = fs.createSearchString(); 449 #ifndef _WIN32 450 // open the dir 451 DIR* theDir; 452 453 //cerr << "In searchHelper() before opendir()" << endl; 454 455 // The first clause is a special kludge for Cygwin 456 // referencing DOS drive structures 457 //if (searchString.compare("cygdrive")==0) 458 //{ 459 // std::string tempFS = std::string(1,slash) + searchString; 460 // theDir = opendir(tempFS.c_str()); 461 //} 462 //else 463 464 if (directory.empty()) 465 theDir = opendir(std::string(1,slash).c_str()); 466 else 467 theDir = opendir(directory.c_str()); 468 469 //cerr << "In searchHelper() after opendir()" < endl; 470 471 if (theDir == NULL) 472 { 473 FileHunterException fhe("Cannot open directory: " + directory); 474 GPSTK_THROW(fhe); 475 } 476 477 // get each dir/file entry and compare it to the search string 478 struct dirent* entry; 479 480 while ( (entry = readdir(theDir)) != NULL) 481 { 482 string filename(entry->d_name); 483 484 //cerr << "Testing '" << filename << "'" << endl; 485 486 if ((filename.length() == searchString.length()) && 487 (filename != ".") && (filename != "..") && 488 isLike(filename, searchString, '*', '+', '?')) 489 { 490 // Determine if entry is a directory 491 bool isDir = false; 492 if (entry->d_type == DT_DIR) 493 { 494 isDir = true; 495 } 496 else if ( (entry->d_type == DT_UNKNOWN) 497 || (entry->d_type == DT_LNK)) 498 { 499 string fullname(directory + slash + filename); 500 struct stat statBuf; 501 int rc = stat(fullname.c_str(), &statBuf); 502 if (0 == rc) 503 { 504 if (S_ISDIR(statBuf.st_mode)) 505 { 506 isDir = true; 507 } 508 } 509 } 510 if (expectDir == isDir) 511 { 512 toReturn.push_back(filename); 513 } 514 } 515 } 516 // use filespec for extra verification? 517 518 // cleanup 519 if (closedir(theDir) != 0) 520 { 521 FileHunterException fhe("Error closing directory: " + 522 directory); 523 GPSTK_THROW(fhe); 524 } 525 #endif 526 #ifdef _WIN32 527 // say 'hi' to old school MS io 528 char* cwd = _getcwd(NULL, PATH_MAX); 529 _chdir(directory.c_str()); 530 531 struct _finddata_t c_file; 532 long hFile; 533 534 if ( (hFile = _findfirst( searchString.c_str(), &c_file )) != -1 ) 535 { 536 std::string filename(c_file.name); 537 bool isDir = (c_file.attrib & _A_SUBDIR); 538 if ((filename != ".") && (filename != "..")) 539 { 540 if (expectDir == isDir) 541 { 542 toReturn.push_back(filename); 543 } 544 } 545 while( _findnext( hFile, &c_file ) == 0 ) 546 { 547 isDir = (c_file.attrib & _A_SUBDIR); 548 filename = std::string(c_file.name); 549 if ((filename != ".") && (filename != "..")) 550 { 551 if (expectDir == isDir) 552 { 553 toReturn.push_back(filename); 554 } 555 } 556 } 557 } 558 _findclose(hFile); 559 _chdir(cwd); 560 #endif 561 return toReturn; 562 } 563 catch (Exception& e) 564 { 565 FileHunterException fhe(e); 566 fhe.addText("Search failed"); 567 GPSTK_THROW(fhe); 568 } 569 catch (std::exception& e) 570 { 571 FileHunterException fhe("std::exception caught: " + string(e.what())); 572 fhe.addText("Search failed"); 573 GPSTK_THROW(fhe); 574 } 575 catch (...) 576 { 577 FileHunterException fhe("unknown exception"); 578 fhe.addText("Search failed"); 579 GPSTK_THROW(fhe); 580 } 581 } // searchHelper() 582 583 filterHelper(vector<std::string> & fileList,const FileSpec & fs) const584 void FileHunter::filterHelper(vector<std::string>& fileList, 585 const FileSpec& fs) const 586 { 587 // go through the filterList. If the filespec has 588 // any fields to filter, remove matches from fileList 589 590 // for each element in the filter.... 591 vector<FilterPair>::const_iterator filterItr = filterList.begin(); 592 while (filterItr != filterList.end()) 593 { 594 try 595 { 596 // if the file spec has that element... 597 if (fs.hasField((*filterItr).first)) 598 { 599 // then search through the file list and 600 // remove any files that don't match the filter. 601 vector<string>::iterator fileListItr = fileList.begin(); 602 while (fileListItr != fileList.end()) 603 { 604 // thisField holds the part of the file name 605 // that we're searching for 606 string thisField; 607 thisField = fs.extractField(*fileListItr, 608 (*filterItr).first); 609 vector<string>::const_iterator filterStringItr = 610 (*filterItr).second.begin(); 611 612 // the iterator searches each element of the filter 613 // and compares it to thisField. If there's a match 614 // then keep it. if there's no match, delete it. 615 while (filterStringItr != (*filterItr).second.end()) 616 { 617 if (thisField == rightJustify(*filterStringItr, 618 thisField.size(), 619 '0')) 620 { 621 break; 622 } 623 filterStringItr++; 624 } 625 626 if (filterStringItr == (*filterItr).second.end()) 627 fileListItr = fileList.erase(fileListItr); 628 else 629 fileListItr++; 630 } 631 } 632 } 633 catch (FileSpecException& fse) 634 { 635 FileHunterException fhe(fse); 636 GPSTK_THROW(fhe) 637 } 638 filterItr++; 639 } 640 } // filterHelper() 641 642 coarseTimeFilter(const string & filename,const FileSpec & fs,int minY,int maxY) const643 bool FileHunter::coarseTimeFilter( 644 const string& filename, 645 const FileSpec& fs, 646 int minY, 647 int maxY) const 648 { 649 try 650 { 651 TimeTag::IdToValue tags; 652 TimeTag::getInfo(filename, fs.getSpecString(), tags); 653 TimeTag::IdToValue::const_iterator tagIter = tags.begin(); 654 for ( ; tagIter != tags.end(); tagIter++) 655 { 656 switch (tagIter->first) 657 { 658 case 'Y': 659 { 660 long year = asInt(tagIter->second); 661 return ((year < minY) || (year > maxY)); 662 } 663 case 'y': 664 { 665 long year = asInt(tagIter->second); 666 if (year < 1970) 667 { 668 year += (year >= 69) ? 1900 : 2000; 669 } 670 return ((year < minY) || (year > maxY)); 671 } 672 //case 'F': // Full-week filtering might also be nice 673 } 674 } 675 } 676 catch (...) 677 { } 678 return false; 679 } 680 681 dump(ostream & o) const682 void FileHunter::dump(ostream& o) const 683 { 684 vector<FileSpec>::const_iterator itr = fileSpecList.begin(); 685 while (itr != fileSpecList.end()) 686 { 687 (*itr).dump(o); 688 itr++; 689 } 690 } 691 692 } // namespace 693