1 #include "rar.hpp"
2 
ScanTree(StringList * FileMasks,RECURSE_MODE Recurse,bool GetLinks,SCAN_DIRS GetDirs)3 ScanTree::ScanTree(StringList *FileMasks,RECURSE_MODE Recurse,bool GetLinks,SCAN_DIRS GetDirs)
4 {
5   ScanTree::FileMasks=FileMasks;
6   ScanTree::Recurse=Recurse;
7   ScanTree::GetLinks=GetLinks;
8   ScanTree::GetDirs=GetDirs;
9 
10   ScanEntireDisk=false;
11   FolderWildcards=false;
12 
13   SetAllMaskDepth=0;
14   *CurMask=0;
15   memset(FindStack,0,sizeof(FindStack));
16   Depth=0;
17   Errors=0;
18   *ErrArcName=0;
19   Cmd=NULL;
20   ErrDirList=NULL;
21   ErrDirSpecPathLength=NULL;
22 }
23 
24 
~ScanTree()25 ScanTree::~ScanTree()
26 {
27   for (int I=Depth;I>=0;I--)
28     if (FindStack[I]!=NULL)
29       delete FindStack[I];
30 }
31 
32 
GetNext(FindData * FD)33 SCAN_CODE ScanTree::GetNext(FindData *FD)
34 {
35   if (Depth<0)
36     return SCAN_DONE;
37 
38 #ifndef SILENT
39   uint LoopCount=0;
40 #endif
41 
42   SCAN_CODE FindCode;
43   while (1)
44   {
45     if (*CurMask==0 && !GetNextMask())
46       return SCAN_DONE;
47 
48 #ifndef SILENT
49     // Let's return some ticks to system or WinRAR can become irresponsible
50     // while scanning files in command like "winrar a -r arc c:\file.ext".
51     // Also we reset system sleep timer here.
52     if ((++LoopCount & 0x3ff)==0)
53       Wait();
54 #endif
55 
56     FindCode=FindProc(FD);
57     if (FindCode==SCAN_ERROR)
58     {
59       Errors++;
60       continue;
61     }
62     if (FindCode==SCAN_NEXT)
63       continue;
64     if (FindCode==SCAN_SUCCESS && FD->IsDir && GetDirs==SCAN_SKIPDIRS)
65       continue;
66     if (FindCode==SCAN_DONE && GetNextMask())
67       continue;
68     if (FilterList.ItemsCount()>0 && FindCode==SCAN_SUCCESS)
69       if (!CommandData::CheckArgs(&FilterList,FD->IsDir,FD->Name,false,MATCH_WILDSUBPATH))
70         continue;
71     break;
72   }
73   return FindCode;
74 }
75 
76 
77 // For masks like dir1\dir2*\*.ext in non-recursive mode.
ExpandFolderMask()78 bool ScanTree::ExpandFolderMask()
79 {
80   bool WildcardFound=false;
81   uint SlashPos=0;
82   for (int I=0;CurMask[I]!=0;I++)
83   {
84     if (CurMask[I]=='?' || CurMask[I]=='*')
85       WildcardFound=true;
86     if (WildcardFound && IsPathDiv(CurMask[I]))
87     {
88       // First path separator position after folder wildcard mask.
89       // In case of dir1\dir2*\dir3\name.ext mask it may point not to file
90       // name, so we cannot use PointToName() here.
91       SlashPos=I;
92       break;
93     }
94   }
95 
96   wchar Mask[NM];
97   wcsncpyz(Mask,CurMask,ASIZE(Mask));
98   Mask[SlashPos]=0;
99 
100   // Prepare the list of all folders matching the wildcard mask.
101   ExpandedFolderList.Reset();
102   FindFile Find;
103   Find.SetMask(Mask);
104   FindData FD;
105   while (Find.Next(&FD))
106     if (FD.IsDir)
107     {
108       wcsncatz(FD.Name,CurMask+SlashPos,ASIZE(FD.Name));
109 
110       // Treat dir*\* or dir*\*.* as dir, so empty 'dir' is also matched
111       // by such mask. Skipping empty dir with dir*\*.* confused some users.
112       wchar *LastMask=PointToName(FD.Name);
113       if (wcscmp(LastMask,L"*")==0 || wcscmp(LastMask,L"*.*")==0)
114         RemoveNameFromPath(FD.Name);
115 
116       ExpandedFolderList.AddString(FD.Name);
117     }
118   if (ExpandedFolderList.ItemsCount()==0)
119     return false;
120   // Return the first matching folder name now.
121   ExpandedFolderList.GetString(CurMask,ASIZE(CurMask));
122   return true;
123 }
124 
125 
126 // For masks like dir1\dir2*\file.ext this function sets 'dir1' recursive mask
127 // and '*\dir2*\file.ext' filter. Masks without folder wildcards are
128 // returned as is.
GetFilteredMask()129 bool ScanTree::GetFilteredMask()
130 {
131   // If we have some matching folders left for non-recursive folder wildcard
132   // mask, we return it here.
133   if (ExpandedFolderList.ItemsCount()>0 && ExpandedFolderList.GetString(CurMask,ASIZE(CurMask)))
134     return true;
135 
136   FolderWildcards=false;
137   FilterList.Reset();
138   if (!FileMasks->GetString(CurMask,ASIZE(CurMask)))
139     return false;
140 
141   // Check if folder wildcards present.
142   bool WildcardFound=false;
143   uint FolderWildcardCount=0;
144   uint SlashPos=0;
145   uint StartPos=0;
146 #ifdef _WIN_ALL // Not treat the special NTFS \\?\d: path prefix as a wildcard.
147   if (CurMask[0]=='\\' && CurMask[1]=='\\' && CurMask[2]=='?' && CurMask[3]=='\\')
148     StartPos=4;
149 #endif
150   for (uint I=StartPos;CurMask[I]!=0;I++)
151   {
152     if (CurMask[I]=='?' || CurMask[I]=='*')
153       WildcardFound=true;
154     if (IsPathDiv(CurMask[I]) || IsDriveDiv(CurMask[I]))
155     {
156       if (WildcardFound)
157       {
158         // Calculate a number of folder wildcards in current mask.
159         FolderWildcardCount++;
160         WildcardFound=false;
161       }
162       if (FolderWildcardCount==0)
163         SlashPos=I; // Slash position before first folder wildcard mask.
164     }
165   }
166   if (FolderWildcardCount==0)
167     return true;
168   FolderWildcards=true; // Global folder wildcards flag.
169 
170   // If we have only one folder wildcard component and -r is missing or -r-
171   // is specified, prepare matching folders in non-recursive mode.
172   // We assume -r for masks like dir1*\dir2*\file*, because it is complicated
173   // to fast find them using OS file find API call.
174   if ((Recurse==RECURSE_NONE || Recurse==RECURSE_DISABLE) && FolderWildcardCount==1)
175     return ExpandFolderMask();
176 
177   wchar Filter[NM];
178   // Convert path\dir*\ to *\dir filter to search for 'dir' in all 'path' subfolders.
179   wcsncpyz(Filter,L"*",ASIZE(Filter));
180   AddEndSlash(Filter,ASIZE(Filter));
181   // SlashPos might point or not point to path separator for masks like 'dir*', '\dir*' or 'd:dir*'
182   wchar *WildName=IsPathDiv(CurMask[SlashPos]) || IsDriveDiv(CurMask[SlashPos]) ? CurMask+SlashPos+1 : CurMask+SlashPos;
183   wcsncatz(Filter,WildName,ASIZE(Filter));
184 
185   // Treat dir*\* or dir*\*.* as dir\, so empty 'dir' is also matched
186   // by such mask. Skipping empty dir with dir*\*.* confused some users.
187   wchar *LastMask=PointToName(Filter);
188   if (wcscmp(LastMask,L"*")==0 || wcscmp(LastMask,L"*.*")==0)
189     *LastMask=0;
190 
191   FilterList.AddString(Filter);
192 
193   bool RelativeDrive=IsDriveDiv(CurMask[SlashPos]);
194   if (RelativeDrive)
195     SlashPos++; // Use "d:" instead of "d" for d:* mask.
196 
197   CurMask[SlashPos]=0;
198 
199   if (!RelativeDrive) // Keep d: mask as is, not convert to d:\*
200   {
201     // We need to append "\*" both for -ep1 to work correctly and to
202     // convert d:\* masks previously truncated to d: back to original form.
203     AddEndSlash(CurMask,ASIZE(CurMask));
204     wcsncatz(CurMask,MASKALL,ASIZE(CurMask));
205   }
206   return true;
207 }
208 
209 
GetNextMask()210 bool ScanTree::GetNextMask()
211 {
212   if (!GetFilteredMask())
213     return false;
214 #ifdef _WIN_ALL
215   UnixSlashToDos(CurMask,CurMask,ASIZE(CurMask));
216 #endif
217 
218   // We wish to scan entire disk if mask like c:\ is specified
219   // regardless of recursion mode. Use c:\*.* mask when need to scan only
220   // the root directory.
221   ScanEntireDisk=IsDriveLetter(CurMask) && IsPathDiv(CurMask[2]) && CurMask[3]==0;
222 
223   wchar *Name=PointToName(CurMask);
224   if (*Name==0)
225     wcsncatz(CurMask,MASKALL,ASIZE(CurMask));
226   if (Name[0]=='.' && (Name[1]==0 || Name[1]=='.' && Name[2]==0))
227   {
228     AddEndSlash(CurMask,ASIZE(CurMask));
229     wcsncatz(CurMask,MASKALL,ASIZE(CurMask));
230   }
231   SpecPathLength=Name-CurMask;
232   Depth=0;
233 
234   wcsncpyz(OrigCurMask,CurMask,ASIZE(OrigCurMask));
235 
236   return true;
237 }
238 
239 
FindProc(FindData * FD)240 SCAN_CODE ScanTree::FindProc(FindData *FD)
241 {
242   if (*CurMask==0)
243     return SCAN_NEXT;
244   bool FastFindFile=false;
245 
246   if (FindStack[Depth]==NULL) // No FindFile object for this depth yet.
247   {
248     bool Wildcards=IsWildcard(CurMask);
249 
250     // If we have a file name without wildcards, we can try to use
251     // FastFind to optimize speed. For example, in Unix it results in
252     // stat call instead of opendir/readdir/closedir.
253     bool FindCode=!Wildcards && FindFile::FastFind(CurMask,FD,GetLinks);
254 
255     // Link check is important for NTFS, where links can have "Directory"
256     // attribute, but we do not want to recurse to them in "get links" mode.
257     bool IsDir=FindCode && FD->IsDir && (!GetLinks || !FD->IsLink);
258 
259     // SearchAll means that we'll use "*" mask for search, so we'll find
260     // subdirectories and will be able to recurse into them.
261     // We do not use "*" for directories at any level or for files
262     // at top level in recursion mode. We always comrpess the entire directory
263     // if folder wildcard is specified.
264     bool SearchAll=!IsDir && (Depth>0 || Recurse==RECURSE_ALWAYS ||
265                    FolderWildcards && Recurse!=RECURSE_DISABLE ||
266                    Wildcards && Recurse==RECURSE_WILDCARDS ||
267                    ScanEntireDisk && Recurse!=RECURSE_DISABLE);
268     if (Depth==0)
269       SearchAllInRoot=SearchAll;
270     if (SearchAll || Wildcards)
271     {
272       // Create the new FindFile object for wildcard based search.
273       FindStack[Depth]=new FindFile;
274 
275       wchar SearchMask[NM];
276       wcsncpyz(SearchMask,CurMask,ASIZE(SearchMask));
277       if (SearchAll)
278         SetName(SearchMask,MASKALL,ASIZE(SearchMask));
279       FindStack[Depth]->SetMask(SearchMask);
280     }
281     else
282     {
283       // Either we failed to fast find or we found a file or we found
284       // a directory in RECURSE_DISABLE mode, so we do not need to scan it.
285       // We can return here and do not need to process further.
286       // We need to process further only if we fast found a directory.
287       if (!FindCode || !IsDir || Recurse==RECURSE_DISABLE)
288       {
289          // Return SCAN_SUCCESS if we found a file.
290         SCAN_CODE RetCode=SCAN_SUCCESS;
291 
292         if (!FindCode)
293         {
294           // Return SCAN_ERROR if problem is more serious than just
295           // "file not found".
296           RetCode=FD->Error ? SCAN_ERROR:SCAN_NEXT;
297 
298           // If we failed to find an object, but our current mask is excluded,
299           // we skip this object and avoid indicating an error.
300           if (Cmd!=NULL && Cmd->ExclCheck(CurMask,false,true,true))
301             RetCode=SCAN_NEXT;
302           else
303           {
304             ErrHandler.OpenErrorMsg(ErrArcName,CurMask);
305             // User asked to return RARX_NOFILES and not RARX_OPEN here.
306             ErrHandler.SetErrorCode(RARX_NOFILES);
307           }
308         }
309 
310         // If we searched only for one file or directory in "fast find"
311         // (without a wildcard) mode, let's set masks to zero,
312         // so calling function will know that current mask is used
313         // and next one must be read from mask list for next call.
314         // It is not necessary for directories, because even in "fast find"
315         // mode, directory recursing will quit by (Depth < 0) condition,
316         // which returns SCAN_DONE to calling function.
317         *CurMask=0;
318 
319         return RetCode;
320       }
321 
322       // We found a directory using only FindFile::FastFind function.
323       FastFindFile=true;
324     }
325   }
326 
327   if (!FastFindFile && !FindStack[Depth]->Next(FD,GetLinks))
328   {
329     // We cannot find anything more in directory either because of
330     // some error or just as result of all directory entries already read.
331 
332     bool Error=FD->Error;
333     if (Error)
334       ScanError(Error);
335 
336     wchar DirName[NM];
337     *DirName=0;
338 
339     // Going to at least one directory level higher.
340     delete FindStack[Depth];
341     FindStack[Depth--]=NULL;
342     while (Depth>=0 && FindStack[Depth]==NULL)
343       Depth--;
344     if (Depth < 0)
345     {
346       // Directories scanned both in normal and FastFindFile mode,
347       // finally exit from scan here, by (Depth < 0) condition.
348 
349       if (Error)
350         Errors++;
351       return SCAN_DONE;
352     }
353 
354     wchar *Slash=wcsrchr(CurMask,CPATHDIVIDER);
355     if (Slash!=NULL)
356     {
357       wchar Mask[NM];
358       wcsncpyz(Mask,Slash,ASIZE(Mask));
359       if (Depth<SetAllMaskDepth)
360         wcsncpyz(Mask+1,PointToName(OrigCurMask),ASIZE(Mask)-1);
361       *Slash=0;
362       wcsncpyz(DirName,CurMask,ASIZE(DirName));
363       wchar *PrevSlash=wcsrchr(CurMask,CPATHDIVIDER);
364       if (PrevSlash==NULL)
365         wcsncpyz(CurMask,Mask+1,ASIZE(CurMask));
366       else
367       {
368         *PrevSlash=0;
369         wcsncatz(CurMask,Mask,ASIZE(CurMask));
370       }
371     }
372     if (GetDirs==SCAN_GETDIRSTWICE &&
373         FindFile::FastFind(DirName,FD,GetLinks) && FD->IsDir)
374     {
375       FD->Flags|=FDDF_SECONDDIR;
376       return Error ? SCAN_ERROR:SCAN_SUCCESS;
377     }
378     return Error ? SCAN_ERROR:SCAN_NEXT;
379   }
380 
381   // Link check is required for NTFS links, not for Unix.
382   if (FD->IsDir && (!GetLinks || !FD->IsLink))
383   {
384     // If we found the directory in top (Depth==0) directory
385     // and if we are not in "fast find" (directory name only as argument)
386     // or in recurse (SearchAll was set when opening the top directory) mode,
387     // we do not recurse into this directory. We either return it by itself
388     // or skip it.
389     if (!FastFindFile && Depth==0 && !SearchAllInRoot)
390       return GetDirs==SCAN_GETCURDIRS ? SCAN_SUCCESS:SCAN_NEXT;
391 
392     // Let's check if directory name is excluded, so we do not waste
393     // time searching in directory, which will be excluded anyway.
394     if (Cmd!=NULL && (Cmd->ExclCheck(FD->Name,true,false,false) ||
395         Cmd->ExclDirByAttr(FD->FileAttr)))
396     {
397       // If we are here in "fast find" mode, it means that entire directory
398       // specified in command line is excluded. Then we need to return
399       // SCAN_DONE to go to next mask and avoid the infinite loop
400       // in GetNext() function. Such loop would be possible in case of
401       // SCAN_NEXT code and "rar a arc dir -xdir" command.
402 
403       return FastFindFile ? SCAN_DONE:SCAN_NEXT;
404     }
405 
406     wchar Mask[NM];
407 
408     wcsncpyz(Mask,FastFindFile ? MASKALL:PointToName(CurMask),ASIZE(Mask));
409     wcsncpyz(CurMask,FD->Name,ASIZE(CurMask));
410 
411     if (wcslen(CurMask)+wcslen(Mask)+1>=NM || Depth>=MAXSCANDEPTH-1)
412     {
413       uiMsg(UIERROR_PATHTOOLONG,CurMask,SPATHDIVIDER,Mask);
414       return SCAN_ERROR;
415     }
416 
417     AddEndSlash(CurMask,ASIZE(CurMask));
418     wcsncatz(CurMask,Mask,ASIZE(CurMask));
419 
420     Depth++;
421 
422     // We need to use OrigCurMask for depths less than SetAllMaskDepth
423     // and "*" for depths equal or larger than SetAllMaskDepth.
424     // It is important when "fast finding" directories at Depth > 0.
425     // For example, if current directory is RootFolder and we compress
426     // the following directories structure:
427     //   RootFolder
428     //     +--Folder1
429     //     |  +--Folder2
430     //     |  +--Folder3
431     //     +--Folder4
432     // with 'rar a -r arcname Folder2' command, rar could add not only
433     // Folder1\Folder2 contents, but also Folder1\Folder3 if we were using
434     // "*" mask at all levels. We need to use "*" mask inside of Folder2,
435     // but return to "Folder2" mask when completing scanning Folder2.
436     // We can rewrite SearchAll expression above to avoid fast finding
437     // directories at Depth > 0, but then 'rar a -r arcname Folder2'
438     // will add the empty Folder2 and do not add its contents.
439 
440     if (FastFindFile)
441       SetAllMaskDepth=Depth;
442   }
443   if (!FastFindFile && !CmpName(CurMask,FD->Name,MATCH_NAMES))
444     return SCAN_NEXT;
445 
446   return SCAN_SUCCESS;
447 }
448 
449 
ScanError(bool & Error)450 void ScanTree::ScanError(bool &Error)
451 {
452 #ifdef _WIN_ALL
453   if (Error)
454   {
455     // Get attributes of parent folder and do not display an error
456     // if it is reparse point. We cannot scan contents of standard
457     // Windows reparse points like "C:\Documents and Settings"
458     // and we do not want to issue numerous useless errors for them.
459     // We cannot just check FD->FileAttr here, it can be undefined
460     // if we process "folder\*" mask or if we process "folder" mask,
461     // but "folder" is inaccessible.
462     wchar *Slash=PointToName(CurMask);
463     if (Slash>CurMask)
464     {
465       *(Slash-1)=0;
466       DWORD Attr=GetFileAttributes(CurMask);
467       *(Slash-1)=CPATHDIVIDER;
468       if (Attr!=0xffffffff && (Attr & FILE_ATTRIBUTE_REPARSE_POINT)!=0)
469         Error=false;
470     }
471 
472     // Do not display an error if we cannot scan contents of
473     // "System Volume Information" folder. Normally it is not accessible.
474     if (wcsstr(CurMask,L"System Volume Information\\")!=NULL)
475       Error=false;
476   }
477 #endif
478 
479   if (Error && Cmd!=NULL && Cmd->ExclCheck(CurMask,false,true,true))
480     Error=false;
481 
482   if (Error)
483   {
484     if (ErrDirList!=NULL)
485       ErrDirList->AddString(CurMask);
486     if (ErrDirSpecPathLength!=NULL)
487       ErrDirSpecPathLength->Push((uint)SpecPathLength);
488     wchar FullName[NM];
489     // This conversion works for wildcard masks too.
490     ConvertNameToFull(CurMask,FullName,ASIZE(FullName));
491     uiMsg(UIERROR_DIRSCAN,FullName);
492     ErrHandler.SysErrMsg();
493   }
494 }
495