1 #include "rar.hpp"
2
ReadTextFile(const wchar * Name,StringList * List,bool Config,bool AbortOnError,RAR_CHARSET SrcCharset,bool Unquote,bool SkipComments,bool ExpandEnvStr)3 bool ReadTextFile(
4 const wchar *Name,
5 StringList *List,
6 bool Config,
7 bool AbortOnError,
8 RAR_CHARSET SrcCharset,
9 bool Unquote,
10 bool SkipComments,
11 bool ExpandEnvStr)
12 {
13 wchar FileName[NM];
14 *FileName=0;
15
16 if (Name!=NULL)
17 if (Config)
18 GetConfigName(Name,FileName,ASIZE(FileName),true,false);
19 else
20 wcsncpyz(FileName,Name,ASIZE(FileName));
21
22 File SrcFile;
23 if (*FileName!=0)
24 {
25 bool OpenCode=AbortOnError ? SrcFile.WOpen(FileName):SrcFile.Open(FileName,0);
26
27 if (!OpenCode)
28 {
29 if (AbortOnError)
30 ErrHandler.Exit(RARX_OPEN);
31 return false;
32 }
33 }
34 else
35 SrcFile.SetHandleType(FILE_HANDLESTD);
36
37 uint DataSize=0,ReadSize;
38 const int ReadBlock=4096;
39
40 Array<byte> Data(ReadBlock);
41 while ((ReadSize=SrcFile.Read(&Data[DataSize],ReadBlock))!=0)
42 {
43 DataSize+=ReadSize;
44 Data.Add(ReadSize); // Always have ReadBlock available for next data.
45 }
46 // Set to really read size, so we can zero terminate it correctly.
47 Data.Alloc(DataSize);
48
49 int LittleEndian=DataSize>=2 && Data[0]==255 && Data[1]==254 ? 1:0;
50 int BigEndian=DataSize>=2 && Data[0]==254 && Data[1]==255 ? 1:0;
51 bool Utf8=DataSize>=3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf;
52
53 if (SrcCharset==RCH_DEFAULT)
54 SrcCharset=DetectTextEncoding(&Data[0],DataSize);
55
56 Array<wchar> DataW;
57
58 if (SrcCharset==RCH_DEFAULT || SrcCharset==RCH_OEM || SrcCharset==RCH_ANSI)
59 {
60 Data.Push(0); // Zero terminate.
61 #if defined(_WIN_ALL)
62 if (SrcCharset==RCH_OEM)
63 OemToCharA((char *)&Data[0],(char *)&Data[0]);
64 #endif
65 DataW.Alloc(Data.Size());
66 CharToWide((char *)&Data[0],&DataW[0],DataW.Size());
67 }
68
69 if (SrcCharset==RCH_UNICODE)
70 {
71 size_t Start=2; // Skip byte order mark.
72 if (!LittleEndian && !BigEndian) // No byte order mask.
73 {
74 Start=0;
75 LittleEndian=1;
76 }
77
78 DataW.Alloc(Data.Size()/2+1);
79 size_t End=Data.Size() & ~1; // We need even bytes number for UTF-16.
80 for (size_t I=Start;I<End;I+=2)
81 DataW[(I-Start)/2]=Data[I+BigEndian]+Data[I+LittleEndian]*256;
82 DataW[(End-Start)/2]=0;
83 }
84
85 if (SrcCharset==RCH_UTF8)
86 {
87 Data.Push(0); // Zero terminate data.
88 DataW.Alloc(Data.Size());
89 UtfToWide((const char *)(Data+(Utf8 ? 3:0)),&DataW[0],DataW.Size());
90 }
91
92 wchar *CurStr=&DataW[0];
93
94 while (*CurStr!=0)
95 {
96 wchar *NextStr=CurStr,*CmtPtr=NULL;
97 while (*NextStr!='\r' && *NextStr!='\n' && *NextStr!=0)
98 {
99 if (SkipComments && NextStr[0]=='/' && NextStr[1]=='/')
100 {
101 *NextStr=0;
102 CmtPtr=NextStr;
103 }
104 NextStr++;
105 }
106 bool Done=*NextStr==0;
107
108 *NextStr=0;
109 for (wchar *SpacePtr=(CmtPtr!=NULL ? CmtPtr:NextStr)-1;SpacePtr>=CurStr;SpacePtr--)
110 {
111 if (*SpacePtr!=' ' && *SpacePtr!='\t')
112 break;
113 *SpacePtr=0;
114 }
115
116 if (Unquote && *CurStr=='\"')
117 {
118 size_t Length=wcslen(CurStr);
119 if (CurStr[Length-1]=='\"')
120 {
121 CurStr[Length-1]=0;
122 CurStr++;
123 }
124 }
125
126 bool Expanded=false;
127 #if defined(_WIN_ALL)
128 if (ExpandEnvStr && *CurStr=='%') // Expand environment variables in Windows.
129 {
130 wchar ExpName[NM];
131 *ExpName=0;
132 DWORD Result=ExpandEnvironmentStrings(CurStr,ExpName,ASIZE(ExpName));
133 Expanded=Result!=0 && Result<ASIZE(ExpName);
134 if (Expanded && *ExpName!=0)
135 List->AddString(ExpName);
136 }
137 #endif
138 if (!Expanded && *CurStr!=0)
139 List->AddString(CurStr);
140
141 if (Done)
142 break;
143 CurStr=NextStr+1;
144 while (*CurStr=='\r' || *CurStr=='\n')
145 CurStr++;
146 }
147 return true;
148 }
149
150
DetectTextEncoding(const byte * Data,size_t DataSize)151 RAR_CHARSET DetectTextEncoding(const byte *Data,size_t DataSize)
152 {
153 if (DataSize>3 && Data[0]==0xef && Data[1]==0xbb && Data[2]==0xbf &&
154 IsTextUtf8(Data+3,DataSize-3))
155 return RCH_UTF8;
156
157 bool LittleEndian=DataSize>2 && Data[0]==255 && Data[1]==254;
158 bool BigEndian=DataSize>2 && Data[0]==254 && Data[1]==255;
159
160 if (LittleEndian || BigEndian)
161 for (size_t I=LittleEndian ? 3 : 2;I<DataSize;I+=2)
162 if (Data[I]<32 && Data[I]!='\r' && Data[I]!='\n')
163 return RCH_UNICODE; // High byte in UTF-16 char is found.
164
165 return RCH_DEFAULT;
166 }
167