1 //! Support for archive files.
2
3 use core::convert::TryInto;
4
5 use crate::archive;
6 use crate::read::{self, Error, ReadError, ReadRef};
7
/// The flavour of an archive, as inferred from its special members.
// TODO: Gnu64 and Darwin64 (and Darwin for writing)
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum ArchiveKind {
    /// No special member identifying the archive format was found.
    Unknown,
    /// A GNU (also known as System V) archive.
    Gnu,
    /// A BSD archive.
    Bsd,
    /// A Windows COFF archive.
    Coff,
}
22
23 /// A partially parsed archive file.
24 #[derive(Debug)]
25 pub struct ArchiveFile<'data, R: ReadRef<'data> = &'data [u8]> {
26 data: R,
27 len: u64,
28 offset: u64,
29 kind: ArchiveKind,
30 symbols: (u64, u64),
31 names: &'data [u8],
32 }
33
34 impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> {
35 /// Parse the archive header and special members.
parse(data: R) -> read::Result<Self>36 pub fn parse(data: R) -> read::Result<Self> {
37 let len = data.len().read_error("Unknown archive length")?;
38 let mut tail = 0;
39 let magic = data
40 .read_bytes(&mut tail, archive::MAGIC.len() as u64)
41 .read_error("Invalid archive size")?;
42 if magic != &archive::MAGIC[..] {
43 return Err(Error("Unsupported archive identifier"));
44 }
45
46 let mut file = ArchiveFile {
47 data,
48 offset: tail,
49 len,
50 kind: ArchiveKind::Unknown,
51 symbols: (0, 0),
52 names: &[],
53 };
54
55 // The first few members may be special, so parse them.
56 // GNU has:
57 // - "/": symbol table (optional)
58 // - "//": names table (optional)
59 // COFF has:
60 // - "/": first linker member
61 // - "/": second linker member
62 // - "//": names table
63 // BSD has:
64 // - "__.SYMDEF" or "__.SYMDEF SORTED": symbol table (optional)
65 if tail < len {
66 let member = ArchiveMember::parse(data, &mut tail, &[])?;
67 if member.name == b"/" {
68 // GNU symbol table (unless we later determine this is COFF).
69 file.kind = ArchiveKind::Gnu;
70 file.symbols = member.file_range();
71 file.offset = tail;
72
73 if tail < len {
74 let member = ArchiveMember::parse(data, &mut tail, &[])?;
75 if member.name == b"/" {
76 // COFF linker member.
77 file.kind = ArchiveKind::Coff;
78 file.symbols = member.file_range();
79 file.offset = tail;
80
81 if tail < len {
82 let member = ArchiveMember::parse(data, &mut tail, &[])?;
83 if member.name == b"//" {
84 // COFF names table.
85 file.names = member.data(data)?;
86 file.offset = tail;
87 }
88 }
89 } else if member.name == b"//" {
90 // GNU names table.
91 file.names = member.data(data)?;
92 file.offset = tail;
93 }
94 }
95 } else if member.name == b"//" {
96 // GNU names table.
97 file.kind = ArchiveKind::Gnu;
98 file.names = member.data(data)?;
99 file.offset = tail;
100 } else if member.name == b"__.SYMDEF" || member.name == b"__.SYMDEF SORTED" {
101 // BSD symbol table.
102 file.kind = ArchiveKind::Bsd;
103 file.symbols = member.file_range();
104 file.offset = tail;
105 } else {
106 // TODO: This could still be a BSD file. We leave this as unknown for now.
107 }
108 }
109 Ok(file)
110 }
111
112 /// Return the archive format.
113 #[inline]
kind(&self) -> ArchiveKind114 pub fn kind(&self) -> ArchiveKind {
115 self.kind
116 }
117
118 /// Iterate over the members of the archive.
119 ///
120 /// This does not return special members.
121 #[inline]
members(&self) -> ArchiveMemberIterator<'data, R>122 pub fn members(&self) -> ArchiveMemberIterator<'data, R> {
123 ArchiveMemberIterator {
124 data: self.data,
125 offset: self.offset,
126 len: self.len,
127 names: self.names,
128 }
129 }
130 }
131
132 /// An iterator over the members of an archive.
133 #[derive(Debug)]
134 pub struct ArchiveMemberIterator<'data, R: ReadRef<'data> = &'data [u8]> {
135 data: R,
136 offset: u64,
137 len: u64,
138 names: &'data [u8],
139 }
140
141 impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> {
142 type Item = read::Result<ArchiveMember<'data>>;
143
next(&mut self) -> Option<Self::Item>144 fn next(&mut self) -> Option<Self::Item> {
145 if self.offset >= self.len {
146 return None;
147 }
148 let member = ArchiveMember::parse(self.data, &mut self.offset, self.names);
149 if member.is_err() {
150 self.offset = self.len;
151 }
152 Some(member)
153 }
154 }
155
156 /// A partially parsed archive member.
157 #[derive(Debug)]
158 pub struct ArchiveMember<'data> {
159 header: &'data archive::Header,
160 name: &'data [u8],
161 offset: u64,
162 size: u64,
163 }
164
165 impl<'data> ArchiveMember<'data> {
166 /// Parse the archive member header, name, and file data.
167 ///
168 /// This reads the extended name (if any) and adjusts the file size.
parse<R: ReadRef<'data>>( data: R, offset: &mut u64, names: &'data [u8], ) -> read::Result<Self>169 fn parse<R: ReadRef<'data>>(
170 data: R,
171 offset: &mut u64,
172 names: &'data [u8],
173 ) -> read::Result<Self> {
174 let header = data
175 .read::<archive::Header>(offset)
176 .read_error("Invalid archive member header")?;
177 if header.terminator != archive::TERMINATOR {
178 return Err(Error("Invalid archive terminator"));
179 }
180
181 let mut file_offset = *offset;
182 let mut file_size =
183 parse_u64_digits(&header.size, 10).read_error("Invalid archive member size")?;
184 *offset = offset
185 .checked_add(file_size)
186 .read_error("Archive member size is too large")?;
187 // Entries are padded to an even number of bytes.
188 if (file_size & 1) != 0 {
189 *offset = offset.saturating_add(1);
190 }
191
192 let name = if header.name[0] == b'/' && (header.name[1] as char).is_digit(10) {
193 // Read file name from the names table.
194 parse_sysv_extended_name(&header.name[1..], names)
195 .read_error("Invalid archive extended name offset")?
196 } else if &header.name[..3] == b"#1/" && (header.name[3] as char).is_digit(10) {
197 // Read file name from the start of the file data.
198 parse_bsd_extended_name(&header.name[3..], data, &mut file_offset, &mut file_size)
199 .read_error("Invalid archive extended name length")?
200 } else if header.name[0] == b'/' {
201 let name_len = memchr::memchr(b' ', &header.name).unwrap_or(header.name.len());
202 &header.name[..name_len]
203 } else {
204 let name_len = memchr::memchr(b'/', &header.name)
205 .or_else(|| memchr::memchr(b' ', &header.name))
206 .unwrap_or(header.name.len());
207 &header.name[..name_len]
208 };
209
210 Ok(ArchiveMember {
211 header,
212 name,
213 offset: file_offset,
214 size: file_size,
215 })
216 }
217
218 /// Return the raw header.
219 #[inline]
header(&self) -> &'data archive::Header220 pub fn header(&self) -> &'data archive::Header {
221 self.header
222 }
223
224 /// Return the parsed file name.
225 ///
226 /// This may be an extended file name.
227 #[inline]
name(&self) -> &'data [u8]228 pub fn name(&self) -> &'data [u8] {
229 self.name
230 }
231
232 /// Parse the file modification timestamp from the header.
233 #[inline]
date(&self) -> Option<u64>234 pub fn date(&self) -> Option<u64> {
235 parse_u64_digits(&self.header.date, 10)
236 }
237
238 /// Parse the user ID from the header.
239 #[inline]
uid(&self) -> Option<u64>240 pub fn uid(&self) -> Option<u64> {
241 parse_u64_digits(&self.header.uid, 10)
242 }
243
244 /// Parse the group ID from the header.
245 #[inline]
gid(&self) -> Option<u64>246 pub fn gid(&self) -> Option<u64> {
247 parse_u64_digits(&self.header.gid, 10)
248 }
249
250 /// Parse the file mode from the header.
251 #[inline]
mode(&self) -> Option<u64>252 pub fn mode(&self) -> Option<u64> {
253 parse_u64_digits(&self.header.mode, 8)
254 }
255
256 /// Return the offset and size of the file data.
file_range(&self) -> (u64, u64)257 pub fn file_range(&self) -> (u64, u64) {
258 (self.offset, self.size)
259 }
260
261 /// Return the file data.
262 #[inline]
data<R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [u8]>263 pub fn data<R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [u8]> {
264 data.read_bytes_at(self.offset, self.size)
265 .read_error("Archive member size is too large")
266 }
267 }
268
/// Parse an unsigned integer from a space-padded ASCII field.
///
/// The digits are interpreted in the given `radix`. Parsing stops at the
/// first space, and any bytes from that space onwards are ignored.
///
/// Returns `None` if the field holds no digits (it is empty or starts with a
/// space), if a byte before the first space is not a digit in `radix`, or if
/// the value does not fit in a `u64`.
fn parse_u64_digits(digits: &[u8], radix: u32) -> Option<u64> {
    // A field without at least one leading digit is not a number.
    match digits.first() {
        None | Some(b' ') => return None,
        Some(_) => {}
    }
    let radix_u64 = u64::from(radix);
    digits
        .iter()
        .take_while(|&&c| c != b' ')
        .try_fold(0u64, |acc, &c| {
            let digit = (c as char).to_digit(radix)?;
            acc.checked_mul(radix_u64)?.checked_add(u64::from(digit))
        })
}
287
parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<&'data [u8], ()>288 fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<&'data [u8], ()> {
289 let offset = parse_u64_digits(digits, 10).ok_or(())?;
290 let offset = offset.try_into().map_err(|_| ())?;
291 let name_data = names.get(offset..).ok_or(())?;
292 let name = match memchr::memchr2(b'/', b'\0', name_data) {
293 Some(len) => &name_data[..len],
294 None => name_data,
295 };
296 Ok(name)
297 }
298
299 /// Modifies `data` to start after the extended name.
parse_bsd_extended_name<'data, R: ReadRef<'data>>( digits: &[u8], data: R, offset: &mut u64, size: &mut u64, ) -> Result<&'data [u8], ()>300 fn parse_bsd_extended_name<'data, R: ReadRef<'data>>(
301 digits: &[u8],
302 data: R,
303 offset: &mut u64,
304 size: &mut u64,
305 ) -> Result<&'data [u8], ()> {
306 let len = parse_u64_digits(digits, 10).ok_or(())?;
307 *size = size.checked_sub(len).ok_or(())?;
308 let name_data = data.read_bytes(offset, len)?;
309 let name = match memchr::memchr(b'\0', name_data) {
310 Some(len) => &name_data[..len],
311 None => name_data,
312 };
313 Ok(name)
314 }
315
#[cfg(test)]
mod tests {
    use super::*;

    // Member headers in the fixtures below are fixed-width, space-padded fields:
    // name (16), date (12), uid (6), gid (6), mode (8), size (10), then the
    // two-byte terminator "`\n". The padding is significant: do not reformat it.

    /// The archive kind is inferred from the leading special members.
    #[test]
    fn kind() {
        let data = b"!<arch>\n";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Unknown);

        let data = b"\
            !<arch>\n\
            /                                               4         `\n\
            0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Gnu);

        let data = b"\
            !<arch>\n\
            //                                              4         `\n\
            0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Gnu);

        let data = b"\
            !<arch>\n\
            /                                               4         `\n\
            0000\
            //                                              4         `\n\
            0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Gnu);

        let data = b"\
            !<arch>\n\
            __.SYMDEF                                       4         `\n\
            0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Bsd);

        let data = b"\
            !<arch>\n\
            #1/9                                            13        `\n\
            __.SYMDEF0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Bsd);

        let data = b"\
            !<arch>\n\
            #1/16                                           20        `\n\
            __.SYMDEF SORTED0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Bsd);

        let data = b"\
            !<arch>\n\
            /                                               4         `\n\
            0000\
            /                                               4         `\n\
            0000\
            //                                              4         `\n\
            0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Coff);
    }

    /// GNU names: '/'-terminated short names and names table lookups.
    #[test]
    fn gnu_names() {
        let data = b"\
            !<arch>\n\
            //                                              18        `\n\
            0123456789abcdef/\n\
            s p a c e/      0           0     0     644     4         `\n\
            0000\
            0123456789abcde/0           0     0     644     3         `\n\
            odd\n\
            /0              0           0     0     644     4         `\n\
            even";
        let data = &data[..];
        let archive = ArchiveFile::parse(data).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Gnu);
        let mut members = archive.members();

        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"s p a c e");
        assert_eq!(member.data(data).unwrap(), &b"0000"[..]);

        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcde");
        assert_eq!(member.data(data).unwrap(), &b"odd"[..]);

        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcdef");
        assert_eq!(member.data(data).unwrap(), &b"even"[..]);

        assert!(members.next().is_none());
    }

    /// BSD names: space-padded short names and `#1/<length>` extended names.
    #[test]
    fn bsd_names() {
        let data = b"\
            !<arch>\n\
            0123456789abcde 0           0     0     644     3         `\n\
            odd\n\
            #1/16           0           0     0     644     20        `\n\
            0123456789abcdefeven";
        let data = &data[..];
        let archive = ArchiveFile::parse(data).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Unknown);
        let mut members = archive.members();

        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcde");
        assert_eq!(member.data(data).unwrap(), &b"odd"[..]);

        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcdef");
        assert_eq!(member.data(data).unwrap(), &b"even"[..]);

        assert!(members.next().is_none());
    }
}
438