1 //! Support for archive files.
2 
3 use core::convert::TryInto;
4 
5 use crate::archive;
6 use crate::read::{self, Error, ReadError, ReadRef};
7 
/// The kind of archive format.
///
/// `ArchiveFile::parse` determines this from the names of the special
/// members found at the start of the archive.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ArchiveKind {
    /// There are no special files that indicate the archive format.
    Unknown,
    /// The GNU (or System V) archive format.
    ///
    /// Detected from a leading `/` symbol table or `//` names table.
    Gnu,
    /// The GNU (or System V) archive format with 64-bit symbol table.
    ///
    /// Detected from a leading `/SYM64/` symbol table.
    Gnu64,
    /// The BSD archive format.
    ///
    /// Detected from a leading `__.SYMDEF` or `__.SYMDEF SORTED` symbol table.
    Bsd,
    /// The BSD archive format with 64-bit symbol table.
    ///
    /// This is used for Darwin.
    ///
    /// Detected from a leading `__.SYMDEF_64` or `__.SYMDEF_64 SORTED`
    /// symbol table.
    Bsd64,
    /// The Windows COFF archive format.
    ///
    /// Detected when the first two members are both named `/`.
    Coff,
}
27 
/// A partially parsed archive file.
///
/// Only the archive magic and the leading special members (symbol table and
/// names table) are examined when the file is parsed. Regular members are
/// parsed one at a time by the iterator returned from `members`.
#[derive(Debug)]
pub struct ArchiveFile<'data, R: ReadRef<'data> = &'data [u8]> {
    /// The underlying archive data.
    data: R,
    /// Total length of `data`, as reported by `ReadRef::len`.
    len: u64,
    /// Offset of the first member that was not consumed as a special member.
    offset: u64,
    /// The archive format detected from the special members.
    kind: ArchiveKind,
    /// File range `(offset, size)` of the symbol table member, or `(0, 0)`
    /// if no symbol table was recognised.
    symbols: (u64, u64),
    /// Contents of the `//` extended names table, or empty if there is none.
    names: &'data [u8],
}
38 
39 impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> {
40     /// Parse the archive header and special members.
parse(data: R) -> read::Result<Self>41     pub fn parse(data: R) -> read::Result<Self> {
42         let len = data.len().read_error("Unknown archive length")?;
43         let mut tail = 0;
44         let magic = data
45             .read_bytes(&mut tail, archive::MAGIC.len() as u64)
46             .read_error("Invalid archive size")?;
47         if magic != &archive::MAGIC[..] {
48             return Err(Error("Unsupported archive identifier"));
49         }
50 
51         let mut file = ArchiveFile {
52             data,
53             offset: tail,
54             len,
55             kind: ArchiveKind::Unknown,
56             symbols: (0, 0),
57             names: &[],
58         };
59 
60         // The first few members may be special, so parse them.
61         // GNU has:
62         // - "/" or "/SYM64/": symbol table (optional)
63         // - "//": names table (optional)
64         // COFF has:
65         // - "/": first linker member
66         // - "/": second linker member
67         // - "//": names table
68         // BSD has:
69         // - "__.SYMDEF" or "__.SYMDEF SORTED": symbol table (optional)
70         // BSD 64-bit has:
71         // - "__.SYMDEF_64" or "__.SYMDEF_64 SORTED": symbol table (optional)
72         // BSD may use the extended name for the symbol table. This is handled
73         // by `ArchiveMember::parse`.
74         if tail < len {
75             let member = ArchiveMember::parse(data, &mut tail, &[])?;
76             if member.name == b"/" {
77                 // GNU symbol table (unless we later determine this is COFF).
78                 file.kind = ArchiveKind::Gnu;
79                 file.symbols = member.file_range();
80                 file.offset = tail;
81 
82                 if tail < len {
83                     let member = ArchiveMember::parse(data, &mut tail, &[])?;
84                     if member.name == b"/" {
85                         // COFF linker member.
86                         file.kind = ArchiveKind::Coff;
87                         file.symbols = member.file_range();
88                         file.offset = tail;
89 
90                         if tail < len {
91                             let member = ArchiveMember::parse(data, &mut tail, &[])?;
92                             if member.name == b"//" {
93                                 // COFF names table.
94                                 file.names = member.data(data)?;
95                                 file.offset = tail;
96                             }
97                         }
98                     } else if member.name == b"//" {
99                         // GNU names table.
100                         file.names = member.data(data)?;
101                         file.offset = tail;
102                     }
103                 }
104             } else if member.name == b"/SYM64/" {
105                 // GNU 64-bit symbol table.
106                 file.kind = ArchiveKind::Gnu64;
107                 file.symbols = member.file_range();
108                 file.offset = tail;
109 
110                 if tail < len {
111                     let member = ArchiveMember::parse(data, &mut tail, &[])?;
112                     if member.name == b"//" {
113                         // GNU names table.
114                         file.names = member.data(data)?;
115                         file.offset = tail;
116                     }
117                 }
118             } else if member.name == b"//" {
119                 // GNU names table.
120                 file.kind = ArchiveKind::Gnu;
121                 file.names = member.data(data)?;
122                 file.offset = tail;
123             } else if member.name == b"__.SYMDEF" || member.name == b"__.SYMDEF SORTED" {
124                 // BSD symbol table.
125                 file.kind = ArchiveKind::Bsd;
126                 file.symbols = member.file_range();
127                 file.offset = tail;
128             } else if member.name == b"__.SYMDEF_64" || member.name == b"__.SYMDEF_64 SORTED" {
129                 // BSD 64-bit symbol table.
130                 file.kind = ArchiveKind::Bsd64;
131                 file.symbols = member.file_range();
132                 file.offset = tail;
133             } else {
134                 // TODO: This could still be a BSD file. We leave this as unknown for now.
135             }
136         }
137         Ok(file)
138     }
139 
140     /// Return the archive format.
141     #[inline]
kind(&self) -> ArchiveKind142     pub fn kind(&self) -> ArchiveKind {
143         self.kind
144     }
145 
146     /// Iterate over the members of the archive.
147     ///
148     /// This does not return special members.
149     #[inline]
members(&self) -> ArchiveMemberIterator<'data, R>150     pub fn members(&self) -> ArchiveMemberIterator<'data, R> {
151         ArchiveMemberIterator {
152             data: self.data,
153             offset: self.offset,
154             len: self.len,
155             names: self.names,
156         }
157     }
158 }
159 
/// An iterator over the members of an archive.
///
/// Created by `ArchiveFile::members`. Each item is the result of parsing one
/// member header.
#[derive(Debug)]
pub struct ArchiveMemberIterator<'data, R: ReadRef<'data> = &'data [u8]> {
    /// The underlying archive data.
    data: R,
    /// Offset of the next member header to parse.
    offset: u64,
    /// Total length of the archive; iteration stops once `offset` reaches it.
    len: u64,
    /// Contents of the extended names table, used to resolve long names.
    names: &'data [u8],
}
168 
169 impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> {
170     type Item = read::Result<ArchiveMember<'data>>;
171 
next(&mut self) -> Option<Self::Item>172     fn next(&mut self) -> Option<Self::Item> {
173         if self.offset >= self.len {
174             return None;
175         }
176         let member = ArchiveMember::parse(self.data, &mut self.offset, self.names);
177         if member.is_err() {
178             self.offset = self.len;
179         }
180         Some(member)
181     }
182 }
183 
/// A partially parsed archive member.
///
/// Holds the raw header together with the resolved member name and the
/// location of the member's file data within the archive.
#[derive(Debug)]
pub struct ArchiveMember<'data> {
    /// The raw member header.
    header: &'data archive::Header,
    /// The member name, with any extended name already resolved.
    name: &'data [u8],
    /// Offset of the file data. For a BSD extended name this is after the
    /// name that is stored at the start of the member data.
    offset: u64,
    /// Size of the file data, excluding any BSD extended name.
    size: u64,
}
192 
193 impl<'data> ArchiveMember<'data> {
194     /// Parse the archive member header, name, and file data.
195     ///
196     /// This reads the extended name (if any) and adjusts the file size.
parse<R: ReadRef<'data>>( data: R, offset: &mut u64, names: &'data [u8], ) -> read::Result<Self>197     fn parse<R: ReadRef<'data>>(
198         data: R,
199         offset: &mut u64,
200         names: &'data [u8],
201     ) -> read::Result<Self> {
202         let header = data
203             .read::<archive::Header>(offset)
204             .read_error("Invalid archive member header")?;
205         if header.terminator != archive::TERMINATOR {
206             return Err(Error("Invalid archive terminator"));
207         }
208 
209         let mut file_offset = *offset;
210         let mut file_size =
211             parse_u64_digits(&header.size, 10).read_error("Invalid archive member size")?;
212         *offset = offset
213             .checked_add(file_size)
214             .read_error("Archive member size is too large")?;
215         // Entries are padded to an even number of bytes.
216         if (file_size & 1) != 0 {
217             *offset = offset.saturating_add(1);
218         }
219 
220         let name = if header.name[0] == b'/' && (header.name[1] as char).is_digit(10) {
221             // Read file name from the names table.
222             parse_sysv_extended_name(&header.name[1..], names)
223                 .read_error("Invalid archive extended name offset")?
224         } else if &header.name[..3] == b"#1/" && (header.name[3] as char).is_digit(10) {
225             // Read file name from the start of the file data.
226             parse_bsd_extended_name(&header.name[3..], data, &mut file_offset, &mut file_size)
227                 .read_error("Invalid archive extended name length")?
228         } else if header.name[0] == b'/' {
229             let name_len = memchr::memchr(b' ', &header.name).unwrap_or(header.name.len());
230             &header.name[..name_len]
231         } else {
232             let name_len = memchr::memchr(b'/', &header.name)
233                 .or_else(|| memchr::memchr(b' ', &header.name))
234                 .unwrap_or(header.name.len());
235             &header.name[..name_len]
236         };
237 
238         Ok(ArchiveMember {
239             header,
240             name,
241             offset: file_offset,
242             size: file_size,
243         })
244     }
245 
246     /// Return the raw header.
247     #[inline]
header(&self) -> &'data archive::Header248     pub fn header(&self) -> &'data archive::Header {
249         self.header
250     }
251 
252     /// Return the parsed file name.
253     ///
254     /// This may be an extended file name.
255     #[inline]
name(&self) -> &'data [u8]256     pub fn name(&self) -> &'data [u8] {
257         self.name
258     }
259 
260     /// Parse the file modification timestamp from the header.
261     #[inline]
date(&self) -> Option<u64>262     pub fn date(&self) -> Option<u64> {
263         parse_u64_digits(&self.header.date, 10)
264     }
265 
266     /// Parse the user ID from the header.
267     #[inline]
uid(&self) -> Option<u64>268     pub fn uid(&self) -> Option<u64> {
269         parse_u64_digits(&self.header.uid, 10)
270     }
271 
272     /// Parse the group ID from the header.
273     #[inline]
gid(&self) -> Option<u64>274     pub fn gid(&self) -> Option<u64> {
275         parse_u64_digits(&self.header.gid, 10)
276     }
277 
278     /// Parse the file mode from the header.
279     #[inline]
mode(&self) -> Option<u64>280     pub fn mode(&self) -> Option<u64> {
281         parse_u64_digits(&self.header.mode, 8)
282     }
283 
284     /// Return the offset and size of the file data.
file_range(&self) -> (u64, u64)285     pub fn file_range(&self) -> (u64, u64) {
286         (self.offset, self.size)
287     }
288 
289     /// Return the file data.
290     #[inline]
data<R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [u8]>291     pub fn data<R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [u8]> {
292         data.read_bytes_at(self.offset, self.size)
293             .read_error("Archive member size is too large")
294     }
295 }
296 
/// Parse an unsigned integer from a space-padded ASCII header field.
///
/// Parsing stops at the first space; bytes starting from that space are
/// ignored.
///
/// Returns `None` if the field begins with a space, if a byte before the
/// first space is not a digit in `radix`, or if the value overflows `u64`.
/// An empty field parses as zero.
fn parse_u64_digits(digits: &[u8], radix: u32) -> Option<u64> {
    if digits.first() == Some(&b' ') {
        return None;
    }
    digits
        .iter()
        .take_while(|&&byte| byte != b' ')
        .try_fold(0u64, |value, &byte| {
            let digit = (byte as char).to_digit(radix)?;
            value
                .checked_mul(u64::from(radix))?
                .checked_add(u64::from(digit))
        })
}
315 
parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<&'data [u8], ()>316 fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<&'data [u8], ()> {
317     let offset = parse_u64_digits(digits, 10).ok_or(())?;
318     let offset = offset.try_into().map_err(|_| ())?;
319     let name_data = names.get(offset..).ok_or(())?;
320     let name = match memchr::memchr2(b'/', b'\0', name_data) {
321         Some(len) => &name_data[..len],
322         None => name_data,
323     };
324     Ok(name)
325 }
326 
327 /// Modifies `data` to start after the extended name.
parse_bsd_extended_name<'data, R: ReadRef<'data>>( digits: &[u8], data: R, offset: &mut u64, size: &mut u64, ) -> Result<&'data [u8], ()>328 fn parse_bsd_extended_name<'data, R: ReadRef<'data>>(
329     digits: &[u8],
330     data: R,
331     offset: &mut u64,
332     size: &mut u64,
333 ) -> Result<&'data [u8], ()> {
334     let len = parse_u64_digits(digits, 10).ok_or(())?;
335     *size = size.checked_sub(len).ok_or(())?;
336     let name_data = data.read_bytes(offset, len)?;
337     let name = match memchr::memchr(b'\0', name_data) {
338         Some(len) => &name_data[..len],
339         None => name_data,
340     };
341     Ok(name)
342 }
343 
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `data` as an archive and return the detected kind.
    fn kind_of(data: &[u8]) -> ArchiveKind {
        ArchiveFile::parse(data).unwrap().kind()
    }

    /// Check that the next member has the expected name and file contents.
    fn assert_next_member<'a>(
        members: &mut ArchiveMemberIterator<'a>,
        data: &'a [u8],
        name: &[u8],
        contents: &[u8],
    ) {
        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), name);
        assert_eq!(member.data(data).unwrap(), contents);
    }

    #[test]
    fn kind() {
        // Magic only, no members.
        let empty = b"!<arch>\n";
        assert_eq!(kind_of(empty), ArchiveKind::Unknown);

        // GNU symbol table only.
        let gnu_symbols = b"\
            !<arch>\n\
            /                                               4         `\n\
            0000";
        assert_eq!(kind_of(gnu_symbols), ArchiveKind::Gnu);

        // GNU names table only.
        let gnu_names = b"\
            !<arch>\n\
            //                                              4         `\n\
            0000";
        assert_eq!(kind_of(gnu_names), ArchiveKind::Gnu);

        // GNU symbol table followed by names table.
        let gnu_both = b"\
            !<arch>\n\
            /                                               4         `\n\
            0000\
            //                                              4         `\n\
            0000";
        assert_eq!(kind_of(gnu_both), ArchiveKind::Gnu);

        // GNU 64-bit symbol table only.
        let gnu64_symbols = b"\
            !<arch>\n\
            /SYM64/                                         4         `\n\
            0000";
        assert_eq!(kind_of(gnu64_symbols), ArchiveKind::Gnu64);

        // GNU 64-bit symbol table followed by names table.
        let gnu64_both = b"\
            !<arch>\n\
            /SYM64/                                         4         `\n\
            0000\
            //                                              4         `\n\
            0000";
        assert_eq!(kind_of(gnu64_both), ArchiveKind::Gnu64);

        // BSD symbol table with a short name.
        let bsd = b"\
            !<arch>\n\
            __.SYMDEF                                       4         `\n\
            0000";
        assert_eq!(kind_of(bsd), ArchiveKind::Bsd);

        // BSD symbol table with an extended name.
        let bsd_extended = b"\
            !<arch>\n\
            #1/9                                            13        `\n\
            __.SYMDEF0000";
        assert_eq!(kind_of(bsd_extended), ArchiveKind::Bsd);

        // Sorted BSD symbol table with an extended name.
        let bsd_sorted = b"\
            !<arch>\n\
            #1/16                                           20        `\n\
            __.SYMDEF SORTED0000";
        assert_eq!(kind_of(bsd_sorted), ArchiveKind::Bsd);

        // BSD 64-bit symbol table with a short name.
        let bsd64 = b"\
            !<arch>\n\
            __.SYMDEF_64                                    4         `\n\
            0000";
        assert_eq!(kind_of(bsd64), ArchiveKind::Bsd64);

        // BSD 64-bit symbol table with an extended name.
        let bsd64_extended = b"\
            !<arch>\n\
            #1/12                                           16        `\n\
            __.SYMDEF_640000";
        assert_eq!(kind_of(bsd64_extended), ArchiveKind::Bsd64);

        // Sorted BSD 64-bit symbol table with an extended name.
        let bsd64_sorted = b"\
            !<arch>\n\
            #1/19                                           23        `\n\
            __.SYMDEF_64 SORTED0000";
        assert_eq!(kind_of(bsd64_sorted), ArchiveKind::Bsd64);

        // COFF: two linker members followed by the names table.
        let coff = b"\
            !<arch>\n\
            /                                               4         `\n\
            0000\
            /                                               4         `\n\
            0000\
            //                                              4         `\n\
            0000";
        assert_eq!(kind_of(coff), ArchiveKind::Coff);
    }

    #[test]
    fn gnu_names() {
        let data = &b"\
            !<arch>\n\
            //                                              18        `\n\
            0123456789abcdef/\n\
            s p a c e/      0           0     0     644     4         `\n\
            0000\
            0123456789abcde/0           0     0     644     3         `\n\
            odd\n\
            /0              0           0     0     644     4         `\n\
            even"[..];
        let archive = ArchiveFile::parse(data).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Gnu);

        let mut members = archive.members();
        // Short name containing spaces, terminated by `/`.
        assert_next_member(&mut members, data, b"s p a c e", b"0000");
        // Longest name that still fits in the header; odd-sized data.
        assert_next_member(&mut members, data, b"0123456789abcde", b"odd");
        // Name resolved through the names table.
        assert_next_member(&mut members, data, b"0123456789abcdef", b"even");
        assert!(members.next().is_none());
    }

    #[test]
    fn bsd_names() {
        let data = &b"\
            !<arch>\n\
            0123456789abcde 0           0     0     644     3         `\n\
            odd\n\
            #1/16           0           0     0     644     20        `\n\
            0123456789abcdefeven"[..];
        let archive = ArchiveFile::parse(data).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Unknown);

        let mut members = archive.members();
        // Short name stored in the header; odd-sized data.
        assert_next_member(&mut members, data, b"0123456789abcde", b"odd");
        // Extended name stored at the start of the member data.
        assert_next_member(&mut members, data, b"0123456789abcdef", b"even");
        assert!(members.next().is_none());
    }
}
503