1 //! Support for archive files.
2 
3 use core::convert::TryInto;
4 
5 use crate::archive;
6 use crate::read::{self, Error, ReadError, ReadRef};
7 
/// The kind of archive format.
///
/// [`ArchiveFile::parse`] determines this from the names of the special
/// members found at the start of the archive.
// TODO: Gnu64 and Darwin64 (and Darwin for writing)
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ArchiveKind {
    /// There are no special files that indicate the archive format.
    Unknown,
    /// The GNU (or System V) archive format.
    ///
    /// Reported when the first member is named `/` or `//`, and the archive
    /// is not subsequently identified as COFF.
    Gnu,
    /// The BSD archive format.
    ///
    /// Reported when the first member is named `__.SYMDEF` or
    /// `__.SYMDEF SORTED`.
    Bsd,
    /// The Windows COFF archive format.
    ///
    /// Reported when the first two members are both named `/`.
    Coff,
}
22 
/// A partially parsed archive file.
///
/// Only the archive magic and the leading special members are examined when
/// the file is parsed. Regular members are parsed one at a time by the
/// iterator returned from [`ArchiveFile::members`].
#[derive(Debug)]
pub struct ArchiveFile<'data, R: ReadRef<'data> = &'data [u8]> {
    /// The underlying archive data.
    data: R,
    /// The total length of `data`.
    len: u64,
    /// The offset just past the last recognised special member; member
    /// iteration starts here.
    offset: u64,
    /// The archive format detected from the special members.
    kind: ArchiveKind,
    /// The file range (offset, size) of the symbol table member, or `(0, 0)`
    /// if no symbol table was recognised.
    symbols: (u64, u64),
    /// The contents of the `//` names table, or empty if there is none.
    names: &'data [u8],
}
33 
34 impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> {
35     /// Parse the archive header and special members.
parse(data: R) -> read::Result<Self>36     pub fn parse(data: R) -> read::Result<Self> {
37         let len = data.len().read_error("Unknown archive length")?;
38         let mut tail = 0;
39         let magic = data
40             .read_bytes(&mut tail, archive::MAGIC.len() as u64)
41             .read_error("Invalid archive size")?;
42         if magic != &archive::MAGIC[..] {
43             return Err(Error("Unsupported archive identifier"));
44         }
45 
46         let mut file = ArchiveFile {
47             data,
48             offset: tail,
49             len,
50             kind: ArchiveKind::Unknown,
51             symbols: (0, 0),
52             names: &[],
53         };
54 
55         // The first few members may be special, so parse them.
56         // GNU has:
57         // - "/": symbol table (optional)
58         // - "//": names table (optional)
59         // COFF has:
60         // - "/": first linker member
61         // - "/": second linker member
62         // - "//": names table
63         // BSD has:
64         // - "__.SYMDEF" or "__.SYMDEF SORTED": symbol table (optional)
65         if tail < len {
66             let member = ArchiveMember::parse(data, &mut tail, &[])?;
67             if member.name == b"/" {
68                 // GNU symbol table (unless we later determine this is COFF).
69                 file.kind = ArchiveKind::Gnu;
70                 file.symbols = member.file_range();
71                 file.offset = tail;
72 
73                 if tail < len {
74                     let member = ArchiveMember::parse(data, &mut tail, &[])?;
75                     if member.name == b"/" {
76                         // COFF linker member.
77                         file.kind = ArchiveKind::Coff;
78                         file.symbols = member.file_range();
79                         file.offset = tail;
80 
81                         if tail < len {
82                             let member = ArchiveMember::parse(data, &mut tail, &[])?;
83                             if member.name == b"//" {
84                                 // COFF names table.
85                                 file.names = member.data(data)?;
86                                 file.offset = tail;
87                             }
88                         }
89                     } else if member.name == b"//" {
90                         // GNU names table.
91                         file.names = member.data(data)?;
92                         file.offset = tail;
93                     }
94                 }
95             } else if member.name == b"//" {
96                 // GNU names table.
97                 file.kind = ArchiveKind::Gnu;
98                 file.names = member.data(data)?;
99                 file.offset = tail;
100             } else if member.name == b"__.SYMDEF" || member.name == b"__.SYMDEF SORTED" {
101                 // BSD symbol table.
102                 file.kind = ArchiveKind::Bsd;
103                 file.symbols = member.file_range();
104                 file.offset = tail;
105             } else {
106                 // TODO: This could still be a BSD file. We leave this as unknown for now.
107             }
108         }
109         Ok(file)
110     }
111 
112     /// Return the archive format.
113     #[inline]
kind(&self) -> ArchiveKind114     pub fn kind(&self) -> ArchiveKind {
115         self.kind
116     }
117 
118     /// Iterate over the members of the archive.
119     ///
120     /// This does not return special members.
121     #[inline]
members(&self) -> ArchiveMemberIterator<'data, R>122     pub fn members(&self) -> ArchiveMemberIterator<'data, R> {
123         ArchiveMemberIterator {
124             data: self.data,
125             offset: self.offset,
126             len: self.len,
127             names: self.names,
128         }
129     }
130 }
131 
/// An iterator over the members of an archive.
///
/// Created by [`ArchiveFile::members`]. Each item is the result of parsing
/// one member; once a parse error has been yielded, the iterator is exhausted.
#[derive(Debug)]
pub struct ArchiveMemberIterator<'data, R: ReadRef<'data> = &'data [u8]> {
    /// The underlying archive data.
    data: R,
    /// The offset of the next member header to parse.
    offset: u64,
    /// The total length of the archive; iteration stops once `offset` reaches it.
    len: u64,
    /// The names table used to resolve extended member names.
    names: &'data [u8],
}
140 
141 impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> {
142     type Item = read::Result<ArchiveMember<'data>>;
143 
next(&mut self) -> Option<Self::Item>144     fn next(&mut self) -> Option<Self::Item> {
145         if self.offset >= self.len {
146             return None;
147         }
148         let member = ArchiveMember::parse(self.data, &mut self.offset, self.names);
149         if member.is_err() {
150             self.offset = self.len;
151         }
152         Some(member)
153     }
154 }
155 
/// A partially parsed archive member.
///
/// This holds the raw header together with the resolved name and the location
/// of the file data. The file data itself is only read by
/// [`ArchiveMember::data`].
#[derive(Debug)]
pub struct ArchiveMember<'data> {
    /// The raw member header.
    header: &'data archive::Header,
    /// The parsed name, which may have been read from the names table or from
    /// the start of the file data.
    name: &'data [u8],
    /// The offset of the file data, after any BSD extended name.
    offset: u64,
    /// The size of the file data, excluding any BSD extended name.
    size: u64,
}
164 
165 impl<'data> ArchiveMember<'data> {
166     /// Parse the archive member header, name, and file data.
167     ///
168     /// This reads the extended name (if any) and adjusts the file size.
parse<R: ReadRef<'data>>( data: R, offset: &mut u64, names: &'data [u8], ) -> read::Result<Self>169     fn parse<R: ReadRef<'data>>(
170         data: R,
171         offset: &mut u64,
172         names: &'data [u8],
173     ) -> read::Result<Self> {
174         let header = data
175             .read::<archive::Header>(offset)
176             .read_error("Invalid archive member header")?;
177         if header.terminator != archive::TERMINATOR {
178             return Err(Error("Invalid archive terminator"));
179         }
180 
181         let mut file_offset = *offset;
182         let mut file_size =
183             parse_u64_digits(&header.size, 10).read_error("Invalid archive member size")?;
184         *offset = offset
185             .checked_add(file_size)
186             .read_error("Archive member size is too large")?;
187         // Entries are padded to an even number of bytes.
188         if (file_size & 1) != 0 {
189             *offset = offset.saturating_add(1);
190         }
191 
192         let name = if header.name[0] == b'/' && (header.name[1] as char).is_digit(10) {
193             // Read file name from the names table.
194             parse_sysv_extended_name(&header.name[1..], names)
195                 .read_error("Invalid archive extended name offset")?
196         } else if &header.name[..3] == b"#1/" && (header.name[3] as char).is_digit(10) {
197             // Read file name from the start of the file data.
198             parse_bsd_extended_name(&header.name[3..], data, &mut file_offset, &mut file_size)
199                 .read_error("Invalid archive extended name length")?
200         } else if header.name[0] == b'/' {
201             let name_len = memchr::memchr(b' ', &header.name).unwrap_or(header.name.len());
202             &header.name[..name_len]
203         } else {
204             let name_len = memchr::memchr(b'/', &header.name)
205                 .or_else(|| memchr::memchr(b' ', &header.name))
206                 .unwrap_or(header.name.len());
207             &header.name[..name_len]
208         };
209 
210         Ok(ArchiveMember {
211             header,
212             name,
213             offset: file_offset,
214             size: file_size,
215         })
216     }
217 
218     /// Return the raw header.
219     #[inline]
header(&self) -> &'data archive::Header220     pub fn header(&self) -> &'data archive::Header {
221         self.header
222     }
223 
224     /// Return the parsed file name.
225     ///
226     /// This may be an extended file name.
227     #[inline]
name(&self) -> &'data [u8]228     pub fn name(&self) -> &'data [u8] {
229         self.name
230     }
231 
232     /// Parse the file modification timestamp from the header.
233     #[inline]
date(&self) -> Option<u64>234     pub fn date(&self) -> Option<u64> {
235         parse_u64_digits(&self.header.date, 10)
236     }
237 
238     /// Parse the user ID from the header.
239     #[inline]
uid(&self) -> Option<u64>240     pub fn uid(&self) -> Option<u64> {
241         parse_u64_digits(&self.header.uid, 10)
242     }
243 
244     /// Parse the group ID from the header.
245     #[inline]
gid(&self) -> Option<u64>246     pub fn gid(&self) -> Option<u64> {
247         parse_u64_digits(&self.header.gid, 10)
248     }
249 
250     /// Parse the file mode from the header.
251     #[inline]
mode(&self) -> Option<u64>252     pub fn mode(&self) -> Option<u64> {
253         parse_u64_digits(&self.header.mode, 8)
254     }
255 
256     /// Return the offset and size of the file data.
file_range(&self) -> (u64, u64)257     pub fn file_range(&self) -> (u64, u64) {
258         (self.offset, self.size)
259     }
260 
261     /// Return the file data.
262     #[inline]
data<R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [u8]>263     pub fn data<R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [u8]> {
264         data.read_bytes_at(self.offset, self.size)
265             .read_error("Archive member size is too large")
266     }
267 }
268 
/// Parse an unsigned number in the given `radix` from a space-padded field.
///
/// Parsing stops at the first space, and any bytes after it are ignored.
/// Returns `None` if the field starts with a space, if a byte before the first
/// space is not a digit in `radix`, or if the value does not fit in a `u64`.
fn parse_u64_digits(digits: &[u8], radix: u32) -> Option<u64> {
    // A leading space means the field holds no number at all.
    if digits.first() == Some(&b' ') {
        return None;
    }
    digits
        .iter()
        .take_while(|&&byte| byte != b' ')
        .try_fold(0u64, |value, &byte| {
            let digit = char::from(byte).to_digit(radix)?;
            value
                .checked_mul(u64::from(radix))?
                .checked_add(u64::from(digit))
        })
}
287 
parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<&'data [u8], ()>288 fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<&'data [u8], ()> {
289     let offset = parse_u64_digits(digits, 10).ok_or(())?;
290     let offset = offset.try_into().map_err(|_| ())?;
291     let name_data = names.get(offset..).ok_or(())?;
292     let name = match memchr::memchr2(b'/', b'\0', name_data) {
293         Some(len) => &name_data[..len],
294         None => name_data,
295     };
296     Ok(name)
297 }
298 
299 /// Modifies `data` to start after the extended name.
parse_bsd_extended_name<'data, R: ReadRef<'data>>( digits: &[u8], data: R, offset: &mut u64, size: &mut u64, ) -> Result<&'data [u8], ()>300 fn parse_bsd_extended_name<'data, R: ReadRef<'data>>(
301     digits: &[u8],
302     data: R,
303     offset: &mut u64,
304     size: &mut u64,
305 ) -> Result<&'data [u8], ()> {
306     let len = parse_u64_digits(digits, 10).ok_or(())?;
307     *size = size.checked_sub(len).ok_or(())?;
308     let name_data = data.read_bytes(offset, len)?;
309     let name = match memchr::memchr(b'\0', name_data) {
310         Some(len) => &name_data[..len],
311         None => name_data,
312     };
313     Ok(name)
314 }
315 
#[cfg(test)]
mod tests {
    use super::*;

    // Checks the archive kind that is detected for various combinations of
    // leading special members.
    #[test]
    fn kind() {
        // Only the archive magic, with no members.
        let data = b"!<arch>\n";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Unknown);

        // A single "/" member.
        let data = b"\
            !<arch>\n\
            /                                               4         `\n\
            0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Gnu);

        // A single "//" member.
        let data = b"\
            !<arch>\n\
            //                                              4         `\n\
            0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Gnu);

        // A "/" member followed by a "//" member.
        let data = b"\
            !<arch>\n\
            /                                               4         `\n\
            0000\
            //                                              4         `\n\
            0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Gnu);

        // A "__.SYMDEF" member with the name stored in the header.
        let data = b"\
            !<arch>\n\
            __.SYMDEF                                       4         `\n\
            0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Bsd);

        // A "__.SYMDEF" member with the name stored as an extended name at
        // the start of the file data.
        let data = b"\
            !<arch>\n\
            #1/9                                            13        `\n\
            __.SYMDEF0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Bsd);

        // A "__.SYMDEF SORTED" member with the name stored as an extended
        // name at the start of the file data.
        let data = b"\
            !<arch>\n\
            #1/16                                           20        `\n\
            __.SYMDEF SORTED0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Bsd);

        // Two "/" members followed by a "//" member.
        let data = b"\
            !<arch>\n\
            /                                               4         `\n\
            0000\
            /                                               4         `\n\
            0000\
            //                                              4         `\n\
            0000";
        let archive = ArchiveFile::parse(&data[..]).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Coff);
    }

    // Checks member names in a GNU archive: a name containing spaces, a name
    // that exactly fills the header field before its "/" terminator, and a
    // name that is looked up in the "//" names table. The member with odd
    // sized data is followed by a padding byte.
    #[test]
    fn gnu_names() {
        let data = b"\
            !<arch>\n\
            //                                              18        `\n\
            0123456789abcdef/\n\
            s p a c e/      0           0     0     644     4         `\n\
            0000\
            0123456789abcde/0           0     0     644     3         `\n\
            odd\n\
            /0              0           0     0     644     4         `\n\
            even";
        let data = &data[..];
        let archive = ArchiveFile::parse(data).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Gnu);
        let mut members = archive.members();

        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"s p a c e");
        assert_eq!(member.data(data).unwrap(), &b"0000"[..]);

        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcde");
        assert_eq!(member.data(data).unwrap(), &b"odd"[..]);

        // The name "/0" refers to offset 0 in the names table.
        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcdef");
        assert_eq!(member.data(data).unwrap(), &b"even"[..]);

        assert!(members.next().is_none());
    }

    // Checks member names in a BSD style archive: a space terminated name in
    // the header, and an extended name stored at the start of the file data.
    // There is no symbol table member, so the kind is reported as unknown.
    #[test]
    fn bsd_names() {
        let data = b"\
            !<arch>\n\
            0123456789abcde 0           0     0     644     3         `\n\
            odd\n\
            #1/16           0           0     0     644     20        `\n\
            0123456789abcdefeven";
        let data = &data[..];
        let archive = ArchiveFile::parse(data).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Unknown);
        let mut members = archive.members();

        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcde");
        assert_eq!(member.data(data).unwrap(), &b"odd"[..]);

        // The 16 byte name is excluded from the member's file data.
        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcdef");
        assert_eq!(member.data(data).unwrap(), &b"even"[..]);

        assert!(members.next().is_none());
    }
}
438