1 use byteorder::{BigEndian, ReadBytesExt};
2 use error::{Error, Result};
3 use huffman::{HuffmanTable, HuffmanTableClass};
4 use marker::Marker;
5 use marker::Marker::*;
6 use std::io::{self, Read};
7 use std::ops::Range;
8 
/// A width/height pair.
///
/// The unit depends on the field holding the value: it is used for image sizes,
/// for the size of the MCU grid and for per-component sample and block counts.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Dimensions {
    /// Horizontal extent.
    pub width: u16,
    /// Vertical extent.
    pub height: u16,
}
14 
/// Entropy coding method selected by the frame's SOF marker.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EntropyCoding {
    /// Huffman coding (SOF0-SOF3 and SOF5-SOF7).
    Huffman,
    /// Arithmetic coding (SOF9-SOF11 and SOF13-SOF15).
    Arithmetic,
}
20 
/// Coding process selected by the frame's SOF marker.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CodingProcess {
    /// Sequential DCT (SOF0, SOF1, SOF5, SOF9 and SOF13).
    DctSequential,
    /// Progressive DCT (SOF2, SOF6, SOF10 and SOF14).
    DctProgressive,
    /// Lossless (SOF3, SOF7, SOF11 and SOF15).
    Lossless,
}
27 
28 #[derive(Clone)]
29 pub struct FrameInfo {
30     pub is_baseline: bool,
31     pub is_differential: bool,
32     pub coding_process: CodingProcess,
33     pub entropy_coding: EntropyCoding,
34     pub precision: u8,
35 
36     pub image_size: Dimensions,
37     pub output_size: Dimensions,
38     pub mcu_size: Dimensions,
39     pub components: Vec<Component>,
40 }
41 
/// Information gathered from a scan header (SOS marker segment).
///
/// The three index vectors are parallel: entry `i` of each one describes the
/// `i`-th component of the scan.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ScanInfo {
    /// Index of each scan component within the frame's component list.
    pub component_indices: Vec<usize>,
    /// DC entropy table selector of each scan component.
    pub dc_table_indices: Vec<usize>,
    /// AC entropy table selector of each scan component.
    pub ac_table_indices: Vec<usize>,

    /// Coefficients covered by the scan as a half-open range, i.e. `Ss .. Se + 1`.
    pub spectral_selection: Range<u8>,
    /// High nibble of the successive approximation byte (Ah).
    pub successive_approximation_high: u8,
    /// Low nibble of the successive approximation byte (Al).
    pub successive_approximation_low: u8,
}
52 
53 #[derive(Clone, Debug)]
54 pub struct Component {
55     pub identifier: u8,
56 
57     pub horizontal_sampling_factor: u8,
58     pub vertical_sampling_factor: u8,
59 
60     pub quantization_table_index: usize,
61 
62     pub dct_scale: usize,
63 
64     pub size: Dimensions,
65     pub block_size: Dimensions,
66 }
67 
68 #[derive(Debug)]
69 pub enum AppData {
70     Adobe(AdobeColorTransform),
71     Jfif,
72     Avi1,
73 }
74 
// http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
/// Color transform flag stored in the last byte of an Adobe APP14 segment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AdobeColorTransform {
    /// Flag value 0: RGB or CMYK.
    Unknown,
    /// Flag value 1.
    YCbCr,
    /// Flag value 2: YCbCrK.
    YCCK,
}
84 
85 impl FrameInfo {
update_idct_size(&mut self, idct_size: usize)86     pub(crate) fn update_idct_size(&mut self, idct_size: usize) {
87         for component in &mut self.components {
88             component.dct_scale = idct_size;
89         }
90 
91         update_component_sizes(self.image_size, &mut self.components);
92 
93         self.output_size = Dimensions {
94             width: (self.image_size.width as f32 * idct_size as f32 / 8.0).ceil() as u16,
95             height: (self.image_size.height as f32 * idct_size as f32 / 8.0).ceil() as u16
96         };
97     }
98 }
99 
read_length<R: Read>(reader: &mut R, marker: Marker) -> Result<usize>100 fn read_length<R: Read>(reader: &mut R, marker: Marker) -> Result<usize> {
101     assert!(marker.has_length());
102 
103     // length is including itself.
104     let length = reader.read_u16::<BigEndian>()? as usize;
105 
106     if length < 2 {
107         return Err(Error::Format(format!("encountered {:?} with invalid length {}", marker, length)));
108     }
109 
110     Ok(length - 2)
111 }
112 
skip_bytes<R: Read>(reader: &mut R, length: usize) -> Result<()>113 fn skip_bytes<R: Read>(reader: &mut R, length: usize) -> Result<()> {
114     let length = length as u64;
115     let to_skip = &mut reader.by_ref().take(length);
116     let copied = io::copy(to_skip, &mut io::sink())?;
117     if copied < length {
118         Err(Error::Io(io::ErrorKind::UnexpectedEof.into()))
119     } else {
120         Ok(())
121     }
122 }
123 
124 // Section B.2.2
parse_sof<R: Read>(reader: &mut R, marker: Marker) -> Result<FrameInfo>125 pub fn parse_sof<R: Read>(reader: &mut R, marker: Marker) -> Result<FrameInfo> {
126     let length = read_length(reader, marker)?;
127 
128     if length <= 6 {
129         return Err(Error::Format("invalid length in SOF".to_owned()));
130     }
131 
132     let is_baseline = marker == SOF(0);
133     let is_differential = match marker {
134         SOF(0 ..= 3) | SOF(9 ..= 11)  => false,
135         SOF(5 ..= 7) | SOF(13 ..= 15) => true,
136         _ => panic!(),
137     };
138     let coding_process = match marker {
139         SOF(0) | SOF(1) | SOF(5) | SOF(9) | SOF(13) => CodingProcess::DctSequential,
140         SOF(2) | SOF(6) | SOF(10) | SOF(14)         => CodingProcess::DctProgressive,
141         SOF(3) | SOF(7) | SOF(11) | SOF(15)         => CodingProcess::Lossless,
142         _ => panic!(),
143     };
144     let entropy_coding = match marker {
145         SOF(0 ..= 3) | SOF(5 ..= 7)     => EntropyCoding::Huffman,
146         SOF(9 ..= 11) | SOF(13 ..= 15)  => EntropyCoding::Arithmetic,
147         _ => panic!(),
148     };
149 
150     let precision = reader.read_u8()?;
151 
152     match precision {
153         8 => {},
154         12 => {
155             if is_baseline {
156                 return Err(Error::Format("12 bit sample precision is not allowed in baseline".to_owned()));
157             }
158         },
159         _ => {
160             if coding_process != CodingProcess::Lossless {
161                 return Err(Error::Format(format!("invalid precision {} in frame header", precision)))
162             }
163         },
164     }
165 
166     let height = reader.read_u16::<BigEndian>()?;
167     let width = reader.read_u16::<BigEndian>()?;
168 
169     // height:
170     // "Value 0 indicates that the number of lines shall be defined by the DNL marker and
171     //     parameters at the end of the first scan (see B.2.5)."
172 
173     if width == 0 {
174         return Err(Error::Format("zero width in frame header".to_owned()));
175     }
176 
177     let component_count = reader.read_u8()?;
178 
179     if component_count == 0 {
180         return Err(Error::Format("zero component count in frame header".to_owned()));
181     }
182     if coding_process == CodingProcess::DctProgressive && component_count > 4 {
183         return Err(Error::Format("progressive frame with more than 4 components".to_owned()));
184     }
185 
186     if length != 6 + 3 * component_count as usize {
187         return Err(Error::Format("invalid length in SOF".to_owned()));
188     }
189 
190     let mut components: Vec<Component> = Vec::with_capacity(component_count as usize);
191 
192     for _ in 0 .. component_count {
193         let identifier = reader.read_u8()?;
194 
195         // Each component's identifier must be unique.
196         if components.iter().any(|c| c.identifier == identifier) {
197             return Err(Error::Format(format!("duplicate frame component identifier {}", identifier)));
198         }
199 
200         let byte = reader.read_u8()?;
201         let horizontal_sampling_factor = byte >> 4;
202         let vertical_sampling_factor = byte & 0x0f;
203 
204         if horizontal_sampling_factor == 0 || horizontal_sampling_factor > 4 {
205             return Err(Error::Format(format!("invalid horizontal sampling factor {}", horizontal_sampling_factor)));
206         }
207         if vertical_sampling_factor == 0 || vertical_sampling_factor > 4 {
208             return Err(Error::Format(format!("invalid vertical sampling factor {}", vertical_sampling_factor)));
209         }
210 
211         let quantization_table_index = reader.read_u8()?;
212 
213         if quantization_table_index > 3 || (coding_process == CodingProcess::Lossless && quantization_table_index != 0) {
214             return Err(Error::Format(format!("invalid quantization table index {}", quantization_table_index)));
215         }
216 
217         components.push(Component {
218             identifier: identifier,
219             horizontal_sampling_factor: horizontal_sampling_factor,
220             vertical_sampling_factor: vertical_sampling_factor,
221             quantization_table_index: quantization_table_index as usize,
222             dct_scale: 8,
223             size: Dimensions {width: 0, height: 0},
224             block_size: Dimensions {width: 0, height: 0},
225         });
226     }
227 
228     let mcu_size = update_component_sizes(Dimensions { width, height }, &mut components);
229 
230     Ok(FrameInfo {
231         is_baseline: is_baseline,
232         is_differential: is_differential,
233         coding_process: coding_process,
234         entropy_coding: entropy_coding,
235         precision: precision,
236         image_size: Dimensions { width, height },
237         output_size: Dimensions { width, height },
238         mcu_size,
239         components: components,
240     })
241 }
242 
/// Divides `x` by `y`, rounding up, and truncates the quotient to `u16`.
///
/// Panics with "invalid dimensions" if `x` or `y` is zero.
fn ceil_div(x: u32, y: u32) -> u16 {
    assert!(x>0 && y>0, "invalid dimensions");
    // One extra unit whenever the division leaves a remainder.
    let round_up = (x % y != 0) as u32;
    (x / y + round_up) as u16
}
248 
update_component_sizes(size: Dimensions, components: &mut [Component]) -> Dimensions249 fn update_component_sizes(size: Dimensions, components: &mut [Component]) -> Dimensions {
250     let h_max = components.iter().map(|c| c.horizontal_sampling_factor).max().unwrap() as u32;
251     let v_max = components.iter().map(|c| c.vertical_sampling_factor).max().unwrap() as u32;
252 
253     let mcu_size = Dimensions {
254         width: ceil_div(size.width as u32, h_max * 8),
255         height: ceil_div(size.height as u32, v_max * 8),
256     };
257 
258     for component in components {
259         component.size.width = ceil_div(size.width as u32 * component.horizontal_sampling_factor as u32 * component.dct_scale as u32, h_max * 8);
260         component.size.height = ceil_div(size.height as u32 * component.vertical_sampling_factor as u32 * component.dct_scale as u32, v_max * 8);
261 
262         component.block_size.width = mcu_size.width * component.horizontal_sampling_factor as u16;
263         component.block_size.height = mcu_size.height * component.vertical_sampling_factor as u16;
264     }
265 
266     mcu_size
267 }
268 
// A single 2x2-sampled component in an 800x280 image: MCUs are 16x16 samples,
// so the grid is 50x18 MCUs (280 / 16 rounds up) and 100x36 blocks.
#[test]
fn test_update_component_sizes() {
    let unsized_dimensions = Dimensions { width: 0, height: 0 };
    let component = Component {
        identifier: 1,
        horizontal_sampling_factor: 2,
        vertical_sampling_factor: 2,
        quantization_table_index: 0,
        dct_scale: 8,
        size: unsized_dimensions,
        block_size: unsized_dimensions,
    };
    let mut components = [component];
    let image_size = Dimensions { width: 800, height: 280 };

    let mcu = update_component_sizes(image_size, &mut components);

    assert_eq!(mcu, Dimensions { width: 50, height: 18 });
    assert_eq!(components[0].block_size, Dimensions { width: 100, height: 36 });
    assert_eq!(components[0].size, Dimensions { width: 800, height: 280 });
}
287 
288 // Section B.2.3
parse_sos<R: Read>(reader: &mut R, frame: &FrameInfo) -> Result<ScanInfo>289 pub fn parse_sos<R: Read>(reader: &mut R, frame: &FrameInfo) -> Result<ScanInfo> {
290     let length = read_length(reader, SOS)?;
291     if 0 == length {
292         return Err(Error::Format("zero length in SOS".to_owned()));
293     }
294 
295     let component_count = reader.read_u8()?;
296 
297     if component_count == 0 || component_count > 4 {
298         return Err(Error::Format(format!("invalid component count {} in scan header", component_count)));
299     }
300 
301     if length != 4 + 2 * component_count as usize {
302         return Err(Error::Format("invalid length in SOS".to_owned()));
303     }
304 
305     let mut component_indices = Vec::with_capacity(component_count as usize);
306     let mut dc_table_indices = Vec::with_capacity(component_count as usize);
307     let mut ac_table_indices = Vec::with_capacity(component_count as usize);
308 
309     for _ in 0 .. component_count {
310         let identifier = reader.read_u8()?;
311 
312         let component_index = match frame.components.iter().position(|c| c.identifier == identifier) {
313             Some(value) => value,
314             None => return Err(Error::Format(format!("scan component identifier {} does not match any of the component identifiers defined in the frame", identifier))),
315         };
316 
317         // Each of the scan's components must be unique.
318         if component_indices.contains(&component_index) {
319             return Err(Error::Format(format!("duplicate scan component identifier {}", identifier)));
320         }
321 
322         // "... the ordering in the scan header shall follow the ordering in the frame header."
323         if component_index < *component_indices.iter().max().unwrap_or(&0) {
324             return Err(Error::Format("the scan component order does not follow the order in the frame header".to_owned()));
325         }
326 
327         let byte = reader.read_u8()?;
328         let dc_table_index = byte >> 4;
329         let ac_table_index = byte & 0x0f;
330 
331         if dc_table_index > 3 || (frame.is_baseline && dc_table_index > 1) {
332             return Err(Error::Format(format!("invalid dc table index {}", dc_table_index)));
333         }
334         if ac_table_index > 3 || (frame.is_baseline && ac_table_index > 1) {
335             return Err(Error::Format(format!("invalid ac table index {}", ac_table_index)));
336         }
337 
338         component_indices.push(component_index);
339         dc_table_indices.push(dc_table_index as usize);
340         ac_table_indices.push(ac_table_index as usize);
341     }
342 
343     let blocks_per_mcu = component_indices.iter().map(|&i| {
344         frame.components[i].horizontal_sampling_factor as u32 * frame.components[i].vertical_sampling_factor as u32
345     }).fold(0, ::std::ops::Add::add);
346 
347     if component_count > 1 && blocks_per_mcu > 10 {
348         return Err(Error::Format("scan with more than one component and more than 10 blocks per MCU".to_owned()));
349     }
350 
351     let spectral_selection_start = reader.read_u8()?;
352     let spectral_selection_end = reader.read_u8()?;
353 
354     let byte = reader.read_u8()?;
355     let successive_approximation_high = byte >> 4;
356     let successive_approximation_low = byte & 0x0f;
357 
358     if frame.coding_process == CodingProcess::DctProgressive {
359         if spectral_selection_end > 63 || spectral_selection_start > spectral_selection_end ||
360                 (spectral_selection_start == 0 && spectral_selection_end != 0) {
361             return Err(Error::Format(format!("invalid spectral selection parameters: ss={}, se={}", spectral_selection_start, spectral_selection_end)));
362         }
363         if spectral_selection_start != 0 && component_count != 1 {
364             return Err(Error::Format("spectral selection scan with AC coefficients can't have more than one component".to_owned()));
365         }
366 
367         if successive_approximation_high > 13 || successive_approximation_low > 13 {
368             return Err(Error::Format(format!("invalid successive approximation parameters: ah={}, al={}", successive_approximation_high, successive_approximation_low)));
369         }
370 
371         // Section G.1.1.1.2
372         // "Each scan which follows the first scan for a given band progressively improves
373         //     the precision of the coefficients by one bit, until full precision is reached."
374         if successive_approximation_high != 0 && successive_approximation_high != successive_approximation_low + 1 {
375             return Err(Error::Format("successive approximation scan with more than one bit of improvement".to_owned()));
376         }
377     }
378     else {
379         if spectral_selection_start != 0 || spectral_selection_end != 63 {
380             return Err(Error::Format("spectral selection is not allowed in non-progressive scan".to_owned()));
381         }
382         if successive_approximation_high != 0 || successive_approximation_low != 0 {
383             return Err(Error::Format("successive approximation is not allowed in non-progressive scan".to_owned()));
384         }
385     }
386 
387     Ok(ScanInfo {
388         component_indices: component_indices,
389         dc_table_indices: dc_table_indices,
390         ac_table_indices: ac_table_indices,
391         spectral_selection: Range {
392             start: spectral_selection_start,
393             end: spectral_selection_end + 1,
394         },
395         successive_approximation_high: successive_approximation_high,
396         successive_approximation_low: successive_approximation_low,
397     })
398 }
399 
400 // Section B.2.4.1
parse_dqt<R: Read>(reader: &mut R) -> Result<[Option<[u16; 64]>; 4]>401 pub fn parse_dqt<R: Read>(reader: &mut R) -> Result<[Option<[u16; 64]>; 4]> {
402     let mut length = read_length(reader, DQT)?;
403     let mut tables = [None; 4];
404 
405     // Each DQT segment may contain multiple quantization tables.
406     while length > 0 {
407         let byte = reader.read_u8()?;
408         let precision = (byte >> 4) as usize;
409         let index = (byte & 0x0f) as usize;
410 
411         // The combination of 8-bit sample precision and 16-bit quantization tables is explicitly
412         // disallowed by the JPEG spec:
413         //     "An 8-bit DCT-based process shall not use a 16-bit precision quantization table."
414         //     "Pq: Quantization table element precision – Specifies the precision of the Qk
415         //      values. Value 0 indicates 8-bit Qk values; value 1 indicates 16-bit Qk values. Pq
416         //      shall be zero for 8 bit sample precision P (see B.2.2)."
417         // libjpeg allows this behavior though, and there are images in the wild using it. So to
418         // match libjpeg's behavior we are deviating from the JPEG spec here.
419         if precision > 1 {
420             return Err(Error::Format(format!("invalid precision {} in DQT", precision)));
421         }
422         if index > 3 {
423             return Err(Error::Format(format!("invalid destination identifier {} in DQT", index)));
424         }
425         if length < 65 + 64 * precision {
426             return Err(Error::Format("invalid length in DQT".to_owned()));
427         }
428 
429         let mut table = [0u16; 64];
430 
431         for i in 0 .. 64 {
432             table[i] = match precision {
433                 0 => reader.read_u8()? as u16,
434                 1 => reader.read_u16::<BigEndian>()?,
435                 _ => unreachable!(),
436             };
437         }
438 
439         if table.iter().any(|&val| val == 0) {
440             return Err(Error::Format("quantization table contains element with a zero value".to_owned()));
441         }
442 
443         tables[index] = Some(table);
444         length -= 65 + 64 * precision;
445     }
446 
447     Ok(tables)
448 }
449 
450 // Section B.2.4.2
parse_dht<R: Read>(reader: &mut R, is_baseline: Option<bool>) -> Result<(Vec<Option<HuffmanTable>>, Vec<Option<HuffmanTable>>)>451 pub fn parse_dht<R: Read>(reader: &mut R, is_baseline: Option<bool>) -> Result<(Vec<Option<HuffmanTable>>, Vec<Option<HuffmanTable>>)> {
452     let mut length = read_length(reader, DHT)?;
453     let mut dc_tables = vec![None, None, None, None];
454     let mut ac_tables = vec![None, None, None, None];
455 
456     // Each DHT segment may contain multiple huffman tables.
457     while length > 17 {
458         let byte = reader.read_u8()?;
459         let class = byte >> 4;
460         let index = (byte & 0x0f) as usize;
461 
462         if class != 0 && class != 1 {
463             return Err(Error::Format(format!("invalid class {} in DHT", class)));
464         }
465         if is_baseline == Some(true) && index > 1 {
466             return Err(Error::Format("a maximum of two huffman tables per class are allowed in baseline".to_owned()));
467         }
468         if index > 3 {
469             return Err(Error::Format(format!("invalid destination identifier {} in DHT", index)));
470         }
471 
472         let mut counts = [0u8; 16];
473         reader.read_exact(&mut counts)?;
474 
475         let size = counts.iter().map(|&val| val as usize).fold(0, ::std::ops::Add::add);
476 
477         if size == 0 {
478             return Err(Error::Format("encountered table with zero length in DHT".to_owned()));
479         }
480         else if size > 256 {
481             return Err(Error::Format("encountered table with excessive length in DHT".to_owned()));
482         }
483         else if size > length - 17 {
484             return Err(Error::Format("invalid length in DHT".to_owned()));
485         }
486 
487         let mut values = vec![0u8; size];
488         reader.read_exact(&mut values)?;
489 
490         match class {
491             0 => dc_tables[index] = Some(HuffmanTable::new(&counts, &values, HuffmanTableClass::DC)?),
492             1 => ac_tables[index] = Some(HuffmanTable::new(&counts, &values, HuffmanTableClass::AC)?),
493             _ => unreachable!(),
494         }
495 
496         length -= 17 + size;
497     }
498 
499     if length != 0 {
500         return Err(Error::Format("invalid length in DHT".to_owned()));
501     }
502 
503     Ok((dc_tables, ac_tables))
504 }
505 
506 // Section B.2.4.4
parse_dri<R: Read>(reader: &mut R) -> Result<u16>507 pub fn parse_dri<R: Read>(reader: &mut R) -> Result<u16> {
508     let length = read_length(reader, DRI)?;
509 
510     if length != 2 {
511         return Err(Error::Format("DRI with invalid length".to_owned()));
512     }
513 
514     Ok(reader.read_u16::<BigEndian>()?)
515 }
516 
517 // Section B.2.4.5
parse_com<R: Read>(reader: &mut R) -> Result<Vec<u8>>518 pub fn parse_com<R: Read>(reader: &mut R) -> Result<Vec<u8>> {
519     let length = read_length(reader, COM)?;
520     let mut buffer = vec![0u8; length];
521 
522     reader.read_exact(&mut buffer)?;
523 
524     Ok(buffer)
525 }
526 
527 // Section B.2.4.6
parse_app<R: Read>(reader: &mut R, marker: Marker) -> Result<Option<AppData>>528 pub fn parse_app<R: Read>(reader: &mut R, marker: Marker) -> Result<Option<AppData>> {
529     let length = read_length(reader, marker)?;
530     let mut bytes_read = 0;
531     let mut result = None;
532 
533     match marker {
534         APP(0) => {
535             if length >= 5 {
536                 let mut buffer = [0u8; 5];
537                 reader.read_exact(&mut buffer)?;
538                 bytes_read = buffer.len();
539 
540                 // http://www.w3.org/Graphics/JPEG/jfif3.pdf
541                 if &buffer[0 .. 5] == &[b'J', b'F', b'I', b'F', b'\0'] {
542                     result = Some(AppData::Jfif);
543                 // https://sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#AVI1
544                 } else if &buffer[0 .. 5] == &[b'A', b'V', b'I', b'1', b'\0'] {
545                     result = Some(AppData::Avi1);
546                 }
547             }
548         },
549         APP(14) => {
550             if length >= 12 {
551                 let mut buffer = [0u8; 12];
552                 reader.read_exact(&mut buffer)?;
553                 bytes_read = buffer.len();
554 
555                 // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
556                 if &buffer[0 .. 6] == &[b'A', b'd', b'o', b'b', b'e', b'\0'] {
557                     let color_transform = match buffer[11] {
558                         0 => AdobeColorTransform::Unknown,
559                         1 => AdobeColorTransform::YCbCr,
560                         2 => AdobeColorTransform::YCCK,
561                         _ => return Err(Error::Format("invalid color transform in adobe app segment".to_owned())),
562                     };
563 
564                     result = Some(AppData::Adobe(color_transform));
565                 }
566             }
567         },
568         _ => {},
569     }
570 
571     skip_bytes(reader, length - bytes_read)?;
572     Ok(result)
573 }
574