1 use byteorder::{BigEndian, ReadBytesExt};
2 use error::{Error, Result};
3 use huffman::{HuffmanTable, HuffmanTableClass};
4 use marker::Marker;
5 use marker::Marker::*;
6 use std::io::{self, Read};
7 use std::ops::Range;
8
/// A width/height pair.
///
/// The unit depends on the field holding the value: `FrameInfo::image_size` stores the
/// values read from the frame header, while `FrameInfo::mcu_size` and
/// `Component::block_size` store counts derived from them by `update_component_sizes`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Dimensions {
    /// Horizontal extent.
    pub width: u16,
    /// Vertical extent.
    pub height: u16,
}
14
/// Entropy coding method of a frame, selected by its SOF marker in `parse_sof`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EntropyCoding {
    /// Chosen for SOF0-SOF3 and SOF5-SOF7.
    Huffman,
    /// Chosen for SOF9-SOF11 and SOF13-SOF15.
    Arithmetic,
}
20
/// Coding process of a frame, selected by its SOF marker in `parse_sof`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CodingProcess {
    /// Chosen for SOF0, SOF1, SOF5, SOF9 and SOF13.
    DctSequential,
    /// Chosen for SOF2, SOF6, SOF10 and SOF14.
    DctProgressive,
    /// Chosen for SOF3, SOF7, SOF11 and SOF15.
    Lossless,
}
27
28 #[derive(Clone)]
29 pub struct FrameInfo {
30 pub is_baseline: bool,
31 pub is_differential: bool,
32 pub coding_process: CodingProcess,
33 pub entropy_coding: EntropyCoding,
34 pub precision: u8,
35
36 pub image_size: Dimensions,
37 pub output_size: Dimensions,
38 pub mcu_size: Dimensions,
39 pub components: Vec<Component>,
40 }
41
/// Everything `parse_sos` extracts from a scan header.
///
/// The three index vectors are parallel: entry `i` of each describes the `i`-th component
/// listed in the scan header.
#[derive(Debug)]
pub struct ScanInfo {
    /// Position of each scan component inside the frame's component list.
    pub component_indices: Vec<usize>,
    /// DC table selector of each scan component (upper nibble of its selector byte).
    pub dc_table_indices: Vec<usize>,
    /// AC table selector of each scan component (lower nibble of its selector byte).
    pub ac_table_indices: Vec<usize>,

    /// Half-open coefficient range: `start` is the header's start of spectral selection
    /// and `end` is its end of spectral selection plus one.
    pub spectral_selection: Range<u8>,
    /// Upper nibble of the successive approximation byte.
    pub successive_approximation_high: u8,
    /// Lower nibble of the successive approximation byte.
    pub successive_approximation_low: u8,
}
52
53 #[derive(Clone, Debug)]
54 pub struct Component {
55 pub identifier: u8,
56
57 pub horizontal_sampling_factor: u8,
58 pub vertical_sampling_factor: u8,
59
60 pub quantization_table_index: usize,
61
62 pub dct_scale: usize,
63
64 pub size: Dimensions,
65 pub block_size: Dimensions,
66 }
67
68 #[derive(Debug)]
69 pub enum AppData {
70 Adobe(AdobeColorTransform),
71 Jfif,
72 Avi1,
73 }
74
/// Color transform setting carried in an Adobe APP14 segment.
///
/// `parse_app` maps the segment's transform byte 0, 1 and 2 to `Unknown`, `YCbCr` and
/// `YCCK` respectively.
// http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AdobeColorTransform {
    /// RGB or CMYK
    Unknown,
    /// YCbCr
    YCbCr,
    /// YCbCrK
    YCCK,
}
84
85 impl FrameInfo {
update_idct_size(&mut self, idct_size: usize)86 pub(crate) fn update_idct_size(&mut self, idct_size: usize) {
87 for component in &mut self.components {
88 component.dct_scale = idct_size;
89 }
90
91 update_component_sizes(self.image_size, &mut self.components);
92
93 self.output_size = Dimensions {
94 width: (self.image_size.width as f32 * idct_size as f32 / 8.0).ceil() as u16,
95 height: (self.image_size.height as f32 * idct_size as f32 / 8.0).ceil() as u16
96 };
97 }
98 }
99
read_length<R: Read>(reader: &mut R, marker: Marker) -> Result<usize>100 fn read_length<R: Read>(reader: &mut R, marker: Marker) -> Result<usize> {
101 assert!(marker.has_length());
102
103 // length is including itself.
104 let length = reader.read_u16::<BigEndian>()? as usize;
105
106 if length < 2 {
107 return Err(Error::Format(format!("encountered {:?} with invalid length {}", marker, length)));
108 }
109
110 Ok(length - 2)
111 }
112
skip_bytes<R: Read>(reader: &mut R, length: usize) -> Result<()>113 fn skip_bytes<R: Read>(reader: &mut R, length: usize) -> Result<()> {
114 let length = length as u64;
115 let to_skip = &mut reader.by_ref().take(length);
116 let copied = io::copy(to_skip, &mut io::sink())?;
117 if copied < length {
118 Err(Error::Io(io::ErrorKind::UnexpectedEof.into()))
119 } else {
120 Ok(())
121 }
122 }
123
124 // Section B.2.2
parse_sof<R: Read>(reader: &mut R, marker: Marker) -> Result<FrameInfo>125 pub fn parse_sof<R: Read>(reader: &mut R, marker: Marker) -> Result<FrameInfo> {
126 let length = read_length(reader, marker)?;
127
128 if length <= 6 {
129 return Err(Error::Format("invalid length in SOF".to_owned()));
130 }
131
132 let is_baseline = marker == SOF(0);
133 let is_differential = match marker {
134 SOF(0 ..= 3) | SOF(9 ..= 11) => false,
135 SOF(5 ..= 7) | SOF(13 ..= 15) => true,
136 _ => panic!(),
137 };
138 let coding_process = match marker {
139 SOF(0) | SOF(1) | SOF(5) | SOF(9) | SOF(13) => CodingProcess::DctSequential,
140 SOF(2) | SOF(6) | SOF(10) | SOF(14) => CodingProcess::DctProgressive,
141 SOF(3) | SOF(7) | SOF(11) | SOF(15) => CodingProcess::Lossless,
142 _ => panic!(),
143 };
144 let entropy_coding = match marker {
145 SOF(0 ..= 3) | SOF(5 ..= 7) => EntropyCoding::Huffman,
146 SOF(9 ..= 11) | SOF(13 ..= 15) => EntropyCoding::Arithmetic,
147 _ => panic!(),
148 };
149
150 let precision = reader.read_u8()?;
151
152 match precision {
153 8 => {},
154 12 => {
155 if is_baseline {
156 return Err(Error::Format("12 bit sample precision is not allowed in baseline".to_owned()));
157 }
158 },
159 _ => {
160 if coding_process != CodingProcess::Lossless {
161 return Err(Error::Format(format!("invalid precision {} in frame header", precision)))
162 }
163 },
164 }
165
166 let height = reader.read_u16::<BigEndian>()?;
167 let width = reader.read_u16::<BigEndian>()?;
168
169 // height:
170 // "Value 0 indicates that the number of lines shall be defined by the DNL marker and
171 // parameters at the end of the first scan (see B.2.5)."
172
173 if width == 0 {
174 return Err(Error::Format("zero width in frame header".to_owned()));
175 }
176
177 let component_count = reader.read_u8()?;
178
179 if component_count == 0 {
180 return Err(Error::Format("zero component count in frame header".to_owned()));
181 }
182 if coding_process == CodingProcess::DctProgressive && component_count > 4 {
183 return Err(Error::Format("progressive frame with more than 4 components".to_owned()));
184 }
185
186 if length != 6 + 3 * component_count as usize {
187 return Err(Error::Format("invalid length in SOF".to_owned()));
188 }
189
190 let mut components: Vec<Component> = Vec::with_capacity(component_count as usize);
191
192 for _ in 0 .. component_count {
193 let identifier = reader.read_u8()?;
194
195 // Each component's identifier must be unique.
196 if components.iter().any(|c| c.identifier == identifier) {
197 return Err(Error::Format(format!("duplicate frame component identifier {}", identifier)));
198 }
199
200 let byte = reader.read_u8()?;
201 let horizontal_sampling_factor = byte >> 4;
202 let vertical_sampling_factor = byte & 0x0f;
203
204 if horizontal_sampling_factor == 0 || horizontal_sampling_factor > 4 {
205 return Err(Error::Format(format!("invalid horizontal sampling factor {}", horizontal_sampling_factor)));
206 }
207 if vertical_sampling_factor == 0 || vertical_sampling_factor > 4 {
208 return Err(Error::Format(format!("invalid vertical sampling factor {}", vertical_sampling_factor)));
209 }
210
211 let quantization_table_index = reader.read_u8()?;
212
213 if quantization_table_index > 3 || (coding_process == CodingProcess::Lossless && quantization_table_index != 0) {
214 return Err(Error::Format(format!("invalid quantization table index {}", quantization_table_index)));
215 }
216
217 components.push(Component {
218 identifier: identifier,
219 horizontal_sampling_factor: horizontal_sampling_factor,
220 vertical_sampling_factor: vertical_sampling_factor,
221 quantization_table_index: quantization_table_index as usize,
222 dct_scale: 8,
223 size: Dimensions {width: 0, height: 0},
224 block_size: Dimensions {width: 0, height: 0},
225 });
226 }
227
228 let mcu_size = update_component_sizes(Dimensions { width, height }, &mut components);
229
230 Ok(FrameInfo {
231 is_baseline: is_baseline,
232 is_differential: is_differential,
233 coding_process: coding_process,
234 entropy_coding: entropy_coding,
235 precision: precision,
236 image_size: Dimensions { width, height },
237 output_size: Dimensions { width, height },
238 mcu_size,
239 components: components,
240 })
241 }
242
/// Computes `ceil(x / y)` and truncates the result to `u16`.
///
/// # Panics
///
/// Panics with "invalid dimensions" when `x` or `y` is zero.
fn ceil_div(x: u32, y: u32) -> u16 {
    assert!(x > 0 && y > 0, "invalid dimensions");

    let whole = x / y;
    // A non-zero remainder means one more (partial) unit is needed.
    let rounded_up = if x % y == 0 { whole } else { whole + 1 };
    rounded_up as u16
}
248
update_component_sizes(size: Dimensions, components: &mut [Component]) -> Dimensions249 fn update_component_sizes(size: Dimensions, components: &mut [Component]) -> Dimensions {
250 let h_max = components.iter().map(|c| c.horizontal_sampling_factor).max().unwrap() as u32;
251 let v_max = components.iter().map(|c| c.vertical_sampling_factor).max().unwrap() as u32;
252
253 let mcu_size = Dimensions {
254 width: ceil_div(size.width as u32, h_max * 8),
255 height: ceil_div(size.height as u32, v_max * 8),
256 };
257
258 for component in components {
259 component.size.width = ceil_div(size.width as u32 * component.horizontal_sampling_factor as u32 * component.dct_scale as u32, h_max * 8);
260 component.size.height = ceil_div(size.height as u32 * component.vertical_sampling_factor as u32 * component.dct_scale as u32, v_max * 8);
261
262 component.block_size.width = mcu_size.width * component.horizontal_sampling_factor as u16;
263 component.block_size.height = mcu_size.height * component.vertical_sampling_factor as u16;
264 }
265
266 mcu_size
267 }
268
/// Checks the geometry derived for a single 2x2-sampled component in an 800x280 image.
#[test]
fn test_update_component_sizes() {
    let unset = Dimensions { width: 0, height: 0 };
    let mut components = [Component {
        identifier: 1,
        horizontal_sampling_factor: 2,
        vertical_sampling_factor: 2,
        quantization_table_index: 0,
        dct_scale: 8,
        size: unset,
        block_size: unset,
    }];

    let image = Dimensions { width: 800, height: 280 };
    let mcu = update_component_sizes(image, &mut components);
    let component = &components[0];

    // With 2x2 sampling each MCU spans 16x16 samples: 800 / 16 = 50, ceil(280 / 16) = 18.
    assert_eq!(mcu, Dimensions { width: 50, height: 18 });
    assert_eq!(component.block_size, Dimensions { width: 100, height: 36 });
    assert_eq!(component.size, Dimensions { width: 800, height: 280 });
}
287
288 // Section B.2.3
parse_sos<R: Read>(reader: &mut R, frame: &FrameInfo) -> Result<ScanInfo>289 pub fn parse_sos<R: Read>(reader: &mut R, frame: &FrameInfo) -> Result<ScanInfo> {
290 let length = read_length(reader, SOS)?;
291 if 0 == length {
292 return Err(Error::Format("zero length in SOS".to_owned()));
293 }
294
295 let component_count = reader.read_u8()?;
296
297 if component_count == 0 || component_count > 4 {
298 return Err(Error::Format(format!("invalid component count {} in scan header", component_count)));
299 }
300
301 if length != 4 + 2 * component_count as usize {
302 return Err(Error::Format("invalid length in SOS".to_owned()));
303 }
304
305 let mut component_indices = Vec::with_capacity(component_count as usize);
306 let mut dc_table_indices = Vec::with_capacity(component_count as usize);
307 let mut ac_table_indices = Vec::with_capacity(component_count as usize);
308
309 for _ in 0 .. component_count {
310 let identifier = reader.read_u8()?;
311
312 let component_index = match frame.components.iter().position(|c| c.identifier == identifier) {
313 Some(value) => value,
314 None => return Err(Error::Format(format!("scan component identifier {} does not match any of the component identifiers defined in the frame", identifier))),
315 };
316
317 // Each of the scan's components must be unique.
318 if component_indices.contains(&component_index) {
319 return Err(Error::Format(format!("duplicate scan component identifier {}", identifier)));
320 }
321
322 // "... the ordering in the scan header shall follow the ordering in the frame header."
323 if component_index < *component_indices.iter().max().unwrap_or(&0) {
324 return Err(Error::Format("the scan component order does not follow the order in the frame header".to_owned()));
325 }
326
327 let byte = reader.read_u8()?;
328 let dc_table_index = byte >> 4;
329 let ac_table_index = byte & 0x0f;
330
331 if dc_table_index > 3 || (frame.is_baseline && dc_table_index > 1) {
332 return Err(Error::Format(format!("invalid dc table index {}", dc_table_index)));
333 }
334 if ac_table_index > 3 || (frame.is_baseline && ac_table_index > 1) {
335 return Err(Error::Format(format!("invalid ac table index {}", ac_table_index)));
336 }
337
338 component_indices.push(component_index);
339 dc_table_indices.push(dc_table_index as usize);
340 ac_table_indices.push(ac_table_index as usize);
341 }
342
343 let blocks_per_mcu = component_indices.iter().map(|&i| {
344 frame.components[i].horizontal_sampling_factor as u32 * frame.components[i].vertical_sampling_factor as u32
345 }).fold(0, ::std::ops::Add::add);
346
347 if component_count > 1 && blocks_per_mcu > 10 {
348 return Err(Error::Format("scan with more than one component and more than 10 blocks per MCU".to_owned()));
349 }
350
351 let spectral_selection_start = reader.read_u8()?;
352 let spectral_selection_end = reader.read_u8()?;
353
354 let byte = reader.read_u8()?;
355 let successive_approximation_high = byte >> 4;
356 let successive_approximation_low = byte & 0x0f;
357
358 if frame.coding_process == CodingProcess::DctProgressive {
359 if spectral_selection_end > 63 || spectral_selection_start > spectral_selection_end ||
360 (spectral_selection_start == 0 && spectral_selection_end != 0) {
361 return Err(Error::Format(format!("invalid spectral selection parameters: ss={}, se={}", spectral_selection_start, spectral_selection_end)));
362 }
363 if spectral_selection_start != 0 && component_count != 1 {
364 return Err(Error::Format("spectral selection scan with AC coefficients can't have more than one component".to_owned()));
365 }
366
367 if successive_approximation_high > 13 || successive_approximation_low > 13 {
368 return Err(Error::Format(format!("invalid successive approximation parameters: ah={}, al={}", successive_approximation_high, successive_approximation_low)));
369 }
370
371 // Section G.1.1.1.2
372 // "Each scan which follows the first scan for a given band progressively improves
373 // the precision of the coefficients by one bit, until full precision is reached."
374 if successive_approximation_high != 0 && successive_approximation_high != successive_approximation_low + 1 {
375 return Err(Error::Format("successive approximation scan with more than one bit of improvement".to_owned()));
376 }
377 }
378 else {
379 if spectral_selection_start != 0 || spectral_selection_end != 63 {
380 return Err(Error::Format("spectral selection is not allowed in non-progressive scan".to_owned()));
381 }
382 if successive_approximation_high != 0 || successive_approximation_low != 0 {
383 return Err(Error::Format("successive approximation is not allowed in non-progressive scan".to_owned()));
384 }
385 }
386
387 Ok(ScanInfo {
388 component_indices: component_indices,
389 dc_table_indices: dc_table_indices,
390 ac_table_indices: ac_table_indices,
391 spectral_selection: Range {
392 start: spectral_selection_start,
393 end: spectral_selection_end + 1,
394 },
395 successive_approximation_high: successive_approximation_high,
396 successive_approximation_low: successive_approximation_low,
397 })
398 }
399
400 // Section B.2.4.1
parse_dqt<R: Read>(reader: &mut R) -> Result<[Option<[u16; 64]>; 4]>401 pub fn parse_dqt<R: Read>(reader: &mut R) -> Result<[Option<[u16; 64]>; 4]> {
402 let mut length = read_length(reader, DQT)?;
403 let mut tables = [None; 4];
404
405 // Each DQT segment may contain multiple quantization tables.
406 while length > 0 {
407 let byte = reader.read_u8()?;
408 let precision = (byte >> 4) as usize;
409 let index = (byte & 0x0f) as usize;
410
411 // The combination of 8-bit sample precision and 16-bit quantization tables is explicitly
412 // disallowed by the JPEG spec:
413 // "An 8-bit DCT-based process shall not use a 16-bit precision quantization table."
414 // "Pq: Quantization table element precision – Specifies the precision of the Qk
415 // values. Value 0 indicates 8-bit Qk values; value 1 indicates 16-bit Qk values. Pq
416 // shall be zero for 8 bit sample precision P (see B.2.2)."
417 // libjpeg allows this behavior though, and there are images in the wild using it. So to
418 // match libjpeg's behavior we are deviating from the JPEG spec here.
419 if precision > 1 {
420 return Err(Error::Format(format!("invalid precision {} in DQT", precision)));
421 }
422 if index > 3 {
423 return Err(Error::Format(format!("invalid destination identifier {} in DQT", index)));
424 }
425 if length < 65 + 64 * precision {
426 return Err(Error::Format("invalid length in DQT".to_owned()));
427 }
428
429 let mut table = [0u16; 64];
430
431 for i in 0 .. 64 {
432 table[i] = match precision {
433 0 => reader.read_u8()? as u16,
434 1 => reader.read_u16::<BigEndian>()?,
435 _ => unreachable!(),
436 };
437 }
438
439 if table.iter().any(|&val| val == 0) {
440 return Err(Error::Format("quantization table contains element with a zero value".to_owned()));
441 }
442
443 tables[index] = Some(table);
444 length -= 65 + 64 * precision;
445 }
446
447 Ok(tables)
448 }
449
450 // Section B.2.4.2
parse_dht<R: Read>(reader: &mut R, is_baseline: Option<bool>) -> Result<(Vec<Option<HuffmanTable>>, Vec<Option<HuffmanTable>>)>451 pub fn parse_dht<R: Read>(reader: &mut R, is_baseline: Option<bool>) -> Result<(Vec<Option<HuffmanTable>>, Vec<Option<HuffmanTable>>)> {
452 let mut length = read_length(reader, DHT)?;
453 let mut dc_tables = vec![None, None, None, None];
454 let mut ac_tables = vec![None, None, None, None];
455
456 // Each DHT segment may contain multiple huffman tables.
457 while length > 17 {
458 let byte = reader.read_u8()?;
459 let class = byte >> 4;
460 let index = (byte & 0x0f) as usize;
461
462 if class != 0 && class != 1 {
463 return Err(Error::Format(format!("invalid class {} in DHT", class)));
464 }
465 if is_baseline == Some(true) && index > 1 {
466 return Err(Error::Format("a maximum of two huffman tables per class are allowed in baseline".to_owned()));
467 }
468 if index > 3 {
469 return Err(Error::Format(format!("invalid destination identifier {} in DHT", index)));
470 }
471
472 let mut counts = [0u8; 16];
473 reader.read_exact(&mut counts)?;
474
475 let size = counts.iter().map(|&val| val as usize).fold(0, ::std::ops::Add::add);
476
477 if size == 0 {
478 return Err(Error::Format("encountered table with zero length in DHT".to_owned()));
479 }
480 else if size > 256 {
481 return Err(Error::Format("encountered table with excessive length in DHT".to_owned()));
482 }
483 else if size > length - 17 {
484 return Err(Error::Format("invalid length in DHT".to_owned()));
485 }
486
487 let mut values = vec![0u8; size];
488 reader.read_exact(&mut values)?;
489
490 match class {
491 0 => dc_tables[index] = Some(HuffmanTable::new(&counts, &values, HuffmanTableClass::DC)?),
492 1 => ac_tables[index] = Some(HuffmanTable::new(&counts, &values, HuffmanTableClass::AC)?),
493 _ => unreachable!(),
494 }
495
496 length -= 17 + size;
497 }
498
499 if length != 0 {
500 return Err(Error::Format("invalid length in DHT".to_owned()));
501 }
502
503 Ok((dc_tables, ac_tables))
504 }
505
506 // Section B.2.4.4
parse_dri<R: Read>(reader: &mut R) -> Result<u16>507 pub fn parse_dri<R: Read>(reader: &mut R) -> Result<u16> {
508 let length = read_length(reader, DRI)?;
509
510 if length != 2 {
511 return Err(Error::Format("DRI with invalid length".to_owned()));
512 }
513
514 Ok(reader.read_u16::<BigEndian>()?)
515 }
516
517 // Section B.2.4.5
parse_com<R: Read>(reader: &mut R) -> Result<Vec<u8>>518 pub fn parse_com<R: Read>(reader: &mut R) -> Result<Vec<u8>> {
519 let length = read_length(reader, COM)?;
520 let mut buffer = vec![0u8; length];
521
522 reader.read_exact(&mut buffer)?;
523
524 Ok(buffer)
525 }
526
527 // Section B.2.4.6
parse_app<R: Read>(reader: &mut R, marker: Marker) -> Result<Option<AppData>>528 pub fn parse_app<R: Read>(reader: &mut R, marker: Marker) -> Result<Option<AppData>> {
529 let length = read_length(reader, marker)?;
530 let mut bytes_read = 0;
531 let mut result = None;
532
533 match marker {
534 APP(0) => {
535 if length >= 5 {
536 let mut buffer = [0u8; 5];
537 reader.read_exact(&mut buffer)?;
538 bytes_read = buffer.len();
539
540 // http://www.w3.org/Graphics/JPEG/jfif3.pdf
541 if &buffer[0 .. 5] == &[b'J', b'F', b'I', b'F', b'\0'] {
542 result = Some(AppData::Jfif);
543 // https://sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#AVI1
544 } else if &buffer[0 .. 5] == &[b'A', b'V', b'I', b'1', b'\0'] {
545 result = Some(AppData::Avi1);
546 }
547 }
548 },
549 APP(14) => {
550 if length >= 12 {
551 let mut buffer = [0u8; 12];
552 reader.read_exact(&mut buffer)?;
553 bytes_read = buffer.len();
554
555 // http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
556 if &buffer[0 .. 6] == &[b'A', b'd', b'o', b'b', b'e', b'\0'] {
557 let color_transform = match buffer[11] {
558 0 => AdobeColorTransform::Unknown,
559 1 => AdobeColorTransform::YCbCr,
560 2 => AdobeColorTransform::YCCK,
561 _ => return Err(Error::Format("invalid color transform in adobe app segment".to_owned())),
562 };
563
564 result = Some(AppData::Adobe(color_transform));
565 }
566 }
567 },
568 _ => {},
569 }
570
571 skip_bytes(reader, length - bytes_read)?;
572 Ok(result)
573 }
574