1<?php
2/**
3 * @todo Could use a test of extended XMP segments. Hard to find programs that
4 * create example files, and creating my own in vim probably wouldn't
5 * serve as a very good "test". (Adobe photoshop probably creates such files
6 * but it costs money). The implementation of it currently in MediaWiki is based
7 * solely on reading the standard, without any real world test files.
8 *
9 * @group Media
10 * @covers JpegMetadataExtractor
11 */
class JpegMetadataExtractorTest extends MediaWikiIntegrationTestCase {

	/** @var string Directory holding the media fixture files, with a trailing slash */
	protected $filePath;

	/**
	 * Point $filePath at the shared media fixture directory before each test.
	 */
	protected function setUp(): void {
		parent::setUp();

		$this->filePath = __DIR__ . '/../../data/media/';
	}

	/**
	 * A UTF-8 comment segment should be returned unchanged.
	 *
	 * We also use this test to check that padding bytes don't
	 * screw stuff up.
	 *
	 * @param string $file Filename
	 *
	 * @dataProvider provideUtf8Comment
	 */
	public function testUtf8Comment( $file ): void {
		$res = JpegMetadataExtractor::segmentSplitter( $this->filePath . $file );
		$this->assertEquals( [ 'UTF-8 JPEG Comment — ¼' ], $res['COM'] );
	}

	/**
	 * Fixture files that are all expected to yield the same UTF-8 comment.
	 *
	 * @return array[] One single-element argument list (the filename) per case
	 */
	public static function provideUtf8Comment(): array {
		return [
			[ 'jpeg-comment-utf.jpg' ],
			[ 'jpeg-padding-even.jpg' ],
			[ 'jpeg-padding-odd.jpg' ],
		];
	}

	/**
	 * The file is iso-8859-1, but it should get auto converted.
	 */
	public function testIso88591Comment(): void {
		$res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-comment-iso8859-1.jpg' );
		$this->assertEquals( [ 'ISO-8859-1 JPEG Comment - ¼' ], $res['COM'] );
	}

	/**
	 * Comment values that are non-textual (random binary junk) should not be shown.
	 * The example test file has a comment with a 0x5 byte in it which is a control character
	 * and considered binary junk for our purposes.
	 */
	public function testBinaryCommentStripped(): void {
		$res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-comment-binary.jpg' );
		$this->assertSame( [], $res['COM'] );
	}

	/**
	 * Very rarely a file can have multiple comments.
	 * Order of comments is based on order inside the file.
	 */
	public function testMultipleComment(): void {
		$res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-comment-multiple.jpg' );
		$this->assertEquals( [ 'foo', 'bar' ], $res['COM'] );
	}

	/**
	 * The XMP packet extracted from the JPEG should match the reference .xmp file.
	 */
	public function testXMPExtraction(): void {
		$res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-psir.jpg' );
		$expected = file_get_contents( $this->filePath . 'jpeg-xmp-psir.xmp' );
		// Strict comparison: file_get_contents() returns false on failure, and
		// that must never loosely match an empty or missing XMP result.
		$this->assertSame( $expected, $res['XMP'] );
	}

	/**
	 * The first PSIR segment should be returned as the raw bytes of the segment.
	 */
	public function testPSIRExtraction(): void {
		$res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-psir.jpg' );
		$expected = '50686f746f73686f7020332e30003842494d04040000000'
			. '000181c02190004746573741c02190003666f6f1c020000020004';
		// Compared as hex so that a failure message stays printable.
		$this->assertSame( $expected, bin2hex( $res['PSIR'][0] ) );
	}

	/**
	 * The 'jpeg-xmp-nullchar.jpg' fixture should yield the same XMP packet
	 * as the reference .xmp file.
	 */
	public function testXMPExtractionNullChar(): void {
		$res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-nullchar.jpg' );
		$expected = file_get_contents( $this->filePath . 'jpeg-xmp-psir.xmp' );
		$this->assertSame( $expected, $res['XMP'] );
	}

	/**
	 * The 'jpeg-xmp-alt.jpg' fixture should yield the same XMP packet
	 * as the reference .xmp file.
	 */
	public function testXMPExtractionAltAppId(): void {
		$res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-alt.jpg' );
		$expected = file_get_contents( $this->filePath . 'jpeg-xmp-psir.xmp' );
		$this->assertSame( $expected, $res['XMP'] );
	}

	/**
	 * doPSIR() should report 'iptc-no-hash' for the jpeg-xmp-psir.jpg fixture.
	 */
	public function testIPTCHashComparisionNoHash(): void {
		$segments = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-psir.jpg' );
		$res = JpegMetadataExtractor::doPSIR( $segments['PSIR'][0] );

		$this->assertSame( 'iptc-no-hash', $res );
	}

	/**
	 * doPSIR() should report 'iptc-bad-hash' for the jpeg-iptc-bad-hash.jpg fixture.
	 */
	public function testIPTCHashComparisionBadHash(): void {
		$segments = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-iptc-bad-hash.jpg' );
		$res = JpegMetadataExtractor::doPSIR( $segments['PSIR'][0] );

		$this->assertSame( 'iptc-bad-hash', $res );
	}

	/**
	 * doPSIR() should report 'iptc-good-hash' for the jpeg-iptc-good-hash.jpg fixture.
	 */
	public function testIPTCHashComparisionGoodHash(): void {
		$segments = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-iptc-good-hash.jpg' );
		$res = JpegMetadataExtractor::doPSIR( $segments['PSIR'][0] );

		$this->assertSame( 'iptc-good-hash', $res );
	}

	/**
	 * The 'byteOrder' entry should be 'BE' for the exif-user-comment.jpg fixture.
	 */
	public function testExifByteOrder(): void {
		$res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'exif-user-comment.jpg' );
		$this->assertSame( 'BE', $res['byteOrder'] );
	}

	/**
	 * A file truncated right after a segment must raise an exception
	 * instead of looping forever.
	 */
	public function testInfiniteRead(): void {
		// test file truncated right after a segment, which previously
		// caused an infinite loop looking for the next segment byte.
		// Should get past infinite loop and throw in wfUnpack()
		$this->expectException( MWException::class );
		JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-segment-loop1.jpg' );
	}

	/**
	 * A file truncated after a segment's marker and size must raise an
	 * exception instead of looping forever.
	 */
	public function testInfiniteRead2(): void {
		// test file truncated after a segment's marker and size, which
		// would cause a seek past end of file. Seek past end of file
		// doesn't actually fail, but prevents further reading and was
		// devolving into the previous case (testInfiniteRead).
		$this->expectException( MWException::class );
		JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-segment-loop2.jpg' );
	}
}
135