1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3###############################################################################
4# $Id: jpeg_in_tiff_extract.py e4fe7cc06270e5f38dfe78e6785a6bcca4e39e29 2021-04-01 21:02:04 +0300 Idan Miara $
5#
6# Project:  GDAL/OGR samples
7# Purpose:  Extract a JPEG file from a JPEG-in-TIFF tile/strip
8# Author:   Even Rouault <even dot rouault at spatialys.com>
9#
10###############################################################################
11# Copyright (c) 2014, Even Rouault <even dot rouault at spatialys.com>
12#
13# Permission is hereby granted, free of charge, to any person obtaining a
14# copy of this software and associated documentation files (the "Software"),
15# to deal in the Software without restriction, including without limitation
16# the rights to use, copy, modify, merge, publish, distribute, sublicense,
17# and/or sell copies of the Software, and to permit persons to whom the
18# Software is furnished to do so, subject to the following conditions:
19#
20# The above copyright notice and this permission notice shall be included
21# in all copies or substantial portions of the Software.
22#
23# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29# DEALINGS IN THE SOFTWARE.
30###############################################################################
31
32import sys
33
34from osgeo import gdal
35
36
37def Usage():
38    print('Usage: jpeg_in_tiff_extract.py in.tif out.jpg [tile_x tile_y [band_nbr]]')
39    print('')
40    print('Extract a JPEG file from a JPEG-in-TIFF tile/strip.')
41    print('If tile_x tile_y are not specified, then all tiles/strips are extracted')
42    print('in filenames out_[bandnbr_]tx_ty.jpg')
43    print('')
44    return 1
45
46###############################################################
47
48
49def extract_tile(ds, src_band_nbr, tile_x, tile_y, jpg_filename):
50
51    block_offset = ds.GetRasterBand(src_band_nbr).GetMetadataItem('BLOCK_OFFSET_%d_%d' % (tile_x, tile_y), 'TIFF')
52    block_size = ds.GetRasterBand(src_band_nbr).GetMetadataItem('BLOCK_SIZE_%d_%d' % (tile_x, tile_y), 'TIFF')
53    if block_offset is None or block_size is None:
54        print('ERROR: Cannot find block (%d,%d)' % (tile_x, tile_y))
55        return 1
56
57    jpegtables = ds.GetRasterBand(src_band_nbr).GetMetadataItem('JPEGTABLES', 'TIFF')
58    if jpegtables is not None:
59        if (len(jpegtables) % 2) != 0 or jpegtables[0:4] != 'FFD8' or jpegtables[-2:] != 'D9':
60            print('ERROR: Invalid JPEG tables')
61            print(jpegtables)
62            return 1
63
64        # Remove final D9
65        jpegtables = jpegtables[0:-2]
66
67    tiff_f = gdal.VSIFOpenL(ds.GetDescription(), 'rb')
68    if tiff_f is None:
69        print('ERROR: Cannot reopen %s' % ds.GetDescription())
70        return 1
71
72    out_f = gdal.VSIFOpenL(jpg_filename, 'wb')
73    if out_f is None:
74        print('ERROR: Cannot create %s' % jpg_filename)
75        gdal.VSIFCloseL(tiff_f)
76        return 1
77
78    # Write JPEG tables
79    if jpegtables is not None:
80        for i in range(int(len(jpegtables) / 2)):
81            c1 = ord(jpegtables[2 * i])
82            c2 = ord(jpegtables[2 * i + 1])
83            if c1 >= ord('0') and c1 <= ord('9'):
84                val = c1 - ord('0')
85            else:
86                val = (c1 - ord('A')) + 10
87            val = val * 16
88            if c2 >= ord('0') and c2 <= ord('9'):
89                val = val + (c2 - ord('0'))
90            else:
91                val = val + (c2 - ord('A')) + 10
92            gdal.VSIFWriteL(chr(val), 1, 1, out_f)
93    else:
94        gdal.VSIFWriteL(chr(0xFF), 1, 1, out_f)
95        gdal.VSIFWriteL(chr(0xD8), 1, 1, out_f)
96
97    # Write Adobe APP14 marker if necessary
98    interleave = ds.GetMetadataItem('INTERLEAVE', 'IMAGE_STRUCTURE')
99    photometric = ds.GetMetadataItem('COMPRESSION', 'IMAGE_STRUCTURE')
100    if interleave == 'PIXEL' and photometric == 'JPEG' and ds.RasterCount == 3:
101        adobe_app14 = [0xFF, 0xEE, 0x00, 0x0E, 0x41, 0x64, 0x6F, 0x62, 0x65, 0x00, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00]
102        for c in adobe_app14:
103            gdal.VSIFWriteL(chr(c), 1, 1, out_f)
104
105    # Write JPEG codestream
106    # skip leading 0xFF 0xD8
107    gdal.VSIFSeekL(tiff_f, int(block_offset) + 2, 0)
108    data = gdal.VSIFReadL(1, int(block_size) - 2, tiff_f)
109    gdal.VSIFCloseL(tiff_f)
110    gdal.VSIFWriteL(data, 1, len(data), out_f)
111
112    gdal.VSIFCloseL(out_f)
113
114    aux_xml_filename = '%s.aux.xml' % jpg_filename
115    gt = ds.GetGeoTransform()
116    srs = ds.GetProjectionRef()
117    if srs is not None and srs != '':
118        sub_gt = [gt[i] for i in range(6)]
119        (blockxsize, blockysize) = ds.GetRasterBand(1).GetBlockSize()
120        sub_gt[0] = gt[0] + tile_x * blockxsize * gt[1]
121        sub_gt[3] = gt[3] + tile_y * blockysize * gt[5]
122
123        out_f = gdal.VSIFOpenL(aux_xml_filename, 'wb')
124        if out_f is None:
125            print('ERROR: Cannot create %s' % aux_xml_filename)
126            return 1
127        content = """<PAMDataset>
128    <SRS>%s</SRS>
129    <GeoTransform>%.18g,%.18g,%.18g,%.18g,%.18g,%.18g</GeoTransform>
130    </PAMDataset>
131    """ % (srs, sub_gt[0], sub_gt[1], sub_gt[2], sub_gt[3], sub_gt[4], sub_gt[5])
132        gdal.VSIFWriteL(content, 1, len(content), out_f)
133        gdal.VSIFCloseL(out_f)
134    else:
135        gdal.Unlink('%s.aux.xml' % jpg_filename)
136
137    return 0
138
139###############################################################
140
141
142def jpeg_in_tiff_extract(argv):
143
144    if len(argv) < 2:
145        print('ERROR: Not enough arguments')
146        return Usage()
147
148    tiff_filename = argv[0]
149    jpg_filename = argv[1]
150    if len(argv) >= 3:
151        tile_x = int(argv[2])
152        tile_y = int(argv[3])
153        if len(argv) == 5:
154            band_nbr = int(argv[4])
155        else:
156            band_nbr = None
157    else:
158        tile_x = None
159        tile_y = None
160
161    radix_jpg_filename = jpg_filename
162    extensions = ['.jpg', '.jpeg', '.JPG', '.JPEG']
163    extension = None
164    for ext in extensions:
165        pos = radix_jpg_filename.find(ext)
166        if pos >= 0:
167            extension = ext
168            radix_jpg_filename = radix_jpg_filename[0:pos]
169            break
170    if pos < 0:
171        print('ERROR: %s should end with .jpg/.jpeg' % jpg_filename)
172        return 1
173
174    ds = gdal.Open(tiff_filename)
175    if ds is None:
176        print('ERROR: Cannot open %s' % tiff_filename)
177        return 1
178
179    if ds.GetDriver() is None or \
180       ds.GetDriver().GetDescription() != 'GTiff':
181        print('ERROR: %s is not a TIFF dataset.' % tiff_filename)
182        return 1
183
184    photometric = ds.GetMetadataItem('COMPRESSION', 'IMAGE_STRUCTURE')
185    interleave = ds.GetMetadataItem('INTERLEAVE', 'IMAGE_STRUCTURE')
186
187    if photometric != 'JPEG' and photometric != 'YCbCr JPEG':
188        print('ERROR: %s is not a JPEG-compressed TIFF dataset.' % tiff_filename)
189        return 1
190
191    (blockxsize, blockysize) = ds.GetRasterBand(1).GetBlockSize()
192    if blockysize == 1:
193        blockysize = ds.RasterYSize
194    block_in_row = (ds.RasterXSize + blockxsize - 1) / blockxsize
195    block_in_col = (ds.RasterYSize + blockysize - 1) / blockysize
196
197    # Extract single tile ?
198    if tile_x is not None:
199
200        if tile_x < 0 or tile_x >= block_in_row:
201            print('ERROR: Invalid tile_x : %d. Should be >= 0 and < %d' % (tile_x, block_in_row))
202            return 1
203        if tile_y < 0 or tile_y >= block_in_col:
204            print('ERROR: Invalid tile_y : %d. Should be >= 0 and < %d' % (tile_y, block_in_col))
205            return 1
206
207        if ds.RasterCount > 1:
208            if interleave == 'PIXEL':
209                if band_nbr is not None:
210                    print('ERROR: For a INTERLEAVE=PIXEL dataset, band_nbr should NOT be specified')
211                    return 1
212            else:
213                if band_nbr is None:
214                    print('ERROR: For a INTERLEAVE=BAND dataset, band_nbr should be specified')
215                    return 1
216
217        if band_nbr is not None:
218            if band_nbr < 1 or band_nbr >= ds.RasterCount:
219                print('ERROR: Invalid band_nbr : %d. Should be >= 1 and <= %d' % (tile_y, ds.RasterCount))
220                return 1
221
222        if band_nbr is not None:
223            src_band_nbr = band_nbr
224        else:
225            src_band_nbr = 1
226
227        return extract_tile(ds, src_band_nbr, tile_x, tile_y, jpg_filename)
228
229    # Extract all tiles
230    else:
231        if ds.RasterCount == 1 or interleave == 'PIXEL':
232            for tile_y in range(block_in_col):
233                for tile_x in range(block_in_row):
234                    filename = '%s_%d_%d%s' % (radix_jpg_filename, tile_x, tile_y, extension)
235                    ret = extract_tile(ds, 1, tile_x, tile_y, filename)
236                    if ret != 0:
237                        return ret
238        else:
239            for src_band_nbr in range(ds.RasterCount):
240                for tile_y in range(block_in_col):
241                    for tile_x in range(block_in_row):
242                        filename = '%s_%d_%d_%d%s' % (radix_jpg_filename, src_band_nbr + 1, tile_x, tile_y, extension)
243                        ret = extract_tile(ds, src_band_nbr + 1, tile_x, tile_y, filename)
244                        if ret != 0:
245                            return ret
246        return 0
247
248
249def main(argv):
250    gdal.GeneralCmdLineProcessor(argv)
251    return jpeg_in_tiff_extract(argv[1:])
252
253
254if __name__ == '__main__':
255    sys.exit(main(sys.argv))
256
257