#!/usr/bin/env python3
#
# Takes a list of files on the command line and checks for valid
# UTF-8 data. Used for checking .po files.
#
# Copyright © 2016 Dr. Tobias Quathamer <toddy@debian.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

import re
import sys

# Header text that declares the charset as UTF-8. It is searched for
# anywhere within each decoded line and matched exactly as written
# (case-sensitive). Compiled once instead of once per line.
CONTENT_TYPE_UTF8 = re.compile(r'Content-Type: text/plain; charset=UTF-8')


def check_file(filename):
    """Check a single file for valid UTF-8 data and a UTF-8 header.

    The file is opened in binary mode and every line is decoded
    strictly as UTF-8. All lines are read, even after the Content-Type
    header has been found, so that an encoding error anywhere in the
    file is detected.

    Exactly one problem is reported per file, printed to standard
    output: either the first line that is not valid UTF-8, or a
    missing "Content-Type: text/plain; charset=UTF-8" header, or the
    operating system error raised while opening or reading the file.

    Returns True if the file passed both checks, False otherwise.
    """
    # The "Content-Type" header has not been seen yet
    charset_utf8_seen = False
    try:
        with open(filename, "rb") as pofile:
            for line in pofile:
                try:
                    utf8 = line.decode(encoding="utf-8", errors="strict")
                except UnicodeError as error:
                    print("UTF-8 encoding error in file %s: %s (position %d)"
                          % (filename, error.reason, error.start))
                    print("Binary data: %s" % line)
                    # Return right away: the encoding error is the real
                    # problem, so do not additionally complain about a
                    # header that simply has not been reached yet.
                    return False
                if CONTENT_TYPE_UTF8.search(utf8):
                    charset_utf8_seen = True
    except OSError as error:
        # A missing or unreadable file is an error for that file only;
        # report it instead of aborting the whole run with a traceback.
        print("Error in file %s: %s" % (filename, error))
        return False
    # The whole file has been read, the content type should have
    # been detected now. Otherwise, it's an error.
    if not charset_utf8_seen:
        print("Error in file %s: could not detect Content-Type header" % filename)
        return False
    return True


def main(filenames=None):
    """Check all given files and return the exit status for the process.

    filenames is the list of files to check. If it is None, the
    command line arguments after the script name are used; sys.argv
    itself is left unmodified.

    Every file is checked, even if an earlier one failed, so that all
    problems are reported in a single run.

    Returns 0 if every file is valid, 1 if at least one check failed.
    """
    if filenames is None:
        filenames = sys.argv[1:]
    # Assume that every file is valid
    exit_status = 0
    for filename in filenames:
        if not check_file(filename):
            exit_status = 1
    return exit_status


if __name__ == "__main__":
    sys.exit(main())