1#!/usr/local/bin/python
2# -*- coding: iso-8859-1 -*-
3
4# $Id$
5
6# Copyright (c) 2004 Kungliga Tekniska Högskolan
7# (Royal Institute of Technology, Stockholm, Sweden).
8# All rights reserved.
9#
10# Redistribution and use in source and binary forms, with or without
11# modification, are permitted provided that the following conditions
12# are met:
13#
14# 1. Redistributions of source code must retain the above copyright
15#    notice, this list of conditions and the following disclaimer.
16#
17# 2. Redistributions in binary form must reproduce the above copyright
18#    notice, this list of conditions and the following disclaimer in the
19#    documentation and/or other materials provided with the distribution.
20#
21# 3. Neither the name of the Institute nor the names of its contributors
22#    may be used to endorse or promote products derived from this software
23#    without specific prior written permission.
24#
25# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
26# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28# ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
29# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35# SUCH DAMAGE.
36
37import re
38import string
39
40def read(filename):
41    """return a dict of unicode characters"""
42    ud = open(filename, 'r')
43    ret = {}
44    while True:
45        l = ud.readline()
46        if not l:
47            break
48        l = re.sub('#.*$', '', l)
49        if l == "\n":
50            continue
51        f = l.split(';')
52        key = int(f[0], 0x10)
53        if key in ret:
54            raise Exception('Duplicate key in UnicodeData')
55        ret[key] = f[1:]
56    ud.close()
57    return ret
58