1#! /usr/bin/perl -w
2#
3#                           TERMS AND CONDITIONS
4#                                   FOR
5#                         OPEN SOURCE CODE LICENSE
6#                               Version 1.1
7#
8# Japan Registry Services Co., Ltd. ("JPRS"), a Japanese corporation
9# having its head office at Chiyoda First Bldg. East 13F 3-8-1 Nishi-Kanda,
10# Chiyoda-ku, Tokyo 101-0065, Japan, grants you the license for open source
11# code specified in EXHIBIT A the "Code" subject to the following Terms and
12# Conditions ("OSCL").
13#
14# 1. License Grant.
15#   JPRS hereby grants you a worldwide, royalty-free, non-exclusive
16#   license, subject to third party intellectual property claims:
17#   (a) under intellectual property rights (other than patent or
18#       trademark) licensable by JPRS to use, reproduce, modify, display,
19#       perform, sublicense and distribute the Code (or portions thereof)
20#       with or without modifications, and/or as part of a derivative work;
21#       or
22#   (b) under claims of the infringement through the making, using,
23#       offering to sell and/or otherwise disposing the JPRS Revised Code
24#       (or portions thereof);
25#   (c) the licenses granted in this Section 1(a) and (b) are effective on
26#       the date JPRS first distributes the Code to you under the terms of
27#       this OSCL;
28#   (d) Notwithstanding the above stated terms, no patent license is
29#       granted:
30#       1)  for a code that you delete from the Code;
31#       2)  separate from the Code; or
32#       3)  for infringements caused by:
33#            i) modification of the Code; or
34#           ii) combination of the Code with other software or devices.
35#
36# 2. Consents.
37#   You agree that:
38#   (a) you must include a copy of this OSCL and the notice set forth in
39#       EXHIBIT A with every copy of the Code you distribute;
40#   (b) you must include a copy of this OSCL and the notice set forth in
41#       EXHIBIT A with every copy of binary form of the Code in the
42#       documentation and/or other materials provided with the distribution;
43#   (c) you may not offer or impose any terms on any source code version
44#       that alters or restricts the applicable version of this OSCL or
45#       the recipients' rights hereunder.
46#   (d) If the terms and conditions are set forth in EXHIBIT A, you must
47#       comply with those terms and conditions.
48#
49# 3. Proprietary Information.
50#   All trademarks, service marks, patents, copyrights, trade secrets, and
51#   other proprietary rights in or related to the Code are and will remain
52#   the exclusive property of JPRS or its licensors, whether or not
53#   specifically recognized or perfected under local law except specified
54#   in this OSCL; provided however you agree and understand that the JPRS
55#   name may not be used to endorse or promote this Code without prior
56#   written approval of JPRS.
57#
58# 4. WARRANTY DISCLAIMER.
59#   JPRS MAKES NO REPRESENTATIONS AND WARRANTIES REGARDING THE USE OF THE
60#   CODE, NOR DOES JPRS MAKE ANY REPRESENTATIONS THAT THE CODE WILL BECOME
61#   COMMERCIALLY AVAILABLE. JPRS, ITS AFFILIATES, AND ITS SUPPLIERS DO NOT
62#   WARRANT OR REPRESENT THAT THE CODE IS FREE OF ERRORS OR THAT THE CODE
63#   IS SUITABLE FOR TRANSLATION AND/OR LOCALIZATION. THE CODE IS PROVIDED
64#   ON AN "AS IS" BASIS AND JPRS AND ITS SUPPLIERS HAVE NO OBLIGATION TO
65#   CORRECT ERRORS OR TO SUPPORT THE CODE UNDER THIS OSCL FOR ANY REASON.
66#   TO THE FULL EXTENT PERMITTED BY LAW, ALL OBLIGATIONS ARE HEREBY
67#   EXCLUDED WHETHER EXPRESS, STATUTORY OR IMPLIED UNDER LAW, COURSE OF
68#   DEALING, CUSTOM, TRADE USAGE, ORAL OR WRITTEN STATEMENT OR OTHERWISE,
69#   INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES OF MERCHANTABILITY
70#   OR FITNESS FOR A PARTICULAR PURPOSE CONCERNING THE CODE.
71#
72# 5. NO LIABILITY.
73#   UNDER NO CIRCUMSTANCES SHALL JPRS AND/OR ITS AFFILIATES, LICENSORS, OR
74#   REPRESENTATIVES BE LIABLE FOR ANY DAMAGES INCLUDING BUT NOT LIMITED TO
75#   CONSEQUENTIAL, INDIRECT, SPECIAL, PUNITIVE OR INCIDENTAL DAMAGES,
76#   WHETHER FORESEEABLE OR UNFORESEEABLE, BASED ON YOUR CLAIMS, INCLUDING,
77#   BUT NOT LIMITED TO, CLAIMS FOR LOSS OF DATA, GOODWILL, PROFITS, USE OF
78#   MONEY, INTERRUPTION IN USE OR AVAILABILITY OF DATA, STOPPAGE, IMPLIED
79#   WARRANTY, BREACH OF CONTRACT, MISREPRESENTATION, NEGLIGENCE, STRICT
80#   LIABILITY IN TORT, OR OTHERWISE.
81#
82# 6. Indemnification.
83#   You hereby agree to indemnify, defend, and hold harmless JPRS for any
84#   liability incurred by JRPS due to your terms of warranty, support,
85#   indemnity, or liability offered by you to any third party.
86#
87# 7. Termination.
88# 7.1 This OSCL shall be automatically terminated in the events that:
89#   (a) You fail to comply with the terms herein and fail to cure such
90#       breach within 30 days of becoming aware of the breach;
91#   (b) You initiate patent or copyright infringement litigation against
92#       any party (including a cross-claim or counterclaim in a lawsuit)
93#       alleging that the Code constitutes a direct or indirect patent or
94#       copyright infringement, in such case, this OSCL to you shall
95#       terminate as of the date such litigation is filed;
96# 7.2 In the event of termination under Sections 7.1(a) or 7.1(b) above,
97#     all end user license agreements (excluding distributors and
98#     resellers) which have been validly granted by You or any distributor
99#     hereunder prior to termination shall survive termination.
100#
101#
102# 8. General.
103#   This OSCL shall be governed by, and construed and enforced in
104#   accordance with, the laws of Japan. Any litigation or arbitration
105#   between the parties shall be conducted exclusively in Tokyo, Japan
106#   except written consent of JPRS provides other venue.
107#
108#
109#                                EXHIBIT A
110#
111# The original open source code of idnkit-2 is idnkit-1.0 developed and
112# conceived by Japan Network Information Center ("JPNIC"), a Japanese
113# association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
114# Chiyoda-ku, Tokyo 101-0047, Japan, and JPRS modifies above original code
115# under following Terms and Conditions set forth by JPNIC.
116#
117#                                  JPNIC
118#
119# Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved.
120#
121# By using this file, you agree to the terms and conditions set forth bellow.
122#
123#                       LICENSE TERMS AND CONDITIONS
124#
125# The following License Terms and Conditions apply, unless a different
126# license is obtained from Japan Network Information Center ("JPNIC"),
127# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
128# Chiyoda-ku, Tokyo 101-0047, Japan.
129#
130# 1. Use, Modification and Redistribution (including distribution of any
131#    modified or derived work) in source and/or binary forms is permitted
132#    under this License Terms and Conditions.
133#
134# 2. Redistribution of source code must retain the copyright notices as they
135#    appear in each source code file, this License Terms and Conditions.
136#
137# 3. Redistribution in binary form must reproduce the Copyright Notice,
138#    this License Terms and Conditions, in the documentation and/or other
139#    materials provided with the distribution. For the purposes of binary
140#    distribution the "Copyright Notice" refers to the following language:
141#    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
142#
143# 4. The name of JPNIC may not be used to endorse or promote products
144#    derived from this Software without specific prior written approval of
145#    JPNIC.
146#
147# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
148#    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
149#    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
150#    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
151#    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
152#    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
153#    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
154#    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
155#    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
156#    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
157#    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
158#
159#
160#                        JPRS Public License Notice
161#                                   For
162#                                idnkit-2.
163#
164# The contents of this file are subject to the Terms and Conditions for
165# the Open Source Code License (the "OSCL"). You may not use this file
166# except in compliance with above terms and conditions. A copy of the OSCL
167# is available at <http://jprs.co.jp/idn/>.
168# The JPRS Revised Code is idnkit-2.
169# The Initial Developer of the JPRS Revised Code is Japan Network
170# Information Center ("JPNIC"), a Japanese association,
171# Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, Chiyoda-ku, Tokyo
172# 101-0047, Japan.
173# "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
174# "Copyright (c) 2010-2012 Japan Registry Services Co., Ltd.  All rights reserved."
175# Contributor(s): ______________________________________.
176#
177# If you wish to allow use of your version of this file only under the
178# above License(s) and not to allow others to use your version of this
179# file, please indicate your decision by deleting the relevant provisions
180# above and replacing them with the notice and other provisions required
181# by the above License(s). If you do not delete the relevant provisions,
182# a recipient may use your version of this file under either the above
183# License(s).
184#
185
186use Getopt::Long;
187use Carp;
188use UCD;
189use IDNATables;
190use ISO639Tables;
191use TR46Tables;
192use IO::File;
193use File::Basename;
194
#
# Version label of the input data.  parse_options() replaces
# $data_version with the first non-option command line argument, if any.
#
$unknown_version = '<unknown>';
$data_version    = $unknown_version;

#
# Built-in directory defaults (also quoted by print_usage()), followed by
# the directories actually in effect, which parse_options() may override.
#
($default_data_dir, $default_program_dir,
 $default_header_dir, $default_include_dir)
    = ('../data', '../lib/auto', '../include/idn/auto', 'idn/auto');

($data_dir, $program_dir, $header_dir, $include_dir)
    = ($default_data_dir, $default_program_dir,
       $default_header_dir, $default_include_dir);

#
# File names.  parse_options() prefixes every input file name with
# $data_dir and rebuilds both output file names from $output_file_id.
#
($program_file, $header_file) = ('data.c', 'data.h');

$unicodedata_file           = UCD::UnicodeData::default_file();
$specialcasing_file         = UCD::SpecialCasing::default_file();
$compositionexclusions_file = UCD::CompositionExclusions::default_file();
$arabicshaping_file         = UCD::ArabicShaping::default_file();
$scripts_file               = UCD::Scripts::default_file();
$derivedcoreproperties_file = UCD::DerivedCoreProperties::default_file();
$idnatables_file            = IDNATables::default_file();
$iso639lang_file            = ISO639Tables::default_file();
$tr46_file                  = TR46Tables::default_file();

# Output and input file IDs; both are filled in by set_file_ids().
$output_file_id = '';
@input_file_ids = ();

# SparseMap bits: the built-in triple and the triple currently in effect
# (parse_options() replaces the latter when --bits is given).
@default_sparsemap_bits = (9, 7, 5);
@sparsemap_bits         = @default_sparsemap_bits;

# Prefix to accessor function names.
use constant { ACCESSOR_PREFIX => 'idn__sparsemap_get' };
233
#
# Set input and output files.
#
# The first argument becomes the output file ID ($output_file_id); all
# remaining arguments become the list of input file IDs (@input_file_ids).
#
sub set_file_ids {
    my @ids = @_;

    return ($output_file_id, @input_file_ids) = @ids;
}
240
#
# Parse command line options.
#
# Reads -d/--datadir, -c/--programdir, -h/--headerdir, -i/--includedir and
# -b/--bits from @ARGV through GetOptions().  The first argument left over,
# if any, becomes $data_version.  A --bits value must be three
# comma-separated decimal numbers, each greater than zero, whose sum is at
# least 21.  On an unknown option or a bad --bits value the usage text is
# printed and the process exits with status 1.
#
# Finally every input file name is prefixed with $data_dir, and the output
# file names are rebuilt from $program_dir, $header_dir and the current
# value of $output_file_id.
#
sub parse_options {
    my $bits;

    my $parsed = GetOptions('datadir|d=s'    => \$data_dir,
                            'programdir|c=s' => \$program_dir,
                            'headerdir|h=s'  => \$header_dir,
                            'includedir|i=s' => \$include_dir,
                            'bits|b=s'       => \$bits);
    if (!$parsed) {
        print_usage();
        exit(1);
    }

    if (@ARGV > 0) {
        $data_version = $ARGV[0];
    }

    if (defined $bits) {
        # Shared failure path for a malformed or out-of-range --bits value.
        my $reject = sub {
            warn "Invalid SparseMap bits: $bits\n";
            print_usage();
            exit(1);
        };

        my @widths = ($bits =~ /^(\d+),(\d+),(\d+)$/);
        $reject->() if !@widths;

        @sparsemap_bits = @widths;
        my $total       = $widths[0] + $widths[1] + $widths[2];
        my @nonpositive = grep { $_ <= 0 } @widths;
        $reject->() if $total < 21 || @nonpositive;
    }

    # Input files live under the data directory.
    for my $input_file (\$unicodedata_file,
                        \$specialcasing_file,
                        \$compositionexclusions_file,
                        \$arabicshaping_file,
                        \$scripts_file,
                        \$derivedcoreproperties_file,
                        \$idnatables_file,
                        \$iso639lang_file,
                        \$tr46_file) {
        ${$input_file} = "$data_dir/${$input_file}";
    }

    # Output files are named after the output file ID.
    $program_file = "$program_dir/$output_file_id.c";
    $header_file  = "$header_dir/$output_file_id.h";
}
288
#
# Print usage.
#
# Writes the command synopsis and the option list to standard error, one
# warn() call per line, quoting the built-in default directories and the
# default SparseMap bits.
#
sub print_usage() {
    my $default_bits = join(',', @default_sparsemap_bits);

    my @usage = (
        "Usage: $0 [options..] [VERSION]",
        "Options:",
        "  -d DIR, --datadir=DIR",
        "             read input files in DIR.",
        "             (default: $default_data_dir)",
        "  -c DIR, --programdir=DIR",
        "             generate a program file at DIR.",
        "             (default: $default_program_dir)",
        "  -h DIR, --headerdir=DIR",
        "             generate a header file at DIR.",
        "             (default: $default_header_dir)",
        "  -i DIR, --includedir=DIR",
        "             directory used for a generated program file",
        "             to include generated header files.",
        "             (default: $default_include_dir)",
        "  -b N0,N1,N2, --bits=N0,N1,N2",
        "             SparseMap bits.",
        "             (default: $default_bits)",
    );

    for my $line (@usage) {
        warn "$line\n";
    }
    return 1;
}
313
#
# Output a preamble for a generated file.
#
# Returns a C comment, followed by an empty line, warning that the file
# must not be edited.  When @input_file_ids is empty the comment only says
# the file is generated automatically; otherwise it lists the default file
# name of every known input ID found in @input_file_ids, always in the
# fixed order below and each tagged with $data_version.
#
sub cprog_preamble {
    # Known input IDs paired with the routine giving their default file name.
    my @known_inputs = (
        ['unicodedata',           \&UCD::UnicodeData::default_file],
        ['specialcasing',         \&UCD::SpecialCasing::default_file],
        ['compositionexclusions', \&UCD::CompositionExclusions::default_file],
        ['arabicshaping',         \&UCD::ArabicShaping::default_file],
        ['scripts',               \&UCD::Scripts::default_file],
        ['derivedcoreproperties', \&UCD::DerivedCoreProperties::default_file],
        ['idnatables',            \&IDNATables::default_file],
        ['iso639lang',            \&ISO639Tables::default_file],
        ['tr46',                  \&TR46Tables::default_file],
    );

    my @lines = ("/*", " * Do not edit this file!");

    if (@input_file_ids == 0) {
        push @lines, " * This file is generated automatically.";
    } else {
        push @lines, " * This file is generated from:";

        for my $input (@known_inputs) {
            my ($id, $default_file_of) = @{$input};
            next unless grep { $_ eq $id } @input_file_ids;

            my $default_file = $default_file_of->();
            push @lines, " *    $default_file (version $data_version)";
        }
    }

    push @lines, " */";
    return join("\n", @lines) . "\n\n";
}
367
#
# Generate a C comment.
#
# Every argument is split at newlines; each resulting line is emitted on
# its own " * " row between the opening and closing comment markers.
#
sub cprog_comment {
    my @rows = map { " * " . $_ . "\n" } map { split(/\n/, $_) } @_;

    return join('', "/*\n", @rows, " */\n");
}
383
#
# Generate a C accessor function for SparseMap::Int.
#
# Arguments are the map name, the C return type and the C expression to
# return for values above UTF32_MAX.  The returned text defines a function
# named ACCESSOR_PREFIX followed by the map name, which looks a value up
# through <name>_imap and <name>_table using the <NAME>_BITS_1 and
# <NAME>_BITS_2 macros.
#
sub cprog_accessor_int {
    my ($name, $type, $default) = @_;

    my $macro_stem = uc($name);
    my $func       = ACCESSOR_PREFIX . $name;
    my $bits1      = $macro_stem . "_BITS_1";
    my $bits2      = $macro_stem . "_BITS_2";
    my $imap       = $name . "_imap";
    my $table      = $name . "_table";

    # Two-stage index lookup followed by the final table fetch.
    my $lookup = $table . "[" . $imap . "[" . $imap . "[idx0] + idx1]].tbl[idx2]";

    my @code = (
        $type,
        "$func(unsigned long v) {",
        "\tint idx0, idx1, idx2;",
        "",
        "\tif (v > UTF32_MAX)",
        "\t\treturn ($default);",
        "\tidx0 = v >> ($bits1 + $bits2);",
        "\tidx1 = (v >> $bits2) & ((1 << $bits1) - 1);",
        "\tidx2 = v & ((1 << $bits2) - 1);",
        "\treturn ($type) $lookup;",
        "}",
    );

    return join('', map { $_ . "\n" } @code);
}
410
#
# Generate a C accessor function's prototype for SparseMap::Int.
#
# Arguments are the map name and the C return type; the function name is
# ACCESSOR_PREFIX followed by the map name.
#
sub cprog_accessor_proto_int {
    my ($name, $type) = @_;

    return sprintf("extern %s\n%s(unsigned long v);\n",
                   $type, ACCESSOR_PREFIX . $name);
}
421
#
# Generate a C accessor function for SparseMap::Bit.
#
# The argument is the map name.  The returned text defines a function
# named ACCESSOR_PREFIX followed by the map name, which returns 0 for
# values above UTF32_MAX and otherwise the masked bit found through
# <name>_imap and <name>_bitmap using the <NAME>_BITS_1 and <NAME>_BITS_2
# macros.
#
sub cprog_accessor_bit {
    my ($name) = @_;

    my $macro_stem = uc($name);
    my $func       = ACCESSOR_PREFIX . $name;
    my $bits1      = $macro_stem . "_BITS_1";
    my $bits2      = $macro_stem . "_BITS_2";
    my $imap       = $name . "_imap";
    my $table      = $name . "_bitmap";

    # Two-stage index lookup followed by the byte fetch from the bitmap.
    my $lookup = $table . "[" . $imap . "[" . $imap . "[idx0] + idx1]].bm[idx2]";

    my @code = (
        "extern int",
        "$func(unsigned long v) {",
        "\tint idx0, idx1, idx2, idx3;",
        "",
        "\tif (v > UTF32_MAX)",
        "\t\treturn (0);",
        "\tidx0 = v >> ($bits1 + $bits2);",
        "\tidx1 = (v >> $bits2) & ((1 << $bits1) - 1);",
        "\tidx2 = (v & ((1 << $bits2) - 1)) >> 3;",
        "\tidx3 = 1 << (v & 0x07);",
        "\treturn $lookup & idx3;",
        "}",
    );

    return join('', map { $_ . "\n" } @code);
}
449
#
# Generate a C accessor function's prototype for SparseMap::Bit.
#
# The argument is the map name; the function name is ACCESSOR_PREFIX
# followed by the map name, and the declared return type is int.
#
sub cprog_accessor_proto_bit {
    my ($name) = @_;

    return sprintf("extern int\n%s(unsigned long v);\n",
                   ACCESSOR_PREFIX . $name);
}
459
#
# Open a file to generate a C program.
#
# Creates (or truncates) $program_file and writes the generated-file
# preamble followed by the #include lines for <stddef.h>, <idn/utf32.h>
# and the generated header.  The header is referenced by the base name of
# $header_file, placed under $include_dir unless that is empty.
#
# Returns the IO::File handle, left open for the caller to write to.
# Dies if the file cannot be opened.
#
sub open_program_file {
    # Direct method call rather than the indirect-object form
    # `new IO::File(...)', which perl can mis-parse.
    my $io = IO::File->new($program_file, '>')
        or die "failed to open the file, $!: $program_file\n";
    $io->print(cprog_preamble());

    $io->print("#include <stddef.h>\n");
    $io->print("#include <idn/utf32.h>\n");

    my $header_basename = basename($header_file);
    if ($include_dir eq '') {
        $io->print("#include <$header_basename>\n\n");
    } else {
        $io->print("#include <$include_dir/$header_basename>\n\n");
    }

    return $io;
}
480
#
# Close a file opened by open_program_file().
#
# Takes the IO::File handle and closes it.  Output is buffered, so a write
# error may only surface here; the result of close() is therefore checked
# and a failure is fatal instead of silently leaving a truncated file.
#
# Returns 1 on success.  Dies if the handle cannot be closed.
#
sub close_program_file {
    my ($io) = @_;

    $io->close()
        or die "failed to close the file, $!\n";
    return 1;
}
488
#
# Open a file to generate a C header.
#
# Creates (or truncates) $header_file and writes the generated-file
# preamble, the opening of an include guard, the opening of an
# `extern "C"' block for C++ and an #include of <stddef.h>.  The guard
# macro is IDN_<ID>_H, where <ID> is $output_file_id in upper case with
# every character that is not valid in a C identifier replaced by `_'.
#
# Returns the IO::File handle, left open for the caller to write to; the
# guard and the `extern "C"' block are closed by close_header_file().
# Dies if the file cannot be opened.
#
sub open_header_file {
    # Direct method call rather than the indirect-object form
    # `new IO::File(...)', which perl can mis-parse.
    my $io = IO::File->new($header_file, '>')
        or die "failed to open the file, $!: $header_file\n";

    $io->print(cprog_preamble());

    # Keep the guard a valid macro name: `.' and any other character
    # outside [A-Za-z0-9_] becomes `_'.
    my $macro = 'IDN_' . uc($output_file_id) . '_H';
    $macro =~ tr/A-Za-z0-9_/_/c;
    $io->print("#ifndef $macro\n");
    $io->print("#define $macro\n");
    $io->print("\n");

    $io->print("#ifdef __cplusplus\n");
    $io->print("extern \"C\" {\n");
    $io->print("#endif\n");
    $io->print("\n");

    $io->print("#include <stddef.h>\n");
    $io->print("\n");

    return $io;
}
514
#
# Close a file opened by open_header_file().
#
# Takes the IO::File handle, writes the end of the `extern "C"' block and
# the closing #endif of the include guard, then closes the handle.  Output
# is buffered, so a write error may only surface at close time; the result
# of close() is therefore checked and a failure is fatal instead of
# silently leaving a truncated header.
#
# Returns 1 on success.  Dies if the handle cannot be closed.
#
sub close_header_file {
    my ($io) = @_;

    $io->print("\n");
    $io->print("#ifdef __cplusplus\n");
    $io->print("}\n");
    $io->print("#endif\n");
    $io->print("\n");

    $io->print("#endif\n");
    $io->close()
        or die "failed to close the file, $!\n";
    return 1;
}
530
5311;
532