1 /*
2  * libid3tag - ID3 tag manipulation library
3  * Copyright (C) 2000-2004 Underbit Technologies, Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * $Id: ucs4.c,v 1.13 2004/01/23 09:41:32 rob Exp $
20  */
21 
22 # ifdef HAVE_CONFIG_H
23 #  include "config.h"
24 # endif
25 
26 # include "global.h"
27 
28 # include <stdlib.h>
29 
30 # include "id3tag.h"
31 # include "ucs4.h"
32 # include "latin1.h"
33 # include "utf16.h"
34 # include "utf8.h"
35 
36 id3_ucs4_t const id3_ucs4_empty[] = { 0 };
37 
38 /*
39  * NAME:	ucs4->length()
40  * DESCRIPTION:	return the number of ucs4 chars represented by a ucs4 string
41  */
id3_ucs4_length(id3_ucs4_t const * ucs4)42 id3_length_t id3_ucs4_length(id3_ucs4_t const *ucs4)
43 {
44   id3_ucs4_t const *ptr = ucs4;
45 
46   while (*ptr)
47     ++ptr;
48 
49   return ptr - ucs4;
50 }
51 
52 /*
53  * NAME:	ucs4->size()
54  * DESCRIPTION:	return the encoding size of a ucs4 string
55  */
id3_ucs4_size(id3_ucs4_t const * ucs4)56 id3_length_t id3_ucs4_size(id3_ucs4_t const *ucs4)
57 {
58   return id3_ucs4_length(ucs4) + 1;
59 }
60 
61 /*
62  * NAME:	ucs4->latin1size()
63  * DESCRIPTION:	return the encoding size of a latin1-encoded ucs4 string
64  */
id3_ucs4_latin1size(id3_ucs4_t const * ucs4)65 id3_length_t id3_ucs4_latin1size(id3_ucs4_t const *ucs4)
66 {
67   return id3_ucs4_size(ucs4);
68 }
69 
70 /*
71  * NAME:	ucs4->utf16size()
72  * DESCRIPTION:	return the encoding size of a utf16-encoded ucs4 string
73  */
id3_ucs4_utf16size(id3_ucs4_t const * ucs4)74 id3_length_t id3_ucs4_utf16size(id3_ucs4_t const *ucs4)
75 {
76   id3_length_t size = 0;
77 
78   while (*ucs4) {
79     ++size;
80     if (*ucs4 >= 0x00010000L &&
81 	*ucs4 <= 0x0010ffffL)
82       ++size;
83 
84     ++ucs4;
85   }
86 
87   return size + 1;
88 }
89 
90 /*
91  * NAME:	ucs4->utf8size()
92  * DESCRIPTION:	return the encoding size of a utf8-encoded ucs4 string
93  */
id3_ucs4_utf8size(id3_ucs4_t const * ucs4)94 id3_length_t id3_ucs4_utf8size(id3_ucs4_t const *ucs4)
95 {
96   id3_length_t size = 0;
97 
98   while (*ucs4) {
99     if (*ucs4 <= 0x0000007fL)
100       size += 1;
101     else if (*ucs4 <= 0x000007ffL)
102       size += 2;
103     else if (*ucs4 <= 0x0000ffffL)
104       size += 3;
105     else if (*ucs4 <= 0x001fffffL)
106       size += 4;
107     else if (*ucs4 <= 0x03ffffffL)
108       size += 5;
109     else if (*ucs4 <= 0x7fffffffL)
110       size += 6;
111     else
112       size += 2;  /* based on U+00B7 replacement char */
113 
114     ++ucs4;
115   }
116 
117   return size + 1;
118 }
119 
120 /*
121  * NAME:	ucs4->latin1duplicate()
122  * DESCRIPTION:	duplicate and encode a ucs4 string into latin1
123  */
id3_ucs4_latin1duplicate(id3_ucs4_t const * ucs4)124 id3_latin1_t *id3_ucs4_latin1duplicate(id3_ucs4_t const *ucs4)
125 {
126   id3_latin1_t *latin1;
127 
128   latin1 = malloc(id3_ucs4_latin1size(ucs4) * sizeof(*latin1));
129   if (latin1)
130     id3_latin1_encode(latin1, ucs4);
131 
132   return release(latin1);
133 }
134 
135 /*
136  * NAME:	ucs4->utf16duplicate()
137  * DESCRIPTION:	duplicate and encode a ucs4 string into utf16
138  */
id3_ucs4_utf16duplicate(id3_ucs4_t const * ucs4)139 id3_utf16_t *id3_ucs4_utf16duplicate(id3_ucs4_t const *ucs4)
140 {
141   id3_utf16_t *utf16;
142 
143   utf16 = malloc(id3_ucs4_utf16size(ucs4) * sizeof(*utf16));
144   if (utf16)
145     id3_utf16_encode(utf16, ucs4);
146 
147   return release(utf16);
148 }
149 
150 /*
151  * NAME:	ucs4->utf8duplicate()
152  * DESCRIPTION:	duplicate and encode a ucs4 string into utf8
153  */
id3_ucs4_utf8duplicate(id3_ucs4_t const * ucs4)154 id3_utf8_t *id3_ucs4_utf8duplicate(id3_ucs4_t const *ucs4)
155 {
156   id3_utf8_t *utf8;
157 
158   utf8 = malloc(id3_ucs4_utf8size(ucs4) * sizeof(*utf8));
159   if (utf8)
160     id3_utf8_encode(utf8, ucs4);
161 
162   return release(utf8);
163 }
164 
165 /*
166  * NAME:	ucs4->copy()
167  * DESCRIPTION:	copy a ucs4 string
168  */
id3_ucs4_copy(id3_ucs4_t * dest,id3_ucs4_t const * src)169 void id3_ucs4_copy(id3_ucs4_t *dest, id3_ucs4_t const *src)
170 {
171   while ((*dest++ = *src++))
172     ;
173 }
174 
175 /*
176  * NAME:	ucs4->duplicate()
177  * DESCRIPTION:	duplicate a ucs4 string
178  */
id3_ucs4_duplicate(id3_ucs4_t const * src)179 id3_ucs4_t *id3_ucs4_duplicate(id3_ucs4_t const *src)
180 {
181   id3_ucs4_t *ucs4;
182 
183   ucs4 = malloc(id3_ucs4_size(src) * sizeof(*ucs4));
184   if (ucs4)
185     id3_ucs4_copy(ucs4, src);
186 
187   return ucs4;
188 }
189 
190 /*
191  * NAME:	ucs4->putnumber()
192  * DESCRIPTION:	write a ucs4 string containing a (positive) decimal number
193  */
id3_ucs4_putnumber(id3_ucs4_t * ucs4,unsigned long number)194 void id3_ucs4_putnumber(id3_ucs4_t *ucs4, unsigned long number)
195 {
196   int digits[10], *digit;
197 
198   digit = digits;
199 
200   do {
201     *digit++ = number % 10;
202     number  /= 10;
203   }
204   while (number);
205 
206   while (digit != digits)
207     *ucs4++ = '0' + *--digit;
208 
209   *ucs4 = 0;
210 }
211 
212 /*
213  * NAME:	ucs4->getnumber()
214  * DESCRIPTION:	read a ucs4 string containing a (positive) decimal number
215  */
id3_ucs4_getnumber(id3_ucs4_t const * ucs4)216 unsigned long id3_ucs4_getnumber(id3_ucs4_t const *ucs4)
217 {
218   unsigned long number = 0;
219 
220   while (*ucs4 >= '0' && *ucs4 <= '9')
221     number = 10 * number + (*ucs4++ - '0');
222 
223   return number;
224 }
225