1 /* -- pagc_tools.c
2
3 Various and miscellaneous functions.
4
5 Prototype 20H10 (This file was written by Walter Sinclair).
6
7 This file is part of PAGC.
8
9 Copyright (c) 2010 Walter Bruce Sinclair
10
11 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17
18 */
19
20 /* For pagc-0.4.0 : last revised 2010-11-25 */
21
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <ctype.h>
26 #include <unistd.h>
27 #include "pagc_common.h"
28 #include "pagc_tools.h"
29
/* ------------------------------------------------------------
   MAXOUTSYM is the number of table slots scanned by
   out_symb_value. This fallback value is used only when no
   previously included header has already defined the macro.
------------------------------------------------------------- */
#if !defined( MAXOUTSYM )
#define MAXOUTSYM 18
#endif

/* ------------------------------------------------------------
   Forward declaration of a file-local helper that is defined
   further down in this file; it exists only in builds where
   MSYS_POSIX is defined.
------------------------------------------------------------- */
#if defined( MSYS_POSIX )
static void conform_directory_separator( char *path_name ) ;
#endif
37
38
/* ------------------------------------------------------------
   Names of the output symbols. The position of a name in this
   table is the integer returned for it by out_symb_value and
   the index accepted by out_symb_name.
------------------------------------------------------------- */
static const char *OutSymbNames[] = {
    "BLDNG" ,  "HOUSE" ,  "PREDIR" , "QUALIF" , "PRETYP" , "STREET" ,   /*  0 ..  5 */
    "SUFTYP" , "SUFDIR" , "RR" ,     "UNKNWN" , "CITY" ,   "PROV" ,     /*  6 .. 11 */
    "NATION" , "POSTAL" , "BOXH" ,   "BOXT" ,   "UNITH" ,  "UNITT"      /* 12 .. 17 */
} ;
59
/* ------------------------------------------------------------
   Names of the input symbols. The position of a name in this
   table is the index accepted by in_symb_name.
------------------------------------------------------------- */
static const char *InSymbNames[] = {
    "NUMBER" , "WORD" ,   "TYPE" ,   "QUALIF" ,   "PRETYP" ,   /*  0 ..  4 */
    "STREET" , "ROAD" ,   "STOPWORD" , "RR" ,     "DASH" ,     /*  5 ..  9 */
    "CITY" ,   "PROV" ,   "NATION" , "AMPERS" ,   "BOXH" ,     /* 10 .. 14 */
    "ORD" ,    "UNITH" ,  "UNITT" ,  "SINGLE" ,   "BUILDH" ,   /* 15 .. 19 */
    "MILE" ,   "DOUBLE" , "DIRECT" , "MIXED" ,    "BUILDT" ,   /* 20 .. 24 */
    "FRACT" ,  "PCT" ,    "PCH" ,    "QUINT" ,    "QUAD"       /* 25 .. 29 */
} ;
92
93 /* ------------------------------------------------------------
94 ISO 8859 character set may pop up in some files. After 1998
95 TigerLine will use them.
96 ------------------------------------------------------------- */
convert_latin_one(char * inp)97 void convert_latin_one ( char *inp ) {
98 unsigned char *str ;
99
100 for ( str = ( unsigned char * ) inp ;
101 *str != SENTINEL ;
102 str++ ) {
103 unsigned char ch ;
104 ch = *str ;
105 /* -------------------------------------------
106 if bit 7 is set, reset bit 5 so both upper
107 and lower case can be done together
108 --------------------------------------------- */
109 if ( ch & 0x80 ) {
110 ch &= 0xDF ;
111 /* -----------------------------------------
112 reduce letters with diacritical marks to
113 their unmarked base letters
114 ------------------------------------------ */
115 if ( ch >= 0xC0 &&
116 ch <= 0xC6 )
117 ch = 'A' ;
118 else if ( ch == 0xc7 )
119 ch = 'C' ;
120 else if ( ch >= 0xc8 && ch <= 0xcb )
121 ch = 'E' ;
122 else if ( ch >= 0xcc && ch <= 0xcf )
123 ch = 'I' ;
124 else if ( ch == 0xd0 )
125 ch = 'D' ;
126 else if ( ch == 0xd1 )
127 ch = 'N' ;
128 else if ( ch >= 0xd2 && ch <= 0xd6 )
129 ch = 'O' ;
130 else if ( ch >= 0xd9 && ch <= 0xdc )
131 ch = 'U' ;
132 else if ( ch >= 0xdd && ch < 0xdf )
133 ch = 'Y' ;
134 else
135 /* -------------------------------
136 just clear the top bit so it
137 won't gum up the edit distance
138 machinery
139 -------------------------------- */
140 ch &= 0x7f ;
141 }
142 *str = ch ;
143 }
144
145 /* ----------------------------------------------
146 while we're at it, add a newline to the end
147 because the lexical scanner likes it like that
148 ----------------------------------------------- */
149 *str++ = '\n' ;
150 *str = SENTINEL ;
151 }
152
char_append(const char * div,char * dest,const char * src,int max_wid)153 void char_append( const char *div ,
154 char *dest ,
155 const char *src ,
156 int max_wid ) {
157 if ( *src == SENTINEL )
158 return ;
159 /* -- skip the delimitor if dest is empty -- */
160 if ( *dest == SENTINEL ) {
161 append_string_to_max( dest ,
162 ( char * ) src ,
163 max_wid ) ;
164 return ;
165 }
166 append_string_to_max( dest , ( char * ) div , max_wid ) ;
167 append_string_to_max( dest , ( char * ) src , max_wid ) ;
168 }
169
out_symb_name(int i)170 const char *out_symb_name( int i ) {
171 return ( OutSymbNames[ i ] ) ;
172 }
173
in_symb_name(int i)174 const char *in_symb_name( int i ) {
175 return ( InSymbNames[ i ] ) ;
176 }
177
out_symb_value(const char * src)178 int out_symb_value( const char *src ) {
179 int i ;
180
181 /* -- linear search -- */
182 for ( i = 0 ;
183 i < MAXOUTSYM ;
184 i++ ) {
185 if ( strcmp( src ,
186 OutSymbNames[ i ] ) == 0 )
187 return i ;
188 }
189 return FAIL ;
190 }
191
192 /*-------------------------------------------
193 util.c (get_input_line)
194 called by initial.c (restore_build_state)
195 --------------------------------------------*/
get_input_line(char * buf,FILE * fp)196 int get_input_line( char *buf ,
197 FILE *fp ) {
198 int i ;
199
200 BLANK_STRING(buf) ;
201 if ( ( fgets( buf ,
202 MAXSTRLEN ,
203 fp ) ) == NULL )
204 return FALSE ;
205 for ( i = strlen( buf ) ;
206 i > 0 ;
207 i-- ) {
208 if ( strchr( "\n\r",
209 buf[ i - 1 ] ) ) {
210 buf[ i - 1 ] = SENTINEL ;
211 } else
212 break ;
213 }
214 return TRUE ;
215 }
216
217
218 /*-------------------------------------------------------
219 pagc_tools.c (parse_file_name)
220 called by open_aux_file, main.c (main)
221 copies the file name to the output_tail and the path to
222 the output_head
223 --------------------------------------------------------*/
parse_file_name(const char * input_path_name,char global_path_separator,char * output_tail,char * output_head)224 void parse_file_name( const char *input_path_name ,
225 char global_path_separator ,
226 char *output_tail ,
227 char *output_head ) {
228 const char *end_ptr , *src ;
229 char *dest ;
230 /* -- find the file name part first -- */
231 /* -- move to end of the pathname -- */
232 for ( end_ptr = input_path_name ; *end_ptr != SENTINEL ; end_ptr++ ) ;
233 /* -- find the last directory delimitor -- */
234 while ( ( end_ptr > input_path_name ) && NOT_PATH_DELIMITOR(*end_ptr) ) {
235 end_ptr -- ;
236 }
237 /* ---------------------------------------------------------------
238 either end_ptr has the last delimitor or it is at string start.
239 If the first case, we need to increment to get the filename and
240 need to copy everything up to and including for the path.
241 -----------------------------------------------------------------*/
242 /* -- copy from beg to endptr to output path -- */
243 dest = output_head ;
244 src = input_path_name ;
245 /* if end_ptr points to a path delimitor, copy everything up but not
246 including it into the output_head (if output_head isn't NULL) */
247 if ( IS_PATH_DELIMITOR( *end_ptr ) ) {
248 while ( src < end_ptr ) {
249 if ( dest != NULL ) {
250 *dest++ = *src ;
251 }
252 src++ ;
253 }
254 src++ ;
255 }
256 /* -- copy from endptr to end to output file name -- */
257 if ( dest != NULL ) {
258 BLANK_STRING(dest) ;
259 }
260 /* copy everything after the delimitor up to the sentinel
261 into the output_tail */
262 if ( ( dest = output_tail ) != NULL ) {
263 while ( TRUE ) {
264 if ( ( *dest++ = *src++ ) == SENTINEL ) {
265 break ;
266 }
267 }
268 }
269 }
270
271
272 /*--------------------------------------------------
273 pagc_tools.c (combine_path_file)
274 called by util.c (open_aux_file)
275 calls char_append
276 --------------------------------------------------*/
combine_path_file(char global_path_separator,char * input_head,char * input_tail,char * output_path_name)277 void combine_path_file( char global_path_separator ,
278 char *input_head ,
279 char *input_tail ,
280 char *output_path_name ) {
281 char combine_buf[ 2 ] ;
282
283 combine_buf[ 0 ] = global_path_separator ;
284 combine_buf[ 1 ] = SENTINEL ;
285
286 if ( ( input_head != NULL ) &&
287 ( input_head[ 0 ] != SENTINEL ) ) {
288 append_string_to_max( output_path_name ,
289 input_head ,
290 PATHNAME_LEN ) ;
291
292 char_append( combine_buf ,
293 output_path_name ,
294 input_tail ,
295 PATHNAME_LEN ) ;
296 return ;
297 }
298 append_string_to_max( output_path_name ,
299 input_tail ,
300 PATHNAME_LEN ) ;
301 }
302
303
upper_case(char * d,const char * s)304 void upper_case( char *d ,
305 const char *s ) {
306 /* -- make an uppercase copy in d of string in s -- */
307 for ( ;
308 *s != SENTINEL ;
309 s++ ) {
310 *d++ = ( islower( *s )? toupper( *s ) : *s ) ;
311 }
312 BLANK_STRING(d) ;
313 }
314
/* ------------------------------------------------------------
   upper_case_compare   (2010-10-22 : new routine)
   Compare str1 with str2 after converting both to uppercase.
   Returns zero when they match, a negative value when str1
   sorts first and a positive value when str2 sorts first, the
   same sign convention as strcmp.

   The strings are compared character by character where they
   lie, so no intermediate copies are made and inputs of any
   length are handled. Characters are read as unsigned char,
   which is what toupper requires and what strcmp uses for
   ordering.
------------------------------------------------------------- */
int upper_case_compare( char *str1 , char *str2 ) {
    const unsigned char *left = ( const unsigned char * ) str1 ;
    const unsigned char *right = ( const unsigned char * ) str2 ;

    for ( ; ; left++ , right++ ) {
        int upper_left = toupper( *left ) ;
        int upper_right = toupper( *right ) ;

        if ( upper_left != upper_right ) {
            /* -- both values lie in 0..UCHAR_MAX, so this cannot overflow -- */
            return ( upper_left - upper_right ) ;
        }
        if ( *left == '\0' ) {
            /* -- equal up to and including the terminator -- */
            return 0 ;
        }
    }
}
323
/* ------------------------------------------------------------
   fast_reverse_endian   (2010-10-30 : moved here for use in ds)
   Reverse, in place, the order of the bytes_to_reverse bytes
   that start at location_to_reverse. A count below two leaves
   the memory as it is.
------------------------------------------------------------- */
void fast_reverse_endian( char *location_to_reverse , int bytes_to_reverse ) {
    int low = 0 ;
    int high = bytes_to_reverse - 1 ;

    /* -- swap the outermost pair and close in on the middle -- */
    while ( low < high ) {
        char held = location_to_reverse[ low ] ;

        location_to_reverse[ low ] = location_to_reverse[ high ] ;
        location_to_reverse[ high ] = held ;
        low++ ;
        high-- ;
    }
}
334
335 /*=================================================================
336 pagc_tools.c (append_string_to_max ) = format.c (format_ncat)
337 =================================================================*/
append_string_to_max(char * dest_buf_start,char * src_str_start,int buf_size)338 void append_string_to_max( char *dest_buf_start ,
339 char *src_str_start ,
340 int buf_size ) {
341
342 char a ;
343 char *d_ptr , *s_ptr , *buf_end ;
344
345 /* -- move to end of current contents of buffer -- */
346 d_ptr = dest_buf_start ;
347 while ( ( a = *d_ptr ) != SENTINEL ) {
348 d_ptr ++ ;
349 }
350 buf_end = dest_buf_start + buf_size - 1 ;
351
352 if ( d_ptr >= buf_end ) {
353 #ifndef BUILD_API
354 #ifndef NO_STDERR_OUTPUT
355 fprintf( stderr , "format_strncat: fatal buffer overflow of %s\n" , dest_buf_start ) ;
356 fprintf( stderr , "No room for %s\n" , src_str_start ) ;
357 #endif
358 exit( 1 ) ;
359 #else
360 /* TODO if postgresql we can throw and error or notice
361 but for now we will just truncate the string */
362 *d_ptr = SENTINEL ;
363 return;
364 #endif
365 }
366 s_ptr = src_str_start ;
367 while ( ( ( a = *s_ptr++ ) != SENTINEL ) &&
368 ( d_ptr != buf_end ) ) {
369 *d_ptr++ = a ;
370 }
371 *d_ptr = SENTINEL ;
372 }
373
374
375
376 /* ========================================================
377 pagc_tools.c (establish_directory)
378 Determine the current working directory and path_separator
379 ========================================================= */
establish_directory(char * c_w_d,char * p_s)380 int establish_directory( char * c_w_d ,
381 char * p_s ) {
382 char *c_w_d_ptr ;
383
384 c_w_d_ptr = getcwd( c_w_d ,
385 ( PATHNAME_LEN - 1 ) ) ;
386 if ( c_w_d_ptr == NULL ) {
387 return FALSE ;
388 }
389
390 *p_s = FORE_SLASH ;
391
392 #ifdef MSYS_POSIX
393
394 /* ..... transform cwd's non-POSIX directory separators to conform ..... */
395
396 conform_directory_separator( c_w_d ) ;
397
398 #endif
399
400 if ( isalpha( c_w_d[ 0 ] ) ) {
401
402 /* ..... drive letter, colon, dir_sep ..... */
403
404 if ( IS_COLON( c_w_d[ 1 ] ) ) {
405 *p_s = c_w_d[ 2 ] ;
406 if ( ( *p_s != FORE_SLASH ) &&
407 ( *p_s != BACK_SLASH ) ) {
408 return FALSE ;
409 }
410 } else {
411 return FALSE ;
412 }
413 }
414 return TRUE ;
415 }
416
#ifdef MSYS_POSIX
/*------------------------------------------------------------------
pagc_tools.c (conform_directory_separator)
-- compiled only when MSYS_POSIX is defined .....
-- transform non-POSIX directory separators to conform with POSIX --
called by establish_directory (the original note names init_global)
string.h (strchr)

Every BACK_SLASH in path_name is overwritten, in place, with a
FORE_SLASH.
-------------------------------------------------------------------*/
static void conform_directory_separator( char * path_name ) {
    char *hit = path_name ;

    /* ..... hop from one back slash to the next until none is left ..... */
    while ( ( hit = strchr( hit , BACK_SLASH ) ) != NULL ) {
        *hit++ = FORE_SLASH ;
    }
}
/* ..... END OF IFDEF MSYS_POSIX ..... */
#endif
440
441
442