1 /* -- pagc_tools.c
2 
3 Various and miscellaneous functions.
4 
5 Prototype 20H10 (This file was written by Walter Sinclair).
6 
7 This file is part of PAGC.
8 
9 Copyright (c) 2010 Walter Bruce Sinclair
10 
11 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12 
13 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14 
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17 
18 */
19 
20 /* For pagc-0.4.0 : last revised 2010-11-25 */
21 
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <ctype.h>
26 #include <unistd.h>
27 #include "pagc_common.h"
28 #include "pagc_tools.h"
29 
/* ------------------------------------------------------------
   Fallback for the number of output symbols. It only takes
   effect when none of the headers included above has already
   defined MAXOUTSYM. out_symb_value uses it as the upper bound
   when scanning OutSymbNames, which holds 18 entries.
------------------------------------------------------------- */
#ifndef MAXOUTSYM
#define MAXOUTSYM 18
#endif

/* ------------------------------------------------------------
   Forward declaration of the file-local helper that rewrites
   back slashes as forward slashes. It is declared (and defined
   further down) only when MSYS_POSIX is defined.
------------------------------------------------------------- */
#ifdef MSYS_POSIX
static void conform_directory_separator( char * ) ;
#endif
37 
38 
/* ------------------------------------------------------------
   Names of the output symbols. The position of a name in this
   table is the integer value handed out by out_symb_value and
   accepted by out_symb_name.
------------------------------------------------------------- */
static const char *OutSymbNames[] = {
   "BLDNG" ,    /*  0 */
   "HOUSE" ,    /*  1 */
   "PREDIR" ,   /*  2 */
   "QUALIF" ,   /*  3 */
   "PRETYP" ,   /*  4 */
   "STREET" ,   /*  5 */
   "SUFTYP" ,   /*  6 */
   "SUFDIR" ,   /*  7 */
   "RR" ,       /*  8 */
   "UNKNWN" ,   /*  9 */
   "CITY" ,     /* 10 */
   "PROV" ,     /* 11 */
   "NATION" ,   /* 12 */
   "POSTAL" ,   /* 13 */
   "BOXH" ,     /* 14 */
   "BOXT" ,     /* 15 */
   "UNITH" ,    /* 16 */
   "UNITT"      /* 17 */
} ;
59 
/* ------------------------------------------------------------
   Names of the input symbols. The position of a name in this
   table is the index accepted by in_symb_name.
------------------------------------------------------------- */
static const char *InSymbNames[] = {
   "NUMBER" ,     /*  0 */
   "WORD" ,       /*  1 */
   "TYPE" ,       /*  2 */
   "QUALIF" ,     /*  3 */
   "PRETYP" ,     /*  4 */
   "STREET" ,     /*  5 */
   "ROAD" ,       /*  6 */
   "STOPWORD" ,   /*  7 */
   "RR" ,         /*  8 */
   "DASH" ,       /*  9 */
   "CITY" ,       /* 10 */
   "PROV" ,       /* 11 */
   "NATION" ,     /* 12 */
   "AMPERS" ,     /* 13 */
   "BOXH" ,       /* 14 */
   "ORD" ,        /* 15 */
   "UNITH" ,      /* 16 */
   "UNITT" ,      /* 17 */
   "SINGLE" ,     /* 18 */
   "BUILDH" ,     /* 19 */
   "MILE" ,       /* 20 */
   "DOUBLE" ,     /* 21 */
   "DIRECT" ,     /* 22 */
   "MIXED" ,      /* 23 */
   "BUILDT" ,     /* 24 */
   "FRACT" ,      /* 25 */
   "PCT" ,        /* 26 */
   "PCH" ,        /* 27 */
   "QUINT" ,      /* 28 */
   "QUAD" ,       /* 29 */
} ;
92 
/* ------------------------------------------------------------
   ISO 8859-1 (Latin-1) characters may turn up in some input
   files. After 1998 TigerLine will use them.
------------------------------------------------------------- */
convert_latin_one(char * inp)97 void convert_latin_one ( char *inp ) {
98    unsigned char *str ;
99 
100    for ( str = ( unsigned char * ) inp ;
101          *str != SENTINEL ;
102          str++ ) {
103       unsigned char ch ;
104       ch = *str ;
105       /* -------------------------------------------
106          if bit 7 is set, reset bit 5 so both upper
107          and lower case can be done together
108       --------------------------------------------- */
109       if ( ch & 0x80 ) {
110          ch &= 0xDF ;
111          /* -----------------------------------------
112             reduce letters with diacritical marks to
113             their unmarked base letters
114          ------------------------------------------ */
115          if ( ch >= 0xC0 &&
116               ch <= 0xC6 )
117             ch = 'A' ;
118          else if ( ch == 0xc7 )
119             ch = 'C' ;
120          else if ( ch >= 0xc8 && ch <= 0xcb )
121             ch = 'E' ;
122          else if ( ch >= 0xcc && ch <= 0xcf )
123             ch = 'I' ;
124          else if ( ch == 0xd0 )
125             ch = 'D' ;
126          else if ( ch == 0xd1 )
127             ch = 'N' ;
128          else if ( ch >= 0xd2 && ch <= 0xd6 )
129             ch = 'O' ;
130          else if ( ch >= 0xd9 && ch <= 0xdc )
131             ch = 'U' ;
132          else if ( ch >= 0xdd && ch < 0xdf )
133             ch = 'Y' ;
134          else
135             /* -------------------------------
136                just clear the top bit so it
137                won't gum up the edit distance
138                machinery
139             -------------------------------- */
140             ch &= 0x7f ;
141       }
142       *str = ch ;
143    }
144 
145    /* ----------------------------------------------
146    while we're at it, add a newline to the end
147       because the lexical scanner likes it like that
148    ----------------------------------------------- */
149    *str++ = '\n' ;
150    *str = SENTINEL ;
151 }
152 
char_append(const char * div,char * dest,const char * src,int max_wid)153 void char_append( const char *div ,
154                   char *dest ,
155                   const char *src ,
156                   int max_wid ) {
157    if ( *src == SENTINEL )
158       return ;
159    /* -- skip the delimitor if dest is empty -- */
160    if ( *dest == SENTINEL ) {
161       append_string_to_max( dest ,
162                             ( char * ) src ,
163                             max_wid ) ;
164       return ;
165    }
166    append_string_to_max( dest , ( char * ) div , max_wid ) ;
167    append_string_to_max( dest , ( char * ) src , max_wid ) ;
168 }
169 
out_symb_name(int i)170 const char *out_symb_name( int i ) {
171    return ( OutSymbNames[ i ] ) ;
172 }
173 
in_symb_name(int i)174 const char *in_symb_name( int i ) {
175    return ( InSymbNames[ i ] ) ;
176 }
177 
out_symb_value(const char * src)178 int out_symb_value( const char *src ) {
179    int i ;
180 
181    /* -- linear search -- */
182    for ( i = 0 ;
183          i < MAXOUTSYM ;
184          i++ ) {
185       if ( strcmp( src ,
186                    OutSymbNames[ i ] ) == 0 )
187          return i ;
188    }
189    return FAIL ;
190 }
191 
192 /*-------------------------------------------
193 util.c (get_input_line)
194 called by initial.c (restore_build_state)
195 --------------------------------------------*/
get_input_line(char * buf,FILE * fp)196 int get_input_line( char *buf ,
197                     FILE *fp ) {
198    int i ;
199 
200    BLANK_STRING(buf) ;
201    if ( ( fgets( buf ,
202                  MAXSTRLEN ,
203                  fp ) ) == NULL )
204       return FALSE ;
205    for ( i = strlen( buf ) ;
206          i > 0 ;
207          i-- ) {
208       if ( strchr( "\n\r",
209                    buf[ i - 1 ] ) ) {
210          buf[ i - 1 ] = SENTINEL ;
211       } else
212          break ;
213    }
214    return TRUE ;
215 }
216 
217 
218 /*-------------------------------------------------------
219 pagc_tools.c (parse_file_name)
220 called by open_aux_file, main.c (main)
221 copies the file name to the output_tail and the path to
222 the output_head
223 --------------------------------------------------------*/
/* ------------------------------------------------------------
   parse_file_name
   Splits input_path_name at its last path delimiter.

   input_path_name       : the path to split; it is not modified.
   global_path_separator : not referenced by name in this body.
                           NOTE(review): the path delimiter macros
                           are defined elsewhere and may refer to
                           it - confirm before renaming it.
   output_tail           : receives everything after the last
                           delimiter (the whole input when there
                           is no delimiter); may be NULL.
   output_head           : receives everything before the last
                           delimiter, without the delimiter itself
                           (empty when there is no delimiter);
                           may be NULL.

   Neither copy is length limited, so each non-NULL output
   buffer has to be big enough for the part it receives.
   A delimiter in the very first position gives an empty head.
------------------------------------------------------------- */
void parse_file_name( const char *input_path_name ,
                      char global_path_separator ,
                      char *output_tail ,
                      char *output_head ) {
	const char *end_ptr , *src ;
	char *dest ;
   /* -- find the file name part first -- */
   /* -- move to the terminator at the end of the pathname -- */
	for ( end_ptr = input_path_name ; *end_ptr != SENTINEL ; end_ptr++ ) ;
	/* -- back up to the last directory delimiter, stopping at the
	   first character if there is none -- */
	while ( ( end_ptr > input_path_name ) && NOT_PATH_DELIMITOR(*end_ptr) ) {
		end_ptr -- ;
	}
	/* ---------------------------------------------------------------
	either end_ptr is on the last delimiter or it is at the start of
		the string. In the first case everything before the delimiter
		is the path, and the file name starts one character after it.
	-----------------------------------------------------------------*/
	/* -- the head is written through dest, reading from src -- */
	dest = output_head ;
	src = input_path_name ;
	/* if end_ptr points to a path delimiter, copy everything up to but
	not including it into the output_head (if output_head isn't NULL) */
	if ( IS_PATH_DELIMITOR( *end_ptr ) ) {
		while ( src < end_ptr ) {
			if ( dest != NULL ) {
				*dest++ = *src ;
			}
			src++ ;
		}
		/* -- step over the delimiter itself -- */
		src++ ;
	}
	/* -- terminate the head; it is left empty when nothing was copied -- */
	if ( dest != NULL ) {
		BLANK_STRING(dest) ;
	}
	/* copy everything after the delimiter up to and including the
	sentinel into the output_tail (if output_tail isn't NULL) */
	if ( ( dest = output_tail ) != NULL ) {
		while ( TRUE ) {
			if ( ( *dest++ = *src++ ) == SENTINEL ) {
				break ;
			}
		}
	}
}
270 
271 
272 /*--------------------------------------------------
273 pagc_tools.c (combine_path_file)
274 called by util.c (open_aux_file)
275 calls char_append
276 --------------------------------------------------*/
combine_path_file(char global_path_separator,char * input_head,char * input_tail,char * output_path_name)277 void combine_path_file( char global_path_separator ,
278                         char *input_head ,
279                         char *input_tail ,
280                         char *output_path_name ) {
281    char combine_buf[ 2 ] ;
282 
283    combine_buf[ 0 ] = global_path_separator ;
284    combine_buf[ 1 ] = SENTINEL ;
285 
286    if ( ( input_head != NULL ) &&
287         ( input_head[ 0 ] != SENTINEL ) ) {
288       append_string_to_max( output_path_name ,
289                             input_head ,
290                             PATHNAME_LEN ) ;
291 
292       char_append( combine_buf ,
293                    output_path_name ,
294                    input_tail ,
295                    PATHNAME_LEN ) ;
296       return ;
297    }
298    append_string_to_max( output_path_name ,
299                          input_tail ,
300                          PATHNAME_LEN ) ;
301 }
302 
303 
upper_case(char * d,const char * s)304 void upper_case( char *d ,
305                  const char *s ) {
306    /* -- make an uppercase copy in d of string in s -- */
307    for ( ;
308          *s != SENTINEL ;
309          s++ ) {
310       *d++ = ( islower( *s )? toupper( *s ) : *s ) ;
311    }
312    BLANK_STRING(d) ;
313 }
314 
315 /* 2010-10-22 : new routine */
/* ------------------------------------------------------------
   upper_case_compare
   Compares str1 with str2 as if both had been converted to
   upper case first. Returns zero when they are equal that way,
   a negative value when str1 sorts before str2 and a positive
   value when it sorts after, in the manner of strcmp.

   The strings are compared in place, one character at a time,
   so there is no limit on their length and neither is copied
   or modified.
------------------------------------------------------------- */
int upper_case_compare( char *str1 , char* str2 ) {
	/* -- read as unsigned char : that is what the ctype functions
	      require and how strcmp orders characters -- */
	const unsigned char *left = ( const unsigned char * ) str1 ;
	const unsigned char *right = ( const unsigned char * ) str2 ;

	for ( ; ; left++ , right++ ) {
		int upper_left = toupper( *left ) ;
		int upper_right = toupper( *right ) ;

		if ( upper_left != upper_right ) {
			return ( upper_left - upper_right ) ;
		}
		/* -- both strings ended together -- */
		if ( upper_left == '\0' ) {
			return 0 ;
		}
	}
}
323 
324 /* 2010-10-30 : moved here for use in ds */
/* ------------------------------------------------------------
   fast_reverse_endian
   Reverses, in place, the order of the bytes_to_reverse bytes
   that start at location_to_reverse. Nothing is changed when
   bytes_to_reverse is less than two.
------------------------------------------------------------- */
void fast_reverse_endian( char *location_to_reverse , int bytes_to_reverse ) {
	int low = 0 ;
	int high = bytes_to_reverse - 1 ;

	/* -- swap the outermost pair and work inwards -- */
	while ( low < high ) {
		char held = location_to_reverse[ low ] ;
		location_to_reverse[ low ] = location_to_reverse[ high ] ;
		location_to_reverse[ high ] = held ;
		low++ ;
		high-- ;
	}
}
334 
335 /*=================================================================
336 pagc_tools.c (append_string_to_max ) = format.c (format_ncat)
337 =================================================================*/
append_string_to_max(char * dest_buf_start,char * src_str_start,int buf_size)338 void append_string_to_max( char *dest_buf_start ,
339                            char *src_str_start ,
340                            int buf_size ) {
341 
342    char a ;
343    char *d_ptr , *s_ptr , *buf_end ;
344 
345    /* -- move to end of current contents of buffer -- */
346    d_ptr = dest_buf_start ;
347    while ( ( a = *d_ptr ) != SENTINEL ) {
348       d_ptr ++ ;
349    }
350    buf_end = dest_buf_start + buf_size - 1 ;
351 
352    if ( d_ptr >= buf_end ) {
353 #ifndef BUILD_API
354 #ifndef NO_STDERR_OUTPUT
355       fprintf( stderr , "format_strncat: fatal buffer overflow of %s\n" , dest_buf_start ) ;
356       fprintf( stderr , "No room for %s\n" , src_str_start ) ;
357 #endif
358       exit( 1 ) ;
359 #else
360       /* TODO if postgresql we can throw and error or notice
361          but for now we will just truncate the string */
362       *d_ptr = SENTINEL ;
363       return;
364 #endif
365    }
366    s_ptr = src_str_start ;
367    while ( ( ( a = *s_ptr++ ) != SENTINEL ) &&
368            ( d_ptr != buf_end ) ) {
369       *d_ptr++ = a ;
370    }
371    *d_ptr = SENTINEL ;
372 }
373 
374 
375 
376 /* ========================================================
377 pagc_tools.c (establish_directory)
378 Determine the current working directory and path_separator
379 ========================================================= */
establish_directory(char * c_w_d,char * p_s)380 int establish_directory( char * c_w_d ,
381                          char * p_s ) {
382    char *c_w_d_ptr ;
383 
384    c_w_d_ptr = getcwd( c_w_d ,
385                        ( PATHNAME_LEN - 1 ) ) ;
386    if ( c_w_d_ptr  == NULL ) {
387       return FALSE ;
388    }
389 
390    *p_s = FORE_SLASH ;
391 
392 #ifdef MSYS_POSIX
393 
394    /* ..... transform cwd's non-POSIX directory separators to conform  ..... */
395 
396    conform_directory_separator( c_w_d ) ;
397 
398 #endif
399 
400    if ( isalpha( c_w_d[ 0 ] ) ) {
401 
402       /* ..... drive letter, colon, dir_sep ..... */
403 
404       if ( IS_COLON( c_w_d[ 1 ] ) ) {
405          *p_s = c_w_d[ 2 ] ;
406          if ( ( *p_s != FORE_SLASH ) &&
407               ( *p_s != BACK_SLASH ) ) {
408             return FALSE ;
409          }
410       } else {
411          return FALSE ;
412       }
413    }
414    return TRUE ;
415 }
416 
417 #ifdef MSYS_POSIX
/*------------------------------------------------------------------
pagc_tools.c (conform_directory_separator)
-- compiled only when MSYS_POSIX is defined .....
-- transforms non-POSIX directory separators (back slashes) so that
   they conform with POSIX (forward slashes) --
called by establish_directory
string.h (strlen)
-------------------------------------------------------------------*/
conform_directory_separator(char * path_name)425 static void conform_directory_separator( char * path_name ) {
426    int i ,
427        pn_len ;
428 
429    pn_len = strlen( path_name ) ;
430    for ( i = 0 ;
431          i < pn_len ;
432          i++ ) {
433       if ( path_name[ i ] == BACK_SLASH ) {
434          path_name[ i ] = FORE_SLASH ;
435       }
436    }
437 }
438 /* ..... END OF IFDEF MSYS_POSIX ..... */
439 #endif
440 
441 
442