1 #ifndef G_BASE_DEFINED
2 #define G_BASE_DEFINED
3 #if !defined(_POSIX_SOURCE) && !defined(__FreeBSD__) && !defined(__DragonFly__)
4 //mostly for MinGW
5 #define _POSIX_SOURCE
6 #endif
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 #include <string.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <math.h>
14 #include <limits.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <stdint.h>
18 
// Platform setup.
// Windows branch: pull in the Win32/CRT headers and map the POSIX-style names
// used in this header (off_t, popen, fseeko, ftello) onto CRT names.
// Other platforms: use '/' as path separator and include <unistd.h>.
#if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
  // Normalize the various Windows detection macros to a single one.
  #ifndef __WIN32__
    #define __WIN32__
  #endif
  #include <windows.h>
  #include <direct.h>
  #include <io.h>
  // Path separator character for this platform.
  #define CHPATHSEP '\\'
  // Replace any existing off_t macro with a 64-bit signed type.
  #undef off_t
  #define off_t int64_t
  #ifndef popen
   #define popen _popen
  #endif
  // NOTE(review): `#ifdef _fseeki64` is only true when _fseeki64 is a
  // preprocessor macro. If the CRT declares it as an ordinary function
  // (believed to be the case for the Microsoft CRT -- confirm), this test is
  // false and fseeko silently falls back to fseek, whose offset is a long.
  #ifndef fseeko
		#ifdef _fseeki64
			#define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
		#else
			/*
			#define _DEFINE_WIN32_FSEEKO
			int fseeko(FILE *stream, off_t offset, int whence);
			*/
			#define fseeko fseek
		#endif
  #endif
 // NOTE(review): same macro-vs-function caveat as for fseeko above applies to
 // the `#ifdef _ftelli64` test.
 #ifndef ftello
  #ifdef _ftelli64
    #define ftello(stream) _ftelli64(stream)
  #else
    #define ftello ftell
  #endif
 #endif
 #else
  // Path separator character for this platform.
  #define CHPATHSEP '/'
  #include <unistd.h>
#endif
54 
55 #ifndef fseeko
56  #define fseeko fseek
57 #endif
58 #ifndef ftello
59  #define ftello ftell
60 #endif
61 
62 #ifdef DEBUG
63 #undef NDEBUG
64 #define _DEBUG 1
65 #define _DEBUG_ 1
66 #endif
67 
68 typedef int32_t int32;
69 typedef uint32_t uint32;
70 typedef int16_t int16;
71 typedef uint16_t uint16;
72 
73 typedef unsigned char uchar;
74 typedef unsigned char byte;
75 
76 #ifndef MAXUINT
77 #define MAXUINT ((unsigned int)-1)
78 #endif
79 
80 #ifndef MAXINT
81 #define MAXINT INT_MAX
82 #endif
83 
84 #ifndef MAX_UINT
85 #define MAX_UINT ((unsigned int)-1)
86 #endif
87 
88 #ifndef MAX_INT
89 #define MAX_INT INT_MAX
90 #endif
91 
92 typedef int64_t int64;
93 typedef uint64_t uint64;
94 
95 /****************************************************************************/
96 
97 #ifndef EXIT_FAILURE
98 #define EXIT_FAILURE 1
99 #endif
100 
101 #ifndef EXIT_SUCCESS
102 #define EXIT_SUCCESS 0
103 #endif
104 
105 /****************************************************************************/
106 #define ERR_ALLOC "Error allocating memory.\n"
107 
108 //-------------------
109 
110 // Debug helpers
111 #ifndef NDEBUG
112  #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
113  #ifdef TRACE
114   #define GTRACE(exp)  (GMessage exp)
115  #else
116   #define GTRACE(exp)  ((void)0)
117  #endif
118 #else
119  #define GASSERT(exp) ((void)0)
120  #define GTRACE(exp)  ((void)0)
121 #endif
122 
123 #define GERROR(exp) (GError exp)
124 /**********************************  Macros  ***********************************/
// NOTE: everything in this group is a function-like macro; arguments are
// substituted textually and most are evaluated more than once, so do not pass
// expressions with side effects (e.g. i++ or a function call that mutates).

// Absolute value
#define GABS(val) (((val)>=0)?(val):-(val))

// Min and Max
// (when a and b compare equal, GMAX yields b and GMIN yields a)
#define GMAX(a,b) (((a)>(b))?(a):(b))
#define GMIN(a,b) (((a)>(b))?(b):(a))

// Min of three
#define GMIN3(x,y,z) ((x)<(y)?GMIN(x,z):GMIN(y,z))

// Max of three
#define GMAX3(x,y,z) ((x)>(y)?GMAX(x,z):GMAX(y,z))

// Return minimum and maximum of a, b:
// assigns the smaller value to lo and the larger to hi
// (lo and hi must be assignable; when a and b are equal, lo=b and hi=a)
#define GMINMAX(lo,hi,a,b) ((a)<(b)?((lo)=(a),(hi)=(b)):((lo)=(b),(hi)=(a)))

// Clamp value x to range [lo..hi]
// (the lower bound is tested first, so lo wins if lo>hi)
#define GCLAMP(lo,x,hi) ((x)<(lo)?(lo):((x)>(hi)?(hi):(x)))
143 
144 typedef void* pointer;
145 typedef unsigned int uint;
146 
147 typedef int GCompareProc(const pointer item1, const pointer item2);
148 typedef long GFStoreProc(const pointer item1, FILE* fstorage); //for serialization
149 typedef pointer GFLoadProc(FILE* fstorage); //for deserialization
150 
151 typedef void GFreeProc(pointer item); //usually just delete,
152       //but may also support structures with embedded dynamic members
153 
// Allocation helper macros.
// GMALLOC / GCALLOC / GREALLOC pass the address of `ptr` and the requested
// `size` to GMalloc / GCalloc / GRealloc and call GError(ERR_ALLOC) when the
// routine returns false.
// Each one expands to a single `do { ... } while (0)` statement, so it must be
// followed by ';' and can be used safely as the body of an unbraced if/else
// (a bare `if` expansion would capture the caller's `else`).
// `ptr` must be an lvalue of pointer type; both arguments are parenthesized
// in the expansion.
#define GMALLOC(ptr,size)  do { if (!GMalloc((pointer*)(&(ptr)),(size))) \
                                     GError(ERR_ALLOC); } while (0)
#define GCALLOC(ptr,size)  do { if (!GCalloc((pointer*)(&(ptr)),(size))) \
                                     GError(ERR_ALLOC); } while (0)
#define GREALLOC(ptr,size) do { if (!GRealloc((pointer*)(&(ptr)),(size))) \
                                     GError(ERR_ALLOC); } while (0)
// GFREE passes the address of `ptr` to GFree; it is a plain expression.
#define GFREE(ptr)       GFree((pointer*)(&(ptr)))
161 
// Returns whichever of the two C strings orders first according to strcmp().
// When the strings compare equal, arg2 is returned.
inline char* strMin(char *arg1, char *arg2) {
    if (strcmp(arg1, arg2) < 0) {
        return arg1;
    }
    return arg2;
}
165 
// Returns whichever of the two C strings orders last according to strcmp().
// When the strings compare equal, arg2 is returned.
inline char* strMax(char *arg1, char *arg2) {
    if (strcmp(arg2, arg1) < 0) {
        return arg1;
    }
    return arg2;
}
169 
// Rounds x to the nearest integer by taking floor(x + 0.5), so exact halves
// go towards +infinity (2.5 -> 3, -2.5 -> -2). The result is converted to int.
inline int iround(double x) {
   const double shifted = x + 0.5;
   return (int)floor(shifted);
}
173 
174 int Gmkdir(const char *path, bool recursive=true, int perms=0775);
175 
176 
177 /****************************************************************************/
178 
// Three-way comparison of two ints.
// Returns -1 when a<b, 0 when a==b and 1 when a>b.
// Built from two comparisons instead of `a-b`: the subtraction overflows
// (undefined behavior, and in practice the wrong sign) when the operands are
// far apart, e.g. INT_MAX and -1. Callers should rely only on the sign of the
// result, not on its magnitude.
inline int Gintcmp(int a, int b) {
  return (a>b) - (a<b);
}
183 
184 int Gstrcmp(const char* a, const char* b, int n=-1);
185 //same as strcmp but doesn't crash on NULL pointers
186 
187 int Gstricmp(const char* a, const char* b, int n=-1);
188 bool GstrEq(const char* a, const char* b);
189 bool GstriEq(const char* a, const char* b);
190 
// Basic swap template function: exchanges the values of lhs and rhs using
// one temporary copy. T must be copy-constructible and copy-assignable.
template<class T> void Gswap(T& lhs, T& rhs) {
 T saved=lhs; // keep the old lhs value while it is overwritten
 lhs=rhs;
 rhs=saved;
}
198 
199 
200 /**************** Memory management ***************************/
201 
202 bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
203 bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
204 bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
205 void GFree(pointer* ptr); // Free memory, resets ptr to NULL
206 
207 
208 //int saprintf(char **retp, const char *fmt, ...);
209 
210 void GError(const char* format,...); // Error routine (aborts program)
211 void GMessage(const char* format,...);// Log message to stderr
212 // Assert failed routine:- usually not called directly but through GASSERT
213 void GAssert(const char* expression, const char* filename, unsigned int lineno);
214 
215 // ****************** string manipulation *************************
216 char *Gstrdup(const char* str);
217 //duplicate a string by allocating a copy for it and returning it
218 char* Gstrdup(const char* sfrom, const char* sto);
//same as Gstrdup, but with an early termination (e.g. on delimiter)
220 
221 char* Gsubstr(const char* str, char* from, char* to=NULL);
222 //extracts a substring, allocating it, including boundaries (from/to)
223 
224 int strsplit(char* str, char** fields, int maxfields, const char* delim);
225 int strsplit(char* str, char** fields, int maxfields, const char delim);
226 int strsplit(char* str, char** fields, int maxfields); //splits by tab or space
227 
228 char* replaceStr(char* &str, char* newvalue);
229 
230 //conversion: to Lower/Upper case
231 // creating a new string:
232 char* upCase(const char* str);
233 char* loCase(const char* str);
234 // changing string in place:
235 char* strlower(char * str);
236 char* strupper(char * str);
237 
238 //strstr but for memory zones: scans a memory region
239 //for a substring:
240 void* Gmemscan(void *mem, unsigned int len,
241                   void *part, unsigned int partlen);
242 
243 // test if a char is in a string:
244 bool chrInStr(char c, const char* str);
245 
246 char* rstrchr(char* str, char ch);
/* returns a pointer to the rightmost
  occurrence of ch in str - like rindex for platforms missing it*/
249 
250 char* strchrs(const char* s, const char* chrs);
251 //strchr but with a set of chars instead of only one
252 
253 char* rstrfind(const char* str, const char *substr);
254 // like rindex() but for strings;  right side version of strstr()
255 
256 char* reverseChars(char* str, int slen=0); //in place reversal of string
257 
258 char* rstrstr(const char* rstart, const char *lend, const char* substr);
259 /*the reversed, rightside equivalent of strstr: starts searching
260  from right end (rstart), going back to left end (lend) and returns
261  a pointer to the last (right) matching character in str */
262 
263 char* strifind(const char* str,  const char* substr);
264 // case insensitive version of strstr -- finding a string within another string
265 // returns NULL if not found
266 
267 //Determines if a string begins with a given prefix
268 //(returns false when any of the params is NULL,
269 // but true when prefix is '' (empty string)!)
270 bool startsWith(const char* s, const char* prefix);
271 
272 bool startsiWith(const char* s, const char* prefix); //case insensitive
273 
274 
275 bool endsWith(const char* s, const char* suffix);
276 //Note: returns true if suffix is empty string, but false if it's NULL
277 
278 
279 // ELF hash function for strings
280 int strhash(const char* str);
281 
282 
283 
//---- generic base GSeg : genomic segment (interval) --
// Closed interval [start..end]; coordinates are considered 1-based
// (so 0 is invalid).
// The constructor orders the two endpoints so that start<=end. Both fields
// are public, so code assigning them directly has to keep that ordering.
// All query methods are const, so they can be called on const segments.
class GSeg {
 public:
  uint start; //start<=end always!
  uint end;
  //endpoints may be given in either order
  GSeg(uint s=0,uint e=0) {
    if (s>e) { start=e;end=s; }
        else { start=s;end=e; }
    }

  //number of positions covered, both endpoints included
  uint len() const { return end-start+1; }

  //check for overlap with other segment (d is dereferenced without a NULL check)
  bool overlap(const GSeg* d) const {
     return (start<=d->end && end>=d->start);
     }

  bool overlap(const GSeg& d) const {
     return (start<=d.end && end>=d.start);
     }

  //overlap test with tolerance: fuzz is added to the end coordinate on each
  //side of the comparison before testing.
  //The parameter deliberately stays a non-const reference: with a const
  //reference, the implicit GSeg(uint,uint) conversion would make calls such
  //as overlap(int,int) ambiguous with overlap(uint,uint).
  //NOTE(review): the sums are done in unsigned arithmetic, so this looks
  //intended for fuzz>=0 -- confirm against callers.
  bool overlap(GSeg& d, int fuzz) const {
     return (start<=d.end+fuzz && end+fuzz>=d.start);
     }

  //overlap test against raw coordinates, given in either order
  bool overlap(uint s, uint e) const {
     if (s>e) { uint t=s; s=e; e=t; }
     return (start<=e && end>=s);
     }

  //return the length of overlap between two segments (0 if they are disjoint)
  //(r is dereferenced without a NULL check)
  int overlapLen(const GSeg* r) const {
     //the shared region, if any, runs from the larger start to the smaller end
     uint lo = (start>r->start) ? start : r->start;
     uint hi = (end<r->end) ? end : r->end;
     return (lo>hi) ? 0 : (int)(hi-lo+1);
     }

  //same as above, for raw coordinates given in either order
  int overlapLen(uint rstart, uint rend) const {
     if (rstart>rend) { uint t=rstart; rstart=rend; rend=t; }
     uint lo = (start>rstart) ? start : rstart;
     uint hi = (end<rend) ? end : rend;
     return (lo>hi) ? 0 : (int)(hi-lo+1);
     }

  //fuzzy coordinate matching:
  //true when both start and end differ from those of s by at most fuzz
  bool coordMatch(const GSeg* s, uint fuzz=0) const {
    if (fuzz==0) return (start==s->start && end==s->end);
    uint sd = (start>s->start) ? start-s->start : s->start-start;
    uint ed = (end>s->end) ? end-s->end : s->end-end;
    return (sd<=fuzz && ed<=fuzz);
    }

  //comparison operators required for sorting
  bool operator==(const GSeg& d) const {
      return (start==d.start && end==d.end);
      }
  //ordering by start coordinate, ties broken by end coordinate
  bool operator<(const GSeg& d) const {
     return (start==d.start)?(end<d.end):(start<d.start);
     }
};
355 
356 
357 
358 //--------------------------------------------------------
359 // ************** simple line reading class for text files
360 
361 //GLineReader -- text line reading/buffering class
362 class GLineReader {
363    bool closeFile;
364    int len;
365    int allocated;
366    char* buf;
367    bool isEOF;
368    FILE* file;
369    off_t filepos; //current position
370    bool pushed; //pushed back
371    int lcount; //line counter (read lines)
372  public:
chars()373    char* chars() { return buf; }
line()374    char* line() { return buf; }
readcount()375    int readcount() { return lcount; } //number of lines read
setFile(FILE * stream)376    void setFile(FILE* stream) { file=stream; }
length()377    int length() { return len; }
size()378    int size() { return len; } //same as size();
isEof()379    bool isEof() {return isEOF; }
eof()380    bool eof() { return isEOF; }
getfpos()381    off_t getfpos() { return filepos; }
getFpos()382    off_t getFpos() { return filepos; }
nextLine()383    char* nextLine() { return getLine(); }
getLine()384    char* getLine() { if (pushed) { pushed=false; return buf; }
385                             else return getLine(file);  }
getLine(FILE * stream)386    char* getLine(FILE* stream) {
387                  if (pushed) { pushed=false; return buf; }
388                           else return getLine(stream, filepos); }
389    char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update
390                            // the given file position
pushBack()391    void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
392             // so the next call will in fact return the same line
GLineReader(const char * fname)393    GLineReader(const char* fname) {
394       FILE* f=fopen(fname, "rb");
395       if (f==NULL) GError("Error opening file '%s'!\n",fname);
396       closeFile=true;
397       init(f);
398       }
399    GLineReader(FILE* stream=NULL, off_t fpos=0) {
400      closeFile=false;
401      init(stream,fpos);
402      }
403    void init(FILE* stream, off_t fpos=0) {
404      len=0;
405      isEOF=false;
406      allocated=1024;
407      GMALLOC(buf,allocated);
408      lcount=0;
409      buf[0]=0;
410      file=stream;
411      filepos=fpos;
412      pushed=false;
413      }
~GLineReader()414    ~GLineReader() {
415      GFREE(buf);
416      if (closeFile) fclose(file);
417      }
418 };
419 
420 
421 /* extended fgets() -  to read one full line from a file and
422   update the file position correctly !
423   buf will be reallocated as necessary, to fit the whole line
424   */
425 char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
426 
427 
428 //print int/values nicely formatted in 3-digit groups
429 char* commaprintnum(uint64 n);
430 
431 /*********************** File management functions *********************/
432 
433 // removes the last part (file or directory name) of a full path
434 // WARNING: this is a destructive operation for the given string!
435 void delFileName(char* filepath);
436 
437 // returns a pointer to the last file or directory name in a full path
438 const char* getFileName(const char* filepath);
439 // returns a pointer to the file "extension" part in a filename
440 const char* getFileExt(const char* filepath);
441 
442 
443 int fileExists(const char* fname);
444 //returns 0 if file entry doesn't exist
445 //        1 if it's a directory
446 //        2 if it's a regular file
447 //        3 otherwise (?)
448 
449 int64 fileSize(const char* fpath);
450 
//write a FASTA-formatted record
452 void writeFasta(FILE *fw, const char* seqid, const char* descr,
453         const char* seq, int linelen=60, int seqlen=0);
454 
455 //parses the next number found in a string at the current position
456 //until a non-digit (and not a '.', 'e','E','-','+') is encountered;
457 //updates the char* pointer to be after the last digit parsed
458 bool parseNumber(char* &p, double& v);
459 bool parseDouble(char* &p, double& v); //just an alias for parseNumber
460 
461 bool parseInt(char* &p, int& i);
462 bool parseUInt(char* &p, uint& i);
463 bool parseHex(char* &p,  uint& i);
464 
465 #endif /* G_BASE_DEFINED */
466