#!/usr/local/bin/python3.8

# python3 status: started

# lib_textdata.py : I/O utilities for 1D/timing/married data files

# created 16 Aug, 2010 [rickr]

import sys
import os
import math


def write_1D_file(data, filename, as_rows=0, space=' '):
    """Write a 1D or 2D list of numbers to a text file, one index per line.

    data     : list of numbers (single column), or list of lists of numbers
    filename : path of the file to create (it is truncated if it exists)
    as_rows  : if set and data is 2D, each inner list is written as one row;
               otherwise each inner list is treated as a column
    space    : separator placed between values on a line
               (for tabbed output, consider space = '\\t')

    Values are formatted with '%g'.

    return 0 on success, 1 on error (bad type, open failure, ragged columns)
    """

    if not isinstance(data, list):
        print("** write_1D_file, invalid type: %s" % type(data))
        return 1

    try:
        fp = open(filename, 'w')
    except (OSError, TypeError, ValueError):
        print("** failed to open '%s' for writing 1D" % filename)
        return 1

    # the 'with' block guarantees the file is closed on every exit path,
    # including the early error return and any formatting exception
    with fp:
        if len(data) == 0:          # an empty file then
            return 0

        if isinstance(data[0], list):       # multi-column
            if as_rows:
                for row in data:
                    fp.write("%s\n" % space.join(["%g" % val for val in row]))
            else:
                nt = len(data[0])
                for col in data:
                    if len(col) != nt:
                        print('** write_1D_file: columns not of equal lengths')
                        return 1

                # transpose on output: line 'ind' holds entry 'ind' of each col
                for ind in range(nt):
                    fp.write("%s\n"
                             % space.join(["%g" % col[ind] for col in data]))
        else:                               # single column
            for val in data:
                fp.write('%g\n' % val)

    return 0


def read_1D_file(filename, nlines=-1, marry_ok=0, verb=1):
    """Read a simple 1D file into a float matrix, and return the matrix.

    Text from '#' to the end of a line is skipped.  The file is read via
    read_married_file(); unless marry_ok is set, a file in married format
    (modulators or durations) is rejected.  The rows must all have the
    same number of entries.

    return a 2D list of floats (one inner list per data line), where only
    the initial value of each married element is kept, or None on error
    """

    data, clines, alist = read_married_file(filename, nlines=nlines, verb=verb)
    if data is None:
        print('** failed to read 1D file %s' % filename)
        return None

    if married_type(data) and not marry_ok:
        print("** file %s is in married format" % filename)
        return None

    if not data_is_rect(data):
        print("** file %s is not rectangular" % filename)
        return None

    # return just the initial entries
    return [[val[0] for val in row] for row in data]


def read_data_file(filename, nlines=-1, marry_ok=0, verb=1):
    """Read a numerical text file (1D or timing, say) into a float matrix,
    and return the matrix and the comment lines.

    Text from '#' to the end of a line is skipped as data, but collected.
    Unlike read_1D_file(), rows need not have equal lengths.

    return (2D list of floats, 1D list of comment text),
           or (None, None) on error
    """

    data, clines, alist = read_married_file(filename, nlines=nlines, verb=verb)
    if data is None:
        if verb > 0:
            print('** failed to read text data file %s' % filename)
        return None, None

    if married_type(data) and not marry_ok:
        if verb > 0:
            print("** file %s is in married format" % filename)
        return None, None

    # return just the initial entries
    retmat = [[val[0] for val in row] for row in data]

    return retmat, clines


def read_married_file(filename, nlines=-1, verb=1):
    """Akin to read_data_file, but instead of returning a 2D array of floats,
    return a 2D array of married elements, where each element is of the
    form: [time, [modulators], duration]
       - all numbers are returned as floats
       - time will always be set
       - modulators may be empty (see common stim_times format)
       - duration is 0 unless given in the file
    The expected format of a file element is:
       time*mod1*mod2*...*modN:duration

    filename : path to read; '-' or 'stdin' means read from sys.stdin
    nlines   : if >= 0, stop after this many data lines have been stored
    verb     : verbosity level

    return
       - married data matrix
       - comment list
       - astcounts list (count of '*' tokens per data line)
    or (None, None, None) on error
    """

    from_stdin = filename in ('-', 'stdin')
    if from_stdin:
        fp = sys.stdin
    else:
        try:
            fp = open(filename, 'r')
        except (OSError, TypeError, ValueError):
            if verb > 0:
                print("** failed to open 1D file '%s'" % filename)
            return None, None, None

    if verb > 1:
        print("+d TD:read_married_file %s" % filename)

    try:
        data = fp.read()
    finally:
        # only close what was opened here: sys.stdin belongs to the caller
        if not from_stdin:
            fp.close()

    retmat = []
    clines = []
    astcounts = []
    lnum = 0        # number of data lines stored so far
    for lind, full_line in enumerate(data.splitlines()):
        if 0 <= nlines <= lnum:
            break   # then stop early

        # nuke anything after first comment, proceed with line
        line = full_line
        cind = full_line.find('#')
        if cind >= 0:
            line = full_line[:cind]
            cline = full_line[cind:]
            clines.append(cline)
            if verb > 2:
                print("-- have comment line: %s" % cline)

        # check for lines to skip
        if not line:
            if verb > 3:
                print("... skipping empty line:")
            continue
        if line.isspace():
            if verb > 3:
                print("... skipping blank line:")
            continue
        if line[0] == '\0':
            if verb > 2:
                print("have comment line: %s" % line)
            clines.append(line)
            continue

        # process the line
        if verb > 3:
            print('-- processing line #%d: %s' % (lind, line))
        rv, mlist, acount = process_one_data_line(line, verb)
        if rv:
            return None, None, None
        if verb > 4:
            print('++ mlist = %s' % mlist)

        retmat.append(mlist)    # even if empty
        astcounts.append(acount)

        if verb > 3:
            print("+d line %d, length %d" % (lnum, len(mlist)))
        lnum += 1

    # now just check for consistency
    if not married_mat_is_consistent(retmat, filename, verb=verb):
        return None, None, None

    return retmat, clines, astcounts


def married_mat_is_consistent(mmat, fname, verb=1):
    """Check a married matrix for consistency against its first element:
    same number of modulators, and consistent use of durations.

    mmat  : 2D list of [time, [modulators], duration] elements
    fname : file name, used only in messages
    verb  : if non-zero, print a message describing any inconsistency

    return 1 if considered consistent (or there is nothing to test), else 0
    """

    # locate the first element of the first non-empty row
    ttok = None
    for row in mmat:
        if len(row) > 0:
            ttok = row[0]
            break

    if not ttok:    # nothing to test, empty mmat
        return 1

    modlen = len(ttok[1])
    moddur = ttok[2] > 0        # have duration
    for lind, line in enumerate(mmat):
        for entry in line:
            if len(entry[1]) != modlen:
                if verb:
                    print("** married file %s, line %d: inconsistent num"
                          " modulators: %d vs %d"
                          % (fname, lind, modlen, len(entry[1])))
                return 0
            # NOTE(review): unlike the other two checks, this case is only
            # reported and does not fail the test - confirm whether
            # non-positive durations are meant to be tolerated here
            if moddur and entry[2] <= 0:
                if verb:
                    print("** married file %s, line %d:"
                          " inconsistent use of duration: dur (%g) <= 0"
                          % (fname, lind, entry[2]))
            if not moddur and entry[2] > 0:
                if verb:
                    print("** married file %s, line %d:"
                          " inconsistent use of duration: should be zero"
                          % (fname, lind))
                return 0

    return 1    # yay


def process_one_data_line(line, verb=1):
    """Return an array of [time, [modulators], duration] elements.
       - lines should not be empty (or comments)
       - skip tokens starting with '*' (should be the only way to get an
         empty result list)
       - look for married elements

    The returned acount is the number of tokens on this line that start
    with '*'.

    If any tokens are married, they should all have the same format
    (a warning is printed once per line if the separators differ).

    return result, [time tokens], acount  (result = 0 if OK, 1 on error)
    """

    if not line:
        print('** PODL: should not have empty line')
        return 1, [], 0
    if line.isspace():
        print('** PODL: should not have blank line')
        return 1, [], 0
    if line[0] in ('\0', '#'):
        print('** PODL: should not have comment line')
        return 1, [], 0

    inc_warn = 1        # do not warn of inconsistency more than once
    res_sep = None      # separator list of the first parsed token
    res_list = []       # result list
    acount = 0
    for tok in line.split():
        # for '*', just check first char (in case it is married) 5 Dec 2016
        if tok[0] == '*':
            if verb > 2:
                print("-- data file: skipping '*'")
            acount += 1
            continue

        vals, seps = split_token(tok)
        if verb > 3:
            print("found token = '%s'\n vals = %s\n seps = %s"
                  % (tok, vals, seps))

        # check for valid floats
        try:
            fvals = [float(val) for val in vals]
        except ValueError:
            if verb > 0:
                print("** unusable token, bad floats : %s" % tok)
            if verb > 0:
                print(" line = %s" % line)
            return 1, [], acount

        # first time, copy the sep list
        if res_sep is None:
            res_sep = seps[:]

        # just compare for separator consistency (inconsistent if not)
        if res_sep != seps and inc_warn:
            print('** warning: inconsistent separators on line: %s' % line)
            inc_warn = 0

        # see what is here: just time, with duration, or only with modulators
        # duration is 0 unless otherwise specified
        if len(seps) == 0:
            res_list.append([fvals[0], [], 0])
        elif seps[-1] == ':':
            res_list.append([fvals[0], fvals[1:-1], fvals[-1]])
        else:
            res_list.append([fvals[0], fvals[1:], 0])

    return 0, res_list, acount


def split_token(tdata, seplist=(':', '*', ',')):
    """Like str.split(), but allow a collection of single-character
    separators, and return the corresponding list of separators found
    (its length is one less than the length of the value list) along
    with the list of elements.

    An empty token yields [''], [].

    return [split values], [separators]
    """

    toks = []
    seps = []
    start = 0       # start position for next token
    for posn, char in enumerate(tdata):
        if char in seplist:
            toks.append(tdata[start:posn])
            seps.append(char)
            start = posn + 1

    # and append the final token (the whole string if no separators)
    toks.append(tdata[start:])

    return toks, seps


def married_type(mdata):
    """Return whether there are modulators or durations.
    (need only check first non-empty row, as consistency is required)

    return a bit mask:

       return 0 : simple times
       return 1 : has amplitude modulators
       return 2 : has duration modulators
       return 3 : has both
    """

    for line in mdata:
        if len(line) > 0:
            ttok = line[0]
            rv = 0
            if len(ttok[1]) > 0:
                rv |= 1     # has amp mods
            if ttok[2] > 0:
                rv |= 2     # has dur mods
            return rv

    return 0


def data_is_rect(mdata):
    """Return whether the number of columns is consistent across rows
    (1 if so, 0 if not).  None or an empty list counts as rectangular."""
    if mdata is None:
        return 1
    if len(mdata) == 0:
        return 1
    rlen = len(mdata[0])
    for row in mdata:
        if len(row) != rlen:
            return 0
    return 1


def main():
    """Command-line entry point.

    With '-eval EXPR...', evaluate the remaining arguments (joined by
    spaces) as a Python expression, print the result and return 0.
    Otherwise print a notice and return 1.
    """
    if len(sys.argv) > 2:
        if sys.argv[1] == '-eval':
            # SECURITY: eval() executes arbitrary code taken from the
            # command line; never pass untrusted text to this option
            print(eval(' '.join(sys.argv[2:])))
            return 0

    print('lib_textdata.py: not intended as a main program')
    return 1


if __name__ == '__main__':
    sys.exit(main())