"""Helpers for filtering and editing a delimited table held in a pandas DataFrame."""

import re
import sys

# NOTE(review): the star import is kept for backward compatibility with the
# original module namespace; only ``read_csv`` is used in this file.
from pandas import *  # noqa: F401,F403

import msg


class Tab:
    """A delimited table loaded into ``self.data`` (a pandas DataFrame).

    The first column of the input becomes the row index, so every ``row``
    argument below is an index *label* and every ``col`` argument is a
    column name.
    """

    def __init__(self, fin=sys.stdin, sep="\t"):
        """Read the table from *fin* (path or file object) split on *sep*."""
        self.data = read_csv(fin, sep=sep, index_col=0)

    def sel_rows(self, col=None, val=None):
        """Placeholder: row selection is not implemented; this is a no-op.

        TODO(review): the original draft hinted at keeping only the rows
        where ``self.data[col] == val``; confirm the intended semantics
        before implementing.
        """
        pass

    def eq(self, x, y):
        """Return True if *x* equals *y*, directly or after coercion.

        If the plain comparison fails, *x* is converted with ``type(y)``
        (e.g. the string ``"3"`` against an integer cell) and compared
        again. Any conversion failure counts as "not equal".
        """
        if x == y:
            return True
        try:
            return type(y)(x) == y
        except Exception:
            # Best-effort coercion: an unconvertible value simply does not match.
            return False

    def get_cols(self, row=None, val=None, regex=None):
        """Return the names of the columns that match a criterion.

        With *row* left as None, a column matches if ANY of its cells
        matches; otherwise only the cell at index label *row* is tested.

        *regex* (takes precedence) is applied with ``re.search`` to
        ``str(cell)``. *val* is compared to ``str(cell)`` when scanning
        whole columns, and via :meth:`eq` when a single row is tested.
        A falsy *regex*/*val* counts as "not supplied"; if neither is
        supplied, ``msg.exit("Error")`` is called.
        """
        if regex:
            pattern = re.compile(regex)  # compiled once, not once per cell

            def hit(cell):
                return pattern.search(str(cell)) is not None
        elif val:
            if row is None:
                def hit(cell):
                    return val == str(cell)
            else:
                def hit(cell):
                    return self.eq(val, cell)
        else:
            msg.exit("Error")
            return []

        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
        if row is None:
            return [
                name
                for name, column in self.data.items()
                if any(hit(cell) for cell in column)
            ]
        return [
            name
            for name, column in self.data.items()
            if hit(column.loc[row])
        ]

    def sel_columns(self, row=None, val=None, regex=None):
        """Drop, in place, every column not selected by :meth:`get_cols`."""
        keep = set(self.get_cols(row, val, regex))
        unwanted = [name for name in self.data.columns if name not in keep]
        self.data.drop(columns=unwanted, inplace=True)

    def _make_rewriter(self, val, regex, new_val, inverse):
        """Build the ``cell -> new cell`` function used by :meth:`sub`.

        The returned function hands back the *same object* when a cell
        must stay untouched, so callers can detect changes with ``is``.
        Returns None (after calling ``msg.exit("Error")``) when neither
        *regex* nor *val* is supplied.
        """
        if regex:
            pattern = re.compile(regex)
            if inverse:
                # Inverse mode: overwrite cells that do NOT match at the start.
                def rewrite(cell):
                    return cell if pattern.match(str(cell)) else new_val
            else:
                def rewrite(cell):
                    text, hits = pattern.subn(new_val, str(cell))
                    # Leave non-matching cells alone so numbers and NaN
                    # are not silently turned into strings.
                    return text if hits else cell
            return rewrite
        if val:
            def rewrite(cell):
                return new_val if str(cell) == val else cell
            return rewrite
        msg.exit("Error")
        return None

    def _set_cell(self, row, col, value):
        """Write *value* at (*row*, *col*) directly on ``self.data``.

        A numeric/boolean column is widened to ``object`` first when a
        string is stored, because recent pandas versions refuse to upcast
        silently on item assignment.
        """
        if isinstance(value, str) and self.data[col].dtype.kind in "biufc":
            self.data[col] = self.data[col].astype(object)
        # .at writes into the frame itself; chained indexing
        # (data[col][row] = ...) may write to a temporary copy.
        self.data.at[row, col] = value

    def sub(self, row=None, col=None, val=None, regex=None, new_val=None,
            inverse=False):
        """Substitute cell values in place.

        Scope:
          * *row* and *col* given - that single cell;
          * only *col* given      - every cell of the column;
          * only *row* given      - every cell of the row;
          * neither               - nothing happens.

        Rule:
          * *regex*, ``inverse=False`` - ``re.sub(regex, new_val, str(cell))``
            on cells where the pattern occurs;
          * *regex*, ``inverse=True``  - cells whose text does NOT match
            *regex* at its start (``re.match``) become *new_val*;
          * *val* (no regex)           - cells with ``str(cell) == val``
            become *new_val*;
          * single cell, no regex      - the cell is set to *new_val*
            unconditionally (*val* is ignored);
          * row/column scope with neither *regex* nor *val* -
            ``msg.exit("Error")``.

        *row* is expected to identify exactly one row.
        """
        has_row = row is not None
        has_col = col is not None

        if has_row and has_col:
            # Reading first makes an unknown label raise KeyError instead
            # of letting the assignment silently enlarge the frame.
            current = self.data.at[row, col]
            if regex:
                rewrite = self._make_rewriter(None, regex, new_val, inverse)
                updated = rewrite(current)
                if updated is not current:
                    self._set_cell(row, col, updated)
            else:
                self._set_cell(row, col, new_val)
        elif has_col:
            rewrite = self._make_rewriter(val, regex, new_val, inverse)
            if rewrite is None:
                return
            cells = list(self.data[col])
            updated = [rewrite(cell) for cell in cells]
            # Rebuild the column positionally: row labels may be
            # non-numeric or duplicated, so they cannot be used as offsets.
            if any(new is not old for new, old in zip(updated, cells)):
                self.data[col] = updated
        elif has_row:
            rewrite = self._make_rewriter(val, regex, new_val, inverse)
            if rewrite is None:
                return
            # Read cell by cell (not via a row Series) so values keep
            # their own column dtype instead of a common upcast.
            for name in list(self.data.columns):
                current = self.data.at[row, name]
                updated = rewrite(current)
                if updated is not current:
                    self._set_cell(row, name, updated)

    def save(self, outf=sys.stdout, sep="\t"):
        """Write the table to *outf* (path or file object) using *sep*."""
        self.data.to_csv(outf, sep=sep)