import re

import petl
import simpleeval
from ..step import Step
from ..field import Field
from .. import helpers


# NOTE:
# Some of the following steps can support WHERE/PREDICATE arguments (see petl)
# Some of the following steps use **options - we need to review/fix it


class field_add(Step):
    """Add field

    The new field is filled either with row numbers (`incremental`),
    or with a constant `value`, or with the result of a `formula`/`function`
    evaluated for every row.
    """

    code = "field-add"

    def __init__(
        self,
        descriptor=None,
        *,
        name=None,
        value=None,
        formula=None,
        function=None,
        position=None,
        incremental=False,
        **options,
    ):
        self.setinitial("name", name)
        self.setinitial("value", value)
        self.setinitial("formula", formula)
        self.setinitial("function", function)
        # An incremental (row number) field is always placed first
        self.setinitial("position", position if not incremental else 1)
        self.setinitial("incremental", incremental)
        # Remaining options are stored on the step and later become
        # a part of the new field's descriptor (see `transform_resource`)
        for key, option in helpers.create_descriptor(**options).items():
            self.setinitial(key, option)
        super().__init__(descriptor)

    # Transform

    def transform_resource(self, resource):
        """Add the field to `resource.schema` and the column to `resource.data`"""
        table = resource.to_petl()
        # Everything left in the descriptor after popping the step's
        # own properties is used as the new field's descriptor
        descriptor = self.to_dict()
        descriptor.pop("code", None)
        name = descriptor.pop("name", None)
        value = descriptor.pop("value", None)
        formula = descriptor.pop("formula", None)
        function = descriptor.pop("function", None)
        position = descriptor.pop("position", None)
        incremental = descriptor.pop("incremental", None)
        field = Field(descriptor, name=name)
        # Positions are 1-based while schema/petl indexes are 0-based
        index = position - 1 if position else None
        if incremental:
            # `addrownumbers` always puts the new column first so the schema
            # has to follow even if the step was created from a descriptor
            # that doesn't provide the position
            index = 0
        if index is None:
            resource.schema.add_field(field)
        else:
            resource.schema.fields.insert(index, field)
        if incremental:
            resource.data = table.addrownumbers(field=name)
        else:
            if formula:
                function = lambda row: simpleeval.simple_eval(formula, names=row)
            # A function replaces a missing/falsy value only if it is actually
            # provided; otherwise constants like `0`, `""` or `False` are kept
            # (a plain `value or function` would turn them into `None`)
            if not value and function is not None:
                value = function
            resource.data = table.addfield(name, value=value, index=index)

    # Metadata

    metadata_profile = {  # type: ignore
        "type": "object",
        "required": ["name"],
        "properties": {
            "name": {"type": "string"},
            "value": {},
            "position": {},
            "incremental": {},
        },
    }


class field_filter(Step):
    """Filter fields

    Keeps only the fields listed in `names`.
    """

    code = "field-filter"

    def __init__(self, descriptor=None, *, names=None):
        self.setinitial("names", names)
        super().__init__(descriptor)

    # Transform

    def transform_resource(self, resource):
        """Remove every field not listed in `names` from schema and data"""
        table = resource.to_petl()
        names = self.get("names")
        for name in resource.schema.field_names:
            if name not in names:
                resource.schema.remove_field(name)
        resource.data = table.cut(*names)

    # Metadata

    metadata_profile = {  # type: ignore
        "type": "object",
        "required": ["names"],
        "properties": {
            "names": {"type": "array"},
        },
    }


class field_move(Step):
    """Move field

    Moves the field `name` to the 1-based `position`.
    """

    code = "field-move"

    def __init__(self, descriptor=None, *, name=None, position=None):
        self.setinitial("name", name)
        self.setinitial("position", position)
        super().__init__(descriptor)

    # Transform

    def transform_resource(self, resource):
        """Move the field in the schema and the column in the data"""
        table = resource.to_petl()
        name = self.get("name")
        position = self.get("position")
        field = resource.schema.remove_field(name)
        # Positions are 1-based while schema/petl indexes are 0-based
        resource.schema.fields.insert(position - 1, field)
        resource.data = table.movefield(name, position - 1)

    # Metadata

    metadata_profile = {  # type: ignore
        "type": "object",
        "required": ["name", "position"],
        "properties": {
            "name": {"type": "string"},
            "position": {"type": "number"},
        },
    }


class field_remove(Step):
    """Remove field

    Removes the fields listed in `names`.
    """

    code = "field-remove"

    def __init__(self, descriptor=None, *, names=None):
        self.setinitial("names", names)
        super().__init__(descriptor)

    # Transform

    def transform_resource(self, resource):
        """Remove the listed fields from the schema and the data"""
        table = resource.to_petl()
        names = self.get("names")
        for name in names:
            resource.schema.remove_field(name)
        resource.data = table.cutout(*names)

    # Metadata

    metadata_profile = {  # type: ignore
        "type": "object",
        "required": ["names"],
        "properties": {
            "names": {"type": "array"},
        },
    }


class field_split(Step):
    """Split field

    Splits the field `name` into the string fields `toNames` using
    the regular expression `pattern`. The original field is kept only
    if `preserve` is true.
    """

    code = "field-split"

    def __init__(
        self,
        descriptor=None,
        *,
        name=None,
        to_names=None,
        pattern=None,
        preserve=False,
    ):
        self.setinitial("name", name)
        self.setinitial("toNames", to_names)
        self.setinitial("pattern", pattern)
        self.setinitial("preserve", preserve)
        super().__init__(descriptor)

    # Transform

    def transform_resource(self, resource):
        """Add the target fields to the schema and split/capture the data"""
        table = resource.to_petl()
        name = self.get("name")
        to_names = self.get("toNames")
        pattern = self.get("pattern")
        preserve = self.get("preserve")
        for to_name in to_names:
            resource.schema.add_field(Field(name=to_name, type="string"))
        if not preserve:
            resource.schema.remove_field(name)
        # A pattern with capturing groups describes the values to extract
        # while a pattern without them describes the separator. Counting
        # the compiled groups (instead of looking for a "(" character) treats
        # escaped parentheses and non-capturing groups correctly.
        processor = petl.split
        if re.compile(pattern).groups:
            processor = petl.capture
        resource.data = processor(
            table,
            name,
            pattern,
            to_names,
            include_original=preserve,
        )

    # Metadata

    metadata_profile = {  # type: ignore
        "type": "object",
        "required": ["name", "toNames", "pattern"],
        "properties": {
            "name": {"type": "string"},
            "toNames": {},
            "pattern": {},
            "preserve": {},
        },
    }


class field_unpack(Step):
    """Unpack field

    Unpacks the field `name` into the fields `toNames`. The original
    field is kept only if `preserve` is true.
    """

    code = "field-unpack"

    # NOTE: `name` and `to_names` default to `None` (as in the other steps)
    # so the step can also be created from a descriptor only
    def __init__(self, descriptor=None, *, name=None, to_names=None, preserve=False):
        self.setinitial("name", name)
        self.setinitial("toNames", to_names)
        self.setinitial("preserve", preserve)
        super().__init__(descriptor)

    # Transform

    def transform_resource(self, resource):
        """Add the target fields to the schema and unpack the data"""
        table = resource.to_petl()
        name = self.get("name")
        to_names = self.get("toNames")
        preserve = self.get("preserve")
        field = resource.schema.get_field(name)
        for to_name in to_names:
            resource.schema.add_field(Field(name=to_name))
        if not preserve:
            resource.schema.remove_field(name)
        # Object fields are unpacked by keys, any other field as a sequence;
        # the two petl functions spell the "keep original" keyword differently
        if field.type == "object":
            processor = table.unpackdict
            resource.data = processor(name, to_names, includeoriginal=preserve)
        else:
            processor = table.unpack
            resource.data = processor(name, to_names, include_original=preserve)

    # Metadata

    metadata_profile = {  # type: ignore
        "type": "object",
        "required": ["name", "toNames"],
        "properties": {
            "name": {"type": "string"},
            "toNames": {"type": "array"},
            "preserve": {},
        },
    }


class field_update(Step):
    """Update field

    Updates the descriptor of the field `name` (optionally renaming it
    to `newName`) and, if provided, its values using a `formula`,
    a `function` or a constant `value`.
    """

    code = "field-update"

    def __init__(
        self,
        descriptor=None,
        *,
        name=None,
        value=None,
        formula=None,
        function=None,
        new_name=None,
        **options,
    ):
        self.setinitial("name", name)
        self.setinitial("value", value)
        self.setinitial("formula", formula)
        self.setinitial("function", function)
        self.setinitial("newName", new_name)
        # Remaining options are stored on the step and later applied
        # to the field's descriptor (see `transform_resource`)
        for key, option in helpers.create_descriptor(**options).items():
            self.setinitial(key, option)
        super().__init__(descriptor)

    # Transform

    def transform_resource(self, resource):
        """Update the field's descriptor in the schema and its values in the data"""
        table = resource.to_petl()
        # Everything left in the descriptor after popping the step's
        # own properties is merged into the field's descriptor
        descriptor = self.to_dict()
        descriptor.pop("code", None)
        name = descriptor.pop("name", None)
        value = descriptor.pop("value", None)
        formula = descriptor.pop("formula", None)
        function = descriptor.pop("function", None)
        new_name = descriptor.pop("newName", None)
        if new_name:
            descriptor["name"] = new_name
        field = resource.schema.get_field(name)
        field.update(descriptor)
        # A user-provided function is called with the cell value only while
        # a formula is evaluated against the whole row, so petl has to be
        # asked to pass the row as the second argument
        pass_row = False
        if formula:
            function = lambda _, row: simpleeval.simple_eval(formula, names=row)
            pass_row = True
        if function:
            resource.data = table.convert(name, function, pass_row=pass_row)
        elif "value" in self:
            resource.data = table.update(name, value)

    # Metadata

    metadata_profile = {  # type: ignore
        "type": "object",
        "required": ["name"],
        "properties": {
            "name": {"type": "string"},
            "newName": {"type": "string"},
        },
    }