1 use super::debugger::DebuggingState; 2 use crate::cli::Options; 3 use anyhow::{Context, Result}; 4 use std::collections::HashMap; 5 use std::process::{Command, Stdio}; 6 7 /// This handles communication with GNU sed. 8 pub struct SedCommunicator { 9 options: Options, 10 } 11 impl SedCommunicator { new(options: Options) -> Self12 pub fn new(options: Options) -> Self { 13 SedCommunicator { options } 14 } get_execution_info_from_sed(&mut self) -> Result<DebugInfoFromSed>15 pub fn get_execution_info_from_sed(&mut self) -> Result<DebugInfoFromSed> { 16 let output = self.get_sed_output()?; 17 18 let program_source = self.parse_program_source(&output); 19 let label_jump_map = self.build_jump_map(&program_source); 20 let frames = self.parse_state_frames(&output, &label_jump_map, program_source.len()); 21 return Ok(DebugInfoFromSed { 22 program_source, 23 states: frames.0, 24 last_output: frames.1, 25 }); 26 } get_sed_output(&mut self) -> Result<String>27 fn get_sed_output(&mut self) -> Result<String> { 28 let mut path_to_be_used: &String = &String::from("sed"); 29 if let Some(path) = &self.options.sed_path { 30 path_to_be_used = path; 31 } 32 33 let mandatory_parameters = vec![ 34 "--debug", 35 "-f", 36 self.options 37 .sed_script 38 .to_str() 39 .with_context(|| format!("Invalid sed script path. Is it valid UTF-8?"))?, 40 self.options 41 .input_file 42 .to_str() 43 .with_context(|| format!("Invalid input path. Is it valid UTF-8?"))?, 44 ]; 45 let constructed_cmd_line = self 46 .options 47 .sed_parameters 48 .iter() 49 .map(|s| s.as_str()) 50 .chain(mandatory_parameters.iter().map(|s| *s)) 51 .collect::<Vec<&str>>(); 52 let sed_debug_command = Command::new(path_to_be_used) 53 .args(&constructed_cmd_line) 54 .stdin(Stdio::null()) 55 .stdout(Stdio::piped()) 56 .stderr(Stdio::inherit()) 57 .output() 58 .ok() 59 .with_context( 60 || format!("Sed failed to return output. Shouldn't you use -E option? Are you using GNU sed? Is there sed/gsed in $PATH?{}" , 61 if self.options.verbose{ format!("\n[Info] Sed was called using \"{} {}\"", &path_to_be_used, constructed_cmd_line.join(" ")) } else { format!("") } 62 ))? 63 .stdout; 64 65 if self.options.verbose { 66 eprintln!( 67 "[Info] Called sed with \"{} {}\", which returned {} lines of output.", 68 path_to_be_used, 69 constructed_cmd_line.join(" "), 70 sed_debug_command.len() 71 ); 72 } 73 74 // If sed returned no output (so it failed) and sed 75 // path wasn't specified by user, 76 // change executing path to "gsed" and try again. 77 if self.options.sed_path.is_none() && sed_debug_command.len() == 0 { 78 self.options.sed_path = Some(String::from("gsed")); 79 if self.options.verbose { 80 eprintln!( 81 "[Info] Sed failed and didn't return any output. As sed path wasn't specified, trying again with \"gsed\". If even that won't work, make sure \ 82 sed is able to process your script. Most common mistake is forgeting to use -E." 83 ); 84 } 85 return self.get_sed_output(); 86 } 87 88 Ok(String::from_utf8(sed_debug_command).with_context(|| "String received from sed doesn't seem to be UTF-8. If this continues to happen, please report a bug.")?) 89 } 90 91 /// Wait for line that looks like "SED PROGRAM:" 92 /// 93 /// Then, read each line with two spaces up front (remove those spaces) and save each line 94 /// into output vector. 95 /// 96 /// When we meet a line that doesn't start with two spaces, stop reading and return. parse_program_source(&self, sed_output: &String) -> Vec<String>97 fn parse_program_source(&self, sed_output: &String) -> Vec<String> { 98 sed_output 99 .lines() 100 .skip_while(|line| *line != "SED PROGRAM:") 101 .skip(1) 102 .take_while(|line| !line.starts_with("INPUT: '")) 103 .map(|line| String::from(line.trim())) 104 .collect() 105 } 106 107 /// Parse state frames. They look like this: 108 /// 109 /// ```sh 110 /// INPUT: 'input.txt' line 1 111 /// PATTERN: abc 112 /// COMMAND: s/a/b/g 113 /// MATCHED REGEX REGISTERS 114 /// regex[0] = 0-1 'a' 115 /// ``` 116 /// There might be multiple commands within one input line. The example continues: 117 /// ```sh 118 /// COMMNAD: = 119 /// 1 120 /// ``` 121 /// That was it, that was whole command. Notice the output of the command. 122 /// 123 /// A segment with multiple commands ends like this: 124 /// ```sh 125 /// COMMAND: d 126 /// END-OF-CYCLE 127 /// ``` 128 /// And another segment begins. Note that we don't differentiate within segments inside the result iself, 129 /// but we need to during parsing. 130 /// ```sh 131 /// INPUT: 'input.txt' line 2 132 /// PATTERN: bac 133 /// COMMDN: s/a/b/g 134 /// (...) 135 /// ``` 136 /// 137 /// --- 138 /// 139 /// List of sed commands that we recognize (this list might be incomplete): 140 /// 141 /// ```sh 142 /// INPUT: 'file.txt' line 1 # Defines where we took the pattern space from 143 /// # at start of segment. This one is ignored. 144 /// PATTERN: abc # Defines pattern space value 145 /// HOLD: def # Defines hold space value (can be empty) 146 /// COMMAND: s/a/b/g # Defines currently running command 147 /// MATCHED REGEX REGISTERS # Defines matched regex for previous command, including global capture group 148 /// regex[0] = 0-1 'a' 149 /// regex[1] = 0-3 'abc' 150 /// END-OF-CYCLE: # End of segment. This is ignored. 151 /// hello # Value printed to stdout. This tends to come after COMMAND or END-OF-CYCLE. 152 /// ``` 153 /// 154 /// This returns individual frames *and* output of the last segement of the sed script. parse_state_frames( &self, sed_output: &String, label_jump_map: &HashMap<String, usize>, lines_of_code: usize, ) -> (Vec<DebuggingState>, Option<Vec<String>>)155 fn parse_state_frames( 156 &self, 157 sed_output: &String, 158 label_jump_map: &HashMap<String, usize>, 159 lines_of_code: usize, 160 ) -> (Vec<DebuggingState>, Option<Vec<String>>) { 161 // First of all, skip the sed program source code. 162 let lines = sed_output 163 .lines() 164 .skip_while(|line| !line.starts_with("INPUT: ")); 165 166 // Start parsing 167 let mut sed_line: usize = 0; // We need to try to keep track of this ourselves. 168 // Sed doesn't exactly help with this one. 169 // All the states will end up here 170 let mut result: Vec<DebuggingState> = Vec::new(); 171 // The most recent pattern buffer 172 let mut current_pattern = ""; 173 // The most recent hold buffer 174 let mut current_hold = ""; 175 // The last command that was executed, if any 176 let mut previous_command: Option<String> = None; 177 // All matched regexes by previous command 178 let mut regex_registers: Vec<String> = Vec::new(); 179 // If sed printed any output because of last command, what was it 180 let mut previous_output = None; 181 // If true, we're currently parsing `MATCHED REGEX REGISTERS`, which lasts several lines. 182 let mut currently_loading_regex_matches: bool = false; 183 // If true, we're currently parsing `MATCHED REGEX REGISTERS`, but one of the regexes spans 184 // multiple lines. Keep loading it. 185 let mut currently_loading_multiline_regex_match: bool = false; 186 // Was any substitution since last command successful? 187 let mut substitution_successful: bool = false; 188 189 // TODO: Multiline regexes are not displayed correctly and will fall to output instead. FIXME!! 190 for line in lines { 191 // If we are trying to parse regexe matches, do so 192 if currently_loading_regex_matches { 193 if currently_loading_multiline_regex_match { 194 if line.starts_with(" regex[") { 195 // We PROBABLY have new regex now. There is no way to know for sure. 196 // Just carry on. 197 currently_loading_multiline_regex_match = false; 198 } else { 199 let last_regex_idx = regex_registers.len() - 1; 200 regex_registers 201 .get_mut(last_regex_idx) 202 .unwrap() 203 .push_str(line); 204 continue; 205 } 206 } 207 match line { 208 x if x.starts_with(" ") => { 209 let rest_of_regex: String = String::from( 210 x.chars() 211 .skip_while(|c| *c != '=') 212 .skip(1) 213 .collect::<String>() 214 .trim(), 215 ); 216 // If the regex didn't end, start loading it as multiline regex. 217 // We don't have a way to know this for sure, just guessing. 218 if !&rest_of_regex.ends_with("'") { 219 currently_loading_multiline_regex_match = true; 220 } 221 regex_registers.push(rest_of_regex); 222 substitution_successful = true; 223 } 224 _ => { 225 currently_loading_regex_matches = false; 226 } 227 } 228 } 229 // Do not attempt to match traditionally if we are still matching regexes 230 if currently_loading_regex_matches { 231 continue; 232 } 233 match line { 234 // Do not record INPUT lines, but reset line number, previous command and patern space. 235 x if x.starts_with("INPUT:") => { 236 sed_line = 0; 237 current_pattern = ""; 238 previous_command = None; 239 } 240 // Save pattern space 241 x if x.starts_with("PATTERN:") => { 242 current_pattern = x.trim_start_matches("PATTERN:").trim() 243 } 244 // Save hold space 245 x if x.starts_with("HOLD:") => current_hold = x.trim_start_matches("HOLD:").trim(), 246 // When we found a command, push previous debugging state 247 x if x.starts_with("COMMAND:") => { 248 let current_command = x.trim_start_matches("COMMAND:").trim(); 249 // Push state with the *previous* command and location 250 result.push(DebuggingState { 251 pattern_buffer: String::from(current_pattern), 252 hold_buffer: String::from(current_hold), 253 current_line: sed_line, 254 matched_regex_registers: regex_registers, 255 output: previous_output, 256 sed_command: previous_command, 257 }); 258 259 // Push line number forward 260 sed_line = self.next_line_position( 261 sed_line, 262 current_command, 263 label_jump_map, 264 lines_of_code, 265 substitution_successful, 266 ); 267 268 // Record new command 269 previous_command = Some(String::from(current_command)); 270 271 // Clear old info, such as output 272 previous_output = None; 273 regex_registers = Vec::new(); 274 275 // If the command is t or T, clear substitution_successful 276 if current_command.starts_with("t") || current_command.starts_with("T") { 277 substitution_successful = false; 278 } 279 } 280 x if x.starts_with("MATCHED REGEX REGISTERS") => { 281 currently_loading_regex_matches = true; 282 } 283 x if x.starts_with("END-OF-CYCLE:") => { 284 // Push last state, just as if we met next command, but the command was nil 285 result.push(DebuggingState { 286 pattern_buffer: String::from(current_pattern), 287 hold_buffer: String::from(current_hold), 288 current_line: sed_line, 289 matched_regex_registers: regex_registers, 290 output: previous_output, 291 sed_command: previous_command, 292 }); 293 294 // Start at the start again 295 sed_line = 0; 296 297 // Clear old info, such as output 298 previous_command = None; 299 previous_output = None; 300 regex_registers = Vec::new(); 301 substitution_successful = false; 302 } 303 x => { 304 // Assume this is returned value 305 if let Some(output) = &mut previous_output { 306 output.push(String::from(x)); 307 } else { 308 previous_output = Some(Vec::new()); 309 previous_output.as_mut().unwrap().push(String::from(x)); 310 } 311 } 312 } 313 } 314 315 (result, previous_output) 316 } 317 318 /// Guess next command position. 319 /// 320 /// Try to guess if the current command jumps anywhere. If so, 321 /// try to guess where. 322 /// 323 /// If not, just increment one. next_line_position( &self, current_position: usize, current_command: &str, label_jump_map: &HashMap<String, usize>, lines_of_code: usize, last_match_successful: bool, ) -> usize324 fn next_line_position( 325 &self, 326 current_position: usize, 327 current_command: &str, 328 label_jump_map: &HashMap<String, usize>, 329 lines_of_code: usize, 330 last_match_successful: bool, 331 ) -> usize { 332 // Handle jumps 333 match current_command { 334 // Unconditional jump 335 x if x.starts_with("b") => { 336 let rest = x[1..].trim(); 337 if rest == "" { 338 // Jump to end of script 339 lines_of_code 340 } else if let Some(target) = label_jump_map.get(rest) { 341 // Jump to target label 342 *target 343 } else { 344 // Label not found, just go one line down I guess? 345 current_position + 1 346 } 347 } 348 // Conditional jump 349 // Jump only if last substition was succesful 350 // (or, in case of T, only if the last substituion was not succesful) 351 x if x.starts_with("t") | x.starts_with("T") => { 352 if (x.starts_with("t") && last_match_successful) 353 || (x.starts_with("T") && !last_match_successful) 354 { 355 let rest = x[1..].trim(); 356 if rest == "" { 357 // jump to end of script 358 lines_of_code 359 } else if let Some(target) = label_jump_map.get(rest) { 360 // Jump to target label 361 *target 362 } else { 363 // Label not found, just go one line down I guess? 364 current_position + 1 365 } 366 } else { 367 current_position + 1 368 } 369 } 370 _ => { 371 // Unknown command, just go down 372 current_position + 1 373 } 374 } 375 } 376 377 /// Build label jump map build_jump_map(&self, source_code: &Vec<String>) -> HashMap<String, usize>378 fn build_jump_map(&self, source_code: &Vec<String>) -> HashMap<String, usize> { 379 let mut map: HashMap<String, usize> = HashMap::new(); 380 for (i, line) in source_code.iter().enumerate() { 381 let trimmed = line.trim(); 382 if trimmed.starts_with(":") { 383 map.insert(String::from(trimmed.trim_start_matches(":")), i); 384 } 385 } 386 map 387 } 388 } 389 390 pub struct DebugInfoFromSed { 391 pub program_source: Vec<String>, 392 pub states: Vec<DebuggingState>, 393 pub last_output: Option<Vec<String>>, 394 } 395