1 use super::debugger::DebuggingState;
2 use crate::cli::Options;
3 use anyhow::{Context, Result};
4 use std::collections::HashMap;
5 use std::process::{Command, Stdio};
6 
/// This handles communication with GNU sed.
///
/// It launches sed with `--debug` on the configured script and input file and turns
/// the textual debug output into the program source and a list of debugging states.
pub struct SedCommunicator {
    /// Command-line options. The code in this file reads `sed_path`, `sed_parameters`,
    /// `sed_script`, `input_file` and `verbose` from it, and may set `sed_path` to
    /// `"gsed"` when falling back from a failed `sed` invocation.
    options: Options,
}
11 impl SedCommunicator {
new(options: Options) -> Self12     pub fn new(options: Options) -> Self {
13         SedCommunicator { options }
14     }
get_execution_info_from_sed(&mut self) -> Result<DebugInfoFromSed>15     pub fn get_execution_info_from_sed(&mut self) -> Result<DebugInfoFromSed> {
16         let output = self.get_sed_output()?;
17 
18         let program_source = self.parse_program_source(&output);
19         let label_jump_map = self.build_jump_map(&program_source);
20         let frames = self.parse_state_frames(&output, &label_jump_map, program_source.len());
21         return Ok(DebugInfoFromSed {
22             program_source,
23             states: frames.0,
24             last_output: frames.1,
25         });
26     }
get_sed_output(&mut self) -> Result<String>27     fn get_sed_output(&mut self) -> Result<String> {
28         let mut path_to_be_used: &String = &String::from("sed");
29         if let Some(path) = &self.options.sed_path {
30             path_to_be_used = path;
31         }
32 
33         let mandatory_parameters = vec![
34             "--debug",
35             "-f",
36             self.options
37                 .sed_script
38                 .to_str()
39                 .with_context(|| format!("Invalid sed script path. Is it valid UTF-8?"))?,
40             self.options
41                 .input_file
42                 .to_str()
43                 .with_context(|| format!("Invalid input path. Is it valid UTF-8?"))?,
44         ];
45         let constructed_cmd_line = self
46             .options
47             .sed_parameters
48             .iter()
49             .map(|s| s.as_str())
50             .chain(mandatory_parameters.iter().map(|s| *s))
51             .collect::<Vec<&str>>();
52         let sed_debug_command = Command::new(path_to_be_used)
53             .args(&constructed_cmd_line)
54             .stdin(Stdio::null())
55             .stdout(Stdio::piped())
56             .stderr(Stdio::inherit())
57             .output()
58             .ok()
59             .with_context(
60                 || format!("Sed failed to return output. Shouldn't you use -E option? Are you using GNU sed? Is there sed/gsed in $PATH?{}" ,
61                     if  self.options.verbose{ format!("\n[Info] Sed was called using \"{} {}\"", &path_to_be_used, constructed_cmd_line.join(" ")) } else { format!("") }
62             ))?
63             .stdout;
64 
65         if self.options.verbose {
66             eprintln!(
67                 "[Info] Called sed with \"{} {}\", which returned {} lines of output.",
68                 path_to_be_used,
69                 constructed_cmd_line.join(" "),
70                 sed_debug_command.len()
71             );
72         }
73 
74         // If sed returned no output (so it failed) and sed
75         // path wasn't specified by user,
76         // change executing path to "gsed" and try again.
77         if self.options.sed_path.is_none() && sed_debug_command.len() == 0 {
78             self.options.sed_path = Some(String::from("gsed"));
79             if self.options.verbose {
80                 eprintln!(
81                         "[Info] Sed failed and didn't return any output. As sed path wasn't specified, trying again with \"gsed\". If even that won't work, make sure \
82                                 sed is able to process your script. Most common mistake is forgeting to use -E."
83                     );
84             }
85             return self.get_sed_output();
86         }
87 
88         Ok(String::from_utf8(sed_debug_command).with_context(|| "String received from sed doesn't seem to be UTF-8. If this continues to happen, please report a bug.")?)
89     }
90 
91     /// Wait for line that looks like "SED PROGRAM:"
92     ///
93     /// Then, read each line with two spaces up front (remove those spaces) and save each line
94     /// into output vector.
95     ///
96     /// When we meet a line that doesn't start with two spaces, stop reading and return.
parse_program_source(&self, sed_output: &String) -> Vec<String>97     fn parse_program_source(&self, sed_output: &String) -> Vec<String> {
98         sed_output
99             .lines()
100             .skip_while(|line| *line != "SED PROGRAM:")
101             .skip(1)
102             .take_while(|line| !line.starts_with("INPUT:   '"))
103             .map(|line| String::from(line.trim()))
104             .collect()
105     }
106 
107     /// Parse state frames. They look like this:
108     ///
109     /// ```sh
110     /// INPUT:    'input.txt' line 1
111     /// PATTERN: abc
112     /// COMMAND: s/a/b/g
113     /// MATCHED REGEX REGISTERS
114     ///   regex[0] = 0-1 'a'
115     /// ```
116     /// There might be multiple commands within one input line. The example continues:
117     /// ```sh
118     /// COMMNAD: =
119     /// 1
120     /// ```
121     /// That was it, that was whole command. Notice the output of the command.
122     ///
123     /// A segment with multiple commands ends like this:
124     /// ```sh
125     /// COMMAND: d
126     /// END-OF-CYCLE
127     /// ```
128     /// And another segment begins. Note that we don't differentiate within segments inside the result iself,
129     /// but we need to during parsing.
130     /// ```sh
131     /// INPUT:    'input.txt' line 2
132     /// PATTERN: bac
133     /// COMMDN: s/a/b/g
134     /// (...)
135     /// ```
136     ///
137     /// ---
138     ///
139     /// List of sed commands that we recognize (this list might be incomplete):
140     ///
141     /// ```sh
142     /// INPUT:   'file.txt' line 1 # Defines where we took the pattern space from
143     ///                            # at start of segment. This one is ignored.
144     /// PATTERN: abc # Defines pattern space value
145     /// HOLD:    def # Defines hold space value (can be empty)
146     /// COMMAND: s/a/b/g # Defines currently running command
147     /// MATCHED REGEX REGISTERS # Defines matched regex for previous command, including global capture group
148     ///   regex[0] = 0-1 'a'
149     ///   regex[1] = 0-3 'abc'
150     /// END-OF-CYCLE:   # End of segment. This is ignored.
151     /// hello           # Value printed to stdout. This tends to come after COMMAND or END-OF-CYCLE.
152     /// ```
153     ///
154     /// This returns individual frames *and* output of the last segement of the sed script.
parse_state_frames( &self, sed_output: &String, label_jump_map: &HashMap<String, usize>, lines_of_code: usize, ) -> (Vec<DebuggingState>, Option<Vec<String>>)155     fn parse_state_frames(
156         &self,
157         sed_output: &String,
158         label_jump_map: &HashMap<String, usize>,
159         lines_of_code: usize,
160     ) -> (Vec<DebuggingState>, Option<Vec<String>>) {
161         // First of all, skip the sed program source code.
162         let lines = sed_output
163             .lines()
164             .skip_while(|line| !line.starts_with("INPUT: "));
165 
166         // Start parsing
167         let mut sed_line: usize = 0; // We need to try to keep track of this ourselves.
168                                      // Sed doesn't exactly help with this one.
169                                      // All the states will end up here
170         let mut result: Vec<DebuggingState> = Vec::new();
171         // The most recent pattern buffer
172         let mut current_pattern = "";
173         // The most recent hold buffer
174         let mut current_hold = "";
175         // The last command that was executed, if any
176         let mut previous_command: Option<String> = None;
177         // All matched regexes by previous command
178         let mut regex_registers: Vec<String> = Vec::new();
179         // If sed printed any output because of last command, what was it
180         let mut previous_output = None;
181         // If true, we're currently parsing `MATCHED REGEX REGISTERS`, which lasts several lines.
182         let mut currently_loading_regex_matches: bool = false;
183         // If true, we're currently parsing `MATCHED REGEX REGISTERS`, but one of the regexes spans
184         // multiple lines. Keep loading it.
185         let mut currently_loading_multiline_regex_match: bool = false;
186         // Was any substitution since last command successful?
187         let mut substitution_successful: bool = false;
188 
189         // TODO: Multiline regexes are not displayed correctly and will fall to output instead. FIXME!!
190         for line in lines {
191             // If we are trying to parse regexe matches, do so
192             if currently_loading_regex_matches {
193                 if currently_loading_multiline_regex_match {
194                     if line.starts_with("  regex[") {
195                         // We PROBABLY have new regex now. There is no way to know for sure.
196                         // Just carry on.
197                         currently_loading_multiline_regex_match = false;
198                     } else {
199                         let last_regex_idx = regex_registers.len() - 1;
200                         regex_registers
201                             .get_mut(last_regex_idx)
202                             .unwrap()
203                             .push_str(line);
204                         continue;
205                     }
206                 }
207                 match line {
208                     x if x.starts_with("  ") => {
209                         let rest_of_regex: String = String::from(
210                             x.chars()
211                                 .skip_while(|c| *c != '=')
212                                 .skip(1)
213                                 .collect::<String>()
214                                 .trim(),
215                         );
216                         // If the regex didn't end, start loading it as multiline regex.
217                         // We don't have a way to know this for sure, just guessing.
218                         if !&rest_of_regex.ends_with("'") {
219                             currently_loading_multiline_regex_match = true;
220                         }
221                         regex_registers.push(rest_of_regex);
222                         substitution_successful = true;
223                     }
224                     _ => {
225                         currently_loading_regex_matches = false;
226                     }
227                 }
228             }
229             // Do not attempt to match traditionally if we are still matching regexes
230             if currently_loading_regex_matches {
231                 continue;
232             }
233             match line {
234                 // Do not record INPUT lines, but reset line number, previous command and patern space.
235                 x if x.starts_with("INPUT:") => {
236                     sed_line = 0;
237                     current_pattern = "";
238                     previous_command = None;
239                 }
240                 // Save pattern space
241                 x if x.starts_with("PATTERN:") => {
242                     current_pattern = x.trim_start_matches("PATTERN:").trim()
243                 }
244                 // Save hold space
245                 x if x.starts_with("HOLD:") => current_hold = x.trim_start_matches("HOLD:").trim(),
246                 // When we found a command, push previous debugging state
247                 x if x.starts_with("COMMAND:") => {
248                     let current_command = x.trim_start_matches("COMMAND:").trim();
249                     // Push state with the *previous* command and location
250                     result.push(DebuggingState {
251                         pattern_buffer: String::from(current_pattern),
252                         hold_buffer: String::from(current_hold),
253                         current_line: sed_line,
254                         matched_regex_registers: regex_registers,
255                         output: previous_output,
256                         sed_command: previous_command,
257                     });
258 
259                     // Push line number forward
260                     sed_line = self.next_line_position(
261                         sed_line,
262                         current_command,
263                         label_jump_map,
264                         lines_of_code,
265                         substitution_successful,
266                     );
267 
268                     // Record new command
269                     previous_command = Some(String::from(current_command));
270 
271                     // Clear old info, such as output
272                     previous_output = None;
273                     regex_registers = Vec::new();
274 
275                     // If the command is t or T, clear substitution_successful
276                     if current_command.starts_with("t") || current_command.starts_with("T") {
277                         substitution_successful = false;
278                     }
279                 }
280                 x if x.starts_with("MATCHED REGEX REGISTERS") => {
281                     currently_loading_regex_matches = true;
282                 }
283                 x if x.starts_with("END-OF-CYCLE:") => {
284                     // Push last state, just as if we met next command, but the command was nil
285                     result.push(DebuggingState {
286                         pattern_buffer: String::from(current_pattern),
287                         hold_buffer: String::from(current_hold),
288                         current_line: sed_line,
289                         matched_regex_registers: regex_registers,
290                         output: previous_output,
291                         sed_command: previous_command,
292                     });
293 
294                     // Start at the start again
295                     sed_line = 0;
296 
297                     // Clear old info, such as output
298                     previous_command = None;
299                     previous_output = None;
300                     regex_registers = Vec::new();
301                     substitution_successful = false;
302                 }
303                 x => {
304                     // Assume this is returned value
305                     if let Some(output) = &mut previous_output {
306                         output.push(String::from(x));
307                     } else {
308                         previous_output = Some(Vec::new());
309                         previous_output.as_mut().unwrap().push(String::from(x));
310                     }
311                 }
312             }
313         }
314 
315         (result, previous_output)
316     }
317 
318     /// Guess next command position.
319     ///
320     /// Try to guess if the current command jumps anywhere. If so,
321     /// try to guess where.
322     ///
323     /// If not, just increment one.
next_line_position( &self, current_position: usize, current_command: &str, label_jump_map: &HashMap<String, usize>, lines_of_code: usize, last_match_successful: bool, ) -> usize324     fn next_line_position(
325         &self,
326         current_position: usize,
327         current_command: &str,
328         label_jump_map: &HashMap<String, usize>,
329         lines_of_code: usize,
330         last_match_successful: bool,
331     ) -> usize {
332         // Handle jumps
333         match current_command {
334             // Unconditional jump
335             x if x.starts_with("b") => {
336                 let rest = x[1..].trim();
337                 if rest == "" {
338                     // Jump to end of script
339                     lines_of_code
340                 } else if let Some(target) = label_jump_map.get(rest) {
341                     // Jump to target label
342                     *target
343                 } else {
344                     // Label not found, just go one line down I guess?
345                     current_position + 1
346                 }
347             }
348             // Conditional jump
349             // Jump only if last substition was succesful
350             // (or, in case of T, only if the last substituion was not succesful)
351             x if x.starts_with("t") | x.starts_with("T") => {
352                 if (x.starts_with("t") && last_match_successful)
353                     || (x.starts_with("T") && !last_match_successful)
354                 {
355                     let rest = x[1..].trim();
356                     if rest == "" {
357                         // jump to end of script
358                         lines_of_code
359                     } else if let Some(target) = label_jump_map.get(rest) {
360                         // Jump to target label
361                         *target
362                     } else {
363                         // Label not found, just go one line down I guess?
364                         current_position + 1
365                     }
366                 } else {
367                     current_position + 1
368                 }
369             }
370             _ => {
371                 // Unknown command, just go down
372                 current_position + 1
373             }
374         }
375     }
376 
377     /// Build label jump map
build_jump_map(&self, source_code: &Vec<String>) -> HashMap<String, usize>378     fn build_jump_map(&self, source_code: &Vec<String>) -> HashMap<String, usize> {
379         let mut map: HashMap<String, usize> = HashMap::new();
380         for (i, line) in source_code.iter().enumerate() {
381             let trimmed = line.trim();
382             if trimmed.starts_with(":") {
383                 map.insert(String::from(trimmed.trim_start_matches(":")), i);
384             }
385         }
386         map
387     }
388 }
389 
/// Everything that was extracted from one debug run of sed.
pub struct DebugInfoFromSed {
    /// Lines of the sed program as echoed back by sed, each with surrounding
    /// whitespace trimmed.
    pub program_source: Vec<String>,
    /// Debugging states in the order in which they were parsed from the output.
    pub states: Vec<DebuggingState>,
    /// Output lines that were collected after the last recorded state, if any.
    pub last_output: Option<Vec<String>>,
}
395