1 //===-- Args.h --------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_UTILITY_ARGS_H
10 #define LLDB_UTILITY_ARGS_H
11 
12 #include "lldb/Utility/Environment.h"
13 #include "lldb/lldb-private-types.h"
14 #include "lldb/lldb-types.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/YAMLTraits.h"
18 #include <string>
19 #include <utility>
20 #include <vector>
21 
22 namespace lldb_private {
23 
24 /// \class Args Args.h "lldb/Utility/Args.h"
25 /// A command line argument class.
26 ///
27 /// The Args class is designed to be fed a command line. The command line is
28 /// copied into an internal buffer and then split up into arguments. Arguments
29 /// are space delimited if there are no quotes (single, double, or backtick
30 /// quotes) surrounding the argument. Spaces can be escaped using a \
31 /// character to avoid having to surround an argument that contains a space
32 /// with quotes.
33 class Args {
34 public:
35   struct ArgEntry {
36   private:
37     friend class Args;
38     friend struct llvm::yaml::MappingTraits<Args>;
39     friend struct llvm::yaml::MappingTraits<Args::ArgEntry>;
40 
41     std::unique_ptr<char[]> ptr;
42     char quote;
43 
44     char *data() { return ptr.get(); }
45 
46   public:
47     ArgEntry() = default;
48     ArgEntry(llvm::StringRef str, char quote);
49 
50     llvm::StringRef ref() const { return c_str(); }
51     const char *c_str() const { return ptr.get(); }
52 
53     /// Returns true if this argument was quoted in any way.
54     bool IsQuoted() const { return quote != '\0'; }
55     char GetQuoteChar() const { return quote; }
56   };
57 
58   /// Construct with an option command string.
59   ///
60   /// \param[in] command
61   ///     A NULL terminated command that will be copied and split up
62   ///     into arguments.
63   ///
64   /// \see Args::SetCommandString(llvm::StringRef)
65   Args(llvm::StringRef command = llvm::StringRef());
66 
67   Args(const Args &rhs);
68   explicit Args(const StringList &list);
69   explicit Args(llvm::ArrayRef<llvm::StringRef> args);
70 
71   Args &operator=(const Args &rhs);
72 
73   /// Destructor.
74   ~Args();
75 
76   explicit Args(const Environment &env) : Args() {
77     SetArguments(const_cast<const char **>(env.getEnvp().get()));
78   }
79 
80   explicit operator Environment() const { return GetConstArgumentVector(); }
81 
82   /// Dump all entries to the stream \a s using label \a label_name.
83   ///
84   /// If label_name is nullptr, the dump operation is skipped.
85   ///
86   /// \param[in] s
87   ///     The stream to which to dump all arguments in the argument
88   ///     vector.
89   /// \param[in] label_name
90   ///     The label_name to use as the label printed for each
91   ///     entry of the args like so:
92   ///       {label_name}[{index}]={value}
93   void Dump(Stream &s, const char *label_name = "argv") const;
94 
95   /// Sets the command string contained by this object.
96   ///
97   /// The command string will be copied and split up into arguments that can
98   /// be accessed via the accessor functions.
99   ///
100   /// \param[in] command
101   ///     A command StringRef that will be copied and split up
102   ///     into arguments.
103   ///
104   /// \see Args::GetArgumentCount() const
105   /// \see Args::GetArgumentAtIndex (size_t) const @see
106   /// Args::GetArgumentVector () \see Args::Shift () \see Args::Unshift (const
107   /// char *)
108   void SetCommandString(llvm::StringRef command);
109 
110   bool GetCommandString(std::string &command) const;
111 
112   bool GetQuotedCommandString(std::string &command) const;
113 
114   /// Gets the number of arguments left in this command object.
115   ///
116   /// \return
117   ///     The number or arguments in this object.
118   size_t GetArgumentCount() const { return m_entries.size(); }
119 
120   bool empty() const { return GetArgumentCount() == 0; }
121 
122   /// Gets the NULL terminated C string argument pointer for the argument at
123   /// index \a idx.
124   ///
125   /// \return
126   ///     The NULL terminated C string argument pointer if \a idx is a
127   ///     valid argument index, NULL otherwise.
128   const char *GetArgumentAtIndex(size_t idx) const;
129 
130   llvm::ArrayRef<ArgEntry> entries() const { return m_entries; }
131 
132   using const_iterator = std::vector<ArgEntry>::const_iterator;
133 
134   const_iterator begin() const { return m_entries.begin(); }
135   const_iterator end() const { return m_entries.end(); }
136 
137   size_t size() const { return GetArgumentCount(); }
138   const ArgEntry &operator[](size_t n) const { return m_entries[n]; }
139 
140   /// Gets the argument vector.
141   ///
142   /// The value returned by this function can be used by any function that
143   /// takes and vector. The return value is just like \a argv in the standard
144   /// C entry point function:
145   ///     \code
146   ///         int main (int argc, const char **argv);
147   ///     \endcode
148   ///
149   /// \return
150   ///     An array of NULL terminated C string argument pointers that
151   ///     also has a terminating NULL C string pointer
152   char **GetArgumentVector();
153 
154   /// Gets the argument vector.
155   ///
156   /// The value returned by this function can be used by any function that
157   /// takes and vector. The return value is just like \a argv in the standard
158   /// C entry point function:
159   ///     \code
160   ///         int main (int argc, const char **argv);
161   ///     \endcode
162   ///
163   /// \return
164   ///     An array of NULL terminate C string argument pointers that
165   ///     also has a terminating NULL C string pointer
166   const char **GetConstArgumentVector() const;
167 
168   /// Gets the argument as an ArrayRef. Note that the return value does *not*
169   /// have a nullptr const char * at the end, as the size of the list is
170   /// embedded in the ArrayRef object.
171   llvm::ArrayRef<const char *> GetArgumentArrayRef() const {
172     return llvm::makeArrayRef(m_argv).drop_back();
173   }
174 
175   /// Appends a new argument to the end of the list argument list.
176   ///
177   /// \param[in] arg_str
178   ///     The new argument.
179   ///
180   /// \param[in] quote_char
181   ///     If the argument was originally quoted, put in the quote char here.
182   void AppendArgument(llvm::StringRef arg_str, char quote_char = '\0');
183 
184   void AppendArguments(const Args &rhs);
185 
186   void AppendArguments(const char **argv);
187 
188   /// Insert the argument value at index \a idx to \a arg_str.
189   ///
190   /// \param[in] idx
191   ///     The index of where to insert the argument.
192   ///
193   /// \param[in] arg_str
194   ///     The new argument.
195   ///
196   /// \param[in] quote_char
197   ///     If the argument was originally quoted, put in the quote char here.
198   void InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
199                              char quote_char = '\0');
200 
201   /// Replaces the argument value at index \a idx to \a arg_str if \a idx is
202   /// a valid argument index.
203   ///
204   /// \param[in] idx
205   ///     The index of the argument that will have its value replaced.
206   ///
207   /// \param[in] arg_str
208   ///     The new argument.
209   ///
210   /// \param[in] quote_char
211   ///     If the argument was originally quoted, put in the quote char here.
212   void ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
213                               char quote_char = '\0');
214 
215   /// Deletes the argument value at index
216   /// if \a idx is a valid argument index.
217   ///
218   /// \param[in] idx
219   ///     The index of the argument that will have its value replaced.
220   ///
221   void DeleteArgumentAtIndex(size_t idx);
222 
223   /// Sets the argument vector value, optionally copying all arguments into an
224   /// internal buffer.
225   ///
226   /// Sets the arguments to match those found in \a argv. All argument strings
227   /// will be copied into an internal buffers.
228   //
229   //  FIXME: Handle the quote character somehow.
230   void SetArguments(size_t argc, const char **argv);
231 
232   void SetArguments(const char **argv);
233 
234   /// Shifts the first argument C string value of the array off the argument
235   /// array.
236   ///
237   /// The string value will be freed, so a copy of the string should be made
238   /// by calling Args::GetArgumentAtIndex (size_t) const first and copying the
239   /// returned value before calling Args::Shift().
240   ///
241   /// \see Args::GetArgumentAtIndex (size_t) const
242   void Shift();
243 
244   /// Inserts a class owned copy of \a arg_str at the beginning of the
245   /// argument vector.
246   ///
247   /// A copy \a arg_str will be made.
248   ///
249   /// \param[in] arg_str
250   ///     The argument to push on the front of the argument stack.
251   ///
252   /// \param[in] quote_char
253   ///     If the argument was originally quoted, put in the quote char here.
254   void Unshift(llvm::StringRef arg_str, char quote_char = '\0');
255 
256   /// Clear the arguments.
257   ///
258   /// For re-setting or blanking out the list of arguments.
259   void Clear();
260 
261   static lldb::Encoding
262   StringToEncoding(llvm::StringRef s,
263                    lldb::Encoding fail_value = lldb::eEncodingInvalid);
264 
265   static uint32_t StringToGenericRegister(llvm::StringRef s);
266 
267   static std::string GetShellSafeArgument(const FileSpec &shell,
268                                           llvm::StringRef unsafe_arg);
269 
270   /// EncodeEscapeSequences will change the textual representation of common
271   /// escape sequences like "\n" (two characters) into a single '\n'. It does
272   /// this for all of the supported escaped sequences and for the \0ooo (octal)
273   /// and \xXX (hex). The resulting "dst" string will contain the character
274   /// versions of all supported escape sequences. The common supported escape
275   /// sequences are: "\a", "\b", "\f", "\n", "\r", "\t", "\v", "\'", "\"", "\\".
276   static void EncodeEscapeSequences(const char *src, std::string &dst);
277 
278   /// ExpandEscapeSequences will change a string of possibly non-printable
279   /// characters and expand them into text. So '\n' will turn into two
280   /// characters like "\n" which is suitable for human reading. When a character
281   /// is not printable and isn't one of the common in escape sequences listed in
282   /// the help for EncodeEscapeSequences, then it will be encoded as octal.
283   /// Printable characters are left alone.
284   static void ExpandEscapedCharacters(const char *src, std::string &dst);
285 
286   static std::string EscapeLLDBCommandArgument(const std::string &arg,
287                                                char quote_char);
288 
289 private:
290   friend struct llvm::yaml::MappingTraits<Args>;
291 
292   std::vector<ArgEntry> m_entries;
293   /// The arguments as C strings with a trailing nullptr element.
294   ///
295   /// These strings are owned by the ArgEntry object in m_entries with the
296   /// same index.
297   std::vector<char *> m_argv;
298 };
299 
300 /// \class OptionsWithRaw Args.h "lldb/Utility/Args.h"
301 /// A pair of an option list with a 'raw' string as a suffix.
302 ///
303 /// This class works similar to Args, but handles the case where we have a
304 /// trailing string that shouldn't be interpreted as a list of arguments but
305 /// preserved as is. It is also only useful for handling command line options
306 /// (e.g. '-foo bar -i0') that start with a dash.
307 ///
308 /// The leading option list is optional. If the first non-space character
309 /// in the string starts with a dash, and the string contains an argument
310 /// that is an unquoted double dash (' -- '), then everything up to the double
311 /// dash is parsed as a list of arguments. Everything after the double dash
312 /// is interpreted as the raw suffix string. Note that the space behind the
313 /// double dash is not part of the raw suffix.
314 ///
/// All strings not matching the above format are considered to be just a raw
/// string without any options.
317 ///
318 /// \see Args
319 class OptionsWithRaw {
320 public:
321   /// Parse the given string as a list of optional arguments with a raw suffix.
322   ///
323   /// See the class description for a description of the input format.
324   ///
325   /// \param[in] argument_string
326   ///     The string that should be parsed.
327   explicit OptionsWithRaw(llvm::StringRef argument_string);
328 
329   /// Returns true if there are any arguments before the raw suffix.
330   bool HasArgs() const { return m_has_args; }
331 
332   /// Returns the list of arguments.
333   ///
334   /// You can only call this method if HasArgs returns true.
335   Args &GetArgs() {
336     assert(m_has_args);
337     return m_args;
338   }
339 
340   /// Returns the list of arguments.
341   ///
342   /// You can only call this method if HasArgs returns true.
343   const Args &GetArgs() const {
344     assert(m_has_args);
345     return m_args;
346   }
347 
348   /// Returns the part of the input string that was used for parsing the
349   /// argument list. This string also includes the double dash that is used
350   /// for separating the argument list from the suffix.
351   ///
352   /// You can only call this method if HasArgs returns true.
353   llvm::StringRef GetArgStringWithDelimiter() const {
354     assert(m_has_args);
355     return m_arg_string_with_delimiter;
356   }
357 
358   /// Returns the part of the input string that was used for parsing the
359   /// argument list.
360   ///
361   /// You can only call this method if HasArgs returns true.
362   llvm::StringRef GetArgString() const {
363     assert(m_has_args);
364     return m_arg_string;
365   }
366 
367   /// Returns the raw suffix part of the parsed string.
368   const std::string &GetRawPart() const { return m_suffix; }
369 
370 private:
371   void SetFromString(llvm::StringRef arg_string);
372 
373   /// Keeps track if we have parsed and stored any arguments.
374   bool m_has_args = false;
375   Args m_args;
376   llvm::StringRef m_arg_string;
377   llvm::StringRef m_arg_string_with_delimiter;
378 
379   // FIXME: This should be a StringRef, but some of the calling code expect a
380   // C string here so only a real std::string is possible.
381   std::string m_suffix;
382 };
383 
384 } // namespace lldb_private
385 
386 namespace llvm {
387 namespace yaml {
388 template <> struct MappingTraits<lldb_private::Args::ArgEntry> {
389   class NormalizedArgEntry {
390   public:
391     NormalizedArgEntry(IO &) {}
392     NormalizedArgEntry(IO &, lldb_private::Args::ArgEntry &entry)
393         : value(entry.ref()), quote(entry.quote) {}
394     lldb_private::Args::ArgEntry denormalize(IO &) {
395       return lldb_private::Args::ArgEntry(value, quote);
396     }
397     StringRef value;
398     uint8_t quote;
399   };
400   static void mapping(IO &io, lldb_private::Args::ArgEntry &v);
401 };
402 template <> struct MappingTraits<lldb_private::Args> {
403   static void mapping(IO &io, lldb_private::Args &v);
404 };
405 } // namespace yaml
406 } // namespace llvm
407 
408 LLVM_YAML_IS_SEQUENCE_VECTOR(lldb_private::Args::ArgEntry)
409 
410 #endif // LLDB_UTILITY_ARGS_H
411