1 //===-- Args.h --------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_UTILITY_ARGS_H
10 #define LLDB_UTILITY_ARGS_H
11 
12 #include "lldb/Utility/Environment.h"
13 #include "lldb/lldb-private-types.h"
14 #include "lldb/lldb-types.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/YAMLTraits.h"
18 #include <string>
19 #include <utility>
20 #include <vector>
21 
22 namespace lldb_private {
23 
24 /// \class Args Args.h "lldb/Utility/Args.h"
25 /// A command line argument class.
26 ///
27 /// The Args class is designed to be fed a command line. The command line is
28 /// copied into an internal buffer and then split up into arguments. Arguments
29 /// are space delimited if there are no quotes (single, double, or backtick
30 /// quotes) surrounding the argument. Spaces can be escaped using a \
31 /// character to avoid having to surround an argument that contains a space
32 /// with quotes.
33 class Args {
34 public:
35   struct ArgEntry {
36   private:
37     friend class Args;
38 
39     std::unique_ptr<char[]> ptr;
40     char quote = '\0';
41 
dataArgEntry42     char *data() { return ptr.get(); }
43 
44   public:
45     ArgEntry() = default;
46     ArgEntry(llvm::StringRef str, char quote);
47 
refArgEntry48     llvm::StringRef ref() const { return c_str(); }
c_strArgEntry49     const char *c_str() const { return ptr.get(); }
50 
51     /// Returns true if this argument was quoted in any way.
IsQuotedArgEntry52     bool IsQuoted() const { return quote != '\0'; }
GetQuoteCharArgEntry53     char GetQuoteChar() const { return quote; }
54   };
55 
56   /// Construct with an option command string.
57   ///
58   /// \param[in] command
59   ///     A NULL terminated command that will be copied and split up
60   ///     into arguments.
61   ///
62   /// \see Args::SetCommandString(llvm::StringRef)
63   Args(llvm::StringRef command = llvm::StringRef());
64 
65   Args(const Args &rhs);
66   explicit Args(const StringList &list);
67   explicit Args(llvm::ArrayRef<llvm::StringRef> args);
68 
69   Args &operator=(const Args &rhs);
70 
71   /// Destructor.
72   ~Args();
73 
Args(const Environment & env)74   explicit Args(const Environment &env) : Args() {
75     SetArguments(const_cast<const char **>(env.getEnvp().get()));
76   }
77 
Environment()78   explicit operator Environment() const { return GetConstArgumentVector(); }
79 
80   /// Dump all entries to the stream \a s using label \a label_name.
81   ///
82   /// If label_name is nullptr, the dump operation is skipped.
83   ///
84   /// \param[in] s
85   ///     The stream to which to dump all arguments in the argument
86   ///     vector.
87   /// \param[in] label_name
88   ///     The label_name to use as the label printed for each
89   ///     entry of the args like so:
90   ///       {label_name}[{index}]={value}
91   void Dump(Stream &s, const char *label_name = "argv") const;
92 
93   /// Sets the command string contained by this object.
94   ///
95   /// The command string will be copied and split up into arguments that can
96   /// be accessed via the accessor functions.
97   ///
98   /// \param[in] command
99   ///     A command StringRef that will be copied and split up
100   ///     into arguments.
101   ///
102   /// \see Args::GetArgumentCount() const
103   /// \see Args::GetArgumentAtIndex (size_t) const @see
104   /// Args::GetArgumentVector () \see Args::Shift () \see Args::Unshift (const
105   /// char *)
106   void SetCommandString(llvm::StringRef command);
107 
108   bool GetCommandString(std::string &command) const;
109 
110   bool GetQuotedCommandString(std::string &command) const;
111 
112   /// Gets the number of arguments left in this command object.
113   ///
114   /// \return
115   ///     The number or arguments in this object.
GetArgumentCount()116   size_t GetArgumentCount() const { return m_entries.size(); }
117 
empty()118   bool empty() const { return GetArgumentCount() == 0; }
119 
120   /// Gets the NULL terminated C string argument pointer for the argument at
121   /// index \a idx.
122   ///
123   /// \return
124   ///     The NULL terminated C string argument pointer if \a idx is a
125   ///     valid argument index, NULL otherwise.
126   const char *GetArgumentAtIndex(size_t idx) const;
127 
entries()128   llvm::ArrayRef<ArgEntry> entries() const { return m_entries; }
129 
130   using const_iterator = std::vector<ArgEntry>::const_iterator;
131 
begin()132   const_iterator begin() const { return m_entries.begin(); }
end()133   const_iterator end() const { return m_entries.end(); }
134 
size()135   size_t size() const { return GetArgumentCount(); }
136   const ArgEntry &operator[](size_t n) const { return m_entries[n]; }
137 
138   /// Gets the argument vector.
139   ///
140   /// The value returned by this function can be used by any function that
141   /// takes and vector. The return value is just like \a argv in the standard
142   /// C entry point function:
143   ///     \code
144   ///         int main (int argc, const char **argv);
145   ///     \endcode
146   ///
147   /// \return
148   ///     An array of NULL terminated C string argument pointers that
149   ///     also has a terminating NULL C string pointer
150   char **GetArgumentVector();
151 
152   /// Gets the argument vector.
153   ///
154   /// The value returned by this function can be used by any function that
155   /// takes and vector. The return value is just like \a argv in the standard
156   /// C entry point function:
157   ///     \code
158   ///         int main (int argc, const char **argv);
159   ///     \endcode
160   ///
161   /// \return
162   ///     An array of NULL terminate C string argument pointers that
163   ///     also has a terminating NULL C string pointer
164   const char **GetConstArgumentVector() const;
165 
166   /// Gets the argument as an ArrayRef. Note that the return value does *not*
167   /// have a nullptr const char * at the end, as the size of the list is
168   /// embedded in the ArrayRef object.
GetArgumentArrayRef()169   llvm::ArrayRef<const char *> GetArgumentArrayRef() const {
170     return llvm::ArrayRef(m_argv).drop_back();
171   }
172 
173   /// Appends a new argument to the end of the list argument list.
174   ///
175   /// \param[in] arg_str
176   ///     The new argument.
177   ///
178   /// \param[in] quote_char
179   ///     If the argument was originally quoted, put in the quote char here.
180   void AppendArgument(llvm::StringRef arg_str, char quote_char = '\0');
181 
182   void AppendArguments(const Args &rhs);
183 
184   void AppendArguments(const char **argv);
185 
186   /// Insert the argument value at index \a idx to \a arg_str.
187   ///
188   /// \param[in] idx
189   ///     The index of where to insert the argument.
190   ///
191   /// \param[in] arg_str
192   ///     The new argument.
193   ///
194   /// \param[in] quote_char
195   ///     If the argument was originally quoted, put in the quote char here.
196   void InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
197                              char quote_char = '\0');
198 
199   /// Replaces the argument value at index \a idx to \a arg_str if \a idx is
200   /// a valid argument index.
201   ///
202   /// \param[in] idx
203   ///     The index of the argument that will have its value replaced.
204   ///
205   /// \param[in] arg_str
206   ///     The new argument.
207   ///
208   /// \param[in] quote_char
209   ///     If the argument was originally quoted, put in the quote char here.
210   void ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
211                               char quote_char = '\0');
212 
213   /// Deletes the argument value at index
214   /// if \a idx is a valid argument index.
215   ///
216   /// \param[in] idx
217   ///     The index of the argument that will have its value replaced.
218   ///
219   void DeleteArgumentAtIndex(size_t idx);
220 
221   /// Sets the argument vector value, optionally copying all arguments into an
222   /// internal buffer.
223   ///
224   /// Sets the arguments to match those found in \a argv. All argument strings
225   /// will be copied into an internal buffers.
226   //
227   //  FIXME: Handle the quote character somehow.
228   void SetArguments(size_t argc, const char **argv);
229 
230   void SetArguments(const char **argv);
231 
232   /// Shifts the first argument C string value of the array off the argument
233   /// array.
234   ///
235   /// The string value will be freed, so a copy of the string should be made
236   /// by calling Args::GetArgumentAtIndex (size_t) const first and copying the
237   /// returned value before calling Args::Shift().
238   ///
239   /// \see Args::GetArgumentAtIndex (size_t) const
240   void Shift();
241 
242   /// Inserts a class owned copy of \a arg_str at the beginning of the
243   /// argument vector.
244   ///
245   /// A copy \a arg_str will be made.
246   ///
247   /// \param[in] arg_str
248   ///     The argument to push on the front of the argument stack.
249   ///
250   /// \param[in] quote_char
251   ///     If the argument was originally quoted, put in the quote char here.
252   void Unshift(llvm::StringRef arg_str, char quote_char = '\0');
253 
254   /// Clear the arguments.
255   ///
256   /// For re-setting or blanking out the list of arguments.
257   void Clear();
258 
259   static lldb::Encoding
260   StringToEncoding(llvm::StringRef s,
261                    lldb::Encoding fail_value = lldb::eEncodingInvalid);
262 
263   static uint32_t StringToGenericRegister(llvm::StringRef s);
264 
265   static std::string GetShellSafeArgument(const FileSpec &shell,
266                                           llvm::StringRef unsafe_arg);
267 
268   /// EncodeEscapeSequences will change the textual representation of common
269   /// escape sequences like "\n" (two characters) into a single '\n'. It does
270   /// this for all of the supported escaped sequences and for the \0ooo (octal)
271   /// and \xXX (hex). The resulting "dst" string will contain the character
272   /// versions of all supported escape sequences. The common supported escape
273   /// sequences are: "\a", "\b", "\f", "\n", "\r", "\t", "\v", "\'", "\"", "\\".
274   static void EncodeEscapeSequences(const char *src, std::string &dst);
275 
276   /// ExpandEscapeSequences will change a string of possibly non-printable
277   /// characters and expand them into text. So '\n' will turn into two
278   /// characters like "\n" which is suitable for human reading. When a character
279   /// is not printable and isn't one of the common in escape sequences listed in
280   /// the help for EncodeEscapeSequences, then it will be encoded as octal.
281   /// Printable characters are left alone.
282   static void ExpandEscapedCharacters(const char *src, std::string &dst);
283 
284   static std::string EscapeLLDBCommandArgument(const std::string &arg,
285                                                char quote_char);
286 
287 private:
288   std::vector<ArgEntry> m_entries;
289   /// The arguments as C strings with a trailing nullptr element.
290   ///
291   /// These strings are owned by the ArgEntry object in m_entries with the
292   /// same index.
293   std::vector<char *> m_argv;
294 };
295 
296 /// \class OptionsWithRaw Args.h "lldb/Utility/Args.h"
297 /// A pair of an option list with a 'raw' string as a suffix.
298 ///
299 /// This class works similar to Args, but handles the case where we have a
300 /// trailing string that shouldn't be interpreted as a list of arguments but
301 /// preserved as is. It is also only useful for handling command line options
302 /// (e.g. '-foo bar -i0') that start with a dash.
303 ///
304 /// The leading option list is optional. If the first non-space character
305 /// in the string starts with a dash, and the string contains an argument
306 /// that is an unquoted double dash (' -- '), then everything up to the double
307 /// dash is parsed as a list of arguments. Everything after the double dash
308 /// is interpreted as the raw suffix string. Note that the space behind the
309 /// double dash is not part of the raw suffix.
310 ///
/// All strings not matching the above format are considered to be just a raw
/// string without any options.
313 ///
314 /// \see Args
315 class OptionsWithRaw {
316 public:
317   /// Parse the given string as a list of optional arguments with a raw suffix.
318   ///
319   /// See the class description for a description of the input format.
320   ///
321   /// \param[in] argument_string
322   ///     The string that should be parsed.
323   explicit OptionsWithRaw(llvm::StringRef argument_string);
324 
325   /// Returns true if there are any arguments before the raw suffix.
HasArgs()326   bool HasArgs() const { return m_has_args; }
327 
328   /// Returns the list of arguments.
329   ///
330   /// You can only call this method if HasArgs returns true.
GetArgs()331   Args &GetArgs() {
332     assert(m_has_args);
333     return m_args;
334   }
335 
336   /// Returns the list of arguments.
337   ///
338   /// You can only call this method if HasArgs returns true.
GetArgs()339   const Args &GetArgs() const {
340     assert(m_has_args);
341     return m_args;
342   }
343 
344   /// Returns the part of the input string that was used for parsing the
345   /// argument list. This string also includes the double dash that is used
346   /// for separating the argument list from the suffix.
347   ///
348   /// You can only call this method if HasArgs returns true.
GetArgStringWithDelimiter()349   llvm::StringRef GetArgStringWithDelimiter() const {
350     assert(m_has_args);
351     return m_arg_string_with_delimiter;
352   }
353 
354   /// Returns the part of the input string that was used for parsing the
355   /// argument list.
356   ///
357   /// You can only call this method if HasArgs returns true.
GetArgString()358   llvm::StringRef GetArgString() const {
359     assert(m_has_args);
360     return m_arg_string;
361   }
362 
363   /// Returns the raw suffix part of the parsed string.
GetRawPart()364   const std::string &GetRawPart() const { return m_suffix; }
365 
366 private:
367   void SetFromString(llvm::StringRef arg_string);
368 
369   /// Keeps track if we have parsed and stored any arguments.
370   bool m_has_args = false;
371   Args m_args;
372   llvm::StringRef m_arg_string;
373   llvm::StringRef m_arg_string_with_delimiter;
374 
375   // FIXME: This should be a StringRef, but some of the calling code expect a
376   // C string here so only a real std::string is possible.
377   std::string m_suffix;
378 };
379 
380 } // namespace lldb_private
381 
382 #endif // LLDB_UTILITY_ARGS_H
383