1 //===-- Args.h --------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_UTILITY_ARGS_H
10 #define LLDB_UTILITY_ARGS_H
11 
#include "lldb/Utility/Environment.h"
#include "lldb/lldb-private-types.h"
#include "lldb/lldb-types.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <memory>
#include <string>
#include <utility>
#include <vector>
20 
21 namespace lldb_private {
22 
23 /// \class Args Args.h "lldb/Utility/Args.h"
24 /// A command line argument class.
25 ///
26 /// The Args class is designed to be fed a command line. The command line is
27 /// copied into an internal buffer and then split up into arguments. Arguments
28 /// are space delimited if there are no quotes (single, double, or backtick
29 /// quotes) surrounding the argument. Spaces can be escaped using a \
30 /// character to avoid having to surround an argument that contains a space
31 /// with quotes.
32 class Args {
33 public:
34   struct ArgEntry {
35   private:
36     friend class Args;
37     std::unique_ptr<char[]> ptr;
38     char quote;
39 
40     char *data() { return ptr.get(); }
41 
42   public:
43     ArgEntry() = default;
44     ArgEntry(llvm::StringRef str, char quote);
45 
46     llvm::StringRef ref() const { return c_str(); }
47     const char *c_str() const { return ptr.get(); }
48 
49     /// Returns true if this argument was quoted in any way.
50     bool IsQuoted() const { return quote != '\0'; }
51     char GetQuoteChar() const { return quote; }
52   };
53 
54   /// Construct with an option command string.
55   ///
56   /// \param[in] command
57   ///     A NULL terminated command that will be copied and split up
58   ///     into arguments.
59   ///
60   /// \see Args::SetCommandString(llvm::StringRef)
61   Args(llvm::StringRef command = llvm::StringRef());
62 
63   Args(const Args &rhs);
64   explicit Args(const StringList &list);
65 
66   Args &operator=(const Args &rhs);
67 
68   /// Destructor.
69   ~Args();
70 
71   explicit Args(const Environment &env) : Args() {
72     SetArguments(const_cast<const char **>(env.getEnvp().get()));
73   }
74 
75   explicit operator Environment() const { return GetConstArgumentVector(); }
76 
77   /// Dump all entries to the stream \a s using label \a label_name.
78   ///
79   /// If label_name is nullptr, the dump operation is skipped.
80   ///
81   /// \param[in] s
82   ///     The stream to which to dump all arguments in the argument
83   ///     vector.
84   /// \param[in] label_name
85   ///     The label_name to use as the label printed for each
86   ///     entry of the args like so:
87   ///       {label_name}[{index}]={value}
88   void Dump(Stream &s, const char *label_name = "argv") const;
89 
90   /// Sets the command string contained by this object.
91   ///
92   /// The command string will be copied and split up into arguments that can
93   /// be accessed via the accessor functions.
94   ///
95   /// \param[in] command
96   ///     A command StringRef that will be copied and split up
97   ///     into arguments.
98   ///
99   /// \see Args::GetArgumentCount() const
100   /// \see Args::GetArgumentAtIndex (size_t) const @see
101   /// Args::GetArgumentVector () \see Args::Shift () \see Args::Unshift (const
102   /// char *)
103   void SetCommandString(llvm::StringRef command);
104 
105   bool GetCommandString(std::string &command) const;
106 
107   bool GetQuotedCommandString(std::string &command) const;
108 
109   /// Gets the number of arguments left in this command object.
110   ///
111   /// \return
112   ///     The number or arguments in this object.
113   size_t GetArgumentCount() const;
114   bool empty() const { return GetArgumentCount() == 0; }
115 
116   /// Gets the NULL terminated C string argument pointer for the argument at
117   /// index \a idx.
118   ///
119   /// \return
120   ///     The NULL terminated C string argument pointer if \a idx is a
121   ///     valid argument index, NULL otherwise.
122   const char *GetArgumentAtIndex(size_t idx) const;
123 
124   llvm::ArrayRef<ArgEntry> entries() const { return m_entries; }
125 
126   using const_iterator = std::vector<ArgEntry>::const_iterator;
127 
128   const_iterator begin() const { return m_entries.begin(); }
129   const_iterator end() const { return m_entries.end(); }
130 
131   size_t size() const { return GetArgumentCount(); }
132   const ArgEntry &operator[](size_t n) const { return m_entries[n]; }
133 
134   /// Gets the argument vector.
135   ///
136   /// The value returned by this function can be used by any function that
137   /// takes and vector. The return value is just like \a argv in the standard
138   /// C entry point function:
139   ///     \code
140   ///         int main (int argc, const char **argv);
141   ///     \endcode
142   ///
143   /// \return
144   ///     An array of NULL terminated C string argument pointers that
145   ///     also has a terminating NULL C string pointer
146   char **GetArgumentVector();
147 
148   /// Gets the argument vector.
149   ///
150   /// The value returned by this function can be used by any function that
151   /// takes and vector. The return value is just like \a argv in the standard
152   /// C entry point function:
153   ///     \code
154   ///         int main (int argc, const char **argv);
155   ///     \endcode
156   ///
157   /// \return
158   ///     An array of NULL terminate C string argument pointers that
159   ///     also has a terminating NULL C string pointer
160   const char **GetConstArgumentVector() const;
161 
162   /// Gets the argument as an ArrayRef. Note that the return value does *not*
163   /// have a nullptr const char * at the end, as the size of the list is
164   /// embedded in the ArrayRef object.
165   llvm::ArrayRef<const char *> GetArgumentArrayRef() const {
166     return llvm::makeArrayRef(m_argv).drop_back();
167   }
168 
169   /// Appends a new argument to the end of the list argument list.
170   ///
171   /// \param[in] arg_str
172   ///     The new argument.
173   ///
174   /// \param[in] quote_char
175   ///     If the argument was originally quoted, put in the quote char here.
176   void AppendArgument(llvm::StringRef arg_str, char quote_char = '\0');
177 
178   void AppendArguments(const Args &rhs);
179 
180   void AppendArguments(const char **argv);
181 
182   /// Insert the argument value at index \a idx to \a arg_str.
183   ///
184   /// \param[in] idx
185   ///     The index of where to insert the argument.
186   ///
187   /// \param[in] arg_str
188   ///     The new argument.
189   ///
190   /// \param[in] quote_char
191   ///     If the argument was originally quoted, put in the quote char here.
192   void InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
193                              char quote_char = '\0');
194 
195   /// Replaces the argument value at index \a idx to \a arg_str if \a idx is
196   /// a valid argument index.
197   ///
198   /// \param[in] idx
199   ///     The index of the argument that will have its value replaced.
200   ///
201   /// \param[in] arg_str
202   ///     The new argument.
203   ///
204   /// \param[in] quote_char
205   ///     If the argument was originally quoted, put in the quote char here.
206   void ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
207                               char quote_char = '\0');
208 
209   /// Deletes the argument value at index
210   /// if \a idx is a valid argument index.
211   ///
212   /// \param[in] idx
213   ///     The index of the argument that will have its value replaced.
214   ///
215   void DeleteArgumentAtIndex(size_t idx);
216 
217   /// Sets the argument vector value, optionally copying all arguments into an
218   /// internal buffer.
219   ///
220   /// Sets the arguments to match those found in \a argv. All argument strings
221   /// will be copied into an internal buffers.
222   //
223   //  FIXME: Handle the quote character somehow.
224   void SetArguments(size_t argc, const char **argv);
225 
226   void SetArguments(const char **argv);
227 
228   /// Shifts the first argument C string value of the array off the argument
229   /// array.
230   ///
231   /// The string value will be freed, so a copy of the string should be made
232   /// by calling Args::GetArgumentAtIndex (size_t) const first and copying the
233   /// returned value before calling Args::Shift().
234   ///
235   /// \see Args::GetArgumentAtIndex (size_t) const
236   void Shift();
237 
238   /// Inserts a class owned copy of \a arg_str at the beginning of the
239   /// argument vector.
240   ///
241   /// A copy \a arg_str will be made.
242   ///
243   /// \param[in] arg_str
244   ///     The argument to push on the front of the argument stack.
245   ///
246   /// \param[in] quote_char
247   ///     If the argument was originally quoted, put in the quote char here.
248   void Unshift(llvm::StringRef arg_str, char quote_char = '\0');
249 
250   // Clear the arguments.
251   //
252   // For re-setting or blanking out the list of arguments.
253   void Clear();
254 
255   static lldb::Encoding
256   StringToEncoding(llvm::StringRef s,
257                    lldb::Encoding fail_value = lldb::eEncodingInvalid);
258 
259   static uint32_t StringToGenericRegister(llvm::StringRef s);
260 
261   static const char *GetShellSafeArgument(const FileSpec &shell,
262                                           const char *unsafe_arg,
263                                           std::string &safe_arg);
264 
265   // EncodeEscapeSequences will change the textual representation of common
266   // escape sequences like "\n" (two characters) into a single '\n'. It does
267   // this for all of the supported escaped sequences and for the \0ooo (octal)
268   // and \xXX (hex). The resulting "dst" string will contain the character
269   // versions of all supported escape sequences. The common supported escape
270   // sequences are: "\a", "\b", "\f", "\n", "\r", "\t", "\v", "\'", "\"", "\\".
271 
272   static void EncodeEscapeSequences(const char *src, std::string &dst);
273 
274   // ExpandEscapeSequences will change a string of possibly non-printable
275   // characters and expand them into text. So '\n' will turn into two
276   // characters like "\n" which is suitable for human reading. When a character
277   // is not printable and isn't one of the common in escape sequences listed in
278   // the help for EncodeEscapeSequences, then it will be encoded as octal.
279   // Printable characters are left alone.
280   static void ExpandEscapedCharacters(const char *src, std::string &dst);
281 
282   static std::string EscapeLLDBCommandArgument(const std::string &arg,
283                                                char quote_char);
284 
285 private:
286   std::vector<ArgEntry> m_entries;
287   std::vector<char *> m_argv;
288 };
289 
290 /// \class OptionsWithRaw Args.h "lldb/Utility/Args.h"
291 /// A pair of an option list with a 'raw' string as a suffix.
292 ///
293 /// This class works similar to Args, but handles the case where we have a
294 /// trailing string that shouldn't be interpreted as a list of arguments but
295 /// preserved as is. It is also only useful for handling command line options
296 /// (e.g. '-foo bar -i0') that start with a dash.
297 ///
298 /// The leading option list is optional. If the first non-space character
299 /// in the string starts with a dash, and the string contains an argument
300 /// that is an unquoted double dash (' -- '), then everything up to the double
301 /// dash is parsed as a list of arguments. Everything after the double dash
302 /// is interpreted as the raw suffix string. Note that the space behind the
303 /// double dash is not part of the raw suffix.
304 ///
305 /// All strings not matching the above format as considered to be just a raw
306 /// string without any options.
307 ///
308 /// \see Args
309 class OptionsWithRaw {
310 public:
311   /// Parse the given string as a list of optional arguments with a raw suffix.
312   ///
313   /// See the class description for a description of the input format.
314   ///
315   /// \param[in] argument_string
316   ///     The string that should be parsed.
317   explicit OptionsWithRaw(llvm::StringRef argument_string);
318 
319   /// Returns true if there are any arguments before the raw suffix.
320   bool HasArgs() const { return m_has_args; }
321 
322   /// Returns the list of arguments.
323   ///
324   /// You can only call this method if HasArgs returns true.
325   Args &GetArgs() {
326     assert(m_has_args);
327     return m_args;
328   }
329 
330   /// Returns the list of arguments.
331   ///
332   /// You can only call this method if HasArgs returns true.
333   const Args &GetArgs() const {
334     assert(m_has_args);
335     return m_args;
336   }
337 
338   /// Returns the part of the input string that was used for parsing the
339   /// argument list. This string also includes the double dash that is used
340   /// for separating the argument list from the suffix.
341   ///
342   /// You can only call this method if HasArgs returns true.
343   llvm::StringRef GetArgStringWithDelimiter() const {
344     assert(m_has_args);
345     return m_arg_string_with_delimiter;
346   }
347 
348   /// Returns the part of the input string that was used for parsing the
349   /// argument list.
350   ///
351   /// You can only call this method if HasArgs returns true.
352   llvm::StringRef GetArgString() const {
353     assert(m_has_args);
354     return m_arg_string;
355   }
356 
357   /// Returns the raw suffix part of the parsed string.
358   const std::string &GetRawPart() const { return m_suffix; }
359 
360 private:
361   void SetFromString(llvm::StringRef arg_string);
362 
363   /// Keeps track if we have parsed and stored any arguments.
364   bool m_has_args = false;
365   Args m_args;
366   llvm::StringRef m_arg_string;
367   llvm::StringRef m_arg_string_with_delimiter;
368 
369   // FIXME: This should be a StringRef, but some of the calling code expect a
370   // C string here so only a real std::string is possible.
371   std::string m_suffix;
372 };
373 
374 } // namespace lldb_private
375 
376 #endif // LLDB_UTILITY_ARGS_H
377