1%%% Copyright (C) 2010-2013  Tomas Abrahamsson
2%%%
3%%% Author: Tomas Abrahamsson <tab@lysator.liu.se>
4%%%
5%%% This library is free software; you can redistribute it and/or
6%%% modify it under the terms of the GNU Lesser General Public
7%%% License as published by the Free Software Foundation; either
8%%% version 2.1 of the License, or (at your option) any later version.
9%%%
10%%% This library is distributed in the hope that it will be useful,
11%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
12%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13%%% Lesser General Public License for more details.
14%%%
15%%% You should have received a copy of the GNU Lesser General Public
16%%% License along with this library; if not, write to the Free Software
17%%% Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18%%% MA  02110-1301  USA
19
20-module(gpb_compile).
21%-compile(export_all).
22-export([file/1, file/2]).
23-export([string/2, string/3]).
24-export([proto_defs/2, proto_defs/3]).
25-export([msg_defs/2, msg_defs/3]).
26-export([format_error/1, format_warning/1]).
27-export([c/0, c/1, c/2]). % Cmd line interface, halts vm---don't use from shell!
28-export([parse_opts_and_args/1]).
29-export([show_args/0]).
30-export([show_version/0]).
31-export([locate_import/2]).
32-export([read_import/2]).
33-include_lib("kernel/include/file.hrl").
34-include_lib("eunit/include/eunit.hrl").
35-include("../include/gpb.hrl").
36-include("gpb_codegen.hrl").
37-include("gpb_compile.hrl").
38
39-import(gpb_lib, [replace_term/2]).
40
41%% -- Types -----------------------------------------------------
42
43%% Options
44-type boolean_opt(X) :: X | {X, boolean()}.% Just an option `X' means `{X,true}'
45-type directory() :: string().
46
47-type opts() :: [opt()].
%% A single compiler option, see file/2 for what each one means.
%% Options that are plain on/off switches are written with boolean_opt/1,
%% meaning that both `Opt' and `{Opt, boolean()}' are accepted.
%% The trailing term() keeps the type open for any other option terms.
-type opt() :: boolean_opt(type_specs) |
               {verify, optionally | always | never} |
               {copy_bytes, true | false | auto | integer() | float()} |
               boolean_opt(strings_as_binaries) |
               boolean_opt(defs_as_proplists) |
               boolean_opt(descriptor) |
               boolean_opt(maps) |
               boolean_opt(msgs_as_maps) |
               boolean_opt(mapfields_as_maps) |
               boolean_opt(defs_as_maps) |
               {maps_unset_optional, omitted | present_undefined} |
               {maps_oneof, tuples | flat} |
               {maps_key_type, atom | binary} |
               boolean_opt(nif) |
               {load_nif, string()} |
               {i, directory()} |
               {o, directory()} |
               {o_erl, directory()} | {o_hrl, directory()} |
               {o_nif_cc, directory()} |
               binary |
               boolean_opt(to_proto_defs) | boolean_opt(to_msg_defs) |
               return |
               boolean_opt(return_warnings) | boolean_opt(return_errors) |
               report |
               boolean_opt(report_warnings) | boolean_opt(report_errors) |
               boolean_opt(warnings_as_errors) |
               boolean_opt(include_as_lib) |
               boolean_opt(use_packages) |
               {erlc_compile_options,string()} |
               {rename, renaming()} |
               {msg_name_prefix,
                string() | atom() |
                {by_proto, prefix_by_proto()}} |
               {msg_name_suffix, string() | atom()} |
               boolean_opt(msg_name_to_snake_case) |
               boolean_opt(msg_name_to_lower) |
               {module_name_prefix, string() | atom()} |
               {module_name_suffix, string() | atom()} |
               {module_name, string() | atom()} |
               {translate_type, {gpb_field_type(), [translation()]}} |
               {translate_field, {field_path(), [translation()]}} |
               {any_translate, [translation()]} |
               boolean_opt(epb_compatibility) |
               boolean_opt(epb_functions) |
               boolean_opt(defaults_for_omitted_optionals) |
               boolean_opt(type_defaults_for_omitted_optionals) |
               {import_fetcher, import_fetcher_fun()} |
               {target_erlang_version, integer() | current} |
               term().
96
97-type renaming() :: {pkg_name, name_change()} |
98                    {msg_name, msg_name_change()} |
99                    {msg_fqname, msg_name_change()} |
100                    {group_name, name_change()} |
101                    {group_fqname, name_change()} |
102                    {service_name, name_change()} |
103                    {service_fqname, name_change()} |
104                    {rpc_name, name_change()}.
105
106-type name_change() :: {prefix, string() | atom()} |
107                       {suffix, string() | atom()} |
108                       lowercase |
109                       snake_case |
110                       dots_to_underscores |
111                       base_name.
112
113-type msg_name_change() :: name_change() |
114                           {prefix, {by_proto, prefix_by_proto()}}.
115
116-type prefix_by_proto() :: [{ProtoName::atom(), Prefix::string() | atom()}].
117
118
119-type field_path() :: [atom() | []].
120-type translation() :: {encode, mod_fn_argtemplate()} |
121                       {decode, mod_fn_argtemplate()} |
122                       {decode_init_default, mod_fn_argtemplate()} |
123                       {decode_repeated_add_elem, mod_fn_argtemplate()} |
124                       {decode_repeated_finalize, mod_fn_argtemplate()} |
125                       {merge,  mod_fn_argtemplate()} |
126                       {verify, mod_fn_argtemplate()} |
127                       {type_spec, string()}.
128-type fn_name() :: atom().
129-type mod_fn_argtemplate() :: {module(), fn_name(), arg_template()}.
130-type arg_template() :: [arg()].
131-type arg() :: term() | named_arg().
132-type named_arg() :: '$1' | '$2' | '$errorf' | '$user_data' | '$op'.
133
134-type fetcher_ret() :: from_file | {ok, string()} | {error, term()}.
135-type import_fetcher_fun() :: fun((string()) -> fetcher_ret()).
136
137%% Compilation return values
138-type comp_ret() :: mod_ret() | bin_ret() | error_ret().
139-type mod_ret() :: ok | {ok, [warning()]}.
140-type bin_ret() :: {ok, module(), code()} |
141                   {ok, module(), code(), [warning()]}.
142-type error_ret() :: error | {error, reason()} | {error, reason(), [warning()]}.
143-type warning() :: term().
144-type reason() :: term().
145-type code() :: binary() | gpb_parse:defs() | [code_item()].
146-type code_item() :: {erl, ErlCode :: binary()} |
147                     {nif, NifCcText :: string()}.
148-export_type([opts/0, opt/0]).
149-export_type([comp_ret/0]).
150
151-ifndef(NO_HAVE_STACKTRACE_SYNTAX).
152-compile({nowarn_deprecated_function, {erlang, get_stacktrace, 0}}).
153-endif.
154
%% @equiv file(File, [])
-spec file(string()) -> comp_ret().
file(File) ->
    NoOpts = [],
    file(File, NoOpts).
159
160%% @doc
161%% Compile a .proto file to a .erl file and to a .hrl file.
162%%
163%% The `File' argument must not include path to the .proto file. Example:
164%% "SomeDefinitions.proto" is ok, while "/path/to/SomeDefinitions.proto"
165%% is not ok.
166%%
%% The .proto file is expected to be found in one of the directories
%% specified by `{i,directory()}' options. It is possible to specify
%% `{i,directory()}' several times; they will be searched in the order
%% specified.
170%%
171%% The `type_specs' option enables or disables `::Type()' annotations
172%% in the generated .hrl file. Default is `true'. The default changed
173%% in gpb version 4.0.0. Previously, the default was `false'.
174%% If you have messages referencing other messages cyclically, and get into
175%% troubles when compiling the generated files, set this to `false'.
176%%
177%% The `verify' option specifies whether or not to generate code
178%% that verifies, during encoding, that values are of correct type and
179%% within range.  The `verify' option can have the following values:
180%% <dl>
181%%    <dt>`always'</dt><dd>Generate code that unconditionally
182%%        verifies values.</dd>
%%    <dt>`never'</dt><dd>Generate code that never verifies
%%        values at encoding time. Encoding will fail if a value of the wrong
185%%        type is supplied. This includes forgetting to set a required
186%%        message field. Encoding may silently truncate values out of
187%%        range for some types.</dd>
188%%    <dt>`optionally'</dt><dd>Generate an `encode_msg/2' that accepts
189%%        the run-time option `verify' or `{verify,boolean()}' for specifying
190%%        whether or not to verify values.</dd>
191%% </dl>
192%%
193%% Erlang value verification either succeeds or crashes with the `error'
194%% `{gpb_type_error,Reason}'. Regardless of the `verify' option,
195%% a function, `verify_msg/1' is always generated.
196%%
197%% The `copy_bytes' option specifies whether when decoding data of
198%% type `bytes' (or strings if the `strings_as_binaries' is set), the
199%% decoded bytes should be copied or not.  Copying requires the
200%% `binary' module, which first appeared in Erlang R14A. When not
201%% copying decoded bytes, they will become sub binaries of the larger
202%% input message binary. This may tie up the memory in the input
203%% message binary longer than necessary after it has been
204%% decoded. Copying the decoded bytes will avoid creating sub
205%% binaries, which will in turn make it possible to free the input message
206%% binary earlier. The `copy_bytes' option can have the following values:
207%% <dl>
208%%   <dt>`false'</dt><dd>Never copy bytes/(sub-)binaries.</dd>
209%%   <dt>`true'</dt><dd>Always copy bytes/(sub-)binaries.</dd>
210%%   <dt>`auto'</dt><dd>Copy bytes/(sub-)binaries if the beam vm,
211%%           on which the compiler (this module) is running,
212%%           has the `binary:copy/1' function. (This is the default)</dd>
%%   <dt>integer() | float()</dt><dd>Copy the bytes/(sub-)binaries if the
%%           message is this many times or more larger than the size of the
%%           bytes/(sub-)binary.</dd>
216%% </dl>
217%%
218%% The `strings_as_binaries' option specifies whether strings should
219%% be returned from decoding as strings (list of Unicode code points),
220%% or as binaries (UTF-8 encoded). The `copy_bytes' option applies
221%% to strings as well, when the `strings_as_binaries' option is set.
222%% Upon encoding, both binaries and iolists are accepted.
223%%
224%% The `defs_as_proplists' option changes the generated introspection
225%% functions `find_msg_def' and `get_msg_defs' to return the description
226%% of each message field as a proplist, instead of as a `#field{}' record.
227%% The purpose is to make the generated code completely independent
228%% of gpb, at compile-time (it is already independent at run-time).
229%% The keys of the proplist are the names of the record fields in the
230%% `#field{}' record.  See also {@link gpb:proplists_to_field_records()}
231%% and related functions for conversion functions between these two
232%% formats.
233%%
234%% The `descriptor' option specifies whether or not to generate a
235%% function, descriptor/0, which returns a binary that describes the
236%% proto file(s) contents according to the protobuf's `descriptor.proto'.
237%% The default is to not generate such a description.  The generated
%% description binary is most likely not identical to what `protoc'
%% would generate, but the contents are roughly equivalent.
240%%
241%% The `{o,directory()}' option specifies directory to use for storing
242%% the generated `.erl' and `.hrl' files. Default is the same
243%% directory as for the proto `File'.
244%%
245%% The `{o_erl,directory()}', `{o_hrl,directory()}', `{o_nif_cc,directory()}',
246%% options specify output directories for where to generate the `.erl'
247%% and `.hrl' files respectively, and for the NIF C++ file,
248%% if the `nif' option is specified. The `{o_erl,directory()}' option
249%% overrides any `{o,directory()}' option, and similarly for the
250%% other file-type specific output options.
251%%
252%% The `maps' option will generate a protobuf encoder/decoder that
253%% uses maps instead of records. This option expands to the following
254%% options:
255%% <dl>
256%%    <dt>`msgs_as_maps'</dt>
257%%    <dd>No `.hrl' file will be generated, and the functions
258%%        `encode_msg', `merge_msgs' and `verify_msg' will take the
259%%        message name as an additional parameter.</dd>
260%%    <dt>`mapfields_as_maps'</dt>
261%%    <dd>The value for fields of type `map<_,_>' will be a map
262%%        instead of a list of 2-tuples.</dd>
263%%    <dt>`defs_as_maps'</dt>
264%%    <dd>The introspection will generate message field descriptions
265%%        as maps instead of as `#field{}' records, unless, of course
266%%        `defs_as_proplists' is specified, in which case they will be
267%%        proplists instead.</dd>
268%% </dl>
269%%
270%% For messages as maps, for optional fields, if not set, the
271%% `maps_unset_optional' option specifies the Erlang-internal
272%% representation; both how it is expected to be found at encoding,
273%% and how decoding will return it:
274%% <dl>
275%%   <dt>`omitted'</dt>
276%%   <dd>This means it is not included in the map.
277%%       This is the default. (since gpb version 4.0.0)
278%%   </dd>
279%%   <dt>`present_undefined'</dt>
280%%   <dd>This means it is present and has the value `undefined'.
281%%       This <em>was</em> the default before gpb version 4.0.0.
282%%   </dd>
283%% </dl>
284%%
285%% The `maps_oneof' option can be used for messages as maps, and can only
286%% take effect if `maps_unset_optional' is `omitted' (default since 4.0.0).
287%% It changes the representation of oneof fields as described below, if
288%% we would have a oneof-field, `xf' with two alternatives `a1' and `a2':
289%% <dl>
290%%   <dt>`{maps_oneof,tuples}'</dt>
291%%   <dd>`#{xf => {a1, Value}}' or `#{xf => {a2, Value}}'</dd>
292%%   <dt>`{maps_oneof,flat}'</dt>
%%   <dd>`#{a1 => Value}' or `#{a2 => Value}'</dd>
294%% </dl>
295%%
296%% For messages as maps, the `maps_key_type' option makes it possible
297%% to control whether keys should be atoms (default) or binaries.
298%%
299%% The `nif' option will cause the compiler to generate nif C++ code
300%% for encoding and decoding. The generated nif C++ code can be linked
301%% with the Google protobuf C++ library.  Read the file
302%% `README.nif-cc' for more info.
303%%
304%% The `binary' option will cause the generated and compiled code to be
305%% returned as a binary. No files will be written. The return value
306%% will be on the form `{ok,Mod,Code}' or `{ok,Mod,Code,Warnings}'
307%% if the compilation is successful. This option may be useful
308%% e.g. when generating test cases. In case the `nif' option is set,
309%% the `Code' will be a list of tuples: `{erl,binary()}' which
310%% contains the Erlang object byte code, and `{nif,binary()}' which
311%% contains the C++ code. You will have to compile the C++ code with a
312%% C++ compiler, before you can use the Erlang code.
313%%
314%% The `to_proto_defs' option will result in `{ok,Defs}' or
315%% `{ok,Defs,Warns}' being returned if the compilation is successful.
316%% The returned message definitions can be used with the
317%% {@link proto_defs/2} or {@link proto_defs/3} functions.
318%%
319%% The `to_msg_defs' option is a deprecated alias for `to_proto_defs'.
320%%
321%% <dl>
322%%   <dt>`report_errors'/`report_warnings'</dt>
323%%   <dd>Causes errors/warnings to be printed as they occur.</dd>
324%%   <dt>`report'</dt>
325%%   <dd>This is a short form for both `report_errors' and
326%%       `report_warnings'.</dd>
327%%   <dt>`return_errors'</dt>
328%%   <dd>If this flag is set, then  `{error,ErrorList,WarningList}' is
329%%       returned when there are errors.</dd>
330%%   <dt>`return_warnings'</dt>
331%%   <dd>If  this  flag  is set, then an extra field containing `WarningList'
332%%       is added to the tuples returned on success.</dd>
333%%   <dt>`return'</dt>
334%%   <dd>This is a short form for both `return_errors' and
335%%       `return_warnings'.</dd>
336%% </dl>
337%%
338%% Setting the `warnings_as_errors' option will cause warnings to be
339%% treated as errors.  If there are warnings but no errors, and
340%% `return_warnings' is not specified, then `error' will be returned.
341%%
342%% See {@link format_error/1} for a way to turn an error <i>Reason</i> to
343%% plain text.
344%%
345%% If the `include_as_lib' option is set, the generated code will include
346%% gpb.hrl as a library, which is necessary if dependencies are managed with
347%% Rebar. Otherwise, the header file is included directly and must be located
348%% in the path, which is default behavior.
349%%
350%% The `use_packages' option instructs gpb to prepend the name of a package
351%% to every message it contains. If no package is defined, nothing will be
352%% prepended. This enables the reference of messages in other packages which
353%% would otherwise not be possible. However, for reasons of backward
354%% compatibility, this option is disabled by default.
355%%
%% If the `{erlc_compile_options,string()}' option is set,
%% then the generated code will contain a directive `-compile([String]).'
358%%
359%% The `{rename,{What,How}}' can transform message names, package names,
360%% service and rpc names in various ways. This option supersedes the
361%% options `{msg_name_prefix,Prefix}', `{msg_name_suffix,Suffix}',
362%% `msg_name_to_lower' and `msg_name_to_snake_case', while at the same
363%% time giving more fine-grained control. It is for example possible to
364%% apply snake_casing only to the message name, while keeping the
365%% package name, the service name and the rpc name intact. This can be
366%% useful with grpc, where these name components are exposed. The
367%% `msg_fqname' refers to the fully qualified message name, as in
368%% `Package.MsgName', while the `msg_name' refers to just the message
369%% name without package. The `service_fqname' and `service_name' specifiers
370%% work analogously.
371%%
372%% It is possible to stack `rename' options, and they will be applied in
373%% the order they are specified. So it is for example possible to
374%% snake_case a name, and then also prefix it.
375%%
376%% The `{msg_name_prefix,Prefix}' will add `Prefix' (a string or an atom)
377%% to each message. This might be useful for resolving colliding names,
378%% when incorporating several protocol buffer definitions into the same
379%% project. The `{msg_name_suffix,Suffix}' works correspondingly.
380%%
381%% The `{msg_name_prefix,Prefix}' option expands
382%% to `[{rename,{pkg_name,Prefix}},{rename,{msg_fqname,{prefix,Prefix}}},
%% {rename,{group_fqname,{prefix,Prefix}}}]',
384%% and ditto for suffixes.
385%%
386%% For backwards compatibility, the `{msg_name_prefix,{by_proto,PrefixList}}'
387%% expands to just `[{rename,{msg_fqname,{prefix,PrefixList}}}]'.
388%%
389%% The `msg_name_to_lower' and `msg_name_to_snake_case' options expands
390%% to `[{rename,{pkg_name,X}},{rename,{service_fqname,X}},
391%% {rename,{rpc_name,X}},{rename,{msg_fqname,X}},
392%% {rename,{rpc_name,X}},{rename,{group_fqname,X}}]' where `X' is
393%% `lowercase' or `snake_case' respectively.
394%%
395%% The `{module_name_prefix,Prefix}' will add `Prefix' (a string or an atom)
396%% to the generated code and definition files. The `{module_name_suffix,Suffix}'
%% works correspondingly. For the case of compatibility with Erlang Protobuffs,
%% the `epb_compatibility' option implies `{module_name_suffix,"_pb"}'.
399%%
400%% The `{module_name,Name}' can be used to specify the module name of the
401%% generated code freely, instead of basing it on the proto file name.
402%% The name specified with `module_name' can be prefixed and suffixed with
403%% the `module_name_prefix' and `module_name_suffix' options.
404%%
405%% The `translate_type' option can be used to provide packer and unpacker
406%% functions for message fields of a certain type.
407%% For messages, the `MsgName' refers to a name <em>after</em>
408%% renaming has taken place.
409%% The merge translator is optional, and is called either via the `merge_msgs'
410%% function in the generated code, or when the decoder sees another
411%% field of the same type. The default merge operation is to let the second
412%% element overwrite previous elements. The verify translator is
413%% optional too, since verification can be disabled.
414%% The translation calls are specified as `{Mod,Fn,ArgTemplate}' where
415%% `Mod',`Fn' is a module and function to call, `ArgTemplate' is a list
416%% of terms, containing markers, such as `$1', `$2' and so on, for where
417%% to place the actual args. This makes it possible to specify additional
418%% static argument terms, for instance.
419%% The translator functions are called as follows:
420%% <dl>
421%%   <dt>Encode (Packing)</dt>
%%   <dd>Call `Mod:Fn(Term)' to pack the `Term' (`$1') to
%%       a value suitable for normal gpb encoding.</dd>
%%   <dt>Decode (Unpacking)</dt>
%%   <dd>Call `Mod:Fn(Any)' to unpack the `Any' (`$1'), which is
%%       a normal gpb decoded value, to a term.</dd>
427%%   <dt>Merge</dt>
428%%   <dd>Call `Mod:Fn(Term1, Term2) -> Term3' to merge two
429%%       unpacked terms to a resulting Term3. The `$1' is the
430%%       previously seen term (during decoding, on encountering a
431%%       second field of the same type), or the first argument to the
432%%       `merge_msgs' function. The `$2' is the lastly seen term, or
433%%       the second argument to the `merge_msgs' function.</dd>
434%%   <dt>Verify</dt>
435%%   <dd>Call `Mod:Fn(Term) -> _' to verify an unpacked `Term'.
436%%       If `Term' (`$1') is valid, the function is expected to just return
437%%       any value, which is ignored and discarded.
%%       If `Term' is invalid, the function is expected to not
%%       return anything, but instead either crash, call
%%       `erlang:error/1', or `throw/1' or `exit/1' with the
%%       reason for error.
442%%       (For backwards compatibility, it is also possible
443%%       to have an error function as argument, using `$errorf',
444%%       but this is deprecated.)</dd>
445%% </dl>
446%% There are additional translator argument markers:
447%% <dl>
448%%   <dt>`$user_data'</dt>
449%%   <dd>This will be replaced by the `user_data' option to the
450%%     generated `encode_msg', `decode_msg', `merge_msgs' and
%%     `verify_msg' functions. If that option is not specified, the
%%     value `undefined' is substituted for `$user_data'.</dd>
453%%   <dt>`$op'</dt>
454%%   <dd>This will be replaced by `encode', `decode', `merge',
455%%   `verify', `decode_init_default', `decode_repeated_add_elem' or
456%%   `decode_repeated_finalize', depending on from which context it
457%%   is actually called. This can be useful because if the message is
458%%   to be verified on encoding (see the `verify' option), then the
459%%   same options, and thus the same user-data, are used for both
460%%   `encode_msg' and for `verify_msg'. The `$op' marker makes it
461%%   possible to tell these two call sites apart, if needed.</dd>
462%% </dl>
463%%
%% The option `{any_translate,Translations}' is retained for backwards
%% compatibility, and expands to
%% <code>{translate_type,{{msg,'google.protobuf.Any'},Translations}}</code>.
467%%
468%% The `translate_field' option can be used to translate individual fields.
469%% The option format is `{translate_field,{FieldPath,Translations}}' where
470%% each `Translation' consists of `{Op,{Mod,Fn,ArgTemplate}}' elements,
471%% just as for `translate_type'. The `FieldPath' is a list on the
472%% following format:
473%% <ul>
474%%   <li>`[MsgName]' for the message itself on the top-level</li>
475%%   <li>`[MsgName,FieldName]' for fields, generally</li>
476%%   <li>`[MsgName,FieldName,[]]' for elements of repeated fields</li>
%%   <li>`[MsgName,OneofFieldName,FieldName]' for elements of oneof
478%%     fields.</li>
479%% </ul>
480%% For repeated fields, the additional operations `decode_init_default',
481%% `decode_repeated_add_elem' and `decode_repeated_finalize' also exist
482%% and must all be specified.
483%%
484%% The `epb_compatibility' option is an umbrella-option for
485%% compatibility with the Erlang protobuffs library. It will expand to
486%% the options below. It will expand in-place, meaning any of these
487%% can be overridden if specified before the `epb_compatibility'
488%% option.
489%% <ul>
490%%   <li>`epb_functions'</li>
491%%   <li>`defaults_for_omitted_optionals'</li>
492%%   <li>`{module_name_suffix,"_pb"}'</li>
493%%   <li>`{msg_name_to_lower,true}'</li>
494%% </ul>
495%%
496%% If the `epb_functions' option is specified, then for compatibility
497%% with Erlang protobuffs, the following functions will be generated:
498%% <ul>
499%%   <li>`encode/1'</li>
500%%   <li>`encode_<MsgName>/1'</li>
501%%   <li>`decode/2'</li>
502%%   <li>`decode_<MsgName>/1'</li>
503%% </ul>
504%%
%% The `defaults_for_omitted_optionals' and
%% `type_defaults_for_omitted_optionals' options generate code that
%% sets default values or type-defaults respectively, on decoding, if
508%% an optional field is not present in the binary to decode. Normally
509%% it would otherwise have been set to `undefined'. Note that with
510%% these options it is not possible to determine after decoding
511%% whether a field contained data in the binary message. Also note
512%% that these options are only applicable for proto2 syntax messages,
513%% and are ignored for proto3 syntax messages. (For proto3, it
514%% effectively <em>must</em> be ignored, since, on the wire, a field
515%% set to its type-default value is indistinguishable from an omitted
516%% value.)
517%%
518%% The `import_fetcher' option can be used to catch imports. The
519%% option value must be a function taking one argument, the name of
520%% the file to import. It must return either `from_file', letting this
521%% file pass through the normal file import, or `{ok,string()}' if it
522%% has fetched the file itself, or `{error,term()}'.
523%%
524%% The `target_erlang_version' can be used to specify another major
525%% version of Erlang/OTP to generate code for. The default, `current'
526%% means that the generated code is expected to be compiled and run
527%% on the same major version as gpb runs on.
-spec file(string(), opts()) -> comp_ret().
file(File, Options) ->
    %% Files and strings share one code path; passing a plain file name
    %% (and not a {Mod,Str} tuple) is what selects the file variant.
    do_file_or_string(File, Options).
531
%% @equiv string(Mod, Str, [])
-spec string(module(), string()) -> comp_ret().
string(Mod, Str) ->
    NoOpts = [],
    string(Mod, Str, NoOpts).
536
%% @doc
%% Compile the contents of a `.proto' file, given as a string, using
%% `Mod' as the name of the module. See {@link file/2} for information
%% on options and return values.
-spec string(module(), string(), opts()) -> comp_ret().
string(Mod, Str, Opts) ->
    %% The {Mod,Str} tuple is what tells the common code path that the
    %% input is a string and not a file name.
    In = {Mod, Str},
    do_file_or_string(In, Opts).
543
%% Common driver for file/2 and string/3.
%%
%% `In' is either a file name or a `{Mod, Str}' tuple. The options are
%% normalized, the input is parsed and the resulting definitions are
%% renamed according to the options. On success, the work is handed over
%% to do_proto_defs_aux1/3 with `{o,DefaultOutDir}' appended to the options.
%%
%% A parse error and a renaming error are treated the same way: the error
%% is passed to possibly_report_error/2, and is then returned as
%% `{error, Reason, []}' if `return_warnings' is set, else as
%% `{error, Reason}'.
do_file_or_string(In, Opts0) ->
    Opts1 = normalize_opts(Opts0),
    %% Renaming is only attempted when parsing succeeded, so that both
    %% kinds of failure end up in the single error clause further down.
    ParsedAndRenamed =
        case parse_file_or_string(In, Opts1) of
            {ok, Defs}              -> gpb_names:rename_defs(Defs, Opts1);
            {error, _} = ParseError -> ParseError
        end,
    case ParsedAndRenamed of
        {ok, Defs1} ->
            Mod = find_out_mod(In, Opts1),
            DefaultOutDir = find_default_out_dir(In),
            %% The default goes last in the list; presumably so that an
            %% explicit {o,Dir} given by the caller is found first
            %% (depends on how the option is looked up -- TODO confirm).
            Opts2 = Opts1 ++ [{o,DefaultOutDir}],
            do_proto_defs_aux1(Mod, Defs1, Opts2);
        {error, Reason} = Error ->
            possibly_report_error(Error, Opts1),
            case proplists:get_bool(return_warnings, Opts1) of
                true  -> {error, Reason, []};
                false -> Error
            end
    end.
568
%% Normalize the options in two steps: first the alias and umbrella
%% options, then the return/report options.
normalize_opts(Opts0) ->
    AliasesExpanded = normalize_alias_opts(Opts0),
    normalize_return_report_opts(AliasesExpanded).
572
%% Run the alias/umbrella option normalizers one after another, each
%% one working on the result of the previous one.
normalize_alias_opts(Opts) ->
    Opts1 = norm_opt_alias_to_msg_proto_defs(Opts),
    Opts2 = norm_opt_epb_compat_opt(Opts1),
    Opts3 = norm_opt_map_opts(Opts2),
    norm_opt_any_translate(Opts3).
580
%% Replace the alias `to_msg_defs' (with or without a boolean) with
%% `to_proto_defs'. All other options are kept as they are, in order.
norm_opt_alias_to_msg_proto_defs(Opts) ->
    [case Opt of
         to_msg_defs         -> to_proto_defs;
         {to_msg_defs, Bool} -> {to_proto_defs, Bool};
         _Other              -> Opt
     end
     || Opt <- Opts].
587
%% Expand the umbrella option `epb_compatibility' in place, using
%% proplists:expand/2. The enabled form expands to four options, the
%% `{epb_compatibility,false}' form expands to two.
norm_opt_epb_compat_opt(Opts) ->
    WhenEnabled = [epb_functions,
                   defaults_for_omitted_optionals,
                   {module_name_suffix,"_pb"},
                   {msg_name_to_lower, true}],
    WhenDisabled = [{epb_functions,false},
                    {defaults_for_omitted_optionals,false}],
    Expansions = [{epb_compatibility, WhenEnabled},
                  {{epb_compatibility,false}, WhenDisabled}],
    proplists:expand(Expansions, Opts).
597
%% Expand the umbrella option `maps' in place, using proplists:expand/2,
%% to the three map-related options; `{maps,false}' expands to the same
%% three options, each set to false.
norm_opt_map_opts(Opts) ->
    WhenEnabled = [msgs_as_maps,
                   mapfields_as_maps,
                   defs_as_maps],
    WhenDisabled = [{msgs_as_maps, false},
                    {mapfields_as_maps, false},
                    {defs_as_maps, false}],
    Expansions = [{maps, WhenEnabled},
                  {{maps,false}, WhenDisabled}],
    proplists:expand(Expansions, Opts).
607
%% Rewrite each `{any_translate, Translations}' option to the equivalent
%% `translate_type' option for the message type 'google.protobuf.Any'.
%% All other options are kept as they are, in order.
norm_opt_any_translate(Opts) ->
    [case Opt of
         {any_translate, Transls} ->
             {translate_type, {{msg, 'google.protobuf.Any'}, Transls}};
         _Other ->
             Opt
     end
     || Opt <- Opts].
616
%% Expand the short forms `return' and `report', then add the option
%% `report_warnings' unless `return_warnings' is defined, and likewise
%% `report_errors' unless `return_errors' is defined.
normalize_return_report_opts(Opts1) ->
    ReturnExpanded = expand_opt(return, [return_warnings, return_errors],
                                Opts1),
    BothExpanded = expand_opt(report, [report_warnings, report_errors],
                              ReturnExpanded),
    WithWarnings = unless_defined_set(return_warnings, report_warnings,
                                      BothExpanded),
    unless_defined_set(return_errors, report_errors, WithWarnings).
623
%% Replace every occurrence of `OptionToTestFor' in `Opts' with the
%% elements of `OptionsToExpandTo', in place. Other options are kept.
expand_opt(OptionToTestFor, OptionsToExpandTo, Opts) ->
    Expand = fun(Opt) ->
                     case Opt == OptionToTestFor of
                         true  -> OptionsToExpandTo;
                         false -> [Opt]
                     end
             end,
    lists:flatmap(Expand, Opts).
630
%% Append `Default' to `Opts', unless the option `OptionToTestFor' is
%% already defined in `Opts', in which case `Opts' is returned unchanged.
unless_defined_set(OptionToTestFor, Default, Opts) ->
    AlreadyDefined = is_option_defined(OptionToTestFor, Opts),
    if AlreadyDefined -> Opts;
       true           -> Opts ++ [Default]
    end.
636
%% Tell whether `Key' occurs in `Opts', either as the first element of
%% a 2-tuple, or as an option on its own. Tuples of other sizes are
%% compared as a whole.
is_option_defined(Key, Opts) ->
    Keys = [case Opt of
                {K, _V} -> K;
                Other   -> Other
            end
            || Opt <- Opts],
    lists:member(Key, Keys).
642
%% Find the module name: for a `{Mod, Str}' input, it is `Mod';
%% for a file name, it is derived from the file name and the options.
find_out_mod(In, Opts) ->
    case In of
        {Mod, _S} -> Mod;
        File      -> gpb_names:file_name_to_module_name(File, Opts)
    end.
647
%% The default output directory: "." for a `{Mod, Str}' input,
%% and the directory part of the file name for a file.
find_default_out_dir(In) ->
    case In of
        {_Mod, _S} -> ".";
        File       -> filename:dirname(File)
    end.
650
%% @equiv proto_defs(Mod, Defs, [])
-spec proto_defs(module(), gpb_parse:defs()) -> comp_ret().
proto_defs(Mod, Defs) ->
    NoOpts = [],
    proto_defs(Mod, Defs, NoOpts).
655
%% @doc
%% Compile already parsed definitions, to file or to a binary.
%% See {@link file/2} for information on options and return values.
-spec proto_defs(module(), gpb_parse:defs(), opts()) -> comp_ret().
proto_defs(Mod, Defs, Opts) ->
    NormalizedOpts = normalize_opts(Opts),
    do_proto_defs_aux1(Mod, Defs, NormalizedOpts).
662
%% Collect warnings, analyze the definitions and verify the options.
%% If the options are ok, go on with do_proto_defs_aux2/4 and return or
%% report its result together with the collected warnings. If not,
%% return or report the option error, with no warnings.
do_proto_defs_aux1(Mod, Defs, Opts) ->
    possibly_probe_defs(Defs, Opts),
    PackedWarns = check_unpackables_marked_as_packed(Defs),
    FlatOneofWarns = check_maps_flat_oneof_may_fail_on_compilation(Opts),
    AllWarns = PackedWarns ++ FlatOneofWarns,
    %% The analysis is done before the options are verified,
    %% even though it is only used when the options are ok.
    AnRes = gpb_analyzer:analyze_defs(Defs, Opts),
    {Res, WarnsToHandle} =
        case verify_opts(Defs, Opts) of
            ok ->
                CleanMod = clean_module_name(Mod),
                {do_proto_defs_aux2(Defs, CleanMod, AnRes, Opts), AllWarns};
            {error, OptError} ->
                {{error, OptError}, []}
        end,
    return_or_report_warnings_or_errors(Res, WarnsToHandle, Opts,
                                        get_output_format(Opts)).
678
%% Verify that the options are consistent. The checks are run in order
%% until one of them fails. Returns `ok' or the first `{error, Reason}'.
verify_opts(Defs, Opts) ->
    CheckTranslationVsNif = fun() -> verify_opts_translation_and_nif(Opts) end,
    CheckEpbCompat = fun() -> verify_opts_epb_compat(Defs, Opts) end,
    CheckFlatOneof = fun() -> verify_opts_flat_oneof(Opts) end,
    while_ok([CheckTranslationVsNif, CheckEpbCompat, CheckFlatOneof]).
683
%% Call the zero-arity funs in order for as long as they return `ok'.
%% Return `ok' if all of them do, else return what the first fun that
%% returned something else returned; the remaining funs are not called.
while_ok([Check | Rest]) ->
    case Check() of
        ok  -> while_ok(Rest);
        Err -> Err
    end;
while_ok([]) ->
    ok.
690
%% Translations (the `translate_type' or `translate_field' options)
%% cannot be combined with the `nif' option. Returns `ok' or an error.
verify_opts_translation_and_nif(Opts) ->
    HasTranslation = lists:keymember(translate_type, 1, Opts)
        orelse lists:keymember(translate_field, 1, Opts),
    case HasTranslation andalso proplists:get_bool(nif, Opts) of
        true  -> {error, {invalid_options, translation, nif}};
        false -> ok
    end.
700
%% Verify the `epb_functions' option: it cannot be combined with maps,
%% and it cannot be used when there is a message named `msg'.
%% Returns `ok' or the first `{error, Reason}' found.
verify_opts_epb_compat(Defs, Opts) ->
    EpbFunctions = proplists:get_bool(epb_functions, Opts),
    CheckNotMaps =
        fun() ->
                RecordsOrMaps = gpb_lib:get_records_or_maps_by_opts(Opts),
                if EpbFunctions, RecordsOrMaps =:= maps ->
                        {error, {invalid_options, epb_functions,maps}};
                   true ->
                        ok
                end
        end,
    CheckNoMsgNamedMsg =
        fun() ->
                %% The message names are only examined with epb_functions
                HasMsgNamedMsg =
                    EpbFunctions
                    andalso lists:member(msg, gpb_lib:msg_names(Defs)),
                case HasMsgNamedMsg of
                    true ->
                        {error, {epb_functions_impossible,
                                 {with_msg_named,msg}}};
                    false ->
                        ok
                end
        end,
    while_ok([CheckNotMaps, CheckNoMsgNamedMsg]).
726
%% When the options specify maps with flat oneof, verify that the
%% target can do flat oneof for maps. Returns `ok' or an error.
verify_opts_flat_oneof(Opts) ->
    IsFlatOneof = case gpb_lib:get_mapping_and_unset_by_opts(Opts) of
                      #maps{oneof=flat} -> true;
                      _                 -> false
                  end,
    %% The target is only examined when flat oneof is requested
    Unsupported =
        IsFlatOneof
        andalso not gpb_lib:target_can_do_flat_oneof_for_maps(Opts),
    case Unsupported of
        true  -> {error, maps_flat_oneof_not_supported_for_target_version};
        false -> ok
    end.
738
%% Return a list with a warning if the options specify maps with flat
%% oneof, and the target can do flat oneof for maps, but the generated
%% code may fail to compile for the target. Otherwise return [].
check_maps_flat_oneof_may_fail_on_compilation(Opts) ->
    CanDoFlatOneof = gpb_lib:target_can_do_flat_oneof_for_maps(Opts),
    MayFailToCompile =
        gpb_lib:target_may_fail_compilation_for_flat_oneof_for_maps(Opts),
    case gpb_lib:get_mapping_and_unset_by_opts(Opts) of
        #maps{oneof=flat} when CanDoFlatOneof, MayFailToCompile ->
            [maps_flat_oneof_generated_code_may_fail_to_compile];
        _ ->
            %% Nothing to warn about here. If the target cannot do flat
            %% oneof at all, a later check will signal an error.
            []
    end.
754
%% @equiv msg_defs(Mod, Defs, [])
%% @doc Deprecated, use proto_defs/2 instead.
-spec msg_defs(module(), gpb_parse:defs()) -> comp_ret().
msg_defs(Mod, Defs) ->
    %% msg_defs/3 is the same as proto_defs/3, so go there directly,
    %% with an empty list of options.
    proto_defs(Mod, Defs, []).
760
%% @spec msg_defs(Mod, Defs, Opts) -> CompRet
%% @equiv proto_defs(Mod, Defs, Opts)
%% @doc Deprecated, use proto_defs/3 instead.
%% This function only passes its arguments on to proto_defs/3.
-spec msg_defs(module(), gpb_parse:defs(), opts()) -> comp_ret().
msg_defs(Mod, Defs, Opts) ->
    proto_defs(Mod, Defs, Opts).
767
%% Produce the output according to the output format in the options:
%% - `proto_defs': return the definitions as `{ok, Defs}'
%% - `binary': generate the code and compile it to a binary
%% - `file': generate the code and write it to files in the output
%%   directories. Returns `ok', or `{error, {write_failed, File, Reason}}'
%%   where File is the file that could not be written. All writes are
%%   attempted; if several fail, the error for the first one is returned.
do_proto_defs_aux2(Defs, Mod, AnRes, Opts) ->
    case get_output_format(Opts) of
        proto_defs ->
            {ok, Defs};
        binary ->
            ErlTxt = format_erl(Mod, Defs, AnRes, Opts),
            HrlTxt = possibly_format_hrl(Mod, Defs, AnRes, Opts),
            NifTxt = possibly_format_nif_cc(Mod, Defs, AnRes, Opts),
            compile_to_binary(Mod, HrlTxt, ErlTxt, NifTxt, Opts);
        file ->
            ErlTxt = format_erl(Mod, Defs, AnRes, Opts),
            HrlTxt = possibly_format_hrl(Mod, Defs, AnRes, Opts),
            NifTxt = possibly_format_nif_cc(Mod, Defs, AnRes, Opts),
            ErlOutDir = get_erl_outdir(Opts),
            HrlOutDir = get_hrl_outdir(Opts),
            NifCcOutDir = get_nif_cc_outdir(Opts),
            Erl   = filename:join(ErlOutDir, atom_to_list(Mod) ++ ".erl"),
            Hrl   = filename:join(HrlOutDir, atom_to_list(Mod) ++ ".hrl"),
            NifCc = filename:join(NifCcOutDir, atom_to_list(Mod) ++ ".nif.cc"),
            case {file_write_file(Erl, ErlTxt, Opts),
                  possibly_write_file(Hrl, HrlTxt, Opts),
                  possibly_write_file(NifCc, NifTxt, Opts)} of
                {ok, ok, ok}       -> ok;
                {{error, R}, _, _} -> {error, {write_failed, Erl, R}};
                %% Name the .hrl file, not the .erl file, when it is
                %% the .hrl file that could not be written.
                {_, {error, R}, _} -> {error, {write_failed, Hrl, R}};
                {_, _, {error, R}} -> {error, {write_failed, NifCc,  R}}
            end
    end.
796
%% Merge the extra warnings into the result, then print warnings and
%% errors if the options say so, and finally shape the return value
%% according to the options.
return_or_report_warnings_or_errors(Res, ExtraWarns, Opts, OutFormat) ->
    Merged = merge_warns(Res, ExtraWarns, OutFormat),
    %% Warnings are reported before any error
    Reporters = [fun possibly_report_warnings/2,
                 fun possibly_report_error/2],
    lists:foreach(fun(Report) -> Report(Merged, Opts) end, Reporters),
    return_warnings_or_errors(Merged, Opts).
802
%% Add extra warnings to a result, so that the result always carries a
%% list of warnings as its last element. The extra warnings are placed
%% before any warnings already in the result. The output format is
%% needed to tell `{ok, Warns}' (file) from `{ok, Defs}' (proto_defs).
%% A bare `error' for the binary format is an internal error.
merge_warns(ok, Extra, _OutFmt) ->
    {ok, Extra};
merge_warns({ok, Ws}, Extra, file) ->
    {ok, Extra++Ws};
merge_warns({ok, Defs}, Extra, proto_defs) ->
    {ok, Defs, Extra};
merge_warns({ok, Mod, Bin}, Extra, binary) ->
    {ok, Mod, Bin, Extra};
merge_warns({ok, Mod, Bin, Ws}, Extra, binary) ->
    {ok, Mod, Bin, Extra++Ws};
merge_warns({error, Reason}, Extra, _OutFmt) ->
    {error, Reason, Extra};
merge_warns({error, Reason, Ws}, Extra, _OutFmt) ->
    {error, Reason, Extra++Ws};
merge_warns(error, Extra, binary) ->
    erlang:error({internal_error, ?MODULE,
                  generated_code_failed_to_compile, Extra}).
813
%% Print the warnings in the result, if the `report_warnings' option is
%% set. The result must carry the warnings as its last element.
possibly_report_warnings(Result, Opts) ->
    Warnings = case Result of
                   {ok, Ws}             -> Ws;
                   {ok, _Defs, Ws}      -> Ws;
                   {ok, _M, _B, Ws}     -> Ws;
                   {error, _Reason, Ws} -> Ws
               end,
    ShouldReport = proplists:get_bool(report_warnings, Opts),
    if ShouldReport -> lists:foreach(fun report_warning/1, Warnings);
       true         -> ok
    end.
825
%% Print one warning, formatted as text, on a line of its own.
report_warning(Warn) ->
    Text = format_warning(Warn),
    io:format("~s~n", [Text]).
828
%% Print the error, if the result is an error and the `report_errors'
%% option is set. Otherwise do nothing.
possibly_report_error(Res, Opts) ->
    ReportErrors = proplists:get_bool(report_errors, Opts),
    IsError = case Res of
                  {error, _Reason, _Warns} -> true;
                  {error, _Reason}         -> true;
                  _                        -> false
              end,
    if IsError, ReportErrors -> io:format("~s~n", [format_error(Res)]);
       true                  -> ok
    end.
838
%% Shape the return value according to the `return_warnings' and
%% `warnings_as_errors' options: warnings are kept in the result only
%% with `return_warnings', and with `warnings_as_errors' a successful
%% result that has warnings is turned into an error.
return_warnings_or_errors(Res, Opts) ->
    ReturnWarnings = proplists:get_bool(return_warnings, Opts),
    WarningsAsErrors = proplists:get_bool(warnings_as_errors, Opts),
    case {ReturnWarnings, WarningsAsErrors} of
        {true,  true}  -> turn_warnings_to_errors_keep(Res);
        {true,  false} -> Res;
        {false, true}  -> turn_warnings_to_errors_remove(Res);
        {false, false} -> remove_warnings_from_res(Res)
    end.
852
%% Turn a successful result that has warnings into an error with an
%% empty list as reason, keeping the warnings. A successful result
%% with no warnings, and an error result, are returned unchanged.
turn_warnings_to_errors_keep({error, _Reason, _Warns}=Failed) -> Failed;
turn_warnings_to_errors_keep({ok, []}=Clean)                  -> Clean;
turn_warnings_to_errors_keep({ok, Warns})                     -> {error, [], Warns};
turn_warnings_to_errors_keep({ok, _MsgDefs, []}=Clean)        -> Clean;
turn_warnings_to_errors_keep({ok, _MsgDefs, Warns})           -> {error, [], Warns};
turn_warnings_to_errors_keep({ok, _Mod, _Bin, []}=Clean)      -> Clean;
turn_warnings_to_errors_keep({ok, _Mod, _Bin, Warns})         -> {error, [], Warns}.
860
%% Turn a successful result that has warnings into a bare `error'.
%% For a successful result with no warnings, and for an error result,
%% the (list of) warnings is removed from the result.
turn_warnings_to_errors_remove({error, Reason, _Warns})  -> {error, Reason};
turn_warnings_to_errors_remove({ok, []})                 -> ok;
turn_warnings_to_errors_remove({ok, _Warns})             -> error;
turn_warnings_to_errors_remove({ok, MsgDefs, []})        -> {ok, MsgDefs};
turn_warnings_to_errors_remove({ok, _MsgDefs, _Warns})   -> error;
turn_warnings_to_errors_remove({ok, Mod, Bin, []})       -> {ok, Mod, Bin};
turn_warnings_to_errors_remove({ok, _Mod, _Bin, _Warns}) -> error.
868
%% Remove the warnings, ie the last element, from a result.
%% The result `{ok, Warns}' becomes just `ok'.
remove_warnings_from_res({error, Reason, _Warns}) -> {error, Reason};
remove_warnings_from_res({ok, _Warns})            -> ok;
remove_warnings_from_res({ok, MsgDefs, _Warns})   -> {ok, MsgDefs};
remove_warnings_from_res({ok, Mod, Bin, _Warns})  -> {ok, Mod, Bin}.
873
%% Find the output format from the options. The first of the options
%% `binary', `{binary, true}', `to_proto_defs' or `{to_proto_defs, true}'
%% decides: `binary' or `proto_defs'. With none of them, it is `file'.
get_output_format(Opts) ->
    FormatOpts = [binary, {binary, true},
                  to_proto_defs, {to_proto_defs, true}],
    Selected = [case Opt of
                    to_proto_defs         -> proto_defs;
                    {to_proto_defs, true} -> proto_defs;
                    _                     -> binary
                end
                || Opt <- Opts,
                   lists:member(Opt, FormatOpts)],
    case Selected of
        [First | _] -> First;
        []          -> file
    end.
880
%% Output directory for the .erl file: the `o_erl' option if present,
%% else the general output directory.
get_erl_outdir(Opts) ->
    Fallback = get_outdir(Opts),
    proplists:get_value(o_erl, Opts, Fallback).
883
%% Output directory for the .hrl file: the `o_hrl' option if present,
%% else the general output directory.
get_hrl_outdir(Opts) ->
    Fallback = get_outdir(Opts),
    proplists:get_value(o_hrl, Opts, Fallback).
886
%% Output directory for the NIF C++ file: the `o_nif_cc' option if
%% present, else the general output directory.
get_nif_cc_outdir(Opts) ->
    Fallback = get_outdir(Opts),
    proplists:get_value(o_nif_cc, Opts, Fallback).
889
%% The general output directory: the `o' option if present, else the
%% current directory.
get_outdir(Opts) ->
    CurrentDir = ".",
    proplists:get_value(o, Opts, CurrentDir).
892
%% Make a module name usable as an Erlang module name by replacing
%% every dot in it with an underscore.
clean_module_name(Mod) ->
    ReOpts = [global, {return,list}],
    Name = atom_to_list(Mod),
    list_to_atom(re:replace(Name, "[.]", "_", ReOpts)).
896
%% @spec format_error({error, Reason} | Reason) -> io_list()
%%           Reason = term()
%%
%% @doc Produce a plain-text error message from a reason returned by
%% for instance {@link file/2} or {@link proto_defs/2}. The reason may
%% be given as is, or wrapped as `{error, Reason}' or
%% `{error, Reason, Warnings}'.
-spec format_error(Err) -> iolist() when
      Err :: reason() | {error, reason()} | {error, reason(), [warning()]}.
format_error(Err) ->
    %% Unwrap the reason, if it is wrapped
    Reason = case Err of
                 {error, R, _Warns} -> R;
                 {error, R}         -> R;
                 R                  -> R
             end,
    fmt_err(Reason).
907
%% Format an error reason as text. Reasons that are not recognized
%% are formatted as an "Unexpected error".
%% Note: do NOT include trailing newline (\n or ~n)
fmt_err({option_error, {not_supported, maps_omitted_nif}}) ->
    ?f("Options maps, maps_unset_optional=omitted and nif is not supported");
fmt_err({parse_error, FileName, {Line, Module, ErrInfo}}) ->
    ?f("~s:~w: ~s", [FileName, Line, Module:format_error(ErrInfo)]);
fmt_err({scan_error, FileName, {Line, Module, ErrInfo}}) ->
    ?f("~s:~w: ~s", [FileName, Line, Module:format_error(ErrInfo)]);
fmt_err({import_not_found, Import, Tried}) ->
    %% One line for each file that was tried, with the reason it failed
    PrettyTried = [begin
                       PrettyReason = file:format_error(Reason),
                       ?f("~n  ~ts (~s (~p))", [File,PrettyReason,Reason])
                   end
                   || {File,Reason} <- Tried],
    TriedTxt = if Tried == [] -> "";
                  true -> ", tried:"
               end,
    ?f("Could not find import file ~p~s~s", [Import, TriedTxt, PrettyTried]);
fmt_err({fetcher_issue, File, Reason}) ->
    ?f("Failed to import file ~p using fetcher, ~p", [File, Reason]);
fmt_err({read_failed, File, Reason}) ->
    ?f("failed to read ~p: ~s (~p)", [File, file:format_error(Reason), Reason]);
fmt_err({post_process, Reasons}) ->
    gpb_parse:format_post_process_error({error, Reasons});
fmt_err({write_failed, File, Reason}) ->
    ?f("failed to write ~s: ~s (~p)", [File, file:format_error(Reason),Reason]);
fmt_err({invalid_options, translation, nif}) ->
    "Option error: Not supported: both translation option and nif";
fmt_err({unsupported_translation, _Type, non_msg_type}) ->
    "Option to translate is supported only for message types, for now";
fmt_err({invalid_options, epb_functions, maps}) ->
    "Option error: Not supported: both epb_compatibility (or epb_functions) "
        "and maps";
fmt_err({epb_functions_impossible, {with_msg_named, msg}}) ->
    %% This is the reason that the option verification returns when a
    %% message is named msg; format it like the older reason below.
    fmt_err({epb_compatibility_impossible, {with_msg_named, msg}});
fmt_err({epb_compatibility_impossible, {with_msg_named, msg}}) ->
    "Not possible to generate epb compatible functions when a message "
        "is named 'msg' because of collision with the standard gpb functions "
        "'encode_msg' and 'decode_msg'";
fmt_err(maps_flat_oneof_not_supported_for_target_version) ->
    "Flat oneof for maps is only supported on Erlang 18 and later";
fmt_err({rename_defs, Reason}) ->
    gpb_names:format_error(Reason);
fmt_err(X) ->
    ?f("Unexpected error ~p", [X]).
950
%% @doc Produce a plain-text warning message from a warning returned by
%% for instance {@link file/2} or {@link proto_defs/2}.
%% A warning that is already text is returned as is.
%% @end
%% Note: do NOT include trailing newline (\n or ~n)
-spec format_warning(warning()) -> iolist().
format_warning(Warning) ->
    case Warning of
        {ignored_field_opt_packed_for_unpackable_type,
         MsgName, FName, Type, _Opts} ->
            ?f("Warning: ignoring option packed for non-packable field ~s.~s "
               "of type ~w", [MsgName, FName, Type]);
        maps_flat_oneof_generated_code_may_fail_to_compile ->
            "Warning: Generated code for flat oneof for maps may fail to compile "
                "on 18.3.4.6, or later Erlang 18 versions, due to a compiler issue";
        _ ->
            IsText = io_lib:deep_char_list(Warning),
            if IsText -> Warning;
               true   -> ?f("Warning: Unknown warning: ~p", [Warning])
            end
    end.
968
%% @doc Command line interface for the compiler, for when there are no
%% proto files to compile: say so, print a help message and halt with
%% exit status 0.
-spec c() -> no_return().
c() ->
    io:format("No proto files specified.~n", []),
    show_help(),
    halt(0).
976
977%% @doc This function is intended as a command line interface for the compiler.
978%% Call it from the command line as follows:
979%% ```
980%%    erl <erlargs> [gpb-opts] -s gpb_compile c File.proto ...
981%%    erl <erlargs> -s gpb_compile c File.proto ... -extra [gpb-opts]
982%% '''
983%% The `<erlargs>' can be `-noshell -noinput +B -boot start_clean -pa SomeDir'
984%%
985%% The options below are supported. Dashes and underscores inside option names
986%% are equivalent, ie `-o-erl' and `-o_erl' are the same option.
987%% <dl>
988%%   <dt>`-IDir' `-I Dir'</dt>
989%%   <dd>Specify include directory.
990%%       Option may be specified more than once to specify
991%%       several include directories.</dd>
992%%   <dt>`-o Dir'</dt>
993%%   <dd>Specify output directory for where to generate
994%%       the <i>ProtoFile</i>.erl and <i>ProtoFile</i>.hrl</dd>
995%%   <dt>`-o-erl Dir' | `-o-hrl Dir' | `-o-nif-cc Dir'</dt>
996%%   <dd>Specify output directory for where to generate
997%%       the <i>ProtoFile</i>.erl and <i>ProtoFile</i>.hrl respectively,
998%%       and for the NIF C++ file, if the `-nif' option is specified.
999%%       The `-o-erl Dir' option overrides any `-o Dir' option, and
1000%%       similarly for the other file-type specific output options.</dd>
1001%%   <dt>`-v optionally | always | never'</dt>
1002%%   <dd>Specify how the generated encoder should
1003%%       verify the message to be encoded.</dd>
1004%%   <dt>`-nif'</dt>
1005%%   <dd>Generate nifs for linking with the protobuf C(++) library.</dd>
1006%%   <dt>`-load_nif FunctionDefinition'</dt>
1007%%   <dd>Specify `FunctionDefinition' as the text that defines the
1008%%       function `load_nif/0'.  This is called as the `on_load'
1009%%       hook for loading the NIF.  See also the doc for the `load_nif'
1010%%       option in the {@link file/2} function.</dd>
1011%%   <dt>`-c true | false | auto | integer() | float()'</dt>
1012%%   <dd>Specify how or when the generated decoder should
1013%%       copy fields of type `bytes'. See the `copy_bytes' option
1014%%       for the function {@link file/2} for more info.</dd>
1015%%   <dt>`-strbin'</dt>
1016%%   <dd>Specify that decoded strings should be returned as binaries,
1017%%       instead of as strings (lists).</dd>
1018%%   <dt>`-pldefs'</dt>
1019%%   <dd>Specify that introspection functions shall return proplists
1020%%       instead of `#field{}' records, to make the generated code
1021%%       completely free of even compile-time dependencies to gpb.</dd>
1022%%   <dt>`-pkgs'</dt>
1023%%   <dd>Prepend the name of a package to every message it contains.
1024%%       If no package is defined, nothing will be prepended.
1025%%       Default is to not prepend package names for backwards
1026%%       compatibility, but it is needed for some proto files.</dd>
1027%%   <dt>`-translate_type TMsFs'</dt>
1028%%   <dd>Call functions in `TMsFs' to pack, unpack, merge and verify
%%       for the specified type. The `TMsFs' is a string on the
1030%%       following format: `type=Type,e=Mod:Fn,d=Mod:Fn[,m=Mod:Fn][,V=Mod:Fn]'.
1031%%       The Type and specified modules and functions are called and used
1032%%       as follows:
1033%%       <dl>
1034%%         <dt>`type=Type'</dt>
%%         <dd>Specifies that the translations apply to fields of type `Type'.
1036%%             The `Type' may be either of:
1037%%             `msg:MsgName' (after any renaming operations),
1038%%             `enum:EnumName', `int32', `int64', `uint32', `uint64',
1039%%             `sint32', `sint64', `fixed32', `fixed64', `sfixed32',
1040%%             `sfixed64', `bool', `double', `string', `bytes' or
1041%%             `map<KeyType,ValueType>'. The last may need quoting in
1042%%             the shell.</dd>
1043%%         <dt>`e=Mod:Fn'</dt>
1044%%         <dd>Call `Mod:Fn(Term)' to pack the `Term' to a value of type
1045%%             `Type', ie to a value that gpb knows how to wire-encode.</dd>
1046%%         <dt>`d=Mod:Fn'</dt>
1047%%         <dd>Call `Mod:Fn(Value)' to unpack the just wire-decoded `Value'
1048%%             of type `Type', to something of your choice.</dd>
1049%%         <dt>`m=Mod:Fn'</dt>
1050%%         <dd>Call `Mod:Fn(Term1, Term2) -> Term3' to merge two
1051%%             unpacked terms to a resulting Term3. Note that this function
1052%%             is never called for scalar types.</dd>
1053%%         <dt>`V=Mod:Fn'</dt>
1054%%         <dd>Call `Mod:Fn(Term) -> _' to verify an unpacked `Term'.
1055%%             If `Term' is valid, the function is expected to just return
1056%%             any value, which is ignored and discarded.
%%             If `Term' is invalid, the function is expected to not
1058%%             return anything, but instead either crash, call
1059%%             `erlang:error/1', or `throw/1' or `exit/1' with the
1060%%             reason for error.
1061%%             If you want to use a verifier, this is the new preferred
1062%%             approach.</dd>
1063%%         <dt>`v=Mod:Fn'</dt>
1064%%         <dd>Call `Mod:Fn(Term, ErrorF) -> _' to verify an unpacked `Term'.
1065%%             This exists for backwards compatibility, and its use
%%             is deprecated.</dd>
1067%%       </dl>
1068%%   </dd>
1069%%   <dt>`-translate_field FMsFs'</dt>
1070%%   <dd>Call functions in FMsFs to pack, unpack, merge, and verify.
1071%%       This is similar to the `-translate_type' option, except that
1072%%       a message field is specified instead of a type. The `FMsFs'
1073%%       is a string on the following format:
1074%%       `field=Path,e=...,d=...,m=...,V=...[,i=Mod:Fn][,a=Mod:Fn][,f=Mod:Fn]'
1075%%       See the `-translate_type' option for info on `e=', `d=', `m=' and `V='
1076%%       items. Additionally for this `-translate_field' option, these exist:
1077%%       <dl>
1078%%         <dt>`field=Path'</dt>
1079%%         <dd>The `Path' indicates the element to translate as follows:
1080%%           <ul>
1081%%             <li>`MsgName' for the message itself. (This is actually
1082%%                  equivalent to `-translate_type type=msg:MsgName,...')</li>
1083%%             <li>`MsgName.FieldName' for fields generally</li>
1084%%             <li>`MsgName.OneofName.FieldName' for oneof fields</li>
1085%%             <li>`MsgName.FieldName.[]' for elements of repeated fields</li>
1086%%           </ul>
1087%%         </dd>
1088%%         <dt>`i=Mod:Fn'</dt>
1089%%         <dd>For repeated fields, call `Mod:Fn()' on decoding to initialize
1090%%             the field to some value</dd>
1091%%         <dt>`a=Mod:Fn'</dt>
1092%%         <dd>For repeated fields, call `Mod:Fn(Elem,S)' on decoding
%%             to add an item</dd>
1094%%         <dt>`f=Mod:Fn'</dt>
1095%%         <dd>For repeated fields, call `Mod:Fn(S)' on decoding
1096%%             to finalize the field</dd>
1097%%       </dl>
1098%%   </dd>
1099%%   <dt>`-any_translate MsFs'</dt>
1100%%   <dd>Call functions in `MsFs' to pack, unpack, merge and verify
1101%%       `google.protobuf.Any' messages. The `MsFs' is a string on the
1102%%       following format: `e=Mod:Fn,d=Mod:Fn[,m=Mod:Fn][,V=Mod:Fn]'.
1103%%       See the translate option for details on the string components.</dd>
1104%%   <dt>`-msgprefix Prefix'</dt>
%%   <dd>Prefix each message with `Prefix'. This can be useful
1106%%       when including different sub-projects that have colliding
1107%%       message names.</dd>
1108%%   <dt>`-modprefix Prefix'</dt>
1109%%   <dd>Prefix each module with `Prefix'. Normally the module name of
1110%%       the generated code is based on the name of the `.proto' file.
1111%%       This option prepends a prefix to the module name, which can be
1112%%       useful when including different sub-projects that have
1113%%       colliding proto file names.</dd>
1114%%   <dt>`-msgsuffix Suffix'</dt>
1115%%   <dd>Suffix each message name with `Suffix'.</dd>
1116%%   <dt>`-modsuffix Suffix'</dt>
1117%%   <dd>Suffix each module name with `Suffix'.</dd>
1118%%   <dt>`-modname Name'</dt>
1119%%   <dd>Specify the name of the generated module.</dd>
1120%%   <dt>`-msgtolower'</dt>
1121%%   <dd>ToLower each message. Any prefixes/suffixes are added
1122%%       after case modification.</dd>
1123%%   <dt>`-il'</dt>
1124%%   <dd>Generate code that include gpb.hrl using `-include_lib'
1125%%       instead of `-include', which is the default.</dd>
1126%%   <dt>`-type'<br/>`-no_type'</dt>
1127%%   <dd>Enables or disables `::Type()' annotations in the generated code.
1128%%       Default is to enable if there are no cyclic dependencies.</dd>
1129%%   <dt>`-descr'</dt>
1130%%   <dd>Generate self-description information.</dd>
1131%%   <dt>`-maps'</dt>
1132%%   <dd>This option expands to the following options:
1133%%       <ul>
1134%%         <li>`-msgs-as-maps'</li>
1135%%         <li>`-mapfields-as-maps'</li>
1136%%         <li>`-defs-as-maps'</li>
1137%%       </ul>
1138%%       See the `maps' option for the function {@link file/2}
1139%%       for more info.</dd>
1140%%   <dt>`-maps_unset_optional omitted | present_undefined'</dt>
1141%%   <dd>Specifies the internal format for optional fields that are unset.</dd>
1142%%   <dt>`-maps_oneof tuples | flat'</dt>
1143%%   <dd>Specifies the internal format for oneof fields in maps.</dd>
1144%%   <dt>`-maps_key_type atom | binary'</dt>
1145%%   <dd>Specifies the key type for maps.</dd>
1146%%   <dt>`-msgs-as-maps'</dt>
1147%%   <dd>Specifies that messages should be maps. No `.hrl' file will
1148%%       be generated.
1149%%       Without this option, messages will be records.</dd>
1150%%   <dt>`-mapfields-as-maps'</dt>
1151%%   <dd>Specifies that fields of type `map<_,_>' should be maps.
1152%%       Otherwise, they will be 2-tuples.</dd>
1153%%   <dt>`-defs-as-maps'</dt>
%%   <dd>Specifies that proto definitions from the generated code
1155%%       are to be returned as maps. Otherwise, they will be lists
1156%%       of tuples and records (or proplists if the `-pldefs' option
1157%%       is specified)</dd>
1158%%   <dt>`-erlc_compile_options Options'</dt>
1159%%   <dd>Specifies compilation options, in a comma separated string, to pass
1160%%       along to the `-compile(...)' directive on the generated code.</dd>
1161%%   <dt>`-epb'</dt>
1162%%   <dd>Enable compatibility with the Erlang Protobuffs library:
1163%%       <ul>
1164%%         <li>Implies the `-epb-functions' option</li>
1165%%         <li>Implies the `-defaults-for-omitted-optionals' option</li>
1166%%         <li>Implies the `-modsuffix _pb' option</li>
1167%%         <li>Implies the `-msgtolower' option</li>
1168%%       </ul></dd>
1169%%   <dt>`-epb-functions'</dt>
1170%%   <dd>For compatibility with the Erlang Protobuffs library, generate also
1171%%       the following functions: `encode/1', `decode/2', `encode_MsgName/1'
1172%%       and `decode_MsgName/1'</dd>
1173%%   <dt>`-defaults-for-omitted-optionals'</dt>
1174%%   <dd>For optional fields not present on decoding, set the field to
1175%%       its default value, if any, instead of to `undefined'.</dd>
1176%%   <dt>`-type-defaults-for-omitted-optionals'</dt>
1177%%   <dd>For optional fields not present on decoding, set the field to
1178%%       its type-default, instead of to `undefined'.</dd>
1179%%   <dt>`-for-version N'</dt>
1180%%   <dd>Generate code for Erlang/OTP version N instead of current.</dd>
1181%%   <dt>`-Werror', `-W1', `-W0', `-W', `-Wall'</dt>
1182%%   <dd>`-Werror' means treat warnings as errors<br></br>
1183%%       `-W1' enables warnings, `-W0' disables warnings.<br></br>
1184%%       `-W' and `-Wall' are the same as `-W1'</dd>
1185%%   <dt>`--help' or `-h'</dt>
1186%%   <dd>Show help.</dd>
1187%%   <dt>`--version' or `-V'</dt>
1188%%   <dd>Show the version number of gpb.</dd>
1189%% </dl>
1190%% If several files are specified, each is compiled individually, no
1191%% checking is done for instance for multiply defined messages or
1192%% fields across files, such as the `protoc' does.
-spec c([string() | atom()]) -> no_return().
c([First | _]=Files) when is_atom(First); is_list(First) -> %% invoked with -s or -run
    erlang:system_flag(backtrace_depth, 32),
    %% File names may be given as atoms or as strings
    FileNames = lists:map(fun(File) ->
                                  if is_atom(File) -> atom_to_list(File);
                                     is_list(File) -> File
                                  end
                          end,
                          Files),
    %% Options may be among the init arguments or the plain arguments
    InitArgs = init_args_to_argv(init:get_arguments()),
    PlainArgs = init:get_plain_arguments(),
    Argv = lists:append([InitArgs, PlainArgs, FileNames]),
    case parse_opts_and_args(Argv) of
        {error, Reason} ->
            io:format("Error: ~s.~n", [Reason]),
            show_args(),
            halt(1);
        {ok, {Opts, Args}} ->
            c(Opts, Args)
    end.
1211
%% Turn the init arguments that are gpb options back into a list of
%% command line arguments: the option name with a leading dash, followed
%% by the arguments of the option. Other init arguments are dropped.
init_args_to_argv(InitArgs) ->
    ToArgv = fun({OptName, OptArgs}) ->
                     case is_gpb_opt(OptName) of
                         true  -> ["-"++atom_to_list(OptName) | OptArgs];
                         false -> []
                     end;
                (_Other) ->
                     []
             end,
    lists:flatmap(ToArgv, InitArgs).
1216
%% Run the command line operation that the options and arguments call
%% for, then halt the vm. When compiling, the exit status is 0 only if
%% every file compiled with the result `ok'.
%% Opts are expected to be on the same format as accepted by file/2,
%% such as the options returned by parse_opts_and_args/1.
-spec c(opts(), [ProtoFileName::string()]) -> no_return().
c(Opts, Args) ->
    case determine_cmdline_op(Opts, Args) of
        compile ->
            FileOpts = Opts ++ [report_warnings, report_errors],
            Results = lists:map(fun(FileName) -> file(FileName, FileOpts) end,
                                Args),
            AllOk = lists:usort(Results) =:= [ok],
            if AllOk -> halt(0);
               true  -> halt(1)
            end;
        show_version ->
            show_version(),
            halt(0);
        show_help ->
            show_help(),
            halt(0);
        error ->
            show_help(),
            halt(1)
    end.
1239
-spec parse_opts_and_args([string()]) -> {ok, {opts(), Args::[string()]}} |
                                         {error, Reason::string()}.
parse_opts_and_args(Argv) ->
    %% Start with no options and no files collected
    OptsAcc = [],
    FilesAcc = [],
    do_parse_argv(Argv, OptsAcc, FilesAcc).
1244
%% Go through the command line arguments. An argument with a leading
%% dash is parsed as an option, and may consume following arguments.
%% Any other argument is collected as a file. Options and files are
%% accumulated in reverse, and returned in command line order.
do_parse_argv([], Opts, Files) ->
    {ok, {lists:reverse(Opts), lists:reverse(Files)}};
do_parse_argv([[$- | OptName]=Arg | Rest], Opts, Files) ->
    case find_opt_spec(OptName) of
        error ->
            {error, "Unknown option " ++ Arg};
        {ok, OptSpec} ->
            case parse_opt(OptName, OptSpec, Rest) of
                {error, Reason} ->
                    {error, Reason};
                {ok, {ParsedOpt, Remaining}} ->
                    do_parse_argv(Remaining, [ParsedOpt | Opts], Files)
            end
    end;
do_parse_argv([File | Rest], Opts, Files) ->
    do_parse_argv(Rest, Opts, [File | Files]).
1261
%% Return true if the init argument, an atom, is the name of an option
%% for which there is an option spec, else false.
is_gpb_opt(InitArgOptAtom) ->
    case find_opt_spec(atom_to_list(InitArgOptAtom)) of
        error -> false;
        _     -> true
    end.
1264
%% Find the option spec that matches the option name, which is to be
%% without the leading dash. Returns `{ok, OptSpec}', or `error' if no
%% option spec matches.
find_opt_spec(OptName) ->
    Matches = lists:filter(fun(OptSpec) -> opt_matches(OptName, OptSpec) end,
                           opt_specs()),
    case Matches of
        [OptSpec] -> {ok, OptSpec};
        []        -> error
    end.
1272
%% Check whether an option on the command line matches an option spec.
%% Dashes and underscores are equivalent. An option with a possibly
%% appended argument matches if the name in the spec is a prefix of
%% the option; any other option matches only if the names are equal.
opt_matches(Opt, {OptName, Type, _OptTag, _Descr}) ->
    Given = norm_uscore_dash(Opt),
    Expected = norm_uscore_dash(OptName),
    case Type of
        'string_maybe_appended()' -> lists:prefix(Expected, Given);
        _                         -> Given == Expected
    end.
1277
%% Normalize a string by replacing each underscore with a dash.
norm_uscore_dash(Str) ->
    [case Char of
         $_ -> $-;
         _  -> Char
     end
     || Char <- Str].
1281
%% Parse one option, given the option as on the command line (without
%% the leading dash), its option spec `{OptName, Type, OptTag, Descr}',
%% and the arguments that follow the option on the command line.
%% Returns `{ok, {ParsedOpt, RemainingArgs}}' or `{error, Reason}'.
%% The order of the clauses matters: the last clause catches a missing
%% argument for the types that need one.
parse_opt(Opt, {OptName, 'string_maybe_appended()', OptTag, _Descr}, Args) ->
    case {Opt, Args} of
        {OptName, [Separate | Remaining]} ->
            %% The argument is separate from the option: -I Dir
            {ok, {{OptTag, Separate}, Remaining}};
        {OptName, []} ->
            {error, "Missing argument for option -" ++ OptName};
        _ ->
            %% The argument is appended to the option: -IDir
            true = lists:prefix(OptName, Opt),
            Appended = gpb_lib:string_slice(Opt, length(OptName)),
            {ok, {{OptTag, Appended}, Args}}
    end;
parse_opt(_, {_OptName, undefined, OptTag, _Descr}, Args) ->
    %% An option with no argument
    {ok, {OptTag, Args}};
parse_opt(_, {_OptName, 'string()', OptTag, _Descr}, [Arg | Remaining]) ->
    {ok, {{OptTag, Arg}, Remaining}};
parse_opt(_, {OptName, 'integer()', OptTag, _Descr}, [Arg | Remaining]) ->
    try list_to_integer(Arg) of
        Int -> {ok, {{OptTag, Int}, Remaining}}
    catch error:badarg ->
            {error, ?ff("Invalid version number (integer) for ~s: ~p",
                        [OptName, Arg])}
    end;
parse_opt(_, {_OptName, ParseFun, OptTag, _Descr}, Args)
  when is_function(ParseFun) ->
    %% The option spec has its own function for parsing the arguments
    ParseFun(OptTag, Args);
parse_opt(_, {OptName, Alternatives, OptTag, _Descr}, [Arg | Remaining]) ->
    %% The argument must be one of the alternatives in the tuple
    case parse_opt_alts(tuple_to_list(Alternatives), Arg, OptTag) of
        error           -> {error, "Invalid argument for -" ++ OptName};
        {ok, ParsedOpt} -> {ok, {ParsedOpt, Remaining}}
    end;
parse_opt(OptName, _OptSpec, []) ->
    {error, "Missing argument for option -" ++ OptName}.
1313
%% Try the alternatives in order, and return `{ok, {OptTag, Value}}'
%% for the first one that the argument matches, or `error' if it matches
%% none. The alternative 'number()' matches if the argument can be
%% converted to a number. Any other alternative is an atom that matches
%% if the argument is the text of the atom.
parse_opt_alts([], _OptArg, _OptTag) ->
    error;
parse_opt_alts([Alt | Rest], OptArg, OptTag) ->
    Parsed = if Alt =:= 'number()' ->
                     string_to_number(OptArg);
                true ->
                     case atom_to_list(Alt) of
                         OptArg -> {ok, Alt};
                         _      -> error
                     end
             end,
    case Parsed of
        {ok, Value} -> {ok, {OptTag, Value}};
        error       -> parse_opt_alts(Rest, OptArg, OptTag)
    end.
1326
%% Table of the recognized command line options.
%%
%% Each entry is a 4-tuple `{OptName, Type, OptTag, Descr}':
%% - OptName is the option text without the leading dash.
%% - Type tells how the argument of the option, if any, is parsed:
%%     undefined                  no argument, the result is OptTag as is
%%     'string()'                 the next argument, as a string
%%     'integer()'                the next argument, converted to an integer
%%     'string_maybe_appended()'  a string argument; the help shows such an
%%                                option both as `-X' and as `-XOption'
%%     {Alt1, Alt2, ...}          the next argument must be the text of one
%%                                of the atoms, or a number for 'number()'
%%     fun/2                      called as Fun(OptTag, RemainingArgs)
%% - OptTag is the option name (or the complete option) to produce.
%% - Descr is the help text printed by show_args/0.
%%
%% When the environment variable GPB_DEV_OPTS is set to "true", the
%% development-only -fp option is included too.
opt_specs() ->
    [
     {"I", 'string_maybe_appended()', i, "\n"
      "       Specify include directory.\n"
      "       Option may be specified more than once to specify\n"
      "       several include directories.\n"},
     {"o", 'string()', o, "Dir\n"
      "       Specify output directory for where to generate\n"
      "       the <ProtoFile>.erl and <ProtoFile>.hrl\n"},
     {"o-erl", 'string()', o_erl, "Dir\n"
      "       Specify output directory for where to generate\n"
      "       the <ProtoFile>.erl.\n"
      "       The -o-erl Dir option overrides any -o Dir option, and\n"
      "       similarly for the other file-type specific output options.\n"},
     {"o-hrl", 'string()', o_hrl, "Dir\n"
      "       Specify output directory for where to generate\n"
      "       the <ProtoFile>.hrl\n"},
     {"o-nif-cc", 'string()', o_nif_cc, "Dir\n"
      "       Specify output directory for where to generate\n"
      "       the NIF C++ file, if the -nif option is specified\n"},
     {"nif", undefined, nif, "\n"
      "       Generate nifs for linking with the protobuf C(++) library.\n"},
     {"load_nif", 'string()', load_nif, "FunctionDefinition\n"
      "       Specify FunctionDefinition as the text that defines the\n"
      "       function load_nif/0.  This is called as the -on_load\n"
      "       hook for loading the NIF.\n"},
     {"v", {optionally, always, never}, verify, " optionally | always | never\n"
      "       Specify how the generated encoder should\n"
      "       verify the message to be encoded.\n"},
     {"c", {true, false, auto, 'number()'}, copy_bytes,
      " true | false | auto | number()\n"
      "       Specify how or when the generated decoder should\n"
      "       copy fields of type bytes.\n"},
     {"strbin", undefined, strings_as_binaries, "\n"
      "       Specify that decoded strings should be returned as binaries,\n"
      "       instead of as strings (lists).\n"},
     {"pldefs", undefined, defs_as_proplists, "\n"
      "       Specify that introspection functions shall return proplists\n"
      "       instead of #field{} records, to make the generated code\n"
      "       completely free of even compile-time dependencies to gpb.\n"},
     {"pkgs", undefined, use_packages, "\n"
      "       Prepend the name of a package to every message it contains.\n"
      "       If no package is defined, nothing will be prepended.\n"
      "       Default is to not prepend package names for backwards\n"
      "       compatibility, but it is needed for some proto files.\n"},
     {"translate_type", fun opt_translate_type/2, translate_type,
      " type=Type,e=Mod:Fn,d=Mod:Fn[,m=Mod:Fn][,v=Mod:Fn]\n"
      "       For fields of the specified type, call Mod:Fn to:\n"
      "       - encode (calls Mod:Fn(Term) -> AnyMessage to pack)\n"
      "       - decode (calls Mod:Fn(AnyMessage) -> Term to unpack)\n"
      "       - merge  (calls Mod:Fn(Term,Term2) -> Term3 to merge unpacked)\n"
      "       - verify (calls Mod:Fn(Term) -> _ to verify unpacked)\n"
      "       Type can be any of msg:MsgName (after any renaming operations)\n"
      "       enum:EnumName, int32, int64, uint32, uint64, sint32 sint64,\n"
      "       fixed32, fixed64, sfixed32, sfixed64, bool, double, string,\n"
      "       bytes, map<KeyType,ValueType>. The last may need quoting in\n"
      "       the shell. No merge function is called for scalar fields.\n"},
     {"translate_field", fun opt_translate_field/2, translate_field,
      " field=Field,e=Mod:Fn,d=Mod:Fn[,m=Mod:Fn][,v=Mod:Fn]"
      "[,i=Mod:Fn][,a=Mod:Fn][,f=Mod:Fn]\n"
      "       For the specified field, call Mod:Fn. Specify Field as one of:\n"
      "       - MsgName for the message itself\n"
      "       - MsgName.FieldName for fields generally\n"
      "       - MsgName.OneofName.FieldName for oneof fields\n"
      "       - MsgName.FieldName.[] for elements of repeated fields.\n"
      "       For repeated fields, ie for the field itself, not its elements,\n"
      "       the following extra translations are to be specified:\n"
      "       - i=Mod:Fn (calls Mod:Fn() on decoding to initialize the field)\n"
      "       - a=Mod:Fn (calls Mod:Fn(Elem,S) on decoding to add an item)\n"
      "       - f=Mod:Fn (calls Mod:Fn(S) on decoding to finalize the field)\n"
      ""},
     {"any_translate", fun opt_any_translate/2, any_translate,
      " e=Mod:Fn,d=Mod:Fn[,m=Mod:Fn][,v=Mod:Fn]\n"
      "       For a google.protobuf.Any message, call Mod:Fn to:\n"
      "       - encode (calls Mod:Fn(Term) -> AnyMessage to pack)\n"
      "       - decode (calls Mod:Fn(AnyMessage) -> Term to unpack)\n"
      "       - merge  (calls Mod:Fn(Term,Term2) -> Term3 to merge unpacked)\n"
      "       - verify (calls Mod:Fn(Term) -> _ to verify unpacked)\n"},
     {"msgprefix", 'string()', msg_name_prefix, "Prefix\n"
      "       Prefix each message with Prefix.\n"},
     {"modprefix", 'string()', module_name_prefix, "Prefix\n"
      "       Prefix the module name with Prefix.\n"},
     {"msgsuffix", 'string()', msg_name_suffix, "Suffix\n"
      "       Suffix each message with Suffix.\n"},
     {"msgtolower", undefined, msg_name_to_lower, "ToLower\n"
      "       ToLower each message.  Any prefixes/suffixes are added\n"
      "       after case modification.\n"},
     {"modsuffix", 'string()', module_name_suffix, "Suffix\n"
      "       Suffix the module name with Suffix.\n"},
     {"modname", 'string()', module_name, "Name\n"
      "       Specify the name of the generated module.\n"},
     {"il", undefined, include_as_lib, "\n"
      "       Generate code that includes gpb.hrl using -include_lib\n"
      "       instead of -include, which is the default.\n"},
     {"type", undefined, type_specs, "\n"
      "       Enables `::Type()' annotations in the generated code.\n"},
     {"no_type", fun opt_no_type_specs/2, type_specs, "\n"
      "       Disables `::Type()' annotations in the generated code.\n"},
     {"descr", undefined, descriptor, "\n"
      "       Generate self-description information.\n"},
     {"maps", undefined, maps, "\n"
      "       This will expand to the following options:\n"
      "         -msgs-as-maps\n"
      "         -mapfields-as-maps\n"
      "         -defs-as-maps\n"},
     {"maps_unset_optional", {omitted, present_undefined}, maps_unset_optional,
      "omitted | present_undefined\n"
      "       Specifies the internal format for optional fields\n"
      "       that are unset.\n"},
     {"maps_oneof", {tuples, flat}, maps_oneof,
      "tuples | flat\n"
      "       Specifies the representation for oneof fields in maps:\n"
      "       as tuples, #{..., OneofField => {Tag, Value}, ...}   (default)\n"
      "       or flat,   #{..., Tag => Value, ...}\n"},
     {"maps_key_type", {atom, binary}, maps_key_type,
      "atom | binary\n"
      "       Specifies the key type for maps.\n"},
     {"msgs-as-maps", undefined, msgs_as_maps, "\n"
      "        Specifies that messages should be maps.\n"
      "        Otherwise, they will be records.\n"},
     {"mapfields-as-maps", undefined, mapfields_as_maps, "\n"
      "        Specifies that fields of type map<_,_> should be maps.\n"
      "        Otherwise, they will be 2-tuples.\n"},
     {"defs-as-maps", undefined, defs_as_maps, "\n"
      "        Specifies that proto definitions from the generated code\n"
      "        are to be returned as maps. Otherwise, they will be lists\n"
      "        of tuples and records (or proplists if the -pldefs option\n"
      "        is specified)\n"},
     {"erlc_compile_options", 'string()', erlc_compile_options, "String\n"
      "       Specifies compilation options, in a comma separated string, to\n"
      "       pass along to the -compile() directive on the generated code.\n"},
     {"epb", undefined, epb_compatibility, "\n"
      "       Enable compatibility with the Erlang Protobuffs library:\n"
      "       * Implies the -epb-functions option\n"
      "       * Implies the -modsuffix _pb option\n"
      "       * Implies the -msgtolower option\n"},
     {"epb-functions", undefined, epb_functions, "\n"
      "       Generate some functions for API compatibility with the\n"
      "       Erlang protobuffs library:\n"
      "       * encode/1 and encode_MsgName/1\n"
      "       * decode/2 and decode_MsgName/1\n"},
     {"defaults-for-omitted-optionals", undefined,
      defaults_for_omitted_optionals, "\n"
      "       For optional fields not present on decoding, set the field\n"
      "       to its default value, if any, instead of to undefined.\n"},
     {"type-defaults-for-omitted-optionals", undefined,
      type_defaults_for_omitted_optionals, "\n"
      "       For optional fields not present on decoding, set the field\n"
      "       to its type-default, instead of to undefined.\n"},
     {"for-version", 'integer()', target_erlang_version, "N\n"
      "       Generate code for Erlang/OTP version N instead of current.\n"},
     {"Werror",undefined, warnings_as_errors, "\n"
      "       Treat warnings as errors\n"},
     {"W1", undefined, report_warnings, "\n"
      "       Report warnings\n"},
     {"W0", undefined, {report_warnings,false}, "\n"
      "       Do not report warnings\n"},
     {"Wall", undefined, report_warnings, "\n"
      "       Same as -W1\n"},
     {"W", undefined, report_warnings, "\n"
      "       Same as -W1\n"},
     {"h", undefined, help, "\n"
      "       Show help\n"},
     {"-help", undefined, help, "\n"
      "       Show help\n"},
     {"V", undefined, version, "\n"
      "       Show version\n"},
     {"-version", undefined, version, "\n"
      "       Show version\n"}
    ] ++
        case os:getenv("GPB_DEV_OPTS") of
            "true" ->
                [{"fp", {pass_as_params,pass_as_record}, field_pass_method,
                  "pass_as_params | pass_as_record\n"
                  "        Override whether message fields are to be passed\n"
                  "        as parameters or as a record (or map, depending\n"
                  "        on the -maps option).  This is purely internal,\n"
                  "        and has no impact neither on input nor output,\n"
                  "        but there may be a performance difference.\n"
                  "        Normally, it is calculated automatically for each\n"
                  "        message, but during development it may be useful\n"
                  "        to be able to force it.\n"}];
            _ ->
                []
        end.
1512
1513
%% Option handler for an option that turns `OptTag' off: it consumes
%% no argument and produces the option `{OptTag, false}', handing
%% back the remaining arguments untouched.
opt_no_type_specs(OptTag, RemainingArgs) ->
    {ok, {{OptTag, false}, RemainingArgs}}.
1517
%% Option handler for -translate_type.
%%
%% The argument is expected to look like `type=Type,e=Mod:Fn,...'.
%% Returns `{ok, {{OptTag, {Type, Translations}}, RemainingArgs}}' on
%% success, or `{error, Text}' if the argument is missing or malformed.
%%
%% The parsing helpers signal malformed input by throwing
%% `{badopt, Text}'.  The calls to them must therefore be in the
%% protected part of the `try': the `of' section of a
%% `try Expr of ... catch' is not covered by the `catch'.
opt_translate_type(_OptTag, []) ->
    {error, "Missing argument for option -translate_type"};
opt_translate_type(OptTag, ["type="++S2 | Rest]) ->
    try
        {Type,Rest2} = opt_translate_type(S2),
        Ts = gpb_lib:string_lexemes(Rest2, ","),
        Opt = {OptTag, {Type, [opt_translate_mfa(T) || T <- Ts]}},
        {ok, {Opt, Rest}}
    catch throw:{badopt,ErrText} ->
            {error, ErrText}
    end;
opt_translate_type(_OptTag, [_S | _Rest]) ->
    {error, "Translation is expected to begin with type="}.
1530
%% Option handler for -translate_field.
%%
%% The argument is expected to look like `field=Path,e=Mod:Fn,...'.
%% Returns `{ok, {{OptTag, {Path, Translations}}, RemainingArgs}}' on
%% success, or `{error, Text}' if the argument is missing or malformed.
%%
%% The parsing helpers signal malformed input by throwing
%% `{badopt, Text}'.  The calls to them must therefore be in the
%% protected part of the `try': the `of' section of a
%% `try Expr of ... catch' is not covered by the `catch'.
opt_translate_field(_OptTag, []) ->
    {error, "Missing argument for option -translate_field"};
opt_translate_field(OptTag, ["field="++S2 | Rest]) ->
    try
        {Path,Rest2} = opt_translate_elempath(S2),
        Ts = gpb_lib:string_lexemes(Rest2, ","),
        Opt = {OptTag, {Path, [opt_translate_mfa(T) || T <- Ts]}},
        {ok, {Opt, Rest}}
    catch throw:{badopt,ErrText} ->
            {error, ErrText}
    end;
opt_translate_field(_OptTag, [_S | _Rest]) ->
    {error, "Translation is expected to begin with field="}.
1543
%% Option handler for -any_translate.
%%
%% The argument is a comma separated list of translation specs such as
%% `e=Mod:Fn,d=Mod:Fn'.  Returns
%% `{ok, {{OptTag, Translations}, RemainingArgs}}' on success, or
%% `{error, Text}' if the argument is missing or if any of the specs
%% is malformed (signalled by opt_translate_mfa/1 throwing
%% `{badopt, Text}').
opt_any_translate(_OptTag, []) ->
    {error, "Missing argument for option -any_translate"};
opt_any_translate(OptTag, [S | Rest]) ->
    try
        Ts = gpb_lib:string_lexemes(S, ","),
        Opt = {OptTag, [opt_translate_mfa(T) || T <- Ts]},
        {ok, {Opt, Rest}}
    catch throw:{badopt,ErrText} ->
            {error, ErrText}
    end.
1552
%% Parse the type at the beginning of a translation spec string.
%%
%% Returns `{Type, Remaining}', where Type is `{msg,Name}',
%% `{enum,Name}', a map type as produced by opt_translate_map_type/1,
%% or the atom of a scalar type.  Throws `{badopt, Text}' if the text
%% is not one of the allowed scalar types.
opt_translate_type("msg:" ++ Name)    -> opt_to_comma_with_tag(Name, msg);
opt_translate_type("enum:" ++ Name)   -> opt_to_comma_with_tag(Name, enum);
opt_translate_type("map<" ++ MapSpec) -> opt_translate_map_type(MapSpec);
opt_translate_type(Scalar) ->
    {TypeStr, Remaining} = read_s(Scalar, $,, ""),
    TypeAtom = s2a(TypeStr),
    ScalarTypes = [int32, int64, uint32, uint64, sint32, sint64,
                   fixed32, fixed64, sfixed32, sfixed64,
                   bool, float, double, string, bytes],
    case lists:member(TypeAtom, ScalarTypes) of
        false -> throw({badopt,"Invalid translation type: "++TypeStr});
        true  -> {TypeAtom, Remaining}
    end.
1565
%% Parse what follows "map<" in a `map<KeyType,ValueType>' type spec.
%%
%% Returns `{{map,KeyType,ValueType}, Remaining}' where Remaining is
%% the text after the closing `>'.  Throws `{badopt, Text}' if the key
%% type is not allowed as a map key, or if there is unexpected text
%% after the value type.
opt_translate_map_type(S) ->
    {KeyType, AfterKey} = opt_translate_type(S),
    case gpb:is_allowed_as_key_type(KeyType) of
        false ->
            throw({badopt,"Not allowed as map key type"});
        true ->
            {ValueStr, AfterMap} = read_s(AfterKey, $>, ""),
            %% The scalar, msg: and enum: forms are read up to a comma,
            %% so append one to terminate the value type text.
            case opt_translate_type(ValueStr ++ ",") of
                {ValueType, ""} ->
                    {{map,KeyType,ValueType}, AfterMap};
                {_ValueType, _Garbage} ->
                    throw({badopt,"Trailing garbage text"})
            end
    end.
1580
%% Read a name up to the next comma and return it as `{Tag, NameAtom}'
%% together with the text following the comma.
opt_to_comma_with_tag(S, Tag) ->
    {Name, Remaining} = read_s(S, $,, ""),
    Tagged = {Tag, s2a(Name)},
    {Tagged, Remaining}.
1584
%% Parse a dot-separated element path, terminated by a comma.
%%
%% The path consists of one to three components.  The components are
%% converted to atoms, except for a third component "[]", which
%% becomes the empty list.  Returns `{Path, Remaining}' or throws
%% `{badopt, Text}' for any other number of components.
opt_translate_elempath(S) ->
    {PathStr, Remaining} = read_s(S, $,, ""),
    Path = case gpb_lib:string_lexemes(PathStr, ".") of
               [M]         -> [s2a(M)];
               [M,F]       -> [s2a(M),s2a(F)];
               [M,F,"[]"]  -> [s2a(M),s2a(F),[]];
               [M,F,OF]    -> [s2a(M),s2a(F),s2a(OF)];
               _           -> throw({badopt, "Invalid element path"})
           end,
    {Path, Remaining}.
1594
%% Shorthand for converting a string to an atom.
s2a(Str) -> erlang:list_to_atom(Str).
1596
%% Split a string at the first occurrence of the character `Delim'.
%%
%% Returns `{Before, After}', with the delimiter itself dropped.  `Acc'
%% holds already read characters in reverse order; they end up first
%% in Before.  Throws `{badopt, Text}' if there is no delimiter.
read_s(Str, Delim, Acc) ->
    case lists:splitwith(fun(C) -> C =/= Delim end, Str) of
        {Before, [_Delim | After]} ->
            {lists:reverse(Acc, Before), After};
        {_All, []} ->
            throw({badopt, "Unexpected end of string"})
    end.
1600
%% Parse one translation spec of the form `<Letter>=Mod:Fn' into
%% `{Operation, {Mod, Fn, ArgTemplate}}'.
%%
%% The letter selects the operation and the arity of the argument
%% template.  Throws `{badopt, Text}' for anything else.
opt_translate_mfa([Letter, $= | MF] = Spec) ->
    case Letter of
        $e -> {encode,opt_mf_str(MF, 1)};
        $d -> {decode,opt_mf_str(MF, 1)};
        $m -> {merge, opt_mf_str(MF, 2)};
        $V -> {verify,opt_mf_str(MF, 1)};
        $v -> {verify,opt_mf_str_verify(MF)};
        $i -> {decode_init_default,opt_mf_str(MF, 0)};
        $a -> {decode_repeated_add_elem, opt_mf_str(MF, 2)};
        $f -> {decode_repeated_finalize, opt_mf_str(MF, 1)};
        _  -> throw({badopt,"Invalid translation spec: "++Spec})
    end;
opt_translate_mfa(Spec) ->
    throw({badopt,"Invalid translation spec: "++Spec}).
1610
%% Parse a `Mod:Fn' string into `{Mod, Fn, ArgTemplate}', where the
%% argument template has `Arity' elements (see opt_arg_template/1).
%% Throws `{badopt, Text}' unless the string consists of exactly two
%% colon-separated parts.
opt_mf_str(S, Arity) ->
    Parts = gpb_lib:string_lexemes(S, ":"),
    case Parts of
        [Mod, Fn] ->
            {list_to_atom(Mod), list_to_atom(Fn), opt_arg_template(Arity)};
        _ ->
            throw({badopt,"Invalid Mod:Fn spec: "++S})
    end.
1616
%% Like opt_mf_str/2 with arity 1, but with the atom '$errorf'
%% appended as a second element of the argument template.
opt_mf_str_verify(S) ->
    {Mod, Fn, [_] = Args} = opt_mf_str(S, 1),
    {Mod, Fn, Args ++ ['$errorf']}.
1620
%% Build the argument template for a translator function with `Arity'
%% arguments: one atom per argument, formatted from "$~w" with the
%% argument's 1-based position.
opt_arg_template(Arity) ->
    lists:map(fun(Pos) -> list_to_atom(?ff("$~w", [Pos])) end,
              lists:seq(1, Arity)).
1623
%% Decide what the command line asks for.
%%
%% A `help' option wins over a `version' option.  Without either of
%% them, the result is `compile' if any file names were given, and
%% `error' otherwise.
determine_cmdline_op(Opts, FileNames) ->
    WantsHelp = lists:member(help, Opts),
    WantsVersion = lists:member(version, Opts),
    if WantsHelp       -> show_help;
       WantsVersion    -> show_version;
       FileNames == [] -> error;
       true            -> compile
    end.
1632
%% Print the gpb version and a usage summary, followed by the list of
%% recognized options (see show_args/0).
show_help() ->
    Usage =
        "gpb version ~s~n"
        "Usage: erl <erlargs> [gpb-opts] -s ~p c <ProtoFile>.proto~n"
        "   or: erl <erlargs> -s ~p c <ProtoFile>.proto -extra [gpb-opts]~n"
        "Typical erlargs = -noshell -noinput +B -boot start_clean -pa SomeDir~n"
        "~n",
    Version = gpb:version_as_string(),
    io:format(Usage, [Version, ?MODULE, ?MODULE]),
    show_args().
1642
%% Print the help text for one option spec.  An option of type
%% 'string_maybe_appended()' is shown in two forms: on its own, and
%% with the text "Option" directly appended to it.
show_arg({OptDef, Type, _OptTag, OptDoc}) ->
    case Type of
        'string_maybe_appended()' ->
            io:format("   -~s   -~sOption ~s", [OptDef, OptDef, OptDoc]);
        _ ->
            io:format("   -~s ~s", [OptDef, OptDoc])
    end.
1647
%% Print a heading followed by the help text of every option in
%% opt_specs/0.
-spec show_args() -> _. % side effect is to print valid opts/args
show_args() ->
    Heading =
        "Recognized gpb-opts: (see the edoc for ~p for further details)~n",
    io:format(Heading, [?MODULE]),
    lists:foreach(fun(OptSpec) -> show_arg(OptSpec) end, opt_specs()).
1654
%% Print the version of gpb, as given by gpb:version_as_string/0.
-spec show_version() -> _. % side effect is to print version
show_version() ->
    Version = gpb:version_as_string(),
    io:format("gpb version ~s~n", [Version]).
1658
%% Convert a string to a number: first try it as an integer, then as a
%% float.  Returns `{ok, Number}', or `error' if it is neither.
string_to_number(S) ->
    try list_to_integer(S) of
        Int -> {ok, Int}
    catch error:badarg ->
            try list_to_float(S) of
                Float -> {ok, Float}
            catch error:badarg ->
                    error
            end
    end.
1666
%% Parse the input together with everything it imports, then
%% post-process all the resulting definitions.
%%
%% The current directory is added to the include path if needed.
%% Returns `{ok, Defs}', or `{error, Reason}'.  Errors from the
%% post-processing are wrapped as `{post_process, Reasons}'.
parse_file_or_string(In, Opts) ->
    Opts1 = add_curr_dir_as_include_if_needed(Opts),
    case parse_file_and_imports(In, Opts1) of
        {error, _Reason} = ParseError ->
            ParseError;
        {ok, {Defs, _AllImported}} ->
            case gpb_parse:post_process_all_files(Defs, Opts1) of
                {ok, _ProcessedDefs} = Ok ->
                    Ok;
                {error, Reasons} ->
                    {error, {post_process, Reasons}}
            end
    end.
1680
%% Make sure the current directory, ".", is among the include
%% directories.  If no `{i,"."}' option is present, one is appended
%% last, so that explicitly given directories come before it.
add_curr_dir_as_include_if_needed(Opts) ->
    CurrDirOpt = {i,"."},
    case lists:member(CurrDirOpt, Opts) of
        false -> Opts ++ [CurrDirOpt];
        true  -> Opts
    end.
1687
%% Parse the input and, recursively, the files it imports.  The name
%% of the input itself is the initial list of already imported files.
parse_file_and_imports(In, Opts) ->
    parse_file_and_imports(In, [file_name_from_input(In)], Opts).
1691
%% Determine the file name for an input.  An input given as a 2-tuple
%% `{Mod, String}' gets the name of Mod with ".proto" appended.
%% Anything else is taken to already be the file name.
file_name_from_input(In) ->
    case In of
        {Mod, _Str} -> lists:concat([Mod, ".proto"]);
        FName       -> FName
    end.
1694
%% Read and parse one input, then read and parse whatever it imports
%% that has not been imported already.
%%
%% Returns `{ok, {Defs, Imported}}' or `{error, Reason}'.
parse_file_and_imports(In, AlreadyImported, Opts) ->
    case locate_read_import_int(In, Opts) of
        {error, _} = ReadError ->
            ReadError;
        {ok, Contents} ->
            FName = file_name_from_input(In),
            case scan_and_parse_string(Contents, FName, Opts) of
                {error, _} = ParseError ->
                    ParseError;
                {ok, Defs} ->
                    Imports = gpb_parse:fetch_imports(Defs),
                    Opts2 = ensure_include_path_to_wellknown_types_if_proto3(
                              Defs, Imports, Opts),
                    %% Mark this file as imported to prevent trying to
                    %% import it again, in case it is multiply imported.
                    read_and_parse_imports(Imports,
                                           [FName | AlreadyImported],
                                           Defs, Opts2)
            end
    end.
1716
%% Scan, parse and post-process the text of one proto file.
%%
%% Returns `{ok, Result}' with the result of the post-processing, or
%% `{error, {scan_error | parse_error, FName, Reason}}'.  Failures in
%% the post-processing are reported as parse errors as well.
scan_and_parse_string(S, FName, Opts) ->
    case gpb_scan:string(S) of
        {error, {_, _, _}=ScanError, _EndLine} ->
            {error, {scan_error, FName, ScanError}};
        {ok, Tokens, _} ->
            EndToken = {'$end', 999},
            case gpb_parse:parse(Tokens ++ [EndToken]) of
                {error, {_, _, _}=ParseError} ->
                    {error, {parse_error, FName, ParseError}};
                {ok, PTree} ->
                    case gpb_parse:post_process_one_file(FName, PTree, Opts) of
                        {ok, _Result} = Ok ->
                            Ok;
                        {error, Reason} ->
                            {error, {parse_error, FName, Reason}}
                    end
            end
    end.
1734
%% Import each of the listed files in turn, skipping those already
%% imported, and accumulate their definitions.  Stops at the first
%% error.  Returns `{ok, {Defs, Imported}}' or `{error, Reason}'.
read_and_parse_imports([], Imported, Defs, _Opts) ->
    {ok, {Defs, Imported}};
read_and_parse_imports([Import | Remaining], AlreadyImported, Defs, Opts) ->
    case lists:member(Import, AlreadyImported) of
        true ->
            read_and_parse_imports(Remaining, AlreadyImported, Defs, Opts);
        false ->
            case import_it(Import, AlreadyImported, Defs, Opts) of
                {error, _} = ImportError ->
                    ImportError;
                {ok, {Defs2, Imported2}} ->
                    read_and_parse_imports(Remaining, Imported2, Defs2, Opts)
            end
    end.
1749
%% Parse one imported file, and its imports.  The new definitions are
%% appended to `Defs', and the newly imported files are merged into
%% the already imported ones as a sorted list without duplicates.
import_it(Import, AlreadyImported, Defs, Opts) ->
    %% FIXME: how do we handle scope of declarations,
    %%        e.g. options/package for imported files?
    case parse_file_and_imports(Import, AlreadyImported, Opts) of
        {error, _} = ImportError ->
            ImportError;
        {ok, {MoreDefs, MoreImported}} ->
            AllImported = lists:usort(AlreadyImported ++ MoreImported),
            {ok, {Defs ++ MoreDefs, AllImported}}
    end.
1761
%% Get the contents of an input.
%%
%% An input given as a `{Mod, String}' tuple already carries its
%% contents.  Otherwise, if there is an `import_fetcher' option, it is
%% called with the import.  It can return `from_file' to have the file
%% located and read in the normal way, `{ok, String}', or
%% `{error, Reason}'.  Any other return value from the fetcher raises
%% a `bad_fetcher_return' error.
locate_read_import_int({_Mod, Str}, _Opts) ->
    {ok, Str};
locate_read_import_int(Import, Opts) ->
    case proplists:get_value(import_fetcher, Opts) of
        undefined ->
            locate_read_import_aux(Import, Opts);
        Fetcher when is_function(Fetcher, 1) ->
            case Fetcher(Import) of
                from_file ->
                    locate_read_import_aux(Import, Opts);
                {ok, Contents} when is_list(Contents) ->
                    IsString = lists:all(fun is_integer/1, Contents),
                    if IsString ->
                            {ok, Contents};
                       not IsString ->
                            error({bad_fetcher_return,
                                   {not_a_string, Contents},
                                   Import})
                    end;
                {error, Reason} ->
                    {error, {fetcher_issue, Import, Reason}};
                Unexpected ->
                    error({bad_fetcher_return, Import, Unexpected})
            end
    end.
1787
1788
%% Locate an import among the include directories, ie the `{i,Dir}'
%% options, and read it.
locate_read_import_aux(Import, Opts) ->
    IncludeDirs = [Dir || {i, Dir} <- Opts],
    case locate_import_aux(IncludeDirs, Import, Opts, []) of
        {error, _} = NotFound ->
            NotFound;
        {ok, File} ->
            read_import(File, Opts)
    end.
1797
%% @doc Locate an import target.  The file is searched for in the
%% include directories, ie the `{i,Dir}' options, and in the directory
%% with the well known types.  This function might be potentially
%% useful for instance in an intercepting `import_fetcher' fun that
%% just wants to record the accessed imports.
-spec locate_import(string(), opts()) -> {ok, File::string()} |
                                         {error, reason()}.
locate_import(ProtoFileName, Opts) ->
    Opts1 = ensure_include_path_to_wellknown_types(Opts),
    IncludeDirs = [Dir || {i, Dir} <- Opts1],
    locate_import_aux(IncludeDirs, ProtoFileName, Opts1, []).
1807
%% Search the directories, in order, for a readable file named
%% `Import'.
%%
%% Returns `{ok, File}' for the first readable one.  If there is none,
%% returns `{error, {import_not_found, Import, Tried}}', where Tried
%% holds `{File, Reason}' for the candidates whose file info could not
%% be read.  Candidates that exist but lack read access are skipped
%% without being recorded.
locate_import_aux([], Import, _Opts, Tried) ->
    {error, {import_not_found, Import, Tried}};
locate_import_aux([Dir | Dirs], Import, Opts, Tried) ->
    Candidate = filename:join(Dir, Import),
    case file_read_file_info(Candidate, Opts) of
        {ok, #file_info{access = read}} ->
            {ok, Candidate};
        {ok, #file_info{access = read_write}} ->
            {ok, Candidate};
        {ok, #file_info{}} ->
            locate_import_aux(Dirs, Import, Opts, Tried);
        {error, Reason} ->
            locate_import_aux(Dirs, Import, Opts,
                              [{Candidate,Reason} | Tried])
    end.
1820
%% @doc Read an import file and decode its contents to a string.
%% This function might be potentially useful for instance in an
%% intercepting `import_fetcher' fun that just wants to record the
%% accessed imports.
-spec read_import(string(), opts()) -> {ok, string()} | {error, reason()}.
read_import(File, Opts) ->
    case file_read_file(File, Opts) of
        {error, ReadReason} ->
            {error, {read_failed, File, ReadReason}};
        {ok, Bin} ->
            case utf8_decode(Bin) of
                {ok, {Enc, Str}} when Enc =:= utf8; Enc =:= latin1 ->
                    {ok, Str};
                {error, DecodeReason} ->
                    {error, {utf8_decode_failed, DecodeReason, File}}
            end
    end.
1839
%% Add the directory with the well known types to the include path,
%% but only if the definitions have syntax "proto3" and at least one
%% of the imports refers to google/protobuf/.  Otherwise the options
%% are returned unchanged.
ensure_include_path_to_wellknown_types_if_proto3(Defs, Imports, Opts) ->
    IsProto3 = proplists:get_value(syntax, Defs) =:= "proto3",
    case IsProto3 andalso lists:any(fun imports_wellknown/1, Imports) of
        true  -> ensure_include_path_to_wellknown_types(Opts);
        false -> Opts
    end.
1852
%% Add the proto3 subdirectory of the priv dir to the include path,
%% unless it is there already.  Raises an error if that directory
%% does not exist.
ensure_include_path_to_wellknown_types(Opts) ->
    Wellknown = filename:join(get_priv_dir(), "proto3"),
    sanity_check_installation_wellknown_proto3(Wellknown),
    add_opt_unless_present({i,Wellknown}, Opts).
1858
%% Tell whether an import refers to something under google/protobuf/.
imports_wellknown(Import) ->
    case Import of
        "google/protobuf/" ++ _ -> true;
        _                       -> false
    end.
1861
%% Append an option last in the list of options, unless the very same
%% option is in the list already.
add_opt_unless_present(Opt, Opts) ->
    case lists:member(Opt, Opts) of
        true  -> Opts;
        false -> Opts ++ [Opt]
    end.
1868
%% Find the priv directory.
%%
%% If this module belongs to an application, use the priv dir of that
%% application.  Otherwise, if the module was loaded from a directory
%% named "ebin", use the sibling directory "priv".  As a last resort,
%% ask for the priv dir of an application named gpb, and raise an
%% error if that fails.
get_priv_dir() ->
    case application:get_application(?MODULE) of
        {ok,CurrApp} ->
            code:priv_dir(CurrApp);
        undefined ->
            %% Not loaded as an application, just executing code;
            %% from an escript possibly? (or even from an ez archive?)
            BeamDir = filename:dirname(code:which(?MODULE)),
            case filename:basename(BeamDir) of
                "ebin" ->
                    filename:join(filename:dirname(BeamDir), "priv");
                _ ->
                    case code:priv_dir(gpb) of % hard-wired app name...
                        {error,Reason} ->
                            error({failed_to_locate_privdir,Reason});
                        PrivDir when is_list(PrivDir) ->
                            PrivDir
                    end
            end
    end.
1889
%% Check that the directory expected to hold the proto3 well known
%% types exists.  Returns ok if it does, otherwise raises a
%% `well_known_proto3_missing' error with an explanatory text.
sanity_check_installation_wellknown_proto3(WellknownDir) ->
    case filelib:is_dir(WellknownDir) of
        false ->
            error({well_known_proto3_missing,
                   "Your installation is missing the priv/proto3 "
                   "directory, which is expected to house the "
                   "'proto3 well known types' such as "
                   "google/protobuf/timestamp.proto and "
                   "google/protobuf/duration.proto. "
                   "They were expected (calculated) to be found in "
                    ++ WellknownDir});
        true ->
            ok
    end.
1904
1905
%% Input .proto file appears to be expected to be UTF-8 by Google's protobuf.
%% In 3.0.0, it accepts a byte order mark (BOM), but in 2.6.1 it does not.
%% It only accepts a BOM for UTF-8. It does not accept UTF-16 nor UTF-32
%% input (tried both little and big endian for both, with proper BOMs).
%%
%% Returns `{ok, {utf8, String}}' if the binary, after any BOM, is
%% valid UTF-8, and `{ok, {latin1, String}}' if it is not, in which
%% case each byte is taken as one character.  Returns
%% `{error, {invalid_proto_byte_order_mark, Enc}}' if the binary
%% begins with a byte order mark for some other encoding.
utf8_decode(B) ->
    {Enc, Len} = unicode:bom_to_encoding(B),
    <<_Bom:Len/binary, B2/binary>> = B,
    if Enc == latin1;
       Enc == utf8 ->
            %% Enc == latin1 means just that no Byte order mark was seen,
            %% it might still be UTF-8 encoded, though, so try that first.
            case unicode:characters_to_list(B2) of
                S when is_list(S) ->
                    {ok, {utf8, S}};
                {error, _, _} ->
                    {ok, {latin1, binary_to_list(B2)}};
                {incomplete, _, _} ->
                    %% The input ends where more bytes of a multi-byte
                    %% UTF-8 sequence were expected, for instance a
                    %% latin1 file ending with an accented letter.
                    %% It is not valid UTF-8 either, so treat it the
                    %% same way as the error case.
                    {ok, {latin1, binary_to_list(B2)}}
            end;
       true ->
            {error, {invalid_proto_byte_order_mark, Enc}}
    end.
1926
%% Collect a warning for each message or group field that has the
%% `packed' option although its type is not packable.
check_unpackables_marked_as_packed(Defs) ->
    CollectWarning =
        fun(_, MsgName, #?gpb_field{name=FName, type=Type, opts=FOpts},
            Warns) ->
                IsPacked = lists:member(packed, FOpts),
                IsPackable = gpb:is_type_packable(Type),
                if IsPacked =:= true, IsPackable =:= false ->
                        [{ignored_field_opt_packed_for_unpackable_type,
                          MsgName, FName, Type, FOpts} | Warns];
                   true ->
                        Warns
                end
        end,
    gpb_lib:fold_msg_or_group_fields(CollectWarning, [], Defs).
1941
1942%% -- generating code ----------------------------------------------
1943
1944format_erl(Mod, Defs, #anres{maps_as_msgs=MapsAsMsgs}=AnRes, Opts) ->
1945    DoNif = proplists:get_bool(nif, Opts),
1946    NoNif = not DoNif,
1947    AsLib = proplists:get_bool(include_as_lib, Opts),
1948    CompileOptsStr = get_erlc_compile_options_str(Opts),
1949    gpb_lib:iolist_to_utf8_or_escaped_binary(
1950      [?f("%% Automatically generated, do not edit~n"
1951          "%% Generated by ~p version ~s~n",
1952          [?MODULE, gpb:version_as_string()]),
1953       ?f("-module(~w).~n", [Mod]),
1954       case CompileOptsStr of
1955           ""    -> "";
1956           [_|_] -> ?f("-compile([~ts]).~n", [CompileOptsStr])
1957       end,
1958       "\n",
1959       case gpb_lib:get_records_or_maps_by_opts(Opts) of
1960           records ->
1961               ?f("-export([encode_msg/1, encode_msg/2, encode_msg/3]).~n");
1962           maps ->
1963               ?f("-export([encode_msg/2, encode_msg/3]).~n")
1964       end,
1965       [[?f("-export([encode/1]). %% epb compatibility~n"),
1966         [?f("-export([~p/1]).~n", [gpb_lib:mk_fn(encode_, MsgName)])
1967          || {{msg,MsgName}, _Fields} <- Defs],
1968         "\n"]
1969        || gpb_lib:get_epb_functions_by_opts(Opts)],
1970       ?f("-export([decode_msg/2"),[", decode_msg/3" || NoNif], ?f("]).~n"),
1971       case gpb_lib:get_records_or_maps_by_opts(Opts) of
1972           records ->
1973               ?f("-export([merge_msgs/2, merge_msgs/3, merge_msgs/4]).~n");
1974           maps ->
1975               ?f("-export([merge_msgs/3, merge_msgs/4]).~n")
1976       end,
1977       [[?f("-export([decode/2]). %% epb compatibility~n"),
1978         [?f("-export([~p/1]).~n", [gpb_lib:mk_fn(decode_, MsgName)])
1979          || {{msg,MsgName}, _Fields} <- Defs],
1980         "\n"]
1981        || gpb_lib:get_epb_functions_by_opts(Opts)],
1982       case gpb_lib:get_records_or_maps_by_opts(Opts) of
1983           records ->
1984               ?f("-export([verify_msg/1, verify_msg/2, verify_msg/3]).~n");
1985           maps ->
1986               ?f("-export([verify_msg/2, verify_msg/3]).~n")
1987       end,
1988       ?f("-export([get_msg_defs/0]).~n"),
1989       ?f("-export([get_msg_names/0]).~n"),
1990       ?f("-export([get_group_names/0]).~n"),
1991       ?f("-export([get_msg_or_group_names/0]).~n"),
1992       ?f("-export([get_enum_names/0]).~n"),
1993       ?f("-export([find_msg_def/1, fetch_msg_def/1]).~n"),
1994       ?f("-export([find_enum_def/1, fetch_enum_def/1]).~n"),
1995       gpb_gen_introspect:format_enum_value_symbol_converter_exports(Defs),
1996       ?f("-export([get_service_names/0]).~n"),
1997       ?f("-export([get_service_def/1]).~n"),
1998       ?f("-export([get_rpc_names/1]).~n"),
1999       ?f("-export([find_rpc_def/2, fetch_rpc_def/2]).~n"),
2000       ?f("-export([get_package_name/0]).~n"),
2001       [?f("-export([descriptor/0]).~n")
2002        || gpb_lib:get_gen_descriptor_by_opts(Opts)],
2003       ?f("-export([gpb_version_as_string/0, gpb_version_as_list/0]).~n"),
2004       "\n",
2005       [["-on_load(load_nif/0).\n",
2006         "-export([load_nif/0]). %% for debugging of nif loading\n",
2007         "\n"]
2008        || DoNif],
2009       case gpb_lib:get_records_or_maps_by_opts(Opts) of
2010           records -> ?f("-include(\"~s.hrl\").~n", [Mod]);
2011           maps    -> ""
2012       end,
2013       case gpb_lib:get_defs_as_maps_or_records(Opts) of
2014           records ->
2015               [case gpb_lib:get_field_format_by_opts(Opts) of
2016                    fields_as_records ->
2017                        if AsLib ->
2018                                ?f("-include_lib(\"gpb/include/gpb.hrl\").~n");
2019                           not AsLib ->
2020                                ?f("-include(\"gpb.hrl\").~n")
2021                        end;
2022                    fields_as_proplists ->
2023                        "";
2024                    fields_as_maps ->
2025                        ""
2026                end];
2027           maps ->
2028               ""
2029       end,
2030       "\n",
2031       gpb_gen_types:format_export_types(Defs, AnRes, Opts),
2032       "\n",
2033       if not DoNif ->
2034               case gpb_lib:get_2tuples_or_maps_for_maptype_fields_by_opts(Opts)
2035               of
2036                   '2tuples' ->
2037                       gpb_gen_types:format_maps_as_msgs_record_defs(
2038                         MapsAsMsgs);
2039                   maps ->
2040                       ""
2041               end;
2042          DoNif ->
2043               ""
2044       end,
2045       [[?f("~s~n", [gpb_gen_nif:format_load_nif(Mod, Opts)]),
2046         "\n"]
2047        || DoNif],
2048       %% Enabling inlining seems to cause performance to drop drastically
2049       %% I've seen decoding performance go down from 76000 msgs/s
2050       %% to about 10000 msgs/s for a set of mixed message samples.
2051       %% f("-compile(inline).~n"),
2052       %%
2053       gpb_gen_encoders:format_encoders_top_function(Defs, AnRes, Opts),
2054       "\n",
2055       if DoNif ->
2056               ?f("~s~n", [gpb_gen_nif:format_nif_encoder_error_wrappers(
2057                             Defs, AnRes, Opts)]);
2058          not DoNif ->
2059               [gpb_gen_encoders:format_msg_encoders(Defs, AnRes, Opts,
2060                                                     true),
2061                gpb_gen_encoders:format_map_encoders(MapsAsMsgs, AnRes, Opts,
2062                                                     false),
2063                gpb_gen_encoders:format_aux_encoders(Defs, AnRes, Opts)]
2064       end,
2065       "\n",
2066       gpb_gen_decoders:format_decoders_top_function(Defs, AnRes, Opts),
2067       "\n\n",
2068       if DoNif ->
2069               [gpb_gen_nif:format_nif_decoder_error_wrappers(Defs,
2070                                                              AnRes, Opts)];
2071          not DoNif ->
2072               [gpb_gen_decoders:format_msg_decoders(Defs, AnRes, Opts),
2073                gpb_gen_decoders:format_map_decoders(MapsAsMsgs, AnRes, Opts),
2074                gpb_gen_decoders:format_aux_decoders(Defs, AnRes, Opts)]
2075       end,
2076       "\n",
2077       gpb_gen_mergers:format_msg_merge_code(Defs, AnRes, Opts),
2078       "\n",
2079       gpb_gen_verifiers:format_verifiers_top_function(Defs, AnRes, Opts),
2080       "\n",
2081       gpb_gen_verifiers:format_verifiers(Defs, AnRes, Opts),
2082       "\n",
2083       if not DoNif ->
2084               [gpb_gen_translators:format_aux_transl_helpers(),
2085                gpb_gen_translators:format_translators(Defs, AnRes, Opts)];
2086          DoNif ->
2087               [gpb_gen_translators:format_aux_transl_helpers(),
2088                gpb_gen_translators:format_merge_translators(Defs, AnRes,
2089                                                             Opts)]
2090       end,
2091       "\n",
2092       gpb_gen_introspect:format_introspection(Defs, Opts),
2093       "\n",
2094       possibly_format_descriptor(Defs, Opts),
2095       "\n",
2096       ?f("gpb_version_as_string() ->~n"),
2097       ?f("    \"~s\".~n", [gpb:version_as_string()]),
2098       "\n",
2099       ?f("gpb_version_as_list() ->~n"),
2100       ?f("    ~s.~n", [gpb_version_as_list_pretty()])],
2101      Opts).
2102
gpb_version_as_list_pretty() ->
    %% Render gpb:version_as_list() as Erlang source text in which any
    %% string components appear as quoted strings instead of lists of
    %% character codes. For example, the version "2.2-60-gb0decf3"
    %% is rendered with ~w as:
    %%     [2,2,0,0,60,[103,98,48,100,101,99,102,51]]
    %% whereas this function renders it as:
    %%     [2,2,0,0,60,"gb0decf3"]
    %% which is exactly the same term, but easier for humans to read.
    %%
    %% Approach: swap each string component for the atom x, format
    %% the resulting list and strip whitespace from the text, then
    %% let r_strs/3 put the strings back in place of each x.
    Components = gpb:version_as_list(),
    ToPlaceholder = fun(N) when is_integer(N) -> N;
                       (S) when is_list(S)    -> x
                    end,
    Skeleton = lists:map(ToPlaceholder, Components),
    Strings = [S || S <- Components, is_list(S)],
    Compact = remove_whitespaces(?ff("~p~n", [Skeleton])),
    r_strs(Compact, $x, Strings).
2116
%% Return the string S without the characters for which
%% is_whitespace_char/1 is true; the order of the rest is kept.
remove_whitespaces(S) ->
    lists:filter(fun(C) -> not is_whitespace_char(C) end, S).
%% True for space, horizontal tab and newline; false for anything else.
is_whitespace_char(C) when C =:= $\s; C =:= $\t; C =:= $\n -> true;
is_whitespace_char(_)                                      -> false.
2122
%% Walk the text and replace each occurrence of the marker character M
%% with the next string from SubStrs, formatted with ~p (so it comes
%% out quoted). All strings must have been consumed when the text
%% ends; a marker seen after the strings have run out is kept as-is.
r_strs(Text, M, SubStrs) ->
    r_strs_acc(Text, M, SubStrs, []).

%% Worker for r_strs/3; Acc holds the output so far, reversed.
r_strs_acc([M | Tl], M, [S | Rest], Acc) ->
    r_strs_acc(Tl, M, Rest, lists:reverse(?ff("~p", [S]), Acc));
r_strs_acc([C | Tl], M, SubStrs, Acc) ->
    r_strs_acc(Tl, M, SubStrs, [C | Acc]);
r_strs_acc("", _M, [], Acc) ->
    lists:reverse(Acc).
2126
%% Fetch the value of the erlc_compile_options option from Opts,
%% or "" when the option has no value to fetch.
get_erlc_compile_options_str(Opts) ->
    case proplists:lookup(erlc_compile_options, Opts) of
        {_Key, Value} -> Value;
        _NoValue      -> ""
    end.
2129
2130%% -- descr -----------------------------------------------------
2131
%% Format the text of the descriptor/0 function, if it is to be
%% generated.
%%
%% Returns "" when gpb_lib:get_gen_descriptor_by_opts(Opts) is false.
%% Otherwise returns the text of a descriptor/0 function which either
%% returns the binary produced by
%% gpb_compile_descr:encode_defs_to_descriptor(Defs) or, when that
%% function is not available, raises descr_not_avail when called.
%%
%% Availability is probed up front instead of catching undef and
%% inspecting erlang:get_stacktrace(): that BIF was deprecated in
%% Erlang/OTP 21 and removed in OTP 24, so relying on it makes the
%% fallback itself fail with undef on current releases. Probing first
%% also means that an undef raised further down the call chain is
%% left untouched and propagates with its original stack trace.
possibly_format_descriptor(Defs, Opts) ->
    case gpb_lib:get_gen_descriptor_by_opts(Opts) of
        true  -> format_descriptor_fn(Defs, is_descr_encoder_available());
        false -> ""
    end.

%% Format descriptor/0, given whether the encoder can be called.
format_descriptor_fn(Defs, true) ->
    Bin = gpb_compile_descr:encode_defs_to_descriptor(Defs),
    true = is_binary(Bin), % the encoder is expected to return a binary
    gpb_codegen:format_fn(
      descriptor, fun() -> 'bin' end,
      [replace_term(bin, Bin)]);
format_descriptor_fn(_Defs, false) ->
    gpb_codegen:format_fn(
      descriptor,
      fun() -> erlang:error(descr_not_avail) end).

%% True if gpb_compile_descr can be loaded and exports
%% encode_defs_to_descriptor/1.
is_descr_encoder_available() ->
    case code:ensure_loaded(gpb_compile_descr) of
        {module, gpb_compile_descr} ->
            erlang:function_exported(gpb_compile_descr,
                                     encode_defs_to_descriptor, 1);
        {error, _Reason} ->
            false
    end.
2155
2156%% -- hrl -----------------------------------------------------
2157
%% Format the .hrl text when messages are mapped to records. When
%% they are mapped to maps, no .hrl is produced and the marker
%% '$not_generated' is returned instead.
possibly_format_hrl(Mod, Defs, AnRes, Opts) ->
    Mapping = gpb_lib:get_records_or_maps_by_opts(Opts),
    case Mapping of
        maps    -> '$not_generated';
        records -> format_hrl(Mod, Defs, AnRes, Opts)
    end.
2163
%% Build the contents of the generated .hrl file: a banner, an
%% include guard named after the module, a <Mod>_gpb_version macro
%% holding the gpb version string, and one record definition per
%% message or group in Defs. The result is converted with
%% gpb_lib:iolist_to_utf8_or_escaped_binary/2.
format_hrl(Mod, Defs, AnRes, Opts1) ->
    Opts = [{module, Mod}|Opts1],
    ModVsn = list_to_atom(atom_to_list(Mod) ++ "_gpb_version"),
    GpbVsn = gpb:version_as_string(),
    Banner = ?f("%% Automatically generated, do not edit~n"
                "%% Generated by ~p version ~s~n",
                [?MODULE, GpbVsn]),
    GuardOpen = [?f("-ifndef(~p).~n", [Mod]),
                 ?f("-define(~p, true).~n", [Mod])],
    VsnDefine = ?f("-define(~p, \"~s\").~n", [ModVsn, GpbVsn]),
    Records = gpb_lib:nl_join(
                [gpb_gen_types:format_msg_record(Msg, Fields, AnRes,
                                                 Opts, Defs)
                 || {_,Msg,Fields} <- gpb_lib:msgs_or_groups(Defs)]),
    GuardClose = ?f("-endif.~n"),
    gpb_lib:iolist_to_utf8_or_escaped_binary(
      [Banner, "\n",
       GuardOpen, "\n",
       VsnDefine, "\n",
       Records, "\n",
       GuardClose],
      Opts).
2183
2184%% -- nif c++ code -----------------------------------------------------
2185
%% Format the NIF C++ code when the nif option is set in Opts;
%% otherwise return the marker '$not_generated'.
possibly_format_nif_cc(Mod, Defs, AnRes, Opts) ->
    WantNif = proplists:get_bool(nif, Opts),
    if WantNif     -> gpb_gen_nif:format_nif_cc(Mod, Defs, AnRes, Opts);
       not WantNif -> '$not_generated'
    end.
2191
2192%% -- compile to memory -----------------------------------------------------
2193
%% Compile the generated Erlang code in memory.
%%
%% The code is first run through nano_epp/4, then scanned, split into
%% one token list per form and parsed form by form. The forms are
%% compiled with compile:noenv_forms/2 and the outcome is combined
%% with the (possibly absent) NIF code.
%%
%% A form that fails to parse is an internal error: its tokens are
%% printed to the user device and an internal_error is raised with
%% the full context attached.
compile_to_binary(Mod, HrlText, ErlCode, PossibleNifCode, Opts) ->
    ModAsStr = flatten_iolist(?f("~p", [Mod])),
    Preprocessed = nano_epp(ErlCode, ModAsStr, HrlText, Opts),
    {ok, AllToks, _EndLine} = erl_scan:string(Preprocessed),
    ParseForm =
        fun(Ts) ->
                case erl_parse:parse_form(Ts) of
                    {ok, Form} ->
                        Form;
                    {error, Reason} ->
                        io:format(user, "Ts=~p~n", [Ts]),
                        MoreInfo = [{full_erl,Preprocessed},
                                    {hrl,HrlText},
                                    {nif,PossibleNifCode},
                                    {opts,Opts}],
                        erlang:error(
                          {internal_error,?MODULE,Mod,Ts,Reason,
                           {more_info,MoreInfo}})
                end
        end,
    Forms = lists:map(ParseForm, split_toks_at_dot(AllToks)),
    CompileResult = compile:noenv_forms(Forms, Opts),
    combine_erl_and_possible_nif(CompileResult, PossibleNifCode).
2212
%% State for the nano-epp (the minimal in-memory preprocessor below).
-record(nepp, {
          depth, %% nesting depth, for ifdef/else/endif processing
          mod,   %% the module name, as a string (ModAsStr)
          hrl,   %% text of the generated hrl, or '$not_generated'
          defs   %% dict of the macros defined so far
         }).
2218
nano_epp(Code, ModAsStr, HrlText, Opts) ->
    %% nepp = nano-erlang-preprocessor. Couldn't find a way to run
    %% the epp from a string, and don't want or need to use the file
    %% system when everything is already in memory.
    %%
    %% The dictionary of defined macros is mostly there to handle
    %% -ifdef...-endif in hrls and in the decoders. It starts out
    %% with OTP_RELEASE defined when the target Erlang version (the
    %% target_erlang_version option, defaulting to the current
    %% release) is 21 or later, since the OTP_RELEASE macro first
    %% appeared in Erlang 21.
    CurrentRelease = gpb_lib:current_otp_release(),
    TargetRelease = proplists:get_value(target_erlang_version, Opts,
                                        CurrentRelease),
    NoDefs = dict:new(),
    InitialDefs = case TargetRelease >= 21 of
                      true  -> dict:store('OTP_RELEASE', CurrentRelease,
                                          NoDefs);
                      false -> NoDefs
                  end,
    InitialState = #nepp{depth=1, mod=ModAsStr, hrl=HrlText,
                         defs=InitialDefs},
    %% All input must have been consumed when preprocessing is done.
    {Txt, <<>>, _EndNState, _EndLine} =
        nepp1(Code, InitialState, _Line=1, []),
    Txt.
2239
%% Emit the prologue of the preprocessed text, then hand over to
%% nepp2_nl/4 for the rest of the code.
%%
%% The first (non-coding) line must be a -file(...) directive, or
%% else unused record definitions in included files will produce
%% warnings, eg: {27,erl_lint,{unused_record,gpb_oneof}}. Hence, if
%% the code begins with a "%% -*- coding:" line, that line is kept
%% first and the -file directive is placed right after it; otherwise
%% the -file directive comes first.
nepp1(Code, #nepp{mod=ModAsStr}=NState, N, Acc) ->
    %% Strip the quotes of a quoted module atom to get the file name.
    Erl = (ModAsStr -- "''") ++ ".erl",
    {Prologue, Body} =
        case Code of
            <<"%% -*- coding:",_/binary>> ->
                {CodingLine, AfterCoding} = read_until(Code, "\n", ""),
                {CodingLine ++ "\n" ++ file_directive(Erl, 1), AfterCoding};
            _ ->
                {file_directive(Erl, 1), Code}
        end,
    nepp2_nl(Body, NState, N, lists:reverse(Prologue, Acc)).
2254
%% Copy the input to the (reversed) accumulator one character at a
%% time, substituting the module name for every ?MODULE. After each
%% newline the line counter is bumped and nepp2_nl/4 gets to check
%% whether the next line is a preprocessor directive. At the end of
%% the input, returns {Text, <<>>, NState, LineNo}.
nepp2(<<>>, NState, N, Acc) ->
    {lists:reverse(Acc), <<>>, NState, N};
nepp2(<<"?MODULE", Tail/binary>>, #nepp{mod=ModAsStr}=NState, N, Acc) ->
    nepp2(Tail, NState, N, lists:reverse(ModAsStr, Acc));
nepp2(<<$\n, Tail/binary>>, NState, N, Acc) ->
    nepp2_nl(Tail, NState, N+1, [$\n | Acc]);
nepp2(<<Ch, Tail/binary>>, NState, N, Acc) ->
    nepp2(Tail, NState, N, [Ch | Acc]).
2263
%% Collect and handle pre-processor directives.
%%
%% Called with the input positioned at the beginning of a line.
%% Dispatches on the directive the line starts with, if any:
%%   -include_lib / -include  -> nepp2_inc/4
%%   -define                  -> nepp2_def/4
%%   -ifdef / -ifndef         -> nepp2_ifdef/5
%%   -else.   (at depth 1)    -> skip the else-branch with nepp2_skip/4
%%   -endif.  (at depth 1)    -> return {Text, Rest, NState, LineNo},
%%                               where Rest is the input after the line
%% Any other line is copied by nepp2/4.
%%
%% Note: a clause for a longer directive name must come before the
%% clause for any directive name that is a prefix of it. In
%% particular, -include_lib must precede -include, or the clause for
%% -include_lib can never match.
nepp2_nl(<<"-include_lib", Rest/binary>>, NState, N, Acc) ->
    nepp2_inc(Rest, NState, N, Acc);
nepp2_nl(<<"-include", Rest/binary>>, NState, N, Acc) ->
    nepp2_inc(Rest, NState, N, Acc);
nepp2_nl(<<"-define", Rest/binary>>, NState, N, Acc) ->
    nepp2_def(Rest, NState, N, Acc);
nepp2_nl(<<"-ifdef", Rest/binary>>, NState, N, Acc) ->
    nepp2_ifdef(Rest, ifdef, NState, N, Acc);
nepp2_nl(<<"-ifndef", Rest/binary>>, NState, N, Acc) ->
    nepp2_ifdef(Rest, ifndef, NState, N, Acc);
nepp2_nl(<<"-else.\n", Rest/binary>>, #nepp{depth=1}=NState, N, Acc) ->
    nepp2_skip(Rest, NState, N+1, Acc);
nepp2_nl(<<"-endif.\n", Rest/binary>>, #nepp{depth=1}=NState, N, Acc) ->
    {lists:reverse(Acc), Rest, NState, N+1};
nepp2_nl(X, NState, N, Acc) ->
    nepp2(X, NState, N, Acc).
2281
%% Handle an -include or -include_lib directive; Rest is the input
%% following the directive name.
%%
%% The included text is spliced in, bracketed by -file directives: one
%% naming the included file before it, and one afterwards switching
%% back to the module's .erl file at the line after the include.
%%  * For gpb.hrl, the text is the record definitions for the field,
%%    oneof and rpc records.
%%  * For any other include, the text is the generated hrl, itself
%%    run through the preprocessor. This requires that an hrl was
%%    generated.
nepp2_inc(Rest, #nepp{mod=ModAsStr, hrl=Hrl}=NState, N, Acc) ->
    {_,       AfterParen} = read_until(Rest,       "(", ""),
    {IncText, AfterArg}   = read_until(AfterParen, ")", ""),
    {_,       NextLine}   = read_until(AfterArg,   "\n", ""),
    Inc = parse_term(IncText),
    Erl = (ModAsStr -- "''") ++ ".erl",
    {Included, NState2} =
        case classify_inc(Inc) of
            gpb_hrl ->
                {[field_record_to_text(),
                  oneof_record_to_text(),
                  rpc_record_to_text()],
                 NState};
            mod_hrl when Hrl /= '$not_generated' ->
                {HrlTxt, <<>>, HrlEndState, _EndLine} =
                    nepp2_nl(Hrl, NState, 1, []),
                {HrlTxt, HrlEndState}
        end,
    Txt = lists:flatten([file_directive(Inc, 1), Included]),
    Acc2 = lists:reverse(Txt ++ file_directive(Erl, N+1), Acc),
    nepp2_nl(NextLine, NState2, N+1, Acc2).
2303
%% Handle a -define(Sym, Val) directive; Rest is the input following
%% the directive name. The symbol and the value are parsed as terms
%% and recorded among the definitions in the state. The directive
%% line itself is consumed without being copied to the output.
nepp2_def(Rest, #nepp{defs=Ds}=NState, N, Acc) ->
    {_,        AfterParen} = read_until(Rest,       "(", ""),
    {SymText,  AfterSym}   = read_until(AfterParen, ",", ""),
    {ValText,  AfterVal}   = read_until(AfterSym,   ")", ""),
    {_,        NextLine}   = read_until(AfterVal,   "\n", ""),
    Sym = parse_term(SymText),
    Val = parse_term(ValText),
    NState2 = NState#nepp{defs=dict:store(Sym, Val, Ds)},
    nepp2_nl(NextLine, NState2, N+1, Acc).
2311
%% Handle an -ifdef(Sym) or -ifndef(Sym) directive; SkipCond says
%% which of the two it is (ifdef | ifndef) and Rest is the input
%% following the directive name.
%%
%% The conditional section is processed on its own, at depth 1 and
%% with an empty accumulator: with nepp2_nl/4 if the condition holds,
%% or else with nepp2_skip/4. Both return once the matching -endif
%% has been consumed. The resulting text is then added to Acc and
%% processing continues after the -endif at the original depth.
nepp2_ifdef(Rest, SkipCond, #nepp{depth=Depth, defs=Ds}=NState, N, Acc) ->
    {_,       AfterParen} = read_until(Rest,       "(", ""),
    {SymText, AfterSym}   = read_until(AfterParen, ")", ""),
    {_,       Section}    = read_until(AfterSym,   "\n", ""),
    IsDefined = dict:is_key(parse_term(SymText), Ds),
    Inner = NState#nepp{depth=1},
    {Txt, Remaining, NState2, N2} =
        case {SkipCond, IsDefined} of
            {ifdef,  true}  -> nepp2_nl(Section, Inner, N+1, []);
            {ifndef, false} -> nepp2_nl(Section, Inner, N+1, []);
            _               -> nepp2_skip(Section, Inner, N+1, [])
        end,
    nepp2_nl(Remaining, NState2#nepp{depth=Depth}, N2,
             lists:reverse(Txt, Acc)).
2324
%% Skip over input that is excluded by a conditional, counting lines
%% but copying nothing to the output.
%%
%% The depth in the state tracks conditionals nested inside the
%% skipped text: each -ifdef/-ifndef increases it and each -endif
%% above depth 1 decreases it. At depth 1, an -else. resumes normal
%% processing with nepp2_nl/4, and an -endif. ends the conditional,
%% returning {Text, Rest, NState, LineNo}.
nepp2_skip(<<"-else.\n", Tail/binary>>, #nepp{depth=Level}=NState, N, Acc) ->
    if Level >  1 -> nepp2_skip(Tail, NState, N+1, Acc);
       Level == 1 -> nepp2_nl(Tail, NState, N+1, Acc)
    end;
nepp2_skip(<<"-endif.\n", Tail/binary>>, #nepp{depth=Level}=NState, N, Acc) ->
    if Level >  1 -> nepp2_skip(Tail, NState#nepp{depth=Level-1}, N+1, Acc);
       Level == 1 -> {lists:reverse(Acc), Tail, NState, N+1}
    end;
nepp2_skip(<<"-ifdef", Tail/binary>>, #nepp{}=NState, N, Acc) ->
    nepp2_skip_nested(Tail, NState, N, Acc);
nepp2_skip(<<"-ifndef", Tail/binary>>, #nepp{}=NState, N, Acc) ->
    nepp2_skip_nested(Tail, NState, N, Acc);
nepp2_skip(<<$\n, Tail/binary>>, NState, N, Acc) ->
    nepp2_skip(Tail, NState, N+1, Acc);
nepp2_skip(<<_, Tail/binary>>, NState, N, Acc) ->
    nepp2_skip(Tail, NState, N, Acc).

%% A nested conditional opens inside skipped text: drop the rest of
%% its line and continue skipping one level deeper.
nepp2_skip_nested(Tail, #nepp{depth=Level}=NState, N, Acc) ->
    {_, NextLine} = read_until(Tail, "\n", ""),
    nepp2_skip(NextLine, NState#nepp{depth=Level+1}, N+1, Acc).
2343
%% Read bytes from the binary up to the first byte that is a member
%% of Delims. Returns {Read, Rest}, where Read is the bytes before
%% the delimiter, as a list preceded by the reverse of the initial
%% Acc, and Rest is the binary after the delimiter. The delimiter
%% itself is dropped. There is no clause for an empty binary, so a
%% delimiter must be present.
read_until(<<C, Rest/binary>>, Delims, Acc) ->
    IsDelim = lists:member(C, Delims),
    if IsDelim     -> {lists:reverse(Acc), Rest};
       not IsDelim -> read_until(Rest, Delims, [C | Acc])
    end.
2349
%% Parse the string S as an Erlang term. S is to be given without
%% the terminating dot; a dot token is appended before parsing.
parse_term(S) ->
    {ok, Tokens, _End} = erl_scan:string(S),
    Terminated = lists:append(Tokens, [{dot,1}]),
    {ok, Term} = erl_parse:parse_term(Terminated),
    Term.
2354
%% Classify an included file by the last component of its path:
%% gpb_hrl if that is "gpb.hrl", or else mod_hrl.
classify_inc(F) ->
    LastComponent = lists:last(filename:split(F)),
    if LastComponent =:= "gpb.hrl" -> gpb_hrl;
       true                        -> mod_hrl
    end.
2360
%% The text of a -file directive line for the file name File and
%% the line number N.
file_directive(File, N) ->
    Args = [File, N],
    ?ff("-file(\"~s\", ~p).\n", Args).
2363
%% Split a list of tokens into a list of token lists, one per form,
%% each ending with its dot token. The token list must be non-empty
%% and its last token must be a dot.
split_toks_at_dot(AllToks) ->
    %% The single-clause case is deliberate: input that lacks a
    %% terminating dot is rejected with a case_clause error.
    case lists:splitwith(fun is_no_dot/1, AllToks) of
        {FormToks, [{dot,_}=Dot | Remaining]} ->
            [FormToks ++ [Dot] | split_remaining_toks(Remaining)]
    end.

%% Continue splitting, unless all tokens have been consumed.
split_remaining_toks([])        -> [];
split_remaining_toks(Remaining) -> split_toks_at_dot(Remaining).
2369
%% False for a dot token, ie a 2-tuple tagged dot; true otherwise.
is_no_dot(Tok) ->
    case Tok of
        {dot,_} -> false;
        _       -> true
    end.
2372
%% The text of a record definition for the ?gpb_field record, with
%% its field names and default values.
field_record_to_text() ->
    FieldNames = record_info(fields, ?gpb_field),
    Defaults = #?gpb_field{},
    record_to_text(?gpb_field, FieldNames, Defaults).
2375
%% The text of a record definition for the gpb_oneof record, with
%% its field names and default values.
oneof_record_to_text() ->
    FieldNames = record_info(fields, gpb_oneof),
    Defaults = #gpb_oneof{},
    record_to_text(gpb_oneof, FieldNames, Defaults).
2378
%% The text of a record definition for the ?gpb_rpc record, with
%% its field names and default values.
rpc_record_to_text() ->
    FieldNames = record_info(fields, ?gpb_rpc),
    Defaults = #?gpb_rpc{},
    record_to_text(?gpb_rpc, FieldNames, Defaults).
2381
%% Format a -record definition as text.
%%   RecordName: the name of the record
%%   Fields:     the field names, as from record_info(fields, ...)
%%   DefaultR:   a record instance holding the default field values
%% A field whose default is undefined is written as just its name;
%% any other field is written as Name = Default.
record_to_text(RecordName, Fields, DefaultR) ->
    %% Drop the record tag to get the defaults in field order.
    Defaults = tl(tuple_to_list(DefaultR)),
    FieldTexts = lists:zipwith(fun record_field_to_text/2,
                               Fields, Defaults),
    ?f("-record(~p, {~s}).~n",
       [RecordName, gpb_lib:comma_join(FieldTexts)]).

%% Format one field of a record definition.
record_field_to_text(FName, Default) when Default == undefined ->
    ?ff("~p", [FName]);
record_field_to_text(FName, Default) ->
    ?ff("~p = ~p", [FName, Default]).
2390
%% Combine the result of compiling the Erlang code with the NIF text,
%% if any. Without NIF text (the '$not_generated' marker), the
%% compilation result is returned unchanged. With NIF text, the code
%% in a successful result, with or without warnings, is replaced by
%% the combination of code and NIF text. Any other result, such as
%% an error, is returned unchanged.
combine_erl_and_possible_nif(ErlCompilationResult, Nif) ->
    case {Nif, ErlCompilationResult} of
        {'$not_generated', _} ->
            ErlCompilationResult;
        {NifTxt, {ok, ModuleName, ErlCode}} ->
            Combined = combine_erlcode_with_niftxt(ErlCode, NifTxt),
            {ok, ModuleName, Combined};
        {NifTxt, {ok, ModuleName, ErlCode, Warnings}} ->
            Combined = combine_erlcode_with_niftxt(ErlCode, NifTxt),
            {ok, ModuleName, Combined, Warnings};
        {_NifTxt, Error} ->
            Error
    end.
2399
%% Pair up the compiled Erlang code and the NIF text as a two-element
%% list of tagged tuples: the erl part first, then the nif part.
combine_erlcode_with_niftxt(ErlCode, NifTxt) ->
    ErlPart = {erl, ErlCode},
    NifPart = {nif, NifTxt},
    [ErlPart, NifPart].
2403
2404%% -- internal utilities -----------------------------------------------------
2405
%% Turn an iolist into a flat list of bytes.
flatten_iolist(IoList) ->
    Bin = iolist_to_binary(IoList),
    binary_to_list(Bin).
2408
%% Perform the read_file file operation on FileName, via file_op/3.
file_read_file(FileName, Opts) ->
    Args = [FileName],
    file_op(read_file, Args, Opts).
2411
%% Perform the read_file_info file operation on FileName, via
%% file_op/3.
file_read_file_info(FileName, Opts) ->
    Args = [FileName],
    file_op(read_file_info, Args, Opts).
2414
%% Perform the write_file file operation, writing Bin to FileName,
%% via file_op/3.
file_write_file(FileName, Bin, Opts) ->
    Args = [FileName, Bin],
    file_op(write_file, Args, Opts).
2417
%% Write a file, unless there is nothing to write: for the marker
%% '$not_generated', nothing is written and ok is returned. A binary
%% is written to FileName by the write_file file operation.
possibly_write_file(_FileName, '$not_generated', _Opts) ->
    ok;
possibly_write_file(FileName, Bin, Opts) when is_binary(Bin) ->
    file_write_file(FileName, Bin, Opts).
2422
%% Perform a file operation, by default by calling the function
%% FnName in the file module with the arguments Args. The option
%% {file_op, Ops} can override this: if Ops has a value for the key
%% FnName, that value is applied to Args instead.
file_op(FnName, Args, Opts) ->
    case file_op_override(FnName, proplists:get_value(file_op, Opts)) of
        undefined -> apply(file, FnName, Args);
        Fn        -> apply(Fn, Args)
    end.

%% Look up the override for FnName, given the value of the file_op
%% option; undefined if there is none.
file_op_override(_FnName, undefined) -> undefined;
file_op_override(FnName, Ops)        -> proplists:get_value(FnName, Ops).
2435
%% If the option probe_defs is set, call its value with Defs and
%% return the result; otherwise return ok.
possibly_probe_defs(Defs, Opts) ->
    Probe = proplists:get_value(probe_defs, Opts, '$no'),
    if Probe =:= '$no' -> ok;
       true            -> Probe(Defs)
    end.
2441