1%% @author Bob Ippolito <bob@mochimedia.com>
2%% @copyright 2008 Mochi Media, Inc.
3
4%% @doc String Formatting for Erlang, inspired by Python 2.6
5%%      (<a href="http://www.python.org/dev/peps/pep-3101/">PEP 3101</a>).
6%%
7-module(mochifmt).
8-author('bob@mochimedia.com').
9-export([format/2, format_field/2, convert_field/2, get_value/2, get_field/2]).
10-export([tokenize/1, format/3, get_field/3, format_field/3]).
11-export([bformat/2, bformat/3]).
12-export([f/2, f/3]).
13
14-record(conversion, {length, precision, ctype, align, fill_char, sign}).
15
%% @spec tokenize(S::string()) -> tokens()
%% @doc Parse the format string S once into mochifmt's internal token
%%      form, {mochifmt, Parts}. format/3 accepts this value in place of
%%      a format string, so a format can be tokenized once and reused.
tokenize(S) ->
    Parts = tokenize(S, "", []),
    {?MODULE, Parts}.
20
%% @spec convert_field(Arg, Conversion::conversion()) -> term()
%% @doc Apply the explicit conversion that follows "!" in a field.
%%      The empty conversion returns Arg untouched, "r" returns repr(Arg)
%%      and "s" returns str(Arg). Any other conversion fails with
%%      function_clause.
convert_field(Value, []) ->
    Value;
convert_field(Value, [$r]) ->
    repr(Value);
convert_field(Value, [$s]) ->
    str(Value).
29
%% @spec get_value(Key::string(), Args::args()) -> term()
%% @doc Look Key up in Args.
%%      For a tuple, Key must parse as an integer N and the result is
%%      element(N + 1, Args) (keys are zero-based). For a list, the same
%%      positional lookup is attempted with lists:nth/2 first; if that
%%      raises any error (Key is not an integer, index out of range) Args
%%      is searched as a proplist for Key as a string, an existing atom
%%      or a binary.
get_value(Key, Args) when is_tuple(Args) ->
    Index = list_to_integer(Key),
    element(Index + 1, Args);
get_value(Key, Args) when is_list(Args) ->
    try
        Index = list_to_integer(Key),
        lists:nth(Index + 1, Args)
    catch
        error:_ ->
            %% Not a usable position: treat Args as a proplist instead.
            {_Found, Value} = proplist_lookup(Key, Args),
            Value
    end.
44
%% @spec get_field(Key::string(), Args) -> term()
%% @doc Resolve a possibly dotted Key such as "0.1" against Args using
%%      this module's get_value/2 for every "."-separated part; each
%%      part is looked up in the result of the previous one.
%%      Equivalent to get_field(Key, Args, mochifmt).
get_field(Key, Args) ->
    get_field(Key, Args, ?MODULE).
51
%% @spec get_field(Key::string(), Args, Module) -> term()
%% @doc Resolve a possibly dotted Key against Args. The part of Key
%%      before the first "." is looked up with Module:get_value/2,
%%      falling back to this module's get_value/2 when that call raises
%%      undef. The remainder of Key, if any, is then resolved against
%%      the value just found. This implements formats such as {0.0}.
get_field(Key, Args, Module) ->
    {Head, Tail} = lists:splitwith(fun (Ch) -> Ch =/= $. end, Key),
    Value = try
                Module:get_value(Head, Args)
            catch
                error:undef ->
                    get_value(Head, Args)
            end,
    case Tail of
        [] ->
            Value;
        [$. | More] ->
            %% Descend into the value we just found with the rest of the key.
            get_field(More, Value, Module)
    end.
66
%% @spec format(Format::string(), Args) -> iolist()
%% @doc Render Format with Args using this module's default callbacks.
%%      Equivalent to format(Format, Args, mochifmt).
format(Format, Args) ->
    format(Format, Args, ?MODULE).
71
%% @spec format(Format::string(), Args, Module) -> iolist()
%% @doc Render Format with Args, using Module for field lookup,
%%      conversion and formatting. Format is either a format string or
%%      the result of tokenize/1; a string is tokenized first.
format({?MODULE, Parts}, Args, Module) ->
    format2(Parts, Args, Module, []);
format(S, Args, Module) ->
    {?MODULE, Parts} = tokenize(S),
    format2(Parts, Args, Module, []).
78
%% @spec format_field(Arg, Format) -> iolist()
%% @doc Render the single value Arg according to the format specifier
%%      Format. Equivalent to format_field(Arg, Format, mochifmt).
format_field(Arg, Format) ->
    format_field(Arg, Format, ?MODULE).
83
%% @spec format_field(Arg, Format, _Module) -> iolist()
%% @doc Render the single value Arg according to the format specifier
%%      Format (fill/align, sign, zero flag, width, precision and type,
%%      as parsed by parse_std_conversion/1). When Format names no type,
%%      one is chosen from Arg by default_ctype/2. The value is
%%      converted, then signed, then padded. The module argument is
%%      ignored.
format_field(Arg, Format, _Module) ->
    F = default_ctype(Arg, parse_std_conversion(Format)),
    Converted = convert2(Arg, F),
    %% fix_sign/2 is handed the converted text, and a list or binary
    %% always compares greater than 0 in Erlang term order, so it would
    %% prepend "+" or " " to negative numbers as well ("+-4"). Negative
    %% numbers already carry their "-", so they skip the sign step.
    Signed = case is_number(Arg) andalso Arg < 0 of
                 true ->
                     %% Flattened so do_padding/5 can see the leading $-
                     %% even when convert2/2 returned a nested list.
                     lists:flatten(Converted);
                 false ->
                     fix_sign(Converted, F)
             end,
    fix_padding(Signed, F).
89
%% @spec f(Format::string(), Args) -> string()
%% @doc Render Format with Args and return the result as a flat
%%      string(). Equivalent to f(Format, Args, mochifmt).
f(Format, Args) ->
    f(Format, Args, ?MODULE).
94
%% @spec f(Format::string(), Args, Module) -> string()
%% @doc Render Format with Args using Module and return a string().
%%      A Format without any "{" has no fields to substitute, so the
%%      formatting machinery is skipped. The "}}" escape is still
%%      collapsed to "}" on that path so that the result agrees with
%%      what format/3 and bformat/3 produce for the same input.
f(Format, Args, Module) ->
    case lists:member(${, Format) of
        true ->
            binary_to_list(bformat(Format, Args, Module));
        false ->
            unescape_close_braces(Format)
    end.

%% Replace every "}}" with "}", scanning left to right exactly as the
%% tokenizer does; all other characters are copied unchanged.
unescape_close_braces([$}, $} | Rest]) ->
    [$} | unescape_close_braces(Rest)];
unescape_close_braces([C | Rest]) ->
    [C | unescape_close_braces(Rest)];
unescape_close_braces([]) ->
    [].
104
%% @spec bformat(Format::string(), Args) -> binary()
%% @doc Render Format with Args using this module's default callbacks
%%      and return the result as a single binary().
bformat(Format, Args) ->
    bformat(Format, Args, ?MODULE).
109
%% @spec bformat(Format::string(), Args, Module) -> binary()
%% @doc Render Format with Args using Module and collapse the resulting
%%      iolist into a single binary().
bformat(Format, Args, Module) ->
    Rendered = format(Format, Args, Module),
    iolist_to_binary(Rendered).
114
115%% Internal API
116
%% Push the pending literal text onto the token accumulator as a
%% {raw, Text} token. Pending is held in reverse order, so it is
%% reversed here; empty pending text adds nothing.
add_raw([], Tokens) ->
    Tokens;
add_raw(Pending, Tokens) ->
    Text = lists:reverse(Pending),
    [{raw, Text} | Tokens].
121
%% Scan a format string into a list of {raw, Text} and
%% {format, {Name, Transform, Spec}} tokens.
%% Pending holds the literal text seen since the last token, in
%% reverse; Tokens holds the finished tokens, also in reverse.
%% "{{" and "}}" are escapes for a literal brace, a lone "{" opens a
%% replacement field, and every other character is literal text.
tokenize([], Pending, Tokens) ->
    lists:reverse(add_raw(Pending, Tokens));
tokenize([${, ${ | Rest], Pending, Tokens) ->
    tokenize(Rest, [${ | Pending], Tokens);
tokenize([${ | Rest], Pending, Tokens) ->
    {Field, AfterField} = tokenize_format(Rest),
    FieldToken = {format, make_format(Field)},
    %% Flush the literal text that preceded the field before adding it.
    tokenize(AfterField, [], [FieldToken | add_raw(Pending, Tokens)]);
tokenize([$}, $} | Rest], Pending, Tokens) ->
    tokenize(Rest, [$} | Pending], Tokens);
tokenize([Char | Rest], Pending, Tokens) ->
    tokenize(Rest, [Char | Pending], Tokens).
133
%% Split off the body of a replacement field. S is the text just after
%% the opening "{"; the result is {FieldBody, RestAfterClosingBrace}.
%% Nested braces are kept inside the body, so "0:{1}}x" yields
%% {"0:{1}", "x"}.
tokenize_format(S) ->
    tokenize_format(S, 1, []).

%% Depth counts the braces currently open (the field's own "{" is 1);
%% Body accumulates the field text in reverse.
tokenize_format([$} | Rest], 1, Body) ->
    {lists:reverse(Body), Rest};
tokenize_format([$} | Rest], Depth, Body) ->
    tokenize_format(Rest, Depth - 1, [$} | Body]);
tokenize_format([${ | Rest], Depth, Body) ->
    tokenize_format(Rest, Depth + 1, [${ | Body]);
tokenize_format([Char | Rest], Depth, Body) ->
    tokenize_format(Rest, Depth, [Char | Body]).
145
%% Break a field body "Name!Transform:Spec" into {Name, Transform, Spec}.
%% The spec is everything after the first ":"; the transform is whatever
%% follows the first "!" in the part before that ":". Missing parts are
%% returned as "".
make_format(S) ->
    {NameAndTransform, Spec} = split_at_first($:, S),
    {Name, Transform} = split_at_first($!, NameAndTransform),
    {Name, Transform, Spec}.

%% Split S around the first occurrence of Sep, dropping Sep itself.
%% Without any Sep the result is {S, ""}.
split_at_first(Sep, S) ->
    case lists:splitwith(fun (C) -> C =/= Sep end, S) of
        {Before, [Sep | After]} ->
            {Before, After};
        {_, []} ->
            {S, []}
    end.
160
%% Find the first {Key, Value} pair in P whose key is the string S, the
%% already-existing atom named S, or the binary form of S, and return
%% that pair. When S cannot be converted to an atom or binary, a fresh
%% reference stands in for that form so it can never match.
%% Fails with function_clause when no pair matches.
proplist_lookup(S, P) ->
    AtomKey = try
                  list_to_existing_atom(S)
              catch
                  error:_ -> make_ref()
              end,
    BinKey = try
                 list_to_binary(S)
             catch
                 error:_ -> make_ref()
             end,
    proplist_lookup2({S, AtomKey, BinKey}, P).

%% Walk the list; elements that are not 2-tuples are skipped.
proplist_lookup2({KS, KA, KB} = Keys, [{K, _V} = Pair | Rest]) ->
    case KS =:= K orelse KA =:= K orelse KB =:= K of
        true ->
            Pair;
        false ->
            proplist_lookup2(Keys, Rest)
    end;
proplist_lookup2(Keys, [_ | Rest]) ->
    proplist_lookup2(Keys, Rest).
173
%% Render a token list into an iolist. Raw tokens are emitted as they
%% are. For a field token the spec is itself formatted against Args
%% first (so nested fields such as {0:{1}} work), then the field is
%% looked up, converted and formatted.
format2([], _Args, _Module, Acc) ->
    lists:reverse(Acc);
format2([{raw, Text} | Tokens], Args, Module, Acc) ->
    format2(Tokens, Args, Module, [Text | Acc]);
format2([{format, {Key, Convert, Spec0}} | Tokens], Args, Module, Acc) ->
    Spec = f(Spec0, Args, Module),
    Rendered = format2_field(Key, Convert, Spec, Args, Module),
    format2(Tokens, Args, Module, [Rendered | Acc]).

%% Look up, convert and format one field. With this module as the
%% callback module the local functions are called directly. With any
%% other module each step is tried on that module first and falls back
%% to the local implementation when the call raises undef.
format2_field(Key, Convert, Spec, Args, ?MODULE) ->
    Raw = get_field(Key, Args),
    Converted = convert_field(Raw, Convert),
    format_field(Converted, Spec);
format2_field(Key, Convert, Spec, Args, Module) ->
    Raw = try
              Module:get_field(Key, Args)
          catch
              error:undef -> get_field(Key, Args, Module)
          end,
    Converted = try
                    Module:convert_field(Raw, Convert)
                catch
                    error:undef -> convert_field(Raw, Convert)
                end,
    try
        Module:format_field(Converted, Spec)
    catch
        error:undef -> format_field(Converted, Spec, Module)
    end.
194
%% Fill in the conversion type when the format specifier did not name
%% one: decimal for integers, general for floats, string for everything
%% else. A conversion that already has a type is returned unchanged.
default_ctype(_Arg, #conversion{ctype=Type} = Conv) when Type =/= undefined ->
    Conv;
default_ctype(Arg, Conv) ->
    Type = if
               is_integer(Arg) -> decimal;
               is_float(Arg) -> general;
               true -> string
           end,
    Conv#conversion{ctype=Type}.
203
%% Pad Arg up to the width requested by the conversion. Without a width
%% nothing is done. The current size is measured with iolist_size/1, so
%% it is counted in bytes. The fill character defaults to a space; the
%% alignment defaults to left for the string type and right otherwise.
%% Arg is returned unchanged when it already fills the width.
fix_padding(Arg, #conversion{length=undefined}) ->
    Arg;
fix_padding(Arg, #conversion{length=Width, fill_char=FillSpec,
                             align=AlignSpec, ctype=Type} = Conv) ->
    case Width - iolist_size(Arg) of
        Missing when Missing > 0 ->
            Fill = fix_padding_fill(FillSpec),
            Align = fix_padding_align(AlignSpec, Type),
            do_padding(Arg, Missing, Fill, Align, Conv);
        _ ->
            Arg
    end.

%% The fill character to use: a space unless one was given.
fix_padding_fill(undefined) ->
    $\s;
fix_padding_fill(Fill) ->
    Fill.

%% The alignment to use: an explicit alignment wins, otherwise strings
%% go left and every other type goes right.
fix_padding_align(undefined, string) ->
    left;
fix_padding_align(undefined, _Type) ->
    right;
fix_padding_align(Align, _Type) ->
    Align.
232
%% Add Padding copies of Fill to Arg according to the alignment:
%%   right      - all padding before Arg
%%   left       - all padding after Arg
%%   center     - padding split on both sides; when Padding is odd the
%%                extra fill character goes on the right
%%   sign_right - padding between the sign and the rest of Arg, so that
%%                zero padding produces "-0042" rather than "000-42"
%% Returns an iolist.
do_padding(Arg, Padding, Fill, right, _F) ->
    [lists:duplicate(Padding, Fill), Arg];
do_padding(Arg, Padding, Fill, center, _F) ->
    LPadding = lists:duplicate(Padding div 2, Fill),
    RPadding = case Padding band 1 of
                   1 ->
                       [Fill | LPadding];
                   _ ->
                       LPadding
               end,
    [LPadding, Arg, RPadding];
do_padding(Arg, Padding, Fill, sign_right, #conversion{sign=Sign}) ->
    Pad = lists:duplicate(Padding, Fill),
    %% Arg may be a nested iolist (io_lib:format/2 returns one), so a
    %% list pattern on its head cannot reliably find the sign. Inspect
    %% the flattened bytes instead.
    case iolist_to_binary(Arg) of
        <<$-, Rest/binary>> ->
            [$-, Pad, Rest];
        <<C, Rest/binary>> when C =:= Sign, (C =:= $+ orelse C =:= $\s) ->
            %% A "+" or " " counts as a sign only when the conversion
            %% asked for that sign.
            [C, Pad, Rest];
        Flat ->
            [Pad, Flat]
    end;
do_padding(Arg, Padding, Fill, left, _F) ->
    [Arg | lists:duplicate(Padding, Fill)].
254
%% Prepend the sign character requested by the conversion ("+" or " ")
%% when Arg >= 0; any other sign setting leaves Arg unchanged.
%% NOTE(review): the guard uses Erlang term ordering. format_field/3
%% calls this with the already-converted output rather than the number,
%% and lists and binaries sort after every number, so for such input
%% the guard holds whatever the sign of the original value was.
fix_sign(Arg, #conversion{sign=Sign})
  when (Sign =:= $+ orelse Sign =:= $\s), Arg >= 0 ->
    [Sign, Arg];
fix_sign(Arg, _F) ->
    Arg.
261
%% Map the type character at the end of a format specifier to the
%% internal conversion type atom. Unknown characters fail with
%% function_clause.
%% Text types.
ctype($s) -> string;
ctype($c) -> char;
%% Integer types.
ctype($d) -> decimal;
ctype($b) -> bin;
ctype($o) -> oct;
ctype($x) -> hex;
ctype($X) -> upper_hex;
%% Floating point types.
ctype($e) -> exp;
ctype($f) -> fixed;
ctype($g) -> general;
ctype($%) -> percent.
273
%% Map an alignment character from a format specifier to the internal
%% alignment atom. "=" requests padding between the sign and the digits.
%% Unknown characters fail with function_clause.
align($=) -> sign_right;
align($^) -> center;
align($>) -> right;
align($<) -> left.
278
%% Convert Arg to text according to the conversion type:
%%   percent    - Arg * 100 in fixed notation, followed by "%"
%%   string     - str(Arg)
%%   bin/oct    - integer in base 2 / base 8
%%   hex        - integer in base 16, lower case (upper_hex: upper case)
%%   char       - the code point Arg; values from 16#80 up are encoded
%%                as UTF-8 bytes
%%   decimal    - integer in base 10
%%   general    - without a precision, mochinum:digits/1 when that
%%                module is available, else io_lib "~g"
%%   general/fixed/exp - io_lib "~g" / "~f" / "~e", with the precision
%%                inserted when one was given
convert2(Arg, #conversion{ctype=percent} = Conv) ->
    Scaled = 100.0 * Arg,
    [convert2(Scaled, Conv#conversion{ctype=fixed}), $%];
convert2(Arg, #conversion{ctype=string}) ->
    str(Arg);
convert2(Arg, #conversion{ctype=bin}) ->
    erlang:integer_to_list(Arg, 2);
convert2(Arg, #conversion{ctype=oct}) ->
    erlang:integer_to_list(Arg, 8);
convert2(Arg, #conversion{ctype=upper_hex}) ->
    erlang:integer_to_list(Arg, 16);
convert2(Arg, #conversion{ctype=hex}) ->
    UpperHex = erlang:integer_to_list(Arg, 16),
    string:to_lower(UpperHex);
convert2(Arg, #conversion{ctype=char}) when Arg < 16#80 ->
    [Arg];
convert2(Arg, #conversion{ctype=char}) ->
    xmerl_ucs:to_utf8(Arg);
convert2(Arg, #conversion{ctype=decimal}) ->
    integer_to_list(Arg);
convert2(Arg, #conversion{ctype=general, precision=undefined}) ->
    try
        mochinum:digits(Arg)
    catch
        error:undef -> io_lib:format("~g", [Arg])
    end;
convert2(Arg, #conversion{ctype=Type, precision=Precision})
  when Type =:= general; Type =:= fixed; Type =:= exp ->
    io_lib:format(convert2_directive(Type, Precision), [Arg]).

%% Build the io_lib control sequence for a float type, e.g. "~f" or
%% "~.3e".
convert2_directive(Type, undefined) ->
    [$~, convert2_letter(Type)];
convert2_directive(Type, Precision) ->
    "~." ++ integer_to_list(Precision) ++ [convert2_letter(Type)].

%% The io_lib control character for each float type.
convert2_letter(general) -> $g;
convert2_letter(fixed) -> $f;
convert2_letter(exp) -> $e.
310
%% Human-readable text for a term (the "s" conversion and the string
%% type). Atoms and integers are converted to their text; floats go
%% through mochinum:digits/1, or io_lib "~g" when that module is not
%% available; lists and binaries are returned as they are; anything
%% else is rendered with repr/1.
str(Term) ->
    if
        is_atom(Term) ->
            atom_to_list(Term);
        is_integer(Term) ->
            integer_to_list(Term);
        is_float(Term) ->
            try
                mochinum:digits(Term)
            catch
                error:undef -> io_lib:format("~g", [Term])
            end;
        is_list(Term) ->
            Term;
        is_binary(Term) ->
            Term;
        true ->
            repr(Term)
    end.
324
%% Erlang-syntax text for a term (the "r" conversion). Floats go
%% through mochinum:digits/1, or float_to_list/1 when that module is
%% not available; every other term is printed with io_lib "~p".
repr(Term) ->
    case is_float(Term) of
        true ->
            try
                mochinum:digits(Term)
            catch
                error:undef -> float_to_list(Term)
            end;
        false ->
            io_lib:format("~p", [Term])
    end.
330
%% Parse a format specifier string into a #conversion{} record.
parse_std_conversion(S) ->
    parse_std_conversion(S, #conversion{}).

%% Consume the specifier from the left, one component per step:
%%   - a fill character followed by an alignment character (< > = ^)
%%   - an alignment character on its own
%%   - a sign (+, - or space)
%%   - "0", which selects zero fill and, unless an alignment was already
%%     given, sign_right alignment
%%   - a run of digits, the width
%%   - "." followed by digits, the precision ("." alone is skipped)
%%   - a single final type character
parse_std_conversion([], Conv) ->
    Conv;
parse_std_conversion([Fill, Align | Rest], Conv)
  when Align =:= $<; Align =:= $>; Align =:= $=; Align =:= $^ ->
    parse_std_conversion(Rest, Conv#conversion{fill_char=Fill,
                                               align=align(Align)});
parse_std_conversion([Align | Rest], Conv)
  when Align =:= $<; Align =:= $>; Align =:= $=; Align =:= $^ ->
    parse_std_conversion(Rest, Conv#conversion{align=align(Align)});
parse_std_conversion([Sign | Rest], Conv)
  when Sign =:= $+; Sign =:= $-; Sign =:= $\s ->
    parse_std_conversion(Rest, Conv#conversion{sign=Sign});
parse_std_conversion([$0 | Rest], Conv) ->
    Align = case Conv#conversion.align of
                undefined ->
                    sign_right;
                Given ->
                    Given
            end,
    parse_std_conversion(Rest, Conv#conversion{fill_char=$0, align=Align});
parse_std_conversion([D | _] = Spec, Conv) when D >= $0, D =< $9 ->
    IsDigit = fun (C) -> C >= $0 andalso C =< $9 end,
    {Width, Rest} = lists:splitwith(IsDigit, Spec),
    parse_std_conversion(Rest, Conv#conversion{length=list_to_integer(Width)});
parse_std_conversion([$. | Spec], Conv) ->
    IsDigit = fun (C) -> C >= $0 andalso C =< $9 end,
    case lists:splitwith(IsDigit, Spec) of
        {[], Rest} ->
            parse_std_conversion(Rest, Conv);
        {Digits, Rest} ->
            Precision = list_to_integer(Digits),
            parse_std_conversion(Rest, Conv#conversion{precision=Precision})
    end;
parse_std_conversion([Type], Conv) ->
    Conv#conversion{ctype=ctype(Type)}.
367
368
369%%
370%% Tests
371%%
372-ifdef(TEST).
373-include_lib("eunit/include/eunit.hrl").
374
%% tokenize/1 yields raw tokens for literal text, format tokens for
%% fields, and keeps them in source order.
tokenize_test() ->
    OnlyRaw = tokenize("ABC"),
    {?MODULE, [{raw, "ABC"}]} = OnlyRaw,
    OnlyField = tokenize("{0}"),
    {?MODULE, [{format, {"0", "", ""}}]} = OnlyField,
    Mixed = tokenize("ABC{1}DEF"),
    {?MODULE, [{raw, "ABC"}, {format, {"1", "", ""}}, {raw, "DEF"}]} = Mixed,
    ok.
381
%% End-to-end checks of bformat/2. Each case is
%% {ExpectedBinary, Format, Args} and is checked in order; a mismatch
%% fails with badmatch.
format_test() ->
    Cases =
        [%% Width and default alignment, including a nested width field.
         {<<"  -4">>, "{0:4}", [-4]},
         {<<"   4">>, "{0:4}", [4]},
         {<<"   4">>, "{0:{0}}", [4]},
         {<<"4   ">>, "{0:4}", ["4"]},
         {<<"4   ">>, "{0:{0}}", ["4"]},
         %% Positional arguments of mixed types.
         {<<"1.2yoDEF">>, "{2}{0}{1}{3}", {yo, "DE", 1.2, <<"F">>}},
         %% Integer bases and characters.
         {<<"cafebabe">>, "{0:x}", {16#cafebabe}},
         {<<"CAFEBABE">>, "{0:X}", {16#cafebabe}},
         {<<"CAFEBABE">>, "{0:X}", {16#cafebabe}},
         {<<"755">>, "{0:o}", {8#755}},
         {<<"a">>, "{0:c}", {97}},
         %% Horizontal ellipsis
         {<<226, 128, 166>>, "{0:c}", {16#2026}},
         {<<"11">>, "{0:b}", {3}},
         {<<"11">>, "{0:b}", [3]},
         %% Proplist keys as atom, string and binary.
         {<<"11">>, "{three:b}", [{three, 3}]},
         {<<"11">>, "{three:b}", [{"three", 3}]},
         {<<"11">>, "{three:b}", [{<<"three">>, 3}]},
         %% Explicit conversion, dotted keys, zero padding.
         {<<"\"foo\"">>, "{0!r}", {"foo"}},
         {<<"2008-5-4">>, "{0.0}-{0.1}-{0.2}", {{2008,5,4}}},
         {<<"2008-05-04">>, "{0.0:04}-{0.1:02}-{0.2:02}", {{2008,5,4}}},
         {<<"foo6bar-6">>, "foo{1}{0}-{1}", {bar, 6}},
         {<<"-'atom test'-">>, "-{arg!r}-", [{arg, 'atom test'}]},
         {<<"2008-05-04">>, "{0.0:0{1.0}}-{0.1:0{1.1}}-{0.2:0{1.2}}",
          {{2008,5,4}, {4, 2, 2}}}],
    lists:foreach(fun ({Expected, Format, Args}) ->
                          Expected = bformat(Format, Args)
                  end,
                  Cases),
    ok.
409
%% Formatting through the callback module returned by mochifmt_std:new/0
%% (defined outside this file) gives the same output as the default
%% module for plain positional fields.
std_test() ->
    Std = mochifmt_std:new(),
    Output = bformat("{0}{1}", [0, 1], Std),
    <<"01">> = Output,
    ok.
414
%% The callback module returned by mochifmt_records:new/1 (defined
%% outside this file) resolves record field names as keys, both
%% directly through get_value/2 and from inside format strings.
records_test() ->
    Fields = record_info(fields, conversion),
    M = mochifmt_records:new([{conversion, Fields}]),
    Rec = #conversion{length=long, precision=hard, sign=peace},
    long = M:get_value("length", Rec),
    hard = M:get_value("precision", Rec),
    peace = M:get_value("sign", Rec),
    <<"long hard">> = bformat("{length} {precision}", Rec, M),
    <<"long hard">> = bformat("{0.length} {0.precision}", [Rec], M),
    ok.
424
425-endif.
426