1%% This Source Code Form is subject to the terms of the Mozilla Public
2%% License, v. 2.0. If a copy of the MPL was not distributed with this
3%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
4%%
5%% Copyright (c) 2007-2021 VMware, Inc. or its affiliates.  All rights reserved.
6%%
7
8-module(rabbit_stomp_frame).
9
10-include("rabbit_stomp_frame.hrl").
11-include("rabbit_stomp_headers.hrl").
12
13-export([parse/2, initial_state/0]).
14-export([header/2, header/3,
15         boolean_header/2, boolean_header/3,
16         integer_header/2, integer_header/3,
17         binary_header/2, binary_header/3]).
18-export([stream_offset_header/2]).
19-export([serialize/1, serialize/2]).
20
%% Returns the parse state to hand to parse/2 when no frame is in progress.
initial_state() ->
    none.
22
23%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
24%% STOMP 1.1 frames basic syntax
25%%  Rabbit modifications:
26%%  o   CR LF is equivalent to LF in all element terminators (eol).
27%%  o   Escape codes for header names and values include \r for CR
28%%      and CR is not allowed.
29%%  o   Header names and values are not limited to UTF-8 strings.
30%%  o   Header values may contain unescaped colons
31%%
32%%  frame_seq   ::= *(noise frame)
33%%  noise       ::= *(NUL | eol)
34%%  eol         ::= LF | CR LF
35%%  frame       ::= cmd hdrs body NUL
36%%  body        ::= *OCTET
37%%  cmd         ::= 1*NOTEOL eol
38%%  hdrs        ::= *hdr eol
39%%  hdr         ::= hdrname COLON hdrvalue eol
40%%  hdrname     ::= 1*esc_char
41%%  hdrvalue    ::= *esc_char
42%%  esc_char    ::= HDROCT | BACKSLASH ESCCODE
43%%
44%% Terms in CAPS all represent sets (alternatives) of single octets.
45%% They are defined here using a small extension of BNF, minus (-):
46%%
47%%    term1 - term2         denotes any of the possibilities in term1
48%%                          excluding those in term2.
49%% In this grammar minus is only used for sets of single octets.
50%%
51%%  OCTET       ::= '00'x..'FF'x            % any octet
52%%  NUL         ::= '00'x                   % the zero octet
53%%  LF          ::= '\n'                    % '0a'x newline or linefeed
54%%  CR          ::= '\r'                    % '0d'x carriage return
55%%  NOTEOL      ::= OCTET - (CR | LF)       % any octet except CR or LF
56%%  BACKSLASH   ::= '\\'                    % '5c'x
57%%  ESCCODE     ::= 'c' | 'n' | 'r' | BACKSLASH
58%%  COLON       ::= ':'
59%%  HDROCT      ::= NOTEOL - (COLON | BACKSLASH)
60%%                                          % octets allowed in a header
61%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
62
%% explicit frame characters: octets the parser and serializer treat specially
-define(NUL,   0).      % the zero octet; skipped as noise between frames
-define(CR,    $\r).    % accepted by the parser only when immediately followed by LF
-define(LF,    $\n).    % line terminator
-define(BSL,   $\\).    % introduces an escape sequence in header names/values
-define(COLON, $:).     % separates a header name from its value

%% header escape codes: the octet that follows a backslash in a header name
%% or value, standing for LF, backslash, colon and CR respectively
-define(LF_ESC,    $n).
-define(BSL_ESC,   $\\).
-define(COLON_ESC, $c).
-define(CR_ESC,    $r).
75
%% parser state
%%   acc     - octets of the element currently being read, most recent first
%%             (reset to [] by goto/4, prepended to by accum/2)
%%   cmd     - the frame command, set once the command line is complete
%%   hdrs    - {Name, Value} headers collected so far
%%   hdrname - name of the header whose value is currently being read
-record(state, {acc, cmd, hdrs, hdrname}).
78
%% Feeds Content to the frame parser.
%% The second argument is either `none' (start looking for a new frame) or the
%% `{resume, Continuation}' term handed back inside an earlier `{more, _}'
%% result, in which case parsing picks up where it stopped.
parse(Content, none) ->
    parser(Content, noframe, #state{});
parse(Content, {resume, Continuation}) ->
    Continuation(Content).
81
%% Wraps a continuation fun in the "need more input" result; the inner
%% `{resume, Fun}' term is what parse/2 accepts as its second argument.
more(Continuation) ->
    {more, {resume, Continuation}}.
83
%% Single-function parser: Term :: noframe | command | headers | hdrname | hdrvalue
%%
%% parser(Bytes, Term, State) consumes Bytes for everything up to the frame
%% body. Term names the grammar element currently being read and State is a
%% #state{} record.
%%
%% Returns whatever parse_body/2 returns once the empty line ending the headers
%% is reached, {more, {resume, Fun}} when Bytes runs out first, or
%% {error, Reason} on malformed input.
%%
%% Clause order is significant: clauses are tried top to bottom, so the generic
%% end-of-input, CR and backslash clauses take priority over the per-Term
%% clauses that follow them.

%% general more and line-end detection
parser(<<>>,                        Term    ,  State) -> more(fun(Rest) -> parser(Rest, Term, State) end);
%% a trailing CR is held back and re-examined together with the next chunk
parser(<<?CR>>,                     Term    ,  State) -> more(fun(Rest) -> parser(<<?CR, Rest/binary>>, Term, State) end);
%% CR LF is folded into a plain LF; CR followed by anything else is an error
parser(<<?CR, ?LF,   Rest/binary>>, Term    ,  State) -> parser(<<?LF, Rest/binary>>, Term, State);
parser(<<?CR, Ch:8, _Rest/binary>>, Term    , _State) -> {error, {unexpected_chars(Term), [?CR, Ch]}};
%% escape processing (only in hdrname and hdrvalue terms)
%% a trailing backslash is held back until the next chunk arrives, whatever the Term
parser(<<?BSL>>,                    Term    ,  State) -> more(fun(Rest) -> parser(<<?BSL, Rest/binary>>, Term, State) end);
%% in any other Term the guard fails and the backslash falls through to the clauses below
parser(<<?BSL, Ch:8, Rest/binary>>, Term    ,  State)
                               when Term == hdrname;
                                    Term == hdrvalue  -> unescape(Ch, fun(Ech) -> parser(Rest, Term, accum(Ech, State)) end);
%% inter-frame noise
parser(<<?NUL,       Rest/binary>>, noframe ,  State) -> parser(Rest, noframe, State);
parser(<<?LF,        Rest/binary>>, noframe ,  State) -> parser(Rest, noframe, State);
%% detect transitions
%% (the noframe and headers catch-all clauses hand the unconsumed input to goto/4)
parser(              Rest,          noframe ,  State) -> goto(noframe,  command,  Rest, State);
parser(<<?LF,        Rest/binary>>, command ,  State) -> goto(command,  headers,  Rest, State);
parser(<<?LF,        Rest/binary>>, headers ,  State) -> goto(headers,  body,     Rest, State);
parser(              Rest,          headers ,  State) -> goto(headers,  hdrname,  Rest, State);
parser(<<?COLON,     Rest/binary>>, hdrname ,  State) -> goto(hdrname,  hdrvalue, Rest, State);
parser(<<?LF,        Rest/binary>>, hdrname ,  State) -> goto(hdrname,  headers,  Rest, State);
parser(<<?LF,        Rest/binary>>, hdrvalue,  State) -> goto(hdrvalue, headers,  Rest, State);
%% accumulate
%% any octet not claimed above becomes part of the element being read
parser(<<Ch:8,       Rest/binary>>, Term    ,  State) -> parser(Rest, Term, accum(Ch, State)).
108
%% State transitions: goto(From, To, Rest, State) performs the bookkeeping for
%% leaving element From and entering element To, then carries on with Rest.
%% Accumulated octets are kept most-recent-first, hence the lists:reverse/1.

%% start of a frame: begin collecting the command
goto(noframe, command, Rest, State) ->
    parser(Rest, command, State#state{acc = []});
%% end of the command line: record the command and start with no headers
goto(command, headers, Rest, #state{acc = RevCmd} = State) ->
    Command = lists:reverse(RevCmd),
    parser(Rest, headers, State#state{cmd = Command, hdrs = []});
%% empty line after the headers: build the frame and move on to the body
goto(headers, body, Rest, #state{cmd = Command, hdrs = Headers}) ->
    Frame = #stomp_frame{command = Command, headers = Headers},
    parse_body(Rest, Frame);
%% a new header line: begin collecting its name
goto(headers, hdrname, Rest, State) ->
    parser(Rest, hdrname, State#state{acc = []});
%% colon reached: remember the name and begin collecting the value
goto(hdrname, hdrvalue, Rest, #state{acc = RevName} = State) ->
    Name = lists:reverse(RevName),
    parser(Rest, hdrvalue, State#state{acc = [], hdrname = Name});
%% line ended before any colon: badly formed header -- fatal error
goto(hdrname, headers, _Rest, #state{acc = RevName}) ->
    {error, {header_no_value, lists:reverse(RevName)}};
%% end of a header line: store the header and go back to reading headers
goto(hdrvalue, headers, Rest,
     #state{acc = RevValue, hdrs = Headers, hdrname = Name} = State) ->
    Value    = lists:reverse(RevValue),
    Headers1 = insert_header(Headers, Name, Value),
    parser(Rest, headers, State#state{hdrs = Headers1}).
118
%% Maps the element being parsed to the atom used as the error tag when an
%% unexpected character sequence is found in it.
unexpected_chars(noframe) ->
    unexpected_chars_between_frames;
unexpected_chars(command) ->
    unexpected_chars_in_command;
unexpected_chars(Term) when Term =:= hdrname; Term =:= hdrvalue ->
    unexpected_chars_in_header;
unexpected_chars(_Other) ->
    unexpected_chars.
125
%% Pushes one octet onto the front of the accumulator, so the accumulated
%% element is held in reverse order.
accum(Octet, #state{acc = Collected} = State) ->
    State#state{acc = [Octet | Collected]}.
128
%% Resolves the escape code that followed a backslash.
%% For a known code, Fun is called with the octet the escape stands for and its
%% result is returned; any other code yields a bad_escape error carrying the
%% offending two-octet sequence.
unescape(Code, Fun) ->
    case Code of
        ?LF_ESC    -> Fun(?LF);
        ?BSL_ESC   -> Fun(?BSL);
        ?COLON_ESC -> Fun(?COLON);
        ?CR_ESC    -> Fun(?CR);
        _          -> {error, {bad_escape, [?BSL, Code]}}
    end.
135
%% Inserts a header unless one with the same name has already been seen:
%% only the first occurrence of a header name is kept. New entries are added
%% at the front of the list.
insert_header(Headers, Name, Value) ->
    case lists:keyfind(Name, 1, Headers) of
        false  -> [{Name, Value} | Headers];
        _Found -> Headers
    end.
142
%% Starts parsing a frame body with no chunks collected yet.
%% Only for SEND frames is the content-length header consulted; when it is
%% absent, and for every other command, the body length is `unknown' and the
%% body runs up to the first NUL octet.
parse_body(Content, #stomp_frame{command = "SEND"} = Frame) ->
    Length = integer_header(Frame, ?HEADER_CONTENT_LENGTH, unknown),
    parse_body(Content, Frame, [], Length);
parse_body(Content, #stomp_frame{} = Frame) ->
    parse_body(Content, Frame, [], unknown).
148
%% Decides whether Content holds the end of the body and passes that decision
%% to parse_body2/4.
%%   unknown length - the body ends at the first NUL octet, if there is one;
%%   known length   - the body ends Remaining octets in, provided Content is
%%                    longer than that (the terminator must be present too).
parse_body(Content, Frame, Chunks, unknown) ->
    Decision = case firstnull(Content) of
                   -1  -> {more, unknown};
                   Pos -> {done, Pos}
               end,
    parse_body2(Content, Frame, Chunks, Decision);
parse_body(Content, Frame, Chunks, Remaining) ->
    Decision = case byte_size(Content) of
                   Size when Remaining >= Size -> {more, Remaining - Size};
                   _Size                       -> {done, Remaining}
               end,
    parse_body2(Content, Frame, Chunks, Decision).
160
%% Acts on the decision taken by parse_body/4.
%%
%%   {more, Left} - all of Content belongs to the body: keep it as a chunk and
%%                  ask for more input, with Left describing what is still
%%                  expected.
%%   {done, Pos}  - the body ends Pos octets into Content and must be followed
%%                  by the NUL frame terminator.
%%
%% Returns {more, {resume, Fun}}, {ok, Frame, Rest} with the collected chunks
%% stored in arrival order as body_iolist, or
%% {error, {missing_body_terminator, Pos}} when the octet at Pos is not NUL or
%% Pos is not a usable length (e.g. a negative content-length). Malformed
%% input is reported as an error tuple, as parser/3 does, rather than crashing
%% on a failed match.
parse_body2(Content, Frame, Chunks, {more, Left}) ->
    Chunks1 = finalize_chunk(Content, Chunks),
    more(fun(Rest) -> parse_body(Rest, Frame, Chunks1, Left) end);
parse_body2(Content, Frame, Chunks, {done, Pos}) ->
    case Content of
        <<Chunk:Pos/binary, 0, Rest/binary>> ->
            Body = lists:reverse(finalize_chunk(Chunk, Chunks)),
            {ok, Frame#stomp_frame{body_iolist = Body}, Rest};
        _ ->
            {error, {missing_body_terminator, Pos}}
    end.
168
%% Adds Chunk to the front of the (reversed) chunk list, skipping empty chunks.
finalize_chunk(Chunk, Chunks) when Chunk =:= <<>> ->
    Chunks;
finalize_chunk(Chunk, Chunks) ->
    [Chunk | Chunks].
171
%% Unwraps a header lookup result, substituting Default for not_found.
default_value(not_found, Default) ->
    Default;
default_value({ok, Found}, _Default) ->
    Found.
174
%% Looks up header Key in the frame.
%% Returns {ok, Value} for the first matching {Key, Value} entry, otherwise
%% not_found.
header(#stomp_frame{headers = Headers}, Key) ->
    case lists:keyfind(Key, 1, Headers) of
        {_Key, Value} -> {ok, Value};
        _             -> not_found
    end.
180
%% Like header/2, but returns the bare value, or D when the header is absent.
header(F, K, D) ->
    Lookup = header(F, K),
    default_value(Lookup, D).
182
%% Reads header Key as a boolean.
%% Returns {ok, true | false} when the value is "true"/"false" (some Python
%% clients serialize True/False as "True"/"False", which are accepted too);
%% an absent header or any other value gives not_found.
boolean_header(#stomp_frame{headers = Headers}, Key) ->
    case lists:keyfind(Key, 1, Headers) of
        {_, Value} when Value =:= "true";  Value =:= "True"  -> {ok, true};
        {_, Value} when Value =:= "false"; Value =:= "False" -> {ok, false};
        _                                                    -> not_found
    end.
192
%% Like boolean_header/2, but returns the bare boolean, or D when
%% boolean_header/2 gives not_found.
boolean_header(F, K, D) ->
    Lookup = boolean_header(F, K),
    default_value(Lookup, D).
194
%% Looks up Key in a list of {Name, Value} headers and converts the value to
%% an integer.
%% Returns {ok, Integer}, or not_found when there is no {Key, Value} entry.
%% Only surrounding blanks (spaces) are stripped before conversion; a value
%% that is still not a valid integer makes list_to_integer/1 raise badarg.
internal_integer_header(Headers, Key) ->
    case lists:keyfind(Key, 1, Headers) of
        {_Key, Str} -> {ok, list_to_integer(string:trim(Str, both, " "))};
        _           -> not_found
    end.
200
%% Reads header Key of the frame as an integer; see internal_integer_header/2
%% for the result shape.
integer_header(#stomp_frame{headers = FrameHeaders}, Key) ->
    internal_integer_header(FrameHeaders, Key).
203
%% Like integer_header/2, but returns the bare integer, or D when the header
%% is absent.
integer_header(F, K, D) ->
    Lookup = integer_header(F, K),
    default_value(Lookup, D).
205
%% Reads header K of the frame and converts its value to a binary with
%% list_to_binary/1. Returns {ok, Binary} or not_found.
binary_header(F, K) ->
    case header(F, K) of
        not_found   -> not_found;
        {ok, Value} -> {ok, list_to_binary(Value)}
    end.
211
%% Like binary_header/2, but returns the bare binary, or D when the header is
%% absent.
binary_header(F, K, D) ->
    Lookup = binary_header(F, K),
    default_value(Lookup, D).
213
%% Translates the stream offset header of frame F into a tagged value:
%%
%%   "first" | "last" | "next"  -> {longstr, Binary}
%%   "offset=<integer>"         -> {long, Integer}
%%   "timestamp=<integer>"      -> {timestamp, Integer}
%%
%% D is returned when the header is absent or its value is not one of the
%% forms above. That includes an "offset="/"timestamp=" value whose numeric
%% part is not a valid integer: it gets the same fallback as any other
%% unrecognised value instead of raising badarg.
stream_offset_header(F, D) ->
    case binary_header(F, ?HEADER_X_STREAM_OFFSET, D) of
        <<"first">> ->
            {longstr, <<"first">>};
        <<"last">> ->
            {longstr, <<"last">>};
        <<"next">> ->
            {longstr, <<"next">>};
        <<"offset=", OffsetValue/binary>> ->
            stream_offset_integer(long, OffsetValue, D);
        <<"timestamp=", TimestampValue/binary>> ->
            stream_offset_integer(timestamp, TimestampValue, D);
        _ ->
            D
    end.

%% Converts the numeric part of an offset specification, tagging it with Tag;
%% returns D when Digits is not the text of an integer.
stream_offset_integer(Tag, Digits, D) ->
    try binary_to_integer(Digits) of
        Value -> {Tag, Value}
    catch
        error:badarg -> D
    end.
229
%% Serializes a frame with a trailing linefeed; same as serialize(Frame, true).
serialize(Frame) ->
    AddTrailingLF = true,
    serialize(Frame, AddTrailingLF).
232
%% Serializes a frame to an iolist: command line, header lines, an empty
%% line, the body and the terminating zero octet.
%%
%% A content-length entry among the frame's headers is dropped (keydelete/3
%% removes the first match) and a fresh one is emitted from the actual body
%% size, but only when the body is non-empty.
%%
%% The second argument controls whether a trailing linefeed character should
%% be added after the terminator, see rabbitmq/rabbitmq-stomp#39.
serialize(Frame, true) ->
    serialize(Frame, false) ++ [?LF];
serialize(#stomp_frame{command = Command,
                       headers = Headers,
                       body_iolist = BodyFragments}, false) ->
    ContentLength = case iolist_size(BodyFragments) of
                        0   -> [];
                        Len -> [?HEADER_CONTENT_LENGTH ++ ":",
                                integer_to_list(Len), ?LF]
                    end,
    OtherHeaders = lists:keydelete(?HEADER_CONTENT_LENGTH, 1, Headers),
    [Command, ?LF,
     [serialize_header(Header) || Header <- OtherHeaders],
     ContentLength,
     ?LF, BodyFragments, 0].
249
%% Renders one {Name, Value} header as a line. The name is always escaped;
%% string values are escaped too, integers and booleans are written in their
%% textual form. Values of any other type are not supported.
serialize_header({Name, Value}) when is_list(Value) ->
    hdr(escape(Name), escape(Value));
serialize_header({Name, Value}) when is_integer(Value) ->
    hdr(escape(Name), integer_to_list(Value));
serialize_header({Name, Value}) when is_boolean(Value) ->
    hdr(escape(Name), boolean_to_list(Value)).
253
%% Textual form of a boolean header value: "true" for true, "false" for
%% anything else.
boolean_to_list(Flag) ->
    case Flag of
        true -> "true";
        _    -> "false"
    end.
256
%% Builds one header line as an iolist: name, colon, value, linefeed.
hdr(Name, Value) ->
    [Name, ?COLON, Value, ?LF].
258
%% Escapes every character of a header name or value; the result is an iolist
%% because an escaped character becomes a two-element list.
escape(Str) ->
    [escape1(Char) || Char <- Str].
260
%% Escapes a single character: colon, backslash, LF and CR become a backslash
%% followed by their escape code; every other character is returned unchanged.
escape1(Char) ->
    case Char of
        ?COLON -> [?BSL, ?COLON_ESC];
        ?BSL   -> [?BSL, ?BSL_ESC];
        ?LF    -> [?BSL, ?LF_ESC];
        ?CR    -> [?BSL, ?CR_ESC];
        _      -> Char
    end.
266
%% Returns the zero-based offset of the first zero octet in the binary
%% Content, or -1 when it contains none.
firstnull(Content) ->
    case binary:match(Content, <<0>>) of
        {Pos, _Length} -> Pos;
        nomatch        -> -1
    end.
272