1%% This Source Code Form is subject to the terms of the Mozilla Public
2%% License, v. 2.0. If a copy of the MPL was not distributed with this
3%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
4%%
5%% Copyright (c) 2007-2021 VMware, Inc. or its affiliates.  All rights reserved.
6%%
7
8-module(rabbit_msg_file).
9
10-export([append/3, read/2, scan/4]).
11
12%%----------------------------------------------------------------------------
13
14-include_lib("rabbit_common/include/rabbit_msg_store.hrl").
15
%% On-disk layout of one entry, as written by append/3:
%%   <<Size:64, MsgId:16/binary, Body/binary, WriteOkMarker:8>>
%% where Size counts the MsgId and Body bytes only.

%% Width of the size prefix that starts every entry.
-define(INTEGER_SIZE_BYTES,      8).
-define(INTEGER_SIZE_BITS,       (?INTEGER_SIZE_BYTES * 8)).
%% Value and width of the marker that closes every entry.
-define(WRITE_OK_MARKER,         255).
-define(WRITE_OK_SIZE_BITS,      8).
%% Framing bytes around an entry: the size prefix plus the marker byte.
-define(FILE_PACKING_ADJUSTMENT, (?INTEGER_SIZE_BYTES + 1)).
%% Message ids must be exactly this long (enforced by append/3).
-define(MSG_ID_SIZE_BYTES,       16).
-define(MSG_ID_SIZE_BITS,        (?MSG_ID_SIZE_BYTES * 8)).
%% Largest chunk requested from the file per read while scanning.
-define(SCAN_BLOCK_SIZE,         (4 * 1024 * 1024)). %% 4MB
24
25%%----------------------------------------------------------------------------
26
%% Handle that is handed unchanged to file_handle_cache.
-type io_device() :: term().
%% Byte offset of an entry, as counted by scan/4.
-type position() :: non_neg_integer().
%% Size of a whole entry including its framing, i.e. the value
%% returned by append/3 and expected by read/2.
-type msg_size() :: non_neg_integer().
%% Number of bytes scan/4 is allowed to read.
-type file_size() :: non_neg_integer().
%% Fold callback invoked by scan/4 for each entry whose write-ok
%% marker is present.
-type message_accumulator(A) ::
        fun (({MsgId :: rabbit_types:msg_id(),
               TotalSize :: msg_size(),
               Offset :: position(),
               Body :: binary()},
              A) -> A).
34
35%%----------------------------------------------------------------------------
36
%% Serialises MsgBody with term_to_binary/1 and appends one entry to
%% FileHdl: a size prefix, the message id, the serialised body and the
%% write-ok marker.
%%
%% MsgId must be a binary of exactly ?MSG_ID_SIZE_BYTES bytes; any
%% other value fails with function_clause.
%%
%% Returns {ok, TotalSize}, where TotalSize is the number of bytes the
%% entry occupies including its framing. Any result of
%% file_handle_cache:append/2 other than 'ok' is returned unchanged.
-spec append(io_device(), rabbit_types:msg_id(), msg()) ->
          rabbit_types:ok_or_error2(msg_size(), any()).

append(FileHdl, <<_:?MSG_ID_SIZE_BYTES/binary>> = MsgId, MsgBody) ->
    Payload = term_to_binary(MsgBody),
    %% The size prefix covers the id and the payload, not the framing.
    Size = byte_size(Payload) + ?MSG_ID_SIZE_BYTES,
    Entry = <<Size:?INTEGER_SIZE_BITS,
              MsgId/binary,
              Payload/binary,
              ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>,
    case file_handle_cache:append(FileHdl, Entry) of
        ok ->
            {ok, Size + ?FILE_PACKING_ADJUSTMENT};
        Other ->
            Other
    end.
53
%% Reads TotalSize bytes from FileHdl and decodes them as one entry.
%%
%% TotalSize is the framed size of the entry (the value append/3
%% returns). The bytes read must consist of a size prefix equal to
%% TotalSize minus the framing, a message id, the serialised body and
%% the write-ok marker; in that case {ok, {MsgId, Msg}} is returned
%% with the body decoded by binary_to_term/1.
%%
%% Every other result of file_handle_cache:read/2 is returned
%% unchanged. That includes a successful read whose bytes do not have
%% the expected layout, so callers must match on the shape of the
%% 'ok' payload.
-spec read(io_device(), msg_size()) ->
          rabbit_types:ok_or_error2({rabbit_types:msg_id(), msg()},
                                    any()).

read(FileHdl, TotalSize) ->
    ExpectedSize = TotalSize - ?FILE_PACKING_ADJUSTMENT,
    PayloadBytes = ExpectedSize - ?MSG_ID_SIZE_BYTES,
    case file_handle_cache:read(FileHdl, TotalSize) of
        {ok, <<ExpectedSize:?INTEGER_SIZE_BITS,
               MsgId:?MSG_ID_SIZE_BYTES/binary,
               Payload:PayloadBytes/binary,
               ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS>>} ->
            {ok, {MsgId, binary_to_term(Payload)}};
        Other ->
            Other
    end.
69
%% Folds Fun over the entries found in the next FileSize bytes read
%% from FileHdl.
%%
%% Fun is called as Fun({MsgId, TotalSize, Offset, Body}, Acc) for
%% every entry that carries the write-ok marker; entries with any other
%% marker byte are stepped over without calling Fun.
%%
%% Returns {ok, Acc, Position}, where Position is the offset reached by
%% the scanner, counted from 0 at the first byte read. Reading starts
%% wherever the handle is currently positioned; this function never
%% seeks. NOTE(review): callers presumably position the handle at the
%% start of the file first - confirm.
%%
%% FileSize must be a non-negative integer. The integer test is needed
%% because a bare `FileSize >= 0' is also true for atoms, tuples,
%% binaries and so on (numbers sort before all other terms), and would
%% let such values through to fail later inside the read loop.
-spec scan(io_device(), file_size(), message_accumulator(A), A) ->
          {'ok', A, position()}.

scan(FileHdl, FileSize, Fun, Acc)
  when is_integer(FileSize) andalso FileSize >= 0 ->
    %% Start with no leftover bytes, nothing read and nothing scanned.
    scan(FileHdl, FileSize, <<>>, 0, 0, Fun, Acc).
75
%% Read loop behind scan/4.
%%
%% Leftover holds the bytes of a trailing, not yet complete entry from
%% the previous block. ReadOffset counts the bytes read so far and
%% ScanOffset the bytes the scanner has consumed. The loop ends once
%% FileSize bytes have been read, or as soon as a read returns anything
%% other than {ok, Binary}; in the latter case the result accumulated
%% so far is returned rather than an error.
scan(_FileHdl, FileSize, _Leftover, ReadOffset, ScanOffset, _Fun, Acc)
  when ReadOffset =:= FileSize ->
    {ok, Acc, ScanOffset};
scan(FileHdl, FileSize, Leftover, ReadOffset, ScanOffset, Fun, Acc) ->
    %% Never ask for more than one block, nor for more than remains.
    BlockSize = min(?SCAN_BLOCK_SIZE, FileSize - ReadOffset),
    case file_handle_cache:read(FileHdl, BlockSize) of
        {ok, Block} ->
            Buffer = <<Leftover/binary, Block/binary>>,
            {Leftover1, Acc1, ScanOffset1} =
                scanner(Buffer, ScanOffset, Fun, Acc),
            scan(FileHdl, FileSize, Leftover1,
                 ReadOffset + byte_size(Block), ScanOffset1, Fun, Acc1);
        _NotOk ->
            {ok, Acc, ScanOffset}
    end.
89
%% Consumes as many complete entries as the buffer holds.
%%
%% Returns {Leftover, Acc, Offset}: the bytes of a trailing incomplete
%% entry (to be prepended to the next block), the updated accumulator
%% and the offset just past the last entry consumed.
scanner(<<>>, Offset, _Fun, Acc) ->
    {<<>>, Acc, Offset};
scanner(<<0:?INTEGER_SIZE_BITS, _/binary>>, Offset, _Fun, Acc) ->
    %% A zero size prefix stops the scan of this buffer; the bytes
    %% after it are dropped rather than kept as leftover.
    {<<>>, Acc, Offset};
scanner(<<Size:?INTEGER_SIZE_BITS, Entry:Size/binary,
          ?WRITE_OK_MARKER:?WRITE_OK_SIZE_BITS, Tail/binary>>,
        Offset, Fun, Acc) ->
    %% Complete entry whose write-ok marker is present: report it.
    EntrySize = Size + ?FILE_PACKING_ADJUSTMENT,
    %% Option 5 from
    %% https://www.erlang.org/cgi-bin/ezmlm-cgi?2:mss:1569 : the
    %% MsgId is read as a number and turned back into a binary, to
    %% work around bugs in Erlang's GC.
    <<MsgIdNum:?MSG_ID_SIZE_BITS, Msg/binary>> = <<Entry:Size/binary>>,
    <<MsgId:?MSG_ID_SIZE_BYTES/binary>> = <<MsgIdNum:?MSG_ID_SIZE_BITS>>,
    scanner(Tail, Offset + EntrySize, Fun,
            Fun({MsgId, EntrySize, Offset, Msg}, Acc));
scanner(<<Size:?INTEGER_SIZE_BITS, _Entry:Size/binary,
          _OtherMarker:?WRITE_OK_SIZE_BITS, Tail/binary>>,
        Offset, Fun, Acc) ->
    %% Complete entry with any other marker byte: step over it
    %% without calling Fun.
    scanner(Tail, Offset + (Size + ?FILE_PACKING_ADJUSTMENT), Fun, Acc);
scanner(Incomplete, Offset, _Fun, Acc) ->
    %% Not enough bytes for a whole entry yet; hand them back.
    {Incomplete, Acc, Offset}.
115