1 /**
2  * @file mega/raid.h
3  * @brief helper classes for managing cloudraid downloads
4  *
5  * (c) 2013-2017 by Mega Limited, Auckland, New Zealand
6  *
7  * This file is part of the MEGA SDK - Client Access Engine.
8  *
9  * Applications using the MEGA API must present a valid application key
 * and comply with the rules set forth in the Terms of Service.
11  *
12  * The MEGA SDK is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
15  *
16  * @copyright Simplified (2-clause) BSD License.
17  *
18  * You should have received a copy of the license along with this
19  * program.
20  */
21 
22 #ifndef MEGA_RAID_H
23 #define MEGA_RAID_H 1
24 
25 #include "http.h"
26 #include "utils.h"
27 
28 namespace mega {
29 
30     enum { RAIDPARTS = 6 };
31     enum { RAIDSECTOR = 16 };
32     enum { RAIDLINE = ((RAIDPARTS - 1)*RAIDSECTOR) };
33 
34 
35     // Holds the latest download data received.   Raid-aware.   Suitable for file transfers, or direct streaming.
36     // For non-raid files, supplies the received buffer back to the same connection for writing to file (having decrypted and mac'd it),
37     // effectively the same way it worked before raid.
38     // For raid files, collects up enough input buffers until it can combine them to make a piece of the output file.
39     // Once a piece of the output is reconstructed the caller can access it with getAsyncOutputBufferPointer().
40     // Once that piece is no longer needed, call bufferWriteCompleted to indicate that it can be deallocated.
41     class MEGA_API RaidBufferManager
42     {
43     public:
44 
45         struct FilePiece {
46             m_off_t pos;
47             HttpReq::http_buf_t buf;  // owned here
48             chunkmac_map chunkmacs;
49 
50             std::condition_variable finalizedCV;
51             bool finalized = false;
52 
53             FilePiece();
54             FilePiece(m_off_t p, size_t len);    // makes a buffer of the specified size (with extra space for SymmCipher::ctr_crypt padding)
55             FilePiece(m_off_t p, HttpReq::http_buf_t* b); // takes ownership of the buffer
56             void swap(FilePiece& other);
57 
58             // decrypt & mac
59             bool finalize(bool parallel, m_off_t filesize, int64_t ctriv, SymmCipher *cipher, chunkmac_map* source_chunkmacs);
60 
61         };
62 
63         // call this before starting a transfer. Extracts the vector content
64         void setIsRaid(const std::vector<std::string>& tempUrls, m_off_t resumepos, m_off_t readtopos, m_off_t filesize, m_off_t maxDownloadRequestSize);
65 
66         // indicate if the file is raid or not.  Most variation due to raid/non-raid is captured in this class
67         bool isRaid() const;
68 
69         // in case URLs expire, use this to update them and keep downloading without wasting any data
70         void updateUrlsAndResetPos(const std::vector<std::string>& tempUrls);
71 
72         // pass a downloaded buffer to the manager, pre-decryption.  Takes ownership of the FilePiece. May update the connection pos (for raid)
73         void submitBuffer(unsigned connectionNum, FilePiece* piece);
74 
75         // get the file output data to write to the filesystem, on the asyncIO associated with a particular connection (or synchronously).  Buffer ownership is retained here.
76         std::shared_ptr<RaidBufferManager::FilePiece> getAsyncOutputBufferPointer(unsigned connectionNum);
77 
78         // indicate that the buffer written by asyncIO (or synchronously) can now be discarded.
79         void bufferWriteCompleted(unsigned connectionNum, bool succeeded);
80 
81         // temp URL to use on a given connection.  The same on all connections for a non-raid file.
82         const std::string& tempURL(unsigned connectionNum);
83 
84         // reference to the tempurls.  Useful for caching raid and non-raid
85         const std::vector<std::string>& tempUrlVector() const;
86 
87         // Track the progress of http requests sent.  For raid download, tracks the parts.  Otherwise, uses the position through the full file.
88         virtual m_off_t& transferPos(unsigned connectionNum);
89 
90         // start this part off again (eg. after abandoning slowest connection)
91         void resetPart(unsigned connectionNum);
92 
93         // Return the size of a particluar part of the file, for raid.  Or for non-raid the size of the whole wile.
94         m_off_t transferSize(unsigned connectionNum);
95 
96         // Get the file position to upload/download to on the specified connection
97         std::pair<m_off_t, m_off_t> nextNPosForConnection(unsigned connectionNum, bool& newBufferSupplied, bool& pauseConnectionForRaid);
98 
99         // calculate the exact size of each of the 6 parts of a raid file.  Some may not have a full last sector
100         static m_off_t raidPartSize(unsigned part, m_off_t fullfilesize);
101 
102         // report a failed connection.  The function tries to switch to 5 connection raid or a different 5 connections.  Two fails without progress and we should fail the transfer as usual
103         bool tryRaidHttpGetErrorRecovery(unsigned errorConnectionNum);
104 
105         // check to see if all other channels than the one specified are up to date with data and so we could go faster with 5 connections rather than 6.
106         bool connectionRaidPeersAreAllPaused(unsigned slowConnection);
107 
108         // indicate that this connection has responded with headers, and see if we now know which is the slowest connection, and make that the unused one
109         bool detectSlowestRaidConnection(unsigned thisConnection, unsigned& slowestConnection);
110 
111         // returns how far we are through the file on average, including uncombined data
112         m_off_t progress() const;
113 
114         RaidBufferManager();
115         ~RaidBufferManager();
116 
117     private:
118 
119         // parameters to control raid download
120         enum { RaidMaxChunksPerRead = 5 };
121         enum { RaidReadAheadChunksPausePoint = 8 };
122         enum { RaidReadAheadChunksUnpausePoint = 4 };
123 
124         bool is_raid;
125         bool raidKnown;
126         m_off_t deliverlimitpos;   // end of the data that the client requested
127         m_off_t acquirelimitpos;   // end of the data that we need to deliver that (can be up to the next raidline boundary)
128         m_off_t fullfilesize;      // end of the file
129 
130         // controls buffer sizes used
131         unsigned raidLinesPerChunk;
132 
133         // of the six raid URLs, which 5 are we downloading from
134         unsigned unusedRaidConnection;
135 
136         // storage server access URLs.  It either has 6 entries for a raid file, or 1 entry for a non-raid file, or empty if we have not looked up a tempurl yet.
137         std::vector<std::string> tempurls;
138         std::string emptyReturnString;
139 
140         // a connection is paused if it reads too far ahead of others.  This prevents excessive buffer usage
141         bool connectionPaused[RAIDPARTS];
142 
143         // for raid, how far through the raid part we are currently
144         m_off_t raidrequestpartpos[RAIDPARTS];
145 
146         // for raid, the http requested data before combining
147         std::deque<FilePiece*> raidinputparts[RAIDPARTS];
148 
149         // the data to output currently, per connection, raid or non-raid.  re-accessible in case retries are needed
150         std::map<unsigned, std::shared_ptr<FilePiece>> asyncoutputbuffers;
151 
152         // piece to carry over to the next combine operation, when we don't get pieces that match the chunkceil boundaries
153         FilePiece leftoverchunk;
154 
155         // the point we are at in the raid input parts.  raidinputparts buffers contain data from this point in their part.
156         m_off_t raidpartspos;
157 
158         // the point we are at in the output file.  asyncoutputbuffers contain data from this point.
159         m_off_t outputfilepos;
160 
161         // the point we started at in the output file.
162         m_off_t startfilepos;
163 
164         // In the case of resuming a file, the point we got to in the output might not line up nicely with a sector in an input part.
165         // This field allows us to start reading on a sector boundary but skip outputting data until we match where we got to last time.
166         size_t resumewastedbytes;
167 
168         // track errors across the connections.  A successful fetch resets the error count for a connection.  Stop trying to recover if we hit 3 total.
169         unsigned raidHttpGetErrorCount[RAIDPARTS];
170 
171         bool connectionStarted[RAIDPARTS];
172 
173         // take raid input part buffers and combine to form the asyncoutputbuffers
174         void combineRaidParts(unsigned connectionNum);
175         FilePiece* combineRaidParts(size_t partslen, size_t bufflen, m_off_t filepos, FilePiece& prevleftoverchunk);
176         void recoverSectorFromParity(byte* dest, byte* inputbufs[], unsigned offset);
177         void combineLastRaidLine(byte* dest, size_t nbytes);
178         void rollInputBuffers(size_t dataToDiscard);
179         virtual void bufferWriteCompletedAction(FilePiece& r);
180 
181         // decrypt and mac downloaded chunk.  virtual so Transfer and DirectNode derivations can be different
182         // calcOutputChunkPos is used to figure out how much of the available data can be passed to it
183         virtual void finalize(FilePiece& r) = 0;
184         virtual m_off_t calcOutputChunkPos(m_off_t acquiredpos) = 0;
185 
186         friend class DebugTestHook;
187     };
188 
189 
190     class MEGA_API TransferBufferManager : public RaidBufferManager
191     {
192     public:
193         // call this before starting a transfer. Extracts the vector content
194         void setIsRaid(Transfer* transfer, std::vector<std::string>& tempUrls, m_off_t resumepos, m_off_t maxDownloadRequestSize);
195 
196         // Track the progress of http requests sent.  For raid download, tracks the parts.  Otherwise, uses the full file position in the Transfer object, as it used to prior to raid.
197         m_off_t& transferPos(unsigned connectionNum) override;
198 
199         // Get the file position to upload/download to on the specified connection
200         std::pair<m_off_t, m_off_t> nextNPosForConnection(unsigned connectionNum, m_off_t maxDownloadRequestSize, unsigned connectionCount, bool& newBufferSupplied, bool& pauseConnectionForRaid, m_off_t uploadspeed);
201 
202         TransferBufferManager();
203 
204     private:
205 
206         Transfer* transfer;
207 
208         // decrypt and mac downloaded chunk
209         void finalize(FilePiece& r) override;
210         m_off_t calcOutputChunkPos(m_off_t acquiredpos) override;
211         void bufferWriteCompletedAction(FilePiece& r) override;
212 
213         friend class DebugTestHook;
214     };
215 
216     class MEGA_API DirectReadBufferManager : public RaidBufferManager
217     {
218     public:
219 
220         // Track the progress of http requests sent.  For raid download, tracks the parts.  Otherwise, uses the full file position in the Transfer object, as it used to prior to raid.
221         m_off_t& transferPos(unsigned connectionNum) override;
222 
223         DirectReadBufferManager(DirectRead* dr);
224 
225     private:
226 
227         DirectRead* directRead;
228 
229         // decrypt and mac downloaded chunk
230         void finalize(FilePiece& r) override;
231         m_off_t calcOutputChunkPos(m_off_t acquiredpos) override;
232 
233         friend class DebugTestHook;
234     };
235 
236 
237 
238 } // namespace
239 
240 #endif
241