1 /*
2    Copyright 2013-2015 Skytechnology sp. z o.o.
3 
4    This file is part of LizardFS.
5 
6    LizardFS is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation, version 3.
9 
10    LizardFS is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with LizardFS. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #pragma once
20 
21 #include "common/platform.h"
22 
23 #include <condition_variable>
24 #include <functional>
25 #include <list>
26 #include <memory>
27 #include <string>
28 #include <vector>
29 
30 #include "common/io_limit_group.h"
31 #include "common/io_limits_database.h"
32 #include "common/time_utils.h"
33 
34 namespace ioLimiting {
35 
/**
 * High-level I/O limiter class. It lets one configure limiting by registering a
 * reconfiguration function and calling it, and request a resource assignment. A limiter
 * can be reconfigured many times.
 *
 * This is an abstract base class. For now it can represent an I/O limiter running locally
 * in mount, remotely in master, locally in tests, etc.
 */
44 struct Limiter {
45 	// request bandwidth allocation and return obtained result (lower or equal to 'size')
46 	virtual uint64_t request(const IoLimitGroupId& groupId, uint64_t size) = 0;
47 
48 	// Type of a function that will be called to handle a reconfiguration
49 	typedef std::function<void (
50 			uint32_t /* delta */,
51 			const std::string& /* subsystem */,
52 			const std::vector<IoLimitGroupId>&  /* valid groups */)
53 	> ReconfigurationFunction;
54 
55 	// register reconfiguration callback
56 	void registerReconfigure(ReconfigurationFunction);
~LimiterLimiter57 	virtual ~Limiter() {}
58 protected:
59 	ReconfigurationFunction reconfigure_;
60 };
61 
// Abstract clock used by the limiting mechanism, introduced mostly for testing purposes.
// Should be monotonic.
64 struct Clock {
65 	// A method that returns a current time of a clock
66 	virtual SteadyTimePoint now() = 0;
67 	// A method that sleeps until now() > 'time'
68 	virtual void sleepUntil(SteadyTimePoint time) = 0;
~ClockClock69 	virtual ~Clock() {}
70 };
71 
72 // Real time clock
73 struct RTClock : public Clock {
74 	SteadyTimePoint now() override;
75 	void sleepUntil(SteadyTimePoint time) override;
76 };
77 
78 // State shared by 'Group' instances.
79 struct SharedState {
SharedStateSharedState80 	SharedState(Limiter& limiter, std::chrono::microseconds delta) :
81 		limiter(limiter), delta(delta) {}
82 	// A limiter that is used (local or remote)
83 	Limiter& limiter;
84 	// If a user of a group requests a resource assignment and its request
85 	// isn't fully satisfied, it should not send another request sooner then
86 	// after 'delta' microseconds:
87 	std::chrono::microseconds delta;
88 };
89 
90 // Single IO limiting group, allowing users to wait for a resource assignment
91 class Group {
92 public:
Group(const SharedState & shared,const std::string & groupId,Clock & clock)93 	Group(const SharedState& shared, const std::string& groupId, Clock& clock) : shared_(shared),
94 		groupId_(groupId), reserve_(0), lastRequestSuccessful_(true), dead_(false), clock_(clock) {}
~Group()95 	virtual ~Group() {}
96 
97 	// wait until we are allowed to transfer size bytes, return MFS status
98 	uint8_t wait(uint64_t size, const SteadyTimePoint deadline, std::unique_lock<std::mutex>& lock);
99 	// notify all waitees that the group has been removed
100 	void die();
101 private:
102 	// we keep some information about past and pending requests in order to calculate
103 	// suitable size for bandwidth reservations
104 	struct PastRequest {
PastRequestPastRequest105 		PastRequest(SteadyTimePoint creationTime, uint64_t size)
106 			: creationTime(creationTime), size(size) { }
107 		SteadyTimePoint creationTime;
108 		uint64_t size;
109 	};
110 	struct PendingRequest {
PendingRequestPendingRequest111 		PendingRequest(uint64_t size) : size(size) {}
112 		std::condition_variable cond;
113 		uint64_t size;
114 	};
115 	typedef std::list<PastRequest> PastRequests;
116 	typedef std::list<PendingRequest> PendingRequests;
117 
118 	PendingRequests::iterator enqueue(uint64_t size);
119 	void dequeue(PendingRequests::iterator it);
120 	bool isFirst(PendingRequests::iterator) const;
121 	bool attempt(uint64_t size);
122 	void askMaster(std::unique_lock<std::mutex>& lock);
123 	void notifyQueue();
124 
125 	const SharedState& shared_;
126 	const std::string groupId_;
127 	PastRequests pastRequests_;
128 	PendingRequests pendingRequests_;
129 	uint64_t reserve_;
130 	// We keep start time of the last sent to master request in order not to communicate with
131 	// the master to often.
132 	SteadyTimePoint lastRequestStartTime_;
133 	// We also keep the timestamp of the end of the last communication with master, in order not
134 	// to use a bandwidth that we obtained a long time ago. Note that due to the fact that the
135 	// communication with master can be slow, we can't used start time for this purpose. Note also
136 	// that we neither can resign from using start time for purposes described above, due to the
137 	// fact that some decisions are made on a basis of its value before the communication starts.
138 	SteadyTimePoint lastRequestEndTime_;
139 	bool lastRequestSuccessful_;
140 	bool dead_;
141 	Clock& clock_;
142 };
143 
144 } // namespace ioLimiting
145