1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #pragma once
32 
33 #include <memory>
34 #include <set>
35 #include <string>
36 #include <vector>
37 
38 #include "mongo/base/disallow_copying.h"
39 #include "mongo/db/jsobj.h"
40 #include "mongo/executor/task_executor.h"
41 #include "mongo/s/client/shard.h"
42 #include "mongo/stdx/condition_variable.h"
43 #include "mongo/stdx/mutex.h"
44 #include "mongo/stdx/unordered_map.h"
45 #include "mongo/util/concurrency/with_lock.h"
46 
47 namespace mongo {
48 
// Forward declarations of types named by the declarations in this header.
class BSONObjBuilder;
class NamespaceString;
class OperationContext;
class ServiceContext;
class Shard;
class ShardFactory;
class ShardType;
struct HostAndPort;
57 
58 class ShardRegistryData {
59 public:
60     /**
61      * Reads shards docs from the catalog client and fills in maps.
62      */
63     ShardRegistryData(OperationContext* opCtx, ShardFactory* shardFactory);
64     ShardRegistryData() = default;
65     ~ShardRegistryData() = default;
66 
67     void swap(ShardRegistryData& other);
68 
69     /**
70      * Lookup shard by replica set name. Returns nullptr if the name can't be found.
71      */
72     std::shared_ptr<Shard> findByRSName(const std::string& rsName) const;
73 
74     /**
75      * Returns a shared pointer to the shard object with the given shard id.
76      */
77     std::shared_ptr<Shard> findByShardId(const ShardId&) const;
78 
79     /**
80      * Finds the Shard that the mongod listening at this HostAndPort is a member of.
81      */
82     std::shared_ptr<Shard> findByHostAndPort(const HostAndPort&) const;
83 
84     /**
85      * Returns config shard.
86      */
87     std::shared_ptr<Shard> getConfigShard() const;
88 
89     /**
90      * Adds config shard.
91      */
92     void addConfigShard(std::shared_ptr<Shard>);
93 
94     void getAllShardIds(std::set<ShardId>& result) const;
95 
96     /**
97      * Erases known by this shardIds from the diff argument.
98      */
99     void shardIdSetDifference(std::set<ShardId>& diff) const;
100     void toBSON(BSONObjBuilder* result) const;
101     /**
102      * If the shard with same replica set name as in the newConnString already exists then replace
103      * it with the shard built for the newConnString.
104      */
105     void rebuildShardIfExists(const ConnectionString& newConnString, ShardFactory* factory);
106 
107 private:
108     /**
109      * Creates a shard based on the specified information and puts it into the lookup maps.
110      * if useOriginalCS = true it will use the ConnectionSring used for shard creation to update
111      * lookup maps. Otherwise the current connection string from the Shard's RemoteCommandTargeter
112      * will be used.
113      */
114     void _addShard(WithLock, std::shared_ptr<Shard> const&, bool useOriginalCS);
115     auto _findByShardId(WithLock, ShardId const&) const -> std::shared_ptr<Shard>;
116     void _rebuildShard(WithLock, ConnectionString const& newConnString, ShardFactory* factory);
117 
118     // Protects the lookup maps below.
119     mutable stdx::mutex _mutex;
120 
121     using ShardMap = stdx::unordered_map<ShardId, std::shared_ptr<Shard>, ShardId::Hasher>;
122 
123     // Map of both shardName -> Shard and hostName -> Shard
124     ShardMap _lookup;
125 
126     // Map from replica set name to shard corresponding to this replica set
127     ShardMap _rsLookup;
128 
129     stdx::unordered_map<HostAndPort, std::shared_ptr<Shard>> _hostLookup;
130 
131     // store configShard separately to always have a reference
132     std::shared_ptr<Shard> _configShard;
133 };
134 
135 /**
136  * Maintains the set of all shards known to the instance and their connections and exposes
137  * functionality to run commands against shards. All commands which this registry executes are
138  * retried on NotMaster class of errors and in addition all read commands are retried on network
139  * errors automatically as well.
140  */
141 class ShardRegistry {
142     MONGO_DISALLOW_COPYING(ShardRegistry);
143 
144 public:
145     /**
146      * A ShardId for the config servers.
147      */
148     static const ShardId kConfigServerShardId;
149 
150     /**
151      * Instantiates a new shard registry.
152      *
153      * @param shardFactory Makes shards
154      * @param configServerCS ConnectionString used for communicating with the config servers
155      */
156     ShardRegistry(std::unique_ptr<ShardFactory> shardFactory,
157                   const ConnectionString& configServerCS);
158 
159     ~ShardRegistry();
160     /**
161      *  Starts ReplicaSetMonitor by adding a config shard.
162      */
163     void startup(OperationContext* opCtx);
164 
165     /**
166      * This is invalid to use on the config server and will hit an invariant if it is done.
167      * If the config server has need of a connection string for itself, it should get it from the
168      * replication state.
169      *
170      * Returns the connection string for the config server.
171      */
172     ConnectionString getConfigServerConnectionString() const;
173 
174     /**
175      * Reloads the ShardRegistry based on the contents of the config server's config.shards
176      * collection. Returns true if this call performed a reload and false if this call only waited
177      * for another thread to perform the reload and did not actually reload. Because of this, it is
178      * possible that calling reload once may not result in the most up to date view. If strict
179      * reloading is required, the caller should call this method one more time if the first call
180      * returned false.
181      */
182     bool reload(OperationContext* opCtx);
183 
184     /**
185      * Takes a connection string describing either a shard or config server replica set, looks
186      * up the corresponding Shard object based on the replica set name, then updates the
187      * ShardRegistry's notion of what hosts make up that shard.
188      */
189     void updateReplSetHosts(const ConnectionString& newConnString);
190 
191     /**
192      * Returns a shared pointer to the shard object with the given shard id, or ShardNotFound error
193      * otherwise.
194      *
195      * May refresh the shard registry if there's no cached information about the shard. The shardId
196      * parameter can actually be the shard name or the HostAndPort for any
197      * server in the shard.
198      */
199     StatusWith<std::shared_ptr<Shard>> getShard(OperationContext* opCtx, const ShardId& shardId);
200 
201     /**
202      * Returns a shared pointer to the shard object with the given shard id. The shardId parameter
203      * can actually be the shard name or the HostAndPort for any server in the shard. Will not
204      * refresh the shard registry or otherwise perform any network traffic. This means that if the
205      * shard was recently added it may not be found.  USE WITH CAUTION.
206      */
207     std::shared_ptr<Shard> getShardNoReload(const ShardId& shardId);
208 
209     /**
210      * Finds the Shard that the mongod listening at this HostAndPort is a member of. Will not
211      * refresh the shard registry or otherwise perform any network traffic.
212      */
213     std::shared_ptr<Shard> getShardForHostNoReload(const HostAndPort& shardHost);
214 
215     /**
216      * Returns shared pointer to the shard object representing the config servers.
217      */
218     std::shared_ptr<Shard> getConfigShard() const;
219 
220     /**
221      * Instantiates a new detached shard connection, which does not appear in the list of shards
222      * tracked by the registry and as a result will not be returned by getAllShardIds.
223      *
224      * The caller owns the returned shard object and is responsible for disposing of it when done.
225      *
226      * @param connStr Connection string to the shard.
227      */
228     std::unique_ptr<Shard> createConnection(const ConnectionString& connStr) const;
229 
230     /**
231      * Lookup shard by replica set name. Returns nullptr if the name can't be found.
232      * Note: this doesn't refresh the table if the name isn't found, so it's possible that a
233      * newly added shard/Replica Set may not be found.
234      */
235     std::shared_ptr<Shard> lookupRSName(const std::string& name) const;
236 
237     void getAllShardIds(std::vector<ShardId>* all) const;
238     int getNumShards() const;
239 
240     void toBSON(BSONObjBuilder* result) const;
241     bool isUp() const;
242 
243     /**
244      * Initializes ShardRegistry with config shard. Must be called outside c-tor to avoid calls on
245      * this while its still not fully constructed.
246      */
247     void init();
248 
249     /**
250      * Shuts down _executor. Needs to be called explicitly because ShardRegistry is never destroyed
251      * as it's owned by the static grid object.
252      */
253     void shutdown();
254 
255     /**
256      * For use in mongos and mongod which needs notifications about changes to shard and config
257      * server replset membership to update the ShardRegistry.
258      *
259      * This is expected to be run in an existing thread.
260      */
261     static void replicaSetChangeShardRegistryUpdateHook(const std::string& setName,
262                                                         const std::string& newConnectionString);
263 
264     /**
265      * For use in mongos which needs notifications about changes to shard replset membership to
266      * update the config.shards collection.
267      *
268      * This is expected to be run in a brand new thread.
269      */
270     static void replicaSetChangeConfigServerUpdateHook(const std::string& setName,
271                                                        const std::string& newConnectionString);
272 
273 private:
274     /**
275      * Factory to create shards.  Never changed after startup so safe to access outside of _mutex.
276      */
277     const std::unique_ptr<ShardFactory> _shardFactory;
278 
279     /**
280      * Specified in the ShardRegistry c-tor. It's used only in startup() to initialize the config
281      * shard
282      */
283     ConnectionString _initConfigServerCS;
284     void _internalReload(const executor::TaskExecutor::CallbackArgs& cbArgs);
285     ShardRegistryData _data;
286 
287     // Protects the _reloadState and _initConfigServerCS during startup.
288     mutable stdx::mutex _reloadMutex;
289     stdx::condition_variable _inReloadCV;
290 
291     enum class ReloadState {
292         Idle,       // no other thread is loading data from config server in reload().
293         Reloading,  // another thread is loading data from the config server in reload().
294         Failed,     // last call to reload() caused an error when contacting the config server.
295     };
296 
297     ReloadState _reloadState{ReloadState::Idle};
298     bool _isUp{false};
299 
300     // Executor for reloading.
301     std::unique_ptr<executor::TaskExecutor> _executor{};
302 
303     // Set to true in shutdown call to prevent calling it twice.
304     bool _isShutdown{false};
305 };
306 
307 }  // namespace mongo
308