1 /*
2 	This file is part of solidity.
3 
4 	solidity is free software: you can redistribute it and/or modify
5 	it under the terms of the GNU General Public License as published by
6 	the Free Software Foundation, either version 3 of the License, or
7 	(at your option) any later version.
8 
9 	solidity is distributed in the hope that it will be useful,
10 	but WITHOUT ANY WARRANTY; without even the implied warranty of
11 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 	GNU General Public License for more details.
13 
14 	You should have received a copy of the GNU General Public License
15 	along with solidity.  If not, see <http://www.gnu.org/licenses/>.
16 */
17 // SPDX-License-Identifier: GPL-3.0
18 /**
19  * @file KnownState.cpp
20  * @author Christian <c@ethdev.com>
21  * @date 2015
22  * Contains knowledge about the state of the virtual machine at a specific instruction.
23  */
24 
25 #include <libevmasm/KnownState.h>
26 #include <libevmasm/AssemblyItem.h>
27 #include <libsolutil/Keccak256.h>
28 
29 #include <functional>
30 
31 using namespace std;
32 using namespace solidity;
33 using namespace solidity::evmasm;
34 using namespace solidity::langutil;
35 
stream(ostream & _out) const36 ostream& KnownState::stream(ostream& _out) const
37 {
38 	auto streamExpressionClass = [this](ostream& _out, Id _id)
39 	{
40 		auto const& expr = m_expressionClasses->representative(_id);
41 		_out << "  " << dec << _id << ": ";
42 		if (!expr.item)
43 			_out << " no item";
44 		else if (expr.item->type() == UndefinedItem)
45 			_out << " unknown " << static_cast<int>(expr.item->data());
46 		else
47 			_out << *expr.item;
48 		if (expr.sequenceNumber)
49 			_out << "@" << dec << expr.sequenceNumber;
50 		_out << "(";
51 		for (Id arg: expr.arguments)
52 			_out << dec << arg << ",";
53 		_out << ")" << endl;
54 	};
55 
56 	_out << "=== State ===" << endl;
57 	_out << "Stack height: " << dec << m_stackHeight << endl;
58 	_out << "Equivalence classes:" << endl;
59 	for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass)
60 		streamExpressionClass(_out, eqClass);
61 
62 	_out << "Stack:" << endl;
63 	for (auto const& it: m_stackElements)
64 	{
65 		_out << "  " << dec << it.first << ": ";
66 		streamExpressionClass(_out, it.second);
67 	}
68 	_out << "Storage:" << endl;
69 	for (auto const& it: m_storageContent)
70 	{
71 		_out << "  ";
72 		streamExpressionClass(_out, it.first);
73 		_out << ": ";
74 		streamExpressionClass(_out, it.second);
75 	}
76 	_out << "Memory:" << endl;
77 	for (auto const& it: m_memoryContent)
78 	{
79 		_out << "  ";
80 		streamExpressionClass(_out, it.first);
81 		_out << ": ";
82 		streamExpressionClass(_out, it.second);
83 	}
84 
85 	return _out;
86 }
87 
feedItem(AssemblyItem const & _item,bool _copyItem)88 KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem)
89 {
90 	StoreOperation op;
91 	if (_item.type() == Tag)
92 	{
93 		// can be ignored
94 	}
95 	else if (_item.type() == AssignImmutable)
96 	{
97 		// Since AssignImmutable breaks blocks, it should be fine to only consider its changes to the stack, which
98 		// is the same as two POPs.
99 		// Note that the StoreOperation for POP is generic and _copyItem is ignored.
100 		feedItem(AssemblyItem(Instruction::POP), _copyItem);
101 		return feedItem(AssemblyItem(Instruction::POP), _copyItem);
102 	}
103 	else if (_item.type() == VerbatimBytecode)
104 	{
105 		m_sequenceNumber += 2;
106 		resetMemory();
107 		resetKnownKeccak256Hashes();
108 		resetStorage();
109 		// Consume all arguments and place unknown return values on the stack.
110 		m_stackElements.erase(
111 			m_stackElements.upper_bound(m_stackHeight - static_cast<int>(_item.arguments())),
112 			m_stackElements.end()
113 		);
114 		m_stackHeight += static_cast<int>(_item.deposit());
115 		for (size_t i = 0; i < _item.returnValues(); ++i)
116 			setStackElement(
117 				m_stackHeight - static_cast<int>(i),
118 				m_expressionClasses->newClass(_item.location())
119 			);
120 	}
121 	else if (_item.type() != Operation)
122 	{
123 		assertThrow(_item.deposit() == 1, InvalidDeposit, "");
124 		if (_item.pushedValue())
125 			// only available after assembly stage, should not be used for optimisation
126 			setStackElement(++m_stackHeight, m_expressionClasses->find(*_item.pushedValue()));
127 		else
128 			setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem));
129 	}
130 	else
131 	{
132 		Instruction instruction = _item.instruction();
133 		InstructionInfo info = instructionInfo(instruction);
134 		if (SemanticInformation::isDupInstruction(_item))
135 			setStackElement(
136 				m_stackHeight + 1,
137 				stackElement(
138 					m_stackHeight - static_cast<int>(instruction) + static_cast<int>(Instruction::DUP1),
139 					_item.location()
140 				)
141 			);
142 		else if (SemanticInformation::isSwapInstruction(_item))
143 			swapStackElements(
144 				m_stackHeight,
145 				m_stackHeight - 1 - static_cast<int>(instruction) + static_cast<int>(Instruction::SWAP1),
146 				_item.location()
147 			);
148 		else if (instruction != Instruction::POP)
149 		{
150 			vector<Id> arguments(static_cast<size_t>(info.args));
151 			for (size_t i = 0; i < static_cast<size_t>(info.args); ++i)
152 				arguments[i] = stackElement(m_stackHeight - static_cast<int>(i), _item.location());
153 			switch (_item.instruction())
154 			{
155 			case Instruction::SSTORE:
156 				op = storeInStorage(arguments[0], arguments[1], _item.location());
157 				break;
158 			case Instruction::SLOAD:
159 				setStackElement(
160 					m_stackHeight + static_cast<int>(_item.deposit()),
161 					loadFromStorage(arguments[0], _item.location())
162 				);
163 				break;
164 			case Instruction::MSTORE:
165 				op = storeInMemory(arguments[0], arguments[1], _item.location());
166 				break;
167 			case Instruction::MLOAD:
168 				setStackElement(
169 					m_stackHeight + static_cast<int>(_item.deposit()),
170 					loadFromMemory(arguments[0], _item.location())
171 				);
172 				break;
173 			case Instruction::KECCAK256:
174 				setStackElement(
175 					m_stackHeight + static_cast<int>(_item.deposit()),
176 					applyKeccak256(arguments.at(0), arguments.at(1), _item.location())
177 				);
178 				break;
179 			default:
180 				bool invMem =
181 					SemanticInformation::memory(_item.instruction()) == SemanticInformation::Write;
182 				bool invStor =
183 					SemanticInformation::storage(_item.instruction()) == SemanticInformation::Write;
184 				// We could be a bit more fine-grained here (CALL only invalidates part of
185 				// memory, etc), but we do not for now.
186 				if (invMem)
187 				{
188 					resetMemory();
189 					resetKnownKeccak256Hashes();
190 				}
191 				if (invStor)
192 					resetStorage();
193 				if (invMem || invStor)
194 					m_sequenceNumber += 2; // Increment by two because it can read and write
195 				assertThrow(info.ret <= 1, InvalidDeposit, "");
196 				if (info.ret == 1)
197 					setStackElement(
198 						m_stackHeight + static_cast<int>(_item.deposit()),
199 						m_expressionClasses->find(_item, arguments, _copyItem)
200 					);
201 			}
202 		}
203 		m_stackElements.erase(
204 			m_stackElements.upper_bound(m_stackHeight + static_cast<int>(_item.deposit())),
205 			m_stackElements.end()
206 		);
207 		m_stackHeight += static_cast<int>(_item.deposit());
208 	}
209 	return op;
210 }
211 
212 /// Helper function for KnownState::reduceToCommonKnowledge, removes everything from
213 /// _this which is not in or not equal to the value in _other.
intersect(Mapping & _this,Mapping const & _other)214 template <class Mapping> void intersect(Mapping& _this, Mapping const& _other)
215 {
216 	for (auto it = _this.begin(); it != _this.end();)
217 		if (_other.count(it->first) && _other.at(it->first) == it->second)
218 			++it;
219 		else
220 			it = _this.erase(it);
221 }
222 
reduceToCommonKnowledge(KnownState const & _other,bool _combineSequenceNumbers)223 void KnownState::reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers)
224 {
225 	int stackDiff = m_stackHeight - _other.m_stackHeight;
226 	for (auto it = m_stackElements.begin(); it != m_stackElements.end();)
227 		if (_other.m_stackElements.count(it->first - stackDiff))
228 		{
229 			Id other = _other.m_stackElements.at(it->first - stackDiff);
230 			if (it->second == other)
231 				++it;
232 			else
233 			{
234 				set<u256> theseTags = tagsInExpression(it->second);
235 				set<u256> otherTags = tagsInExpression(other);
236 				if (!theseTags.empty() && !otherTags.empty())
237 				{
238 					theseTags.insert(otherTags.begin(), otherTags.end());
239 					it->second = tagUnion(theseTags);
240 					++it;
241 				}
242 				else
243 					it = m_stackElements.erase(it);
244 			}
245 		}
246 		else
247 			it = m_stackElements.erase(it);
248 
249 	// Use the smaller stack height. Essential to terminate in case of loops.
250 	if (m_stackHeight > _other.m_stackHeight)
251 	{
252 		map<int, Id> shiftedStack;
253 		for (auto const& stackElement: m_stackElements)
254 			shiftedStack[stackElement.first - stackDiff] = stackElement.second;
255 		m_stackElements = move(shiftedStack);
256 		m_stackHeight = _other.m_stackHeight;
257 	}
258 
259 	intersect(m_storageContent, _other.m_storageContent);
260 	intersect(m_memoryContent, _other.m_memoryContent);
261 	if (_combineSequenceNumbers)
262 		m_sequenceNumber = max(m_sequenceNumber, _other.m_sequenceNumber);
263 }
264 
operator ==(KnownState const & _other) const265 bool KnownState::operator==(KnownState const& _other) const
266 {
267 	if (m_storageContent != _other.m_storageContent || m_memoryContent != _other.m_memoryContent)
268 		return false;
269 	int stackDiff = m_stackHeight - _other.m_stackHeight;
270 	auto thisIt = m_stackElements.cbegin();
271 	auto otherIt = _other.m_stackElements.cbegin();
272 	for (; thisIt != m_stackElements.cend() && otherIt != _other.m_stackElements.cend(); ++thisIt, ++otherIt)
273 		if (thisIt->first - stackDiff != otherIt->first || thisIt->second != otherIt->second)
274 			return false;
275 	return (thisIt == m_stackElements.cend() && otherIt == _other.m_stackElements.cend());
276 }
277 
stackElement(int _stackHeight,SourceLocation const & _location)278 ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location)
279 {
280 	if (m_stackElements.count(_stackHeight))
281 		return m_stackElements.at(_stackHeight);
282 	// Stack element not found (not assigned yet), create new unknown equivalence class.
283 	return m_stackElements[_stackHeight] =
284 			m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location));
285 }
286 
relativeStackElement(int _stackOffset,SourceLocation const & _location)287 KnownState::Id KnownState::relativeStackElement(int _stackOffset, SourceLocation const& _location)
288 {
289 	return stackElement(m_stackHeight + _stackOffset, _location);
290 }
291 
clearTagUnions()292 void KnownState::clearTagUnions()
293 {
294 	for (auto it = m_stackElements.begin(); it != m_stackElements.end();)
295 		if (m_tagUnions.left.count(it->second))
296 			it = m_stackElements.erase(it);
297 		else
298 			++it;
299 }
300 
setStackElement(int _stackHeight,Id _class)301 void KnownState::setStackElement(int _stackHeight, Id _class)
302 {
303 	m_stackElements[_stackHeight] = _class;
304 }
305 
swapStackElements(int _stackHeightA,int _stackHeightB,SourceLocation const & _location)306 void KnownState::swapStackElements(
307 	int _stackHeightA,
308 	int _stackHeightB,
309 	SourceLocation const& _location
310 )
311 {
312 	assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
313 	// ensure they are created
314 	stackElement(_stackHeightA, _location);
315 	stackElement(_stackHeightB, _location);
316 
317 	swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
318 }
319 
storeInStorage(Id _slot,Id _value,SourceLocation const & _location)320 KnownState::StoreOperation KnownState::storeInStorage(
321 	Id _slot,
322 	Id _value,
323 	SourceLocation const& _location)
324 {
325 	if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
326 		// do not execute the storage if we know that the value is already there
327 		return StoreOperation();
328 	m_sequenceNumber++;
329 	decltype(m_storageContent) storageContents;
330 	// Copy over all values (i.e. retain knowledge about them) where we know that this store
331 	// operation will not destroy the knowledge. Specifically, we copy storage locations we know
332 	// are different from _slot or locations where we know that the stored value is equal to _value.
333 	for (auto const& storageItem: m_storageContent)
334 		if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
335 			storageContents.insert(storageItem);
336 	m_storageContent = move(storageContents);
337 
338 	AssemblyItem item(Instruction::SSTORE, _location);
339 	Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
340 	StoreOperation operation{StoreOperation::Storage, _slot, m_sequenceNumber, id};
341 	m_storageContent[_slot] = _value;
342 	// increment a second time so that we get unique sequence numbers for writes
343 	m_sequenceNumber++;
344 
345 	return operation;
346 }
347 
loadFromStorage(Id _slot,SourceLocation const & _location)348 ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location)
349 {
350 	if (m_storageContent.count(_slot))
351 		return m_storageContent.at(_slot);
352 
353 	AssemblyItem item(Instruction::SLOAD, _location);
354 	return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
355 }
356 
storeInMemory(Id _slot,Id _value,SourceLocation const & _location)357 KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
358 {
359 	if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
360 		// do not execute the store if we know that the value is already there
361 		return StoreOperation();
362 	m_sequenceNumber++;
363 	decltype(m_memoryContent) memoryContents;
364 	// copy over values at points where we know that they are different from _slot by at least 32
365 	for (auto const& memoryItem: m_memoryContent)
366 		if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot))
367 			memoryContents.insert(memoryItem);
368 	m_memoryContent = move(memoryContents);
369 
370 	AssemblyItem item(Instruction::MSTORE, _location);
371 	Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
372 	StoreOperation operation{StoreOperation::Memory, _slot, m_sequenceNumber, id};
373 	m_memoryContent[_slot] = _value;
374 	// increment a second time so that we get unique sequence numbers for writes
375 	m_sequenceNumber++;
376 	return operation;
377 }
378 
loadFromMemory(Id _slot,SourceLocation const & _location)379 ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location)
380 {
381 	if (m_memoryContent.count(_slot))
382 		return m_memoryContent.at(_slot);
383 
384 	AssemblyItem item(Instruction::MLOAD, _location);
385 	return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
386 }
387 
applyKeccak256(Id _start,Id _length,SourceLocation const & _location)388 KnownState::Id KnownState::applyKeccak256(
389 	Id _start,
390 	Id _length,
391 	SourceLocation const& _location
392 )
393 {
394 	AssemblyItem keccak256Item(Instruction::KECCAK256, _location);
395 	// Special logic if length is a short constant, otherwise we cannot tell.
396 	u256 const* l = m_expressionClasses->knownConstant(_length);
397 	// unknown or too large length
398 	if (!l || *l > 128)
399 		return m_expressionClasses->find(keccak256Item, {_start, _length}, true, m_sequenceNumber);
400 	unsigned length = unsigned(*l);
401 	vector<Id> arguments;
402 	for (unsigned i = 0; i < length; i += 32)
403 	{
404 		Id slot = m_expressionClasses->find(
405 			AssemblyItem(Instruction::ADD, _location),
406 			{_start, m_expressionClasses->find(u256(i))}
407 		);
408 		arguments.push_back(loadFromMemory(slot, _location));
409 	}
410 	if (m_knownKeccak256Hashes.count({arguments, length}))
411 		return m_knownKeccak256Hashes.at({arguments, length});
412 	Id v;
413 	// If all arguments are known constants, compute the Keccak-256 here
414 	if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); }))
415 	{
416 		bytes data;
417 		for (Id a: arguments)
418 			data += toBigEndian(*m_expressionClasses->knownConstant(a));
419 		data.resize(length);
420 		v = m_expressionClasses->find(AssemblyItem(u256(util::keccak256(data)), _location));
421 	}
422 	else
423 		v = m_expressionClasses->find(keccak256Item, {_start, _length}, true, m_sequenceNumber);
424 	return m_knownKeccak256Hashes[{arguments, length}] = v;
425 }
426 
tagsInExpression(KnownState::Id _expressionId)427 set<u256> KnownState::tagsInExpression(KnownState::Id _expressionId)
428 {
429 	if (m_tagUnions.left.count(_expressionId))
430 		return m_tagUnions.left.at(_expressionId);
431 	// Might be a tag, then return the set of itself.
432 	ExpressionClasses::Expression expr = m_expressionClasses->representative(_expressionId);
433 	if (expr.item && expr.item->type() == PushTag)
434 		return set<u256>({expr.item->data()});
435 	else
436 		return set<u256>();
437 }
438 
tagUnion(set<u256> _tags)439 KnownState::Id KnownState::tagUnion(set<u256> _tags)
440 {
441 	if (m_tagUnions.right.count(_tags))
442 		return m_tagUnions.right.at(_tags);
443 	else
444 	{
445 		Id id = m_expressionClasses->newClass(SourceLocation());
446 		m_tagUnions.right.insert(make_pair(_tags, id));
447 		return id;
448 	}
449 }
450