1 /*
2 This file is part of solidity.
3
4 solidity is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 solidity is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with solidity. If not, see <http://www.gnu.org/licenses/>.
16 */
17 // SPDX-License-Identifier: GPL-3.0
18 /**
19 * @file KnownState.cpp
20 * @author Christian <c@ethdev.com>
21 * @date 2015
22 * Contains knowledge about the state of the virtual machine at a specific instruction.
23 */
24
25 #include <libevmasm/KnownState.h>
26 #include <libevmasm/AssemblyItem.h>
27 #include <libsolutil/Keccak256.h>
28
29 #include <functional>
30
31 using namespace std;
32 using namespace solidity;
33 using namespace solidity::evmasm;
34 using namespace solidity::langutil;
35
stream(ostream & _out) const36 ostream& KnownState::stream(ostream& _out) const
37 {
38 auto streamExpressionClass = [this](ostream& _out, Id _id)
39 {
40 auto const& expr = m_expressionClasses->representative(_id);
41 _out << " " << dec << _id << ": ";
42 if (!expr.item)
43 _out << " no item";
44 else if (expr.item->type() == UndefinedItem)
45 _out << " unknown " << static_cast<int>(expr.item->data());
46 else
47 _out << *expr.item;
48 if (expr.sequenceNumber)
49 _out << "@" << dec << expr.sequenceNumber;
50 _out << "(";
51 for (Id arg: expr.arguments)
52 _out << dec << arg << ",";
53 _out << ")" << endl;
54 };
55
56 _out << "=== State ===" << endl;
57 _out << "Stack height: " << dec << m_stackHeight << endl;
58 _out << "Equivalence classes:" << endl;
59 for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass)
60 streamExpressionClass(_out, eqClass);
61
62 _out << "Stack:" << endl;
63 for (auto const& it: m_stackElements)
64 {
65 _out << " " << dec << it.first << ": ";
66 streamExpressionClass(_out, it.second);
67 }
68 _out << "Storage:" << endl;
69 for (auto const& it: m_storageContent)
70 {
71 _out << " ";
72 streamExpressionClass(_out, it.first);
73 _out << ": ";
74 streamExpressionClass(_out, it.second);
75 }
76 _out << "Memory:" << endl;
77 for (auto const& it: m_memoryContent)
78 {
79 _out << " ";
80 streamExpressionClass(_out, it.first);
81 _out << ": ";
82 streamExpressionClass(_out, it.second);
83 }
84
85 return _out;
86 }
87
feedItem(AssemblyItem const & _item,bool _copyItem)88 KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem)
89 {
90 StoreOperation op;
91 if (_item.type() == Tag)
92 {
93 // can be ignored
94 }
95 else if (_item.type() == AssignImmutable)
96 {
97 // Since AssignImmutable breaks blocks, it should be fine to only consider its changes to the stack, which
98 // is the same as two POPs.
99 // Note that the StoreOperation for POP is generic and _copyItem is ignored.
100 feedItem(AssemblyItem(Instruction::POP), _copyItem);
101 return feedItem(AssemblyItem(Instruction::POP), _copyItem);
102 }
103 else if (_item.type() == VerbatimBytecode)
104 {
105 m_sequenceNumber += 2;
106 resetMemory();
107 resetKnownKeccak256Hashes();
108 resetStorage();
109 // Consume all arguments and place unknown return values on the stack.
110 m_stackElements.erase(
111 m_stackElements.upper_bound(m_stackHeight - static_cast<int>(_item.arguments())),
112 m_stackElements.end()
113 );
114 m_stackHeight += static_cast<int>(_item.deposit());
115 for (size_t i = 0; i < _item.returnValues(); ++i)
116 setStackElement(
117 m_stackHeight - static_cast<int>(i),
118 m_expressionClasses->newClass(_item.location())
119 );
120 }
121 else if (_item.type() != Operation)
122 {
123 assertThrow(_item.deposit() == 1, InvalidDeposit, "");
124 if (_item.pushedValue())
125 // only available after assembly stage, should not be used for optimisation
126 setStackElement(++m_stackHeight, m_expressionClasses->find(*_item.pushedValue()));
127 else
128 setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem));
129 }
130 else
131 {
132 Instruction instruction = _item.instruction();
133 InstructionInfo info = instructionInfo(instruction);
134 if (SemanticInformation::isDupInstruction(_item))
135 setStackElement(
136 m_stackHeight + 1,
137 stackElement(
138 m_stackHeight - static_cast<int>(instruction) + static_cast<int>(Instruction::DUP1),
139 _item.location()
140 )
141 );
142 else if (SemanticInformation::isSwapInstruction(_item))
143 swapStackElements(
144 m_stackHeight,
145 m_stackHeight - 1 - static_cast<int>(instruction) + static_cast<int>(Instruction::SWAP1),
146 _item.location()
147 );
148 else if (instruction != Instruction::POP)
149 {
150 vector<Id> arguments(static_cast<size_t>(info.args));
151 for (size_t i = 0; i < static_cast<size_t>(info.args); ++i)
152 arguments[i] = stackElement(m_stackHeight - static_cast<int>(i), _item.location());
153 switch (_item.instruction())
154 {
155 case Instruction::SSTORE:
156 op = storeInStorage(arguments[0], arguments[1], _item.location());
157 break;
158 case Instruction::SLOAD:
159 setStackElement(
160 m_stackHeight + static_cast<int>(_item.deposit()),
161 loadFromStorage(arguments[0], _item.location())
162 );
163 break;
164 case Instruction::MSTORE:
165 op = storeInMemory(arguments[0], arguments[1], _item.location());
166 break;
167 case Instruction::MLOAD:
168 setStackElement(
169 m_stackHeight + static_cast<int>(_item.deposit()),
170 loadFromMemory(arguments[0], _item.location())
171 );
172 break;
173 case Instruction::KECCAK256:
174 setStackElement(
175 m_stackHeight + static_cast<int>(_item.deposit()),
176 applyKeccak256(arguments.at(0), arguments.at(1), _item.location())
177 );
178 break;
179 default:
180 bool invMem =
181 SemanticInformation::memory(_item.instruction()) == SemanticInformation::Write;
182 bool invStor =
183 SemanticInformation::storage(_item.instruction()) == SemanticInformation::Write;
184 // We could be a bit more fine-grained here (CALL only invalidates part of
185 // memory, etc), but we do not for now.
186 if (invMem)
187 {
188 resetMemory();
189 resetKnownKeccak256Hashes();
190 }
191 if (invStor)
192 resetStorage();
193 if (invMem || invStor)
194 m_sequenceNumber += 2; // Increment by two because it can read and write
195 assertThrow(info.ret <= 1, InvalidDeposit, "");
196 if (info.ret == 1)
197 setStackElement(
198 m_stackHeight + static_cast<int>(_item.deposit()),
199 m_expressionClasses->find(_item, arguments, _copyItem)
200 );
201 }
202 }
203 m_stackElements.erase(
204 m_stackElements.upper_bound(m_stackHeight + static_cast<int>(_item.deposit())),
205 m_stackElements.end()
206 );
207 m_stackHeight += static_cast<int>(_item.deposit());
208 }
209 return op;
210 }
211
212 /// Helper function for KnownState::reduceToCommonKnowledge, removes everything from
213 /// _this which is not in or not equal to the value in _other.
intersect(Mapping & _this,Mapping const & _other)214 template <class Mapping> void intersect(Mapping& _this, Mapping const& _other)
215 {
216 for (auto it = _this.begin(); it != _this.end();)
217 if (_other.count(it->first) && _other.at(it->first) == it->second)
218 ++it;
219 else
220 it = _this.erase(it);
221 }
222
reduceToCommonKnowledge(KnownState const & _other,bool _combineSequenceNumbers)223 void KnownState::reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers)
224 {
225 int stackDiff = m_stackHeight - _other.m_stackHeight;
226 for (auto it = m_stackElements.begin(); it != m_stackElements.end();)
227 if (_other.m_stackElements.count(it->first - stackDiff))
228 {
229 Id other = _other.m_stackElements.at(it->first - stackDiff);
230 if (it->second == other)
231 ++it;
232 else
233 {
234 set<u256> theseTags = tagsInExpression(it->second);
235 set<u256> otherTags = tagsInExpression(other);
236 if (!theseTags.empty() && !otherTags.empty())
237 {
238 theseTags.insert(otherTags.begin(), otherTags.end());
239 it->second = tagUnion(theseTags);
240 ++it;
241 }
242 else
243 it = m_stackElements.erase(it);
244 }
245 }
246 else
247 it = m_stackElements.erase(it);
248
249 // Use the smaller stack height. Essential to terminate in case of loops.
250 if (m_stackHeight > _other.m_stackHeight)
251 {
252 map<int, Id> shiftedStack;
253 for (auto const& stackElement: m_stackElements)
254 shiftedStack[stackElement.first - stackDiff] = stackElement.second;
255 m_stackElements = move(shiftedStack);
256 m_stackHeight = _other.m_stackHeight;
257 }
258
259 intersect(m_storageContent, _other.m_storageContent);
260 intersect(m_memoryContent, _other.m_memoryContent);
261 if (_combineSequenceNumbers)
262 m_sequenceNumber = max(m_sequenceNumber, _other.m_sequenceNumber);
263 }
264
operator ==(KnownState const & _other) const265 bool KnownState::operator==(KnownState const& _other) const
266 {
267 if (m_storageContent != _other.m_storageContent || m_memoryContent != _other.m_memoryContent)
268 return false;
269 int stackDiff = m_stackHeight - _other.m_stackHeight;
270 auto thisIt = m_stackElements.cbegin();
271 auto otherIt = _other.m_stackElements.cbegin();
272 for (; thisIt != m_stackElements.cend() && otherIt != _other.m_stackElements.cend(); ++thisIt, ++otherIt)
273 if (thisIt->first - stackDiff != otherIt->first || thisIt->second != otherIt->second)
274 return false;
275 return (thisIt == m_stackElements.cend() && otherIt == _other.m_stackElements.cend());
276 }
277
stackElement(int _stackHeight,SourceLocation const & _location)278 ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location)
279 {
280 if (m_stackElements.count(_stackHeight))
281 return m_stackElements.at(_stackHeight);
282 // Stack element not found (not assigned yet), create new unknown equivalence class.
283 return m_stackElements[_stackHeight] =
284 m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location));
285 }
286
relativeStackElement(int _stackOffset,SourceLocation const & _location)287 KnownState::Id KnownState::relativeStackElement(int _stackOffset, SourceLocation const& _location)
288 {
289 return stackElement(m_stackHeight + _stackOffset, _location);
290 }
291
clearTagUnions()292 void KnownState::clearTagUnions()
293 {
294 for (auto it = m_stackElements.begin(); it != m_stackElements.end();)
295 if (m_tagUnions.left.count(it->second))
296 it = m_stackElements.erase(it);
297 else
298 ++it;
299 }
300
setStackElement(int _stackHeight,Id _class)301 void KnownState::setStackElement(int _stackHeight, Id _class)
302 {
303 m_stackElements[_stackHeight] = _class;
304 }
305
swapStackElements(int _stackHeightA,int _stackHeightB,SourceLocation const & _location)306 void KnownState::swapStackElements(
307 int _stackHeightA,
308 int _stackHeightB,
309 SourceLocation const& _location
310 )
311 {
312 assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
313 // ensure they are created
314 stackElement(_stackHeightA, _location);
315 stackElement(_stackHeightB, _location);
316
317 swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
318 }
319
storeInStorage(Id _slot,Id _value,SourceLocation const & _location)320 KnownState::StoreOperation KnownState::storeInStorage(
321 Id _slot,
322 Id _value,
323 SourceLocation const& _location)
324 {
325 if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
326 // do not execute the storage if we know that the value is already there
327 return StoreOperation();
328 m_sequenceNumber++;
329 decltype(m_storageContent) storageContents;
330 // Copy over all values (i.e. retain knowledge about them) where we know that this store
331 // operation will not destroy the knowledge. Specifically, we copy storage locations we know
332 // are different from _slot or locations where we know that the stored value is equal to _value.
333 for (auto const& storageItem: m_storageContent)
334 if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
335 storageContents.insert(storageItem);
336 m_storageContent = move(storageContents);
337
338 AssemblyItem item(Instruction::SSTORE, _location);
339 Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
340 StoreOperation operation{StoreOperation::Storage, _slot, m_sequenceNumber, id};
341 m_storageContent[_slot] = _value;
342 // increment a second time so that we get unique sequence numbers for writes
343 m_sequenceNumber++;
344
345 return operation;
346 }
347
loadFromStorage(Id _slot,SourceLocation const & _location)348 ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location)
349 {
350 if (m_storageContent.count(_slot))
351 return m_storageContent.at(_slot);
352
353 AssemblyItem item(Instruction::SLOAD, _location);
354 return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
355 }
356
storeInMemory(Id _slot,Id _value,SourceLocation const & _location)357 KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
358 {
359 if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
360 // do not execute the store if we know that the value is already there
361 return StoreOperation();
362 m_sequenceNumber++;
363 decltype(m_memoryContent) memoryContents;
364 // copy over values at points where we know that they are different from _slot by at least 32
365 for (auto const& memoryItem: m_memoryContent)
366 if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot))
367 memoryContents.insert(memoryItem);
368 m_memoryContent = move(memoryContents);
369
370 AssemblyItem item(Instruction::MSTORE, _location);
371 Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
372 StoreOperation operation{StoreOperation::Memory, _slot, m_sequenceNumber, id};
373 m_memoryContent[_slot] = _value;
374 // increment a second time so that we get unique sequence numbers for writes
375 m_sequenceNumber++;
376 return operation;
377 }
378
loadFromMemory(Id _slot,SourceLocation const & _location)379 ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location)
380 {
381 if (m_memoryContent.count(_slot))
382 return m_memoryContent.at(_slot);
383
384 AssemblyItem item(Instruction::MLOAD, _location);
385 return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
386 }
387
applyKeccak256(Id _start,Id _length,SourceLocation const & _location)388 KnownState::Id KnownState::applyKeccak256(
389 Id _start,
390 Id _length,
391 SourceLocation const& _location
392 )
393 {
394 AssemblyItem keccak256Item(Instruction::KECCAK256, _location);
395 // Special logic if length is a short constant, otherwise we cannot tell.
396 u256 const* l = m_expressionClasses->knownConstant(_length);
397 // unknown or too large length
398 if (!l || *l > 128)
399 return m_expressionClasses->find(keccak256Item, {_start, _length}, true, m_sequenceNumber);
400 unsigned length = unsigned(*l);
401 vector<Id> arguments;
402 for (unsigned i = 0; i < length; i += 32)
403 {
404 Id slot = m_expressionClasses->find(
405 AssemblyItem(Instruction::ADD, _location),
406 {_start, m_expressionClasses->find(u256(i))}
407 );
408 arguments.push_back(loadFromMemory(slot, _location));
409 }
410 if (m_knownKeccak256Hashes.count({arguments, length}))
411 return m_knownKeccak256Hashes.at({arguments, length});
412 Id v;
413 // If all arguments are known constants, compute the Keccak-256 here
414 if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); }))
415 {
416 bytes data;
417 for (Id a: arguments)
418 data += toBigEndian(*m_expressionClasses->knownConstant(a));
419 data.resize(length);
420 v = m_expressionClasses->find(AssemblyItem(u256(util::keccak256(data)), _location));
421 }
422 else
423 v = m_expressionClasses->find(keccak256Item, {_start, _length}, true, m_sequenceNumber);
424 return m_knownKeccak256Hashes[{arguments, length}] = v;
425 }
426
tagsInExpression(KnownState::Id _expressionId)427 set<u256> KnownState::tagsInExpression(KnownState::Id _expressionId)
428 {
429 if (m_tagUnions.left.count(_expressionId))
430 return m_tagUnions.left.at(_expressionId);
431 // Might be a tag, then return the set of itself.
432 ExpressionClasses::Expression expr = m_expressionClasses->representative(_expressionId);
433 if (expr.item && expr.item->type() == PushTag)
434 return set<u256>({expr.item->data()});
435 else
436 return set<u256>();
437 }
438
tagUnion(set<u256> _tags)439 KnownState::Id KnownState::tagUnion(set<u256> _tags)
440 {
441 if (m_tagUnions.right.count(_tags))
442 return m_tagUnions.right.at(_tags);
443 else
444 {
445 Id id = m_expressionClasses->newClass(SourceLocation());
446 m_tagUnions.right.insert(make_pair(_tags, id));
447 return id;
448 }
449 }
450