| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* Copyright 2025 Jonathan S. Arney | ||
| 2 | * | ||
| 3 | * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 4 | * you may not use this file except in compliance with the License. | ||
| 5 | * You may obtain a copy of the License at | ||
| 6 | * | ||
| 7 | * https://github.com/jarney/gyoji/blob/master/LICENSE | ||
| 8 | * | ||
| 9 | * Unless required by applicable law or agreed to in writing, software | ||
| 10 | * distributed under the License is distributed on an "AS IS" BASIS, | ||
| 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 12 | * See the License for the specific language governing permissions and | ||
| 13 | * limitations under the License. | ||
| 14 | */ | ||
| 15 | #include <gyoji-analysis.hpp> | ||
| 16 | #include <stdio.h> | ||
| 17 | |||
| 18 | using namespace Gyoji::mir; | ||
| 19 | using namespace Gyoji::context; | ||
| 20 | using namespace Gyoji::analysis; | ||
| 21 | |||
| 22 | 28 | AnalysisPassUseBeforeAssignment::AnalysisPassUseBeforeAssignment(CompilerContext & _compiler_context) | |
| 23 | 56 | : AnalysisPass(_compiler_context, "use-before-initialization checks") | |
| 24 | 28 | {} | |
| 25 | 112 | AnalysisPassUseBeforeAssignment::~AnalysisPassUseBeforeAssignment() | |
| 26 | 112 | {} | |
| 27 | |||
| 28 | void | ||
| 29 | 28 | AnalysisPassUseBeforeAssignment::check(const Gyoji::mir::MIR & mir) const | |
| 30 | { | ||
| 31 |
2/2✓ Branch 7 taken 270 times.
✓ Branch 8 taken 28 times.
|
298 | for (const auto & function : mir.get_functions().get_functions()) { |
| 32 | 270 | check(*function); | |
| 33 | } | ||
| 34 | 28 | } | |
| 35 | |||
| 36 | namespace Gyoji::analysis { | ||
| 37 | |||
| 38 | // This is an access of a local variable | ||
| 39 | // inside a function. Here, we track the | ||
| 40 | // name of the variable, the tmpvar it's | ||
| 41 | // loaded into, and the 'ProgramPoint' it was loaded/declared | ||
| 42 | // from. | ||
| 43 | class VariableLoadPoint { | ||
| 44 | public: | ||
| 45 | VariableLoadPoint( | ||
| 46 | std::string _variable_name, | ||
| 47 | ProgramPoint _program_point | ||
| 48 | ); | ||
| 49 | VariableLoadPoint(const VariableLoadPoint & other); | ||
| 50 | ~VariableLoadPoint(); | ||
| 51 | std::string variable_name; | ||
| 52 | size_t tmpvar; | ||
| 53 | ProgramPoint program_point; | ||
| 54 | }; | ||
| 55 | |||
| 56 | class TmpvarAssignedAt { | ||
| 57 | public: | ||
| 58 | TmpvarAssignedAt(const TmpvarAssignedAt & other); | ||
| 59 | TmpvarAssignedAt( | ||
| 60 | size_t _tmpvar, | ||
| 61 | ProgramPoint _program_point | ||
| 62 | ); | ||
| 63 | ~TmpvarAssignedAt(); | ||
| 64 | |||
| 65 | size_t tmpvar; | ||
| 66 | ProgramPoint program_point; | ||
| 67 | }; | ||
| 68 | |||
| 69 | class VariableTmpvarVisitor : public OperationVisitor { | ||
| 70 | public: | ||
| 71 | VariableTmpvarVisitor(const std::map<std::string, std::string> & _ignore_arguments); | ||
| 72 | ~VariableTmpvarVisitor(); | ||
| 73 | void visit( | ||
| 74 | size_t block_id, | ||
| 75 | const BasicBlock & block, | ||
| 76 | size_t operation_index, | ||
| 77 | const Operation & operation | ||
| 78 | ); | ||
| 79 | const std::map<size_t, std::string> & get_tmpvars() const; | ||
| 80 | private: | ||
| 81 | std::map<size_t, std::string> tmpvars; | ||
| 82 | const std::map<std::string, std::string> & ignore_arguments; | ||
| 83 | }; | ||
| 84 | |||
| 85 | // First, we need to just build a map | ||
| 86 | // of all the tmpvar to the variables they | ||
| 87 | // represent (only for tmpvars associated with variables) | ||
| 88 | class VariableUseVisitor : public OperationVisitor { | ||
| 89 | public: | ||
| 90 | VariableUseVisitor( | ||
| 91 | const std::map<size_t, std::string> & _tmpvars | ||
| 92 | ); | ||
| 93 | ~VariableUseVisitor(); | ||
| 94 | void visit( | ||
| 95 | size_t block_id, | ||
| 96 | const BasicBlock & block, | ||
| 97 | size_t operation_index, | ||
| 98 | const Operation & operation | ||
| 99 | ); | ||
| 100 | const std::vector<VariableLoadPoint> & get_loads(); | ||
| 101 | const std::vector<VariableLoadPoint> & get_stores(); | ||
| 102 | private: | ||
| 103 | const std::map<size_t, std::string> & tmpvars; | ||
| 104 | std::vector<VariableLoadPoint> variable_loads; | ||
| 105 | std::vector<VariableLoadPoint> variable_stores; | ||
| 106 | }; | ||
| 107 | |||
| 108 | // Next, we need to find all of the places those tmpvars (variables) | ||
| 109 | // are used and all of the places they are assigned. | ||
| 110 | |||
| 111 | }; | ||
| 112 | ////////////////////////////////////////////// | ||
| 113 | // ProgramPoint | ||
| 114 | ////////////////////////////////////////////// | ||
| 115 | |||
| 116 | 4558 | ProgramPoint::ProgramPoint(size_t _block_id, size_t _operation_index) | |
| 117 | 4558 | : block_id(_block_id) | |
| 118 | 4558 | , operation_index(_operation_index) | |
| 119 | 4558 | {} | |
| 120 | 2914 | ProgramPoint::ProgramPoint(const ProgramPoint & other) | |
| 121 | 2914 | : block_id(other.block_id) | |
| 122 | 2914 | , operation_index(other.operation_index) | |
| 123 | 2914 | {} | |
| 124 | 7472 | ProgramPoint::~ProgramPoint() | |
| 125 | 7472 | {} | |
| 126 | ////////////////////////////////////////////// | ||
| 127 | // VariableLoadPoint | ||
| 128 | ////////////////////////////////////////////// | ||
| 129 | 712 | VariableLoadPoint::VariableLoadPoint( | |
| 130 | std::string _variable_name, | ||
| 131 | 712 | ProgramPoint _program_point) | |
| 132 | 712 | : variable_name(_variable_name) | |
| 133 | 712 | , program_point(_program_point) | |
| 134 | 712 | {} | |
| 135 | 1006 | VariableLoadPoint::VariableLoadPoint(const VariableLoadPoint & other) | |
| 136 | 1006 | : variable_name(other.variable_name) | |
| 137 | 1006 | , program_point(other.program_point) | |
| 138 | 1006 | {} | |
| 139 | |||
| 140 | 1718 | VariableLoadPoint::~VariableLoadPoint() | |
| 141 | 1718 | {} | |
| 142 | |||
| 143 | ////////////////////////////////////////////// | ||
| 144 | // VariableUseVisitor | ||
| 145 | ////////////////////////////////////////////// | ||
| 146 | |||
| 147 | 270 | VariableUseVisitor::VariableUseVisitor( | |
| 148 | const std::map<size_t, std::string> & _tmpvars | ||
| 149 | 270 | ) | |
| 150 | : OperationVisitor() | ||
| 151 | 270 | , tmpvars(_tmpvars) | |
| 152 | { | ||
| 153 | 270 | } | |
| 154 | |||
| 155 | 270 | VariableUseVisitor::~VariableUseVisitor() | |
| 156 | 270 | {} | |
| 157 | |||
| 158 | const std::vector<VariableLoadPoint> & | ||
| 159 | 270 | VariableUseVisitor::get_loads() | |
| 160 | { | ||
| 161 | 270 | return variable_loads; | |
| 162 | } | ||
| 163 | |||
| 164 | const std::vector<VariableLoadPoint> & | ||
| 165 | 270 | VariableUseVisitor::get_stores() | |
| 166 | 270 | { return variable_stores; } | |
| 167 | |||
| 168 | |||
| 169 | 4378 | void VariableUseVisitor::visit( | |
| 170 | size_t block_id, | ||
| 171 | const BasicBlock & block, | ||
| 172 | size_t operation_index, | ||
| 173 | const Operation & operation | ||
| 174 | ) | ||
| 175 | { | ||
| 176 | 4378 | const auto & operands = operation.get_operands(); | |
| 177 | 4378 | size_t noperands = operands.size(); | |
| 178 | 4378 | ProgramPoint program_point(block_id, operation_index); | |
| 179 | |||
| 180 |
2/2✓ Branch 1 taken 388 times.
✓ Branch 2 taken 3990 times.
|
4378 | if (operation.get_type() == Operation::OP_ASSIGN) { |
| 181 | 388 | const auto & it = tmpvars.find(operands.at(0)); | |
| 182 |
2/2✓ Branch 2 taken 382 times.
✓ Branch 3 taken 6 times.
|
388 | if (it != tmpvars.end()) { |
| 183 | VariableLoadPoint assigned_at( | ||
| 184 | 382 | it->second, | |
| 185 | program_point | ||
| 186 | 764 | ); | |
| 187 | 382 | variable_stores.push_back(assigned_at); | |
| 188 | 382 | } | |
| 189 | // All other operands are accesses. | ||
| 190 |
2/2✓ Branch 0 taken 388 times.
✓ Branch 1 taken 388 times.
|
776 | for (size_t operand_id = 1; operand_id < noperands; operand_id++) { |
| 191 | 388 | const auto & it = tmpvars.find(operands.at(operand_id)); | |
| 192 |
1/2✗ Branch 2 not taken.
✓ Branch 3 taken 388 times.
|
388 | if (it != tmpvars.end()) { |
| 193 | VariableLoadPoint assigned_at( | ||
| 194 | ✗ | it->second, | |
| 195 | program_point | ||
| 196 | ✗ | ); | |
| 197 | ✗ | variable_loads.push_back(assigned_at); | |
| 198 | ✗ | } | |
| 199 | } | ||
| 200 | } | ||
| 201 | else { | ||
| 202 | // For other operations, all operands are 'use' of the variable. | ||
| 203 |
2/2✓ Branch 0 taken 1266 times.
✓ Branch 1 taken 3990 times.
|
5256 | for (size_t operand_id = 0; operand_id < noperands; operand_id++) { |
| 204 | 1266 | const auto & it = tmpvars.find(operands.at(operand_id)); | |
| 205 |
2/2✓ Branch 2 taken 330 times.
✓ Branch 3 taken 936 times.
|
1266 | if (it != tmpvars.end()) { |
| 206 | VariableLoadPoint assigned_at( | ||
| 207 | 330 | it->second, | |
| 208 | program_point | ||
| 209 | 660 | ); | |
| 210 | 330 | variable_loads.push_back(assigned_at); | |
| 211 | 330 | } | |
| 212 | } | ||
| 213 | } | ||
| 214 | 4378 | } | |
| 215 | 270 | VariableTmpvarVisitor::VariableTmpvarVisitor( | |
| 216 | const std::map<std::string, std::string> & _ignore_arguments | ||
| 217 | 270 | ) | |
| 218 | 270 | : tmpvars() | |
| 219 | 270 | , ignore_arguments(_ignore_arguments) | |
| 220 | 270 | {} | |
| 221 | |||
| 222 | 270 | VariableTmpvarVisitor::~VariableTmpvarVisitor() | |
| 223 | 270 | {} | |
| 224 | |||
| 225 | void | ||
| 226 | 4378 | VariableTmpvarVisitor::visit( | |
| 227 | size_t block_id, | ||
| 228 | const BasicBlock & block, | ||
| 229 | size_t operation_index, | ||
| 230 | const Operation & operation | ||
| 231 | ) | ||
| 232 | { | ||
| 233 |
2/2✓ Branch 1 taken 312 times.
✓ Branch 2 taken 4066 times.
|
4378 | if (operation.get_type() == Operation::OP_LOCAL_DECLARE) { |
| 234 | 312 | const OperationLocalDeclare & local_declare = (const OperationLocalDeclare&)operation; | |
| 235 | // variables.push_back(std::pair(local_declare.get_variable(), &local_declare.get_source_ref())); | ||
| 236 | 3376 | return; | |
| 237 | } | ||
| 238 |
2/2✓ Branch 1 taken 2574 times.
✓ Branch 2 taken 1492 times.
|
4066 | if (operation.get_type() != Operation::OP_LOCAL_VARIABLE) { |
| 239 | 2574 | return; | |
| 240 | } | ||
| 241 | 1492 | const OperationLocalVariable &operation_local = (const OperationLocalVariable &)operation; | |
| 242 | 1492 | const std::string & varname = operation_local.get_symbol_name(); | |
| 243 | |||
| 244 | // Check to see if it's an argument and we | ||
| 245 | // can ignore it since arguments get initialized | ||
| 246 | // when the function begins. | ||
| 247 | 1492 | const auto & it = ignore_arguments.find(varname); | |
| 248 |
2/2✓ Branch 2 taken 490 times.
✓ Branch 3 taken 1002 times.
|
1492 | if (it != ignore_arguments.end()) { |
| 249 | 490 | return; | |
| 250 | } | ||
| 251 | |||
| 252 | // Just keep track of the tmpvars that map to local variables. | ||
| 253 | 1002 | tmpvars[operation_local.get_result()] = varname; | |
| 254 | } | ||
| 255 | |||
| 256 | const std::map<size_t, std::string> & | ||
| 257 | 270 | VariableTmpvarVisitor::get_tmpvars() const | |
| 258 | 270 | { return tmpvars; } | |
| 259 | |||
| 260 | 482 | bool AnalysisPassUseBeforeAssignment::true_at( | |
| 261 | const Function & function, | ||
| 262 | std::map<size_t, bool> & already_checked, | ||
| 263 | const std::vector<ProgramPoint> & true_points, | ||
| 264 | const ProgramPoint & check_at | ||
| 265 | ) const | ||
| 266 | { | ||
| 267 | 482 | bool found_in_same_block = false; | |
| 268 | |||
| 269 |
2/2✓ Branch 5 taken 708 times.
✓ Branch 6 taken 482 times.
|
1190 | for (const auto & true_point : true_points) { |
| 270 |
2/2✓ Branch 0 taken 390 times.
✓ Branch 1 taken 318 times.
|
708 | if (check_at.block_id == true_point.block_id && |
| 271 |
2/2✓ Branch 0 taken 374 times.
✓ Branch 1 taken 16 times.
|
390 | check_at.operation_index > true_point.operation_index) { |
| 272 | 374 | found_in_same_block = true; | |
| 273 | } | ||
| 274 | } | ||
| 275 | // It was true in the current block, so it's true. | ||
| 276 |
2/2✓ Branch 0 taken 356 times.
✓ Branch 1 taken 126 times.
|
482 | if (found_in_same_block) { |
| 277 | 356 | return true; | |
| 278 | } | ||
| 279 | 126 | const std::vector<size_t> & predecessors = function.get_basic_block(check_at.block_id).get_reachable_from(); | |
| 280 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 126 times.
|
126 | if (predecessors.size() == 0) { |
| 281 | ✗ | return false; | |
| 282 | } | ||
| 283 | |||
| 284 | // If it wasn't true in the current block, try all predecessors. | ||
| 285 |
2/2✓ Branch 5 taken 180 times.
✓ Branch 6 taken 126 times.
|
306 | for (const auto & predecessor : predecessors) { |
| 286 | 180 | ProgramPoint pred_point(predecessor, function.get_basic_block(predecessor).get_operations().size()); | |
| 287 | |||
| 288 | bool is_true; | ||
| 289 | 180 | const auto & already_checked_it = already_checked.find(pred_point.block_id); | |
| 290 |
2/2✓ Branch 2 taken 28 times.
✓ Branch 3 taken 152 times.
|
180 | if (already_checked_it != already_checked.end()) { |
| 291 | 28 | is_true = already_checked_it->second; | |
| 292 | } | ||
| 293 | else { | ||
| 294 | 152 | is_true = true_at(function, already_checked, true_points, pred_point); | |
| 295 | 152 | already_checked.insert(std::pair(pred_point.block_id, is_true)); | |
| 296 | } | ||
| 297 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 180 times.
|
180 | if (!is_true) { |
| 298 | ✗ | return false; | |
| 299 | } | ||
| 300 |
1/2✓ Branch 1 taken 180 times.
✗ Branch 2 not taken.
|
180 | } |
| 301 | 126 | return true; | |
| 302 | |||
| 303 | } | ||
| 304 | |||
| 305 | 270 | void AnalysisPassUseBeforeAssignment::check(const Function & function) const | |
| 306 | { | ||
| 307 | 270 | std::map<std::string, std::string> ignore_arguments; | |
| 308 |
2/2✓ Branch 6 taken 524 times.
✓ Branch 7 taken 270 times.
|
794 | for (const auto & arg : function.get_arguments()) { |
| 309 | 524 | ignore_arguments[arg.get_name()] = arg.get_name(); | |
| 310 | } | ||
| 311 | |||
| 312 | 270 | VariableTmpvarVisitor id_visitor(ignore_arguments); | |
| 313 | 270 | function.iterate_operations(id_visitor); | |
| 314 | |||
| 315 | 270 | VariableUseVisitor visitor(id_visitor.get_tmpvars()); | |
| 316 | 270 | function.iterate_operations(visitor); | |
| 317 | |||
| 318 | 270 | std::map<std::string, std::vector<ProgramPoint>> store_points_by_variable; | |
| 319 |
2/2✓ Branch 6 taken 382 times.
✓ Branch 7 taken 270 times.
|
652 | for (const auto & store : visitor.get_stores()) { |
| 320 | 382 | store_points_by_variable[store.variable_name].push_back(store.program_point); | |
| 321 | } | ||
| 322 | |||
| 323 |
2/2✓ Branch 6 taken 330 times.
✓ Branch 7 taken 270 times.
|
600 | for (const auto & load : visitor.get_loads()) { |
| 324 | 330 | std::map<size_t, bool> already_checked; | |
| 325 | 330 | bool is_true = true_at(function, already_checked, store_points_by_variable[load.variable_name], load.program_point); | |
| 326 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 330 times.
|
330 | if (!is_true) { |
| 327 | ✗ | std::unique_ptr<Gyoji::context::Error> error = std::make_unique<Gyoji::context::Error>("Variable use before initialization."); | |
| 328 | const SourceReference & source_ref = | ||
| 329 | ✗ | function.get_basic_block(load.program_point.block_id).get_operations().at(load.program_point.operation_index)->get_source_ref(); | |
| 330 | |||
| 331 | ✗ | error->add_message( | |
| 332 | source_ref, | ||
| 333 | ✗ | std::string("Variable ") | |
| 334 | ✗ | + load.variable_name | |
| 335 | ✗ | + std::string(" is uninitialized. This would result in undefined behavior. Note that if this appears on a 'return' line, then it is most likely a destructor call.") | |
| 336 | ); | ||
| 337 | ✗ | get_compiler_context() | |
| 338 | ✗ | .get_errors() | |
| 339 | ✗ | .add_error(std::move(error)); | |
| 340 | ✗ | } | |
| 341 | 330 | already_checked.insert(std::pair(load.program_point.block_id, is_true)); | |
| 342 | 330 | } | |
| 343 | |||
| 344 | |||
| 345 | // We now have all of the places | ||
| 346 | // the variable was stored | ||
| 347 | // and all of the places it was initialized. | ||
| 348 | // Next for each load, we need to find out if | ||
| 349 | |||
| 350 | // a) There exists a store for that variable | ||
| 351 | // b) For all stores that exist, they all happen "before" the | ||
| 352 | // use. | ||
| 353 | |||
| 354 | // If they're in the same BB, it's easy. | ||
| 355 | // If they're not, we need to find all BB that are "before" | ||
| 356 | // and ask the question on each. | ||
| 357 | |||
| 358 | 270 | } | |
| 359 | ////////////////////////// | ||
| 360 | // | ||
| 361 | /* | ||
| 362 | We start with the CFG graph of basic blocks. | ||
| 363 | 0 | ||
| 364 | / \ | ||
| 365 | / \ | ||
| 366 | 1 2 | ||
| 367 | / \ \ | ||
| 368 | / \ 3 | ||
| 369 | 4 5 / | ||
| 370 | \ / / | ||
| 371 | \ / / | ||
| 372 | \ / / | ||
| 373 | 6---- | ||
| 374 | |||
| 375 | Then we have some property like "used-at" which is | ||
| 376 | associated with a program-point such as (6,3) (basic block 6, 3rd instruction) | ||
| 377 | |||
| 378 | We also have several 'initialized-at' facts like | ||
| 379 | (4,7 ) (2,3) | ||
| 380 | |||
| 381 | What we would like to know if whether 'initialized-before(6,3)' is true. | ||
| 382 | |||
| 383 | The inference rule is P(b,i) = P(b,i+1) for all i. This says if it was true at point i, | ||
| 384 | it is also true at point i+1 in the same block. | ||
| 385 | |||
| 386 | We also have the rule that says P(b,i) = | (for all c in Prececessors of b : P(c,end)) | ||
| 387 | |||
| 388 | So we would like to know if initialized-before(6,3) is true, | ||
| 389 | We first check if there is a (6,k) with k<3. | ||
| 390 | If not, we check if ALL of (4,end) and (5,end) are true. | ||
| 391 | In this case, 4,end is true because (4,7) is true. | ||
| 392 | But (5,end) is false, so we check (1,end). Again no. | ||
| 393 | But (3,end) is true because (2,end) is true. | ||
| 394 | |||
| 395 | */ | ||
| 396 |