GCC Code Coverage Report


Directory: src/
File: src/analysis/analysis-use-before-assignment.cpp
Date: 2025-10-24 11:14:59
Exec Total Coverage
Lines: 121 137 88.3%
Functions: 20 20 100.0%
Branches: 41 46 89.1%

Line Branch Exec Source
1 /* Copyright 2025 Jonathan S. Arney
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * https://github.com/jarney/gyoji/blob/master/LICENSE
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include <gyoji-analysis.hpp>
16 #include <stdio.h>
17
18 using namespace Gyoji::mir;
19 using namespace Gyoji::context;
20 using namespace Gyoji::analysis;
21
22 28 AnalysisPassUseBeforeAssignment::AnalysisPassUseBeforeAssignment(CompilerContext & _compiler_context)
23 56 : AnalysisPass(_compiler_context, "use-before-initialization checks")
24 28 {}
25 112 AnalysisPassUseBeforeAssignment::~AnalysisPassUseBeforeAssignment()
26 112 {}
27
28 void
29 28 AnalysisPassUseBeforeAssignment::check(const Gyoji::mir::MIR & mir) const
30 {
31
2/2
✓ Branch 7 taken 270 times.
✓ Branch 8 taken 28 times.
298 for (const auto & function : mir.get_functions().get_functions()) {
32 270 check(*function);
33 }
34 28 }
35
36 namespace Gyoji::analysis {
37
38 // This is an access of a local variable
39 // inside a function. Here, we track the
40 // name of the variable, the tmpvar it's
41 // loaded into, and the 'ProgramPoint' it was loaded/declared
42 // from.
43 class VariableLoadPoint {
44 public:
45 VariableLoadPoint(
46 std::string _variable_name,
47 ProgramPoint _program_point
48 );
49 VariableLoadPoint(const VariableLoadPoint & other);
50 ~VariableLoadPoint();
51 std::string variable_name;
52 size_t tmpvar;
53 ProgramPoint program_point;
54 };
55
56 class TmpvarAssignedAt {
57 public:
58 TmpvarAssignedAt(const TmpvarAssignedAt & other);
59 TmpvarAssignedAt(
60 size_t _tmpvar,
61 ProgramPoint _program_point
62 );
63 ~TmpvarAssignedAt();
64
65 size_t tmpvar;
66 ProgramPoint program_point;
67 };
68
69 class VariableTmpvarVisitor : public OperationVisitor {
70 public:
71 VariableTmpvarVisitor(const std::map<std::string, std::string> & _ignore_arguments);
72 ~VariableTmpvarVisitor();
73 void visit(
74 size_t block_id,
75 const BasicBlock & block,
76 size_t operation_index,
77 const Operation & operation
78 );
79 const std::map<size_t, std::string> & get_tmpvars() const;
80 private:
81 std::map<size_t, std::string> tmpvars;
82 const std::map<std::string, std::string> & ignore_arguments;
83 };
84
85 // First, we need to just build a map
86 // of all the tmpvar to the variables they
87 // represent (only for tmpvars associated with variables)
88 class VariableUseVisitor : public OperationVisitor {
89 public:
90 VariableUseVisitor(
91 const std::map<size_t, std::string> & _tmpvars
92 );
93 ~VariableUseVisitor();
94 void visit(
95 size_t block_id,
96 const BasicBlock & block,
97 size_t operation_index,
98 const Operation & operation
99 );
100 const std::vector<VariableLoadPoint> & get_loads();
101 const std::vector<VariableLoadPoint> & get_stores();
102 private:
103 const std::map<size_t, std::string> & tmpvars;
104 std::vector<VariableLoadPoint> variable_loads;
105 std::vector<VariableLoadPoint> variable_stores;
106 };
107
108 // Next, we need to find all of the places those tmpvars (variables)
109 // are used and all of the places they are assigned.
110
111 };
112 //////////////////////////////////////////////
113 // ProgramPoint
114 //////////////////////////////////////////////
115
116 4558 ProgramPoint::ProgramPoint(size_t _block_id, size_t _operation_index)
117 4558 : block_id(_block_id)
118 4558 , operation_index(_operation_index)
119 4558 {}
120 2914 ProgramPoint::ProgramPoint(const ProgramPoint & other)
121 2914 : block_id(other.block_id)
122 2914 , operation_index(other.operation_index)
123 2914 {}
124 7472 ProgramPoint::~ProgramPoint()
125 7472 {}
126 //////////////////////////////////////////////
127 // VariableLoadPoint
128 //////////////////////////////////////////////
129 712 VariableLoadPoint::VariableLoadPoint(
130 std::string _variable_name,
131 712 ProgramPoint _program_point)
132 712 : variable_name(_variable_name)
133 712 , program_point(_program_point)
134 712 {}
135 1006 VariableLoadPoint::VariableLoadPoint(const VariableLoadPoint & other)
136 1006 : variable_name(other.variable_name)
137 1006 , program_point(other.program_point)
138 1006 {}
139
140 1718 VariableLoadPoint::~VariableLoadPoint()
141 1718 {}
142
143 //////////////////////////////////////////////
144 // VariableUseVisitor
145 //////////////////////////////////////////////
146
147 270 VariableUseVisitor::VariableUseVisitor(
148 const std::map<size_t, std::string> & _tmpvars
149 270 )
150 : OperationVisitor()
151 270 , tmpvars(_tmpvars)
152 {
153 270 }
154
155 270 VariableUseVisitor::~VariableUseVisitor()
156 270 {}
157
158 const std::vector<VariableLoadPoint> &
159 270 VariableUseVisitor::get_loads()
160 {
161 270 return variable_loads;
162 }
163
164 const std::vector<VariableLoadPoint> &
165 270 VariableUseVisitor::get_stores()
166 270 { return variable_stores; }
167
168
169 4378 void VariableUseVisitor::visit(
170 size_t block_id,
171 const BasicBlock & block,
172 size_t operation_index,
173 const Operation & operation
174 )
175 {
176 4378 const auto & operands = operation.get_operands();
177 4378 size_t noperands = operands.size();
178 4378 ProgramPoint program_point(block_id, operation_index);
179
180
2/2
✓ Branch 1 taken 388 times.
✓ Branch 2 taken 3990 times.
4378 if (operation.get_type() == Operation::OP_ASSIGN) {
181 388 const auto & it = tmpvars.find(operands.at(0));
182
2/2
✓ Branch 2 taken 382 times.
✓ Branch 3 taken 6 times.
388 if (it != tmpvars.end()) {
183 VariableLoadPoint assigned_at(
184 382 it->second,
185 program_point
186 764 );
187 382 variable_stores.push_back(assigned_at);
188 382 }
189 // All other operands are accesses.
190
2/2
✓ Branch 0 taken 388 times.
✓ Branch 1 taken 388 times.
776 for (size_t operand_id = 1; operand_id < noperands; operand_id++) {
191 388 const auto & it = tmpvars.find(operands.at(operand_id));
192
1/2
✗ Branch 2 not taken.
✓ Branch 3 taken 388 times.
388 if (it != tmpvars.end()) {
193 VariableLoadPoint assigned_at(
194 it->second,
195 program_point
196 );
197 variable_loads.push_back(assigned_at);
198 }
199 }
200 }
201 else {
202 // For other operations, all operands are 'use' of the variable.
203
2/2
✓ Branch 0 taken 1266 times.
✓ Branch 1 taken 3990 times.
5256 for (size_t operand_id = 0; operand_id < noperands; operand_id++) {
204 1266 const auto & it = tmpvars.find(operands.at(operand_id));
205
2/2
✓ Branch 2 taken 330 times.
✓ Branch 3 taken 936 times.
1266 if (it != tmpvars.end()) {
206 VariableLoadPoint assigned_at(
207 330 it->second,
208 program_point
209 660 );
210 330 variable_loads.push_back(assigned_at);
211 330 }
212 }
213 }
214 4378 }
215 270 VariableTmpvarVisitor::VariableTmpvarVisitor(
216 const std::map<std::string, std::string> & _ignore_arguments
217 270 )
218 270 : tmpvars()
219 270 , ignore_arguments(_ignore_arguments)
220 270 {}
221
222 270 VariableTmpvarVisitor::~VariableTmpvarVisitor()
223 270 {}
224
225 void
226 4378 VariableTmpvarVisitor::visit(
227 size_t block_id,
228 const BasicBlock & block,
229 size_t operation_index,
230 const Operation & operation
231 )
232 {
233
2/2
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 4066 times.
4378 if (operation.get_type() == Operation::OP_LOCAL_DECLARE) {
234 312 const OperationLocalDeclare & local_declare = (const OperationLocalDeclare&)operation;
235 // variables.push_back(std::pair(local_declare.get_variable(), &local_declare.get_source_ref()));
236 3376 return;
237 }
238
2/2
✓ Branch 1 taken 2574 times.
✓ Branch 2 taken 1492 times.
4066 if (operation.get_type() != Operation::OP_LOCAL_VARIABLE) {
239 2574 return;
240 }
241 1492 const OperationLocalVariable &operation_local = (const OperationLocalVariable &)operation;
242 1492 const std::string & varname = operation_local.get_symbol_name();
243
244 // Check to see if it's an argument and we
245 // can ignore it since arguments get initialized
246 // when the function begins.
247 1492 const auto & it = ignore_arguments.find(varname);
248
2/2
✓ Branch 2 taken 490 times.
✓ Branch 3 taken 1002 times.
1492 if (it != ignore_arguments.end()) {
249 490 return;
250 }
251
252 // Just keep track of the tmpvars that map to local variables.
253 1002 tmpvars[operation_local.get_result()] = varname;
254 }
255
256 const std::map<size_t, std::string> &
257 270 VariableTmpvarVisitor::get_tmpvars() const
258 270 { return tmpvars; }
259
260 482 bool AnalysisPassUseBeforeAssignment::true_at(
261 const Function & function,
262 std::map<size_t, bool> & already_checked,
263 const std::vector<ProgramPoint> & true_points,
264 const ProgramPoint & check_at
265 ) const
266 {
267 482 bool found_in_same_block = false;
268
269
2/2
✓ Branch 5 taken 708 times.
✓ Branch 6 taken 482 times.
1190 for (const auto & true_point : true_points) {
270
2/2
✓ Branch 0 taken 390 times.
✓ Branch 1 taken 318 times.
708 if (check_at.block_id == true_point.block_id &&
271
2/2
✓ Branch 0 taken 374 times.
✓ Branch 1 taken 16 times.
390 check_at.operation_index > true_point.operation_index) {
272 374 found_in_same_block = true;
273 }
274 }
275 // It was true in the current block, so it's true.
276
2/2
✓ Branch 0 taken 356 times.
✓ Branch 1 taken 126 times.
482 if (found_in_same_block) {
277 356 return true;
278 }
279 126 const std::vector<size_t> & predecessors = function.get_basic_block(check_at.block_id).get_reachable_from();
280
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 126 times.
126 if (predecessors.size() == 0) {
281 return false;
282 }
283
284 // If it wasn't true in the current block, try all predecessors.
285
2/2
✓ Branch 5 taken 180 times.
✓ Branch 6 taken 126 times.
306 for (const auto & predecessor : predecessors) {
286 180 ProgramPoint pred_point(predecessor, function.get_basic_block(predecessor).get_operations().size());
287
288 bool is_true;
289 180 const auto & already_checked_it = already_checked.find(pred_point.block_id);
290
2/2
✓ Branch 2 taken 28 times.
✓ Branch 3 taken 152 times.
180 if (already_checked_it != already_checked.end()) {
291 28 is_true = already_checked_it->second;
292 }
293 else {
294 152 is_true = true_at(function, already_checked, true_points, pred_point);
295 152 already_checked.insert(std::pair(pred_point.block_id, is_true));
296 }
297
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 180 times.
180 if (!is_true) {
298 return false;
299 }
300
1/2
✓ Branch 1 taken 180 times.
✗ Branch 2 not taken.
180 }
301 126 return true;
302
303 }
304
305 270 void AnalysisPassUseBeforeAssignment::check(const Function & function) const
306 {
307 270 std::map<std::string, std::string> ignore_arguments;
308
2/2
✓ Branch 6 taken 524 times.
✓ Branch 7 taken 270 times.
794 for (const auto & arg : function.get_arguments()) {
309 524 ignore_arguments[arg.get_name()] = arg.get_name();
310 }
311
312 270 VariableTmpvarVisitor id_visitor(ignore_arguments);
313 270 function.iterate_operations(id_visitor);
314
315 270 VariableUseVisitor visitor(id_visitor.get_tmpvars());
316 270 function.iterate_operations(visitor);
317
318 270 std::map<std::string, std::vector<ProgramPoint>> store_points_by_variable;
319
2/2
✓ Branch 6 taken 382 times.
✓ Branch 7 taken 270 times.
652 for (const auto & store : visitor.get_stores()) {
320 382 store_points_by_variable[store.variable_name].push_back(store.program_point);
321 }
322
323
2/2
✓ Branch 6 taken 330 times.
✓ Branch 7 taken 270 times.
600 for (const auto & load : visitor.get_loads()) {
324 330 std::map<size_t, bool> already_checked;
325 330 bool is_true = true_at(function, already_checked, store_points_by_variable[load.variable_name], load.program_point);
326
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 330 times.
330 if (!is_true) {
327 std::unique_ptr<Gyoji::context::Error> error = std::make_unique<Gyoji::context::Error>("Variable use before initialization.");
328 const SourceReference & source_ref =
329 function.get_basic_block(load.program_point.block_id).get_operations().at(load.program_point.operation_index)->get_source_ref();
330
331 error->add_message(
332 source_ref,
333 std::string("Variable ")
334 + load.variable_name
335 + std::string(" is uninitialized. This would result in undefined behavior. Note that if this appears on a 'return' line, then it is most likely a destructor call.")
336 );
337 get_compiler_context()
338 .get_errors()
339 .add_error(std::move(error));
340 }
341 330 already_checked.insert(std::pair(load.program_point.block_id, is_true));
342 330 }
343
344
345 // We now have all of the places
346 // the variable was stored
347 // and all of the places it was initialized.
348 // Next for each load, we need to find out if
349
350 // a) There exists a store for that variable
351 // b) For all stores that exist, they all happen "before" the
352 // use.
353
354 // If they're in the same BB, it's easy.
355 // If they're not, we need to find all BB that are "before"
356 // and ask the question on each.
357
358 270 }
359 //////////////////////////
360 //
361 /*
362 We start with the CFG graph of basic blocks.
363 0
364 / \
365 / \
366 1 2
367 / \ \
368 / \ 3
369 4 5 /
370 \ / /
371 \ / /
372 \ / /
373 6----
374
375 Then we have some property like "used-at" which is
376 associated with a program-point such as (6,3) (basic block 6, 3rd instruction)
377
378 We also have several 'initialized-at' facts like
379 (4,7 ) (2,3)
380
381 What we would like to know if whether 'initialized-before(6,3)' is true.
382
383 The inference rule is P(b,i) = P(b,i+1) for all i. This says if it was true at point i,
384 it is also true at point i+1 in the same block.
385
386 We also have the rule that says P(b,i) = | (for all c in Prececessors of b : P(c,end))
387
388 So we would like to know if initialized-before(6,3) is true,
389 We first check if there is a (6,k) with k<3.
390 If not, we check if ALL of (4,end) and (5,end) are true.
391 In this case, 4,end is true because (4,7) is true.
392 But (5,end) is false, so we check (1,end). Again no.
393 But (3,end) is true because (2,end) is true.
394
395 */
396