GCC Code Coverage Report


Directory: src/
File: src/frontend/gyoji.l
Date: 2025-10-24 11:14:59
Exec Total Coverage
Lines: 151 163 92.6%
Functions: 1 1 100.0%
Branches: 12 12 100.0%

Line Branch Exec Source
1 %{
2 /* Copyright 2025 Jonathan S. Arney
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * https://github.com/jarney/gyoji/blob/master/LICENSE
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <cstdlib>
17 #include <memory>
18 #include <gyoji-frontend.hpp>
19 #include <gyoji.y.hpp>
20
21 using namespace Gyoji::context;
22 using namespace Gyoji::frontend;
23 using namespace Gyoji::frontend::tree;
24 using namespace Gyoji::frontend::namespaces;
25 using namespace Gyoji::frontend::yacc;
26
27
// Non-syntax terminals (whitespace, comments, file metadata) queued by the scanner
// rules until the next syntax terminal is built; START_NODE then hands them to that
// terminal through move_array(), which leaves this list empty again.
// NOTE(review): this is file-scope state although the scanner is declared with
// %option reentrant -- presumably only one scanner is active at a time; confirm,
// or keep the list in LexContext instead.
28 std::vector<Gyoji::owned<TerminalNonSyntax>> non_syntax_data;
29
// Compile-time switch: set to 1 to print each syntax terminal as it is produced.
30 #define DEBUG_TERMINALS 0
31 #if DEBUG_TERMINALS
// Debug form: prints the label s, the text t and, when it is non-empty, the fully
// qualified name of the local variable node that START_NODE declares.
32 #define PRINT_TERMINALS(s,t) \
33 printf("%s : %s%s\n", s, t, \
34 (node->get_fully_qualified_name().size() > 0) ? \
35 (std::string(" : ") + node->get_fully_qualified_name()).c_str() : \
36 std::string().c_str());
37 #else
// Disabled form: expands to an empty comment, so nothing is printed.
38 #define PRINT_TERMINALS(s,t) /**/
39 #endif
40
// Makes the generated scanner read its input from the input_source member of the
// LexContext registered as the scanner extra data (yyget_extra).
// NOTE(review): presumably read() stores the number of bytes produced in result, as
// the flex YY_INPUT contract requires -- confirm against the input_source type.
41 #define YY_INPUT(buf,result,max_size) \
42 ((LexContext*)yyget_extra(yyscanner))->input_source.read(buf, result, max_size)
43
44
// Moves every element of src into dst.
//   dst - emptied first, then receives the elements of src in their original order.
//   src - empty when the function returns.
// START_NODE uses this to attach the queued non_syntax_data to a new Terminal.
// NOTE(review): the function has external linkage; if nothing outside this scanner
// calls it, marking it static would avoid link-time name clashes -- confirm.
45 32290 void move_array(
46 std::vector<Gyoji::owned<TerminalNonSyntax>> & dst,
47 std::vector<Gyoji::owned<TerminalNonSyntax>> & src
48 )
49 {
// Anything dst held before the call is dropped.
50 32290 dst.clear();
51
2/2
✓ Branch 5 taken 26586 times.
✓ Branch 6 taken 32290 times.
58876 for (auto & srcitem : src) {
52 26586 dst.push_back(std::move(srcitem));
53 }
// Discard the moved-from entries that the loop left behind in src.
54 32290 src.clear();
55 32290 }
56
// Appends the current lexeme (yytext) to the token stream through append_token();
// the block-comment rules use it for everything after the opening delimiter.
// It changes neither lc->line nor lc->column.
57 #define TOKEN_APPEND() \
58 { \
59 LexContext *lc = (LexContext*)yyget_extra(yyscanner); \
60 lc->compiler_context.get_token_stream() \
61 .append_token(std::string(yytext)); \
62 }
63
// Records the current lexeme as a new token of type TERMINAL_<nodetype>, tagged with
// the file name and the current line and column, then advances the column by the
// lexeme length. The expansion is not wrapped in braces: it leaves the locals lc
// (the LexContext) and tok (the new Token) in the calling action for later use, so it
// can be expanded only once per scope. It never changes lc->line.
64 #define TOKEN_ADD(nodetype) \
65 LexContext *lc = (LexContext*)yyget_extra(yyscanner); \
66 const Token &tok = \
67 lc->compiler_context.get_token_stream() \
68 .add_token( \
69 Gyoji::frontend::tree::TERMINAL_ ##nodetype, \
70 std::string(yytext), \
71 lc->compiler_context.get_filename(), \
72 lc->line, \
73 lc->column \
74 ); \
75 lc->column += strlen(yytext); \
76
// Builds the syntax Terminal for the current lexeme: adds the token, allocates the
// node, moves the queued non_syntax_data into node->non_syntax and stores the node in
// the parser semantic value (yylval) as a Gyoji::owned<Terminal>. The local node
// stays visible to the rest of the action.
// NOTE(review): presumably the emplace takes ownership of the raw pointer, so the
// new is balanced -- confirm against the Gyoji::owned definition.
77 #define START_NODE(nodetype) \
78 TOKEN_ADD(nodetype); \
79 Terminal* node = new Terminal(tok); \
80 move_array(node->non_syntax, non_syntax_data); \
81 yylval->emplace<Gyoji::owned<Terminal>>(node);
82
// Leaves the scanner action, handing the parser the token code named nodetype.
83 #define RETURN_NODE(nodetype) \
84 return YaccParser::token::nodetype;
85
// Complete action for a plain terminal: build the node, optionally trace it, return.
86 #define PROCESS_NODE(nodetype) \
87 START_NODE(nodetype) \
88 PRINT_TERMINALS(#nodetype, yytext) \
89 RETURN_NODE(nodetype);
90
// Same as PROCESS_NODE, but first attaches the namespace entity (possibly nullptr)
// that the identifier rule looked up for this name.
91 #define PROCESS_IDENTIFIER(nodetype, entity) \
92 START_NODE(nodetype) \
93 node->set_ns2_entity(entity); \
94 PRINT_TERMINALS(#nodetype, yytext) \
95 RETURN_NODE(nodetype);
96
97 // \&\& { PROCESS_NODE(AND_OP);}
98 //|"0b"|"0")
99 //radix ("0x"|"0b"|"0o")
100 //opt_radix ({radix}?)
101 %}
102
/* Scanner options: reentrant (scanner state travels in yyscanner), interactive,
 * no yywrap() call at end of input, and no default echo rule (nodefault). */
103 %option reentrant interactive noyywrap nodefault
104
/* Exclusive start condition that is active while inside a block comment. */
105 %x COMMENT
106
/* Optional leading minus sign of a numeric literal.
 * NOTE(review): because the sign belongs to the literal and flex prefers the
 * longest match, a-1 presumably lexes as an identifier followed by the literal -1
 * rather than identifier, MINUS, 1 -- confirm that the grammar copes with this. */
107 opt_sign (\-?)
108
/* Digit classes, each one extending the previous one. */
109 bin_digit [01]
110 oct_digit ({bin_digit}|[234567])
111 dec_digit ({oct_digit}|[89])
112 hex_digit ({dec_digit}|[aAbBcCdDeEfF])
113
/* Digit sequences: an optional radix prefix (0b, 0o, 0x), one digit of that
 * radix, then further digits and underscores.
 * NOTE(review): after the first digit, bin_dseq and oct_dseq accept any dec_digit,
 * so 0b12 and 0o78 are matched as single literals; presumably range checking
 * happens later -- confirm, or restrict the tails to bin_digit and oct_digit. */
114 bin_dseq ("0b"{bin_digit}({dec_digit}|\_)*)
115 oct_dseq ("0o"{oct_digit}({dec_digit}|\_)*)
116 dec_dseq ({dec_digit}({dec_digit}|\_)*)
117 hex_dseq ("0x"{hex_digit}({hex_digit}|\_)*)
118
119 int_dseq ({bin_dseq}|{oct_dseq}|{dec_dseq}|{hex_dseq})
120
121 opt_dec_dseq ({dec_dseq}?)
122
/* frac: digits are required on at least one side of the decimal point.
 * exp:  e or E, an optional sign, then decimal digits. */
123 frac (({opt_dec_dseq}"."{dec_dseq})|{dec_dseq}".")
124 exp ([eE][+-]?{dec_dseq})
125 exp_opt ({exp}?)
126
/* Integer literal: optional sign, digit sequence, optional width suffix
 * (opt_integer_size is defined two lines further down). */
127 integer ({opt_sign}{int_dseq}{opt_integer_size})
128 integer_size ("u64"|"u32"|"u16"|"u8"|"i64"|"i32"|"i16"|"i8")
129 opt_integer_size ({integer_size}?)
130
/* Float literal: optional sign, then either a fraction with an optional exponent
 * or plain digits with a mandatory exponent, then an optional f32 or f64 suffix. */
131 float ({opt_sign}(({frac}{exp_opt})|({dec_dseq}{exp}))({opt_float_size}))
132 float_size ("f32"|"f64")
133 opt_float_size ({float_size}?)
134
/* identifier: one unqualified name. whitespace: any single [[:space:]] character;
 * in this file it is referenced only by the qualified identifier rule. */
135 identifier ([a-zA-Z_][a-zA-Z_0-9]*)
136 whitespace ([[:space:]])
137 %%
138
139 600 namespace {PROCESS_NODE(NAMESPACE);}
    /* Reserved words. Each rule builds a Terminal with PROCESS_NODE and returns
     * the YaccParser token named in its action. Flex takes the longest match and,
     * on a tie, the earliest rule: these rules precede the identifier rule, so an
     * exact keyword wins the tie while a longer word falls through to it. */
140 168 using {PROCESS_NODE(USING);}
141 98 as {PROCESS_NODE(AS);}
142 548 typedef {PROCESS_NODE(TYPEDEF);}
143 856 class {PROCESS_NODE(CLASS);}
144 1042 public {PROCESS_NODE(PUBLIC);}
145 368 enum {PROCESS_NODE(ENUM);}
146 32
147 186 private {PROCESS_NODE(PRIVATE);}
148 66 protected {PROCESS_NODE(PROTECTED);}
149 22
150 struct {PROCESS_NODE(STRUCT);}
151 union {PROCESS_NODE(UNION);}
152
153 798 if {PROCESS_NODE(IF);}
154 384 else {PROCESS_NODE(ELSE);}
155 254 while {PROCESS_NODE(WHILE);}
156 48 for {PROCESS_NODE(FOR);}
157 32 switch {PROCESS_NODE(SWITCH);}
158 1648 return {PROCESS_NODE(RETURN);}
159 582 break {PROCESS_NODE(BREAK);}
160 48 continue {PROCESS_NODE(CONTINUE);}
161 48 label {PROCESS_NODE(LABEL);}
162 48 goto {PROCESS_NODE(GOTO);}
163 102 case {PROCESS_NODE(CASE);}
164 60 default {PROCESS_NODE(DEFAULT);}
165 10
166 264 sizeof {PROCESS_NODE(SIZEOF);}
167 390 cast {PROCESS_NODE(CAST);}
168 190 typeof {PROCESS_NODE(TYPEOF);}
169 20
170 354 const {PROCESS_NODE(CONST);}
171 318 volatile {PROCESS_NODE(VOLATILE);}
172 220 unsafe {PROCESS_NODE(UNSAFE);}
173 38
174 6 static {PROCESS_NODE(STATIC);}
175
176 14856 ; {PROCESS_NODE(SEMICOLON);}
    /* Operators and punctuation. Every rule maps one fixed lexeme to the token
     * named in its action. Longest match makes a multi-character operator win
     * over its single-character prefix regardless of rule order.
     * NOTE(review): there is no rule for a doubled ampersand (an AND_OP rule
     * survives only as a commented-out line in the prologue) whereas the doubled
     * pipe has OR_OP; a doubled ampersand therefore lexes as two ANDPERSAND
     * tokens -- confirm that the grammar expects that.
     * NOTE(review): ANDPERSAND is presumably a misspelling of AMPERSAND, but it
     * has to match the token name declared by the parser, so it is left alone. */
177 180 -> {PROCESS_NODE(PTR_OP);}
178 150 >> {PROCESS_NODE(RIGHT_OP);}
179 876 \+\+ {PROCESS_NODE(INC_OP);}
180 822 \-\- {PROCESS_NODE(DEC_OP);}
181 210 \<\< {PROCESS_NODE(LEFT_OP);}
182 88 \< { PROCESS_NODE(COMPARE_LESS); }
183 62 \> { PROCESS_NODE(COMPARE_GREATER); }
184 42 \<= { PROCESS_NODE(COMPARE_LESS_EQUAL); }
185 70 \>= { PROCESS_NODE(COMPARE_GREATER_EQUAL); }
186 542 == { PROCESS_NODE(COMPARE_EQUAL); }
187 498 != { PROCESS_NODE(COMPARE_NOT_EQUAL); }
188 138 \^ { PROCESS_NODE(XOR_OP);}
189 10
190 60 \|\| { PROCESS_NODE(OR_OP);}
191 30 \*\= { PROCESS_NODE(MUL_ASSIGNMENT); }
192 40 \/\= { PROCESS_NODE(DIV_ASSIGNMENT); }
193 46 \+\= { PROCESS_NODE(ADD_ASSIGNMENT); }
194 48 \-\= { PROCESS_NODE(SUB_ASSIGNMENT); }
195 42 \<\<\= { PROCESS_NODE(LEFT_ASSIGNMENT); }
196 40 \>\>\= { PROCESS_NODE(RIGHT_ASSIGNMENT); }
197 10 \&\= { PROCESS_NODE(AND_ASSIGNMENT); }
198 \^\= { PROCESS_NODE(XOR_ASSIGNMENT); }
199 \|\= { PROCESS_NODE(OR_ASSIGNMENT); }
200
201 6162 \( {PROCESS_NODE(PAREN_L);}
202 6162 \) {PROCESS_NODE(PAREN_R);}
203 2264 \[ {PROCESS_NODE(BRACKET_L);}
204 280 \] {PROCESS_NODE(BRACKET_R);}
205 4024 \{ {PROCESS_NODE(BRACE_L);}
206 5272 \} {PROCESS_NODE(BRACE_R);}
207 1318
208 306 \. {PROCESS_NODE(DOT);}
209 \? {PROCESS_NODE(QUESTIONMARK);}
210 156 \: {PROCESS_NODE(COLON);}
211 2500 \, {PROCESS_NODE(COMMA);}
212 942 \! {PROCESS_NODE(BANG);}
213 426 \& {PROCESS_NODE(ANDPERSAND);}
214 158 \| {PROCESS_NODE(PIPE);}
215 292 \+ {PROCESS_NODE(PLUS);}
216 274 \- {PROCESS_NODE(MINUS);}
217 2562 \* {PROCESS_NODE(STAR);}
218 978 \/ {PROCESS_NODE(SLASH);}
219 174 \% {PROCESS_NODE(PERCENT);}
220 3354 \= {PROCESS_NODE(ASSIGNMENT);}
221 1104
    /* Literals.
     * NOTE(review): the string rule accepts every character except a double
     * quote, so a backslash-escaped double quote ends the literal early, and a
     * literal may contain newlines without lc->line being advanced (TOKEN_ADD
     * only moves the column) -- confirm that this is intended.
     * NOTE(review): the character rule accepts one plain character, or a
     * backslash followed by one of a b e f n r t or a single-quote character; an
     * escaped backslash is not accepted and presumably ends up in the catch-all
     * rule as invalid input -- confirm.
     * The float rule precedes the integer rule; longest match decides between
     * them, the rule order only breaks ties. */
222 492 \"[^\"]*\" {PROCESS_NODE(LITERAL_STRING);}
223 \'([^'\\\n]|\\[abefnrt\'])\' {
224 624 PROCESS_NODE(LITERAL_CHAR);
225 }
226 (true|false) {PROCESS_NODE(LITERAL_BOOL);}
227 null {PROCESS_NODE(LITERAL_NULL);}
228
229 288 {float} {PROCESS_NODE(LITERAL_FLOAT);}
230 4116 {integer} { PROCESS_NODE(LITERAL_INT);}
231 1372 (::)?{identifier}({whitespace}*:{whitespace}*:{whitespace}*(\~{whitespace}*)?{identifier})* {
232 11034 // Breakdown of the (fairly dense) identifier pattern above:
233 // 1. An optional leading :: asks for a root-qualified search.
234 // 2. A plain identifier follows: [a-zA-Z_][a-zA-Z_0-9]*
235 // 3. Any number of further components may follow, each introduced by two colons
236 // (whitespace may surround and separate them) and optionally a ~ for a destructor.
237 //
238 // Because the ~ is only accepted directly AFTER the ::, a destructor name
239 // can be told apart from ~ used as bitwise-not already at the lexical
240 // layer.
// NOTE(review): {whitespace} is [[:space:]], so a qualified name may span lines;
// TOKEN_ADD advances lc->column by strlen(yytext) but never lc->line, so positions
// reported after such a name are presumably off -- confirm.
241
242 11034 LexContext *lex_context = (LexContext*)yyget_extra(yyscanner);
243 11034 NS2Context & ns2_context = lex_context->ns2_context;
244
// The lookup key is the raw lexeme, including any whitespace embedded in it.
245 //fprintf(stderr, "Looking up in namespace context %s\n", yytext);
246 11034 NS2Entity *entity = ns2_context.namespace_find(std::string(yytext));
247
// Each branch below ends in PROCESS_IDENTIFIER, which returns from the action:
//   not found       -> IDENTIFIER with no entity attached
//   namespace       -> NAMESPACE_NAME
//   type or class   -> TYPE_NAME
//   any other kind  -> IDENTIFIER carrying the entity that was found
2/2
✓ Branch 0 taken 3596 times.
✓ Branch 1 taken 7438 times.
11034 if (entity == nullptr) {
248 // Not yet known. We expect the syntax layer to
249 // find a place to put this identifier in a namespace.
250 //fprintf(stderr, "Name is an unknown identifier %s\n", yytext);
251 10788 PROCESS_IDENTIFIER(IDENTIFIER, nullptr);
252 }
253
2/2
✓ Branch 1 taken 38 times.
✓ Branch 2 taken 7400 times.
7438 else if (entity->get_type() == NS2Entity::ENTITY_TYPE_NAMESPACE) {
254 //fprintf(stderr, "Name is a namespace %s\n", yytext);
255 114 PROCESS_IDENTIFIER(NAMESPACE_NAME, entity);
256 }
257
6/6
✓ Branch 1 taken 4114 times.
✓ Branch 2 taken 3286 times.
✓ Branch 4 taken 176 times.
✓ Branch 5 taken 3938 times.
✓ Branch 6 taken 3462 times.
✓ Branch 7 taken 3938 times.
7400 else if (entity->get_type() == NS2Entity::ENTITY_TYPE_TYPE || entity->get_type() == NS2Entity::ENTITY_TYPE_CLASS) {
258 //fprintf(stderr, "Name is a type %s\n", yytext);
259 10386 PROCESS_IDENTIFIER(TYPE_NAME, entity);
260 }
261 else {
262 //fprintf(stderr, "Name is an identifier we've seen before %s\n", yytext);
263 // An identifier we've seen before. It should
264 // already be assigned to a namespace location.
265 11814 PROCESS_IDENTIFIER(IDENTIFIER, entity);
266 }
267 }
268 \~ {
269 54 // A stand-alone tilde. The original author placed this rule after the identifier
270 // rule because a qualified identifier (destructor name) can contain a tilde.
// NOTE(review): the identifier pattern cannot START with a tilde, so with flex
// longest-match semantics the relative order presumably does not matter -- confirm
// before relying on it.
271 162 PROCESS_NODE(TILDE);
272 }
273
274 62 "/*" {
// Start of a block comment: switch to the exclusive COMMENT state and queue a
// non-syntax terminal for it. The three <COMMENT> rules further down append the
// comment body and the closing delimiter to the token stream with TOKEN_APPEND;
// the closing delimiter also switches back to INITIAL.
// NOTE(review): TOKEN_APPEND updates neither lc->line nor lc->column, so newlines
// inside a block comment presumably leave every later token position stale -- confirm.
// NOTE(review): no <COMMENT><<EOF>> rule is visible, so an unterminated comment
// presumably reaches the generic <<EOF>> rule without a diagnostic -- confirm.
275 62 BEGIN(COMMENT);
276 124 TOKEN_ADD(comment);
277 Gyoji::owned<TerminalNonSyntax> nsd = std::make_unique<TerminalNonSyntax>(
// The doubled TerminalNonSyntax:: qualifier below is redundant; the other rules
// in this file use the single form.
278 124 TerminalNonSyntax::TerminalNonSyntax::Type::EXTRA_COMMENT_MULTI_LINE,
279 tok
280 62 );
281 62 non_syntax_data.push_back(std::move(nsd));
282 62 }
283 62 <COMMENT>"*/" {
284 124 TOKEN_APPEND()
285 62 BEGIN(INITIAL);
286 }
287 62 <COMMENT>[^*]* {
288 208 TOKEN_APPEND()
289 }
290 104 <COMMENT>\* {
291 148 TOKEN_APPEND()
292 }
293 74
294 548 \/\/.* {
295 // Single-line comment: two slashes and the rest of the line. The pattern stops
// before the newline, which is then handled by the newline rule. The comment is
// queued as a non-syntax terminal and the action does not return, so scanning
// continues with the next lexeme.
296 1096 TOKEN_ADD(single_line_comment);
297 Gyoji::owned<TerminalNonSyntax> nsd = std::make_unique<TerminalNonSyntax>(
298 1096 TerminalNonSyntax::Type::EXTRA_COMMENT_SINGLE_LINE,
299 tok
300 548 );
301 548 non_syntax_data.push_back(std::move(nsd));
302 548 }
303 548 [ \t]+ {
// A run of spaces and tabs, queued as one EXTRA_WHITESPACE non-syntax terminal.
// NOTE(review): only space and tab are accepted here; other blanks such as a
// carriage return presumably fall through to the catch-all rule and are reported
// as invalid input (so CRLF sources would be rejected) -- confirm.
304 33888 TOKEN_ADD(whitespace);
305 Gyoji::owned<TerminalNonSyntax> nsd = std::make_unique<TerminalNonSyntax>(
306 33888 TerminalNonSyntax::Type::EXTRA_WHITESPACE,
307 tok
308 16944 );
309 16944 non_syntax_data.push_back(std::move(nsd));
310 16944 }
311 16944 \n {
// A newline is queued as EXTRA_WHITESPACE, then the position moves to column 0 of
// the next line. TOKEN_ADD has already declared lc for the same LexContext; the
// second yyget_extra lookup below (lex_context) fetches the same scanner extra data.
312 17992 TOKEN_ADD(newline);
313 Gyoji::owned<TerminalNonSyntax> nsd = std::make_unique<TerminalNonSyntax>(
314 17992 TerminalNonSyntax::Type::EXTRA_WHITESPACE,
315 tok
316 8996 );
317 8996 non_syntax_data.push_back(std::move(nsd));
318 8996 LexContext *lex_context = (LexContext*)yyget_extra(yyscanner);
319 8996 lex_context->line++;
320 8996 lex_context->column = 0;
321 8996 }
    /* Lines starting with a hash sign are queued as EXTRA_FILE_METADATA. The first
     * rule matches the specific form hash-word, number, quoted text; the second
     * matches any other hash line. Both actions are identical.
     * NOTE(review): both patterns consume the trailing newline, yet neither action
     * increments lc->line or resets lc->column (TOKEN_ADD only adds the lexeme
     * length to the column), so every token after such a line is presumably
     * reported one line too early -- confirm and fix in the scanner source. */
322 8996 \#[a-zA-Z]+\ [[:digit:]]+\ \".*\"\n {
323 10 // XXX TODO : Parse the source file and line number to
324 // mark the current position of compilation in terms of
325 // an original source file that generated this block of code.
326 // This is useful, for example, when working with a YACC file
327 // that generates some Gyoji code and you want to trace
328 // the error to the correct line of YACC code and not necessarily
329 // to the source file being compiled.
330 20 TOKEN_ADD(file_metadata)
331 Gyoji::owned<TerminalNonSyntax> nsd = std::make_unique<TerminalNonSyntax>(
332 20 TerminalNonSyntax::Type::EXTRA_FILE_METADATA,
333 tok
334 10 );
335 10 non_syntax_data.push_back(std::move(nsd));
336 10 }
337 10 \#.*\n {
338 52 TOKEN_ADD(file_metadata)
339 Gyoji::owned<TerminalNonSyntax> nsd = std::make_unique<TerminalNonSyntax>(
340 52 TerminalNonSyntax::Type::EXTRA_FILE_METADATA,
341 tok
342 26 );
343 26 non_syntax_data.push_back(std::move(nsd));
344 26 }
345 26 . {
// Catch-all for any single character (other than a newline) that no earlier rule
// matched; required because the scanner is built with nodefault.
// NOTE(review): unlike the other token rules, this one returns without creating a
// Terminal, storing anything in yylval or advancing lc->column -- confirm that the
// parser never reads the semantic value of INVALID_INPUT.
346 return YaccParser::token::INVALID_INPUT;
347 }
    /* End of input is delivered as an ordinary terminal built by PROCESS_NODE, so
     * non-syntax data still queued at that point is attached to it by START_NODE. */
348 1626 <<EOF>> {PROCESS_NODE(YYEOF)}
349 542
350 %%
351