| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | %{ | ||
| 2 | /* Copyright 2025 Jonathan S. Arney | ||
| 3 | * | ||
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 5 | * you may not use this file except in compliance with the License. | ||
| 6 | * You may obtain a copy of the License at | ||
| 7 | * | ||
| 8 | * https://github.com/jarney/gyoji/blob/master/LICENSE | ||
| 9 | * | ||
| 10 | * Unless required by applicable law or agreed to in writing, software | ||
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, | ||
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 13 | * See the License for the specific language governing permissions and | ||
| 14 | * limitations under the License. | ||
| 15 | */ | ||
| 16 | #include <cstdlib> | ||
| 17 | #include <memory> | ||
| 18 | #include <gyoji-frontend.hpp> | ||
| 19 | #include <gyoji.y.hpp> | ||
| 20 | |||
| 21 | using namespace Gyoji::context; | ||
| 22 | using namespace Gyoji::frontend; | ||
| 23 | using namespace Gyoji::frontend::tree; | ||
| 24 | using namespace Gyoji::frontend::namespaces; | ||
| 25 | using namespace Gyoji::frontend::yacc; | ||
| 26 | |||
| 27 | |||
| 28 | std::vector<Gyoji::owned<TerminalNonSyntax>> non_syntax_data; | ||
| 29 | |||
| 30 | #define DEBUG_TERMINALS 0 | ||
| 31 | #if DEBUG_TERMINALS | ||
| 32 | #define PRINT_TERMINALS(s,t) \ | ||
| 33 | printf("%s : %s%s\n", s, t, \ | ||
| 34 | (node->get_fully_qualified_name().size() > 0) ? \ | ||
| 35 | (std::string(" : ") + node->get_fully_qualified_name()).c_str() : \ | ||
| 36 | std::string().c_str()); | ||
| 37 | #else | ||
| 38 | #define PRINT_TERMINALS(s,t) /**/ | ||
| 39 | #endif | ||
| 40 | |||
| 41 | #define YY_INPUT(buf,result,max_size) \ | ||
| 42 | ((LexContext*)yyget_extra(yyscanner))->input_source.read(buf, result, max_size) | ||
| 43 | |||
| 44 | |||
| 45 | 32290 | void move_array( | |
| 46 | std::vector<Gyoji::owned<TerminalNonSyntax>> & dst, | ||
| 47 | std::vector<Gyoji::owned<TerminalNonSyntax>> & src | ||
| 48 | ) | ||
| 49 | { | ||
| 50 | 32290 | dst.clear(); | |
| 51 |
2/2✓ Branch 5 taken 26586 times.
✓ Branch 6 taken 32290 times.
|
58876 | for (auto & srcitem : src) { |
| 52 | 26586 | dst.push_back(std::move(srcitem)); | |
| 53 | } | ||
| 54 | 32290 | src.clear(); | |
| 55 | 32290 | } | |
| 56 | |||
/* Passes the text of the current match to the token stream's
 * append_token(). Wrapped in braces so its local does not leak into the
 * rule action. yyleng is used for the length so the text is not rescanned
 * and is not cut short at an embedded NUL byte. */
#define TOKEN_APPEND()                                                       \
    {                                                                        \
        LexContext *lc = static_cast<LexContext*>(yyget_extra(yyscanner));   \
        lc->compiler_context.get_token_stream()                              \
            .append_token(                                                   \
                std::string(yytext, static_cast<std::size_t>(yyleng)));      \
    }

/* Adds a token of type TERMINAL_<nodetype> for the current match at the
 * current line/column, then advances the column by the match length.
 * Deliberately NOT wrapped in braces: it introduces `lc` and `tok` into the
 * enclosing rule action, which the code following the macro relies on.
 * The line counter is not touched here. */
#define TOKEN_ADD(nodetype)                                                  \
    LexContext *lc = static_cast<LexContext*>(yyget_extra(yyscanner));       \
    const Token &tok =                                                       \
        lc->compiler_context.get_token_stream()                              \
            .add_token(                                                      \
                Gyoji::frontend::tree::TERMINAL_ ##nodetype,                 \
                std::string(yytext, static_cast<std::size_t>(yyleng)),       \
                lc->compiler_context.get_filename(),                         \
                lc->line,                                                    \
                lc->column                                                   \
                );                                                           \
    lc->column += static_cast<std::size_t>(yyleng);

/* Creates the Terminal for the current match, hands it all non-syntax data
 * collected so far and stores it as the parser's semantic value. Leaves
 * `node` in scope for the macros below. */
#define START_NODE(nodetype)                                                 \
    TOKEN_ADD(nodetype);                                                     \
    Terminal* node = new Terminal(tok);                                      \
    move_array(node->non_syntax, non_syntax_data);                           \
    yylval->emplace<Gyoji::owned<Terminal>>(node);

/* Returns the parser token code for nodetype from the scanner. */
#define RETURN_NODE(nodetype)                                                \
    return YaccParser::token::nodetype;

/* Complete action for an ordinary terminal: build the node, optionally
 * trace it, return its token code. */
#define PROCESS_NODE(nodetype)                                               \
    START_NODE(nodetype)                                                     \
    PRINT_TERMINALS(#nodetype, yytext)                                       \
    RETURN_NODE(nodetype);

/* Same as PROCESS_NODE, but also records the namespace entity (possibly
 * nullptr) that the identifier lookup produced. */
#define PROCESS_IDENTIFIER(nodetype, entity)                                 \
    START_NODE(nodetype)                                                     \
    node->set_ns2_entity(entity);                                            \
    PRINT_TERMINALS(#nodetype, yytext)                                       \
    RETURN_NODE(nodetype);
| 96 | |||
| 97 | // \&\& { PROCESS_NODE(AND_OP);} | ||
| 98 | //|"0b"|"0") | ||
| 99 | //radix ("0x"|"0b"|"0o") | ||
| 100 | //opt_radix ({radix}?) | ||
| 101 | %} | ||
| 102 | |||
| 103 | %option reentrant interactive noyywrap nodefault | ||
| 104 | |||
| 105 | %x COMMENT | ||
| 106 | |||
/* Optional leading minus sign of a numeric literal.
 * NOTE(review): because the sign is part of the literal and flex takes the
 * longest match, "a-1" scans as an identifier followed by the literal -1,
 * not as identifier, MINUS, literal -- confirm the grammar expects that. */
opt_sign          ("-"?)

/* Digit classes; each one is a superset of the previous one. */
bin_digit         [01]
oct_digit         [0-7]
dec_digit         [0-9]
hex_digit         [0-9a-fA-F]

/* Digit sequences: radix prefix, one mandatory digit of that radix, then
 * any mix of digits and '_' separators. The binary and octal tails accept
 * every decimal digit, so e.g. "0b12" is matched as a single token.
 * NOTE(review): presumably out-of-range digits are diagnosed later -- confirm. */
bin_dseq          ("0b"{bin_digit}[0-9_]*)
oct_dseq          ("0o"{oct_digit}[0-9_]*)
dec_dseq          ({dec_digit}[0-9_]*)
hex_dseq          ("0x"{hex_digit}[0-9a-fA-F_]*)

int_dseq          ({bin_dseq}|{oct_dseq}|{dec_dseq}|{hex_dseq})

opt_dec_dseq      ({dec_dseq}?)

/* Fractional part: ".5", "1.5" or "1."; exponent: e/E, optional sign, digits. */
frac              ({opt_dec_dseq}"."{dec_dseq}|{dec_dseq}".")
exp               ([eE][+-]?{dec_dseq})
exp_opt           ({exp}?)

/* Integer literal: optional sign, digits in any radix, optional size suffix. */
integer_size      ("u64"|"u32"|"u16"|"u8"|"i64"|"i32"|"i16"|"i8")
opt_integer_size  ({integer_size}?)
integer           ({opt_sign}{int_dseq}{opt_integer_size})

/* Float literal: either a fraction with an optional exponent, or plain
 * decimal digits with a mandatory exponent; optional size suffix. */
float_size        ("f32"|"f64")
opt_float_size    ({float_size}?)
float             ({opt_sign}({frac}{exp_opt}|{dec_dseq}{exp}){opt_float_size})

identifier        ([a-zA-Z_][a-zA-Z_0-9]*)
whitespace        ([[:space:]])
| 137 | %% | ||
| 138 | |||
| 139 | 600 | namespace {PROCESS_NODE(NAMESPACE);} | |
| 140 | 168 | using {PROCESS_NODE(USING);} | |
| 141 | 98 | as {PROCESS_NODE(AS);} | |
| 142 | 548 | typedef {PROCESS_NODE(TYPEDEF);} | |
| 143 | 856 | class {PROCESS_NODE(CLASS);} | |
| 144 | 1042 | public {PROCESS_NODE(PUBLIC);} | |
| 145 | 368 | enum {PROCESS_NODE(ENUM);} | |
| 146 | 32 | ||
| 147 | 186 | private {PROCESS_NODE(PRIVATE);} | |
| 148 | 66 | protected {PROCESS_NODE(PROTECTED);} | |
| 149 | 22 | ||
| 150 | ✗ | struct {PROCESS_NODE(STRUCT);} | |
| 151 | ✗ | union {PROCESS_NODE(UNION);} | |
| 152 | ✗ | ||
| 153 | 798 | if {PROCESS_NODE(IF);} | |
| 154 | 384 | else {PROCESS_NODE(ELSE);} | |
| 155 | 254 | while {PROCESS_NODE(WHILE);} | |
| 156 | 48 | for {PROCESS_NODE(FOR);} | |
| 157 | 32 | switch {PROCESS_NODE(SWITCH);} | |
| 158 | 1648 | return {PROCESS_NODE(RETURN);} | |
| 159 | 582 | break {PROCESS_NODE(BREAK);} | |
| 160 | 48 | continue {PROCESS_NODE(CONTINUE);} | |
| 161 | 48 | label {PROCESS_NODE(LABEL);} | |
| 162 | 48 | goto {PROCESS_NODE(GOTO);} | |
| 163 | 102 | case {PROCESS_NODE(CASE);} | |
| 164 | 60 | default {PROCESS_NODE(DEFAULT);} | |
| 165 | 10 | ||
| 166 | 264 | sizeof {PROCESS_NODE(SIZEOF);} | |
| 167 | 390 | cast {PROCESS_NODE(CAST);} | |
| 168 | 190 | typeof {PROCESS_NODE(TYPEOF);} | |
| 169 | 20 | ||
| 170 | 354 | const {PROCESS_NODE(CONST);} | |
| 171 | 318 | volatile {PROCESS_NODE(VOLATILE);} | |
| 172 | 220 | unsafe {PROCESS_NODE(UNSAFE);} | |
| 173 | 38 | ||
| 174 | 6 | static {PROCESS_NODE(STATIC);} | |
| 175 | |||
| 176 | 14856 | ; {PROCESS_NODE(SEMICOLON);} | |
| 177 | 180 | -> {PROCESS_NODE(PTR_OP);} | |
| 178 | 150 | >> {PROCESS_NODE(RIGHT_OP);} | |
| 179 | 876 | \+\+ {PROCESS_NODE(INC_OP);} | |
| 180 | 822 | \-\- {PROCESS_NODE(DEC_OP);} | |
| 181 | 210 | \<\< {PROCESS_NODE(LEFT_OP);} | |
| 182 | 88 | \< { PROCESS_NODE(COMPARE_LESS); } | |
| 183 | 62 | \> { PROCESS_NODE(COMPARE_GREATER); } | |
| 184 | 42 | \<= { PROCESS_NODE(COMPARE_LESS_EQUAL); } | |
| 185 | 70 | \>= { PROCESS_NODE(COMPARE_GREATER_EQUAL); } | |
| 186 | 542 | == { PROCESS_NODE(COMPARE_EQUAL); } | |
| 187 | 498 | != { PROCESS_NODE(COMPARE_NOT_EQUAL); } | |
| 188 | 138 | \^ { PROCESS_NODE(XOR_OP);} | |
| 189 | 10 | ||
| 190 | 60 | \|\| { PROCESS_NODE(OR_OP);} | |
| 191 | 30 | \*\= { PROCESS_NODE(MUL_ASSIGNMENT); } | |
| 192 | 40 | \/\= { PROCESS_NODE(DIV_ASSIGNMENT); } | |
| 193 | 46 | \+\= { PROCESS_NODE(ADD_ASSIGNMENT); } | |
| 194 | 48 | \-\= { PROCESS_NODE(SUB_ASSIGNMENT); } | |
| 195 | 42 | \<\<\= { PROCESS_NODE(LEFT_ASSIGNMENT); } | |
| 196 | 40 | \>\>\= { PROCESS_NODE(RIGHT_ASSIGNMENT); } | |
| 197 | 10 | \&\= { PROCESS_NODE(AND_ASSIGNMENT); } | |
| 198 | ✗ | \^\= { PROCESS_NODE(XOR_ASSIGNMENT); } | |
| 199 | ✗ | \|\= { PROCESS_NODE(OR_ASSIGNMENT); } | |
| 200 | ✗ | ||
| 201 | 6162 | \( {PROCESS_NODE(PAREN_L);} | |
| 202 | 6162 | \) {PROCESS_NODE(PAREN_R);} | |
| 203 | 2264 | \[ {PROCESS_NODE(BRACKET_L);} | |
| 204 | 280 | \] {PROCESS_NODE(BRACKET_R);} | |
| 205 | 4024 | \{ {PROCESS_NODE(BRACE_L);} | |
| 206 | 5272 | \} {PROCESS_NODE(BRACE_R);} | |
| 207 | 1318 | ||
| 208 | 306 | \. {PROCESS_NODE(DOT);} | |
| 209 | ✗ | \? {PROCESS_NODE(QUESTIONMARK);} | |
| 210 | 156 | \: {PROCESS_NODE(COLON);} | |
| 211 | 2500 | \, {PROCESS_NODE(COMMA);} | |
| 212 | 942 | \! {PROCESS_NODE(BANG);} | |
| 213 | 426 | \& {PROCESS_NODE(ANDPERSAND);} | |
| 214 | 158 | \| {PROCESS_NODE(PIPE);} | |
| 215 | 292 | \+ {PROCESS_NODE(PLUS);} | |
| 216 | 274 | \- {PROCESS_NODE(MINUS);} | |
| 217 | 2562 | \* {PROCESS_NODE(STAR);} | |
| 218 | 978 | \/ {PROCESS_NODE(SLASH);} | |
| 219 | 174 | \% {PROCESS_NODE(PERCENT);} | |
| 220 | 3354 | \= {PROCESS_NODE(ASSIGNMENT);} | |
| 221 | 1104 | ||
| 222 | 492 | \"[^\"]*\" {PROCESS_NODE(LITERAL_STRING);} | |
| 223 | \'([^'\\\n]|\\[abefnrt\'])\' { | ||
| 224 | 624 | PROCESS_NODE(LITERAL_CHAR); | |
| 225 | } | ||
| 226 | ✗ | (true|false) {PROCESS_NODE(LITERAL_BOOL);} | |
| 227 | ✗ | null {PROCESS_NODE(LITERAL_NULL);} | |
| 228 | ✗ | ||
| 229 | 288 | {float} {PROCESS_NODE(LITERAL_FLOAT);} | |
| 230 | 4116 | {integer} { PROCESS_NODE(LITERAL_INT);} | |
(::)?{identifier}({whitespace}*:{whitespace}*:{whitespace}*(\~{whitespace}*)?{identifier})* {
    // Scoped-name pattern, piece by piece:
    //   (::)?          optional leading "::" for a root-qualified lookup
    //   {identifier}   first component, [a-zA-Z_][a-zA-Z_0-9]*
    //   (: : ~? {identifier})*
    //                  any number of further components, each introduced
    //                  by two ':' characters (whitespace is allowed around
    //                  and between them) and optionally a '~' that marks a
    //                  destructor name.
    // Since a '~' is only accepted right after the "::", it can be told
    // apart from '~' as bitwise-not already at the lexical level.
    // NOTE(review): {whitespace} is [[:space:]], so a match may contain
    // newlines, which the token macros do not add to the line counter.

    LexContext *scan_ctx = (LexContext*)yyget_extra(yyscanner);

    // Ask the namespace context what, if anything, this name refers to.
    NS2Entity *found = scan_ctx->ns2_context.namespace_find(std::string(yytext));

    // Every PROCESS_IDENTIFIER expansion below ends in a return, so the
    // checks are written as guard clauses.
    if (found == nullptr) {
        // Not known yet: the syntax layer is expected to find a place for
        // this identifier in a namespace.
        PROCESS_IDENTIFIER(IDENTIFIER, nullptr);
    }

    const auto found_kind = found->get_type();

    if (found_kind == NS2Entity::ENTITY_TYPE_NAMESPACE) {
        PROCESS_IDENTIFIER(NAMESPACE_NAME, found);
    }

    if (found_kind == NS2Entity::ENTITY_TYPE_TYPE
        || found_kind == NS2Entity::ENTITY_TYPE_CLASS) {
        PROCESS_IDENTIFIER(TYPE_NAME, found);
    }

    // Any other entity: an identifier seen before, which should already
    // be assigned to a namespace location.
    PROCESS_IDENTIFIER(IDENTIFIER, found);
}
"~" {
    // Kept below the identifier rule: a '~' that follows "::" belongs to
    // a destructor name and is consumed by that rule. Only a '~' the
    // identifier rule did not take becomes a TILDE terminal.
    PROCESS_NODE(TILDE);
}
| 273 | |||
"/*" {
    // Opening delimiter: record a comment token at the current position
    // and switch to the exclusive COMMENT state. The COMMENT rules below
    // pass the rest of the comment to the token stream piece by piece.
    BEGIN(COMMENT);
    TOKEN_ADD(comment);
    Gyoji::owned<TerminalNonSyntax> nsd = std::make_unique<TerminalNonSyntax>(
        TerminalNonSyntax::Type::EXTRA_COMMENT_MULTI_LINE,
        tok
        );
    non_syntax_data.push_back(std::move(nsd));
}
<COMMENT>"*/" {
    // Closing delimiter: append it, account for its width and go back
    // to normal scanning.
    TOKEN_APPEND()
    LexContext *comment_ctx = (LexContext*)yyget_extra(yyscanner);
    comment_ctx->column += static_cast<std::size_t>(yyleng);
    BEGIN(INITIAL);
}
<COMMENT>[^*\n]+ {
    // Comment text up to the next '*' or end of line. TOKEN_APPEND does
    // not move the position, so the column is advanced here.
    TOKEN_APPEND()
    LexContext *comment_ctx = (LexContext*)yyget_extra(yyscanner);
    comment_ctx->column += static_cast<std::size_t>(yyleng);
}
<COMMENT>\n {
    // Newlines get their own rule so that tokens after a multi-line
    // comment carry the right line number (same bookkeeping as the
    // newline rule of the INITIAL state).
    TOKEN_APPEND()
    LexContext *comment_ctx = (LexContext*)yyget_extra(yyscanner);
    comment_ctx->line++;
    comment_ctx->column = 0;
}
<COMMENT>\* {
    // A '*' that does not start the closing delimiter.
    TOKEN_APPEND()
    LexContext *comment_ctx = (LexContext*)yyget_extra(yyscanner);
    comment_ctx->column += static_cast<std::size_t>(yyleng);
}
| 293 | 74 | ||
"//".* {
    // Single-line comment: everything from "//" up to, but not including,
    // the end of the line. It is queued as non-syntax data for the next
    // terminal; no token code is returned to the parser.
    TOKEN_ADD(single_line_comment);
    non_syntax_data.push_back(
        std::make_unique<TerminalNonSyntax>(
            TerminalNonSyntax::Type::EXTRA_COMMENT_SINGLE_LINE,
            tok
            )
        );
}
[ \t]+ {
    // A run of blanks and tabs: queued as non-syntax data for the next
    // terminal; no token code is returned to the parser.
    TOKEN_ADD(whitespace);
    non_syntax_data.push_back(
        std::make_unique<TerminalNonSyntax>(
            TerminalNonSyntax::Type::EXTRA_WHITESPACE,
            tok
            )
        );
}
\n {
    // End of line: queued as whitespace for the next terminal, then the
    // position moves to the start of the following line.
    TOKEN_ADD(newline);
    non_syntax_data.push_back(
        std::make_unique<TerminalNonSyntax>(
            TerminalNonSyntax::Type::EXTRA_WHITESPACE,
            tok
            )
        );
    // `lc` is the LexContext pointer that TOKEN_ADD left in scope.
    lc->line++;
    lc->column = 0;
}
\#[a-zA-Z]+\ [[:digit:]]+\ \".*\"\n {
    // Directive of the form: #<word> <number> "<text>" followed by newline.
    // XXX TODO : Parse the source file and line number to
    // mark the current position of compilation in terms of
    // an original source file that generated this block of code.
    // This is useful, for example, when working with a YACC file
    // that generates some Gyoji code and you want to trace
    // the error to the correct line of YACC code and not necessarily
    // to the source file being compiled.
    TOKEN_ADD(file_metadata)
    Gyoji::owned<TerminalNonSyntax> nsd = std::make_unique<TerminalNonSyntax>(
        TerminalNonSyntax::Type::EXTRA_FILE_METADATA,
        tok
        );
    non_syntax_data.push_back(std::move(nsd));
    // The match includes the terminating newline, so the next token
    // starts on the following line at column 0, exactly as after the
    // plain newline rule. `lc` is left in scope by TOKEN_ADD.
    lc->line++;
    lc->column = 0;
}
\#.*\n {
    // Any other line starting with '#': kept as file metadata.
    TOKEN_ADD(file_metadata)
    Gyoji::owned<TerminalNonSyntax> nsd = std::make_unique<TerminalNonSyntax>(
        TerminalNonSyntax::Type::EXTRA_FILE_METADATA,
        tok
        );
    non_syntax_data.push_back(std::move(nsd));
    // The match includes the terminating newline; see the rule above.
    lc->line++;
    lc->column = 0;
}
. {
    // Any single character no earlier rule accepted. No token is added to
    // the token stream and the column is not advanced for it.
    RETURN_NODE(INVALID_INPUT)
}
<<EOF>> {
    // End of input: emit a final terminal so that non-syntax data still
    // pending is attached to it.
    PROCESS_NODE(YYEOF);
}
| 349 | 542 | ||
| 350 | ✗ | %% | |
| 351 |