Inja 3.4.0
A Template Engine for Modern C++
Loading...
Searching...
No Matches
parser.hpp
1#ifndef INCLUDE_INJA_PARSER_HPP_
2#define INCLUDE_INJA_PARSER_HPP_
3
4#include <limits>
5#include <stack>
6#include <string>
7#include <utility>
8#include <vector>
9
10#include "config.hpp"
11#include "exceptions.hpp"
12#include "function_storage.hpp"
13#include "lexer.hpp"
14#include "node.hpp"
15#include "template.hpp"
16#include "token.hpp"
17#include "utils.hpp"
18
19namespace inja {
20
24class Parser {
25 using Arguments = std::vector<std::shared_ptr<ExpressionNode>>;
26 using OperatorStack = std::stack<std::shared_ptr<FunctionNode>>;
27
28 const ParserConfig& config;
29
30 Lexer lexer;
31 TemplateStorage& template_storage;
32 const FunctionStorage& function_storage;
33
34 Token tok, peek_tok;
35 bool have_peek_tok {false};
36
37 std::string_view literal_start;
38
39 BlockNode* current_block {nullptr};
40 ExpressionListNode* current_expression_list {nullptr};
41
42 std::stack<IfStatementNode*> if_statement_stack;
43 std::stack<ForStatementNode*> for_statement_stack;
44 std::stack<BlockStatementNode*> block_statement_stack;
45
46 inline void throw_parser_error(const std::string& message) const {
47 INJA_THROW(ParserError(message, lexer.current_position()));
48 }
49
50 inline void get_next_token() {
51 if (have_peek_tok) {
52 tok = peek_tok;
53 have_peek_tok = false;
54 } else {
55 tok = lexer.scan();
56 }
57 }
58
59 inline void get_peek_token() {
60 if (!have_peek_tok) {
61 peek_tok = lexer.scan();
62 have_peek_tok = true;
63 }
64 }
65
66 inline void add_literal(Arguments &arguments, const char* content_ptr) {
67 std::string_view data_text(literal_start.data(), tok.text.data() - literal_start.data() + tok.text.size());
68 arguments.emplace_back(std::make_shared<LiteralNode>(data_text, data_text.data() - content_ptr));
69 }
70
71 inline void add_operator(Arguments &arguments, OperatorStack &operator_stack) {
72 auto function = operator_stack.top();
73 operator_stack.pop();
74
75 if (static_cast<int>(arguments.size()) < function->number_args) {
76 throw_parser_error("too few arguments");
77 }
78
79 for (int i = 0; i < function->number_args; ++i) {
80 function->arguments.insert(function->arguments.begin(), arguments.back());
81 arguments.pop_back();
82 }
83 arguments.emplace_back(function);
84 }
85
86 void add_to_template_storage(std::string_view path, std::string& template_name) {
87 if (template_storage.find(template_name) != template_storage.end()) {
88 return;
89 }
90
91 std::string original_path = static_cast<std::string>(path);
92 std::string original_name = template_name;
93
94 if (config.search_included_templates_in_files) {
95 // Build the relative path
96 template_name = original_path + original_name;
97 if (template_name.compare(0, 2, "./") == 0) {
98 template_name.erase(0, 2);
99 }
100
101 if (template_storage.find(template_name) == template_storage.end()) {
102 // Load file
103 std::ifstream file;
104 file.open(template_name);
105 if (!file.fail()) {
106 std::string text((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
107
108 auto include_template = Template(text);
109 template_storage.emplace(template_name, include_template);
110 parse_into_template(template_storage[template_name], template_name);
111 return;
112 } else if (!config.include_callback) {
113 INJA_THROW(FileError("failed accessing file at '" + template_name + "'"));
114 }
115 }
116 }
117
118 // Try include callback
119 if (config.include_callback) {
120 auto include_template = config.include_callback(original_path, original_name);
121 template_storage.emplace(template_name, include_template);
122 }
123 }
124
125 std::string parse_filename() const {
126 if (tok.kind != Token::Kind::String) {
127 throw_parser_error("expected string, got '" + tok.describe() + "'");
128 }
129
130 if (tok.text.length() < 2) {
131 throw_parser_error("expected filename, got '" + static_cast<std::string>(tok.text) + "'");
132 }
133
134 // Remove first and last character ""
135 return std::string {tok.text.substr(1, tok.text.length() - 2)};
136 }
137
138 bool parse_expression(Template& tmpl, Token::Kind closing) {
139 current_expression_list->root = parse_expression(tmpl);
140 return tok.kind == closing;
141 }
142
143 std::shared_ptr<ExpressionNode> parse_expression(Template& tmpl) {
144 size_t current_bracket_level {0};
145 size_t current_brace_level {0};
146 Arguments arguments;
147 OperatorStack operator_stack;
148
149 while (tok.kind != Token::Kind::Eof) {
150 // Literals
151 switch (tok.kind) {
152 case Token::Kind::String: {
153 if (current_brace_level == 0 && current_bracket_level == 0) {
154 literal_start = tok.text;
155 add_literal(arguments, tmpl.content.c_str());
156 }
157 } break;
158 case Token::Kind::Number: {
159 if (current_brace_level == 0 && current_bracket_level == 0) {
160 literal_start = tok.text;
161 add_literal(arguments, tmpl.content.c_str());
162 }
163 } break;
164 case Token::Kind::LeftBracket: {
165 if (current_brace_level == 0 && current_bracket_level == 0) {
166 literal_start = tok.text;
167 }
168 current_bracket_level += 1;
169 } break;
170 case Token::Kind::LeftBrace: {
171 if (current_brace_level == 0 && current_bracket_level == 0) {
172 literal_start = tok.text;
173 }
174 current_brace_level += 1;
175 } break;
176 case Token::Kind::RightBracket: {
177 if (current_bracket_level == 0) {
178 throw_parser_error("unexpected ']'");
179 }
180
181 current_bracket_level -= 1;
182 if (current_brace_level == 0 && current_bracket_level == 0) {
183 add_literal(arguments, tmpl.content.c_str());
184 }
185 } break;
186 case Token::Kind::RightBrace: {
187 if (current_brace_level == 0) {
188 throw_parser_error("unexpected '}'");
189 }
190
191 current_brace_level -= 1;
192 if (current_brace_level == 0 && current_bracket_level == 0) {
193 add_literal(arguments, tmpl.content.c_str());
194 }
195 } break;
196 case Token::Kind::Id: {
197 get_peek_token();
198
199 // Data Literal
200 if (tok.text == static_cast<decltype(tok.text)>("true") || tok.text == static_cast<decltype(tok.text)>("false") ||
201 tok.text == static_cast<decltype(tok.text)>("null")) {
202 if (current_brace_level == 0 && current_bracket_level == 0) {
203 literal_start = tok.text;
204 add_literal(arguments, tmpl.content.c_str());
205 }
206
207 // Operator
208 } else if (tok.text == "and" || tok.text == "or" || tok.text == "in" || tok.text == "not") {
209 goto parse_operator;
210
211 // Functions
212 } else if (peek_tok.kind == Token::Kind::LeftParen) {
213 auto func = std::make_shared<FunctionNode>(tok.text, tok.text.data() - tmpl.content.c_str());
214 get_next_token();
215 do {
216 get_next_token();
217 auto expr = parse_expression(tmpl);
218 if (!expr) {
219 break;
220 }
221 func->number_args += 1;
222 func->arguments.emplace_back(expr);
223 } while (tok.kind == Token::Kind::Comma);
224 if (tok.kind != Token::Kind::RightParen) {
225 throw_parser_error("expected right parenthesis, got '" + tok.describe() + "'");
226 }
227
228 auto function_data = function_storage.find_function(func->name, func->number_args);
229 if (function_data.operation == FunctionStorage::Operation::None) {
230 throw_parser_error("unknown function " + func->name);
231 }
232 func->operation = function_data.operation;
233 if (function_data.operation == FunctionStorage::Operation::Callback) {
234 func->callback = function_data.callback;
235 }
236 arguments.emplace_back(func);
237
238 // Variables
239 } else {
240 arguments.emplace_back(std::make_shared<DataNode>(static_cast<std::string>(tok.text), tok.text.data() - tmpl.content.c_str()));
241 }
242
243 // Operators
244 } break;
245 case Token::Kind::Equal:
246 case Token::Kind::NotEqual:
247 case Token::Kind::GreaterThan:
248 case Token::Kind::GreaterEqual:
249 case Token::Kind::LessThan:
250 case Token::Kind::LessEqual:
251 case Token::Kind::Plus:
252 case Token::Kind::Minus:
253 case Token::Kind::Times:
254 case Token::Kind::Slash:
255 case Token::Kind::Power:
256 case Token::Kind::Percent:
257 case Token::Kind::Dot: {
258
259 parse_operator:
260 FunctionStorage::Operation operation;
261 switch (tok.kind) {
262 case Token::Kind::Id: {
263 if (tok.text == "and") {
264 operation = FunctionStorage::Operation::And;
265 } else if (tok.text == "or") {
266 operation = FunctionStorage::Operation::Or;
267 } else if (tok.text == "in") {
268 operation = FunctionStorage::Operation::In;
269 } else if (tok.text == "not") {
270 operation = FunctionStorage::Operation::Not;
271 } else {
272 throw_parser_error("unknown operator in parser.");
273 }
274 } break;
275 case Token::Kind::Equal: {
276 operation = FunctionStorage::Operation::Equal;
277 } break;
278 case Token::Kind::NotEqual: {
279 operation = FunctionStorage::Operation::NotEqual;
280 } break;
281 case Token::Kind::GreaterThan: {
282 operation = FunctionStorage::Operation::Greater;
283 } break;
284 case Token::Kind::GreaterEqual: {
285 operation = FunctionStorage::Operation::GreaterEqual;
286 } break;
287 case Token::Kind::LessThan: {
288 operation = FunctionStorage::Operation::Less;
289 } break;
290 case Token::Kind::LessEqual: {
291 operation = FunctionStorage::Operation::LessEqual;
292 } break;
293 case Token::Kind::Plus: {
294 operation = FunctionStorage::Operation::Add;
295 } break;
296 case Token::Kind::Minus: {
297 operation = FunctionStorage::Operation::Subtract;
298 } break;
299 case Token::Kind::Times: {
300 operation = FunctionStorage::Operation::Multiplication;
301 } break;
302 case Token::Kind::Slash: {
303 operation = FunctionStorage::Operation::Division;
304 } break;
305 case Token::Kind::Power: {
306 operation = FunctionStorage::Operation::Power;
307 } break;
308 case Token::Kind::Percent: {
309 operation = FunctionStorage::Operation::Modulo;
310 } break;
311 case Token::Kind::Dot: {
312 operation = FunctionStorage::Operation::AtId;
313 } break;
314 default: {
315 throw_parser_error("unknown operator in parser.");
316 }
317 }
318 auto function_node = std::make_shared<FunctionNode>(operation, tok.text.data() - tmpl.content.c_str());
319
320 while (!operator_stack.empty() &&
321 ((operator_stack.top()->precedence > function_node->precedence) ||
322 (operator_stack.top()->precedence == function_node->precedence && function_node->associativity == FunctionNode::Associativity::Left))) {
323 add_operator(arguments, operator_stack);
324 }
325
326 operator_stack.emplace(function_node);
327 } break;
328 case Token::Kind::Comma: {
329 if (current_brace_level == 0 && current_bracket_level == 0) {
330 goto break_loop;
331 }
332 } break;
333 case Token::Kind::Colon: {
334 if (current_brace_level == 0 && current_bracket_level == 0) {
335 throw_parser_error("unexpected ':'");
336 }
337 } break;
338 case Token::Kind::LeftParen: {
339 get_next_token();
340 auto expr = parse_expression(tmpl);
341 if (tok.kind != Token::Kind::RightParen) {
342 throw_parser_error("expected right parenthesis, got '" + tok.describe() + "'");
343 }
344 if (!expr) {
345 throw_parser_error("empty expression in parentheses");
346 }
347 arguments.emplace_back(expr);
348 } break;
349 default:
350 goto break_loop;
351 }
352
353 get_next_token();
354 }
355
356 break_loop:
357 while (!operator_stack.empty()) {
358 add_operator(arguments, operator_stack);
359 }
360
361 std::shared_ptr<ExpressionNode> expr;
362 if (arguments.size() == 1) {
363 expr = arguments[0];
364 arguments = {};
365 } else if (arguments.size() > 1) {
366 throw_parser_error("malformed expression");
367 }
368 return expr;
369 }
370
371 bool parse_statement(Template& tmpl, Token::Kind closing, std::string_view path) {
372 if (tok.kind != Token::Kind::Id) {
373 return false;
374 }
375
376 if (tok.text == static_cast<decltype(tok.text)>("if")) {
377 get_next_token();
378
379 auto if_statement_node = std::make_shared<IfStatementNode>(current_block, tok.text.data() - tmpl.content.c_str());
380 current_block->nodes.emplace_back(if_statement_node);
381 if_statement_stack.emplace(if_statement_node.get());
382 current_block = &if_statement_node->true_statement;
383 current_expression_list = &if_statement_node->condition;
384
385 if (!parse_expression(tmpl, closing)) {
386 return false;
387 }
388 } else if (tok.text == static_cast<decltype(tok.text)>("else")) {
389 if (if_statement_stack.empty()) {
390 throw_parser_error("else without matching if");
391 }
392 auto& if_statement_data = if_statement_stack.top();
393 get_next_token();
394
395 if_statement_data->has_false_statement = true;
396 current_block = &if_statement_data->false_statement;
397
398 // Chained else if
399 if (tok.kind == Token::Kind::Id && tok.text == static_cast<decltype(tok.text)>("if")) {
400 get_next_token();
401
402 auto if_statement_node = std::make_shared<IfStatementNode>(true, current_block, tok.text.data() - tmpl.content.c_str());
403 current_block->nodes.emplace_back(if_statement_node);
404 if_statement_stack.emplace(if_statement_node.get());
405 current_block = &if_statement_node->true_statement;
406 current_expression_list = &if_statement_node->condition;
407
408 if (!parse_expression(tmpl, closing)) {
409 return false;
410 }
411 }
412 } else if (tok.text == static_cast<decltype(tok.text)>("endif")) {
413 if (if_statement_stack.empty()) {
414 throw_parser_error("endif without matching if");
415 }
416
417 // Nested if statements
418 while (if_statement_stack.top()->is_nested) {
419 if_statement_stack.pop();
420 }
421
422 auto& if_statement_data = if_statement_stack.top();
423 get_next_token();
424
425 current_block = if_statement_data->parent;
426 if_statement_stack.pop();
427 } else if (tok.text == static_cast<decltype(tok.text)>("block")) {
428 get_next_token();
429
430 if (tok.kind != Token::Kind::Id) {
431 throw_parser_error("expected block name, got '" + tok.describe() + "'");
432 }
433
434 const std::string block_name = static_cast<std::string>(tok.text);
435
436 auto block_statement_node = std::make_shared<BlockStatementNode>(current_block, block_name, tok.text.data() - tmpl.content.c_str());
437 current_block->nodes.emplace_back(block_statement_node);
438 block_statement_stack.emplace(block_statement_node.get());
439 current_block = &block_statement_node->block;
440 auto success = tmpl.block_storage.emplace(block_name, block_statement_node);
441 if (!success.second) {
442 throw_parser_error("block with the name '" + block_name + "' does already exist");
443 }
444
445 get_next_token();
446 } else if (tok.text == static_cast<decltype(tok.text)>("endblock")) {
447 if (block_statement_stack.empty()) {
448 throw_parser_error("endblock without matching block");
449 }
450
451 auto& block_statement_data = block_statement_stack.top();
452 get_next_token();
453
454 current_block = block_statement_data->parent;
455 block_statement_stack.pop();
456 } else if (tok.text == static_cast<decltype(tok.text)>("for")) {
457 get_next_token();
458
459 // options: for a in arr; for a, b in obj
460 if (tok.kind != Token::Kind::Id) {
461 throw_parser_error("expected id, got '" + tok.describe() + "'");
462 }
463
464 Token value_token = tok;
465 get_next_token();
466
467 // Object type
468 std::shared_ptr<ForStatementNode> for_statement_node;
469 if (tok.kind == Token::Kind::Comma) {
470 get_next_token();
471 if (tok.kind != Token::Kind::Id) {
472 throw_parser_error("expected id, got '" + tok.describe() + "'");
473 }
474
475 Token key_token = std::move(value_token);
476 value_token = tok;
477 get_next_token();
478
479 for_statement_node = std::make_shared<ForObjectStatementNode>(static_cast<std::string>(key_token.text), static_cast<std::string>(value_token.text),
480 current_block, tok.text.data() - tmpl.content.c_str());
481
482 // Array type
483 } else {
484 for_statement_node =
485 std::make_shared<ForArrayStatementNode>(static_cast<std::string>(value_token.text), current_block, tok.text.data() - tmpl.content.c_str());
486 }
487
488 current_block->nodes.emplace_back(for_statement_node);
489 for_statement_stack.emplace(for_statement_node.get());
490 current_block = &for_statement_node->body;
491 current_expression_list = &for_statement_node->condition;
492
493 if (tok.kind != Token::Kind::Id || tok.text != static_cast<decltype(tok.text)>("in")) {
494 throw_parser_error("expected 'in', got '" + tok.describe() + "'");
495 }
496 get_next_token();
497
498 if (!parse_expression(tmpl, closing)) {
499 return false;
500 }
501 } else if (tok.text == static_cast<decltype(tok.text)>("endfor")) {
502 if (for_statement_stack.empty()) {
503 throw_parser_error("endfor without matching for");
504 }
505
506 auto& for_statement_data = for_statement_stack.top();
507 get_next_token();
508
509 current_block = for_statement_data->parent;
510 for_statement_stack.pop();
511 } else if (tok.text == static_cast<decltype(tok.text)>("include")) {
512 get_next_token();
513
514 std::string template_name = parse_filename();
515 add_to_template_storage(path, template_name);
516
517 current_block->nodes.emplace_back(std::make_shared<IncludeStatementNode>(template_name, tok.text.data() - tmpl.content.c_str()));
518
519 get_next_token();
520 } else if (tok.text == static_cast<decltype(tok.text)>("extends")) {
521 get_next_token();
522
523 std::string template_name = parse_filename();
524 add_to_template_storage(path, template_name);
525
526 current_block->nodes.emplace_back(std::make_shared<ExtendsStatementNode>(template_name, tok.text.data() - tmpl.content.c_str()));
527
528 get_next_token();
529 } else if (tok.text == static_cast<decltype(tok.text)>("set")) {
530 get_next_token();
531
532 if (tok.kind != Token::Kind::Id) {
533 throw_parser_error("expected variable name, got '" + tok.describe() + "'");
534 }
535
536 std::string key = static_cast<std::string>(tok.text);
537 get_next_token();
538
539 auto set_statement_node = std::make_shared<SetStatementNode>(key, tok.text.data() - tmpl.content.c_str());
540 current_block->nodes.emplace_back(set_statement_node);
541 current_expression_list = &set_statement_node->expression;
542
543 if (tok.text != static_cast<decltype(tok.text)>("=")) {
544 throw_parser_error("expected '=', got '" + tok.describe() + "'");
545 }
546 get_next_token();
547
548 if (!parse_expression(tmpl, closing)) {
549 return false;
550 }
551 } else {
552 return false;
553 }
554 return true;
555 }
556
557 void parse_into(Template& tmpl, std::string_view path) {
558 lexer.start(tmpl.content);
559 current_block = &tmpl.root;
560
561 for (;;) {
562 get_next_token();
563 switch (tok.kind) {
564 case Token::Kind::Eof: {
565 if (!if_statement_stack.empty()) {
566 throw_parser_error("unmatched if");
567 }
568 if (!for_statement_stack.empty()) {
569 throw_parser_error("unmatched for");
570 }
571 }
572 return;
573 case Token::Kind::Text: {
574 current_block->nodes.emplace_back(std::make_shared<TextNode>(tok.text.data() - tmpl.content.c_str(), tok.text.size()));
575 } break;
576 case Token::Kind::StatementOpen: {
577 get_next_token();
578 if (!parse_statement(tmpl, Token::Kind::StatementClose, path)) {
579 throw_parser_error("expected statement, got '" + tok.describe() + "'");
580 }
581 if (tok.kind != Token::Kind::StatementClose) {
582 throw_parser_error("expected statement close, got '" + tok.describe() + "'");
583 }
584 } break;
585 case Token::Kind::LineStatementOpen: {
586 get_next_token();
587 if (!parse_statement(tmpl, Token::Kind::LineStatementClose, path)) {
588 throw_parser_error("expected statement, got '" + tok.describe() + "'");
589 }
590 if (tok.kind != Token::Kind::LineStatementClose && tok.kind != Token::Kind::Eof) {
591 throw_parser_error("expected line statement close, got '" + tok.describe() + "'");
592 }
593 } break;
594 case Token::Kind::ExpressionOpen: {
595 get_next_token();
596
597 auto expression_list_node = std::make_shared<ExpressionListNode>(tok.text.data() - tmpl.content.c_str());
598 current_block->nodes.emplace_back(expression_list_node);
599 current_expression_list = expression_list_node.get();
600
601 if (!parse_expression(tmpl, Token::Kind::ExpressionClose)) {
602 throw_parser_error("expected expression close, got '" + tok.describe() + "'");
603 }
604 } break;
605 case Token::Kind::CommentOpen: {
606 get_next_token();
607 if (tok.kind != Token::Kind::CommentClose) {
608 throw_parser_error("expected comment close, got '" + tok.describe() + "'");
609 }
610 } break;
611 default: {
612 throw_parser_error("unexpected token '" + tok.describe() + "'");
613 } break;
614 }
615 }
616 }
617
618public:
619 explicit Parser(const ParserConfig& parser_config, const LexerConfig& lexer_config, TemplateStorage& template_storage,
620 const FunctionStorage& function_storage)
621 : config(parser_config), lexer(lexer_config), template_storage(template_storage), function_storage(function_storage) {}
622
623 Template parse(std::string_view input, std::string_view path) {
624 auto result = Template(static_cast<std::string>(input));
625 parse_into(result, path);
626 return result;
627 }
628
629 void parse_into_template(Template& tmpl, std::string_view filename) {
630 std::string_view path = filename.substr(0, filename.find_last_of("/\\") + 1);
631
632 // StringRef path = sys::path::parent_path(filename);
633 auto sub_parser = Parser(config, lexer.get_config(), template_storage, function_storage);
634 sub_parser.parse_into(tmpl, path);
635 }
636
637 std::string load_file(const std::string& filename) {
638 std::ifstream file;
639 file.open(filename);
640 if (file.fail()) {
641 INJA_THROW(FileError("failed accessing file at '" + filename + "'"));
642 }
643 std::string text((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
644 return text;
645 }
646};
647
648} // namespace inja
649
650#endif // INCLUDE_INJA_PARSER_HPP_
Definition: node.hpp:66
Definition: node.hpp:251
Class for builtin functions and user-defined callbacks.
Definition: function_storage.hpp:16
Class for lexing an inja Template.
Definition: lexer.hpp:16
Class for parsing an inja Template.
Definition: parser.hpp:24
Definition: exceptions.hpp:36
Class for lexer configuration.
Definition: config.hpp:14
Class for parser configuration.
Definition: config.hpp:66
Definition: exceptions.hpp:28
The main inja Template.
Definition: template.hpp:17
Helper-class for the inja Lexer.
Definition: token.hpp:12