tokenize function
Generate a list of tokens from a source string.
Implementation
/// Generates a list of tokens from a Jinja template [source] string.
///
/// [source] is first run through `preprocess` with [options]; the resulting
/// string is then scanned left to right:
///
/// * Everything outside `{% ... %}`, `{{ ... }}` and `{# ... #}` becomes a
///   single [TokenType.Text] token.
/// * `{# ... #}` becomes a [TokenType.Comment] token holding the text between
///   the delimiters.
/// * Inside statements and expressions, whitespace is skipped and the input
///   is split into operators/punctuation (via `ORDERED_MAPPING_TABLE`),
///   string literals, numeric literals and identifiers.
///
/// Throws a `SyntaxError` when the input ends in the middle of a token or
/// comment, when a string contains an unknown escape sequence, when a `+`/`-`
/// appears where no operator is allowed, or when a character cannot start any
/// token.
List<Token> tokenize(String source, [PreprocessOptions options = const PreprocessOptions()]) {
  final List<Token> tokens = [];
  final String src = preprocess(source, options);

  // Compiled once instead of once per scanned character.
  final whitespace = RegExp(r'\s');

  int cursorPosition = 0;
  // Nesting depth of `{ ... }` object literals inside the current expression.
  int curlyBracketDepth = 0;

  /// Consumes characters for as long as [predicate] accepts them and returns
  /// the consumed text with backslash escapes resolved.
  ///
  /// Reaching the end of the input before [predicate] rejects a character is
  /// always a `SyntaxError`: it never surfaces as a `RangeError`.
  String consumeWhile(bool Function(String char) predicate) {
    final buffer = StringBuffer();
    while (true) {
      // Guard every read of `src[cursorPosition]`; Dart throws a RangeError
      // on out-of-range string indexing.
      if (cursorPosition >= src.length) {
        throw SyntaxError("Unexpected end of input");
      }
      if (!predicate(src[cursorPosition])) break;

      // Check for escaped characters
      if (src[cursorPosition] == r'\') {
        // Consume the backslash
        ++cursorPosition;
        // Check for end of input
        if (cursorPosition >= src.length) {
          throw SyntaxError("Unexpected end of input");
        }
        // Add the escaped character
        final escaped = src[cursorPosition++];
        final unescaped = ESCAPE_CHARACTERS[escaped];
        if (unescaped == null) {
          throw SyntaxError('Unexpected escaped character: $escaped');
        }
        buffer.write(unescaped);
        continue;
      }

      buffer.write(src[cursorPosition++]);
    }
    return buffer.toString();
  }

  /// Whether the cursor sits on one of the openers `{%`, `{{` or `{#`.
  ///
  /// `startsWith` returns false when fewer than two characters remain, so a
  /// trailing lone `{` is treated as ordinary text.
  bool atTagStart() =>
      src.startsWith('{%', cursorPosition) ||
      src.startsWith('{{', cursorPosition) ||
      src.startsWith('{#', cursorPosition);

  // Build each token until end of input
  main:
  while (cursorPosition < src.length) {
    // First, consume all text that is outside of a Jinja statement or expression
    final lastTokenType = tokens.isNotEmpty ? tokens.last.type : null;
    if (lastTokenType == null ||
        lastTokenType == TokenType.CloseStatement ||
        lastTokenType == TokenType.CloseExpression ||
        lastTokenType == TokenType.Comment) {
      final textStart = cursorPosition;
      // Keep going until we hit the next Jinja statement, expression or comment
      while (cursorPosition < src.length && !atTagStart()) {
        cursorPosition++;
      }
      // There is some text to add
      if (cursorPosition > textStart) {
        tokens.add(Token(src.substring(textStart, cursorPosition), TokenType.Text));
        continue;
      }
    }

    // Possibly consume a comment
    if (src.startsWith('{#', cursorPosition)) {
      final commentStart = cursorPosition + 2; // Skip the opening {#
      final commentEnd = src.indexOf('#}', commentStart);
      // An unterminated comment is an error regardless of how little input
      // follows the opening tag.
      if (commentEnd == -1) {
        throw SyntaxError("Missing end of comment tag");
      }
      tokens.add(Token(src.substring(commentStart, commentEnd), TokenType.Comment));
      cursorPosition = commentEnd + 2; // Skip the closing #}
      continue;
    }

    // Consume (and ignore) all whitespace inside Jinja statements or expressions.
    // consumeWhile throws if the input ends here, so afterwards the cursor is
    // always on a valid, non-whitespace character.
    consumeWhile(whitespace.hasMatch);

    // Handle multi-character tokens
    final char = src[cursorPosition];

    // Check for unary operators
    if (char == '-' || char == '+') {
      // No token has been added since `lastTokenType` was computed above.
      if (lastTokenType == null || lastTokenType == TokenType.Text) {
        throw SyntaxError('Unexpected character: $char');
      }
      switch (lastTokenType) {
        case TokenType.Identifier:
        case TokenType.NumericLiteral:
        case TokenType.StringLiteral:
        case TokenType.CloseParen:
        case TokenType.CloseSquareBracket:
          // Part of a binary operator
          // a - 1, 1 - 1, true - 1, "apple" - 1, (1) - 1, a[1] - 1
          // Continue parsing normally
          break;
        default:
          // Is part of a unary operator
          // (-1), [-1], (1 + -1), not -1, -apple
          cursorPosition++; // consume the unary operator
          // Check for numbers following the unary operator
          final num = consumeWhile(isInteger);
          tokens.add(Token(
            '$char$num',
            num.isNotEmpty ? TokenType.NumericLiteral : TokenType.UnaryOperator,
          ));
          continue main;
      }
    }

    // Try to match one of the tokens in the mapping table
    for (final (seq, type) in ORDERED_MAPPING_TABLE) {
      // inside an object literal, don't treat "}}" as expression-end
      if (seq == '}}' && curlyBracketDepth > 0) {
        continue;
      }
      if (src.startsWith(seq, cursorPosition)) {
        tokens.add(Token(seq, type));
        // possibly adjust the curly bracket depth
        if (type == TokenType.OpenExpression) {
          curlyBracketDepth = 0;
        } else if (type == TokenType.OpenCurlyBracket) {
          curlyBracketDepth++;
        } else if (type == TokenType.CloseCurlyBracket) {
          curlyBracketDepth--;
        }
        cursorPosition += seq.length;
        continue main;
      }
    }

    if (char == "'" || char == '"') {
      cursorPosition++; // Skip the opening quote
      // Throws if the input ends before the matching closing quote.
      final str = consumeWhile((c) => c != char);
      tokens.add(Token(str, TokenType.StringLiteral));
      cursorPosition++; // Skip the closing quote
      continue;
    }

    if (isInteger(char)) {
      // Consume integer part
      String num = consumeWhile(isInteger);
      // Possibly, consume fractional part (only when a digit follows the '.')
      if (cursorPosition + 1 < src.length &&
          src[cursorPosition] == '.' &&
          isInteger(src[cursorPosition + 1])) {
        cursorPosition++; // consume '.'
        final frac = consumeWhile(isInteger);
        num = '$num.$frac';
      }
      tokens.add(Token(num, TokenType.NumericLiteral));
      continue;
    }

    if (isWord(char)) {
      // consume any word characters and always classify as Identifier
      final word = consumeWhile(isWord);
      tokens.add(Token(word, TokenType.Identifier));
      continue;
    }

    throw SyntaxError('Unexpected character: $char');
  }

  return tokens;
}