canParse method
Determines if the content is likely in TriG format
This method performs a heuristic analysis of the content to check if it appears to be in TriG format. It looks for common TriG syntax markers such as prefix declarations, GRAPH keywords, graph blocks, and triple patterns.
The method uses a lightweight approach that balances accuracy with performance, avoiding a full parse while still providing reasonable detection capability.
Parameters:
content: The string content to analyze
Returns:
- true if the content appears to be in TriG format
Implementation
/// Whether [content] looks like it could be a TriG document.
///
/// This is a lightweight heuristic, not a parse: the trimmed input is
/// checked against a series of textual markers, and the first one that
/// matches decides the result. The markers are, in order:
///
/// 1. explicit directives (`@prefix`, `@base`, the `GRAPH` keyword, and
///    SPARQL-style `PREFIX`/`BASE` declarations),
/// 2. a `subject predicate object .` triple,
/// 3. blank-node brackets together with a `.` anywhere in the input,
/// 4. collection parentheses together with a `.` anywhere in the input,
/// 5. an IRI immediately followed by `{` (a graph block).
///
/// Returns `true` if any marker matches and `false` otherwise. Input that
/// `_isObviouslyHtml` flags is rejected before any marker is tried.
/// Because this is a heuristic, a `true` result does not guarantee that a
/// full parse will succeed.
@override
bool canParse(String content) {
  final trimmed = content.trim();

  // Early rejection: obvious HTML content.
  // NOTE(review): the exact criteria live in _isObviouslyHtml, which is
  // defined outside this block.
  if (_isObviouslyHtml(trimmed)) {
    return false;
  }

  // Explicit TriG/Turtle directives and keywords. These are plain,
  // case-sensitive substring checks and are kept as-is for backward
  // compatibility.
  if (trimmed.contains('@prefix') ||
      trimmed.contains('@base') ||
      trimmed.contains('GRAPH') ||
      trimmed.contains('prefix rdf:') ||
      trimmed.contains('prefix rdfs:') ||
      trimmed.contains('prefix owl:') ||
      trimmed.contains('prefix xsd:')) {
    return true;
  }

  // SPARQL-style directives. The TriG grammar treats the PREFIX and BASE
  // keywords as case-insensitive and allows any prefix label, so the
  // lowercase, well-known-prefix substrings above are not sufficient on
  // their own. Anchoring at the start of a line and requiring a following
  // <IRI> keeps this from matching ordinary prose.
  final hasSparqlDirective = RegExp(
    r'^\s*(?:PREFIX\s+[^\s:<]*:|BASE)\s*<[^<>\s]*>',
    caseSensitive: false,
    multiLine: true,
  ).hasMatch(trimmed);
  if (hasSparqlDirective) {
    return true;
  }

  // A complete triple terminated by '.': subject and predicate must be an
  // <IRI> or a prefixed name (or 'a' for the predicate); the object may
  // additionally be a simple quoted string, an integer, or a boolean.
  final hasTriplePattern = RegExp(
    r'(<[^>]+>|\w+:\w+)\s+(<[^>]+>|\w+:\w+|a)\s+(<[^>]+>|\w+:\w+|"[^"]*"|\d+|true|false)\s*\.',
    multiLine: true,
  ).hasMatch(trimmed);
  if (hasTriplePattern) {
    return true;
  }

  // Blank node brackets: '[]' (possibly spanning whitespace/newlines) or
  // '[ ... ]' on a single line. Only counted when the input also contains
  // a '.' somewhere, as a weak hint that statements are present.
  final hasBlankNodes = RegExp(r'\[\s*\]|\[.*?\]').hasMatch(trimmed);
  if (hasBlankNodes && trimmed.contains('.')) {
    return true;
  }

  // Collection parentheses: '()' or '( ... )', again only in combination
  // with a '.' somewhere in the input.
  final hasCollections = RegExp(r'\(\s*\)|\([^)]+\)').hasMatch(trimmed);
  if (hasCollections && trimmed.contains('.')) {
    return true;
  }

  // TriG graph block: an <IRI> directly followed by '{'. A block introduced
  // by the uppercase GRAPH keyword has already been accepted by the
  // substring check above, and 'GRAPH <iri> {' contains '<iri> {' anyway,
  // so a single pattern is enough here.
  return RegExp(r'<[^>]+>\s*\{').hasMatch(trimmed);
}