run method
Runs inference on a list of Int32 token IDs and returns a Float32 embedding vector.
The token IDs are padded (with the PAD token, 0) or truncated to exactly inputSequenceLength values.
Returns a list of outputDimension doubles.
Implementation
/// Runs inference on [tokenIds] and returns the embedding vector.
///
/// [tokenIds] is padded with 0 (the PAD token) or truncated so that exactly
/// [inputSequenceLength] Int32 values are fed to the model.
///
/// Returns a list of [outputDimension] doubles read from the model's
/// Float32 output tensor.
///
/// Throws a [StateError] if the interpreter has been closed, a tensor is
/// unavailable, or a TFLite call reports a non-ok status.
List<double> run(List<int> tokenIds) {
  _assertNotClosed();

  final inputTensor =
      _bindings.tfLiteInterpreterGetInputTensor(_interpreter, 0);
  if (inputTensor == nullptr) {
    throw StateError('Input tensor not available');
  }

  // Write the token IDs straight into the native input buffer and zero-fill
  // the tail (PAD token). This avoids the intermediate Dart-side Int32List
  // allocation and the second copy it would require.
  final inputPtr = malloc<Int32>(inputSequenceLength);
  try {
    final inputList = inputPtr.asTypedList(inputSequenceLength);
    final copyLen = tokenIds.length < inputSequenceLength
        ? tokenIds.length
        : inputSequenceLength;
    inputList.setRange(0, copyLen, tokenIds);
    inputList.fillRange(copyLen, inputSequenceLength, 0);

    final status = _bindings.tfLiteTensorCopyFromBuffer(
        inputTensor, inputPtr.cast(), inputSequenceLength * sizeOf<Int32>());
    if (status != TfLiteStatus.ok) {
      throw StateError('Failed to copy input data (status: $status)');
    }

    // Run the model.
    final invokeStatus = _bindings.tfLiteInterpreterInvoke(_interpreter);
    if (invokeStatus != TfLiteStatus.ok) {
      throw StateError('Inference failed (status: $invokeStatus)');
    }

    // Read back the Float32 output tensor.
    final outputTensor =
        _bindings.tfLiteInterpreterGetOutputTensor(_interpreter, 0);
    if (outputTensor == nullptr) {
      throw StateError('Output tensor not available');
    }
    final outputPtr = malloc<Float>(outputDimension);
    try {
      final outputStatus = _bindings.tfLiteTensorCopyToBuffer(
          outputTensor, outputPtr.cast(), outputDimension * sizeOf<Float>());
      if (outputStatus != TfLiteStatus.ok) {
        throw StateError('Failed to read output (status: $outputStatus)');
      }
      // Copy out of native memory before it is freed below.
      return List<double>.from(outputPtr.asTypedList(outputDimension));
    } finally {
      malloc.free(outputPtr);
    }
  } finally {
    malloc.free(inputPtr);
  }
}