fromFile static method

TfLiteInterpreter fromFile(
  String modelPath, {
  int numThreads = 4,
  String? libraryPath,
})

Creates an interpreter from a .tflite model file.

The input sequence length and output embedding dimension are detected automatically from the model's first input and output tensors, and an XNNPACK delegate is attached when available (falling back to plain CPU execution otherwise).

numThreads controls CPU parallelism (default: 4). libraryPath optionally overrides where the native TensorFlow Lite library is loaded from.

Throws a StateError if the model cannot be loaded, the interpreter cannot be created, or tensor allocation fails.
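A minimal usage sketch (the asset path and thread count are illustrative, not part of this API):

try {
  final interpreter = TfLiteInterpreter.fromFile(
    'assets/models/encoder.tflite', // hypothetical model path
    numThreads: 2,
  );
  // ... run inference with the interpreter ...
} on StateError catch (e) {
  // fromFile surfaces load, creation, and allocation failures as StateError.
  print('TFLite initialization failed: $e');
}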

Implementation

static TfLiteInterpreter fromFile(
  String modelPath, {
  int numThreads = 4,
  String? libraryPath,
}) {
  final bindings = TfLiteBindings.load(libraryPath: libraryPath);

  // Load model
  final pathNative = modelPath.toNativeUtf8();
  final model = bindings.tfLiteModelCreateFromFile(pathNative);
  malloc.free(pathNative);

  if (model == nullptr) {
    throw StateError('Failed to load TFLite model from: $modelPath');
  }

  // Create options
  final options = bindings.tfLiteInterpreterOptionsCreate();
  if (options == nullptr) {
    bindings.tfLiteModelDelete(model);
    throw StateError('Failed to create TFLite interpreter options');
  }
  bindings.tfLiteInterpreterOptionsSetNumThreads(options, numThreads);

  // Create XNNPACK delegate with default options (nullptr = use built-in defaults
  // which include QS8/QU8 quantization support, matching Python LiteRT behavior)
  Pointer<Void>? xnnpackDelegate;
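  // The handle is declared outside the try block so the cleanup paths below
  // can free it after the interpreter that uses it has been deleted.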
  try {
    xnnpackDelegate = bindings.tfLiteXNNPackDelegateCreate(nullptr);
    if (xnnpackDelegate != nullptr) {
      bindings.tfLiteInterpreterOptionsAddDelegate(options, xnnpackDelegate);
    }
  } catch (e) {
    debugPrint(
        '[TfLiteInterpreter] XNNPACK delegate not available, using CPU: $e');
    xnnpackDelegate = null;
  }

  // Create interpreter (delegate applied during creation, matching Python LiteRT)
  var interpreter = bindings.tfLiteInterpreterCreate(model, options);
  if (interpreter == nullptr) {
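    // Retry with the selected-ops variant: some library builds register
    // only a subset of ops, and the model may require this entry point.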
    interpreter =
        bindings.tfLiteInterpreterCreateWithSelectedOps(model, options);
  }
  bindings.tfLiteInterpreterOptionsDelete(options);

  if (interpreter == nullptr) {
    _deleteDelegate(bindings, xnnpackDelegate);
    bindings.tfLiteModelDelete(model);
    throw StateError('Failed to create TFLite interpreter from: $modelPath');
  }

  // Allocate tensors
  final allocStatus = bindings.tfLiteInterpreterAllocateTensors(interpreter);
  if (allocStatus != TfLiteStatus.ok) {
    bindings.tfLiteInterpreterDelete(interpreter);
    _deleteDelegate(bindings, xnnpackDelegate);
    bindings.tfLiteModelDelete(model);
    throw StateError('Failed to allocate tensors (status: $allocStatus)');
  }

  // Auto-detect dimensions from model tensors
  final inputTensor =
      bindings.tfLiteInterpreterGetInputTensor(interpreter, 0);
  if (inputTensor == nullptr) {
    bindings.tfLiteInterpreterDelete(interpreter);
    _deleteDelegate(bindings, xnnpackDelegate);
    bindings.tfLiteModelDelete(model);
    throw StateError('Input tensor not found at index 0');
  }
  final outputTensor =
      bindings.tfLiteInterpreterGetOutputTensor(interpreter, 0);
  if (outputTensor == nullptr) {
    bindings.tfLiteInterpreterDelete(interpreter);
    _deleteDelegate(bindings, xnnpackDelegate);
    bindings.tfLiteModelDelete(model);
    throw StateError('Output tensor not found at index 0');
  }

  // Input shape: [1, sequenceLength]
  final inputSeqLen = bindings.tfLiteTensorDim(inputTensor, 1);

  // Output shape: [1, embeddingDimension]
  final outputDim = bindings.tfLiteTensorDim(outputTensor, 1);

  if (inputSeqLen <= 0 || outputDim <= 0) {
    bindings.tfLiteInterpreterDelete(interpreter);
    _deleteDelegate(bindings, xnnpackDelegate);
    bindings.tfLiteModelDelete(model);
    throw StateError(
        'Invalid model tensor dimensions: input=$inputSeqLen, output=$outputDim');
  }

  return TfLiteInterpreter._(
    bindings: bindings,
    model: model,
    interpreter: interpreter,
    xnnpackDelegate: xnnpackDelegate,
    inputSequenceLength: inputSeqLen,
    outputDimension: outputDim,
  );
}
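
The private _deleteDelegate helper referenced in the cleanup paths above is not part of this listing. A minimal sketch of what it might look like, assuming the bindings expose the TensorFlow Lite C API's TfLiteXNNPackDelegateDelete under the same naming convention used above (an assumption, not confirmed by this listing):

static void _deleteDelegate(
    TfLiteBindings bindings, Pointer<Void>? delegate) {
  // Nothing to free if the delegate was never created.
  if (delegate == null || delegate == nullptr) return;
  // Assumption: the bindings wrap TfLiteXNNPackDelegateDelete, mirroring
  // the tfLiteXNNPackDelegateCreate call used in fromFile.
  bindings.tfLiteXNNPackDelegateDelete(delegate);
}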