createSession method

@override
Future<InferenceModelSession> createSession({
  double temperature = .8,
  int randomSeed = 1,
  int topK = 1,
  double? topP,
  String? loraPath,
  bool? enableVisionModality,
  bool? enableAudioModality,
  String? systemInstruction,
  bool enableThinking = false,
  List<Tool> tools = const [],
})

Creates a new InferenceModelSession for generation.

temperature, randomSeed, topK, and topP control sampling. loraPath is an optional path to LoRA weights; it is not supported on the .litertlm FFI path and causes an UnsupportedError. enableVisionModality enables image input for multimodal models, and enableAudioModality enables audio input for Gemma 3n E4B models; when null, each falls back to the model's own capability. systemInstruction sets the conversation's system message. enableThinking enables thinking-mode output for models that support it. tools lists the tools exposed to the model; for Gemma 4 they are serialized into the SDK conversation config, otherwise they are injected into the prompt on the Dart side.
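
For example, a minimal call might look like the following sketch. Here model stands for an instance of this class, myWeatherTool for a previously constructed Tool, and session.close() is assumed from the wider InferenceModelSession interface; none of these names appear on this page.

final session = await model.createSession(
  temperature: 0.7,
  topK: 40,
  systemInstruction: 'You are a helpful assistant.',
  tools: [myWeatherTool], // native tool tokens on Gemma 4, prompt injection otherwise
);
// ... run generation through the session ...
await session.close();

Note that passing loraPath throws UnsupportedError on this implementation, and calling createSession on a closed model throws StateError.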

Implementation

@override
Future<InferenceModelSession> createSession({
  double temperature = .8,
  int randomSeed = 1,
  int topK = 1,
  double? topP,
  String? loraPath,
  bool? enableVisionModality,
  bool? enableAudioModality,
  String? systemInstruction,
  bool enableThinking = false,
  List<Tool> tools = const [],
}) async {
  if (_isClosed) {
    throw StateError(
        'Model is closed. Create a new instance to use it again');
  }

  if (loraPath != null) {
    throw UnsupportedError(
      'LoRA weights are not supported on the .litertlm FFI path '
      '(loraPath=$loraPath). Track upstream LiteRT-LM C API support; '
      'remove loraPath or use a MediaPipe .task model on Android/iOS.',
    );
  }

  // Another createSession call is already in flight (or its session is
  // still open); share that future instead of creating a second native
  // conversation.
  if (_createCompleter case Completer<InferenceModelSession> completer) {
    return completer.future;
  }

  final completer = _createCompleter = Completer<InferenceModelSession>();

  try {
    // For Gemma 4, push tools into the SDK conversation config so it can
    // render native `<|tool>declaration:...<tool|>` tokens via minja. Other
    // model types still use Dart-side prompt injection in chat.dart.
    final toolsJson = (modelType == ModelType.gemma4 && tools.isNotEmpty)
        ? SdkResponseParser.serializeToolsForSdk(tools)
        : null;

    ffiClient.createConversation(
      systemMessage: systemInstruction,
      toolsJson: toolsJson,
      temperature: temperature,
      topK: topK,
      topP: topP,
      seed: randomSeed,
    );

    // Wrap the native conversation in a Dart-side session; its onClose
    // callback clears the cached state so a new session can be created.
    final session = _session = FfiInferenceModelSession(
      ffiClient: ffiClient,
      modelType: modelType,
      fileType: fileType,
      supportImage: enableVisionModality ?? supportImage,
      supportAudio: enableAudioModality ?? supportAudio,
      enableThinking: enableThinking,
      onClose: () {
        _session = null;
        _createCompleter = null;
      },
    );

    completer.complete(session);
    return session;
  } catch (e, st) {
    // Fail any concurrent waiters, then clear the completer so a later
    // call can retry.
    completer.completeError(e, st);
    _createCompleter = null;
    rethrow;
  }
}
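
Because the pending completer is cached in _createCompleter, concurrent callers share one native conversation instead of racing to create two. A small sketch of that behavior (model again stands for an instance of this class; the identical check is illustrative):

final sessions = await Future.wait([
  model.createSession(),
  model.createSession(), // resolves to the first call's in-flight future
]);
assert(identical(sessions[0], sessions[1]));

On failure the completer is cleared so a later call can retry; on success it is cleared only when the session's onClose callback runs.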