embedDocuments method

@override
Future<BatchEmbeddingsResult> embedDocuments(
  List<String> texts, {
  OllamaEmbeddingsModelOptions? options,
})

Embeds the given texts in batches and returns the embeddings together with accumulated usage data.
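
For illustration, a call might look like the sketch below. The class name OllamaEmbeddingsModel, its constructor, and the model name are assumptions for this example; only embedDocuments and OllamaEmbeddingsModelOptions are documented on this page.

void main() async {
  // Hypothetical setup; the actual constructor may differ.
  final embedder = OllamaEmbeddingsModel(name: 'nomic-embed-text');

  final result = await embedder.embedDocuments(
    ['first document', 'second document'],
    options: OllamaEmbeddingsModelOptions(batchSize: 50),
  );

  print('${result.output.length} embeddings, '
      '${result.usage?.totalTokens ?? 0} tokens');
}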

Implementation

@override
Future<BatchEmbeddingsResult> embedDocuments(
  List<String> texts, {
  OllamaEmbeddingsModelOptions? options,
}) async {
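  // Short-circuit: an empty input yields an empty result with zero usage.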
  if (texts.isEmpty) {
    return BatchEmbeddingsResult(
      output: const [],
      finishReason: FinishReason.stop,
      metadata: {'model': name, 'provider': 'ollama'},
      usage: const LanguageModelUsage(),
    );
  }

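  // Resolve the batch size: the per-call option wins, then the model-level
  // setting, then a default of 100 texts per request.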
  final actualBatchSize = options?.batchSize ?? batchSize ?? 100;
  final totalTexts = texts.length;
  final totalCharacters = texts.map((t) => t.length).fold(0, (a, b) => a + b);
  final chunks = chunkList(texts, chunkSize: actualBatchSize);

  _logger.info(
    'Embedding $totalTexts documents with Ollama model "$name" '
    '(batches: ${chunks.length}, batchSize: $actualBatchSize, '
    'totalChars: $totalCharacters)',
  );

  final allEmbeddings = <List<double>>[];
  var totalPromptTokens = 0;

  for (var i = 0; i < chunks.length; i++) {
    final chunk = chunks[i];
    final chunkCharacters = chunk
        .map((t) => t.length)
        .fold(0, (a, b) => a + b);

    _logger.fine(
      'Processing batch ${i + 1}/${chunks.length} '
      '(${chunk.length} texts, $chunkCharacters chars)',
    );

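    // Resolve per-request options; call-site options override model defaults.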
    final actualDimensions = options?.dimensions ?? dimensions;
    final actualTruncate = options?.truncate ?? defaultOptions.truncate;
    final actualKeepAlive = options?.keepAlive ?? defaultOptions.keepAlive;

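    // Request embeddings for this batch from the Ollama embed endpoint.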
    final response = await _client.embeddings.create(
      request: EmbedRequest(
        model: name,
        input: chunk,
        truncate: actualTruncate,
        dimensions: actualDimensions,
        keepAlive: actualKeepAlive,
      ),
    );

    // Handle both single embedding (embedding) and batch (embeddings)
    final batchEmbeddings =
        response.embeddings ??
        (response.embedding != null
            ? <List<double>>[response.embedding!]
            : const <List<double>>[]);
    if (batchEmbeddings.length != chunk.length) {
      throw StateError(
        'Expected ${chunk.length} embeddings for batch ${i + 1}, '
        'received ${batchEmbeddings.length}.',
      );
    }
    allEmbeddings.addAll(batchEmbeddings);

    // Accumulate usage data
    totalPromptTokens += response.promptEvalCount ?? 0;

    _logger.fine(
      'Batch ${i + 1} completed: '
      '${chunk.length} embeddings, '
      '${response.promptEvalCount ?? 0} tokens',
    );
  }

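  // Only report usage when token counts were returned; promptEvalCount
  // may be null in the response.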
  final usage = LanguageModelUsage(
    promptTokens: totalPromptTokens > 0 ? totalPromptTokens : null,
    totalTokens: totalPromptTokens > 0 ? totalPromptTokens : null,
  );

  final result = BatchEmbeddingsResult(
    output: allEmbeddings,
    finishReason: FinishReason.stop,
    usage: usage,
    metadata: {'model': name, 'provider': 'ollama'},
  );

  _logger.info(
    'Ollama batch embedding completed: '
    '${result.output.length} embeddings, '
    '${result.usage?.totalTokens ?? 0} total tokens',
  );

  return result;
}