<?xml version="1.0"?>
<doc>
<assembly>
<name>LLamaSharp</name>
</assembly>
<members>
<member name="T:System.Runtime.CompilerServices.IsExternalInit">
<summary>
Reserved to be used by the compiler for tracking metadata.
This class should not be used by developers in source code.
</summary>
<remarks>
This definition is provided by the <i>IsExternalInit</i> NuGet package (https://www.nuget.org/packages/IsExternalInit).
Please see https://github.com/manuelroemer/IsExternalInit for more information.
</remarks>
</member>
<member name="T:LLama.Abstractions.IContextParams">
<summary>
The parameters for initializing a LLama context from a model.
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.ContextSize">
<summary>
Model context size (n_ctx)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.BatchSize">
<summary>
maximum batch size that can be submitted at once (must be >=32 to use BLAS) (n_batch)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.UBatchSize">
<summary>
Physical batch size
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.SeqMax">
<summary>
max number of sequences (i.e. distinct states for recurrent models)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.Embeddings">
<summary>
If true, extract embeddings (together with logits).
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.RopeFrequencyBase">
<summary>
RoPE base frequency (null to fetch from the model)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.RopeFrequencyScale">
<summary>
RoPE frequency scaling factor (null to fetch from the model)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.Encoding">
<summary>
The encoding to use for models
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.Threads">
<summary>
Number of threads (null = autodetect) (n_threads)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.BatchThreads">
<summary>
Number of threads to use for batch processing (null = autodetect) (n_threads)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.YarnExtrapolationFactor">
<summary>
YaRN extrapolation mix factor (null = from model)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.YarnAttentionFactor">
<summary>
YaRN magnitude scaling factor (null = from model)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.YarnBetaFast">
<summary>
YaRN low correction dim (null = from model)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.YarnBetaSlow">
<summary>
YaRN high correction dim (null = from model)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.YarnOriginalContext">
<summary>
YaRN original context length (null = from model)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.YarnScalingType">
<summary>
YaRN scaling method to use.
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.TypeK">
<summary>
Override the type of the K cache
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.TypeV">
<summary>
Override the type of the V cache
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.NoKqvOffload">
<summary>
Whether to disable offloading the KQV cache to the GPU
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.FlashAttention">
<summary>
Whether to use flash attention
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.DefragThreshold">
<summary>
Defragment the KV cache if holes/size &gt; defrag_threshold. Set to <see langword="null"/> or &lt; 0 to disable (default).
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.PoolingType">
<summary>
How to pool (sum) embedding results by sequence id (ignored if no pooling layer)
</summary>
</member>
<member name="P:LLama.Abstractions.IContextParams.AttentionType">
<summary>
Attention type to use for embeddings
</summary>
</member>
<member name="T:LLama.Abstractions.IHistoryTransform">
<summary>
Transform history to plain text and vice versa.
</summary>
</member>
<member name="M:LLama.Abstractions.IHistoryTransform.HistoryToText(LLama.Common.ChatHistory)">
<summary>
Convert a ChatHistory instance to plain text.
</summary>
<param name="history">The ChatHistory instance</param>
<returns></returns>
</member>
<member name="M:LLama.Abstractions.IHistoryTransform.TextToHistory(LLama.Common.AuthorRole,System.String)">
<summary>
Converts plain text to a ChatHistory instance.
</summary>
<param name="role">The role for the author.</param>
<param name="text">The chat history as plain text.</param>
<returns>The updated history.</returns>
</member>
<member name="M:LLama.Abstractions.IHistoryTransform.Clone">
<summary>
Copy the transform.
</summary>
<returns></returns>
</member>
<member name="T:LLama.Abstractions.IInferenceParams">
<summary>
The parameters used for inference.
</summary>
</member>
<member name="P:LLama.Abstractions.IInferenceParams.TokensKeep">
<summary>
number of tokens to keep from initial prompt
</summary>
</member>
<member name="P:LLama.Abstractions.IInferenceParams.MaxTokens">
<summary>
How many new tokens to predict (n_predict). Set to -1 to generate indefinitely until generation completes.
</summary>
</member>
<member name="P:LLama.Abstractions.IInferenceParams.AntiPrompts">
<summary>
Sequences where the model will stop generating further tokens.
</summary>
</member>
<member name="P:LLama.Abstractions.IInferenceParams.SamplingPipeline">
<summary>
Set a custom sampling pipeline to use.
</summary>
</member>
<member name="T:LLama.Abstractions.ILLamaExecutor">
<summary>
A high level interface for LLama models.
</summary>
</member>
<member name="P:LLama.Abstractions.ILLamaExecutor.Context">
<summary>
The loaded context for this executor.
</summary>
</member>
<member name="P:LLama.Abstractions.ILLamaExecutor.IsMultiModal">
<summary>
Indicates whether this is a multi-modal model and there is an image to process.
</summary>
</member>
<member name="P:LLama.Abstractions.ILLamaExecutor.ClipModel">
<summary>
Multi-Modal Projections / Clip Model weights
</summary>
</member>
<member name="P:LLama.Abstractions.ILLamaExecutor.Images">
<summary>
List of images, each in byte array format.
</summary>
</member>
<member name="M:LLama.Abstractions.ILLamaExecutor.InferAsync(System.String,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
<summary>
Asynchronously infers a response from the model.
</summary>
<param name="text">Your prompt</param>
<param name="inferenceParams">Any additional parameters</param>
<param name="token">A cancellation token.</param>
<returns></returns>
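<example>
A minimal usage sketch (not taken from the library docs): the model path, prompt and parameter values below are placeholders, and the usual LLamaWeights / LLamaContext / InteractiveExecutor entry points are assumed.
<code>
using System;
using LLama;
using LLama.Common;

// Load a model and create an executor (paths and settings are illustrative).
var parameters = new ModelParams("path/to/model.gguf") { ContextSize = 4096 };
using var model = LLamaWeights.LoadFromFile(parameters);
using var context = model.CreateContext(parameters);
var executor = new InteractiveExecutor(context);

// Stream the response as it is generated.
var inferenceParams = new InferenceParams { MaxTokens = 128 };
await foreach (var text in executor.InferAsync("Question: What is the capital of France?\nAnswer:", inferenceParams))
    Console.Write(text);
</code>
</example>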
</member>
<member name="T:LLama.Abstractions.ILLamaParams">
<summary>
Convenience interface for implementing both types of parameters.
</summary>
<remarks>Mostly exists for backwards compatibility reasons, when these two were not split.</remarks>
</member>
<member name="T:LLama.Abstractions.IModelParams">
<summary>
The parameters for initializing a LLama model.
</summary>
</member>
<member name="P:LLama.Abstractions.IModelParams.MainGpu">
<summary>
main_gpu interpretation depends on split_mode:
<list type="bullet">
<item>
<term>None</term>
<description>The GPU that is used for the entire model.</description>
</item>
<item>
<term>Row</term>
<description>The GPU that is used for small tensors and intermediate results.</description>
</item>
<item>
<term>Layer</term>
<description>Ignored.</description>
</item>
</list>
</summary>
</member>
<member name="P:LLama.Abstractions.IModelParams.SplitMode">
<summary>
How to split the model across multiple GPUs
</summary>
</member>
<member name="P:LLama.Abstractions.IModelParams.GpuLayerCount">
<summary>
Number of layers to run in VRAM / GPU memory (n_gpu_layers)
</summary>
</member>
<member name="P:LLama.Abstractions.IModelParams.UseMemorymap">
<summary>
Use mmap for faster loads (use_mmap)
</summary>
</member>
<member name="P:LLama.Abstractions.IModelParams.UseMemoryLock">
<summary>
Use mlock to keep model in memory (use_mlock)
</summary>
</member>
<member name="P:LLama.Abstractions.IModelParams.ModelPath">
<summary>
Model path (model)
</summary>
</member>
<member name="P:LLama.Abstractions.IModelParams.TensorSplits">
<summary>
how split tensors should be distributed across GPUs
</summary>
</member>
<member name="P:LLama.Abstractions.IModelParams.VocabOnly">
<summary>
Load vocab only (no weights)
</summary>
</member>
<member name="P:LLama.Abstractions.IModelParams.CheckTensors">
<summary>
Validate model tensor data before loading
</summary>
</member>
<member name="P:LLama.Abstractions.IModelParams.MetadataOverrides">
<summary>
Override specific metadata items in the model
</summary>
</member>
<member name="T:LLama.Abstractions.TensorSplitsCollection">
<summary>
A fixed size array to set the tensor splits across multiple GPUs
</summary>
</member>
<member name="P:LLama.Abstractions.TensorSplitsCollection.Length">
<summary>
The size of this array
</summary>
</member>
<member name="P:LLama.Abstractions.TensorSplitsCollection.Item(System.Int32)">
<summary>
Get or set the proportion of work to do on the given device.
</summary>
<remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
<param name="index"></param>
<returns></returns>
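<example>
A short sketch (assuming <see cref="T:LLama.Common.ModelParams"/> exposes a pre-initialised TensorSplits collection): assign relative weights per GPU before loading the model.
<code>
using LLama.Common;

var parameters = new ModelParams("path/to/model.gguf");

// Ratios, not percentages: 3:2 puts 60% of the work on GPU 0 and 40% on GPU 1.
parameters.TensorSplits[0] = 3;
parameters.TensorSplits[1] = 2;
</code>
</example>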
</member>
<member name="M:LLama.Abstractions.TensorSplitsCollection.#ctor(System.Single[])">
<summary>
Create a new tensor splits collection, copying the given values
</summary>
<param name="splits"></param>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="M:LLama.Abstractions.TensorSplitsCollection.#ctor">
<summary>
Create a new tensor splits collection with all values initialised to the default
</summary>
</member>
<member name="M:LLama.Abstractions.TensorSplitsCollection.Clear">
<summary>
Set all values to zero
</summary>
</member>
<member name="M:LLama.Abstractions.TensorSplitsCollection.GetEnumerator">
<inheritdoc />
</member>
<member name="M:LLama.Abstractions.TensorSplitsCollection.System#Collections#IEnumerable#GetEnumerator">
<inheritdoc />
</member>
<member name="T:LLama.Abstractions.TensorSplitsCollectionConverter">
<summary>
A JSON converter for <see cref="T:LLama.Abstractions.TensorSplitsCollection"/>
</summary>
</member>
<member name="M:LLama.Abstractions.TensorSplitsCollectionConverter.Read(System.Text.Json.Utf8JsonReader@,System.Type,System.Text.Json.JsonSerializerOptions)">
<inheritdoc/>
</member>
<member name="M:LLama.Abstractions.TensorSplitsCollectionConverter.Write(System.Text.Json.Utf8JsonWriter,LLama.Abstractions.TensorSplitsCollection,System.Text.Json.JsonSerializerOptions)">
<inheritdoc/>
</member>
<member name="T:LLama.Abstractions.MetadataOverride">
<summary>
An override for a single key/value pair in model metadata
</summary>
</member>
<member name="P:LLama.Abstractions.MetadataOverride.Key">
<summary>
Get the key being overridden by this override
</summary>
</member>
<member name="M:LLama.Abstractions.MetadataOverride.#ctor(System.String,System.Int32)">
<summary>
Create a new override for an int key
</summary>
<param name="key"></param>
<param name="value"></param>
</member>
<member name="M:LLama.Abstractions.MetadataOverride.#ctor(System.String,System.Single)">
<summary>
Create a new override for a float key
</summary>
<param name="key"></param>
<param name="value"></param>
</member>
<member name="M:LLama.Abstractions.MetadataOverride.#ctor(System.String,System.Boolean)">
<summary>
Create a new override for a boolean key
</summary>
<param name="key"></param>
<param name="value"></param>
</member>
<member name="M:LLama.Abstractions.MetadataOverride.#ctor(System.String,System.String)">
<summary>
Create a new override for a string key
</summary>
<param name="key"></param>
<param name="value"></param>
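<example>
A hedged sketch (assuming ModelParams.MetadataOverrides is a mutable list; the key names below are illustrative GGUF metadata keys, not checked against any particular model):
<code>
using LLama.Abstractions;
using LLama.Common;

var parameters = new ModelParams("path/to/model.gguf");

// Override metadata values before the model is loaded.
parameters.MetadataOverrides.Add(new MetadataOverride("tokenizer.ggml.add_bos_token", true));
parameters.MetadataOverrides.Add(new MetadataOverride("example.numeric.key", 42));
</code>
</example>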
</member>
<member name="T:LLama.Abstractions.MetadataOverrideConverter">
<summary>
A JSON converter for <see cref="T:LLama.Abstractions.MetadataOverride"/>
</summary>
</member>
<member name="M:LLama.Abstractions.MetadataOverrideConverter.Read(System.Text.Json.Utf8JsonReader@,System.Type,System.Text.Json.JsonSerializerOptions)">
<inheritdoc/>
</member>
<member name="M:LLama.Abstractions.MetadataOverrideConverter.Write(System.Text.Json.Utf8JsonWriter,LLama.Abstractions.MetadataOverride,System.Text.Json.JsonSerializerOptions)">
<inheritdoc/>
</member>
<member name="T:LLama.Abstractions.INativeLibrary">
<summary>
Descriptor of a native library.
</summary>
</member>
<member name="P:LLama.Abstractions.INativeLibrary.Metadata">
<summary>
Metadata of this library.
</summary>
</member>
<member name="M:LLama.Abstractions.INativeLibrary.Prepare(LLama.Native.SystemInfo,LLama.Native.NativeLogConfig.LLamaLogCallback)">
<summary>
Prepare the native library file and return its local path.
If a relative path is returned, LLamaSharp will search for it in the search directories you have set.
</summary>
<param name="systemInfo">The system information of the current machine.</param>
<param name="logCallback">The log callback.</param>
<returns>
The relative paths of the library. Multiple paths may be returned so they can be tried one by one. If no file is available, return an empty array.
</returns>
</member>
<member name="T:LLama.Abstractions.ITextStreamTransform">
<summary>
Takes a stream of tokens and transforms them.
</summary>
</member>
<member name="M:LLama.Abstractions.ITextStreamTransform.TransformAsync(System.Collections.Generic.IAsyncEnumerable{System.String})">
<summary>
Takes a stream of tokens and transforms them, returning a new stream of tokens asynchronously.
</summary>
<param name="tokens"></param>
<returns></returns>
</member>
<member name="M:LLama.Abstractions.ITextStreamTransform.Clone">
<summary>
Copy the transform.
</summary>
<returns></returns>
</member>
<member name="T:LLama.Abstractions.ITextTransform">
<summary>
An interface for text transformations.
These can be used to compose a pipeline of text transformations, such as:
- Tokenization
- Lowercasing
- Punctuation removal
- Trimming
- etc.
</summary>
</member>
<member name="M:LLama.Abstractions.ITextTransform.Transform(System.String)">
<summary>
Takes a string and transforms it.
</summary>
<param name="text"></param>
<returns></returns>
</member>
<member name="M:LLama.Abstractions.ITextTransform.Clone">
<summary>
Copy the transform.
</summary>
<returns></returns>
</member>
<member name="T:LLama.Abstractions.LLamaExecutorExtensions">
<summary>
Extension methods for the <see cref="T:LLama.Abstractions.ILLamaExecutor" /> interface.
</summary>
</member>
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.AsChatClient(LLama.Abstractions.ILLamaExecutor,LLama.Abstractions.IHistoryTransform,LLama.Abstractions.ITextStreamTransform)">
<summary>Gets an <see cref="T:Microsoft.Extensions.AI.IChatClient"/> instance for the specified <see cref="T:LLama.Abstractions.ILLamaExecutor"/>.</summary>
<param name="executor">The executor.</param>
<param name="historyTransform">The <see cref="T:LLama.Abstractions.IHistoryTransform"/> to use to transform an input list messages into a prompt.</param>
<param name="outputTransform">The <see cref="T:LLama.Abstractions.ITextStreamTransform"/> to use to transform the output into text.</param>
<returns>An <see cref="T:Microsoft.Extensions.AI.IChatClient"/> instance for the provided <see cref="T:LLama.Abstractions.ILLamaExecutor" />.</returns>
<exception cref="T:System.ArgumentNullException"><paramref name="executor"/> is null.</exception>
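<example>
A hedged sketch of wrapping an executor as an IChatClient. It assumes the transform arguments are optional and that the response shape (Message/Text) matches the Microsoft.Extensions.AI preview referenced by this package; `executor` is an ILLamaExecutor created elsewhere.
<code>
using System;
using Microsoft.Extensions.AI;

IChatClient client = executor.AsChatClient();

var response = await client.CompleteAsync(new[]
{
    new ChatMessage(ChatRole.User, "Write a haiku about spring.")
});

// Property names here follow the Microsoft.Extensions.AI preview API and may differ in other versions.
Console.WriteLine(response.Message.Text);
</code>
</example>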
</member>
<member name="P:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.Metadata">
<inheritdoc/>
</member>
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.Dispose">
<inheritdoc/>
</member>
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.GetService(System.Type,System.Object)">
<inheritdoc/>
</member>
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.CompleteAsync(System.Collections.Generic.IList{Microsoft.Extensions.AI.ChatMessage},Microsoft.Extensions.AI.ChatOptions,System.Threading.CancellationToken)">
<inheritdoc/>
</member>
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.CompleteStreamingAsync(System.Collections.Generic.IList{Microsoft.Extensions.AI.ChatMessage},Microsoft.Extensions.AI.ChatOptions,System.Threading.CancellationToken)">
<inheritdoc/>
</member>
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.CreatePrompt(System.Collections.Generic.IList{Microsoft.Extensions.AI.ChatMessage})">
<summary>Format the chat messages into a string prompt.</summary>
</member>
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.CreateInferenceParams(Microsoft.Extensions.AI.ChatOptions)">
<summary>Convert the chat options to inference parameters.</summary>
</member>
<member name="T:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.AppendAssistantHistoryTransform">
<summary>A default transform that appends "Assistant: " to the end.</summary>
</member>
<member name="T:LLama.AntipromptProcessor">
<summary>
AntipromptProcessor keeps track of past tokens, looking for any of the configured anti-prompts
</summary>
</member>
<member name="M:LLama.AntipromptProcessor.#ctor(System.Collections.Generic.IEnumerable{System.String})">
<summary>
Initializes a new instance of the <see cref="T:LLama.AntipromptProcessor"/> class.
</summary>
<param name="antiprompts">The antiprompts.</param>
</member>
<member name="M:LLama.AntipromptProcessor.AddAntiprompt(System.String)">
<summary>
Add an antiprompt to the collection
</summary>
<param name="antiprompt"></param>
</member>
<member name="M:LLama.AntipromptProcessor.SetAntiprompts(System.Collections.Generic.IEnumerable{System.String})">
<summary>
Overwrite all current antiprompts with a new set
</summary>
<param name="antiprompts"></param>
</member>
<member name="M:LLama.AntipromptProcessor.Add(System.String)">
<summary>
Add some text and check if the buffer now ends with any antiprompt
</summary>
<param name="text"></param>
<returns>true if the text buffer ends with any antiprompt</returns>
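<example>
A small sketch (assuming `executor`, `prompt` and `inferenceParams` are set up as in the other examples): stop streaming once the generated text ends with an anti-prompt.
<code>
using System;

var antiprompts = new AntipromptProcessor(new[] { "User:" });

await foreach (var chunk in executor.InferAsync(prompt, inferenceParams))
{
    Console.Write(chunk);

    // Add() buffers the text and returns true when it ends with any anti-prompt.
    if (antiprompts.Add(chunk))
        break;
}
</code>
</example>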
</member>
<member name="T:LLama.Batched.BatchedExecutor">
<summary>
A batched executor that can infer multiple separate "conversations" simultaneously.
</summary>
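<example>
A minimal sketch of running two conversations in one batch (the model path, prompts and the fixed 8-token loop are illustrative; it assumes ModelParams can be passed as the context parameters and that Conversation Sample/Prompt are used as documented below):
<code>
using LLama;
using LLama.Batched;
using LLama.Common;
using LLama.Sampling;

var parameters = new ModelParams("path/to/model.gguf");
using var model = LLamaWeights.LoadFromFile(parameters);
using var executor = new BatchedExecutor(model, parameters);

var sampler = new DefaultSamplingPipeline();
var left = executor.Create();
var right = executor.Create();

left.Prompt(executor.Context.Tokenize("The capital of France is"));
right.Prompt(executor.Context.Tokenize("The capital of Japan is"));

// Generate a few tokens for both conversations in lock-step.
for (var i = 0; i &lt; 8; i++)
{
    await executor.Infer();

    var a = left.Sample(sampler, 0);
    var b = right.Sample(sampler, 0);

    left.Prompt(a);
    right.Prompt(b);
}
</code>
</example>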
</member>
<member name="F:LLama.Batched.BatchedExecutor._inferenceLock">
<summary>
Set to 1 using interlocked exchange while inference is running
</summary>
</member>
<member name="P:LLama.Batched.BatchedExecutor.Epoch">
<summary>
Epoch is incremented twice every time Infer is called. Conversations can use this to keep track of
whether they're waiting for inference, or can be sampled.
</summary>
</member>
<member name="P:LLama.Batched.BatchedExecutor.Context">
<summary>
The <see cref="T:LLama.LLamaContext"/> this executor is using
</summary>
</member>
<member name="P:LLama.Batched.BatchedExecutor.Model">
<summary>
The <see cref="T:LLama.LLamaWeights"/> this executor is using
</summary>
</member>
<member name="P:LLama.Batched.BatchedExecutor.BatchedTokenCount">
<summary>
Get the number of tokens in the batch, waiting for <see cref="M:LLama.Batched.BatchedExecutor.Infer(System.Threading.CancellationToken)"/> to be called
</summary>
</member>
<member name="P:LLama.Batched.BatchedExecutor.BatchQueueCount">
<summary>
Number of batches in the queue, waiting for <see cref="M:LLama.Batched.BatchedExecutor.Infer(System.Threading.CancellationToken)"/> to be called
</summary>
</member>
<member name="P:LLama.Batched.BatchedExecutor.IsDisposed">
<summary>
Check if this executor has been disposed.
</summary>
</member>
<member name="M:LLama.Batched.BatchedExecutor.#ctor(LLama.LLamaWeights,LLama.Abstractions.IContextParams)">
<summary>
Create a new batched executor
</summary>
<param name="model">The model to use</param>
<param name="contextParams">Parameters to create a new context</param>
</member>
<member name="M:LLama.Batched.BatchedExecutor.Create">
<summary>
Start a new <see cref="T:LLama.Batched.Conversation"/>
</summary>
<returns></returns>
</member>
<member name="M:LLama.Batched.BatchedExecutor.Load(System.String)">
<summary>
Load a conversation that was previously saved to a file. Once loaded the conversation will
need to be prompted.
</summary>
<param name="filepath"></param>
<returns></returns>
<exception cref="T:System.ObjectDisposedException"></exception>
</member>
<member name="M:LLama.Batched.BatchedExecutor.Load(LLama.Batched.Conversation.State)">
<summary>
Load a conversation that was previously saved into memory. Once loaded the conversation will need to be prompted.
</summary>
<param name="state"></param>
<returns></returns>
<exception cref="T:System.ObjectDisposedException"></exception>
</member>
<member name="M:LLama.Batched.BatchedExecutor.Infer(System.Threading.CancellationToken)">
<summary>
Run inference for all conversations in the batch which have pending tokens.
If the result is `NoKvSlot` then there is not enough memory for inference; try disposing some conversation
threads and running inference again.
</summary>
</member>
<member name="M:LLama.Batched.BatchedExecutor.Dispose">
<inheritdoc />
</member>
<member name="M:LLama.Batched.BatchedExecutor.GetTokenBatch(System.Int32)">
<summary>
Get a reference to a batch that tokens can be added to.
</summary>
<param name="minCapacity"></param>
<returns></returns>
<exception cref="T:System.ArgumentOutOfRangeException"></exception>
</member>
<member name="M:LLama.Batched.BatchedExecutor.GetEmbeddingBatch(System.Int32)">
<summary>
Get a reference to a batch that embeddings can be added to.
</summary>
<param name="minCapacity"></param>
<returns></returns>
<exception cref="T:System.ArgumentOutOfRangeException"></exception>
</member>
<member name="T:LLama.Batched.Conversation">
<summary>
A single conversation thread that can be prompted (adding tokens from the user) or inferred (extracting a token from the LLM)
</summary>
</member>
<member name="F:LLama.Batched.Conversation._forked">
<summary>
Indicates if this conversation has been "forked" and may share logits with another conversation.
</summary>
</member>
<member name="F:LLama.Batched.Conversation._batchSampleIndices">
<summary>
Stores the indices to sample from. Contains <see cref="F:LLama.Batched.Conversation._batchSampleCount"/> valid items.
</summary>
</member>
<member name="P:LLama.Batched.Conversation.Executor">
<summary>
The executor which this conversation belongs to
</summary>
</member>
<member name="P:LLama.Batched.Conversation.ConversationId">
<summary>
Unique ID for this conversation
</summary>
</member>
<member name="P:LLama.Batched.Conversation.TokenCount">
<summary>
Total number of tokens in this conversation, cannot exceed the context length.
</summary>
</member>
<member name="P:LLama.Batched.Conversation.IsDisposed">
<summary>
Indicates if this conversation has been disposed, nothing can be done with a disposed conversation
</summary>
</member>
<member name="P:LLama.Batched.Conversation.RequiresInference">
<summary>
Indicates if this conversation is waiting for inference to be run on the executor. "Prompt" and "Sample" cannot be called when this is true.
</summary>
</member>
<member name="P:LLama.Batched.Conversation.RequiresSampling">
<summary>
Indicates that this conversation should be sampled.
</summary>
</member>
<member name="M:LLama.Batched.Conversation.Finalize">
<summary>
Finalizer for Conversation
</summary>
</member>
<member name="M:LLama.Batched.Conversation.Dispose">
<summary>
End this conversation, freeing all resources used by it
</summary>
<exception cref="T:System.ObjectDisposedException"></exception>
</member>
<member name="M:LLama.Batched.Conversation.Fork">
<summary>
Create a copy of the current conversation
</summary>
<remarks>The copy shares internal state, so consumes very little extra memory.</remarks>
<returns></returns>
<exception cref="T:System.ObjectDisposedException"></exception>
</member>
<member name="M:LLama.Batched.Conversation.GetSampleIndex(System.Int32)">
<summary>
Get the index in the context from which the token can be sampled. The return value of this function can be used to retrieve logits
(<see cref="M:LLama.Native.SafeLLamaContextHandle.GetLogitsIth(System.Int32)"/>) or to sample a token (<see cref="M:LLama.Native.SafeLLamaSamplerChainHandle.Sample(LLama.Native.SafeLLamaContextHandle,System.Int32)"/>).
</summary>
<param name="offset">How far from the <b>end</b> of the previous prompt should logits be sampled. Any value other than 0 requires
allLogits to have been set during prompting.<br />
For example if 5 tokens were supplied in the last prompt call:
<list type="bullet">
<item>The logits of the first token can be accessed with 4</item>
<item>The logits of the second token can be accessed with 3</item>
<item>The logits of the third token can be accessed with 2</item>
<item>The logits of the fourth token can be accessed with 1</item>
<item>The logits of the fifth token can be accessed with 0</item>
</list>
</param>
<returns></returns>
<exception cref="T:System.ObjectDisposedException"></exception>
<exception cref="T:LLama.Batched.CannotSampleRequiresPromptException">Thrown if this conversation was not prompted before the previous call to infer</exception>
<exception cref="T:LLama.Batched.CannotSampleRequiresInferenceException">Thrown if Infer() must be called on the executor</exception>
</member>
<member name="M:LLama.Batched.Conversation.Sample(System.Int32)">
<summary>
Get the logits from this conversation, ready for sampling
</summary>
<param name="offset">How far from the <b>end</b> of the previous prompt should logits be sampled. Any value other than 0 requires allLogits to have been set during prompting</param>
<returns></returns>
<exception cref="T:System.ObjectDisposedException"></exception>
<exception cref="T:LLama.Batched.CannotSampleRequiresPromptException">Thrown if this conversation was not prompted before the previous call to infer</exception>
<exception cref="T:LLama.Batched.CannotSampleRequiresInferenceException">Thrown if Infer() must be called on the executor</exception>
</member>
<member name="M:LLama.Batched.Conversation.Prompt(System.Collections.Generic.List{LLama.Native.LLamaToken},System.Boolean)">
<summary>
Add tokens to this conversation
</summary>
<param name="tokens"></param>
<param name="allLogits">If true, generate logits for all tokens. If false, only generate logits for the last token.</param>
<returns></returns>
<exception cref="T:System.ObjectDisposedException"></exception>
<exception cref="T:LLama.Batched.AlreadyPromptedConversationException"></exception>
</member>
<member name="M:LLama.Batched.Conversation.Prompt(System.ReadOnlySpan{LLama.Native.LLamaToken},System.Boolean)">
<summary>
Add tokens to this conversation
</summary>
<param name="tokens"></param>
<param name="allLogits">If true, generate logits for all tokens. If false, only generate logits for the last token.</param>
<returns></returns>
<exception cref="T:System.ObjectDisposedException"></exception>
<exception cref="T:LLama.Batched.AlreadyPromptedConversationException"></exception>
</member>
<member name="M:LLama.Batched.Conversation.Prompt(LLama.Native.LLamaToken)">
<summary>
Add a single token to this conversation
</summary>
<param name="token"></param>
<returns></returns>
<exception cref="T:System.ObjectDisposedException"></exception>
<exception cref="T:LLama.Batched.AlreadyPromptedConversationException"></exception>
</member>
<member name="M:LLama.Batched.Conversation.Prompt(LLama.Native.SafeLlavaImageEmbedHandle)">
<summary>
Prompt this conversation with an image embedding
</summary>
<param name="embedding"></param>
</member>
<member name="M:LLama.Batched.Conversation.Prompt(System.ReadOnlySpan{System.Single})">
<summary>
Prompt this conversation with embeddings
</summary>
<param name="embeddings">The raw values of the embeddings. This span must divide equally by the embedding size of this model.</param>
</member>
<member name="M:LLama.Batched.Conversation.Modify(LLama.Batched.Conversation.ModifyKvCache)">
<summary>
Directly modify the KV cache of this conversation
</summary>
<param name="modifier"></param>
<exception cref="T:LLama.Batched.CannotModifyWhileRequiresInferenceException">Thrown if this method is called while <see cref="P:LLama.Batched.Conversation.RequiresInference"/> == true</exception>
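<example>
A hedged sketch of trimming the KV cache in place (it assumes LLamaPos converts to and from int; the token counts are illustrative):
<code>
conversation.Modify((end, kv) =>
{
    // Keep the first 8 tokens and drop everything after them.
    kv.Remove(8, (int)end - 8);

    // Return the new end position of the conversation.
    return 8;
});
</code>
</example>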
</member>
<member name="T:LLama.Batched.Conversation.KvAccessor">
<summary>
Provides direct access to the KV cache of a <see cref="T:LLama.Batched.Conversation"/>.
See <see cref="M:LLama.Batched.Conversation.Modify(LLama.Batched.Conversation.ModifyKvCache)"/> for how to use this.
</summary>
</member>
<member name="M:LLama.Batched.Conversation.KvAccessor.Remove(LLama.Native.LLamaPos,LLama.Native.LLamaPos)">
<summary>
Removes all tokens that have positions in [start, end)
</summary>
<param name="start">Start position (inclusive)</param>
<param name="end">End position (exclusive)</param>
</member>
<member name="M:LLama.Batched.Conversation.KvAccessor.Remove(LLama.Native.LLamaPos,System.Int32)">
<summary>
Removes all tokens starting from the given position
</summary>
<param name="start">Start position (inclusive)</param>
<param name="count">Number of tokens</param>
</member>
<member name="M:LLama.Batched.Conversation.KvAccessor.Add(LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
<summary>
Adds relative position "delta" to all tokens that have positions in [start, end).
If the KV cache is RoPEd, the KV data is updated
accordingly
</summary>
<param name="start">Start position (inclusive)</param>
<param name="end">End position (exclusive)</param>
<param name="delta">Amount to add on to each token position</param>
</member>
<member name="M:LLama.Batched.Conversation.KvAccessor.Divide(LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
<summary>
Integer division of the positions by a factor of `divisor > 1`.
If the KV cache is RoPEd, the KV data is updated accordingly.
</summary>
<param name="start">Start position (inclusive). If less than zero, it is clamped to zero.</param>
<param name="end">End position (exclusive). If less than zero, it is treated as "infinity".</param>
<param name="divisor">Amount to divide each position by.</param>
</member>
<member name="T:LLama.Batched.Conversation.ModifyKvCache">
<summary>
A function which can temporarily access the KV cache of a <see cref="T:LLama.Batched.Conversation"/> to modify it directly
</summary>
<param name="end">The current end token of this conversation</param>
<param name="kv">An <see cref="T:LLama.Batched.Conversation.KvAccessor"/> which allows direct access to modify the KV cache</param>
<returns>The new end token position</returns>
</member>
<member name="M:LLama.Batched.Conversation.Save(System.String)">
<summary>
Save the complete state of this conversation to a file. If the file already exists it will be overwritten.
</summary>
<param name="filepath"></param>
<exception cref="T:LLama.Batched.CannotSaveWhileRequiresInferenceException"></exception>
</member>
<member name="M:LLama.Batched.Conversation.Save">
<summary>
Save the complete state of this conversation in system memory.
</summary>
<returns></returns>
</member>
<member name="M:LLama.Batched.Conversation.Load(System.String)">
<summary>
Load state from a file.
This should only ever be called by the BatchedExecutor, on a newly created conversation object!
</summary>
<param name="filepath"></param>
<exception cref="T:System.InvalidOperationException"></exception>
</member>
<member name="M:LLama.Batched.Conversation.Load(LLama.Batched.Conversation.State)">
<summary>
Load state from a previously saved state.
This should only ever be called by the BatchedExecutor, on a newly created conversation object!
</summary>
<param name="state"></param>
</member>
<member name="M:LLama.Batched.Conversation.PrivateState.Dispose">
<inheritdoc />
</member>
<member name="T:LLama.Batched.Conversation.State">
<summary>
In memory saved state of a <see cref="T:LLama.Batched.Conversation"/>
</summary>
</member>
<member name="P:LLama.Batched.Conversation.State.IsDisposed">
<summary>
Indicates if this state has been disposed
</summary>
</member>
<member name="P:LLama.Batched.Conversation.State.Size">
<summary>
Get the size in bytes of this state object
</summary>
</member>
<member name="M:LLama.Batched.Conversation.State.Dispose">
<inheritdoc />
</member>
<member name="M:LLama.Batched.Conversation.State.#ctor">
<summary>
Internal constructor prevents anyone outside of LLamaSharp from extending this class
</summary>
</member>
<member name="T:LLama.Batched.ConversationExtensions">
<summary>
Extension methods for <see cref="T:LLama.Batched.Conversation"/>
</summary>
</member>
<member name="M:LLama.Batched.ConversationExtensions.Sample(LLama.Batched.Conversation,LLama.Native.SafeLLamaSamplerChainHandle,System.Int32)">
<summary>
Sample a token from this conversation using the given sampler chain
</summary>
<param name="conversation"><see cref="T:LLama.Batched.Conversation"/> to sample from</param>
<param name="sampler"></param>
<param name="offset">Offset from the end of the conversation to the logits to sample, see <see cref="M:LLama.Batched.Conversation.GetSampleIndex(System.Int32)"/> for more details</param>
<returns></returns>
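<example>
A brief sketch (assuming `executor` and `conversation` were created as shown for <see cref="T:LLama.Batched.BatchedExecutor"/>): after Infer() completes, sample the next token and feed it back in.
<code>
using LLama.Sampling;

var sampler = new DefaultSamplingPipeline();

await executor.Infer();

// Sample from the logits at offset 0 (the most recent token position).
var token = conversation.Sample(sampler, 0);
conversation.Prompt(token);
</code>
</example>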
</member>
<member name="M:LLama.Batched.ConversationExtensions.Sample(LLama.Batched.Conversation,LLama.Sampling.ISamplingPipeline,System.Int32)">
<summary>
Sample a token from this conversation using the given sampling pipeline
</summary>
<param name="conversation"><see cref="T:LLama.Batched.Conversation"/> to sample from</param>
<param name="sampler"></param>
<param name="offset">Offset from the end of the conversation to the logits to sample, see <see cref="M:LLama.Batched.Conversation.GetSampleIndex(System.Int32)"/> for more details</param>
<returns></returns>
</member>
<member name="M:LLama.Batched.ConversationExtensions.Rewind(LLama.Batched.Conversation,System.Int32)">
<summary>
Rewind a <see cref="T:LLama.Batched.Conversation"/> back to an earlier state by removing tokens from the end
</summary>
<param name="conversation">The conversation to rewind</param>
<param name="tokens">The number of tokens to rewind</param>
<exception cref="T:System.ArgumentOutOfRangeException">Thrown if `tokens` parameter is larger than TokenCount</exception>
</member>
<member name="M:LLama.Batched.ConversationExtensions.ShiftLeft(LLama.Batched.Conversation,System.Int32,System.Int32)">
<summary>
Shift all tokens to the left, removing "count" tokens from the start and shifting everything else over.
Leaves "keep" tokens at the start completely untouched. This can be used to free up space when the context
gets full, keeping the prompt at the start intact.
</summary>
<param name="conversation">The conversation to rewind</param>
<param name="count">How much to shift tokens over by</param>
<param name="keep">The number of tokens at the start which should <b>not</b> be shifted</param>
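<example>
An illustrative sketch (the numbers and the `systemPromptTokens` variable are hypothetical): free up space when the context is nearly full while keeping the prompt intact.
<code>
var systemPromptTokens = 32; // number of prompt tokens to preserve (hypothetical)

// If fewer than 64 positions remain, discard 256 tokens after the preserved prefix.
if (conversation.TokenCount + 64 > executor.Context.ContextSize)
    conversation.ShiftLeft(256, systemPromptTokens);
</code>
</example>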
</member>
<member name="T:LLama.Batched.ExperimentalBatchedExecutorException">
<summary>
Base class for exceptions thrown from <see cref="T:LLama.Batched.BatchedExecutor"/>
</summary>
</member>
<member name="T:LLama.Batched.AlreadyPromptedConversationException">
<summary>
This exception is thrown when "Prompt()" is called on a <see cref="T:LLama.Batched.Conversation"/> which has
already been prompted and before "Infer()" has been called on the associated
<see cref="T:LLama.Batched.BatchedExecutor"/>.
</summary>
</member>
<member name="T:LLama.Batched.CannotSampleRequiresInferenceException">
<summary>
This exception is thrown when "Sample()" is called on a <see cref="T:LLama.Batched.Conversation"/> which has
already been prompted and before "Infer()" has been called on the associated
<see cref="T:LLama.Batched.BatchedExecutor"/>.
</summary>
</member>
<member name="T:LLama.Batched.CannotSampleRequiresPromptException">
<summary>
This exception is thrown when "Sample()" is called on a <see cref="T:LLama.Batched.Conversation"/> which was not
first prompted.
</summary>
</member>
<member name="T:LLama.Batched.CannotModifyWhileRequiresInferenceException">
<summary>
This exception is thrown when <see cref="M:LLama.Batched.Conversation.Modify(LLama.Batched.Conversation.ModifyKvCache)"/> is called when <see cref="P:LLama.Batched.Conversation.RequiresInference"/> = true
</summary>
</member>
<member name="T:LLama.Batched.CannotSaveWhileRequiresInferenceException">
<summary>
This exception is thrown when "Save()" is called on a <see cref="T:LLama.Batched.Conversation"/> which has
already been prompted and before "Infer()" has been called on the associated
<see cref="T:LLama.Batched.BatchedExecutor"/>.
</summary>
</member>
<member name="M:LLama.Batched.LLamaContextExtensions.SaveState(LLama.LLamaContext,System.String,LLama.Native.LLamaSeqId,System.ReadOnlySpan{System.Byte})">
<summary>
Save the state of a particular sequence to the specified path. Also saves some extra data which will be returned when loading.
Data saved with this method <b>must</b> be loaded with <see cref="M:LLama.Batched.LLamaContextExtensions.LoadState(LLama.LLamaContext,System.String,LLama.Native.LLamaSeqId,System.Byte[]@)"/>
</summary>
<param name="context"></param>
<param name="filename"></param>
<param name="sequence"></param>
<param name="header"></param>
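<example>
A hedged sketch (assuming `context` is an existing <see cref="T:LLama.LLamaContext"/> and that a LLamaSeqId can be created by casting an int; the file name and header bytes are illustrative):
<code>
using System;
using System.Text;
using LLama.Batched;
using LLama.Native;

var header = Encoding.UTF8.GetBytes("my-app-header-v1");
context.SaveState("sequence0.state", (LLamaSeqId)0, header);

// ... later, restore the same sequence and read the header back ...
context.LoadState("sequence0.state", (LLamaSeqId)0, out var loadedHeader);
Console.WriteLine(Encoding.UTF8.GetString(loadedHeader));
</code>
</example>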
</member>
<member name="M:LLama.Batched.LLamaContextExtensions.LoadState(LLama.LLamaContext,System.String,LLama.Native.LLamaSeqId,System.Byte[]@)">
<summary>
Load the state from the specified path into a particular sequence. Also reads the header data. Must only be used with
data previously saved with <see cref="M:LLama.Batched.LLamaContextExtensions.SaveState(LLama.LLamaContext,System.String,LLama.Native.LLamaSeqId,System.ReadOnlySpan{System.Byte})"/>
</summary>
<param name="context"></param>
<param name="filename"></param>
<param name="sequence"></param>
<param name="header"></param>
<exception cref="T:System.InvalidOperationException"></exception>
</member>
<member name="T:LLama.ChatSession">
<summary>
The main chat session class.
</summary>
</member>
<member name="F:LLama.ChatSession.MODEL_STATE_FILENAME">
<summary>
The filename for the serialized model state (KV cache, etc).
</summary>
</member>
<member name="F:LLama.ChatSession.EXECUTOR_STATE_FILENAME">
<summary>
The filename for the serialized executor state.
</summary>
</member>
<member name="F:LLama.ChatSession.HISTORY_STATE_FILENAME">
<summary>
The filename for the serialized chat history.
</summary>
</member>
<member name="F:LLama.ChatSession.INPUT_TRANSFORM_FILENAME">
<summary>
The filename for the serialized input transform pipeline.
</summary>
</member>
<member name="F:LLama.ChatSession.OUTPUT_TRANSFORM_FILENAME">
<summary>
The filename for the serialized output transform.
</summary>
</member>
<member name="F:LLama.ChatSession.HISTORY_TRANSFORM_FILENAME">
<summary>
The filename for the serialized history transform.
</summary>
</member>
<member name="P:LLama.ChatSession.Executor">
<summary>
The executor for this session.
</summary>
</member>
<member name="P:LLama.ChatSession.History">
<summary>
The chat history for this session.
</summary>
</member>
<member name="P:LLama.ChatSession.HistoryTransform">
<summary>
The history transform used in this session.
</summary>
</member>
<member name="P:LLama.ChatSession.InputTransformPipeline">
<summary>
The input transform pipeline used in this session.
</summary>
</member>
<member name="F:LLama.ChatSession.OutputTransform">
<summary>
The output transform used in this session.
</summary>
</member>
<member name="M:LLama.ChatSession.InitializeSessionFromHistoryAsync(LLama.Abstractions.ILLamaExecutor,LLama.Common.ChatHistory,LLama.Abstractions.IHistoryTransform)">
<summary>
Create a new chat session and preprocess history.
</summary>
<param name="executor">The executor for this session</param>
<param name="history">History for this session</param>
<param name="transform">History Transform for this session</param>
<returns>A new chat session.</returns>
</member>
<member name="M:LLama.ChatSession.#ctor(LLama.Abstractions.ILLamaExecutor)">
<summary>
Create a new chat session.
</summary>
<param name="executor">The executor for this session</param>
</member>
<member name="M:LLama.ChatSession.#ctor(LLama.Abstractions.ILLamaExecutor,LLama.Common.ChatHistory)">
<summary>
Create a new chat session with a custom history.
</summary>
<param name="executor"></param>
<param name="history"></param>
</member>
<member name="M:LLama.ChatSession.WithHistoryTransform(LLama.Abstractions.IHistoryTransform)">
<summary>
Use a custom history transform.
</summary>
<param name="transform"></param>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.AddInputTransform(LLama.Abstractions.ITextTransform)">
<summary>
Add a text transform to the input transform pipeline.
</summary>
<param name="transform"></param>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.WithOutputTransform(LLama.Abstractions.ITextStreamTransform)">
<summary>
Use a custom output transform.
</summary>
<param name="transform"></param>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.SaveSession(System.String)">
<summary>
Save a session to a directory.
</summary>
<param name="path"></param>
<returns></returns>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="M:LLama.ChatSession.GetSessionState">
<summary>
Get the session state.
</summary>
<returns>SessionState object representing session state in-memory</returns>
</member>
<member name="M:LLama.ChatSession.LoadSession(LLama.SessionState,System.Boolean)">
<summary>
Load a session from a session state.
</summary>
<param name="state"></param>
<param name="loadTransforms">If true, loads the transforms saved in the session state.</param>
<returns></returns>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="M:LLama.ChatSession.LoadSession(System.String,System.Boolean)">
<summary>
Load a session from a directory.
</summary>
<param name="path"></param>
<param name="loadTransforms">If true, loads the transforms saved in the session state.</param>
<returns></returns>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="M:LLama.ChatSession.AddMessage(LLama.Common.ChatHistory.Message)">
<summary>
Add a message to the chat history.
</summary>
<param name="message"></param>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.AddSystemMessage(System.String)">
<summary>
Add a system message to the chat history.
</summary>
<param name="content"></param>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.AddAssistantMessage(System.String)">
<summary>
Add an assistant message to the chat history.
</summary>
<param name="content"></param>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.AddUserMessage(System.String)">
<summary>
Add a user message to the chat history.
</summary>
<param name="content"></param>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.RemoveLastMessage">
<summary>
Remove the last message from the chat history.
</summary>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.AddAndProcessMessage(LLama.Common.ChatHistory.Message)">
<summary>
Compute KV cache for the message and add it to the chat history.
</summary>
<param name="message"></param>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.AddAndProcessSystemMessage(System.String)">
<summary>
Compute KV cache for the system message and add it to the chat history.
</summary>
</member>
<member name="M:LLama.ChatSession.AddAndProcessUserMessage(System.String)">
<summary>
Compute KV cache for the user message and add it to the chat history.
</summary>
</member>
<member name="M:LLama.ChatSession.AddAndProcessAssistantMessage(System.String)">
<summary>
Compute KV cache for the assistant message and add it to the chat history.
</summary>
</member>
<member name="M:LLama.ChatSession.ReplaceUserMessage(LLama.Common.ChatHistory.Message,LLama.Common.ChatHistory.Message)">
<summary>
Replace a user message with a new message and remove all messages after the new message.
This is useful when the user wants to edit a message and regenerate the response.
</summary>
<param name="oldMessage"></param>
<param name="newMessage"></param>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.ChatAsync(LLama.Common.ChatHistory.Message,System.Boolean,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
<summary>
Chat with the model.
</summary>
<param name="message"></param>
<param name="inferenceParams"></param>
<param name="applyInputTransformPipeline"></param>
<param name="cancellationToken"></param>
<returns></returns>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="M:LLama.ChatSession.ChatAsync(LLama.Common.ChatHistory.Message,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
<summary>
Chat with the model.
</summary>
<param name="message"></param>
<param name="inferenceParams"></param>
<param name="cancellationToken"></param>
<returns></returns>
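<example>
A minimal sketch (assuming `executor` is an InteractiveExecutor built as in the earlier examples; the prompt and parameter values are placeholders):
<code>
using System;
using LLama;
using LLama.Common;

var session = new ChatSession(executor);

var inferenceParams = new InferenceParams
{
    MaxTokens = 256,
    AntiPrompts = new[] { "User:" }
};

await foreach (var text in session.ChatAsync(
    new ChatHistory.Message(AuthorRole.User, "Hello, how are you?"),
    inferenceParams))
{
    Console.Write(text);
}
</code>
</example>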
</member>
<member name="M:LLama.ChatSession.ChatAsync(LLama.Common.ChatHistory,System.Boolean,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
<summary>
Chat with the model.
</summary>
<param name="history"></param>
<param name="applyInputTransformPipeline"></param>
<param name="inferenceParams"></param>
<param name="cancellationToken"></param>
<returns></returns>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="M:LLama.ChatSession.ChatAsync(LLama.Common.ChatHistory,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
<summary>
Chat with the model.
</summary>
<param name="history"></param>
<param name="inferenceParams"></param>
<param name="cancellationToken"></param>
<returns></returns>
</member>
<member name="M:LLama.ChatSession.RegenerateAssistantMessageAsync(LLama.Common.InferenceParams,System.Threading.CancellationToken)">
<summary>
Regenerate the last assistant message.
</summary>
<param name="inferenceParams"></param>
<param name="cancellationToken"></param>
<returns></returns>
<exception cref="T:System.InvalidOperationException"></exception>
</member>
<member name="T:LLama.SessionState">
<summary>
The state of a chat session in-memory.
</summary>
</member>
<member name="P:LLama.SessionState.ExecutorState">
<summary>
Saved executor state for the session in JSON format.
</summary>
</member>
<member name="P:LLama.SessionState.ContextState">
<summary>
Saved context state (KV cache) for the session.
</summary>
</member>
<member name="P:LLama.SessionState.InputTransformPipeline">
<summary>
The input transform pipeline used in this session.
</summary>
</member>
<member name="P:LLama.SessionState.OutputTransform">
<summary>
The output transform used in this session.
</summary>
</member>
<member name="P:LLama.SessionState.HistoryTransform">
<summary>
The history transform used in this session.
</summary>
</member>
<member name="P:LLama.SessionState.History">
<summary>
The chat history messages for this session.
</summary>
</member>
<member name="M:LLama.SessionState.#ctor(LLama.LLamaContext.State,LLama.StatefulExecutorBase.ExecutorBaseState,LLama.Common.ChatHistory,System.Collections.Generic.List{LLama.Abstractions.ITextTransform},LLama.Abstractions.ITextStreamTransform,LLama.Abstractions.IHistoryTransform)">
<summary>
Create a new session state.
</summary>
<param name="contextState"></param>
<param name="executorState"></param>
<param name="history"></param>
<param name="inputTransformPipeline"></param>
<param name="outputTransform"></param>
<param name="historyTransform"></param>
</member>
<member name="M:LLama.SessionState.Save(System.String)">
<summary>
Save the session state to folder.
</summary>
<param name="path"></param>
</member>
<member name="M:LLama.SessionState.Load(System.String)">
<summary>
Load the session state from folder.
</summary>
<param name="path"></param>
<returns></returns>
<exception cref="T:System.ArgumentException">Throws when session state is incorrect</exception>
</member>
<member name="T:LLama.Common.AuthorRole">
<summary>
Role of the message author, e.g. user/assistant/system
</summary>
</member>
<member name="F:LLama.Common.AuthorRole.Unknown">
<summary>
Role is unknown
</summary>
</member>
<member name="F:LLama.Common.AuthorRole.System">
<summary>
Message comes from a "system" prompt, not written by a user or language model
</summary>
</member>
<member name="F:LLama.Common.AuthorRole.User">
<summary>
Message comes from the user
</summary>
</member>
<member name="F:LLama.Common.AuthorRole.Assistant">
<summary>
Message was generated by the language model
</summary>
</member>
<member name="T:LLama.Common.ChatHistory">
<summary>
The chat history class
</summary>
</member>
<member name="T:LLama.Common.ChatHistory.Message">
<summary>
Chat message representation
</summary>
</member>
<member name="P:LLama.Common.ChatHistory.Message.AuthorRole">
<summary>
Role of the message author, e.g. user/assistant/system
</summary>
</member>
<member name="P:LLama.Common.ChatHistory.Message.Content">
<summary>
Message content
</summary>
</member>
<member name="M:LLama.Common.ChatHistory.Message.#ctor(LLama.Common.AuthorRole,System.String)">
<summary>
Create a new instance
</summary>
<param name="authorRole">Role of message author</param>
<param name="content">Message content</param>
</member>
<member name="P:LLama.Common.ChatHistory.Messages">
<summary>
List of messages in the chat
</summary>
</member>
<member name="M:LLama.Common.ChatHistory.#ctor">
<summary>
Create a new instance of the chat content class
</summary>
</member>
<member name="M:LLama.Common.ChatHistory.#ctor(LLama.Common.ChatHistory.Message[])">
<summary>
Create a new instance of the chat history from array of messages
</summary>
<param name="messageHistory"></param>
</member>
<member name="M:LLama.Common.ChatHistory.AddMessage(LLama.Common.AuthorRole,System.String)">
<summary>
Add a message to the chat history
</summary>
<param name="authorRole">Role of the message author</param>
<param name="content">Message content</param>
</member>
<member name="M:LLama.Common.ChatHistory.ToJson">
<summary>
Serialize the chat history to JSON
</summary>
<returns></returns>
</member>
<member name="M:LLama.Common.ChatHistory.FromJson(System.String)">
<summary>
Deserialize a chat history from JSON
</summary>
<param name="json"></param>
<returns></returns>
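<example>
A short round-trip sketch (message contents are illustrative):
<code>
using LLama.Common;

var history = new ChatHistory();
history.AddMessage(AuthorRole.System, "You are a helpful assistant.");
history.AddMessage(AuthorRole.User, "Hello!");

// Serialize to JSON and back.
string json = history.ToJson();
var restored = ChatHistory.FromJson(json);
</code>
</example>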
</member>
<member name="T:LLama.Common.FixedSizeQueue`1">
<summary>
A queue with fixed storage size.
Currently it's only a naive implementation and needs to be further optimized in the future.
</summary>
</member>
<member name="P:LLama.Common.FixedSizeQueue`1.Item(System.Int32)">
<inheritdoc />
</member>
<member name="P:LLama.Common.FixedSizeQueue`1.Count">
<summary>
Number of items in this queue
</summary>
</member>
<member name="P:LLama.Common.FixedSizeQueue`1.Capacity">
<summary>
Maximum number of items allowed in this queue
</summary>
</member>
<member name="M:LLama.Common.FixedSizeQueue`1.#ctor(System.Int32)">
<summary>
Create a new queue
</summary>
<param name="size">the maximum number of items to store in this queue</param>
</member>
<member name="M:LLama.Common.FixedSizeQueue`1.#ctor(System.Int32,System.Collections.Generic.IEnumerable{`0})">
<summary>
Fill the queue with the data. Please ensure that data.Count &lt;= size
</summary>
<param name="size"></param>
<param name="data"></param>
</member>
<member name="M:LLama.Common.FixedSizeQueue`1.Enqueue(`0)">
<summary>
Enqueue an element.
</summary>
<returns></returns>
</member>
<member name="M:LLama.Common.FixedSizeQueue`1.GetEnumerator">
<inheritdoc />
</member>
<member name="M:LLama.Common.FixedSizeQueue`1.System#Collections#IEnumerable#GetEnumerator">
<inheritdoc />
</member>
<member name="T:LLama.Common.InferenceParams">
<summary>
The parameters used for inference.
</summary>
</member>
<member name="P:LLama.Common.InferenceParams.TokensKeep">
<summary>
number of tokens to keep from initial prompt when applying context shifting
</summary>
</member>
<member name="P:LLama.Common.InferenceParams.MaxTokens">
<summary>
How many new tokens to predict (n_predict). Set to -1 to generate indefinitely until generation completes.
</summary>
</member>
<member name="P:LLama.Common.InferenceParams.AntiPrompts">
<summary>
Sequences where the model will stop generating further tokens.
</summary>
</member>
<member name="P:LLama.Common.InferenceParams.SamplingPipeline">
<inheritdoc />
</member>
<member name="T:LLama.Common.MirostatType">
<summary>
Type of "mirostat" sampling to use.
https://github.com/basusourya/mirostat
</summary>
</member>
<member name="F:LLama.Common.MirostatType.Disable">
<summary>
Disable Mirostat sampling
</summary>
</member>
<member name="F:LLama.Common.MirostatType.Mirostat">
<summary>
Original mirostat algorithm
</summary>
</member>
<member name="F:LLama.Common.MirostatType.Mirostat2">
<summary>
Mirostat 2.0 algorithm
</summary>
</member>
<member name="T:LLama.Common.ModelParams">
<summary>
The parameters for initializing a LLama model.
</summary>
</member>
<member name="P:LLama.Common.ModelParams.ContextSize">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.MainGpu">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.SplitMode">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.GpuLayerCount">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.SeqMax">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.UseMemorymap">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.UseMemoryLock">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.ModelPath">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.Threads">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.BatchThreads">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.BatchSize">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.UBatchSize">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.Embeddings">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.TensorSplits">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.CheckTensors">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.MetadataOverrides">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.RopeFrequencyBase">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.RopeFrequencyScale">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.YarnExtrapolationFactor">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.YarnAttentionFactor">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.YarnBetaFast">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.YarnBetaSlow">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.YarnOriginalContext">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.YarnScalingType">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.TypeK">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.TypeV">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.NoKqvOffload">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.FlashAttention">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.DefragThreshold">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.PoolingType">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.AttentionType">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.VocabOnly">
<inheritdoc />
</member>
<member name="P:LLama.Common.ModelParams.EncodingName">
<summary>
`Encoding` cannot be directly JSON serialized; instead, store the name as a string, which can be serialized.
</summary>
</member>
<member name="P:LLama.Common.ModelParams.Encoding">
<inheritdoc />
</member>
<member name="M:LLama.Common.ModelParams.#ctor(System.String)">
<summary>
</summary>
<param name="modelPath">The model path.</param>
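<example>
An illustrative sketch of constructing model parameters and loading weights (the path and option values are placeholders):
<code>
using LLama;
using LLama.Common;

var parameters = new ModelParams("path/to/model.gguf")
{
    ContextSize = 4096,
    GpuLayerCount = 20
};

using var weights = LLamaWeights.LoadFromFile(parameters);
</code>
</example>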
</member>
<member name="T:LLama.Exceptions.RuntimeError">
<summary>
Base class for LLamaSharp runtime errors (i.e. errors produced by llama.cpp, converted into exceptions)
</summary>
</member>
<member name="M:LLama.Exceptions.RuntimeError.#ctor(System.String)">
<summary>
Create a new RuntimeError
</summary>
<param name="message"></param>
</member>
<member name="T:LLama.Exceptions.LoadWeightsFailedException">
<summary>
Loading model weights failed
</summary>
</member>
<member name="P:LLama.Exceptions.LoadWeightsFailedException.ModelPath">
<summary>
The model path which failed to load
</summary>
</member>
<member name="M:LLama.Exceptions.LoadWeightsFailedException.#ctor(System.String)">
<inheritdoc />
</member>
<member name="T:LLama.Exceptions.LLamaDecodeError">
<summary>
`llama_decode` returned a non-zero status code
</summary>
</member>
<member name="P:LLama.Exceptions.LLamaDecodeError.ReturnCode">
<summary>
The return status code
</summary>
</member>
<member name="M:LLama.Exceptions.LLamaDecodeError.#ctor(LLama.Native.DecodeResult)">
<inheritdoc />
</member>
<member name="T:LLama.Exceptions.MissingTemplateException">
<summary>
Thrown when no valid chat template is available to apply
</summary>
</member>
<member name="M:LLama.Exceptions.MissingTemplateException.#ctor">
<inheritdoc />
</member>
<member name="M:LLama.Exceptions.MissingTemplateException.#ctor(System.String)">
<inheritdoc />
</member>
<member name="T:LLama.Exceptions.GetLogitsInvalidIndexException">
<summary>
`llama_get_logits_ith` returned null, indicating that the index was invalid
</summary>
</member>
<member name="P:LLama.Exceptions.GetLogitsInvalidIndexException.Index">
<summary>
The incorrect index passed to the `llama_get_logits_ith` call
</summary>
</member>
<member name="M:LLama.Exceptions.GetLogitsInvalidIndexException.#ctor(System.Int32)">
<inheritdoc />
</member>
<member name="T:LLama.Extensions.IContextParamsExtensions">
<summary>
Extension methods to the IContextParams interface
</summary>
</member>
<member name="M:LLama.Extensions.IContextParamsExtensions.ToLlamaContextParams(LLama.Abstractions.IContextParams,LLama.Native.LLamaContextParams@)">
<summary>
Convert the given `IContextParams` into a `LLamaContextParams`
</summary>
<param name="params"></param>
<param name="result"></param>
<returns></returns>
<exception cref="T:System.IO.FileNotFoundException"></exception>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="T:LLama.Extensions.IModelParamsExtensions">
<summary>
Extension methods to the IModelParams interface
</summary>
</member>
<member name="M:LLama.Extensions.IModelParamsExtensions.ToLlamaModelParams(LLama.Abstractions.IModelParams,LLama.Native.LLamaModelParams@)">
<summary>
Convert the given `IModelParams` into a `LLamaModelParams`
</summary>
<param name="params"></param>
<param name="result"></param>
<returns></returns>
<exception cref="T:System.IO.FileNotFoundException"></exception>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="M:LLama.Extensions.IReadOnlyListExtensions.IndexOf``1(System.Collections.Generic.IReadOnlyList{``0},``0)">
<summary>
Find the index of `item` in `list`
</summary>
<typeparam name="T"></typeparam>
<param name="list">list to search</param>
<param name="item">item to search for</param>
<returns></returns>
</member>
<member name="M:LLama.Extensions.IReadOnlyListExtensions.TokensEndsWithAnyString``2(``0,``1,LLama.Native.SafeLlamaModelHandle,System.Text.Encoding)">
<summary>
Check if the given set of tokens ends with any of the given strings
</summary>
<param name="tokens">Tokens to check</param>
<param name="queries">Strings to search for</param>
<param name="model">Model to use to convert tokens into bytes</param>
<param name="encoding">Encoding to use to convert bytes into characters</param>
<returns></returns>
</member>
<member name="M:LLama.Extensions.IReadOnlyListExtensions.TokensEndsWithAnyString``1(``0,System.Collections.Generic.IList{System.String},LLama.Native.SafeLlamaModelHandle,System.Text.Encoding)">
<summary>
Check if the given set of tokens ends with any of the given strings
</summary>
<param name="tokens">Tokens to check</param>
<param name="queries">Strings to search for</param>
<param name="model">Model to use to convert tokens into bytes</param>
<param name="encoding">Encoding to use to convert bytes into characters</param>
<returns></returns>
</member>
<member name="T:LLama.Extensions.KeyValuePairExtensions">
<summary>
Extensions to the KeyValuePair struct
</summary>
</member>
<member name="M:LLama.Extensions.KeyValuePairExtensions.Deconstruct``2(System.Collections.Generic.KeyValuePair{``0,``1},``0@,``1@)">
<summary>
Deconstruct a KeyValuePair into its constituent parts.
</summary>
<param name="pair">The KeyValuePair to deconstruct</param>
<param name="first">First element, the Key</param>
<param name="second">Second element, the Value</param>
<typeparam name="TKey">Type of the Key</typeparam>
<typeparam name="TValue">Type of the Value</typeparam>
</member>
<member name="M:LLama.Extensions.ProcessExtensions.SafeRun(System.Diagnostics.Process,System.TimeSpan)">
<summary>
Run a process for a certain amount of time and then terminate it
</summary>
<param name="process"></param>
<param name="timeout"></param>
<returns>return code, standard output, standard error, flag indicating if process exited or was terminated</returns>
</member>
<member name="T:LLama.Extensions.SpanNormalizationExtensions">
<summary>
Extensions to span which apply <b>in-place</b> normalization
</summary>
</member>
<member name="M:LLama.Extensions.SpanNormalizationExtensions.MaxAbsoluteNormalization(System.Single[])">
<summary>
<b>In-place</b> multiply every element by 32760 and divide every element in the array by the max absolute value in the array
</summary>
<param name="vector"></param>
<returns>The same array</returns>
</member>
<member name="M:LLama.Extensions.SpanNormalizationExtensions.MaxAbsoluteNormalization(System.Span{System.Single})">
<summary>
<b>In-place</b> multiply every element by 32760 and divide every element in the span by the max absolute value in the span
</summary>
<param name="vector"></param>
<returns>The same span</returns>
</member>
<member name="M:LLama.Extensions.SpanNormalizationExtensions.TaxicabNormalization(System.Single[])">
<summary>
<b>In-place</b> divide every element in the array by the sum of absolute values in the array
</summary>
<remarks>Also known as "Manhattan normalization".</remarks>
<param name="vector"></param>
<returns>The same array</returns>
</member>
<member name="M:LLama.Extensions.SpanNormalizationExtensions.TaxicabNormalization(System.Span{System.Single})">
<summary>
<b>In-place</b> divide every element in the span by the sum of absolute values in the span
</summary>
<remarks>Also known as "Manhattan normalization".</remarks>
<param name="vector"></param>
<returns>The same span</returns>
</member>
<member name="M:LLama.Extensions.SpanNormalizationExtensions.EuclideanNormalization(System.Single[])">
<summary>
<b>In-place</b> divide every element by the euclidean length of the vector
</summary>
<remarks>Also known as "L2 normalization".</remarks>
<param name="vector"></param>
<returns>The same array</returns>
</member>
<member name="M:LLama.Extensions.SpanNormalizationExtensions.EuclideanNormalization(System.Span{System.Single})">
<summary>
<b>In-place</b> divide every element by the euclidean length of the vector
</summary>
<remarks>Also known as "L2 normalization".</remarks>
<param name="vector"></param>
<returns>The same span</returns>
</member>
<member name="M:LLama.Extensions.SpanNormalizationExtensions.EuclideanNormalization(System.ReadOnlySpan{System.Single})">
<summary>
Creates a new array containing an L2 normalization of the input vector.
</summary>
<param name="vector"></param>
<returns>A new array containing the normalized values</returns>
</member>
<member name="M:LLama.Extensions.SpanNormalizationExtensions.PNormalization(System.Single[],System.Int32)">
<summary>
<b>In-place</b> apply p-normalization. https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm
<list type="bullet">
<item>For p = 1, this is taxicab normalization</item>
<item>For p = 2, this is euclidean normalization</item>
<item>As p approaches infinity, this approaches the infinity norm (maximum norm)</item>
</list>
</summary>
<param name="vector"></param>
<param name="p"></param>
<returns>The same array</returns>
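            <example>
            A small worked sketch (illustrative only) showing that the p = 2 case matches euclidean normalization:
            <code>
            var v1 = new[] { 3f, 4f };
            v1.PNormalization(2);        // divides by the 2-norm (5), giving { 0.6, 0.8 }

            var v2 = new[] { 3f, 4f };
            v2.EuclideanNormalization(); // same result as p = 2
            </code>
            </example>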
</member>
<member name="M:LLama.Extensions.SpanNormalizationExtensions.PNormalization(System.Span{System.Single},System.Int32)">
<summary>
<b>In-place</b> apply p-normalization. https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm
<list type="bullet">
<item>For p = 1, this is taxicab normalization</item>
<item>For p = 2, this is euclidean normalization</item>
<item>As p approaches infinity, this approaches the infinity norm (maximum norm)</item>
</list>
</summary>
<param name="vector"></param>
<param name="p"></param>
<returns>The same span</returns>
</member>
<member name="T:LLama.LLamaContext">
<summary>
A llama_context, which holds all the context required to interact with a model
</summary>
</member>
<member name="P:LLama.LLamaContext.ContextSize">
<summary>
Total number of tokens in the context
</summary>
</member>
<member name="P:LLama.LLamaContext.EmbeddingSize">
<summary>
Dimension of embedding vectors
</summary>
</member>
<member name="P:LLama.LLamaContext.Params">
<summary>
The context params set for this context
</summary>
</member>
<member name="P:LLama.LLamaContext.NativeHandle">
<summary>
The native handle, which is used to be passed to the native APIs
</summary>
<remarks>Be careful how you use this!</remarks>
</member>
<member name="P:LLama.LLamaContext.Encoding">
<summary>
The encoding set for this model to deal with text input.
</summary>
</member>
<member name="P:LLama.LLamaContext.GenerationThreads">
<summary>
Get or set the number of threads to use for generation
</summary>
</member>
<member name="P:LLama.LLamaContext.BatchThreads">
<summary>
Get or set the number of threads to use for batch processing
</summary>
</member>
<member name="P:LLama.LLamaContext.BatchSize">
<summary>
Get the maximum batch size for this context
</summary>
</member>
<member name="P:LLama.LLamaContext.Vocab">
<summary>
Get the special tokens for the model associated with this context
</summary>
</member>
<member name="M:LLama.LLamaContext.#ctor(LLama.LLamaWeights,LLama.Abstractions.IContextParams,Microsoft.Extensions.Logging.ILogger)">
<summary>
Create a new LLamaContext for the given LLamaWeights
</summary>
<param name="model"></param>
<param name="params"></param>
<param name="logger"></param>
<exception cref="T:System.ObjectDisposedException"></exception>
</member>
<member name="M:LLama.LLamaContext.Tokenize(System.String,System.Boolean,System.Boolean)">
<summary>
Tokenize a string.
</summary>
<param name="text"></param>
<param name="addBos">Whether to add a bos to the text.</param>
<param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
<returns></returns>
</member>
<member name="M:LLama.LLamaContext.DeTokenize(System.Collections.Generic.IReadOnlyList{LLama.Native.LLamaToken})">
<summary>
Detokenize the tokens to text.
</summary>
<param name="tokens"></param>
<returns></returns>
</member>
<member name="M:LLama.LLamaContext.SaveState(System.String)">
<summary>
Save the state to specified path.
</summary>
<param name="filename"></param>
</member>
<member name="M:LLama.LLamaContext.SaveState(System.String,LLama.Native.LLamaSeqId)">
<summary>
Save the state of a particular sequence to specified path.
</summary>
<param name="filename"></param>
<param name="sequence"></param>
</member>
<member name="M:LLama.LLamaContext.GetState">
<summary>
Get the state data as an opaque handle, which can be loaded later using <see cref="M:LLama.LLamaContext.LoadState(LLama.LLamaContext.State)"/>
</summary>
<remarks>Use <see cref="M:LLama.LLamaContext.SaveState(System.String)"/> if you intend to save this state to disk.</remarks>
<returns></returns>
</member>
<member name="M:LLama.LLamaContext.GetState(LLama.Native.LLamaSeqId)">
<summary>
Get the state data as an opaque handle, which can be loaded later using <see cref="M:LLama.LLamaContext.LoadState(LLama.LLamaContext.State)"/>
</summary>
<remarks>Use <see cref="M:LLama.LLamaContext.SaveState(System.String,LLama.Native.LLamaSeqId)"/> if you intend to save this state to disk.</remarks>
<returns></returns>
</member>
<member name="M:LLama.LLamaContext.LoadState(System.String)">
<summary>
Load the state from specified path.
</summary>
<param name="filename"></param>
</member>
<member name="M:LLama.LLamaContext.LoadState(System.String,LLama.Native.LLamaSeqId)">
<summary>
Load the state from specified path into a particular sequence
</summary>
<param name="filename"></param>
<param name="sequence"></param>
</member>
<member name="M:LLama.LLamaContext.LoadState(LLama.LLamaContext.State)">
<summary>
Load the state from memory.
</summary>
<param name="state"></param>
</member>
<member name="M:LLama.LLamaContext.LoadState(LLama.LLamaContext.SequenceState,LLama.Native.LLamaSeqId)">
<summary>
Load the state from memory into a particular sequence
</summary>
<param name="state"></param>
<param name="sequence"></param>
</member>
<member name="M:LLama.LLamaContext.Encode(LLama.Native.LLamaBatch)">
<summary>
</summary>
<param name="batch"></param>
</member>
<member name="M:LLama.LLamaContext.EncodeAsync(LLama.Native.LLamaBatch,System.Threading.CancellationToken)">
<summary>
</summary>
<param name="batch"></param>
<param name="cancellationToken"></param>
</member>
<member name="M:LLama.LLamaContext.Decode(LLama.Native.LLamaBatch)">
<summary>
</summary>
<param name="batch"></param>
</member>
<member name="M:LLama.LLamaContext.DecodeAsync(LLama.Native.LLamaBatch,System.Threading.CancellationToken)">
<summary>
</summary>
<param name="batch"></param>
<param name="cancellationToken"></param>
</member>
<member name="M:LLama.LLamaContext.Decode(LLama.Native.LLamaBatchEmbeddings)">
<summary>
</summary>
<param name="batch"></param>
</member>
<member name="M:LLama.LLamaContext.DecodeAsync(LLama.Native.LLamaBatchEmbeddings,System.Threading.CancellationToken)">
<summary>
</summary>
<param name="batch"></param>
<param name="cancellationToken"></param>
</member>
<member name="M:LLama.LLamaContext.DecodeAsync(System.Collections.Generic.List{LLama.Native.LLamaToken},LLama.Native.LLamaSeqId,LLama.Native.LLamaBatch,System.Int32)">
<summary>
</summary>
<param name="tokens"></param>
<param name="id"></param>
<param name="batch"></param>
<param name="n_past"></param>
<returns>A tuple, containing the decode result, the number of tokens that have <b>not</b> been decoded yet and the total number of tokens that have been decoded.</returns>
</member>
<member name="M:LLama.LLamaContext.Dispose">
<inheritdoc />
</member>
<member name="T:LLama.LLamaContext.State">
<summary>
The state of this context, which can be reloaded later
</summary>
</member>
<member name="P:LLama.LLamaContext.State.Size">
<summary>
Get the size in bytes of this state object
</summary>
</member>
<member name="M:LLama.LLamaContext.State.ReleaseHandle">
<inheritdoc />
</member>
<member name="M:LLama.LLamaContext.State.SaveAsync(System.IO.Stream)">
<summary>
Write all the bytes of this state to the given stream
</summary>
<param name="stream"></param>
</member>
<member name="M:LLama.LLamaContext.State.Save(System.IO.Stream)">
<summary>
Write all the bytes of this state to the given stream
</summary>
<param name="stream"></param>
</member>
<member name="M:LLama.LLamaContext.State.LoadAsync(System.IO.Stream)">
<summary>
Load a state from a stream
</summary>
<param name="stream"></param>
<returns></returns>
</member>
<member name="M:LLama.LLamaContext.State.Load(System.IO.Stream)">
<summary>
Load a state from a stream
</summary>
<param name="stream"></param>
<returns></returns>
</member>
<member name="T:LLama.LLamaContext.SequenceState">
<summary>
The state of a single sequence, which can be reloaded later
</summary>
</member>
<member name="P:LLama.LLamaContext.SequenceState.Size">
<summary>
Get the size in bytes of this state object
</summary>
</member>
<member name="M:LLama.LLamaContext.SequenceState.ReleaseHandle">
<inheritdoc />
</member>
<member name="M:LLama.LLamaContext.SequenceState.CopyTo(System.Byte*,System.UInt64,System.UInt64)">
<summary>
Copy bytes to a destination pointer.
</summary>
<param name="dst">Destination to write to</param>
<param name="length">Length of the destination buffer</param>
<param name="offset">Offset from start of src to start copying from</param>
<returns>Number of bytes written to destination</returns>
</member>
<member name="T:LLama.LLamaEmbedder">
<summary>
Generate high dimensional embedding vectors from text
</summary>
</member>
<member name="P:LLama.LLamaEmbedder.EmbeddingSize">
<summary>
Dimension of embedding vectors
</summary>
</member>
<member name="P:LLama.LLamaEmbedder.Context">
<summary>
LLama Context
</summary>
</member>
<member name="M:LLama.LLamaEmbedder.#ctor(LLama.LLamaWeights,LLama.Abstractions.IContextParams,Microsoft.Extensions.Logging.ILogger)">
<summary>
Create a new embedder, using the given LLamaWeights
</summary>
<param name="weights"></param>
<param name="params"></param>
<param name="logger"></param>
</member>
<member name="M:LLama.LLamaEmbedder.Dispose">
<inheritdoc />
</member>
<member name="M:LLama.LLamaEmbedder.GetEmbeddings(System.String,System.Threading.CancellationToken)">
<summary>
Get high dimensional embedding vectors for the given text. Depending on the pooling type used when constructing
this <see cref="T:LLama.LLamaEmbedder"/> this may return an embedding vector per token, or one single embedding vector for the entire string.
</summary>
<remarks>Embedding vectors are not normalized, consider using one of the extensions in <see cref="T:LLama.Extensions.SpanNormalizationExtensions"/>.</remarks>
<param name="input"></param>
<param name="cancellationToken"></param>
<returns></returns>
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
<exception cref="T:System.NotSupportedException"></exception>
</member>
<member name="P:LLama.LLamaEmbedder.Microsoft#Extensions#AI#IEmbeddingGenerator{System#String,Microsoft#Extensions#AI#Embedding{System#Single}}#Metadata">
<inheritdoc />
</member>
<member name="M:LLama.LLamaEmbedder.Microsoft#Extensions#AI#IEmbeddingGenerator{System#String,Microsoft#Extensions#AI#Embedding{System#Single}}#GetService(System.Type,System.Object)">
<inheritdoc />
</member>
<member name="M:LLama.LLamaEmbedder.Microsoft#Extensions#AI#IEmbeddingGenerator{System#String,Microsoft#Extensions#AI#Embedding{System#Single}}#GenerateAsync(System.Collections.Generic.IEnumerable{System.String},Microsoft.Extensions.AI.EmbeddingGenerationOptions,System.Threading.CancellationToken)">
<inheritdoc />
</member>
<member name="T:LLama.StatefulExecutorBase">
<summary>
The base class for stateful LLama executors.
</summary>
</member>
<member name="F:LLama.StatefulExecutorBase._logger">
<summary>
The logger used by this executor.
</summary>
</member>
<member name="F:LLama.StatefulExecutorBase._pastTokensCount">
<summary>
The tokens that were already processed by the model.
</summary>
</member>
<member name="F:LLama.StatefulExecutorBase._consumedTokensCount">
<summary>
The tokens that were consumed by the model during the current inference.
</summary>
</member>
<member name="F:LLama.StatefulExecutorBase._n_session_consumed">
<summary>
</summary>
</member>
<member name="F:LLama.StatefulExecutorBase._n_matching_session_tokens">
<summary>
</summary>
</member>
<member name="F:LLama.StatefulExecutorBase._pathSession">
<summary>
The path of the session file.
</summary>
</member>
<member name="F:LLama.StatefulExecutorBase._embeds">
<summary>
A container for the tokens to be processed and those already processed.
</summary>
</member>
<member name="F:LLama.StatefulExecutorBase._embed_inps">
<summary>
A container for the tokens of input.
</summary>
</member>
<member name="F:LLama.StatefulExecutorBase._session_tokens">
<summary>
</summary>
</member>
<member name="F:LLama.StatefulExecutorBase._last_n_tokens">
<summary>
The last tokens generated by the model.
</summary>
</member>
<member name="P:LLama.StatefulExecutorBase.Context">
<summary>
The context used by the executor.
</summary>
</member>
<member name="P:LLama.StatefulExecutorBase.IsMultiModal">
<inheritdoc />
</member>
<member name="P:LLama.StatefulExecutorBase.ClipModel">
<inheritdoc />
</member>
<member name="P:LLama.StatefulExecutorBase.Images">
<inheritdoc />
</member>
<member name="M:LLama.StatefulExecutorBase.#ctor(LLama.LLamaContext,Microsoft.Extensions.Logging.ILogger)">
<summary>
</summary>
<param name="context"></param>
<param name="logger"></param>
</member>
<member name="M:LLama.StatefulExecutorBase.#ctor(LLama.LLamaContext,LLama.LLavaWeights,Microsoft.Extensions.Logging.ILogger)">
<summary>
</summary>
<param name="context"></param>
<param name="lLavaWeights"></param>
<param name="logger"></param>
</member>
<member name="M:LLama.StatefulExecutorBase.WithSessionFile(System.String)">
<summary>
This API is currently not verified.
</summary>
<param name="filename"></param>
<returns></returns>
<exception cref="T:System.ArgumentNullException"></exception>
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
</member>
<member name="M:LLama.StatefulExecutorBase.SaveSessionFile(System.String)">
<summary>
This API has not been verified currently.
</summary>
<param name="filename"></param>
</member>
<member name="M:LLama.StatefulExecutorBase.HandleRunOutOfContext(System.Int32)">
<summary>
After running out of context space, take some tokens from the original prompt and recompute the logits in batches.
</summary>
<param name="tokensToKeep"></param>
</member>
<member name="M:LLama.StatefulExecutorBase.TryReuseMatchingPrefix">
<summary>
Try to reuse the matching prefix from the session file.
</summary>
</member>
<member name="M:LLama.StatefulExecutorBase.GetLoopCondition(LLama.StatefulExecutorBase.InferStateArgs)">
<summary>
Decide whether to continue the loop.
</summary>
<param name="args"></param>
<returns></returns>
</member>
<member name="M:LLama.StatefulExecutorBase.PreprocessInputs(System.String,LLama.StatefulExecutorBase.InferStateArgs)">
<summary>
Preprocess the inputs before the inference.
</summary>
<param name="text"></param>
<param name="args"></param>
</member>
<member name="M:LLama.StatefulExecutorBase.PostProcess(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
<summary>
Do some post processing after the inference.
</summary>
<param name="inferenceParams"></param>
<param name="args"></param>
<returns></returns>
</member>
<member name="M:LLama.StatefulExecutorBase.InferInternal(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
<summary>
The core inference logic.
</summary>
<param name="inferenceParams"></param>
<param name="args"></param>
</member>
<member name="M:LLama.StatefulExecutorBase.SaveState(System.String)">
<summary>
Save the current state to a file.
</summary>
<param name="filename"></param>
</member>
<member name="M:LLama.StatefulExecutorBase.GetStateData">
<summary>
Get the current state data.
</summary>
<returns></returns>
</member>
<member name="M:LLama.StatefulExecutorBase.LoadState(LLama.StatefulExecutorBase.ExecutorBaseState)">
<summary>
Load the state from data.
</summary>
<param name="data"></param>
</member>
<member name="M:LLama.StatefulExecutorBase.LoadState(System.String)">
<summary>
Load the state from a file.
</summary>
<param name="filename"></param>
</member>
<member name="M:LLama.StatefulExecutorBase.InferAsync(System.String,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
<summary>
Execute the inference.
</summary>
<param name="text">The prompt. If null, generation will continue where it left off previously.</param>
<param name="inferenceParams"></param>
<param name="cancellationToken"></param>
<returns></returns>
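            <example>
            A minimal interactive sketch ("model.gguf", the prompt and the anti-prompt are placeholders; logger arguments are assumed optional):
            <code>
            var parameters = new ModelParams("model.gguf");
            using var weights = LLamaWeights.LoadFromFile(parameters);
            using var context = weights.CreateContext(parameters);
            var executor = new InteractiveExecutor(context);

            var inferenceParams = new InferenceParams { MaxTokens = 256, AntiPrompts = new[] { "User:" } };
            await foreach (var token in executor.InferAsync("User: Hello!\nAssistant:", inferenceParams))
                Console.Write(token);
            </code>
            </example>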
</member>
<member name="M:LLama.StatefulExecutorBase.PrefillPromptAsync(System.String)">
<summary>
Asynchronously runs a prompt through the model to compute KV cache without generating any new tokens.
This can reduce the latency of the first response if the user's first input does not arrive immediately.
</summary>
<param name="prompt">Prompt to process</param>
<returns></returns>
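            <example>
            A hedged sketch: warm the KV cache with a system prompt before the user's first message arrives
            (`executor`, `userInput` and `inferenceParams` are assumed to exist, as in the interactive example above):
            <code>
            // Processes the prompt and fills the KV cache; no tokens are generated here.
            await executor.PrefillPromptAsync("You are a helpful assistant.\n");

            // Later, when the user input arrives, the first response starts with lower latency.
            await foreach (var token in executor.InferAsync(userInput, inferenceParams))
                Console.Write(token);
            </code>
            </example>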
</member>
<member name="T:LLama.StatefulExecutorBase.InferStateArgs">
<summary>
State arguments that are used in single inference
</summary>
</member>
<member name="P:LLama.StatefulExecutorBase.InferStateArgs.Antiprompts">
<summary>
</summary>
</member>
<member name="P:LLama.StatefulExecutorBase.InferStateArgs.RemainedTokens">
<summary>
Number of tokens remaining to be generated (n_remain).
</summary>
</member>
<member name="P:LLama.StatefulExecutorBase.InferStateArgs.ReturnValue">
<summary>
</summary>
</member>
<member name="P:LLama.StatefulExecutorBase.InferStateArgs.WaitForInput">
<summary>
</summary>
</member>
<member name="P:LLama.StatefulExecutorBase.InferStateArgs.NeedToSaveSession">
<summary>
</summary>
</member>
<member name="T:LLama.InstructExecutor">
<summary>
The LLama executor for instruct mode.
</summary>
</member>
<member name="M:LLama.InstructExecutor.#ctor(LLama.LLamaContext,System.String,System.String,Microsoft.Extensions.Logging.ILogger)">
<summary>
</summary>
<param name="context"></param>
<param name="instructionPrefix"></param>
<param name="instructionSuffix"></param>
<param name="logger"></param>
</member>
<member name="M:LLama.InstructExecutor.GetStateData">
<inheritdoc />
</member>
<member name="M:LLama.InstructExecutor.LoadState(LLama.StatefulExecutorBase.ExecutorBaseState)">
<inheritdoc />
</member>
<member name="M:LLama.InstructExecutor.SaveState(System.String)">
<inheritdoc />
</member>
<member name="M:LLama.InstructExecutor.LoadState(System.String)">
<inheritdoc />
</member>
<member name="M:LLama.InstructExecutor.GetLoopCondition(LLama.StatefulExecutorBase.InferStateArgs)">
<inheritdoc />
</member>
<member name="M:LLama.InstructExecutor.PreprocessInputs(System.String,LLama.StatefulExecutorBase.InferStateArgs)">
<inheritdoc />
</member>
<member name="M:LLama.InstructExecutor.PostProcess(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
<inheritdoc />
</member>
<member name="M:LLama.InstructExecutor.InferInternal(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
<inheritdoc />
</member>
<member name="T:LLama.InstructExecutor.InstructExecutorState">
<summary>
The descriptor of the state of the instruct executor.
</summary>
</member>
<member name="P:LLama.InstructExecutor.InstructExecutorState.IsPromptRun">
<summary>
Whether the executor is running for the first time (running the prompt).
</summary>
</member>
<member name="P:LLama.InstructExecutor.InstructExecutorState.InputPrefixTokens">
<summary>
Instruction prefix tokens.
</summary>
</member>
<member name="P:LLama.InstructExecutor.InstructExecutorState.InputSuffixTokens">
<summary>
Instruction suffix tokens.
</summary>
</member>
<member name="T:LLama.InteractiveExecutor">
<summary>
The LLama executor for interactive mode.
</summary>
</member>
<member name="M:LLama.InteractiveExecutor.#ctor(LLama.LLamaContext,Microsoft.Extensions.Logging.ILogger)">
<summary>
</summary>
<param name="context"></param>
<param name="logger"></param>
</member>
<member name="M:LLama.InteractiveExecutor.GetStateData">
<inheritdoc />
</member>
<member name="M:LLama.InteractiveExecutor.LoadState(LLama.StatefulExecutorBase.ExecutorBaseState)">
<inheritdoc />
</member>
<member name="M:LLama.InteractiveExecutor.SaveState(System.String)">
<inheritdoc />
</member>
<member name="M:LLama.InteractiveExecutor.LoadState(System.String)">
<inheritdoc />
</member>
<member name="M:LLama.InteractiveExecutor.GetLoopCondition(LLama.StatefulExecutorBase.InferStateArgs)">
<summary>
Define whether to continue the loop to generate responses.
</summary>
<returns></returns>
</member>
<member name="M:LLama.InteractiveExecutor.PreprocessInputs(System.String,LLama.StatefulExecutorBase.InferStateArgs)">
<inheritdoc />
</member>
<member name="M:LLama.InteractiveExecutor.PreprocessLlava(System.String,LLama.StatefulExecutorBase.InferStateArgs,System.Boolean)">
<inheritdoc />
</member>
<member name="M:LLama.InteractiveExecutor.PostProcess(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
<summary>
Return whether to break the generation.
</summary>
<param name="inferenceParams"></param>
<param name="args"></param>
<returns></returns>
</member>
<member name="M:LLama.InteractiveExecutor.InferInternal(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
<inheritdoc />
</member>
<member name="T:LLama.InteractiveExecutor.InteractiveExecutorState">
<summary>
The descriptor of the state of the interactive executor.
</summary>
</member>
<member name="P:LLama.InteractiveExecutor.InteractiveExecutorState.IsPromptRun">
<summary>
Whether the executor is running for the first time (running the prompt).
</summary>
</member>
<member name="T:LLama.LLamaQuantizer">
<summary>
The quantizer to quantize the model.
</summary>
</member>
<member name="M:LLama.LLamaQuantizer.Quantize(System.String,System.String,LLama.Native.LLamaFtype,System.Int32,System.Boolean,System.Boolean)">
<summary>
Quantize the model.
</summary>
<param name="srcFileName">The model file to be quantized.</param>
<param name="dstFilename">The path to save the quantized model.</param>
<param name="ftype">The type of quantization.</param>
<param name="nthread">Thread to be used during the quantization. By default it's the physical core number.</param>
<param name="allowRequantize"></param>
<param name="quantizeOutputTensor"></param>
<returns>Whether the quantization is successful.</returns>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="M:LLama.LLamaQuantizer.Quantize(System.String,System.String,System.String,System.Int32,System.Boolean,System.Boolean)">
<summary>
Quantize the model.
</summary>
<param name="srcFileName">The model file to be quantized.</param>
<param name="dstFilename">The path to save the quantized model.</param>
<param name="ftype">The type of quantization.</param>
<param name="nthread">Thread to be used during the quantization. By default it's the physical core number.</param>
<param name="allowRequantize"></param>
<param name="quantizeOutputTensor"></param>
<returns>Whether the quantization is successful.</returns>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="M:LLama.LLamaQuantizer.StringToFtype(System.String)">
<summary>
Parse a string into a LLamaFtype. This is a "relaxed" parse, which accepts any string that is contained within an enum name.
For example "Q5_K_M" will convert to "LLAMA_FTYPE_MOSTLY_Q5_K_M"
</summary>
<param name="str"></param>
<returns></returns>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="T:LLama.StatelessExecutor">
<summary>
This executor processes each input as a one-off job. Previous inputs do not affect the
response to the current input.
</summary>
</member>
<member name="P:LLama.StatelessExecutor.IsMultiModal">
<inheritdoc />
</member>
<member name="P:LLama.StatelessExecutor.ClipModel">
<inheritdoc />
</member>
<member name="P:LLama.StatelessExecutor.Images">
<inheritdoc />
</member>
<member name="P:LLama.StatelessExecutor.Context">
<summary>
The context used by the executor when running the inference.
</summary>
</member>
<member name="P:LLama.StatelessExecutor.ApplyTemplate">
<summary>
If true, applies the default template to the prompt, as defined in the rules for <a href="https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template">llama_chat_apply_template</a>.
</summary>
</member>
<member name="P:LLama.StatelessExecutor.SystemMessage">
<summary>
The system message to use with the prompt. Only used when <see cref="P:LLama.StatelessExecutor.ApplyTemplate" /> is true.
</summary>
</member>
<member name="M:LLama.StatelessExecutor.#ctor(LLama.LLamaWeights,LLama.Abstractions.IContextParams,Microsoft.Extensions.Logging.ILogger)">
<summary>
Create a new stateless executor which will use the given model
</summary>
<param name="weights"></param>
<param name="params"></param>
<param name="logger"></param>
</member>
<member name="M:LLama.StatelessExecutor.InferAsync(System.String,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
<inheritdoc />
</member>
<member name="T:LLama.LLamaTemplate">
<summary>
Converts a sequence of messages into text according to a model template
</summary>
</member>
<member name="F:LLama.LLamaTemplate._customTemplate">
<summary>
Custom template. May be null if a model was supplied to the constructor.
</summary>
</member>
<member name="F:LLama.LLamaTemplate._roleCache">
<summary>
Keep a cache of roles converted into bytes. Roles are very frequently re-used, so this saves converting them many times.
</summary>
</member>
<member name="F:LLama.LLamaTemplate._messages">
<summary>
Array of messages. The <see cref="P:LLama.LLamaTemplate.Count"/> property indicates how many messages there are
</summary>
</member>
<member name="F:LLama.LLamaTemplate._addAssistant">
<summary>
Backing field for <see cref="P:LLama.LLamaTemplate.AddAssistant"/>
</summary>
</member>
<member name="F:LLama.LLamaTemplate._nativeChatMessages">
<summary>
Temporary array of messages in the format llama.cpp needs, used when applying the template
</summary>
</member>
<member name="F:LLama.LLamaTemplate._resultLength">
<summary>
Indicates how many bytes are in the <see cref="F:LLama.LLamaTemplate._result"/> array
</summary>
</member>
<member name="F:LLama.LLamaTemplate._result">
<summary>
Result bytes of last call to <see cref="M:LLama.LLamaTemplate.Apply"/>
</summary>
</member>
<member name="F:LLama.LLamaTemplate._dirty">
<summary>
Indicates if this template has been modified and needs regenerating
</summary>
</member>
<member name="F:LLama.LLamaTemplate.Encoding">
<summary>
The encoding algorithm to use
</summary>
</member>
<member name="P:LLama.LLamaTemplate.Count">
<summary>
Number of messages added to this template
</summary>
</member>
<member name="P:LLama.LLamaTemplate.Item(System.Int32)">
<summary>
Get the message at the given index
</summary>
<param name="index"></param>
<returns></returns>
<exception cref="T:System.ArgumentOutOfRangeException">Thrown if index is less than zero or greater than or equal to <see cref="P:LLama.LLamaTemplate.Count"/></exception>
</member>
<member name="P:LLama.LLamaTemplate.AddAssistant">
<summary>
Whether to end the prompt with the token(s) that indicate the start of an assistant message.
</summary>
</member>
<member name="M:LLama.LLamaTemplate.#ctor(LLama.Native.SafeLlamaModelHandle,System.String)">
<summary>
Construct a new template, using the default model template
</summary>
<param name="model"></param>
<param name="name"></param>
</member>
<member name="M:LLama.LLamaTemplate.#ctor(LLama.LLamaWeights)">
<summary>
Construct a new template, using the default model template
</summary>
<param name="weights"></param>
</member>
<member name="M:LLama.LLamaTemplate.#ctor(System.String)">
<summary>
Construct a new template, using a custom template.
</summary>
<remarks>Only a pre-defined list of templates is supported. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template</remarks>
<param name="customTemplate"></param>
</member>
<member name="M:LLama.LLamaTemplate.Add(System.String,System.String)">
<summary>
Add a new message to the end of this template
</summary>
<param name="role"></param>
<param name="content"></param>
<returns>This template, for chaining calls.</returns>
</member>
<member name="M:LLama.LLamaTemplate.Add(LLama.LLamaTemplate.TextMessage)">
<summary>
Add a new message to the end of this template
</summary>
<param name="message"></param>
<returns>This template, for chaining calls.</returns>
</member>
<member name="M:LLama.LLamaTemplate.RemoveAt(System.Int32)">
<summary>
Remove a message at the given index
</summary>
<param name="index"></param>
<returns>This template, for chaining calls.</returns>
</member>
<member name="M:LLama.LLamaTemplate.Clear">
<summary>
Remove all messages from the template and reset the internal state to accept/generate new messages
</summary>
</member>
<member name="M:LLama.LLamaTemplate.Apply">
<summary>
Apply the template to the messages and return a span containing the results
</summary>
<returns>A span over the buffer that holds the applied template</returns>
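            <example>
            A hedged sketch of building a prompt from chat messages, assuming `weights` is a previously loaded <see cref="T:LLama.LLamaWeights"/>:
            <code>
            var template = new LLamaTemplate(weights)
            {
                AddAssistant = true // end with the tokens that start an assistant turn
            };
            template.Add("system", "You are a helpful assistant.");
            template.Add("user", "What is the capital of France?");

            // Apply() returns raw bytes; decode them with UTF-8 (the template encoding).
            var prompt = Encoding.UTF8.GetString(template.Apply().ToArray());
            </code>
            </example>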
</member>
<member name="T:LLama.LLamaTemplate.TextMessage">
<summary>
A message that has been added to a template
</summary>
</member>
<member name="P:LLama.LLamaTemplate.TextMessage.Role">
<summary>
The "role" string for this message
</summary>
</member>
<member name="P:LLama.LLamaTemplate.TextMessage.Content">
<summary>
The text content of this message
</summary>
</member>
<member name="M:LLama.LLamaTemplate.TextMessage.Deconstruct(System.String@,System.String@)">
<summary>
Deconstruct this message into role and content
</summary>
<param name="role"></param>
<param name="content"></param>
</member>
<member name="T:LLama.LLamaTransforms">
<summary>
A class that contains all the transforms provided internally by LLama.
</summary>
</member>
<member name="T:LLama.LLamaTransforms.DefaultHistoryTransform">
<summary>
The default history transform.
Uses plain text with the following format:
[Author]: [Message]
</summary>
</member>
<member name="M:LLama.LLamaTransforms.DefaultHistoryTransform.#ctor(System.String,System.String,System.String,System.String,System.Boolean)">
<summary>
</summary>
<param name="userName"></param>
<param name="assistantName"></param>
<param name="systemName"></param>
<param name="unknownName"></param>
<param name="isInstructMode"></param>
</member>
<member name="M:LLama.LLamaTransforms.DefaultHistoryTransform.Clone">
<inheritdoc />
</member>
<member name="M:LLama.LLamaTransforms.DefaultHistoryTransform.HistoryToText(LLama.Common.ChatHistory)">
<inheritdoc />
</member>
<member name="M:LLama.LLamaTransforms.DefaultHistoryTransform.TextToHistory(LLama.Common.AuthorRole,System.String)">
<inheritdoc />
</member>
<member name="M:LLama.LLamaTransforms.DefaultHistoryTransform.TrimNamesFromText(System.String,LLama.Common.AuthorRole)">
<summary>
Drop the name at the beginning and the end of the text.
</summary>
<param name="text"></param>
<param name="role"></param>
<returns></returns>
</member>
<member name="T:LLama.LLamaTransforms.NaiveTextInputTransform">
<summary>
A text input transform that only trims the text.
</summary>
</member>
<member name="M:LLama.LLamaTransforms.NaiveTextInputTransform.Transform(System.String)">
<inheritdoc />
</member>
<member name="M:LLama.LLamaTransforms.NaiveTextInputTransform.Clone">
<inheritdoc />
</member>
<member name="T:LLama.LLamaTransforms.EmptyTextOutputStreamTransform">
<summary>
A no-op text output stream transform.
</summary>
</member>
<member name="M:LLama.LLamaTransforms.EmptyTextOutputStreamTransform.TransformAsync(System.Collections.Generic.IAsyncEnumerable{System.String})">
<inheritdoc />
</member>
<member name="M:LLama.LLamaTransforms.EmptyTextOutputStreamTransform.Clone">
<inheritdoc />
</member>
<member name="T:LLama.LLamaTransforms.KeywordTextOutputStreamTransform">
<summary>
A text output transform that removes the keywords from the response.
</summary>
</member>
<member name="P:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.Keywords">
<summary>
Keywords that you want to remove from the response.
This property is used for JSON serialization.
</summary>
</member>
<member name="P:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.MaxKeywordLength">
<summary>
Maximum length of the keywords.
This property is used for JSON serialization.
</summary>
</member>
<member name="P:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.RemoveAllMatchedTokens">
<summary>
If true, all tokens related to a matched keyword are removed.
Otherwise only the keyword itself is removed.
This property is used for JSON serialization.
</summary>
</member>
<member name="M:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.#ctor(System.Collections.Generic.HashSet{System.String},System.Int32,System.Boolean)">
<summary>
JSON constructor.
</summary>
</member>
<member name="M:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.#ctor(System.Collections.Generic.IEnumerable{System.String},System.Int32,System.Boolean)">
<summary>
</summary>
<param name="keywords">Keywords that you want to remove from the response.</param>
<param name="redundancyLength">The extra length when searching for the keyword. For example, if your only keyword is "highlight",
maybe the token you get is "\r\nhighligt". In this condition, if redundancyLength=0, the token cannot be successfully matched because the length of "\r\nhighligt" (10)
has already exceeded the maximum length of the keywords (8). On the contrary, setting redundancyLengyh &gt;= 2 leads to successful match.
The larger the redundancyLength is, the lower the processing speed. But as an experience, it won't introduce too much performance impact when redundancyLength &lt;= 5 </param>
<param name="removeAllMatchedTokens">If set to true, when getting a matched keyword, all the related tokens will be removed. Otherwise only the part of keyword will be removed.</param>
</member>
<member name="M:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.Clone">
<inheritdoc />
</member>
<member name="M:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.TransformAsync(System.Collections.Generic.IAsyncEnumerable{System.String})">
<inheritdoc />
</member>
<member name="T:LLama.LLamaWeights">
<summary>
A set of model weights, loaded into memory.
</summary>
</member>
<member name="P:LLama.LLamaWeights.NativeHandle">
<summary>
The native handle, which is used in the native APIs
</summary>
<remarks>Be careful how you use this!</remarks>
</member>
<member name="P:LLama.LLamaWeights.ContextSize">
<summary>
Total number of tokens in the context
</summary>
</member>
<member name="P:LLama.LLamaWeights.SizeInBytes">
<summary>
Get the size of this model in bytes
</summary>
</member>
<member name="P:LLama.LLamaWeights.ParameterCount">
<summary>
Get the number of parameters in this model
</summary>
</member>
<member name="P:LLama.LLamaWeights.EmbeddingSize">
<summary>
Dimension of embedding vectors
</summary>
</member>
<member name="P:LLama.LLamaWeights.Vocab">
<summary>
Get the special tokens of this model
</summary>
</member>
<member name="P:LLama.LLamaWeights.Metadata">
<summary>
All metadata keys in this model
</summary>
</member>
<member name="M:LLama.LLamaWeights.LoadFromFile(LLama.Abstractions.IModelParams)">
<summary>
Load weights into memory
</summary>
<param name="params"></param>
<returns></returns>
</member>
<member name="M:LLama.LLamaWeights.LoadFromFileAsync(LLama.Abstractions.IModelParams,System.Threading.CancellationToken,System.IProgress{System.Single})">
<summary>
Load weights into memory
</summary>
<param name="params">Parameters to use to load the model</param>
<param name="token">A cancellation token that can interrupt model loading</param>
<param name="progressReporter">Receives progress updates as the model loads (0 to 1)</param>
<returns></returns>
<exception cref="T:LLama.Exceptions.LoadWeightsFailedException">Thrown if weights failed to load for any reason. e.g. Invalid file format or loading cancelled.</exception>
<exception cref="T:System.OperationCanceledException">Thrown if the cancellation token is cancelled.</exception>
</member>
<member name="M:LLama.LLamaWeights.Dispose">
<inheritdoc />
</member>
<member name="M:LLama.LLamaWeights.CreateContext(LLama.Abstractions.IContextParams,Microsoft.Extensions.Logging.ILogger)">
<summary>
Create a llama_context using this model
</summary>
<param name="params"></param>
<param name="logger"></param>
<returns></returns>
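            <example>
            A minimal loading sketch ("model.gguf" is a placeholder path; the logger argument is assumed optional):
            <code>
            var parameters = new ModelParams("model.gguf");
            using var weights = LLamaWeights.LoadFromFile(parameters);
            using var context = weights.CreateContext(parameters);
            Console.WriteLine($"Loaded {weights.ParameterCount} parameters; context size {context.ContextSize}");
            </code>
            </example>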
</member>
<member name="M:LLama.LLamaWeights.Tokenize(System.String,System.Boolean,System.Boolean,System.Text.Encoding)">
<summary>
Convert a string of text into tokens
</summary>
<param name="text"></param>
<param name="add_bos"></param>
<param name="encoding"></param>
<param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
<returns></returns>
</member>
<member name="T:LLama.LLavaWeights">
<summary>
A set of llava model weights (mmproj), loaded into memory.
</summary>
</member>
<member name="P:LLama.LLavaWeights.NativeHandle">
<summary>
The native handle, which is used in the native APIs
</summary>
<remarks>Be careful how you use this!</remarks>
</member>
<member name="M:LLama.LLavaWeights.LoadFromFile(System.String)">
<summary>
Load weights into memory
</summary>
<param name="mmProject">path to the "mmproj" model file</param>
<returns></returns>
</member>
<member name="M:LLama.LLavaWeights.LoadFromFileAsync(System.String,System.Threading.CancellationToken)">
<summary>
Load weights into memory
</summary>
<param name="mmProject">path to the "mmproj" model file</param>
<param name="token"></param>
<returns></returns>
</member>
<member name="M:LLama.LLavaWeights.CreateImageEmbeddings(LLama.LLamaContext,System.Byte[])">
<summary>
Create the Image Embeddings from the bytes of an image.
</summary>
<param name="ctxLlama"></param>
<param name="image">Image bytes. Supported formats:
<list type="bullet">
<item>JPG</item>
<item>PNG</item>
<item>BMP</item>
<item>TGA</item>
</list>
</param>
<returns></returns>
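            <example>
            A hedged llava sketch ("mmproj.gguf" and "image.jpg" are placeholder paths), assuming `context` is an existing <see cref="T:LLama.LLamaContext"/>:
            <code>
            using var llava = LLavaWeights.LoadFromFile("mmproj.gguf");
            var imageBytes = File.ReadAllBytes("image.jpg");

            // Produce embeddings for the image, then evaluate them into the context.
            using var embed = llava.CreateImageEmbeddings(context, imageBytes);
            var nPast = 0;
            llava.EvalImageEmbed(context, embed, ref nPast);
            </code>
            </example>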
</member>
<member name="M:LLama.LLavaWeights.CreateImageEmbeddings(System.Byte[],System.Int32)">
<summary>
Create the Image Embeddings.
</summary>
<param name="image">Image in binary format (it supports jpeg format only)</param>
<param name="threads">Number of threads to use</param>
<returns>return the SafeHandle of these embeddings</returns>
</member>
<member name="M:LLama.LLavaWeights.CreateImageEmbeddings(LLama.LLamaContext,System.String)">
<summary>
Create the Image Embeddings from an image file.
</summary>
<param name="ctxLlama"></param>
<param name="image">Path to the image file. Supported formats:
<list type="bullet">
<item>JPG</item>
<item>PNG</item>
<item>BMP</item>
<item>TGA</item>
</list>
</param>
<returns></returns>
<exception cref="T:System.InvalidOperationException"></exception>
</member>
<member name="M:LLama.LLavaWeights.CreateImageEmbeddings(System.String,System.Int32)">
<summary>
Create the Image Embeddings from an image file.
</summary>
<param name="image">Path to the image file. Supported formats:
<list type="bullet">
<item>JPG</item>
<item>PNG</item>
<item>BMP</item>
<item>TGA</item>
</list>
</param>
<param name="threads"></param>
<returns></returns>
<exception cref="T:System.InvalidOperationException"></exception>
</member>
<member name="M:LLama.LLavaWeights.EvalImageEmbed(LLama.LLamaContext,LLama.Native.SafeLlavaImageEmbedHandle,System.Int32@)">
<summary>
Eval the image embeddings
</summary>
<param name="ctxLlama"></param>
<param name="imageEmbed"></param>
<param name="n_past"></param>
<returns></returns>
</member>
<member name="M:LLama.LLavaWeights.Dispose">
<inheritdoc />
</member>
<member name="T:LLama.Native.DecodeResult">
<summary>
Return codes from llama_decode
</summary>
</member>
<member name="F:LLama.Native.DecodeResult.Error">
<summary>
An unspecified error
</summary>
</member>
<member name="F:LLama.Native.DecodeResult.Ok">
<summary>
Ok.
</summary>
</member>
<member name="F:LLama.Native.DecodeResult.NoKvSlot">
<summary>
Could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
</summary>
</member>
<member name="T:LLama.Native.EncodeResult">
<summary>
Return codes from llama_encode
</summary>
</member>
<member name="F:LLama.Native.EncodeResult.Error">
<summary>
An unspecified error
</summary>
</member>
<member name="F:LLama.Native.EncodeResult.Ok">
<summary>
Ok.
</summary>
</member>
<member name="T:LLama.Native.GGMLType">
<summary>
Possible GGML quantisation types
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_F32">
<summary>
Full 32 bit float
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_F16">
<summary>
16 bit float
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q4_0">
<summary>
4 bit float
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q4_1">
<summary>
4 bit float
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q5_0">
<summary>
5 bit float
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q5_1">
<summary>
5 bit float
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q8_0">
<summary>
8 bit float
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q8_1">
<summary>
8 bit float
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q2_K">
<summary>
"type-1" 2-bit quantization in super-blocks containing 16 blocks, each block having 16 weight.
Block scales and mins are quantized with 4 bits. This ends up effectively using 2.5625 bits per weight (bpw)
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q3_K">
<summary>
"type-0" 3-bit quantization in super-blocks containing 16 blocks, each block having 16 weights.
Scales are quantized with 6 bits. This ends up using 3.4375 bpw.
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q4_K">
<summary>
"type-1" 4-bit quantization in super-blocks containing 8 blocks, each block having 32 weights.
Scales and mins are quantized with 6 bits. This ends up using 4.5 bpw.
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q5_K">
<summary>
"type-1" 5-bit quantization. Same super-block structure as GGML_TYPE_Q4_K resulting in 5.5 bpw
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q6_K">
<summary>
"type-0" 6-bit quantization. Super-blocks with 16 blocks, each block having 16 weights.
Scales are quantized with 8 bits. This ends up using 6.5625 bpw
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q8_K">
<summary>
"type-0" 8-bit quantization. Only used for quantizing intermediate results.
The difference to the existing Q8_0 is that the block size is 256. All 2-6 bit dot products are implemented for this quantization type.
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_I8">
<summary>
Integer, 8 bit
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_I16">
<summary>
Integer, 16 bit
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_I32">
<summary>
Integer, 32 bit
</summary>
</member>
<member name="F:LLama.Native.GGMLType.GGML_TYPE_COUNT">
<summary>
The value of this entry is the number of possible quant types.
</summary>
</member>
<member name="T:LLama.Native.GPUSplitMode">
<summary>
</summary>
<remarks>llama_split_mode</remarks>
</member>
<member name="F:LLama.Native.GPUSplitMode.None">
<summary>
Single GPU
</summary>
</member>
<member name="F:LLama.Native.GPUSplitMode.Layer">
<summary>
Split layers and KV across GPUs
</summary>
</member>
<member name="F:LLama.Native.GPUSplitMode.Row">
<summary>
Split layers and KV across GPUs, using tensor parallelism if supported
</summary>
</member>
<member name="T:LLama.Native.GroupDisposable">
<summary>
Disposes all contained disposables when this class is disposed
</summary>
</member>
<member name="M:LLama.Native.GroupDisposable.Finalize">
<inheritdoc />
</member>
<member name="M:LLama.Native.GroupDisposable.Dispose">
<inheritdoc />
</member>
<member name="T:LLama.Native.LLamaAttentionType">
<summary>
</summary>
<remarks>llama_attention_type</remarks>
</member>
<member name="T:LLama.Native.LLamaBatch">
<summary>
A batch allows submitting multiple tokens to multiple sequences simultaneously
</summary>
</member>
<member name="F:LLama.Native.LLamaBatch._logitPositions">
<summary>
Keep a list of where logits can be sampled from
</summary>
</member>
<member name="P:LLama.Native.LLamaBatch.LogitPositionCount">
<summary>
Get the number of logit positions that will be generated from this batch
</summary>
</member>
<member name="P:LLama.Native.LLamaBatch.TokenCount">
<summary>
The number of tokens in this batch
</summary>
</member>
<member name="P:LLama.Native.LLamaBatch.TokenCapacity">
<summary>
Maximum number of tokens that can be added to this batch (automatically grows if exceeded)
</summary>
</member>
<member name="P:LLama.Native.LLamaBatch.SequenceCapacity">
<summary>
Maximum number of sequences a token can be assigned to (automatically grows if exceeded)
</summary>
</member>
<member name="M:LLama.Native.LLamaBatch.#ctor">
<summary>
Create a new batch for submitting inputs to llama.cpp
</summary>
</member>
<member name="M:LLama.Native.LLamaBatch.Add(LLama.Native.LLamaToken,LLama.Native.LLamaPos,System.ReadOnlySpan{LLama.Native.LLamaSeqId},System.Boolean)">
<summary>
Add a single token to the batch at the same position in several sequences
</summary>
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
<param name="token">The token to add</param>
<param name="pos">The position to add it att</param>
<param name="sequences">The set of sequences to add this token to</param>
<param name="logits"></param>
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
</member>
<member name="M:LLama.Native.LLamaBatch.Add(LLama.Native.LLamaToken,LLama.Native.LLamaPos,System.Collections.Generic.List{LLama.Native.LLamaSeqId},System.Boolean)">
<summary>
Add a single token to the batch at the same position in several sequences
</summary>
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
<param name="token">The token to add</param>
<param name="pos">The position to add it att</param>
<param name="sequences">The set of sequences to add this token to</param>
<param name="logits"></param>
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
</member>
<member name="M:LLama.Native.LLamaBatch.Add(LLama.Native.LLamaToken,LLama.Native.LLamaPos,LLama.Native.LLamaSeqId,System.Boolean)">
<summary>
Add a single token to the batch at a certain position for a single sequences
</summary>
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
<param name="token">The token to add</param>
<param name="pos">The position to add it att</param>
<param name="sequence">The sequence to add this token to</param>
<param name="logits"></param>
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
</member>
<member name="M:LLama.Native.LLamaBatch.AddRange(System.ReadOnlySpan{LLama.Native.LLamaToken},LLama.Native.LLamaPos,LLama.Native.LLamaSeqId,System.Boolean)">
<summary>
Add a range of tokens to a single sequence, start at the given position.
</summary>
<param name="tokens">The tokens to add</param>
<param name="start">The starting position to add tokens at</param>
<param name="sequence">The sequence to add this token to</param>
<param name="logitsLast">Whether the final token should generate logits</param>
<returns>The index that the final token was added at. Use this for GetLogitsIth</returns>
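            <example>
            A hedged sketch of submitting a prompt to sequence zero and decoding it (`context` and `tokens` are assumed to exist;
            `LLamaSeqId.Zero` and the int-to-<see cref="T:LLama.Native.LLamaPos"/> conversion are assumptions about the current API):
            <code>
            var batch = new LLamaBatch();

            // Add the whole prompt, requesting logits only for the final token.
            var lastIndex = batch.AddRange(tokens, start: 0, sequence: LLamaSeqId.Zero, logitsLast: true);

            // Decode the batch; logits for the final token can then be sampled via lastIndex.
            context.Decode(batch);
            </code>
            </example>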
</member>
<member name="M:LLama.Native.LLamaBatch.Clear">
<summary>
Set TokenCount to zero for this batch
</summary>
</member>
<member name="M:LLama.Native.LLamaBatch.GetLogitPositions">
<summary>
Get the positions where logits can be sampled from
</summary>
<returns></returns>
</member>
<member name="T:LLama.Native.LLamaBatchEmbeddings">
<summary>
An embeddings batch allows submitting embeddings to multiple sequences simultaneously
</summary>
</member>
<member name="F:LLama.Native.LLamaBatchEmbeddings._logitPositions">
<summary>
Keep a list of where logits can be sampled from
</summary>
</member>
<member name="P:LLama.Native.LLamaBatchEmbeddings.LogitPositionCount">
<summary>
Get the number of logit positions that will be generated from this batch
</summary>
</member>
<member name="P:LLama.Native.LLamaBatchEmbeddings.EmbeddingDimensions">
<summary>
Size of an individual embedding
</summary>
</member>
<member name="P:LLama.Native.LLamaBatchEmbeddings.EmbeddingsCount">
<summary>
The number of items in this batch
</summary>
</member>
<member name="P:LLama.Native.LLamaBatchEmbeddings.EmbeddingsCapacity">
<summary>
Maximum number of items that can be added to this batch (automatically grows if exceeded)
</summary>
</member>
<member name="P:LLama.Native.LLamaBatchEmbeddings.SequenceCapacity">
<summary>
Maximum number of sequences an item can be assigned to (automatically grows if exceeded)
</summary>
</member>
<member name="M:LLama.Native.LLamaBatchEmbeddings.#ctor(System.Int32)">
<summary>
Create a new batch for submitting inputs to llama.cpp
</summary>
</member>
<member name="M:LLama.Native.LLamaBatchEmbeddings.Add(System.ReadOnlySpan{System.Single},LLama.Native.LLamaPos,System.ReadOnlySpan{LLama.Native.LLamaSeqId},System.Boolean)">
<summary>
Add a single embedding to the batch at the same position in several sequences
</summary>
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
<param name="embedding">The embedding to add</param>
<param name="pos">The position to add it att</param>
<param name="sequences">The set of sequences to add this token to</param>
<param name="logits"></param>
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
</member>
<member name="M:LLama.Native.LLamaBatchEmbeddings.Add(System.ReadOnlySpan{System.Single},LLama.Native.LLamaPos,LLama.Native.LLamaSeqId,System.Boolean)">
<summary>
Add a single embedding to the batch for a single sequence
</summary>
<param name="embedding"></param>
<param name="pos"></param>
<param name="sequence"></param>
<param name="logits"></param>
<returns>The index that the embedding was added at. Use this for GetLogitsIth</returns>
</member>
<member name="T:LLama.Native.LLamaBatchEmbeddings.WriteEmbeddingsDelegate`1">
<summary>
Called by embeddings batch to write embeddings into a destination span
</summary>
<typeparam name="TParam">Type of user data parameter passed in</typeparam>
<param name="dest">Destination to write data to. Entire destination must be filled!</param>
<param name="parameter">User data parameter passed in</param>
</member>
<member name="M:LLama.Native.LLamaBatchEmbeddings.Add``1(``0,LLama.Native.LLamaBatchEmbeddings.WriteEmbeddingsDelegate{``0},LLama.Native.LLamaPos,System.ReadOnlySpan{LLama.Native.LLamaSeqId},System.Boolean)">
<summary>
Add a single embedding to the batch at the same position in several sequences
</summary>
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
<typeparam name="TParam">Type of userdata passed to write delegate</typeparam>
<param name="parameter">Userdata passed to write delegate</param>
<param name="write">Delegate called once to write data into a span</param>
<param name="pos">Position to write this embedding to</param>
<param name="sequences">All sequences to assign this embedding to</param>
<param name="logits">Whether logits should be generated for this embedding</param>
<returns>The index that the embedding was added at. Use this for GetLogitsIth</returns>
</member>
<member name="M:LLama.Native.LLamaBatchEmbeddings.Add``1(``0,LLama.Native.LLamaBatchEmbeddings.WriteEmbeddingsDelegate{``0},LLama.Native.LLamaPos,LLama.Native.LLamaSeqId,System.Boolean)">
<summary>
Add a single embedding to the batch at a position for one sequence
</summary>
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
<typeparam name="TParam">Type of userdata passed to write delegate</typeparam>
<param name="parameter">Userdata passed to write delegate</param>
<param name="write">Delegate called once to write data into a span</param>
<param name="pos">Position to write this embedding to</param>
<param name="sequence">Sequence to assign this embedding to</param>
<param name="logits">Whether logits should be generated for this embedding</param>
<returns>The index that the embedding was added at. Use this for GetLogitsIth</returns>
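<example>
A minimal sketch, assuming an existing <see cref="T:LLama.Native.LLamaBatchEmbeddings"/> and a <c>float[]</c> named <c>source</c> holding one embedding of length <see cref="P:LLama.Native.LLamaBatchEmbeddings.EmbeddingDimensions"/>; the write delegate copies it straight into the batch's internal buffer:
<code>
batch.Add(
    source,
    (dest, src) =&gt; src.CopyTo(dest), // must fill the entire destination span
    0,                               // position within the sequence
    LLamaSeqId.Zero,
    false);                          // no logits for this embedding
</code>
</example>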
</member>
<member name="M:LLama.Native.LLamaBatchEmbeddings.Clear">
<summary>
Set EmbeddingsCount to zero for this batch
</summary>
</member>
<member name="M:LLama.Native.LLamaBatchEmbeddings.GetLogitPositions(System.Span{System.ValueTuple{LLama.Native.LLamaSeqId,System.Int32}})">
<summary>
Get the positions where logits can be sampled from
</summary>
<returns></returns>
</member>
<member name="T:LLama.Native.LLamaChatMessage">
<summary>
</summary>
<remarks>llama_chat_message</remarks>
</member>
<member name="F:LLama.Native.LLamaChatMessage.role">
<summary>
Pointer to the null terminated bytes that make up the role string
</summary>
</member>
<member name="F:LLama.Native.LLamaChatMessage.content">
<summary>
Pointer to the null terminated bytes that make up the content string
</summary>
</member>
<member name="T:LLama.Native.LlamaProgressCallback">
<summary>
Called by llama.cpp with a progress value between 0 and 1
</summary>
<param name="progress"></param>
<param name="ctx"></param>
<returns>If the provided progress_callback returns true, model loading continues.
If it returns false, model loading is immediately aborted.</returns>
<remarks>llama_progress_callback</remarks>
</member>
<member name="T:LLama.Native.LLamaContextParams">
<summary>
A C# representation of the llama.cpp `llama_context_params` struct
</summary>
<remarks>changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
https://github.com/ggerganov/llama.cpp/pull/7544</remarks>
</member>
<member name="F:LLama.Native.LLamaContextParams.n_ctx">
<summary>
text context, 0 = from model
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.n_batch">
<summary>
logical maximum batch size that can be submitted to llama_decode
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.n_ubatch">
<summary>
physical maximum batch size
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.n_seq_max">
<summary>
max number of sequences (i.e. distinct states for recurrent models)
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.n_threads">
<summary>
number of threads to use for generation
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.n_threads_batch">
<summary>
number of threads to use for batch processing
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.rope_scaling_type">
<summary>
RoPE scaling type, from `enum llama_rope_scaling_type`
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.llama_pooling_type">
<summary>
whether to pool (sum) embedding results by sequence id
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.attention_type">
<summary>
Attention type to use for embeddings
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.rope_freq_base">
<summary>
RoPE base frequency, 0 = from model
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.rope_freq_scale">
<summary>
RoPE frequency scaling factor, 0 = from model
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.yarn_ext_factor">
<summary>
YaRN extrapolation mix factor, negative = from model
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.yarn_attn_factor">
<summary>
YaRN magnitude scaling factor
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.yarn_beta_fast">
<summary>
YaRN low correction dim
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.yarn_beta_slow">
<summary>
YaRN high correction dim
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.yarn_orig_ctx">
<summary>
YaRN original context size
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.defrag_threshold">
<summary>
defragment the KV cache if holes/size &gt; defrag_threshold. Set to &lt; 0 to disable (default)
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.cb_eval">
<summary>
ggml_backend_sched_eval_callback
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.cb_eval_user_data">
<summary>
User data passed into cb_eval
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.type_k">
<summary>
data type for K cache. <b>EXPERIMENTAL</b>
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.type_v">
<summary>
data type for V cache. <b>EXPERIMENTAL</b>
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams._logits_all">
<summary>
Deprecated!
</summary>
</member>
<member name="P:LLama.Native.LLamaContextParams.embeddings">
<summary>
if true, extract embeddings (together with logits)
</summary>
</member>
<member name="P:LLama.Native.LLamaContextParams.offload_kqv">
<summary>
whether to offload the KQV ops (including the KV cache) to GPU
</summary>
</member>
<member name="P:LLama.Native.LLamaContextParams.flash_attention">
<summary>
whether to use flash attention. <b>EXPERIMENTAL</b>
</summary>
</member>
<member name="P:LLama.Native.LLamaContextParams.no_perf">
<summary>
whether to disable performance timing measurements
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.abort_callback">
<summary>
ggml_abort_callback
</summary>
</member>
<member name="F:LLama.Native.LLamaContextParams.abort_callback_user_data">
<summary>
User data passed into abort_callback
</summary>
</member>
<member name="M:LLama.Native.LLamaContextParams.Default">
<summary>
Get the default LLamaContextParams
</summary>
<returns></returns>
</member>
<member name="T:LLama.Native.LLamaFtype">
<summary>
Supported model file types
</summary>
<remarks>C# representation of llama_ftype</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.ALL_F32">
<summary>
All f32
</summary>
<remarks>Benchmark@7B: 26GB</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_F16">
<summary>
Mostly f16
</summary>
<remarks>Benchmark@7B: 13GB</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q8_0">
<summary>
Mostly 8 bit
</summary>
<remarks>Benchmark@7B: 6.7GB, +0.0004ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q4_0">
<summary>
Mostly 4 bit
</summary>
<remarks>Benchmark@7B: 3.50GB, +0.2499 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q4_1">
<summary>
Mostly 4 bit
</summary>
<remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q5_0">
<summary>
Mostly 5 bit
</summary>
<remarks>Benchmark@7B: 4.30GB, +0.0796 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q5_1">
<summary>
Mostly 5 bit
</summary>
<remarks>Benchmark@7B: 4.70GB, +0.0415 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q2_K">
<summary>
K-Quant 2 bit
</summary>
<remarks>Benchmark@7B: 2.67GB, +0.8698 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q3_K_S">
<summary>
K-Quant 3 bit (Small)
</summary>
<remarks>Benchmark@7B: 2.75GB, +0.5505 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q3_K_M">
<summary>
K-Quant 3 bit (Medium)
</summary>
<remarks>Benchmark@7B: 3.06GB, +0.2437 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q3_K_L">
<summary>
K-Quant 3 bit (Large)
</summary>
<remarks>Benchmark@7B: 3.35GB, +0.1803 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q4_K_S">
<summary>
K-Quant 4 bit (Small)
</summary>
<remarks>Benchmark@7B: 3.56GB, +0.1149 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q4_K_M">
<summary>
K-Quant 4 bit (Medium)
</summary>
<remarks>Benchmark@7B: 3.80GB, +0.0535 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q5_K_S">
<summary>
K-Quant 5 bit (Small)
</summary>
<remarks>Benchmark@7B: 4.33GB, +0.0353 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q5_K_M">
<summary>
K-Quant 5 bit (Medium)
</summary>
<remarks>Benchmark@7B: 4.45GB, +0.0142 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q6_K">
<summary>
K-Quant 6 bit
</summary>
<remarks>Benchmark@7B: 5.15GB, +0.0044 ppl</remarks>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ2_XXS">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ2_XS">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q2_K_S">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ3_K_XS">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ3_XXS">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ1_S">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ4_NL">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ3_S">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ3_M">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ2_S">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ2_M">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ4_XS">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ1_M">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.MOSTLY_BF16">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.LLAMA_FTYPE_MOSTLY_TQ1_0">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.LLAMA_FTYPE_MOSTLY_TQ2_0">
<summary>
except 1d tensors
</summary>
</member>
<member name="F:LLama.Native.LLamaFtype.GUESSED">
<summary>
File type was not specified
</summary>
</member>
<member name="T:LLama.Native.LLamaKvCacheViewSafeHandle">
<summary>
A safe handle for a LLamaKvCacheView
</summary>
</member>
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.CellCount">
<summary>
Number of KV cache cells. This will be the same as the context size.
</summary>
</member>
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.TokenCount">
<summary>
Get the total number of tokens in the KV cache.
For example, if there are two populated
cells, the first with 1 sequence id in it and the second with 2 sequence
ids then you'll have 3 tokens.
</summary>
</member>
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.MaxSequenceCount">
<summary>
Maximum number of sequences visible for a cell. There may be more sequences than this
in reality; this is simply the maximum number this view can see.
</summary>
</member>
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.UsedCellCount">
<summary>
Number of populated cache cells
</summary>
</member>
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.MaxContiguous">
<summary>
Maximum contiguous empty slots in the cache.
</summary>
</member>
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.MaxContiguousIdx">
<summary>
Index to the start of the MaxContiguous slot range. Can be negative when cache is full.
</summary>
</member>
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.#ctor(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView)">
<summary>
Initialize a LLamaKvCacheViewSafeHandle which will call `llama_kv_cache_view_free` when disposed
</summary>
<param name="ctx"></param>
<param name="view"></param>
</member>
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.Allocate(LLama.Native.SafeLLamaContextHandle,System.Int32)">
<summary>
Allocate a new KV cache view which can be used to inspect the KV cache
</summary>
<param name="ctx"></param>
<param name="maxSequences">The maximum number of sequences visible in this view per cell</param>
<returns></returns>
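<example>
A minimal sketch, assuming an existing <see cref="T:LLama.Native.SafeLLamaContextHandle"/> named <c>ctx</c>, of inspecting the KV cache through a view:
<code>
using (var view = LLamaKvCacheViewSafeHandle.Allocate(ctx, 4))
{
    view.Update(); // read the current KV cache state into the view
    for (var i = 0; i &lt; view.CellCount; i++)
    {
        var cell = view.GetCell(i);               // per-cell data, e.g. its position
        var sequences = view.GetCellSequences(i); // sequences assigned to this cell
    }
}
</code>
</example>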
</member>
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.ReleaseHandle">
<inheritdoc />
</member>
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.Update">
<summary>
Read the current KV cache state into this view.
</summary>
</member>
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.GetNativeView">
<summary>
Get the raw KV cache view
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.GetCell(System.Int32)">
<summary>
Get the cell at the given index
</summary>
<param name="index">The index of the cell [0, CellCount)</param>
<returns>Data about the cell at the given index</returns>
<exception cref="T:System.ArgumentOutOfRangeException">Thrown if index is out of range (0 &lt;= index &lt; CellCount)</exception>
</member>
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.GetCellSequences(System.Int32)">
<summary>
Get all of the sequences assigned to the cell at the given index. The returned span always contains <see cref="P:LLama.Native.LLamaKvCacheViewSafeHandle.MaxSequenceCount"/> entries,
even if the cell actually has more sequences than that; allocate a new view with a larger maxSequences parameter
if necessary. Invalid sequences will be negative values.
</summary>
<param name="index">The index of the cell [0, CellCount)</param>
<returns>A span containing the sequences assigned to this cell</returns>
<exception cref="T:System.ArgumentOutOfRangeException">Thrown if index is out of range (0 &lt;= index &lt; CellCount)</exception>
</member>
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.llama_kv_cache_view_init(LLama.Native.SafeLLamaContextHandle,System.Int32)">
<summary>
Create an empty KV cache view. (use only for debugging purposes)
</summary>
<param name="ctx"></param>
<param name="n_seq_max"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.llama_kv_cache_view_free(LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView@)">
<summary>
Free a KV cache view. (use only for debugging purposes)
</summary>
</member>
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.llama_kv_cache_view_update(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView@)">
<summary>
Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
</summary>
<param name="ctx"></param>
<param name="view"></param>
</member>
<member name="T:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheViewCell">
<summary>
Information associated with an individual cell in the KV cache view (llama_kv_cache_view_cell)
</summary>
</member>
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheViewCell.pos">
<summary>
The position for this cell. Takes KV cache shifts into account.
May be negative if the cell is not populated.
</summary>
</member>
<member name="T:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView">
<summary>
An updateable view of the KV cache (llama_kv_cache_view)
</summary>
</member>
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.n_cells">
<summary>
Number of KV cache cells. This will be the same as the context size.
</summary>
</member>
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.n_seq_max">
<summary>
Maximum number of sequences that can exist in a cell. It's not an error
if there are more sequences in a cell than this value; however, they will
not be visible in the view cells_sequences.
</summary>
</member>
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.token_count">
<summary>
Number of tokens in the cache. For example, if there are two populated
cells, the first with 1 sequence id in it and the second with 2 sequence
ids then you'll have 3 tokens.
</summary>
</member>
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.used_cells">
<summary>
Number of populated cache cells.
</summary>
</member>
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.max_contiguous">
<summary>
Maximum contiguous empty slots in the cache.
</summary>
</member>
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.max_contiguous_idx">
<summary>
Index to the start of the max_contiguous slot range. Can be negative
when cache is full.
</summary>
</member>
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.cells">
<summary>
Information for an individual cell.
</summary>
</member>
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.cells_sequences">
<summary>
The sequences for each cell. There will be n_seq_max items per cell.
</summary>
</member>
<member name="T:LLama.Native.LLamaLogLevel">
<summary>
Severity level of a log message. This enum should always be aligned with
the one defined on llama.cpp side at
https://github.com/ggerganov/llama.cpp/blob/0eb4e12beebabae46d37b78742f4c5d4dbe52dc1/ggml/include/ggml.h#L559
</summary>
</member>
<member name="F:LLama.Native.LLamaLogLevel.None">
<summary>
Logs are never written.
</summary>
</member>
<member name="F:LLama.Native.LLamaLogLevel.Debug">
<summary>
Logs that are used for interactive investigation during development.
</summary>
</member>
<member name="F:LLama.Native.LLamaLogLevel.Info">
<summary>
Logs that track the general flow of the application.
</summary>
</member>
<member name="F:LLama.Native.LLamaLogLevel.Warning">
<summary>
Logs that highlight an abnormal or unexpected event in the application flow, but do not otherwise cause the application execution to stop.
</summary>
</member>
<member name="F:LLama.Native.LLamaLogLevel.Error">
<summary>
Logs that highlight when the current flow of execution is stopped due to a failure.
</summary>
</member>
<member name="F:LLama.Native.LLamaLogLevel.Continue">
<summary>
Continue log level is equivalent to None in the way it is used in llama.cpp.
</summary>
</member>
<member name="F:LLama.Native.LLamaLogLevelExtensions._previous">
<summary>
Keeps track of the previous log level to be able to handle the log level <see cref="F:LLama.Native.LLamaLogLevel.Continue"/>.
</summary>
</member>
<member name="T:LLama.Native.LLamaModelMetadataOverride">
<summary>
Override a key/value pair in the llama model metadata (llama_model_kv_override)
</summary>
</member>
<member name="F:LLama.Native.LLamaModelMetadataOverride.key">
<summary>
Key to override
</summary>
</member>
<member name="F:LLama.Native.LLamaModelMetadataOverride.Tag">
<summary>
Type of value
</summary>
</member>
<member name="F:LLama.Native.LLamaModelMetadataOverride.PADDING">
<summary>
Add 4 bytes of padding, to align the next fields to 8 bytes
</summary>
</member>
<member name="F:LLama.Native.LLamaModelMetadataOverride.IntValue">
<summary>
Value, **must** only be used if Tag == LLAMA_KV_OVERRIDE_INT
</summary>
</member>
<member name="F:LLama.Native.LLamaModelMetadataOverride.FloatValue">
<summary>
Value, **must** only be used if Tag == LLAMA_KV_OVERRIDE_FLOAT
</summary>
</member>
<member name="F:LLama.Native.LLamaModelMetadataOverride.BoolValue">
<summary>
Value, **must** only be used if Tag == LLAMA_KV_OVERRIDE_BOOL
</summary>
</member>
<member name="F:LLama.Native.LLamaModelMetadataOverride.StringValue">
<summary>
Value, **must** only be used if Tag == String
</summary>
</member>
<member name="T:LLama.Native.LLamaModelKvOverrideType">
<summary>
Specifies what type of value is being overridden by LLamaModelKvOverride
</summary>
<remarks>llama_model_kv_override_type</remarks>
</member>
<member name="F:LLama.Native.LLamaModelKvOverrideType.Int">
<summary>
Overriding an int value
</summary>
</member>
<member name="F:LLama.Native.LLamaModelKvOverrideType.Float">
<summary>
Overriding a float value
</summary>
</member>
<member name="F:LLama.Native.LLamaModelKvOverrideType.Bool">
<summary>
Overriding a bool value
</summary>
</member>
<member name="F:LLama.Native.LLamaModelKvOverrideType.String">
<summary>
Overriding a string value
</summary>
</member>
<member name="T:LLama.Native.LLamaModelParams">
<summary>
A C# representation of the llama.cpp `llama_model_params` struct
</summary>
</member>
<member name="F:LLama.Native.LLamaModelParams.devices">
<summary>
NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
todo: add support for llama_model_params.devices
</summary>
</member>
<member name="F:LLama.Native.LLamaModelParams.n_gpu_layers">
<summary>
number of layers to store in VRAM
</summary>
</member>
<member name="F:LLama.Native.LLamaModelParams.split_mode">
<summary>
how to split the model across multiple GPUs
</summary>
</member>
<member name="F:LLama.Native.LLamaModelParams.main_gpu">
<summary>
the GPU that is used for the entire model when split_mode is LLAMA_SPLIT_MODE_NONE
</summary>
</member>
<member name="F:LLama.Native.LLamaModelParams.tensor_split">
<summary>
how to split layers across multiple GPUs (size: <see cref="M:LLama.Native.NativeApi.llama_max_devices"/>)
</summary>
</member>
<member name="F:LLama.Native.LLamaModelParams.progress_callback">
<summary>
called with a progress value between 0 and 1, pass NULL to disable. If the provided progress_callback
returns true, model loading continues. If it returns false, model loading is immediately aborted.
</summary>
</member>
<member name="F:LLama.Native.LLamaModelParams.progress_callback_user_data">
<summary>
context pointer passed to the progress callback
</summary>
</member>
<member name="F:LLama.Native.LLamaModelParams.kv_overrides">
<summary>
override key-value pairs of the model meta data
</summary>
</member>
<member name="P:LLama.Native.LLamaModelParams.vocab_only">
<summary>
only load the vocabulary, no weights
</summary>
</member>
<member name="P:LLama.Native.LLamaModelParams.use_mmap">
<summary>
use mmap if possible
</summary>
</member>
<member name="P:LLama.Native.LLamaModelParams.use_mlock">
<summary>
force system to keep model in RAM
</summary>
</member>
<member name="P:LLama.Native.LLamaModelParams.check_tensors">
<summary>
validate model tensor data
</summary>
</member>
<member name="M:LLama.Native.LLamaModelParams.Default">
<summary>
Create a LLamaModelParams with default values
</summary>
<returns></returns>
</member>
<member name="T:LLama.Native.LLamaModelQuantizeParams">
<summary>
Quantizer parameters used in the native API
</summary>
<remarks>llama_model_quantize_params</remarks>
</member>
<member name="F:LLama.Native.LLamaModelQuantizeParams.nthread">
<summary>
number of threads to use for quantizing, if &lt;=0 will use std::thread::hardware_concurrency()
</summary>
</member>
<member name="F:LLama.Native.LLamaModelQuantizeParams.ftype">
<summary>
quantize to this llama_ftype
</summary>
</member>
<member name="F:LLama.Native.LLamaModelQuantizeParams.output_tensor_type">
<summary>
output tensor type
</summary>
</member>
<member name="F:LLama.Native.LLamaModelQuantizeParams.token_embedding_type">
<summary>
token embeddings tensor type
</summary>
</member>
<member name="P:LLama.Native.LLamaModelQuantizeParams.allow_requantize">
<summary>
allow quantizing non-f32/f16 tensors
</summary>
</member>
<member name="P:LLama.Native.LLamaModelQuantizeParams.quantize_output_tensor">
<summary>
quantize output.weight
</summary>
</member>
<member name="P:LLama.Native.LLamaModelQuantizeParams.only_copy">
<summary>
only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
</summary>
</member>
<member name="P:LLama.Native.LLamaModelQuantizeParams.pure">
<summary>
quantize all tensors to the default type
</summary>
</member>
<member name="P:LLama.Native.LLamaModelQuantizeParams.keep_split">
<summary>
quantize to the same number of shards
</summary>
</member>
<member name="F:LLama.Native.LLamaModelQuantizeParams.imatrix">
<summary>
pointer to importance matrix data
</summary>
</member>
<member name="F:LLama.Native.LLamaModelQuantizeParams.kv_overrides">
<summary>
pointer to vector containing overrides
</summary>
</member>
<member name="M:LLama.Native.LLamaModelQuantizeParams.Default">
<summary>
Create a LLamaModelQuantizeParams with default values
</summary>
<returns></returns>
</member>
<member name="T:LLama.Native.LLamaNativeBatch">
<summary>
Input data for llama_decode
A llama_batch object can contain input about one or many sequences
The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens
</summary>
</member>
<member name="F:LLama.Native.LLamaNativeBatch.n_tokens">
<summary>
The number of items pointed at by pos, seq_id and logits.
</summary>
</member>
<member name="F:LLama.Native.LLamaNativeBatch.tokens">
<summary>
Either `n_tokens` of `llama_token`, or `NULL`, depending on how this batch was created
</summary>
</member>
<member name="F:LLama.Native.LLamaNativeBatch.embd">
<summary>
Either `n_tokens * embd * sizeof(float)` or `NULL`, depending on how this batch was created
</summary>
</member>
<member name="F:LLama.Native.LLamaNativeBatch.pos">
<summary>
the positions of the respective token in the sequence
(if set to NULL, the token position will be tracked automatically by llama_decode)
</summary>
</member>
<member name="F:LLama.Native.LLamaNativeBatch.n_seq_id">
<summary>
The number of sequence ids for the respective token (see https://github.com/ggerganov/llama.cpp/blob/master/llama.h#L139)
</summary>
</member>
<member name="F:LLama.Native.LLamaNativeBatch.seq_id">
<summary>
the sequence to which the respective token belongs
(if set to NULL, the sequence ID will be assumed to be 0)
</summary>
</member>
<member name="F:LLama.Native.LLamaNativeBatch.logits">
<summary>
if zero, the logits for the respective token will not be output
(if set to NULL, only the logits for last token will be returned)
</summary>
</member>
<member name="T:LLama.Native.LLamaPoolingType">
<summary>
</summary>
<remarks>llama_pooling_type</remarks>
</member>
<member name="F:LLama.Native.LLamaPoolingType.Unspecified">
<summary>
No specific pooling type. Use the model default if this is specified in <see cref="P:LLama.Abstractions.IContextParams.PoolingType"/>
</summary>
</member>
<member name="F:LLama.Native.LLamaPoolingType.None">
<summary>
Do not pool embeddings (per-token embeddings)
</summary>
</member>
<member name="F:LLama.Native.LLamaPoolingType.Mean">
<summary>
Take the mean of every token embedding
</summary>
</member>
<member name="F:LLama.Native.LLamaPoolingType.CLS">
<summary>
Return the embedding for the special "CLS" token
</summary>
</member>
<member name="F:LLama.Native.LLamaPoolingType.Rank">
<summary>
Used by reranking models to attach the classification head to the graph
</summary>
</member>
<member name="T:LLama.Native.LLamaPos">
<summary>
Indicates position in a sequence
</summary>
</member>
<member name="F:LLama.Native.LLamaPos.Value">
<summary>
The raw value
</summary>
</member>
<member name="M:LLama.Native.LLamaPos.#ctor(System.Int32)">
<summary>
Create a new LLamaPos
</summary>
<param name="value"></param>
</member>
<member name="M:LLama.Native.LLamaPos.op_Explicit(LLama.Native.LLamaPos)~System.Int32">
<summary>
Convert a LLamaPos into an integer (extract the raw value)
</summary>
<param name="pos"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaPos.op_Implicit(System.Int32)~LLama.Native.LLamaPos">
<summary>
Convert an integer into a LLamaPos
</summary>
<param name="value"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaPos.op_Increment(LLama.Native.LLamaPos)">
<summary>
Increment this position
</summary>
<param name="pos"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaPos.op_Decrement(LLama.Native.LLamaPos)">
<summary>
Decrement this position
</summary>
<param name="pos"></param>
<returns></returns>
</member>
<member name="T:LLama.Native.LLamaRopeType">
<summary>
</summary>
<remarks>llama_rope_type</remarks>
</member>
<member name="T:LLama.Native.LLamaSeqId">
<summary>
ID for a sequence in a batch
</summary>
</member>
<member name="F:LLama.Native.LLamaSeqId.Zero">
<summary>
LLamaSeqId with value 0
</summary>
</member>
<member name="F:LLama.Native.LLamaSeqId.Value">
<summary>
The raw value
</summary>
</member>
<member name="M:LLama.Native.LLamaSeqId.#ctor(System.Int32)">
<summary>
Create a new LLamaSeqId
</summary>
<param name="value"></param>
</member>
<member name="M:LLama.Native.LLamaSeqId.op_Explicit(LLama.Native.LLamaSeqId)~System.Int32">
<summary>
Convert a LLamaSeqId into an integer (extract the raw value)
</summary>
<param name="pos"></param>
</member>
<member name="M:LLama.Native.LLamaSeqId.op_Explicit(System.Int32)~LLama.Native.LLamaSeqId">
<summary>
Convert an integer into a LLamaSeqId
</summary>
<param name="value"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaSeqId.ToString">
<inheritdoc />
</member>
<member name="T:LLama.Native.LLamaPerfContextTimings">
<summary>
LLama performance information
</summary>
<remarks>llama_perf_context_data</remarks>
</member>
<member name="F:LLama.Native.LLamaPerfContextTimings.t_start_ms">
<summary>
Timestamp when reset was last called
</summary>
</member>
<member name="F:LLama.Native.LLamaPerfContextTimings.t_load_ms">
<summary>
Loading milliseconds
</summary>
</member>
<member name="F:LLama.Native.LLamaPerfContextTimings.t_p_eval_ms">
<summary>
total milliseconds spent prompt processing
</summary>
</member>
<member name="F:LLama.Native.LLamaPerfContextTimings.t_eval_ms">
<summary>
Total milliseconds in eval/decode calls
</summary>
</member>
<member name="F:LLama.Native.LLamaPerfContextTimings.n_p_eval">
<summary>
number of tokens in eval calls for the prompt (with batch size > 1)
</summary>
</member>
<member name="F:LLama.Native.LLamaPerfContextTimings.n_eval">
<summary>
number of eval calls
</summary>
</member>
<member name="P:LLama.Native.LLamaPerfContextTimings.ResetTimestamp">
<summary>
Timestamp when reset was last called
</summary>
</member>
<member name="P:LLama.Native.LLamaPerfContextTimings.Loading">
<summary>
Time spent loading
</summary>
</member>
<member name="P:LLama.Native.LLamaPerfContextTimings.PromptEval">
<summary>
total milliseconds spent prompt processing
</summary>
</member>
<member name="P:LLama.Native.LLamaPerfContextTimings.Eval">
<summary>
Total milliseconds in eval/decode calls
</summary>
</member>
<member name="P:LLama.Native.LLamaPerfContextTimings.PrompTokensEvaluated">
<summary>
number of tokens in eval calls for the prompt (with batch size > 1)
</summary>
</member>
<member name="P:LLama.Native.LLamaPerfContextTimings.TokensEvaluated">
<summary>
number of eval calls
</summary>
</member>
<member name="T:LLama.Native.LLamaSamplingTimings">
<summary>
LLama performance information
</summary>
<remarks>llama_perf_sampler_data</remarks>
</member>
<member name="T:LLama.Native.LLamaToken">
<summary>
A single token
</summary>
</member>
<member name="F:LLama.Native.LLamaToken.InvalidToken">
<summary>
The token value used when a token is inherently null
</summary>
</member>
<member name="F:LLama.Native.LLamaToken.Value">
<summary>
The raw value
</summary>
</member>
<member name="M:LLama.Native.LLamaToken.#ctor(System.Int32)">
<summary>
Create a new LLamaToken
</summary>
<param name="value"></param>
</member>
<member name="M:LLama.Native.LLamaToken.op_Explicit(LLama.Native.LLamaToken)~System.Int32">
<summary>
Convert a LLamaToken into an integer (extract the raw value)
</summary>
<param name="pos"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaToken.op_Implicit(System.Int32)~LLama.Native.LLamaToken">
<summary>
Convert an integer into a LLamaToken
</summary>
<param name="value"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaToken.GetAttributes(LLama.Native.SafeLlamaModelHandle)">
<summary>
Get attributes for this token
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaToken.GetAttributes(LLama.Native.SafeLlamaModelHandle.Vocabulary)">
<summary>
Get attributes for this token
</summary>
<param name="vocab"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaToken.GetScore(LLama.Native.SafeLlamaModelHandle.Vocabulary)">
<summary>
Get score for this token
</summary>
<param name="vocab"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaToken.IsControl(LLama.Native.SafeLlamaModelHandle)">
<summary>
Check if this is a control token
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaToken.IsControl(LLama.Native.SafeLlamaModelHandle.Vocabulary)">
<summary>
Check if this is a control token
</summary>
<param name="vocab"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaToken.IsEndOfGeneration(LLama.Native.SafeLlamaModelHandle)">
<summary>
Check if this token should end generation
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaToken.IsEndOfGeneration(LLama.Native.SafeLlamaModelHandle.Vocabulary)">
<summary>
Check if this token should end generation
</summary>
<param name="vocab"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaToken.ToString">
<inheritdoc />
</member>
<member name="T:LLama.Native.LLamaTokenAttr">
<summary>
Token attributes
</summary>
<remarks>C# equivalent of llama_token_attr</remarks>
</member>
<member name="T:LLama.Native.LLamaTokenData">
<summary>
A single token along with probability of this token being selected
</summary>
</member>
<member name="F:LLama.Native.LLamaTokenData.ID">
<summary>
token id
</summary>
</member>
<member name="F:LLama.Native.LLamaTokenData.Logit">
<summary>
log-odds of the token
</summary>
</member>
<member name="F:LLama.Native.LLamaTokenData.Probability">
<summary>
probability of the token
</summary>
</member>
<member name="M:LLama.Native.LLamaTokenData.#ctor(LLama.Native.LLamaToken,System.Single,System.Single)">
<summary>
Create a new LLamaTokenData
</summary>
<param name="id"></param>
<param name="logit"></param>
<param name="probability"></param>
</member>
<member name="T:LLama.Native.LLamaTokenDataArray">
<summary>
Contains an array of LLamaTokenData, potentially sorted.
</summary>
</member>
<member name="F:LLama.Native.LLamaTokenDataArray.Data">
<summary>
The LLamaTokenData
</summary>
</member>
<member name="F:LLama.Native.LLamaTokenDataArray.Sorted">
<summary>
Indicates if `data` is sorted by logits in descending order. If this is false the token data is in _no particular order_.
</summary>
</member>
<member name="M:LLama.Native.LLamaTokenDataArray.#ctor(System.Memory{LLama.Native.LLamaTokenData},System.Boolean)">
<summary>
Create a new LLamaTokenDataArray
</summary>
<param name="tokens"></param>
<param name="isSorted"></param>
</member>
<member name="M:LLama.Native.LLamaTokenDataArray.Create(System.ReadOnlySpan{System.Single})">
<summary>
Create a new LLamaTokenDataArray, copying the data from the given logits
</summary>
<param name="logits"></param>
<returns></returns>
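<example>
A minimal sketch, assuming <c>logits</c> is a <c>float[]</c> with one entry per vocabulary token:
<code>
var candidates = LLamaTokenDataArray.Create(logits);
// sort by logit (descending) and fill in the probability of each token
candidates.Softmax();
</code>
</example>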
</member>
<member name="M:LLama.Native.LLamaTokenDataArray.Create(System.ReadOnlySpan{System.Single},System.Memory{LLama.Native.LLamaTokenData})">
<summary>
Create a new LLamaTokenDataArray, copying the data from the given logits into temporary memory.
</summary>
<remarks>The memory must not be modified while this <see cref="T:LLama.Native.LLamaTokenDataArray"/> is in use.</remarks>
<param name="logits"></param>
<param name="buffer">Temporary memory which will be used to work on these logits. Must be at least as large as logits array</param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaTokenDataArray.OverwriteLogits(System.ReadOnlySpan{System.ValueTuple{LLama.Native.LLamaToken,System.Single}})">
<summary>
Overwrite the logit values for all given tokens
</summary>
<param name="values">tuples of token and logit value to overwrite</param>
</member>
<member name="M:LLama.Native.LLamaTokenDataArray.Softmax">
<summary>
Sorts candidate tokens by their logits in descending order and calculates probabilities based on the logits.
</summary>
</member>
<member name="T:LLama.Native.LLamaTokenDataArrayNative">
<summary>
Contains a pointer to an array of LLamaTokenData which is pinned in memory.
</summary>
<remarks>C# equivalent of llama_token_data_array</remarks>
</member>
<member name="F:LLama.Native.LLamaTokenDataArrayNative._data">
<summary>
A pointer to an array of LlamaTokenData
</summary>
<remarks>Memory must be pinned in place for all the time this LLamaTokenDataArrayNative is in use (i.e. `fixed` or `.Pin()`)</remarks>
</member>
<member name="F:LLama.Native.LLamaTokenDataArrayNative._size">
<summary>
Number of LLamaTokenData in the array
</summary>
</member>
<member name="F:LLama.Native.LLamaTokenDataArrayNative._selected">
<summary>
The index in the array (i.e. not the token id)
</summary>
</member>
<member name="P:LLama.Native.LLamaTokenDataArrayNative.Data">
<summary>
A pointer to an array of LlamaTokenData
</summary>
</member>
<member name="P:LLama.Native.LLamaTokenDataArrayNative.Sorted">
<summary>
Indicates if the items in the array are sorted, so the most likely token is first
</summary>
</member>
<member name="P:LLama.Native.LLamaTokenDataArrayNative.Selected">
<summary>
The index of the selected token (i.e. <b>not the token id</b>)
</summary>
</member>
<member name="P:LLama.Native.LLamaTokenDataArrayNative.Size">
<summary>
Number of LLamaTokenData in the array. Set this to shrink the array
</summary>
</member>
<member name="M:LLama.Native.LLamaTokenDataArrayNative.Create(LLama.Native.LLamaTokenDataArray,LLama.Native.LLamaTokenDataArrayNative@)">
<summary>
Create a new LLamaTokenDataArrayNative around the data in the LLamaTokenDataArray
</summary>
<param name="array">Data source</param>
<param name="native">Created native array</param>
<returns>A memory handle, pinning the data in place until disposed</returns>
</member>
<member name="T:LLama.Native.LLamaVocabNative">
<summary>
C# equivalent of llama_vocab struct. This struct is an opaque type, with no fields in the API and is only used for typed pointers.
</summary>
</member>
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_get_attr(LLama.Native.LLamaVocabNative*,LLama.Native.LLamaToken)">
<summary>
Get attributes for a specific token
</summary>
<param name="vocab"></param>
<param name="token"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_is_eog(LLama.Native.LLamaVocabNative*,LLama.Native.LLamaToken)">
<summary>
Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
</summary>
<param name="vocab"></param>
<param name="token"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_is_control(LLama.Native.LLamaVocabNative*,LLama.Native.LLamaToken)">
<summary>
Identify if the token id is a control token or a renderable token
</summary>
<param name="vocab"></param>
<param name="token"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_bos(LLama.Native.LLamaVocabNative*)">
<summary>
beginning-of-sentence
</summary>
<param name="vocab"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_eos(LLama.Native.LLamaVocabNative*)">
<summary>
end-of-sentence
</summary>
<param name="vocab"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_eot(LLama.Native.LLamaVocabNative*)">
<summary>
end-of-turn
</summary>
<param name="vocab"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_sep(LLama.Native.LLamaVocabNative*)">
<summary>
sentence separator
</summary>
<param name="vocab"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_nl(LLama.Native.LLamaVocabNative*)">
<summary>
next-line
</summary>
<param name="vocab"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_pad(LLama.Native.LLamaVocabNative*)">
<summary>
padding
</summary>
<param name="vocab"></param>
<returns></returns>
</member>
<member name="T:LLama.Native.LLamaVocabPreType">
<summary>
</summary>
<remarks>llama_vocab_pre_type</remarks>
</member>
<member name="T:LLama.Native.LLamaVocabType">
<summary>
</summary>
<remarks>llama_vocab_type</remarks>
</member>
<member name="F:LLama.Native.LLamaVocabType.None">
<summary>
For models without vocab
</summary>
</member>
<member name="F:LLama.Native.LLamaVocabType.SentencePiece">
<summary>
LLaMA tokenizer based on byte-level BPE with byte fallback
</summary>
</member>
<member name="F:LLama.Native.LLamaVocabType.BytePairEncoding">
<summary>
GPT-2 tokenizer based on byte-level BPE
</summary>
</member>
<member name="F:LLama.Native.LLamaVocabType.WordPiece">
<summary>
BERT tokenizer based on WordPiece
</summary>
</member>
<member name="F:LLama.Native.LLamaVocabType.Unigram">
<summary>
T5 tokenizer based on Unigram
</summary>
</member>
<member name="F:LLama.Native.LLamaVocabType.RWKV">
<summary>
RWKV tokenizer based on greedy tokenization
</summary>
</member>
<member name="T:LLama.Native.LLavaImageEmbed">
<summary>
LLaVa Image embeddings
</summary>
<remarks>llava_image_embed</remarks>
</member>
<member name="P:LLama.Native.NativeLibraryConfig.Instance">
<summary>
Set configurations for all the native libraries, including LLama and LLava
</summary>
</member>
<member name="P:LLama.Native.NativeLibraryConfig.All">
<summary>
Set configurations for all the native libraries, including LLama and LLava
</summary>
</member>
<member name="P:LLama.Native.NativeLibraryConfig.LLama">
<summary>
Configuration for LLama native library
</summary>
</member>
<member name="P:LLama.Native.NativeLibraryConfig.LLava">
<summary>
Configuration for LLava native library
</summary>
</member>
<member name="P:LLama.Native.NativeLibraryConfig.LibraryHasLoaded">
<summary>
Check if the native library has already been loaded. Configuration cannot be modified if this is true.
</summary>
</member>
<member name="M:LLama.Native.NativeLibraryConfig.WithLogCallback(LLama.Native.NativeLogConfig.LLamaLogCallback)">
<summary>
Set the log callback that will be used for all llama.cpp log messages
</summary>
<param name="callback"></param>
<exception cref="T:System.NotImplementedException"></exception>
</member>
<member name="M:LLama.Native.NativeLibraryConfig.WithLogCallback(Microsoft.Extensions.Logging.ILogger)">
<summary>
Set the log callback that will be used for all llama.cpp log messages
</summary>
<param name="logger"></param>
<exception cref="T:System.NotImplementedException"></exception>
</member>
<member name="M:LLama.Native.NativeLibraryConfig.DryRun(LLama.Abstractions.INativeLibrary@)">
<summary>
Try to load the native library with the current configurations,
but do not actually set it to <see cref="T:LLama.Native.NativeApi"/>.
You can still modify the configuration after calling this, but only before any call to <see cref="T:LLama.Native.NativeApi"/>.
</summary>
<param name="loadedLibrary">
The loaded library. When loading fails, this will be null.
However, if you are using .NET Standard 2.0, this will never be null.
</param>
<returns>Whether the load was successful.</returns>
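<example>
A minimal sketch of probing which llama native library would be selected by the current configuration, without binding it to <see cref="T:LLama.Native.NativeApi"/>:
<code>
if (NativeLibraryConfig.LLama.DryRun(out var library))
{
    // 'library' describes the library that would be loaded
    System.Console.WriteLine(library?.Metadata);
}
</code>
</example>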
</member>
<member name="T:LLama.Native.NativeLibraryConfigContainer">
<summary>
A class to set same configurations to multiple libraries at the same time.
</summary>
</member>
<member name="M:LLama.Native.NativeLibraryConfigContainer.ForEach(System.Action{LLama.Native.NativeLibraryConfig})">
<summary>
Do an action for all the configs in this container.
</summary>
<param name="action"></param>
</member>
<member name="M:LLama.Native.NativeLibraryConfigContainer.WithLogCallback(LLama.Native.NativeLogConfig.LLamaLogCallback)">
<summary>
Set the log callback that will be used for all llama.cpp log messages
</summary>
<param name="callback"></param>
<exception cref="T:System.NotImplementedException"></exception>
</member>
<member name="M:LLama.Native.NativeLibraryConfigContainer.WithLogCallback(Microsoft.Extensions.Logging.ILogger)">
<summary>
Set the log callback that will be used for all llama.cpp log messages
</summary>
<param name="logger"></param>
<exception cref="T:System.NotImplementedException"></exception>
</member>
<member name="M:LLama.Native.NativeLibraryConfigContainer.DryRun(LLama.Abstractions.INativeLibrary@,LLama.Abstractions.INativeLibrary@)">
<summary>
Try to load the native library with the current configurations,
but do not actually set it to <see cref="T:LLama.Native.NativeApi"/>.
You can still modify the configuration after calling this, but only before any call to <see cref="T:LLama.Native.NativeApi"/>.
</summary>
<returns>Whether the load was successful.</returns>
</member>
<member name="T:LLama.Native.NativeLibraryName">
<summary>
The name of the native library
</summary>
</member>
<member name="F:LLama.Native.NativeLibraryName.LLama">
<summary>
The native library compiled from llama.cpp.
</summary>
</member>
<member name="F:LLama.Native.NativeLibraryName.LLava">
<summary>
The native library compiled from the LLaVA example of llama.cpp.
</summary>
</member>
<member name="T:LLama.Native.NativeLibraryFromPath">
<summary>
A native library specified with a local file path.
</summary>
</member>
<member name="P:LLama.Native.NativeLibraryFromPath.Metadata">
<inheritdoc/>
</member>
<member name="M:LLama.Native.NativeLibraryFromPath.#ctor(System.String)">
<summary>
</summary>
<param name="path"></param>
</member>
<member name="M:LLama.Native.NativeLibraryFromPath.Prepare(LLama.Native.SystemInfo,LLama.Native.NativeLogConfig.LLamaLogCallback)">
<inheritdoc/>
</member>
<member name="T:LLama.Native.NativeLibraryMetadata">
<summary>
Information of a native library file.
</summary>
<param name="NativeLibraryName">Which kind of library it is.</param>
<param name="UseCuda">Whether it's compiled with cublas.</param>
<param name="UseVulkan">Whether it's compiled with vulkan.</param>
<param name="AvxLevel">Which AvxLevel it's compiled with.</param>
</member>
<member name="M:LLama.Native.NativeLibraryMetadata.#ctor(LLama.Native.NativeLibraryName,System.Boolean,System.Boolean,LLama.Native.AvxLevel)">
<summary>
Information of a native library file.
</summary>
<param name="NativeLibraryName">Which kind of library it is.</param>
<param name="UseCuda">Whether it's compiled with cublas.</param>
<param name="UseVulkan">Whether it's compiled with vulkan.</param>
<param name="AvxLevel">Which AvxLevel it's compiled with.</param>
</member>
<member name="P:LLama.Native.NativeLibraryMetadata.NativeLibraryName">
<summary>Which kind of library it is.</summary>
</member>
<member name="P:LLama.Native.NativeLibraryMetadata.UseCuda">
<summary>Whether it's compiled with cublas.</summary>
</member>
<member name="P:LLama.Native.NativeLibraryMetadata.UseVulkan">
<summary>Whether it's compiled with vulkan.</summary>
</member>
<member name="P:LLama.Native.NativeLibraryMetadata.AvxLevel">
<summary>Which AvxLevel it's compiled with.</summary>
</member>
<member name="T:LLama.Native.AvxLevel">
<summary>
Avx support configuration
</summary>
</member>
<member name="F:LLama.Native.AvxLevel.None">
<summary>
No AVX
</summary>
</member>
<member name="F:LLama.Native.AvxLevel.Avx">
<summary>
Advanced Vector Extensions (supported by most processors after 2011)
</summary>
</member>
<member name="F:LLama.Native.AvxLevel.Avx2">
<summary>
AVX2 (supported by most processors after 2013)
</summary>
</member>
<member name="F:LLama.Native.AvxLevel.Avx512">
<summary>
AVX512 (supported by some processors after 2016, not widely supported)
</summary>
</member>
<member name="M:LLama.Native.NativeLibraryUtils.TryLoadLibrary(LLama.Native.NativeLibraryConfig,LLama.Abstractions.INativeLibrary@)">
<summary>
Try to load libllama/llava_shared, using CPU feature detection to try and load a more specialised DLL if possible
</summary>
<returns>The library handle to unload later, or IntPtr.Zero if no library was loaded</returns>
</member>
<member name="T:LLama.Native.SystemInfo">
<summary>
Operating system information.
</summary>
<param name="OSPlatform"></param>
<param name="CudaMajorVersion"></param>
<param name="VulkanVersion"></param>
</member>
<member name="M:LLama.Native.SystemInfo.#ctor(System.Runtime.InteropServices.OSPlatform,System.Int32,System.String)">
<summary>
Operating system information.
</summary>
<param name="OSPlatform"></param>
<param name="CudaMajorVersion"></param>
<param name="VulkanVersion"></param>
</member>
<member name="P:LLama.Native.SystemInfo.OSPlatform">
<summary></summary>
</member>
<member name="P:LLama.Native.SystemInfo.CudaMajorVersion">
<summary></summary>
</member>
<member name="P:LLama.Native.SystemInfo.VulkanVersion">
<summary></summary>
</member>
<member name="M:LLama.Native.SystemInfo.Get">
<summary>
Get the system information of the current machine.
</summary>
<returns></returns>
<exception cref="T:System.PlatformNotSupportedException"></exception>
</member>
<member name="T:LLama.Native.UnknownNativeLibrary">
<summary>
When you are using .NET Standard 2.0, dynamic native library loading is not supported.
This class will be returned by <see cref="M:LLama.Native.NativeLibraryConfig.DryRun(LLama.Abstractions.INativeLibrary@)"/>.
</summary>
</member>
<member name="P:LLama.Native.UnknownNativeLibrary.Metadata">
<inheritdoc/>
</member>
<member name="M:LLama.Native.UnknownNativeLibrary.Prepare(LLama.Native.SystemInfo,LLama.Native.NativeLogConfig.LLamaLogCallback)">
<inheritdoc/>
</member>
<member name="T:LLama.Native.LoraAdapter">
<summary>
A LoRA adapter which can be applied to a context for a specific model
</summary>
</member>
<member name="P:LLama.Native.LoraAdapter.Model">
<summary>
The model which this LoRA adapter was loaded with.
</summary>
</member>
<member name="P:LLama.Native.LoraAdapter.Path">
<summary>
The full path of the file this adapter was loaded from
</summary>
</member>
<member name="P:LLama.Native.LoraAdapter.Pointer">
<summary>
Native pointer of the loaded adapter, will be automatically freed when the model is unloaded
</summary>
</member>
<member name="P:LLama.Native.LoraAdapter.Loaded">
<summary>
Indicates if this adapter has been unloaded
</summary>
</member>
<member name="M:LLama.Native.LoraAdapter.Unload">
<summary>
Unload this adapter
</summary>
</member>
<member name="T:LLama.Native.NativeApi">
<summary>
Direct translation of the llama.cpp API
</summary>
</member>
<member name="M:LLama.Native.NativeApi.llama_empty_call">
<summary>
A method that does nothing. This is a native method; calling it will force the llama native dependencies to be loaded.
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_backend_free">
<summary>
Call once at the end of the program - currently only used for MPI
</summary>
</member>
<member name="M:LLama.Native.NativeApi.llama_max_devices">
<summary>
Get the maximum number of devices supported by llama.cpp
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_supports_mmap">
<summary>
Check if memory mapping is supported
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_supports_mlock">
<summary>
Check if memory locking is supported
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_supports_gpu_offload">
<summary>
Check if GPU offload is supported
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_supports_rpc">
<summary>
Check if RPC offload is supported
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_backend_init">
<summary>
Initialize the llama + ggml backend. Call once at the start of the program.
This is private because LLamaSharp automatically calls it, and it's only valid to call it once!
</summary>
</member>
<member name="M:LLama.Native.NativeApi.llama_state_load_file(LLama.Native.SafeLLamaContextHandle,System.String,LLama.Native.LLamaToken[],System.UInt64,System.UInt64@)">
<summary>
Load session file
</summary>
<param name="ctx"></param>
<param name="path_session"></param>
<param name="tokens_out"></param>
<param name="n_token_capacity"></param>
<param name="n_token_count_out"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_state_save_file(LLama.Native.SafeLLamaContextHandle,System.String,LLama.Native.LLamaToken[],System.UInt64)">
<summary>
Save session file
</summary>
<param name="ctx"></param>
<param name="path_session"></param>
<param name="tokens"></param>
<param name="n_token_count"></param>
<returns></returns>
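<example>
A minimal sketch, assuming an existing <see cref="T:LLama.Native.SafeLLamaContextHandle"/> named <c>ctx</c> and an <c>LLamaToken[]</c> named <c>tokens</c> holding the tokens evaluated so far; the file name is illustrative:
<code>
NativeApi.llama_state_save_file(ctx, "session.bin", tokens, (ulong)tokens.Length);

// ...later, load the session back and recover the token list
var loaded = new LLamaToken[tokens.Length];
NativeApi.llama_state_load_file(ctx, "session.bin", loaded, (ulong)loaded.Length, out var count);
</code>
</example>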
</member>
<member name="M:LLama.Native.NativeApi.llama_set_causal_attn(LLama.Native.SafeLLamaContextHandle,System.Boolean)">
<summary>
Set whether to use causal attention or not. If set to true, the model will only attend to the past tokens
</summary>
</member>
<member name="M:LLama.Native.NativeApi.llama_set_embeddings(LLama.Native.SafeLLamaContextHandle,System.Boolean)">
<summary>
Set whether the model is in embeddings mode or not.
</summary>
<param name="ctx"></param>
<param name="embeddings">If true, embeddings will be returned but logits will not</param>
</member>
<member name="M:LLama.Native.NativeApi.llama_set_abort_callback(LLama.Native.SafeLlamaModelHandle,System.IntPtr,System.IntPtr)">
<summary>
Set abort callback
</summary>
</member>
<member name="M:LLama.Native.NativeApi.llama_n_seq_max(LLama.Native.SafeLLamaContextHandle)">
<summary>
Get the n_seq_max for this context
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_get_embeddings(LLama.Native.SafeLLamaContextHandle)">
<summary>
Get all output token embeddings.
When pooling_type == LLAMA_POOLING_TYPE_NONE or when using a generative model, the embeddings for which
llama_batch.logits[i] != 0 are stored contiguously in the order they have appeared in the batch.
shape: [n_outputs*n_embd]
Otherwise, returns an empty span.
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_chat_apply_template(System.Byte*,LLama.Native.LLamaChatMessage*,System.UIntPtr,System.Boolean,System.Byte*,System.Int32)">
<summary>
Apply chat template. Inspired by hf apply_chat_template() on python.
</summary>
<param name="tmpl">A Jinja template to use for this chat. If this is nullptr, the models default chat template will be used instead.</param>
<param name="chat">Pointer to a list of multiple llama_chat_message</param>
<param name="n_msg">Number of llama_chat_message in this chat</param>
<param name="add_ass">Whether to end the prompt with the token(s) that indicate the start of an assistant message.</param>
<param name="buf">A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages)</param>
<param name="length">The size of the allocated buffer</param>
            <returns>The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-allocate it and then re-apply the template.</returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_chat_builtin_templates(System.Char**,System.UIntPtr)">
<summary>
Get list of built-in chat templates
</summary>
<param name="output"></param>
<param name="len"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_print_timings(LLama.Native.SafeLLamaContextHandle)">
<summary>
Print out timing information for this context
</summary>
<param name="ctx"></param>
</member>
<member name="M:LLama.Native.NativeApi.llama_print_system_info">
<summary>
Print system information
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_token_to_piece(LLama.Native.SafeLlamaModelHandle.Vocabulary,LLama.Native.LLamaToken,System.Span{System.Byte},System.Int32,System.Boolean)">
<summary>
Convert a single token into text
</summary>
<param name="vocab"></param>
<param name="llamaToken"></param>
<param name="buffer">buffer to write string into</param>
<param name="lstrip">User can skip up to 'lstrip' leading spaces before copying (useful when encoding/decoding multiple tokens with 'add_space_prefix')</param>
<param name="special">If true, special tokens are rendered in the output</param>
            <returns>The length written, or, if the buffer is too small, a negative value that indicates the length required</returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_tokenize(LLama.Native.LLamaVocabNative*,System.Byte*,System.Int32,LLama.Native.LLamaToken*,System.Int32,System.Boolean,System.Boolean)">
<summary>
Convert text into tokens
</summary>
<param name="model"></param>
<param name="text"></param>
<param name="text_len"></param>
<param name="tokens">The tokens pointer must be large enough to hold the resulting tokens.</param>
<param name="n_max_tokens"></param>
<param name="add_special">add_special Allow to add BOS and EOS tokens if model is configured to do so.</param>
<param name="parse_special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.</param>
<returns>Returns the number of tokens on success, no more than n_max_tokens.
Returns a negative number on failure - the number of tokens that would have been returned
</returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_detokenize(LLama.Native.LLamaVocabNative*,LLama.Native.LLamaToken*,System.Int32,System.Byte*,System.Int32,System.Boolean,System.Boolean)">
<summary>
Convert the provided tokens into text (inverse of llama_tokenize()).
</summary>
<param name="model"></param>
<param name="tokens"></param>
<param name="nTokens"></param>
<param name="textOut">The char pointer must be large enough to hold the resulting text.</param>
<param name="textLengthMax"></param>
<param name="removeSpecial">remove_special Allow to remove BOS and EOS tokens if model is configured to do so.</param>
<param name="unparseSpecial">unparse_special If true, special tokens are rendered in the output.</param>
<returns>Returns the number of chars/bytes on success, no more than textLengthMax. Returns a negative number on failure - the number of chars/bytes that would have been returned.</returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_log_set(LLama.Native.NativeLogConfig.LLamaLogCallback)">
<summary>
Register a callback to receive llama log messages
</summary>
<param name="logCallback"></param>
</member>
<member name="M:LLama.Native.NativeApi.llama_get_kv_cache_token_count(LLama.Native.SafeLLamaContextHandle)">
<summary>
Returns the number of tokens in the KV cache (slow, use only for debug)
If a KV cell has multiple sequences assigned to it, it will be counted multiple times
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_get_kv_cache_used_cells(LLama.Native.SafeLLamaContextHandle)">
<summary>
            Returns the number of used KV cells (i.e. cells that have at least one sequence assigned to them)
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_kv_cache_clear(LLama.Native.SafeLLamaContextHandle)">
<summary>
Clear the KV cache. Both cell info is erased and KV data is zeroed
</summary>
<param name="ctx"></param>
</member>
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_rm(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos)">
<summary>
Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
</summary>
<param name="ctx"></param>
<param name="seq"></param>
<param name="p0"></param>
<param name="p1"></param>
<returns>Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails</returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_cp(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId,LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos)">
<summary>
Copy all tokens that belong to the specified sequence to another sequence
Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
</summary>
<param name="ctx"></param>
<param name="src"></param>
<param name="dest"></param>
<param name="p0"></param>
<param name="p1"></param>
</member>
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_keep(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId)">
<summary>
Removes all tokens that do not belong to the specified sequence
</summary>
<param name="ctx"></param>
<param name="seq"></param>
</member>
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_add(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
<summary>
Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
If the KV cache is RoPEd, the KV data is updated accordingly:
- lazily on next llama_decode()
- explicitly with llama_kv_cache_update()
</summary>
<param name="ctx"></param>
<param name="seq"></param>
<param name="p0"></param>
<param name="p1"></param>
<param name="delta"></param>
</member>
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_div(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
<summary>
Integer division of the positions by factor of `d > 1`
If the KV cache is RoPEd, the KV data is updated accordingly:
- lazily on next llama_decode()
- explicitly with llama_kv_cache_update()
<br />
p0 &lt; 0 : [0, p1]
<br />
p1 &lt; 0 : [p0, inf)
</summary>
<param name="ctx"></param>
<param name="seq"></param>
<param name="p0"></param>
<param name="p1"></param>
<param name="d"></param>
</member>
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_pos_max(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId)">
<summary>
Returns the largest position present in the KV cache for the specified sequence
</summary>
<param name="ctx"></param>
<param name="seq"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_batch_init(System.Int32,System.Int32,System.Int32)">
<summary>
Allocates a batch of tokens on the heap
Each token can be assigned up to n_seq_max sequence ids
The batch has to be freed with llama_batch_free()
If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
The rest of the llama_batch members are allocated with size n_tokens
All members are left uninitialized
</summary>
<param name="n_tokens"></param>
<param name="embd"></param>
<param name="n_seq_max">Each token can be assigned up to n_seq_max sequence ids</param>
</member>
<member name="M:LLama.Native.NativeApi.llama_batch_free(LLama.Native.LLamaNativeBatch)">
<summary>
Frees a batch of tokens allocated with llama_batch_init()
</summary>
<param name="batch"></param>
</member>
<member name="M:LLama.Native.NativeApi.llama_apply_adapter_cvec(LLama.Native.SafeLLamaContextHandle,System.Single*,System.UIntPtr,System.Int32,System.Int32,System.Int32)">
<summary>
Apply a loaded control vector to a llama_context, or if data is NULL, clear
the currently loaded vector.
n_embd should be the size of a single layer's control, and data should point
to an n_embd x n_layers buffer starting from layer 1.
            il_start and il_end are the layer range the vector should apply to (both inclusive).
See llama_control_vector_load in common to load a control vector.
</summary>
<param name="ctx"></param>
<param name="data"></param>
<param name="len"></param>
<param name="n_embd"></param>
<param name="il_start"></param>
<param name="il_end"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_split_path(System.String,System.UIntPtr,System.String,System.Int32,System.Int32)">
<summary>
Build a split GGUF final path for this chunk.
llama_split_path(split_path, sizeof(split_path), "/models/ggml-model-q4_0", 2, 4) => split_path = "/models/ggml-model-q4_0-00002-of-00004.gguf"
</summary>
<param name="split_path"></param>
<param name="maxlen"></param>
<param name="path_prefix"></param>
<param name="split_no"></param>
<param name="split_count"></param>
<returns>Returns the split_path length.</returns>
</member>
<member name="M:LLama.Native.NativeApi.llama_split_prefix(System.String,System.UIntPtr,System.String,System.Int32,System.Int32)">
<summary>
Extract the path prefix from the split_path if and only if the split_no and split_count match.
llama_split_prefix(split_prefix, 64, "/models/ggml-model-q4_0-00002-of-00004.gguf", 2, 4) => split_prefix = "/models/ggml-model-q4_0"
</summary>
<param name="split_prefix"></param>
<param name="maxlen"></param>
<param name="split_path"></param>
<param name="split_no"></param>
<param name="split_count"></param>
<returns>Returns the split_prefix length.</returns>
</member>
<member name="M:LLama.Native.NativeApi.llava_validate_embed_size(LLama.Native.SafeLLamaContextHandle,LLama.Native.SafeLlavaModelHandle)">
<summary>
Sanity check for clip &lt;-&gt; llava embed size match
</summary>
<param name="ctxLlama">LLama Context</param>
<param name="ctxClip">Llava Model</param>
            <returns>True if validated successfully</returns>
</member>
<member name="M:LLama.Native.NativeApi.llava_image_embed_make_with_bytes(LLama.Native.SafeLlavaModelHandle,System.Int32,System.Byte[],System.Int32)">
<summary>
Build an image embed from image file bytes
</summary>
<param name="ctx_clip">SafeHandle to the Clip Model</param>
<param name="n_threads">Number of threads</param>
<param name="image_bytes">Binary image in jpeg format</param>
<param name="image_bytes_length">Bytes length of the image</param>
<returns>SafeHandle to the Embeddings</returns>
</member>
<member name="M:LLama.Native.NativeApi.llava_image_embed_make_with_filename(LLama.Native.SafeLlavaModelHandle,System.Int32,System.String)">
<summary>
Build an image embed from a path to an image filename
</summary>
<param name="ctx_clip">SafeHandle to the Clip Model</param>
<param name="n_threads">Number of threads</param>
<param name="image_path">Image filename (jpeg) to generate embeddings</param>
<returns>SafeHandle to the embeddings</returns>
</member>
<member name="M:LLama.Native.NativeApi.llava_image_embed_free(System.IntPtr)">
<summary>
Free an embedding made with llava_image_embed_make_*
</summary>
<param name="embed">Embeddings to release</param>
</member>
<member name="M:LLama.Native.NativeApi.llava_eval_image_embed(LLama.Native.SafeLLamaContextHandle,LLama.Native.SafeLlavaImageEmbedHandle,System.Int32,System.Int32@)">
<summary>
Write the image represented by embed into the llama context with batch size n_batch, starting at context
            pos n_past. On completion, n_past points to the next position in the context after the image embed.
</summary>
<param name="ctx_llama">Llama Context</param>
<param name="embed">Embedding handle</param>
<returns>True on success</returns>
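            <remarks>
            A hedged sketch of the typical flow. The `clipModel` and `context` handles, the thread count,
            the batch size and the `ref` style of the final parameter are assumptions, not guarantees.
            <code>
            // Build an embedding from an image file, then write it into the context.
            var embed = NativeApi.llava_image_embed_make_with_filename(clipModel, 4, "image.jpg");
            var nPast = 0;
            var ok = NativeApi.llava_eval_image_embed(context, embed, 512, ref nPast);
            // On success, nPast now points at the position after the image embedding.
            </code>
            </remarks>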
</member>
<member name="M:LLama.Native.NativeApi.GetLoadedNativeLibrary(LLama.Native.NativeLibraryName)">
<summary>
Get the loaded native library. If you are using netstandard2.0, it will always return null.
</summary>
<param name="name"></param>
<returns></returns>
<exception cref="T:System.ArgumentException"></exception>
</member>
<member name="M:LLama.Native.NativeApi.llama_model_quantize(System.String,System.String,LLama.Native.LLamaModelQuantizeParams@)">
<summary>
Returns 0 on success
</summary>
<param name="fname_inp"></param>
<param name="fname_out"></param>
<param name="param"></param>
<returns>Returns 0 on success</returns>
</member>
<member name="T:LLama.Native.NativeLogConfig">
<summary>
Configure llama.cpp logging
</summary>
</member>
<member name="T:LLama.Native.NativeLogConfig.LLamaLogCallback">
<summary>
Callback from llama.cpp with log messages
</summary>
<param name="level"></param>
<param name="message"></param>
</member>
<member name="M:LLama.Native.NativeLogConfig.native_llama_log_set(LLama.Native.NativeLogConfig.LLamaLogCallback)">
<summary>
Register a callback to receive llama log messages
</summary>
<param name="logCallback"></param>
</member>
<member name="F:LLama.Native.NativeLogConfig._currentLogCallbackHandle">
<summary>
A GC handle for the current log callback to ensure the callback is not collected
</summary>
</member>
<member name="M:LLama.Native.NativeLogConfig.llama_log_set(LLama.Native.NativeLogConfig.LLamaLogCallback)">
<summary>
Register a callback to receive llama log messages
</summary>
<param name="logCallback"></param>
</member>
<member name="M:LLama.Native.NativeLogConfig.llama_log_set(Microsoft.Extensions.Logging.ILogger)">
<summary>
Register a callback to receive llama log messages
</summary>
<param name="logger"></param>
</member>
<member name="T:LLama.Native.RopeScalingType">
<summary>
RoPE scaling type.
</summary>
<remarks>C# equivalent of llama_rope_scaling_type</remarks>
</member>
<member name="F:LLama.Native.RopeScalingType.Unspecified">
<summary>
No particular scaling type has been specified
</summary>
</member>
<member name="F:LLama.Native.RopeScalingType.None">
<summary>
Do not apply any RoPE scaling
</summary>
</member>
<member name="F:LLama.Native.RopeScalingType.Linear">
<summary>
            Positional linear interpolation, as described by kaiokendev: https://kaiokendev.github.io/til#extending-context-to-8k
</summary>
</member>
<member name="F:LLama.Native.RopeScalingType.Yarn">
<summary>
YaRN scaling: https://arxiv.org/pdf/2309.00071.pdf
</summary>
</member>
<member name="F:LLama.Native.RopeScalingType.LongRope">
<summary>
LongRope scaling
</summary>
</member>
<member name="T:LLama.Native.SafeLLamaContextHandle">
<summary>
A safe wrapper around a llama_context
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaContextHandle.ContextSize">
<summary>
Total number of tokens in the context
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaContextHandle.EmbeddingSize">
<summary>
Dimension of embedding vectors
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaContextHandle.BatchSize">
<summary>
Get the maximum batch size for this context
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaContextHandle.UBatchSize">
<summary>
Get the physical maximum batch size for this context
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaContextHandle.GenerationThreads">
<summary>
Get or set the number of threads used for generation of a single token.
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaContextHandle.BatchThreads">
<summary>
Get or set the number of threads used for prompt and batch processing (multiple token).
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaContextHandle.PoolingType">
<summary>
Get the pooling type for this context
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaContextHandle.ModelHandle">
<summary>
Get the model which this context is using
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaContextHandle.Vocab">
<summary>
Get the vocabulary for the model this context is using
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.ReleaseHandle">
<inheritdoc />
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.Create(LLama.Native.SafeLlamaModelHandle,LLama.Native.LLamaContextParams)">
<summary>
Create a new llama_state for the given model
</summary>
<param name="model"></param>
<param name="lparams"></param>
<returns></returns>
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_init_from_model(LLama.Native.SafeLlamaModelHandle,LLama.Native.LLamaContextParams)">
<summary>
Create a new llama_context with the given model. **This should never be called directly! Always use SafeLLamaContextHandle.Create**!
</summary>
<param name="model"></param>
<param name="params"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_free(System.IntPtr)">
<summary>
Frees all allocated memory in the given llama_context
</summary>
<param name="ctx"></param>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_set_abort_callback(LLama.Native.SafeLLamaContextHandle,LLama.Native.SafeLLamaContextHandle.GgmlAbortCallback,System.Void*)">
<summary>
Set a callback which can abort computation
</summary>
<param name="ctx"></param>
<param name="abort_callback"></param>
<param name="abort_callback_data"></param>
</member>
<member name="T:LLama.Native.SafeLLamaContextHandle.GgmlAbortCallback">
<summary>
            If this returns true, computation is cancelled
</summary>
<param name="data"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_decode(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaNativeBatch)">
<summary>
</summary>
<param name="ctx"></param>
<param name="batch"></param>
            <returns>Positive return values do not mean a fatal error, but rather a warning:<br />
- 0: success<br />
- 1: could not find a KV slot for the batch (try reducing the size of the batch or increase the context)<br />
- &lt; 0: error<br />
</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_encode(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaNativeBatch)">
<summary>
Processes a batch of tokens with the encoder part of the encoder-decoder model. Stores the encoder output
internally for later use by the decoder cross-attention layers.
</summary>
<param name="ctx"></param>
<param name="batch"></param>
<returns>0 = success <br />&lt; 0 = error</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_set_n_threads(LLama.Native.SafeLLamaContextHandle,System.Int32,System.Int32)">
<summary>
Set the number of threads used for decoding
</summary>
<param name="ctx"></param>
<param name="n_threads">n_threads is the number of threads used for generation (single token)</param>
<param name="n_threads_batch">n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)</param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_n_threads(LLama.Native.SafeLLamaContextHandle)">
<summary>
Get the number of threads used for generation of a single token.
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_n_threads_batch(LLama.Native.SafeLLamaContextHandle)">
<summary>
Get the number of threads used for prompt and batch processing (multiple token).
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_get_logits(LLama.Native.SafeLLamaContextHandle)">
<summary>
Token logits obtained from the last call to llama_decode
The logits for the last token are stored in the last row
Can be mutated in order to change the probabilities of the next token.<br />
Rows: n_tokens<br />
Cols: n_vocab
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_get_logits_ith(LLama.Native.SafeLLamaContextHandle,System.Int32)">
<summary>
Logits for the ith token. Equivalent to: llama_get_logits(ctx) + i*n_vocab
</summary>
<param name="ctx"></param>
<param name="i"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_n_ctx(LLama.Native.SafeLLamaContextHandle)">
<summary>
Get the size of the context window for the model for this context
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_n_batch(LLama.Native.SafeLLamaContextHandle)">
<summary>
Get the batch size for this context
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_n_ubatch(LLama.Native.SafeLLamaContextHandle)">
<summary>
Get the ubatch size for this context
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_get_size(LLama.Native.SafeLLamaContextHandle)">
<summary>
Returns the **actual** size in bytes of the state (logits, embedding and kv_cache).
Only use when saving the state, not when restoring it, otherwise the size may be too small.
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_get_data(LLama.Native.SafeLLamaContextHandle,System.Byte*,System.UIntPtr)">
<summary>
Copies the state to the specified destination address.
Destination needs to have allocated enough memory.
</summary>
<param name="ctx"></param>
<param name="dest"></param>
<param name="size"></param>
<returns>the number of bytes copied</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_set_data(LLama.Native.SafeLLamaContextHandle,System.Byte*,System.UIntPtr)">
<summary>
Set the state reading from the specified address
</summary>
<param name="ctx"></param>
<param name="src"></param>
<param name="size"></param>
<returns>the number of bytes read</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_seq_get_size(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId)">
<summary>
Get the exact size needed to copy the KV cache of a single sequence
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_seq_get_data(LLama.Native.SafeLLamaContextHandle,System.Byte*,System.UIntPtr,LLama.Native.LLamaSeqId)">
<summary>
Copy the KV cache of a single sequence into the specified buffer
</summary>
<param name="ctx"></param>
<param name="dst"></param>
<param name="size"></param>
<param name="seqId"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_seq_set_data(LLama.Native.SafeLLamaContextHandle,System.Byte*,System.UIntPtr,LLama.Native.LLamaSeqId)">
<summary>
Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence
</summary>
<param name="ctx"></param>
<param name="src"></param>
<param name="size"></param>
<param name="destSeqId"></param>
<returns>
- Positive: Ok
- Zero: Failed to load
</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_kv_cache_defrag(LLama.Native.SafeLLamaContextHandle)">
<summary>
Defragment the KV cache. This will be applied:
- lazily on next llama_decode()
- explicitly with llama_kv_cache_update()
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_kv_cache_update(LLama.Native.SafeLLamaContextHandle)">
<summary>
Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
</summary>
<param name="ctx"></param>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_kv_cache_can_shift(LLama.Native.SafeLLamaContextHandle)">
<summary>
Check if the context supports KV cache shifting
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_synchronize(LLama.Native.SafeLLamaContextHandle)">
<summary>
            Wait until all computations are finished. This is done automatically when using any of the functions that obtain computation results,
            so it is usually not necessary to call this explicitly.
</summary>
<param name="ctx"></param>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_pooling_type(LLama.Native.SafeLLamaContextHandle)">
<summary>
Get the pooling type for this context
</summary>
<param name="ctx"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_get_embeddings_seq(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId)">
<summary>
Get the embeddings for a sequence id.
Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[1] with the rank of the sequence
otherwise: float[n_embd] (1-dimensional)
</summary>
<returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_get_embeddings_ith(LLama.Native.SafeLLamaContextHandle,System.Int32)">
<summary>
Get the embeddings for the ith sequence.
Equivalent to: llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
</summary>
<returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.AddLoraAdapter(LLama.Native.LoraAdapter,System.Single)">
<summary>
Add a LoRA adapter to this context
</summary>
<param name="lora"></param>
<param name="scale"></param>
<exception cref="T:System.ArgumentException"></exception>
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.RemoveLoraAdapter(LLama.Native.LoraAdapter)">
<summary>
Remove a LoRA adapter from this context
</summary>
<param name="lora"></param>
            <returns>Indicates whether the LoRA was in this context and was removed</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.ClearLoraAdapters">
<summary>
Remove all LoRA adapters from this context
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.GetLogits(System.Int32)">
<summary>
Token logits obtained from the last call to llama_decode.
The logits for the last token are stored in the last row.
Only tokens with `logits = true` requested are present.<br/>
Can be mutated in order to change the probabilities of the next token.<br />
Rows: n_tokens<br />
Cols: n_vocab
</summary>
<param name="numTokens">
            The number of tokens whose logits should be retrieved, in <b>[numTokens X n_vocab]</b> format.<br/>
            Tokens are ordered as they appear in the LLamaBatch (first tokens first, etc).<br/>
            This is helpful when requesting logits for many tokens in a sequence, or when decoding multiple sequences in one go.
</param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.GetLogitsIth(System.Int32)">
<summary>
Logits for the ith token. Equivalent to: llama_get_logits(ctx) + i*n_vocab
</summary>
<param name="i"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.GetEmbeddingsIth(LLama.Native.LLamaPos)">
<summary>
Get the embeddings for the ith sequence.
Equivalent to: llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
</summary>
<returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.GetEmbeddingsSeq(LLama.Native.LLamaSeqId)">
<summary>
            Get the embeddings for a specific sequence.
Equivalent to: llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
</summary>
<returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.Tokenize(System.String,System.Boolean,System.Boolean,System.Text.Encoding)">
<summary>
Convert the given text into tokens
</summary>
<param name="text">The text to tokenize</param>
<param name="add_bos">Whether the "BOS" token should be added</param>
<param name="encoding">Encoding to use for the text</param>
<param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
<returns></returns>
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.TokenToSpan(LLama.Native.LLamaToken,System.Span{System.Byte})">
<summary>
Convert a single llama token into bytes
</summary>
<param name="token">Token to decode</param>
<param name="dest">A span to attempt to write into. If this is too small nothing will be written</param>
<returns>The size of this token. **nothing will be written** if this is larger than `dest`</returns>
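            <remarks>
            A hedged sketch of the "retry with a bigger buffer" pattern (the `context` and `token` names
            are placeholders, and the exact numeric return type is an assumption):
            <code>
            // Try a small buffer first; nothing is written if the token is larger than the buffer.
            var buffer = new byte[8];
            var size = context.TokenToSpan(token, buffer);
            if (size > buffer.Length)
            {
                buffer = new byte[size];
                size = context.TokenToSpan(token, buffer);
            }
            var text = System.Text.Encoding.UTF8.GetString(buffer, 0, System.Convert.ToInt32(size));
            </code>
            </remarks>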
</member>
<member name="F:LLama.Native.SafeLLamaContextHandle.GlobalInferenceLock">
<summary>
This object exists to ensure there is only ever 1 inference running at a time. This is a workaround for thread safety issues in llama.cpp itself.
Most notably CUDA, which seems to use some global singleton resources and will crash if multiple inferences are run (even against different models).
For more information see these issues:
- https://github.com/SciSharp/LLamaSharp/issues/596
- https://github.com/ggerganov/llama.cpp/issues/3960
If these are ever resolved this lock can probably be removed.
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.Synchronize">
<summary>
            Wait until all computations are finished. This is done automatically when using any of the functions that obtain computation results,
            so it is usually not necessary to call this explicitly.
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.Encode(LLama.Native.LLamaBatch)">
<summary>
Processes a batch of tokens with the encoder part of the encoder-decoder model. Stores the encoder output
internally for later use by the decoder cross-attention layers.
</summary>
<param name="batch"></param>
<returns>0 = success <br />&lt; 0 = error (the KV cache state is restored to the state before this call)</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.Decode(LLama.Native.LLamaBatch)">
<summary>
</summary>
<param name="batch"></param>
            <returns>Positive return values do not mean a fatal error, but rather a warning:<br />
- 0: success<br />
- 1: could not find a KV slot for the batch (try reducing the size of the batch or increase the context)<br />
- &lt; 0: error (the KV cache state is restored to the state before this call)<br />
</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.Decode(System.Collections.Generic.List{LLama.Native.LLamaToken},LLama.Native.LLamaSeqId,LLama.Native.LLamaBatch,System.Int32@)">
<summary>
Decode a set of tokens in batch-size chunks.
</summary>
<param name="tokens"></param>
<param name="id"></param>
<param name="batch"></param>
<param name="n_past"></param>
<returns>A tuple, containing the decode result and the number of tokens that have <b>not</b> been decoded yet.</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.Decode(LLama.Native.LLamaBatchEmbeddings)">
<summary>
</summary>
<param name="batch"></param>
            <returns>Positive return values do not mean a fatal error, but rather a warning:<br />
- 0: success<br />
- 1: could not find a KV slot for the batch (try reducing the size of the batch or increase the context)<br />
- &lt; 0: error<br />
</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.GetStateSize">
<summary>
Get the size of the state, when saved as bytes
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.GetStateSize(LLama.Native.LLamaSeqId)">
<summary>
Get the size of the KV cache for a single sequence ID, when saved as bytes
</summary>
<param name="sequence"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.GetState(System.Byte*,System.UIntPtr)">
<summary>
Get the raw state of this context, encoded as bytes. Data is written into the `dest` pointer.
</summary>
<param name="dest">Destination to write to</param>
<param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
<returns>The number of bytes written to dest</returns>
<exception cref="T:System.ArgumentOutOfRangeException">Thrown if dest is too small</exception>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.GetState(System.Byte*,System.UIntPtr,LLama.Native.LLamaSeqId)">
<summary>
Get the raw state of a single sequence from this context, encoded as bytes. Data is written into the `dest` pointer.
</summary>
<param name="dest">Destination to write to</param>
<param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
<param name="sequence">The sequence to get state data for</param>
<returns>The number of bytes written to dest</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.SetState(System.Byte*,System.UIntPtr)">
<summary>
Set the raw state of this context
</summary>
<param name="src">The pointer to read the state from</param>
<param name="size">Number of bytes that can be safely read from the pointer</param>
<returns>Number of bytes read from the src pointer</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.SetState(System.Byte*,System.UIntPtr,LLama.Native.LLamaSeqId)">
<summary>
Set the raw state of a single sequence
</summary>
<param name="src">The pointer to read the state from</param>
<param name="sequence">Sequence ID to set</param>
<param name="size">Number of bytes that can be safely read from the pointer</param>
<returns>Number of bytes read from the src pointer</returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.GetTimings">
<summary>
Get performance information
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.ResetTimings">
<summary>
Reset all performance information for this context
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaContextHandle.KvCacheCanShift">
<summary>
Check if the context supports KV cache shifting
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheUpdate">
<summary>
Apply KV cache updates (such as K-shifts, defragmentation, etc.)
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheDefrag">
<summary>
Defragment the KV cache. This will be applied:
- lazily on next llama_decode()
- explicitly with llama_kv_cache_update()
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheGetDebugView(System.Int32)">
<summary>
Get a new KV cache view that can be used to debug the KV cache
</summary>
<param name="maxSequences"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheCountCells">
<summary>
            Count the number of used cells in the KV cache (i.e. cells that have at least one sequence assigned to them)
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheCountTokens">
<summary>
Returns the number of tokens in the KV cache (slow, use only for debug)
If a KV cell has multiple sequences assigned to it, it will be counted multiple times
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheClear">
<summary>
Clear the KV cache - both cell info is erased and KV data is zeroed
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheRemove(LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos)">
<summary>
Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
</summary>
<param name="seq"></param>
<param name="p0"></param>
<param name="p1"></param>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheSequenceCopy(LLama.Native.LLamaSeqId,LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos)">
<summary>
Copy all tokens that belong to the specified sequence to another sequence. Note that
this does not allocate extra KV cache memory - it simply assigns the tokens to the
new sequence
</summary>
<param name="src"></param>
<param name="dest"></param>
<param name="p0"></param>
<param name="p1"></param>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheSequenceKeep(LLama.Native.LLamaSeqId)">
<summary>
Removes all tokens that do not belong to the specified sequence
</summary>
<param name="seq"></param>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheSequenceAdd(LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
<summary>
Adds relative position "delta" to all tokens that belong to the specified sequence
            and have positions in [p0, p1). If the KV cache is RoPEd, the KV data is updated
accordingly
</summary>
<param name="seq"></param>
<param name="p0"></param>
<param name="p1"></param>
<param name="delta"></param>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheSequenceDivide(LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
<summary>
Integer division of the positions by factor of `d > 1`.
If the KV cache is RoPEd, the KV data is updated accordingly.<br />
p0 &lt; 0 : [0, p1]<br />
p1 &lt; 0 : [p0, inf)
</summary>
<param name="seq"></param>
<param name="p0"></param>
<param name="p1"></param>
<param name="divisor"></param>
</member>
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheMaxPosition(LLama.Native.LLamaSeqId)">
<summary>
Returns the largest position present in the KV cache for the specified sequence
</summary>
<param name="seq"></param>
<returns></returns>
</member>
<member name="T:LLama.Native.SafeLLamaHandleBase">
<summary>
Base class for all llama handles to native resources
</summary>
</member>
<member name="P:LLama.Native.SafeLLamaHandleBase.IsInvalid">
<inheritdoc />
</member>
<member name="M:LLama.Native.SafeLLamaHandleBase.ToString">
<inheritdoc />
</member>
<member name="T:LLama.Native.SafeLlamaModelHandle">
<summary>
A reference to a set of llama model weights
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.RopeType">
<summary>
Get the rope (positional embedding) type for this model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.ContextSize">
<summary>
The number of tokens in the context that this model was trained for
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.RopeFrequency">
<summary>
Get the rope frequency this model was trained with
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.EmbeddingSize">
<summary>
Dimension of embedding vectors
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.SizeInBytes">
<summary>
Get the size of this model in bytes
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.ParameterCount">
<summary>
Get the number of parameters in this model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.LayerCount">
<summary>
Get the number of layers in this model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.HeadCount">
<summary>
Get the number of heads in this model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.HasEncoder">
<summary>
Returns true if the model contains an encoder that requires llama_encode() call
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.HasDecoder">
<summary>
Returns true if the model contains a decoder that requires llama_decode() call
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.IsRecurrent">
<summary>
Returns true if the model is recurrent (like Mamba, RWKV, etc.)
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Description">
<summary>
Get a description of this model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.MetadataCount">
<summary>
Get the number of metadata key/value pairs
</summary>
<returns></returns>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocab">
<summary>
Get the vocabulary of this model
</summary>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.ReleaseHandle">
<inheritdoc />
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.LoadFromFile(System.String,LLama.Native.LLamaModelParams)">
<summary>
Load a model from the given file path into memory
</summary>
<param name="modelPath"></param>
<param name="lparams"></param>
<returns></returns>
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_load_from_file(System.String,LLama.Native.LLamaModelParams)">
<summary>
Load the model from a file
If the file is split into multiple parts, the file name must follow this pattern: {name}-%05d-of-%05d.gguf
If the split file name does not follow this pattern, use llama_model_load_from_splits
</summary>
<param name="path"></param>
<param name="params"></param>
<returns>The loaded model, or null on failure.</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_load_from_splits(System.Char**,System.UIntPtr,LLama.Native.LLamaModelParams)">
<summary>
Load the model from multiple splits (support custom naming scheme)
The paths must be in the correct order
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_apply_lora_from_file(LLama.Native.SafeLlamaModelHandle,System.String,System.Single,System.String,System.Int32)">
<summary>
Apply a LoRA adapter to a loaded model
path_base_model is the path to a higher quality model to use as a base for
the layers modified by the adapter. Can be NULL to use the current loaded model.
The model needs to be reloaded before applying a new adapter, otherwise the adapter
will be applied on top of the previous one
</summary>
<param name="model"></param>
<param name="path"></param>
<param name="scale"></param>
<param name="pathBase"></param>
<param name="threads"></param>
<returns>Returns 0 on success</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_free(System.IntPtr)">
<summary>
Frees all allocated memory associated with a model
</summary>
<param name="model"></param>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_meta_count(LLama.Native.SafeLlamaModelHandle)">
<summary>
Get the number of metadata key/value pairs
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_meta_key_by_index(LLama.Native.SafeLlamaModelHandle,System.Int32,System.Span{System.Byte})">
<summary>
Get metadata key name by index
</summary>
<param name="model">Model to fetch from</param>
<param name="index">Index of key to fetch</param>
<param name="dest">buffer to write result into</param>
            <returns>The length of the string on success (even if the buffer is too small). -1 if the key does not exist.</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_meta_val_str_by_index(LLama.Native.SafeLlamaModelHandle,System.Int32,System.Span{System.Byte})">
<summary>
Get metadata value as a string by index
</summary>
<param name="model">Model to fetch from</param>
<param name="index">Index of val to fetch</param>
<param name="dest">Buffer to write result into</param>
            <returns>The length of the string on success (even if the buffer is too small). -1 if the key does not exist.</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_meta_val_str(LLama.Native.SafeLlamaModelHandle,System.String,System.Span{System.Byte})">
<summary>
Get metadata value as a string by key name
</summary>
<param name="model"></param>
<param name="key"></param>
<param name="dest"></param>
<returns>The length of the string on success, or -1 on failure</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_n_vocab(LLama.Native.SafeLlamaModelHandle)">
<summary>
Get the number of tokens in the model vocabulary
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_n_ctx_train(LLama.Native.SafeLlamaModelHandle)">
<summary>
Get the size of the context window for the model
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_n_embd(LLama.Native.SafeLlamaModelHandle)">
<summary>
Get the dimension of embedding vectors from this model
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_n_layer(LLama.Native.SafeLlamaModelHandle)">
<summary>
Get the number of layers in this model
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_n_head(LLama.Native.SafeLlamaModelHandle)">
<summary>
Get the number of heads in this model
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_desc(LLama.Native.SafeLlamaModelHandle,System.Byte*,System.IntPtr)">
<summary>
Get a string describing the model type
</summary>
<param name="model"></param>
<param name="buf"></param>
<param name="bufSize"></param>
            <returns>The length of the string on success (even if the buffer is too small), or -1 on failure</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_size(LLama.Native.SafeLlamaModelHandle)">
<summary>
Get the size of the model in bytes
</summary>
<param name="model"></param>
<returns>The size of the model</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_n_params(LLama.Native.SafeLlamaModelHandle)">
<summary>
Get the number of parameters in this model
</summary>
<param name="model"></param>
            <returns>The number of parameters in the model</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_rope_freq_scale_train(LLama.Native.SafeLlamaModelHandle)">
<summary>
Get the model's RoPE frequency scaling factor
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_decoder_start_token(LLama.Native.SafeLlamaModelHandle)">
<summary>
For encoder-decoder models, this function returns id of the token that must be provided
to the decoder to start generating output sequence. For other models, it returns -1.
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_has_encoder(LLama.Native.SafeLlamaModelHandle)">
<summary>
Returns true if the model contains an encoder that requires llama_encode() call
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_has_decoder(LLama.Native.SafeLlamaModelHandle)">
<summary>
Returns true if the model contains a decoder that requires llama_decode() call
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_is_recurrent(LLama.Native.SafeLlamaModelHandle)">
<summary>
Returns true if the model is recurrent (like Mamba, RWKV, etc.)
</summary>
<param name="model"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.LoadLoraFromFile(System.String)">
<summary>
Load a LoRA adapter from file. The adapter will be associated with this model but will not be applied
</summary>
<param name="path"></param>
<returns></returns>
<exception cref="T:System.InvalidOperationException"></exception>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.TokenToSpan(LLama.Native.LLamaToken,System.Span{System.Byte},System.Int32,System.Boolean)">
<summary>
Convert a single llama token into bytes
</summary>
<param name="token">Token to decode</param>
<param name="dest">A span to attempt to write into. If this is too small nothing will be written</param>
<param name="lstrip">User can skip up to 'lstrip' leading spaces before copying (useful when encoding/decoding multiple tokens with 'add_space_prefix')</param>
<param name="special">If true, special characters will be converted to text. If false they will be invisible.</param>
<returns>The size of this token. **nothing will be written** if this is larger than `dest`</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.TokensToSpan(System.Collections.Generic.IReadOnlyList{LLama.Native.LLamaToken},System.Span{System.Char},System.Text.Encoding)">
<summary>
Convert a sequence of tokens into characters.
</summary>
<param name="tokens"></param>
<param name="dest"></param>
<param name="encoding"></param>
<returns>The section of the span which has valid data in it.
If there was insufficient space in the output span this will be
filled with as many characters as possible, starting from the _last_ token.
</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.Tokenize(System.String,System.Boolean,System.Boolean,System.Text.Encoding)">
<summary>
Convert a string of text into tokens
</summary>
<param name="text"></param>
<param name="addBos"></param>
<param name="encoding"></param>
<param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.CreateContext(LLama.Native.LLamaContextParams)">
<summary>
Create a new context for this model
</summary>
<param name="params"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.MetadataValueByKey(System.String)">
<summary>
Get the metadata value for the given key
</summary>
<param name="key">The key to fetch</param>
<returns>The value, null if there is no such key</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.MetadataKeyByIndex(System.Int32)">
<summary>
Get the metadata key for the given index
</summary>
<param name="index">The index to get</param>
<returns>The key, null if there is no such key or if the buffer was too small</returns>
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.MetadataValueByIndex(System.Int32)">
<summary>
Get the metadata value for the given index
</summary>
<param name="index">The index to get</param>
<returns>The value, null if there is no such value or if the buffer was too small</returns>
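            <remarks>
            A small, hedged example (the `model` handle is a placeholder) that enumerates every metadata
            pair using MetadataCount, MetadataKeyByIndex and MetadataValueByIndex:
            <code>
            for (var i = 0; i &lt; model.MetadataCount; i++)
            {
                // Either value may be null if the key is missing or the internal buffer was too small.
                var key = model.MetadataKeyByIndex(i);
                var value = model.MetadataValueByIndex(i);
                System.Console.WriteLine($"{key} = {value}");
            }
            </code>
            </remarks>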
</member>
<member name="M:LLama.Native.SafeLlamaModelHandle.GetTemplate(System.String)">
<summary>
            Get the chat template with the given name. Returns null if it is not available.
            If name is null, the default chat template is returned.
</summary>
<param name="name"></param>
<returns></returns>
</member>
<member name="T:LLama.Native.SafeLlamaModelHandle.Vocabulary">
<summary>
Get tokens for a model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.Count">
<summary>
Total number of tokens in this vocabulary
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.Type">
<summary>
            Get the type of this vocabulary
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.BOS">
<summary>
Get the Beginning of Sentence token for this model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.EOS">
<summary>
Get the End of Sentence token for this model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.Newline">
<summary>
Get the newline token for this model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.Pad">
<summary>
Get the padding token for this model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.SEP">
<summary>
Get the sentence separator token for this model
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillPrefix">
<summary>
Codellama beginning of infill prefix
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillMiddle">
<summary>
Codellama beginning of infill middle
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillSuffix">
<summary>
Codellama beginning of infill suffix
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillPad">
<summary>
Codellama pad
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillRep">
<summary>
Codellama rep
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillSep">
<summary>
            Codellama sep
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.EOT">
<summary>
end-of-turn token
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.DecoderStartToken">
<summary>
For encoder-decoder models, this function returns id of the token that must be provided
to the decoder to start generating output sequence.
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.ShouldAddBOS">
<summary>
            Check if the current model requires a BOS token to be added
</summary>
</member>
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.ShouldAddEOS">
<summary>
            Check if the current model requires an EOS token to be added
</summary>
</member>
<member name="T:LLama.Native.SafeLLamaSamplerChainHandle">
<summary>
A chain of sampler stages that can be used to select tokens from logits.
</summary>
<remarks>Wraps a handle returned from `llama_sampler_chain_init`. Other samplers are owned by this chain and are never directly exposed.</remarks>
</member>
<member name="P:LLama.Native.SafeLLamaSamplerChainHandle.Count">
<summary>
Get the number of samplers in this chain
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.ReleaseHandle">
<inheritdoc />
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Apply(LLama.Native.LLamaTokenDataArrayNative@)">
<summary>
Apply this sampler to a set of candidates
</summary>
<param name="candidates"></param>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Sample(LLama.Native.SafeLLamaContextHandle,System.Int32)">
<summary>
Sample and accept a token from the idx-th output of the last evaluation. Shorthand for:
<code>
var logits = ctx.GetLogitsIth(idx);
var token_data_array = LLamaTokenDataArray.Create(logits);
using LLamaTokenDataArrayNative.Create(token_data_array, out var native_token_data);
sampler_chain.Apply(native_token_data);
var token = native_token_data.Data.Span[native_token_data.Selected];
sampler_chain.Accept(token);
return token;
</code>
</summary>
<param name="context"></param>
<param name="index"></param>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Reset">
<summary>
Reset the state of this sampler
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Accept(LLama.Native.LLamaToken)">
<summary>
Accept a token and update the internal state of this sampler
</summary>
<param name="token"></param>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.GetName(System.Int32)">
<summary>
Get the name of the sampler at the given index
</summary>
<param name="index"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.GetSeed(System.Int32)">
<summary>
            Get the seed of the sampler at the given index, if applicable. Returns LLAMA_DEFAULT_SEED otherwise.
</summary>
<param name="index"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Create(LLama.Native.LLamaSamplerChainParams)">
<summary>
Create a new sampler chain
</summary>
<param name="params"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddClone(LLama.Native.SafeLLamaSamplerChainHandle,System.Int32)">
<summary>
Clone a sampler stage from another chain and add it to this chain
</summary>
<param name="src">The chain to clone a stage from</param>
<param name="index">The index of the stage to clone</param>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Remove(System.Int32)">
<summary>
Remove a sampler stage from this chain
</summary>
<param name="index"></param>
<exception cref="T:System.ArgumentOutOfRangeException"></exception>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddCustom``1(``0)">
<summary>
Add a custom sampler stage
</summary>
<typeparam name="TSampler"></typeparam>
<param name="sampler"></param>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddGreedySampler">
<summary>
Add a sampler which picks the most likely token.
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddDistributionSampler(System.UInt32)">
<summary>
Add a sampler which picks from the probability distribution of all tokens
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddMirostat1Sampler(System.Int32,System.UInt32,System.Single,System.Single,System.Int32)">
<summary>
Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
</summary>
<param name="seed"></param>
<param name="tau">The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.</param>
<param name="eta">The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates.</param>
<param name="m">The number of tokens considered in the estimation of `s_hat`. This is an arbitrary value that is used to calculate `s_hat`, which in turn helps to calculate the value of `k`. In the paper, they use `m = 100`, but you can experiment with different values to see how it affects the performance of the algorithm.</param>
<param name="vocabCount"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddMirostat2Sampler(System.UInt32,System.Single,System.Single)">
<summary>
Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
</summary>
<param name="seed"></param>
<param name="tau">The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.</param>
<param name="eta">The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates.</param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddTopK(System.Int32)">
<summary>
Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddTopP(System.Single,System.IntPtr)">
<summary>
Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
</summary>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddMinP(System.Single,System.IntPtr)">
<summary>
Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddTypical(System.Single,System.IntPtr)">
<summary>
Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
</summary>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddTemperature(System.Single)">
<summary>
Apply temperature to the logits.
If temperature is less than zero the maximum logit is left unchanged and the rest are set to -infinity
</summary>
<param name="t"></param>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddDynamicTemperature(System.Single,System.Single,System.Single)">
<summary>
Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
</summary>
<param name="t"></param>
<param name="delta"></param>
<param name="exponent"></param>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddXTC(System.Single,System.Single,System.Int32,System.UInt32)">
<summary>
XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
</summary>
<param name="p"></param>
<param name="t"></param>
<param name="minKeep"></param>
<param name="seed"></param>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddFillInMiddleInfill(LLama.Native.SafeLlamaModelHandle)">
<summary>
This sampler is meant to be used for fill-in-the-middle infilling, after top_k + top_p sampling
<br />
1. if the sum of the EOG probs times the number of candidates is higher than the sum of the other probs -> pick EOG<br />
2. combine probs of tokens that have the same prefix<br />
<br />
example:<br />
<br />
- before:<br />
"abc": 0.5<br />
"abcd": 0.2<br />
"abcde": 0.1<br />
"dummy": 0.1<br />
<br />
- after:<br />
"abc": 0.8<br />
"dummy": 0.1<br />
<br />
3. discard non-EOG tokens with low prob<br />
4. if no tokens are left -> pick EOT
</summary>
<param name="model"></param>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddGrammar(LLama.Native.SafeLlamaModelHandle,System.String,System.String)">
<summary>
Create a sampler which makes tokens impossible unless they match the grammar
</summary>
<param name="model"></param>
<param name="grammar"></param>
<param name="root">Root rule of the grammar</param>
<returns></returns>
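            <example>
            An illustrative sketch; `chain` and `model` are assumed to exist already, and the GBNF text is a toy grammar which only permits the literal words "yes" or "no".
            <code>
            const string gbnf = "root ::= \"yes\" | \"no\"";
            chain.AddGrammar(model, gbnf, "root");
            </code>
            </example>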
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddLazyGrammar(LLama.Native.SafeLlamaModelHandle,System.String,System.String,System.ReadOnlySpan{System.String},System.ReadOnlySpan{LLama.Native.LLamaToken})">
<summary>
Create a sampler using lazy grammar sampling: https://github.com/ggerganov/llama.cpp/pull/9639
</summary>
<param name="model"></param>
<param name="grammar">Grammar in GBNF form</param>
<param name="root">Root rule of the grammar</param>
<param name="triggerTokens">A list of tokens that will trigger the grammar sampler.</param>
<param name="triggerWords">A list of words that will trigger the grammar sampler.</param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddPenalties(System.Int32,System.Single,System.Single,System.Single)">
<summary>
Create a sampler that applies various repetition penalties.
Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
</summary>
<param name="penaltyCount">How many tokens of history to consider when calculating penalties</param>
<param name="repeat">Repetition penalty</param>
<param name="freq">Frequency penalty</param>
<param name="presence">Presence penalty</param>
<returns></returns>
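            <example>
            An illustrative call (values are arbitrary): penalize repeats within the last 64 tokens with a 1.1x repetition penalty and no frequency or presence penalties. `chain` is assumed to exist already.
            <code>
            chain.AddPenalties(64, 1.1f, 0.0f, 0.0f);
            </code>
            </example>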
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddDry(LLama.Native.SafeLlamaModelHandle,System.ReadOnlySpan{System.String},System.Single,System.Single,System.Int32,System.Int32)">
<summary>
DRY sampler, designed by p-e-w, as described in: <a href="https://github.com/oobabooga/text-generation-webui/pull/5677">https://github.com/oobabooga/text-generation-webui/pull/5677</a>.
            Ported from the Koboldcpp implementation authored by pi6am: <a href="https://github.com/LostRuins/koboldcpp/pull/982">https://github.com/LostRuins/koboldcpp/pull/982</a>
</summary>
<param name="model">The model this sampler will be used with</param>
<param name="sequenceBreakers"></param>
<param name="multiplier">penalty multiplier, 0.0 = disabled</param>
<param name="base">exponential base</param>
<param name="allowedLength">repeated sequences longer than this are penalized</param>
<param name="penaltyLastN">how many tokens to scan for repetitions (0 = entire context)</param>
</member>
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddLogitBias(System.Int32,System.Span{LLama.Native.LLamaLogitBias})">
<summary>
Create a sampler that applies a bias directly to the logits
</summary>
<param name="vocabSize"></param>
<param name="biases"></param>
<returns></returns>
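            <example>
            An illustrative sketch; `chain`, `vocabSize` and the `newline` token are assumed to have been obtained elsewhere (e.g. from the model), and the bias value is arbitrary.
            <code>
            var biases = new LLamaLogitBias[]
            {
                // Strongly discourage the newline token
                new LLamaLogitBias { Token = newline, Bias = -10f },
            };
            chain.AddLogitBias(vocabSize, biases);
            </code>
            </example>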
</member>
<member name="T:LLama.Native.LLamaSamplerChainParams">
<summary>
</summary>
<remarks>llama_sampler_chain_params</remarks>
</member>
<member name="P:LLama.Native.LLamaSamplerChainParams.NoPerf">
<summary>
whether to measure performance timings
</summary>
</member>
<member name="M:LLama.Native.LLamaSamplerChainParams.Default">
<summary>
Get the default LLamaSamplerChainParams
</summary>
<returns></returns>
</member>
<member name="T:LLama.Native.LLamaLogitBias">
<summary>
A bias to apply directly to a logit
</summary>
</member>
<member name="F:LLama.Native.LLamaLogitBias.Token">
<summary>
The token to apply the bias to
</summary>
</member>
<member name="F:LLama.Native.LLamaLogitBias.Bias">
<summary>
The bias to add
</summary>
</member>
<member name="T:LLama.Native.LLamaSamplerINative">
<summary>
</summary>
<remarks>llama_sampler_i</remarks>
</member>
<member name="T:LLama.Native.LLamaSamplerINative.NameDelegate">
<summary>
Get the name of this sampler
</summary>
<param name="smpl"></param>
<returns></returns>
</member>
<member name="T:LLama.Native.LLamaSamplerINative.AcceptDelegate">
<summary>
Update internal sampler state after a token has been chosen
</summary>
<param name="smpl"></param>
<param name="token"></param>
</member>
<member name="T:LLama.Native.LLamaSamplerINative.ApplyDelegate">
<summary>
Apply this sampler to a set of logits
</summary>
<param name="smpl"></param>
<param name="logits"></param>
</member>
<member name="T:LLama.Native.LLamaSamplerINative.ResetDelegate">
<summary>
Reset the internal state of this sampler
</summary>
<param name="smpl"></param>
</member>
<member name="T:LLama.Native.LLamaSamplerINative.CloneDelegate">
<summary>
Create a clone of this sampler
</summary>
<param name="smpl"></param>
<returns></returns>
</member>
<member name="T:LLama.Native.LLamaSamplerINative.FreeDelegate">
<summary>
Free all resources held by this sampler
</summary>
<param name="smpl"></param>
</member>
<member name="T:LLama.Native.LLamaSamplerNative">
<summary>
</summary>
<remarks>llama_sampler</remarks>
</member>
<member name="F:LLama.Native.LLamaSamplerNative.Interface">
<summary>
Holds the function pointers which make up the actual sampler
</summary>
</member>
<member name="F:LLama.Native.LLamaSamplerNative.Context">
<summary>
            Any additional context this sampler needs; it may be anything. We will use it
            to hold a GCHandle.
</summary>
</member>
<member name="F:LLama.Native.CustomSamplerHandle._gcHandle">
<summary>
This GCHandle roots this object, preventing it from being freed.
</summary>
</member>
<member name="F:LLama.Native.CustomSamplerHandle._sampler">
<summary>
A reference to the user code which implements the custom sampler
</summary>
</member>
<member name="M:LLama.Native.CustomSamplerHandle.GetLLamaSamplerPointer">
<summary>
Get a pointer to a `llama_sampler` (LLamaSamplerNative) struct, suitable for passing to `llama_sampler_chain_add`
</summary>
<returns></returns>
<exception cref="T:System.NotImplementedException"></exception>
</member>
<member name="T:LLama.Native.ICustomSampler">
<summary>
A custom sampler stage for modifying logits or selecting a token
</summary>
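            <example>
            A minimal sketch of a custom stage implementing the members documented here. It does no real work and only demonstrates clearing the Sorted flag in Apply; it assumes Apply's parameter is passed by `ref`, and a Dispose method is included in case the interface also requires IDisposable.
            <code>
            class NoOpSampler : ICustomSampler
            {
                public string Name => "no-op";

                public void Apply(ref LLamaTokenDataArrayNative tokenData)
                {
                    // ...modify logits or select a token here if desired...
                    // Clearing Sorted is the documented safe default after any modification.
                    tokenData.Sorted = false;
                }

                public void Accept(LLamaToken token) { /* update internal state */ }
                public void Reset() { /* clear internal state */ }
                public ICustomSampler Clone() => new NoOpSampler();
                public void Dispose() { }
            }

            // Added to a chain with: chain.AddCustom(new NoOpSampler());
            </code>
            </example>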
</member>
<member name="P:LLama.Native.ICustomSampler.Name">
<summary>
The human readable name of this stage
</summary>
</member>
<member name="M:LLama.Native.ICustomSampler.Apply(LLama.Native.LLamaTokenDataArrayNative@)">
<summary>
Apply this stage to a set of logits.
This can modify logits or select a token (or both).
            If logits are modified, the Sorted flag <b>must</b> be set to false.
</summary>
<remarks>
            If the logits are no longer sorted after the custom sampler has run, it is <b>critically</b> important to
            set <i>Sorted=false</i>. If unsure, always set it to false; this is a safe default.
</remarks>
<param name="tokenData"></param>
</member>
<member name="M:LLama.Native.ICustomSampler.Accept(LLama.Native.LLamaToken)">
<summary>
Update the internal state of the sampler when a token is chosen
</summary>
<param name="token"></param>
</member>
<member name="M:LLama.Native.ICustomSampler.Reset">
<summary>
Reset the internal state of this sampler
</summary>
</member>
<member name="M:LLama.Native.ICustomSampler.Clone">
<summary>
Create a clone of this sampler
</summary>
</member>
<member name="T:LLama.Native.SafeLlavaImageEmbedHandle">
<summary>
A Reference to a llava Image Embed handle
</summary>
</member>
<member name="P:LLama.Native.SafeLlavaImageEmbedHandle.Model">
<summary>
Get the model used to create this image embedding
</summary>
</member>
<member name="P:LLama.Native.SafeLlavaImageEmbedHandle.EmbeddingDimensions">
<summary>
Get the number of dimensions in an embedding
</summary>
</member>
<member name="P:LLama.Native.SafeLlavaImageEmbedHandle.PatchCount">
<summary>
Get the number of "patches" in an image embedding
</summary>
</member>
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.CreateFromFileName(LLama.Native.SafeLlavaModelHandle,LLama.LLamaContext,System.String)">
<summary>
Create an image embed from an image file
</summary>
<param name="clip"></param>
<param name="ctx"></param>
<param name="image">Path to the image file. Supported formats:
<list type="bullet">
<item>JPG</item>
<item>PNG</item>
<item>BMP</item>
<item>TGA</item>
</list>
</param>
<returns></returns>
<exception cref="T:System.InvalidOperationException"></exception>
</member>
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.CreateFromFileName(LLama.Native.SafeLlavaModelHandle,System.String,System.Int32)">
<summary>
Create an image embed from an image file
</summary>
<param name="clip"></param>
<param name="image">Path to the image file. Supported formats:
<list type="bullet">
<item>JPG</item>
<item>PNG</item>
<item>BMP</item>
<item>TGA</item>
</list>
</param>
<param name="threads"></param>
<returns></returns>
<exception cref="T:System.InvalidOperationException"></exception>
</member>
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.CreateFromMemory(LLama.Native.SafeLlavaModelHandle,LLama.LLamaContext,System.Byte[])">
<summary>
Create an image embed from the bytes of an image.
</summary>
<param name="clip"></param>
<param name="ctx"></param>
<param name="image">Image bytes. Supported formats:
<list type="bullet">
<item>JPG</item>
<item>PNG</item>
<item>BMP</item>
<item>TGA</item>
</list>
</param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.CreateFromMemory(LLama.Native.SafeLlavaModelHandle,System.Byte[],System.Int32)">
<summary>
Create an image embed from the bytes of an image.
</summary>
<param name="clip"></param>
<param name="image">Image bytes. Supported formats:
<list type="bullet">
<item>JPG</item>
<item>PNG</item>
<item>BMP</item>
<item>TGA</item>
</list>
</param>
<param name="threads"></param>
<returns></returns>
</member>
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.ReleaseHandle">
<inheritdoc />
</member>
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.GetEmbedding(System.Span{System.Single},System.Int32)">
<summary>
Copy the embeddings data to the destination span
</summary>
<param name="dest"></param>
<param name="index"></param>
</member>
<member name="T:LLama.Native.SafeLlavaModelHandle">
<summary>
A reference to a set of llava model weights.
</summary>
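            <example>
            A hedged end-to-end sketch combining the members documented on this type; the file paths are placeholders and `context` is assumed to be an existing LLamaContext.
            <code>
            // Load the multi-modal projection (clip) model
            using var clip = SafeLlavaModelHandle.LoadFromFile("mmproj.gguf", 0);

            // Embed an image and evaluate it into the context
            using var embed = clip.CreateImageEmbeddings(context, "picture.jpg");
            var n_past = 0;
            clip.EvalImageEmbed(context, embed, ref n_past);
            </code>
            </example>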
</member>
<member name="P:LLama.Native.SafeLlavaModelHandle.EmbeddingDimensions">
<summary>
Get the number of dimensions in an embedding
</summary>
</member>
<member name="P:LLama.Native.SafeLlavaModelHandle.PatchCount">
<summary>
Get the number of "patches" in an image embedding
</summary>
</member>
<member name="M:LLama.Native.SafeLlavaModelHandle.ReleaseHandle">
<inheritdoc />
</member>
<member name="M:LLama.Native.SafeLlavaModelHandle.LoadFromFile(System.String,System.Int32)">
<summary>
Load a model from the given file path into memory
</summary>
<param name="modelPath">MMP File (Multi-Modal Projections)</param>
<param name="verbosity">Verbosity level</param>
<returns>SafeHandle of the Clip Model</returns>
<exception cref="T:System.InvalidOperationException"></exception>
<exception cref="T:LLama.Exceptions.LoadWeightsFailedException"></exception>
</member>
<member name="M:LLama.Native.SafeLlavaModelHandle.CreateImageEmbeddings(LLama.LLamaContext,System.String)">
<summary>
Create the Image Embeddings.
</summary>
<param name="ctxLlama">LLama Context</param>
<param name="image">Image filename (it supports jpeg format only)</param>
<returns>return the SafeHandle of these embeddings</returns>
</member>
<member name="M:LLama.Native.SafeLlavaModelHandle.CreateImageEmbeddings(System.String,System.Int32)">
<summary>
Create the Image Embeddings.
</summary>
<param name="image">Image in binary format (it supports jpeg format only)</param>
<param name="threads">Number of threads to use</param>
<returns>return the SafeHandle of these embeddings</returns>
</member>
<member name="M:LLama.Native.SafeLlavaModelHandle.CreateImageEmbeddings(LLama.LLamaContext,System.Byte[])">
<summary>
Create the Image Embeddings.
</summary>
<param name="ctxLlama">LLama Context</param>
<param name="image">Image in binary format (it supports jpeg format only)</param>
<returns>return the SafeHandle of these embeddings</returns>
</member>
<member name="M:LLama.Native.SafeLlavaModelHandle.CreateImageEmbeddings(System.Byte[],System.Int32)">
<summary>
Create the Image Embeddings.
</summary>
<param name="image">Image in binary format (it supports jpeg format only)</param>
<param name="threads">Number of threads to use</param>
<returns>return the SafeHandle of these embeddings</returns>
</member>
<member name="M:LLama.Native.SafeLlavaModelHandle.EvalImageEmbed(LLama.LLamaContext,LLama.Native.SafeLlavaImageEmbedHandle,System.Int32@)">
<summary>
Evaluates the image embeddings.
</summary>
<param name="ctxLlama">Llama Context</param>
<param name="imageEmbed">The current embeddings to evaluate</param>
<param name="n_past"></param>
<returns>True on success</returns>
</member>
<member name="M:LLama.Native.SafeLlavaModelHandle.clip_model_load(System.String,System.Int32)">
<summary>
Load MULTI MODAL PROJECTIONS model / Clip Model
</summary>
<param name="mmProj"> Model path/file</param>
<param name="verbosity">Verbosity level</param>
<returns>SafeLlavaModelHandle</returns>
</member>
<member name="M:LLama.Native.SafeLlavaModelHandle.clip_free(System.IntPtr)">
<summary>
Frees MULTI MODAL PROJECTIONS model / Clip Model
</summary>
<param name="ctx">Internal Pointer to the model</param>
</member>
<member name="T:LLama.Sampling.BaseSamplingPipeline">
<inheritdoc />
</member>
<member name="M:LLama.Sampling.BaseSamplingPipeline.#ctor">
<summary>
Create a new sampler wrapping a llama.cpp sampler chain
</summary>
</member>
<member name="M:LLama.Sampling.BaseSamplingPipeline.CreateChain(LLama.Native.SafeLLamaContextHandle)">
<summary>
            Create a sampling chain. This will be called once; the base class will automatically dispose the chain.
</summary>
<returns></returns>
</member>
<member name="M:LLama.Sampling.BaseSamplingPipeline.Dispose">
<inheritdoc />
</member>
<member name="M:LLama.Sampling.BaseSamplingPipeline.Sample(LLama.Native.SafeLLamaContextHandle,System.Int32)">
<inheritdoc />
</member>
<member name="M:LLama.Sampling.BaseSamplingPipeline.Reset">
<inheritdoc />
</member>
<member name="M:LLama.Sampling.BaseSamplingPipeline.Accept(LLama.Native.LLamaToken)">
<inheritdoc />
</member>
<member name="T:LLama.Sampling.DefaultSamplingPipeline">
<summary>
            An implementation of ISamplingPipeline which mimics the default llama.cpp sampling
</summary>
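            <example>
            A usage sketch assuming these properties are settable via an object initializer and that `ctx` is an evaluated SafeLLamaContextHandle; the values are illustrative.
            <code>
            var pipeline = new DefaultSamplingPipeline
            {
                Temperature = 0.7f,
                TopK = 40,
                TopP = 0.9f,
                RepeatPenalty = 1.1f,
                Seed = 1234,
            };

            var token = pipeline.Sample(ctx, 0);
            // If the pipeline does not already accept the token as part of Sample,
            // notify it explicitly: pipeline.Accept(token);
            </code>
            </example>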
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.LogitBias">
<summary>
Bias values to add to certain logits
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.RepeatPenalty">
<summary>
Repetition penalty, as described in https://arxiv.org/abs/1909.05858
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.FrequencyPenalty">
<summary>
Frequency penalty as described by OpenAI: https://platform.openai.com/docs/api-reference/chat/create<br />
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text
so far, decreasing the model's likelihood to repeat the same line verbatim.
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.PresencePenalty">
<summary>
Presence penalty as described by OpenAI: https://platform.openai.com/docs/api-reference/chat/create<br />
Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the
text so far, increasing the model's likelihood to talk about new topics.
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.PenaltyCount">
<summary>
How many tokens should be considered for penalties
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.PenalizeNewline">
<summary>
Whether the newline token should be protected from being modified by penalty
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.PreventEOS">
<summary>
Whether the EOS token should be suppressed. Setting this to 'true' prevents EOS from being sampled
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.Temperature">
<summary>
Temperature to apply (higher temperature is more "creative")
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.TopK">
<summary>
Number of tokens to keep in TopK sampling
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.TypicalP">
<summary>
P value for locally typical sampling
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.TopP">
<summary>
P value for TopP sampling
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.MinP">
<summary>
P value for MinP sampling
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.Grammar">
<summary>
Grammar to apply to constrain possible tokens
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.MinKeep">
<summary>
The minimum number of tokens to keep for samplers which remove tokens
</summary>
</member>
<member name="P:LLama.Sampling.DefaultSamplingPipeline.Seed">
<summary>
Seed to use for random sampling
</summary>
</member>
<member name="M:LLama.Sampling.DefaultSamplingPipeline.CreateChain(LLama.Native.SafeLLamaContextHandle)">
<inheritdoc />
</member>
<member name="T:LLama.Sampling.Grammar">
<summary>
A grammar in GBNF form
</summary>
<param name="Gbnf"></param>
<param name="Root"></param>
</member>
<member name="M:LLama.Sampling.Grammar.#ctor(System.String,System.String)">
<summary>
A grammar in GBNF form
</summary>
<param name="Gbnf"></param>
<param name="Root"></param>
</member>
<member name="P:LLama.Sampling.Grammar.Gbnf">
<summary></summary>
</member>
<member name="P:LLama.Sampling.Grammar.Root">
<summary></summary>
</member>
<member name="T:LLama.Sampling.GreedySamplingPipeline">
<summary>
A sampling pipeline which always selects the most likely token
</summary>
</member>
<member name="P:LLama.Sampling.GreedySamplingPipeline.Grammar">
<summary>
Grammar to apply to constrain possible tokens
</summary>
</member>
<member name="M:LLama.Sampling.GreedySamplingPipeline.CreateChain(LLama.Native.SafeLLamaContextHandle)">
<inheritdoc />
</member>
<member name="T:LLama.Sampling.ISamplingPipeline">
<summary>
Convert a span of logits into a single sampled token. This interface can be implemented to completely customise the sampling process.
</summary>
</member>
<member name="M:LLama.Sampling.ISamplingPipeline.Sample(LLama.Native.SafeLLamaContextHandle,System.Int32)">
<summary>
Sample a single token from the given context at the given position
</summary>
<param name="ctx">The context being sampled from</param>
<param name="index">Position to sample logits from</param>
<returns></returns>
</member>
<member name="M:LLama.Sampling.ISamplingPipeline.Reset">
<summary>
Reset all internal state of the sampling pipeline
</summary>
</member>
<member name="M:LLama.Sampling.ISamplingPipeline.Accept(LLama.Native.LLamaToken)">
<summary>
Update the pipeline, with knowledge that a particular token was just accepted
</summary>
<param name="token"></param>
</member>
<member name="T:LLama.Sampling.ISamplingPipelineExtensions">
<summary>
Extension methods for <see cref="T:LLama.Sampling.ISamplingPipeline"/>
</summary>
</member>
<member name="M:LLama.Sampling.ISamplingPipelineExtensions.Sample(LLama.Sampling.ISamplingPipeline,LLama.LLamaContext,System.Int32)">
<summary>
Sample a single token from the given context at the given position
</summary>
<param name="pipe"></param>
<param name="ctx">The context being sampled from</param>
<param name="index">Position to sample logits from</param>
<returns></returns>
</member>
<member name="T:LLama.StreamingTokenDecoder">
<summary>
Decodes a stream of tokens into a stream of characters
</summary>
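            <example>
            A usage sketch; `weights` is assumed to be an existing LLamaWeights instance and `tokens` a sequence of LLamaToken produced by inference.
            <code>
            var decoder = new StreamingTokenDecoder(Encoding.UTF8, weights);
            foreach (var token in tokens)
            {
                decoder.Add(token);
                // Read all characters decoded so far and clear the internal buffer
                Console.Write(decoder.Read());
            }
            </code>
            </example>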
</member>
<member name="P:LLama.StreamingTokenDecoder.AvailableCharacters">
<summary>
The number of decoded characters waiting to be read
</summary>
</member>
<member name="P:LLama.StreamingTokenDecoder.DecodeSpecialTokens">
<summary>
            If true, special tokens will be converted to text. If false, they will be invisible.
</summary>
</member>
<member name="M:LLama.StreamingTokenDecoder.#ctor(System.Text.Encoding,LLama.LLamaWeights)">
<summary>
Create a new decoder
</summary>
<param name="encoding">Text encoding to use</param>
<param name="weights">Model weights</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.#ctor(LLama.LLamaContext)">
<summary>
Create a new decoder
</summary>
<param name="context">Context to retrieve encoding and model weights from</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.#ctor(System.Text.Encoding,LLama.Native.SafeLLamaContextHandle)">
<summary>
Create a new decoder
</summary>
<param name="encoding">Text encoding to use</param>
<param name="context">Context to retrieve model weights from</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.#ctor(System.Text.Encoding,LLama.Native.SafeLlamaModelHandle)">
<summary>
Create a new decoder
</summary>
<param name="encoding">Text encoding to use</param>
<param name="weights">Models weights to use</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.Add(LLama.Native.LLamaToken)">
<summary>
Add a single token to the decoder
</summary>
<param name="token"></param>
</member>
<member name="M:LLama.StreamingTokenDecoder.Add(System.Int32)">
<summary>
Add a single token to the decoder
</summary>
<param name="token"></param>
</member>
<member name="M:LLama.StreamingTokenDecoder.AddRange``1(``0)">
<summary>
Add all tokens in the given enumerable
</summary>
<param name="tokens"></param>
</member>
<member name="M:LLama.StreamingTokenDecoder.AddRange(System.ReadOnlySpan{LLama.Native.LLamaToken})">
<summary>
Add all tokens in the given span
</summary>
<param name="tokens"></param>
</member>
<member name="M:LLama.StreamingTokenDecoder.Read(System.Collections.Generic.List{System.Char})">
<summary>
Read all decoded characters and clear the buffer
</summary>
<param name="dest"></param>
</member>
<member name="M:LLama.StreamingTokenDecoder.Read">
<summary>
Read all decoded characters as a string and clear the buffer
</summary>
<returns></returns>
</member>
<member name="M:LLama.StreamingTokenDecoder.Reset">
<summary>
Set the decoder back to its initial state
</summary>
</member>
<member name="T:LLama.Transformers.PromptTemplateTransformer">
<summary>
A prompt formatter that will use llama.cpp's template formatter
            If your model is not supported, you will need to define your own formatter according to the chat prompt specification for your model
</summary>
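            <example>
            A usage sketch; `weights` is assumed to be a loaded LLamaWeights, and ChatHistory.AddMessage(AuthorRole, string) is assumed to be available in LLama.Common.
            <code>
            var transformer = new PromptTemplateTransformer(weights, true);

            var history = new ChatHistory();
            history.AddMessage(AuthorRole.System, "You are a helpful assistant.");
            history.AddMessage(AuthorRole.User, "Hello!");

            // Render the history into a single prompt using the model's built-in chat template
            var prompt = transformer.HistoryToText(history);
            </code>
            </example>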
</member>
<member name="M:LLama.Transformers.PromptTemplateTransformer.#ctor(LLama.LLamaWeights,System.Boolean)">
<summary>
A prompt formatter that will use llama.cpp's template formatter
            If your model is not supported, you will need to define your own formatter according to the chat prompt specification for your model
</summary>
</member>
<member name="M:LLama.Transformers.PromptTemplateTransformer.HistoryToText(LLama.Common.ChatHistory)">
<inheritdoc />
</member>
<member name="M:LLama.Transformers.PromptTemplateTransformer.TextToHistory(LLama.Common.AuthorRole,System.String)">
<inheritdoc />
</member>
<member name="M:LLama.Transformers.PromptTemplateTransformer.Clone">
<inheritdoc />
</member>
<member name="M:LLama.Transformers.PromptTemplateTransformer.ToModelPrompt(LLama.LLamaTemplate)">
<summary>
Apply the template to the messages and return the resulting prompt as a string
</summary>
<returns>The formatted template string as defined by the model</returns>
</member>
</members>
</doc>