<?xml version="1.0"?>
|
||
<doc>
|
||
<assembly>
|
||
<name>LLamaSharp</name>
|
||
</assembly>
|
||
<members>
|
||
<member name="T:System.Runtime.CompilerServices.IsExternalInit">
|
||
<summary>
|
||
Reserved to be used by the compiler for tracking metadata.
|
||
This class should not be used by developers in source code.
|
||
</summary>
|
||
<remarks>
|
||
This definition is provided by the <i>IsExternalInit</i> NuGet package (https://www.nuget.org/packages/IsExternalInit).
|
||
Please see https://github.com/manuelroemer/IsExternalInit for more information.
|
||
</remarks>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.IContextParams">
|
||
<summary>
|
||
The parameters for initializing a LLama context from a model.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.ContextSize">
|
||
<summary>
|
||
Model context size (n_ctx)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.BatchSize">
|
||
<summary>
|
||
maximum batch size that can be submitted at once (must be >=32 to use BLAS) (n_batch)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.UBatchSize">
|
||
<summary>
|
||
Physical batch size
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.SeqMax">
|
||
<summary>
|
||
max number of sequences (i.e. distinct states for recurrent models)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.Embeddings">
|
||
<summary>
|
||
If true, extract embeddings (together with logits).
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.RopeFrequencyBase">
|
||
<summary>
|
||
RoPE base frequency (null to fetch from the model)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.RopeFrequencyScale">
|
||
<summary>
|
||
RoPE frequency scaling factor (null to fetch from the model)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.Encoding">
|
||
<summary>
|
||
The encoding to use for models
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.Threads">
|
||
<summary>
|
||
Number of threads (null = autodetect) (n_threads)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.BatchThreads">
|
||
<summary>
|
||
Number of threads to use for batch processing (null = autodetect) (n_threads_batch)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.YarnExtrapolationFactor">
|
||
<summary>
|
||
YaRN extrapolation mix factor (null = from model)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.YarnAttentionFactor">
|
||
<summary>
|
||
YaRN magnitude scaling factor (null = from model)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.YarnBetaFast">
|
||
<summary>
|
||
YaRN low correction dim (null = from model)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.YarnBetaSlow">
|
||
<summary>
|
||
YaRN high correction dim (null = from model)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.YarnOriginalContext">
|
||
<summary>
|
||
YaRN original context length (null = from model)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.YarnScalingType">
|
||
<summary>
|
||
YaRN scaling method to use.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.TypeK">
|
||
<summary>
|
||
Override the type of the K cache
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.TypeV">
|
||
<summary>
|
||
Override the type of the V cache
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.NoKqvOffload">
|
||
<summary>
|
||
Whether to disable offloading the KQV cache to the GPU
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.FlashAttention">
|
||
<summary>
|
||
Whether to use flash attention
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.DefragThreshold">
|
||
<summary>
|
||
defragment the KV cache if holes/size > defrag_threshold, set to <see langword="null"/> or < 0 to disable (default)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.PoolingType">
|
||
<summary>
|
||
How to pool (sum) embedding results by sequence id (ignored if no pooling layer)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IContextParams.AttentionType">
|
||
<summary>
|
||
Attention type to use for embeddings
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.IHistoryTransform">
|
||
<summary>
|
||
Transform history to plain text and vice versa.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.IHistoryTransform.HistoryToText(LLama.Common.ChatHistory)">
|
||
<summary>
|
||
Convert a ChatHistory instance to plain text.
|
||
</summary>
|
||
<param name="history">The ChatHistory instance</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.IHistoryTransform.TextToHistory(LLama.Common.AuthorRole,System.String)">
|
||
<summary>
|
||
Converts plain text to a ChatHistory instance.
|
||
</summary>
|
||
<param name="role">The role for the author.</param>
|
||
<param name="text">The chat history as plain text.</param>
|
||
<returns>The updated history.</returns>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.IHistoryTransform.Clone">
|
||
<summary>
|
||
Copy the transform.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.IInferenceParams">
|
||
<summary>
|
||
The parameters used for inference.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IInferenceParams.TokensKeep">
|
||
<summary>
|
||
number of tokens to keep from initial prompt
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IInferenceParams.MaxTokens">
|
||
<summary>
|
||
how many new tokens to predict (n_predict); set to -1 to generate the response infinitely until it completes
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IInferenceParams.AntiPrompts">
|
||
<summary>
|
||
Sequences where the model will stop generating further tokens.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IInferenceParams.SamplingPipeline">
|
||
<summary>
|
||
Set a custom sampling pipeline to use.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.ILLamaExecutor">
|
||
<summary>
|
||
A high level interface for LLama models.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.ILLamaExecutor.Context">
|
||
<summary>
|
||
The loaded context for this executor.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.ILLamaExecutor.IsMultiModal">
|
||
<summary>
|
||
Indicates whether this is a multi-modal model and there is an image to process.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.ILLamaExecutor.ClipModel">
|
||
<summary>
|
||
Multi-Modal Projections / Clip Model weights
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.ILLamaExecutor.Images">
|
||
<summary>
|
||
List of images, in byte array format.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.ILLamaExecutor.InferAsync(System.String,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
|
||
<summary>
|
||
Asynchronously infers a response from the model.
|
||
</summary>
|
||
<param name="text">Your prompt</param>
|
||
<param name="inferenceParams">Any additional parameters</param>
|
||
<param name="token">A cancellation token.</param>
|
||
<returns></returns>
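<example>
A brief sketch, not part of the original documentation, of consuming the result of this method. It assumes InferAsync returns an IAsyncEnumerable of strings (not spelled out in this file); `executor`, `inferenceParams` and `cancellationToken` are placeholders for values created elsewhere.
<code>
// Stream the generated text piece by piece as it is produced.
await foreach (var text in executor.InferAsync("Why is the sky blue?", inferenceParams, cancellationToken))
{
    Console.Write(text);
}
</code>
</example>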
|
||
</member>
|
||
<member name="T:LLama.Abstractions.ILLamaParams">
|
||
<summary>
|
||
Convenience interface for implementing both types of parameters.
|
||
</summary>
|
||
<remarks>Mostly exists for backwards compatibility reasons, when these two were not split.</remarks>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.IModelParams">
|
||
<summary>
|
||
The parameters for initializing a LLama model.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IModelParams.MainGpu">
|
||
<summary>
|
||
main_gpu interpretation depends on split_mode:
|
||
<list type="bullet">
|
||
<item>
|
||
<term>None</term>
|
||
<description>The GPU that is used for the entire model.</description>
|
||
</item>
|
||
<item>
|
||
<term>Row</term>
|
||
<description>The GPU that is used for small tensors and intermediate results.</description>
|
||
</item>
|
||
<item>
|
||
<term>Layer</term>
|
||
<description>Ignored.</description>
|
||
</item>
|
||
</list>
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IModelParams.SplitMode">
|
||
<summary>
|
||
How to split the model across multiple GPUs
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IModelParams.GpuLayerCount">
|
||
<summary>
|
||
Number of layers to run in VRAM / GPU memory (n_gpu_layers)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IModelParams.UseMemorymap">
|
||
<summary>
|
||
Use mmap for faster loads (use_mmap)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IModelParams.UseMemoryLock">
|
||
<summary>
|
||
Use mlock to keep model in memory (use_mlock)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IModelParams.ModelPath">
|
||
<summary>
|
||
Model path (model)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IModelParams.TensorSplits">
|
||
<summary>
|
||
how split tensors should be distributed across GPUs
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IModelParams.VocabOnly">
|
||
<summary>
|
||
Load vocab only (no weights)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IModelParams.CheckTensors">
|
||
<summary>
|
||
Validate model tensor data before loading
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.IModelParams.MetadataOverrides">
|
||
<summary>
|
||
Override specific metadata items in the model
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.TensorSplitsCollection">
|
||
<summary>
|
||
A fixed size array to set the tensor splits across multiple GPUs
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.TensorSplitsCollection.Length">
|
||
<summary>
|
||
The size of this array
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.TensorSplitsCollection.Item(System.Int32)">
|
||
<summary>
|
||
Get or set the proportion of work to do on the given device.
|
||
</summary>
|
||
<remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
|
||
<param name="index"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.TensorSplitsCollection.#ctor(System.Single[])">
|
||
<summary>
|
||
Create a new tensor splits collection, copying the given values
|
||
</summary>
|
||
<param name="splits"></param>
|
||
<exception cref="T:System.ArgumentException"></exception>
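<example>
An illustrative sketch, not from the original docs, relating this constructor to the proportions described for the indexer above. Assigning the result to a parameters object is shown only as a hypothetical usage.
<code>
// Assign 60% of the work to GPU 0 and 40% to GPU 1, matching the "[ 3, 2 ]" remark above.
var splits = new TensorSplitsCollection(new float[] { 3f, 2f });

// Hypothetical usage: set it on the TensorSplits property of an IModelParams implementation.
// modelParams.TensorSplits = splits;
</code>
</example>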
|
||
</member>
|
||
<member name="M:LLama.Abstractions.TensorSplitsCollection.#ctor">
|
||
<summary>
|
||
Create a new tensor splits collection with all values initialised to the default
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.TensorSplitsCollection.Clear">
|
||
<summary>
|
||
Set all values to zero
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.TensorSplitsCollection.GetEnumerator">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Abstractions.TensorSplitsCollection.System#Collections#IEnumerable#GetEnumerator">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Abstractions.TensorSplitsCollectionConverter">
|
||
<summary>
|
||
A JSON converter for <see cref="T:LLama.Abstractions.TensorSplitsCollection"/>
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.TensorSplitsCollectionConverter.Read(System.Text.Json.Utf8JsonReader@,System.Type,System.Text.Json.JsonSerializerOptions)">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.TensorSplitsCollectionConverter.Write(System.Text.Json.Utf8JsonWriter,LLama.Abstractions.TensorSplitsCollection,System.Text.Json.JsonSerializerOptions)">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.MetadataOverride">
|
||
<summary>
|
||
An override for a single key/value pair in model metadata
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.MetadataOverride.Key">
|
||
<summary>
|
||
Get the key being overridden by this override
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.MetadataOverride.#ctor(System.String,System.Int32)">
|
||
<summary>
|
||
Create a new override for an int key
|
||
</summary>
|
||
<param name="key"></param>
|
||
<param name="value"></param>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.MetadataOverride.#ctor(System.String,System.Single)">
|
||
<summary>
|
||
Create a new override for a float key
|
||
</summary>
|
||
<param name="key"></param>
|
||
<param name="value"></param>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.MetadataOverride.#ctor(System.String,System.Boolean)">
|
||
<summary>
|
||
Create a new override for a boolean key
|
||
</summary>
|
||
<param name="key"></param>
|
||
<param name="value"></param>
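<example>
An illustrative override, not part of the original docs. The metadata key shown is a hypothetical example; any key used must actually exist in the model's GGUF metadata.
<code>
// Hypothetical key name, for illustration only.
var addBosOverride = new MetadataOverride("tokenizer.ggml.add_bos_token", false);
</code>
</example>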
|
||
</member>
|
||
<member name="M:LLama.Abstractions.MetadataOverride.#ctor(System.String,System.String)">
|
||
<summary>
|
||
Create a new override for a string key
|
||
</summary>
|
||
<param name="key"></param>
|
||
<param name="value"></param>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.MetadataOverrideConverter">
|
||
<summary>
|
||
A JSON converter for <see cref="T:LLama.Abstractions.MetadataOverride"/>
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.MetadataOverrideConverter.Read(System.Text.Json.Utf8JsonReader@,System.Type,System.Text.Json.JsonSerializerOptions)">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.MetadataOverrideConverter.Write(System.Text.Json.Utf8JsonWriter,LLama.Abstractions.MetadataOverride,System.Text.Json.JsonSerializerOptions)">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.INativeLibrary">
|
||
<summary>
|
||
Descriptor of a native library.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Abstractions.INativeLibrary.Metadata">
|
||
<summary>
|
||
Metadata of this library.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.INativeLibrary.Prepare(LLama.Native.SystemInfo,LLama.Native.NativeLogConfig.LLamaLogCallback)">
|
||
<summary>
|
||
Prepare the native library file and return its local path.
If it's a relative path, LLamaSharp will search for it in the search directories you set.
|
||
</summary>
|
||
<param name="systemInfo">The system information of the current machine.</param>
|
||
<param name="logCallback">The log callback.</param>
|
||
<returns>
|
||
The relative paths of the library. Multiple paths may be returned so that they are tried one by one. If no file is available, return an empty array.
|
||
</returns>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.ITextStreamTransform">
|
||
<summary>
|
||
Takes a stream of tokens and transforms them.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.ITextStreamTransform.TransformAsync(System.Collections.Generic.IAsyncEnumerable{System.String})">
|
||
<summary>
|
||
Takes a stream of tokens and transforms them, returning a new stream of tokens asynchronously.
|
||
</summary>
|
||
<param name="tokens"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.ITextStreamTransform.Clone">
|
||
<summary>
|
||
Copy the transform.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.ITextTransform">
|
||
<summary>
|
||
An interface for text transformations.
|
||
These can be used to compose a pipeline of text transformations, such as:
|
||
- Tokenization
|
||
- Lowercasing
|
||
- Punctuation removal
|
||
- Trimming
|
||
- etc.
|
||
</summary>
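<example>
A minimal illustrative implementation, not from the original docs. It assumes Clone returns ITextTransform, which is not stated explicitly in this file.
<code>
// A trivial transform that lowercases all input text.
public sealed class LowercaseTransform : ITextTransform
{
    public string Transform(string text) => text.ToLowerInvariant();

    public ITextTransform Clone() => new LowercaseTransform();
}
</code>
</example>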
|
||
</member>
|
||
<member name="M:LLama.Abstractions.ITextTransform.Transform(System.String)">
|
||
<summary>
|
||
Takes a string and transforms it.
|
||
</summary>
|
||
<param name="text"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.ITextTransform.Clone">
|
||
<summary>
|
||
Copy the transform.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.LLamaExecutorExtensions">
|
||
<summary>
|
||
Extension methods for the <see cref="T:LLama.Abstractions.ILLamaExecutor" /> interface.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.AsChatClient(LLama.Abstractions.ILLamaExecutor,LLama.Abstractions.IHistoryTransform,LLama.Abstractions.ITextStreamTransform)">
|
||
<summary>Gets an <see cref="T:Microsoft.Extensions.AI.IChatClient"/> instance for the specified <see cref="T:LLama.Abstractions.ILLamaExecutor"/>.</summary>
|
||
<param name="executor">The executor.</param>
|
||
<param name="historyTransform">The <see cref="T:LLama.Abstractions.IHistoryTransform"/> to use to transform an input list messages into a prompt.</param>
|
||
<param name="outputTransform">The <see cref="T:LLama.Abstractions.ITextStreamTransform"/> to use to transform the output into text.</param>
|
||
<returns>An <see cref="T:Microsoft.Extensions.AI.IChatClient"/> instance for the provided <see cref="T:LLama.Abstractions.ILLamaExecutor" />.</returns>
|
||
<exception cref="T:System.ArgumentNullException"><paramref name="executor"/> is null.</exception>
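<example>
A hedged sketch, not from the original docs, of wrapping an executor as a chat client. The IChatClient, ChatMessage and ChatRole types come from Microsoft.Extensions.AI, and the exact shape of that preview API may differ between package versions; `executor` is a placeholder for any ILLamaExecutor.
<code>
IChatClient client = executor.AsChatClient();

var messages = new List<ChatMessage> { new(ChatRole.User, "Hello!") };
var completion = await client.CompleteAsync(messages);
</code>
</example>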
|
||
</member>
|
||
<member name="P:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.Metadata">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.Dispose">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.GetService(System.Type,System.Object)">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.CompleteAsync(System.Collections.Generic.IList{Microsoft.Extensions.AI.ChatMessage},Microsoft.Extensions.AI.ChatOptions,System.Threading.CancellationToken)">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.CompleteStreamingAsync(System.Collections.Generic.IList{Microsoft.Extensions.AI.ChatMessage},Microsoft.Extensions.AI.ChatOptions,System.Threading.CancellationToken)">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.CreatePrompt(System.Collections.Generic.IList{Microsoft.Extensions.AI.ChatMessage})">
|
||
<summary>Format the chat messages into a string prompt.</summary>
|
||
</member>
|
||
<member name="M:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.CreateInferenceParams(Microsoft.Extensions.AI.ChatOptions)">
|
||
<summary>Convert the chat options to inference parameters.</summary>
|
||
</member>
|
||
<member name="T:LLama.Abstractions.LLamaExecutorExtensions.LLamaExecutorChatClient.AppendAssistantHistoryTransform">
|
||
<summary>A default transform that appends "Assistant: " to the end.</summary>
|
||
</member>
|
||
<member name="T:LLama.AntipromptProcessor">
|
||
<summary>
|
||
AntipromptProcessor keeps track of past tokens looking for any set Anti-Prompts
|
||
</summary>
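<example>
A minimal sketch, not part of the original documentation, of feeding streamed text into the processor and stopping when an anti-prompt appears. Only members documented below are used; `pieces` is a placeholder for whatever stream of generated text your executor produces.
<code>
var antiprompts = new AntipromptProcessor(new[] { "User:" });

foreach (var piece in pieces)
{
    // Add returns true once the accumulated text ends with any anti-prompt.
    if (antiprompts.Add(piece))
        break;
}
</code>
</example>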
|
||
</member>
|
||
<member name="M:LLama.AntipromptProcessor.#ctor(System.Collections.Generic.IEnumerable{System.String})">
|
||
<summary>
|
||
Initializes a new instance of the <see cref="T:LLama.AntipromptProcessor"/> class.
|
||
</summary>
|
||
<param name="antiprompts">The antiprompts.</param>
|
||
</member>
|
||
<member name="M:LLama.AntipromptProcessor.AddAntiprompt(System.String)">
|
||
<summary>
|
||
Add an antiprompt to the collection
|
||
</summary>
|
||
<param name="antiprompt"></param>
|
||
</member>
|
||
<member name="M:LLama.AntipromptProcessor.SetAntiprompts(System.Collections.Generic.IEnumerable{System.String})">
|
||
<summary>
|
||
Overwrite all current antiprompts with a new set
|
||
</summary>
|
||
<param name="antiprompts"></param>
|
||
</member>
|
||
<member name="M:LLama.AntipromptProcessor.Add(System.String)">
|
||
<summary>
|
||
Add some text and check if the buffer now ends with any antiprompt
|
||
</summary>
|
||
<param name="text"></param>
|
||
<returns>true if the text buffer ends with any antiprompt</returns>
|
||
</member>
|
||
<member name="T:LLama.Batched.BatchedExecutor">
|
||
<summary>
|
||
A batched executor that can infer multiple separate "conversations" simultaneously.
|
||
</summary>
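<example>
A rough sketch, not from the original docs, of running two conversations through one executor. The BatchedExecutor and Conversation members used here are documented below; LLamaWeights.LoadFromFile, ModelParams, LLamaContext.Tokenize and the default value of the allLogits parameter are assumptions taken from the wider LLamaSharp API and are not documented in this file.
<code>
// Assumed helpers from the wider LLamaSharp API (not documented in this file).
var parameters = new ModelParams("model.gguf");
using var weights = LLamaWeights.LoadFromFile(parameters);
using var executor = new BatchedExecutor(weights, parameters);

// Two independent conversation threads sharing a single context.
using var left = executor.Create();
using var right = executor.Create();
left.Prompt(executor.Context.Tokenize("Question A: "));
right.Prompt(executor.Context.Tokenize("Question B: "));

// Run one inference step for every conversation with pending tokens.
await executor.Infer(CancellationToken.None);

// Each conversation can now be sampled, e.g. via the ConversationExtensions.Sample overloads.
</code>
</example>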
|
||
</member>
|
||
<member name="F:LLama.Batched.BatchedExecutor._inferenceLock">
|
||
<summary>
|
||
Set to 1 using interlocked exchange while inference is running
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.BatchedExecutor.Epoch">
|
||
<summary>
|
||
Epoch is incremented twice every time Infer is called. Conversations can use this to keep track of
|
||
whether they're waiting for inference, or can be sampled.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.BatchedExecutor.Context">
|
||
<summary>
|
||
The <see cref="T:LLama.LLamaContext"/> this executor is using
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.BatchedExecutor.Model">
|
||
<summary>
|
||
The <see cref="T:LLama.LLamaWeights"/> this executor is using
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.BatchedExecutor.BatchedTokenCount">
|
||
<summary>
|
||
Get the number of tokens in the batch, waiting for <see cref="M:LLama.Batched.BatchedExecutor.Infer(System.Threading.CancellationToken)"/> to be called
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.BatchedExecutor.BatchQueueCount">
|
||
<summary>
|
||
Number of batches in the queue, waiting for <see cref="M:LLama.Batched.BatchedExecutor.Infer(System.Threading.CancellationToken)"/> to be called
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.BatchedExecutor.IsDisposed">
|
||
<summary>
|
||
Check if this executor has been disposed.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Batched.BatchedExecutor.#ctor(LLama.LLamaWeights,LLama.Abstractions.IContextParams)">
|
||
<summary>
|
||
Create a new batched executor
|
||
</summary>
|
||
<param name="model">The model to use</param>
|
||
<param name="contextParams">Parameters to create a new context</param>
|
||
</member>
|
||
<member name="M:LLama.Batched.BatchedExecutor.Create">
|
||
<summary>
|
||
Start a new <see cref="T:LLama.Batched.Conversation"/>
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Batched.BatchedExecutor.Load(System.String)">
|
||
<summary>
|
||
Load a conversation that was previously saved to a file. Once loaded the conversation will
|
||
need to be prompted.
|
||
</summary>
|
||
<param name="filepath"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ObjectDisposedException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.BatchedExecutor.Load(LLama.Batched.Conversation.State)">
|
||
<summary>
|
||
Load a conversation that was previously saved into memory. Once loaded the conversation will need to be prompted.
|
||
</summary>
|
||
<param name="state"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ObjectDisposedException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.BatchedExecutor.Infer(System.Threading.CancellationToken)">
|
||
<summary>
|
||
Run inference for all conversations in the batch which have pending tokens.
|
||
|
||
If the result is `NoKvSlot` then there is not enough memory for inference, try disposing some conversation
|
||
threads and running inference again.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Batched.BatchedExecutor.Dispose">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Batched.BatchedExecutor.GetTokenBatch(System.Int32)">
|
||
<summary>
|
||
Get a reference to a batch that tokens can be added to.
|
||
</summary>
|
||
<param name="minCapacity"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentOutOfRangeException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.BatchedExecutor.GetEmbeddingBatch(System.Int32)">
|
||
<summary>
|
||
Get a reference to a batch that embeddings can be added to.
|
||
</summary>
|
||
<param name="minCapacity"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentOutOfRangeException"></exception>
|
||
</member>
|
||
<member name="T:LLama.Batched.Conversation">
|
||
<summary>
|
||
A single conversation thread that can be prompted (adding tokens from the user) or inferred (extracting a token from the LLM)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Batched.Conversation._forked">
|
||
<summary>
|
||
Indicates if this conversation has been "forked" and may share logits with another conversation.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Batched.Conversation._batchSampleIndices">
|
||
<summary>
|
||
Stores the indices to sample from. Contains <see cref="F:LLama.Batched.Conversation._batchSampleCount"/> valid items.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.Conversation.Executor">
|
||
<summary>
|
||
The executor which this conversation belongs to
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.Conversation.ConversationId">
|
||
<summary>
|
||
Unique ID for this conversation
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.Conversation.TokenCount">
|
||
<summary>
|
||
Total number of tokens in this conversation, cannot exceed the context length.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.Conversation.IsDisposed">
|
||
<summary>
|
||
Indicates if this conversation has been disposed, nothing can be done with a disposed conversation
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.Conversation.RequiresInference">
|
||
<summary>
|
||
Indicates if this conversation is waiting for inference to be run on the executor. "Prompt" and "Sample" cannot be called when this is true.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.Conversation.RequiresSampling">
|
||
<summary>
|
||
Indicates that this conversation should be sampled.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Finalize">
|
||
<summary>
|
||
Finalizer for Conversation
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Dispose">
|
||
<summary>
|
||
End this conversation, freeing all resources used by it
|
||
</summary>
|
||
<exception cref="T:System.ObjectDisposedException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Fork">
|
||
<summary>
|
||
Create a copy of the current conversation
|
||
</summary>
|
||
<remarks>The copy shares internal state, so consumes very little extra memory.</remarks>
|
||
<returns></returns>
|
||
<exception cref="T:System.ObjectDisposedException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.GetSampleIndex(System.Int32)">
|
||
<summary>
|
||
Get the index in the context from which each token can be sampled. The return value of this function can be used to retrieve logits
(<see cref="M:LLama.Native.SafeLLamaContextHandle.GetLogitsIth(System.Int32)"/>) or to sample a token (<see cref="M:LLama.Native.SafeLLamaSamplerChainHandle.Sample(LLama.Native.SafeLLamaContextHandle,System.Int32)"/>).
|
||
</summary>
|
||
<param name="offset">How far from the <b>end</b> of the previous prompt should logits be sampled. Any value other than 0 requires
|
||
allLogits to have been set during prompting.<br />
|
||
For example if 5 tokens were supplied in the last prompt call:
|
||
<list type="bullet">
|
||
<item>The logits of the first token can be accessed with 4</item>
|
||
<item>The logits of the second token can be accessed with 3</item>
|
||
<item>The logits of the third token can be accessed with 2</item>
|
||
<item>The logits of the fourth token can be accessed with 1</item>
|
||
<item>The logits of the fifth token can be accessed with 0</item>
|
||
</list>
|
||
</param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ObjectDisposedException"></exception>
|
||
<exception cref="T:LLama.Batched.CannotSampleRequiresPromptException">Thrown if this conversation was not prompted before the previous call to infer</exception>
|
||
<exception cref="T:LLama.Batched.CannotSampleRequiresInferenceException">Thrown if Infer() must be called on the executor</exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Sample(System.Int32)">
|
||
<summary>
|
||
Get the logits from this conversation, ready for sampling
|
||
</summary>
|
||
<param name="offset">How far from the <b>end</b> of the previous prompt should logits be sampled. Any value other than 0 requires allLogits to have been set during prompting</param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ObjectDisposedException"></exception>
|
||
<exception cref="T:LLama.Batched.CannotSampleRequiresPromptException">Thrown if this conversation was not prompted before the previous call to infer</exception>
|
||
<exception cref="T:LLama.Batched.CannotSampleRequiresInferenceException">Thrown if Infer() must be called on the executor</exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Prompt(System.Collections.Generic.List{LLama.Native.LLamaToken},System.Boolean)">
|
||
<summary>
|
||
Add tokens to this conversation
|
||
</summary>
|
||
<param name="tokens"></param>
|
||
<param name="allLogits">If true, generate logits for all tokens. If false, only generate logits for the last token.</param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ObjectDisposedException"></exception>
|
||
<exception cref="T:LLama.Batched.AlreadyPromptedConversationException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Prompt(System.ReadOnlySpan{LLama.Native.LLamaToken},System.Boolean)">
|
||
<summary>
|
||
Add tokens to this conversation
|
||
</summary>
|
||
<param name="tokens"></param>
|
||
<param name="allLogits">If true, generate logits for all tokens. If false, only generate logits for the last token.</param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ObjectDisposedException"></exception>
|
||
<exception cref="T:LLama.Batched.AlreadyPromptedConversationException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Prompt(LLama.Native.LLamaToken)">
|
||
<summary>
|
||
Add a single token to this conversation
|
||
</summary>
|
||
<param name="token"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ObjectDisposedException"></exception>
|
||
<exception cref="T:LLama.Batched.AlreadyPromptedConversationException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Prompt(LLama.Native.SafeLlavaImageEmbedHandle)">
|
||
<summary>
|
||
Prompt this conversation with an image embedding
|
||
</summary>
|
||
<param name="embedding"></param>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Prompt(System.ReadOnlySpan{System.Single})">
|
||
<summary>
|
||
Prompt this conversation with embeddings
|
||
</summary>
|
||
<param name="embeddings">The raw values of the embeddings. This span must divide equally by the embedding size of this model.</param>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Modify(LLama.Batched.Conversation.ModifyKvCache)">
|
||
<summary>
|
||
Directly modify the KV cache of this conversation
|
||
</summary>
|
||
<param name="modifier"></param>
|
||
<exception cref="T:LLama.Batched.CannotModifyWhileRequiresInferenceException">Thrown if this method is called while <see cref="P:LLama.Batched.Conversation.RequiresInference"/> == true</exception>
|
||
</member>
|
||
<member name="T:LLama.Batched.Conversation.KvAccessor">
|
||
<summary>
|
||
Provides direct access to the KV cache of a <see cref="T:LLama.Batched.Conversation"/>.
|
||
See <see cref="M:LLama.Batched.Conversation.Modify(LLama.Batched.Conversation.ModifyKvCache)"/> for how to use this.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.KvAccessor.Remove(LLama.Native.LLamaPos,LLama.Native.LLamaPos)">
|
||
<summary>
|
||
Removes all tokens that have positions in [start, end)
|
||
</summary>
|
||
<param name="start">Start position (inclusive)</param>
|
||
<param name="end">End position (exclusive)</param>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.KvAccessor.Remove(LLama.Native.LLamaPos,System.Int32)">
|
||
<summary>
|
||
Removes all tokens starting from the given position
|
||
</summary>
|
||
<param name="start">Start position (inclusive)</param>
|
||
<param name="count">Number of tokens</param>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.KvAccessor.Add(LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
|
||
<summary>
|
||
Adds relative position "delta" to all tokens that have positions in [p0, p1).
|
||
If the KV cache is RoPEd, the KV data is updated
|
||
accordingly
|
||
</summary>
|
||
<param name="start">Start position (inclusive)</param>
|
||
<param name="end">End position (exclusive)</param>
|
||
<param name="delta">Amount to add on to each token position</param>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.KvAccessor.Divide(LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
|
||
<summary>
|
||
Integer division of the positions by factor of `d > 1`.
|
||
If the KV cache is RoPEd, the KV data is updated accordingly.
|
||
</summary>
|
||
<param name="start">Start position (inclusive). If less than zero, it is clamped to zero.</param>
|
||
<param name="end">End position (exclusive). If less than zero, it is treated as "infinity".</param>
|
||
<param name="divisor">Amount to divide each position by.</param>
|
||
</member>
|
||
<member name="T:LLama.Batched.Conversation.ModifyKvCache">
|
||
<summary>
|
||
A function which can temporarily access the KV cache of a <see cref="T:LLama.Batched.Conversation"/> to modify it directly
|
||
</summary>
|
||
<param name="end">The current end token of this conversation</param>
|
||
<param name="kv">An <see cref="T:LLama.Batched.Conversation.KvAccessor"/> which allows direct access to modify the KV cache</param>
|
||
<returns>The new end token position</returns>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Save(System.String)">
|
||
<summary>
|
||
Save the complete state of this conversation to a file. If the file already exists it will be overwritten.
|
||
</summary>
|
||
<param name="filepath"></param>
|
||
<exception cref="T:LLama.Batched.CannotSaveWhileRequiresInferenceException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Save">
|
||
<summary>
|
||
Save the complete state of this conversation in system memory.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Load(System.String)">
|
||
<summary>
|
||
Load state from a file
|
||
This should only ever be called by the BatchedExecutor, on a newly created conversation object!
|
||
</summary>
|
||
<param name="filepath"></param>
|
||
<exception cref="T:System.InvalidOperationException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.Load(LLama.Batched.Conversation.State)">
|
||
<summary>
|
||
Load state from a previously saved state.
|
||
This should only ever be called by the BatchedExecutor, on a newly created conversation object!
|
||
</summary>
|
||
<param name="state"></param>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.PrivateState.Dispose">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Batched.Conversation.State">
|
||
<summary>
|
||
In memory saved state of a <see cref="T:LLama.Batched.Conversation"/>
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.Conversation.State.IsDisposed">
|
||
<summary>
|
||
Indicates if this state has been disposed
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Batched.Conversation.State.Size">
|
||
<summary>
|
||
Get the size in bytes of this state object
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.State.Dispose">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Batched.Conversation.State.#ctor">
|
||
<summary>
|
||
Internal constructor to prevent anyone outside of LLamaSharp from extending this class
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Batched.ConversationExtensions">
|
||
<summary>
|
||
Extension methods for <see cref="T:LLama.Batched.Conversation"/>
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Batched.ConversationExtensions.Sample(LLama.Batched.Conversation,LLama.Native.SafeLLamaSamplerChainHandle,System.Int32)">
|
||
<summary>
|
||
Sample a token from this conversation using the given sampler chain
|
||
</summary>
|
||
<param name="conversation"><see cref="T:LLama.Batched.Conversation"/> to sample from</param>
|
||
<param name="sampler"></param>
|
||
<param name="offset">Offset from the end of the conversation to the logits to sample, see <see cref="M:LLama.Batched.Conversation.GetSampleIndex(System.Int32)"/> for more details</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Batched.ConversationExtensions.Sample(LLama.Batched.Conversation,LLama.Sampling.ISamplingPipeline,System.Int32)">
|
||
<summary>
|
||
Sample a token from this conversation using the given sampling pipeline
|
||
</summary>
|
||
<param name="conversation"><see cref="T:LLama.Batched.Conversation"/> to sample from</param>
|
||
<param name="sampler"></param>
|
||
<param name="offset">Offset from the end of the conversation to the logits to sample, see <see cref="M:LLama.Batched.Conversation.GetSampleIndex(System.Int32)"/> for more details</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Batched.ConversationExtensions.Rewind(LLama.Batched.Conversation,System.Int32)">
|
||
<summary>
|
||
Rewind a <see cref="T:LLama.Batched.Conversation"/> back to an earlier state by removing tokens from the end
|
||
</summary>
|
||
<param name="conversation">The conversation to rewind</param>
|
||
<param name="tokens">The number of tokens to rewind</param>
|
||
<exception cref="T:System.ArgumentOutOfRangeException">Thrown if `tokens` parameter is larger than TokenCount</exception>
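<example>
A small sketch, not part of the original docs, contrasting this method with ShiftLeft (documented just below); `conversation` is a placeholder for an existing conversation and the counts are arbitrary.
<code>
// Undo the last 4 tokens entirely (reduces TokenCount by 4).
conversation.Rewind(4);

// Alternatively, free space in a full context: remove 256 tokens that follow the
// first 32 kept prompt tokens, shifting everything after them to the left.
conversation.ShiftLeft(256, 32);
</code>
</example>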
|
||
</member>
|
||
<member name="M:LLama.Batched.ConversationExtensions.ShiftLeft(LLama.Batched.Conversation,System.Int32,System.Int32)">
|
||
<summary>
|
||
Shift all tokens over to the left, removing "count" tokens from the start and shifting everything over.
|
||
Leaves "keep" tokens at the start completely untouched. This can be used to free up space when the context
|
||
gets full, keeping the prompt at the start intact.
|
||
</summary>
|
||
<param name="conversation">The conversation to shift</param>
|
||
<param name="count">How much to shift tokens over by</param>
|
||
<param name="keep">The number of tokens at the start which should <b>not</b> be shifted</param>
|
||
</member>
|
||
<member name="T:LLama.Batched.ExperimentalBatchedExecutorException">
|
||
<summary>
|
||
Base class for exceptions thrown from <see cref="T:LLama.Batched.BatchedExecutor"/>
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Batched.AlreadyPromptedConversationException">
|
||
<summary>
|
||
This exception is thrown when "Prompt()" is called on a <see cref="T:LLama.Batched.Conversation"/> which has
|
||
already been prompted and before "Infer()" has been called on the associated
|
||
<see cref="T:LLama.Batched.BatchedExecutor"/>.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Batched.CannotSampleRequiresInferenceException">
|
||
<summary>
|
||
This exception is thrown when "Sample()" is called on a <see cref="T:LLama.Batched.Conversation"/> which has
|
||
already been prompted and before "Infer()" has been called on the associated
|
||
<see cref="T:LLama.Batched.BatchedExecutor"/>.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Batched.CannotSampleRequiresPromptException">
|
||
<summary>
|
||
This exception is thrown when "Sample()" is called on a <see cref="T:LLama.Batched.Conversation"/> which was not
first prompted.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Batched.CannotModifyWhileRequiresInferenceException">
|
||
<summary>
|
||
This exception is thrown when <see cref="M:LLama.Batched.Conversation.Modify(LLama.Batched.Conversation.ModifyKvCache)"/> is called when <see cref="P:LLama.Batched.Conversation.RequiresInference"/> = true
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Batched.CannotSaveWhileRequiresInferenceException">
|
||
<summary>
|
||
This exception is thrown when "Save()" is called on a <see cref="T:LLama.Batched.Conversation"/> which has
|
||
already been prompted and before "Infer()" has been called on the associated
<see cref="T:LLama.Batched.BatchedExecutor"/>.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Batched.LLamaContextExtensions.SaveState(LLama.LLamaContext,System.String,LLama.Native.LLamaSeqId,System.ReadOnlySpan{System.Byte})">
|
||
<summary>
|
||
Save the state of a particular sequence to the specified path. Also saves some extra data which will be returned when loading.
|
||
Data saved with this method <b>must</b> be saved with <see cref="M:LLama.Batched.LLamaContextExtensions.LoadState(LLama.LLamaContext,System.String,LLama.Native.LLamaSeqId,System.Byte[]@)"/>
|
||
</summary>
|
||
<param name="context"></param>
|
||
<param name="filename"></param>
|
||
<param name="sequence"></param>
|
||
<param name="header"></param>
|
||
</member>
|
||
<member name="M:LLama.Batched.LLamaContextExtensions.LoadState(LLama.LLamaContext,System.String,LLama.Native.LLamaSeqId,System.Byte[]@)">
|
||
<summary>
|
||
Load the state from the specified path into a particular sequence, also reading the header data. Must only be used with
|
||
data previously saved with <see cref="M:LLama.Batched.LLamaContextExtensions.SaveState(LLama.LLamaContext,System.String,LLama.Native.LLamaSeqId,System.ReadOnlySpan{System.Byte})"/>
|
||
</summary>
|
||
<param name="context"></param>
|
||
<param name="filename"></param>
|
||
<param name="sequence"></param>
|
||
<param name="header"></param>
|
||
<exception cref="T:System.InvalidOperationException"></exception>
|
||
</member>
|
||
<member name="T:LLama.ChatSession">
|
||
<summary>
|
||
The main chat session class.
|
||
</summary>
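<example>
A small sketch, not from the original docs, of a chat loop built from the members documented below. The `executor` variable stands in for any ILLamaExecutor created elsewhere, and the anti-prompt string is purely illustrative.
<code>
var session = new ChatSession(executor);
session.AddSystemMessage("You are a helpful assistant.");

var message = new ChatHistory.Message(AuthorRole.User, "Hello!");
var inferenceParams = new InferenceParams { MaxTokens = 256, AntiPrompts = new[] { "User:" } };

// Stream the assistant's reply as it is generated.
await foreach (var text in session.ChatAsync(message, inferenceParams, CancellationToken.None))
{
    Console.Write(text);
}
</code>
</example>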
|
||
</member>
|
||
<member name="F:LLama.ChatSession.MODEL_STATE_FILENAME">
|
||
<summary>
|
||
The filename for the serialized model state (KV cache, etc).
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.ChatSession.EXECUTOR_STATE_FILENAME">
|
||
<summary>
|
||
The filename for the serialized executor state.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.ChatSession.HISTORY_STATE_FILENAME">
|
||
<summary>
|
||
The filename for the serialized chat history.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.ChatSession.INPUT_TRANSFORM_FILENAME">
|
||
<summary>
|
||
The filename for the serialized input transform pipeline.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.ChatSession.OUTPUT_TRANSFORM_FILENAME">
|
||
<summary>
|
||
The filename for the serialized output transform.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.ChatSession.HISTORY_TRANSFORM_FILENAME">
|
||
<summary>
|
||
The filename for the serialized history transform.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.ChatSession.Executor">
|
||
<summary>
|
||
The executor for this session.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.ChatSession.History">
|
||
<summary>
|
||
The chat history for this session.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.ChatSession.HistoryTransform">
|
||
<summary>
|
||
The history transform used in this session.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.ChatSession.InputTransformPipeline">
|
||
<summary>
|
||
The input transform pipeline used in this session.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.ChatSession.OutputTransform">
|
||
<summary>
|
||
The output transform used in this session.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.InitializeSessionFromHistoryAsync(LLama.Abstractions.ILLamaExecutor,LLama.Common.ChatHistory,LLama.Abstractions.IHistoryTransform)">
|
||
<summary>
|
||
Create a new chat session and preprocess history.
|
||
</summary>
|
||
<param name="executor">The executor for this session</param>
|
||
<param name="history">History for this session</param>
|
||
<param name="transform">History Transform for this session</param>
|
||
<returns>A new chat session.</returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.#ctor(LLama.Abstractions.ILLamaExecutor)">
|
||
<summary>
|
||
Create a new chat session.
|
||
</summary>
|
||
<param name="executor">The executor for this session</param>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.#ctor(LLama.Abstractions.ILLamaExecutor,LLama.Common.ChatHistory)">
|
||
<summary>
|
||
Create a new chat session with a custom history.
|
||
</summary>
|
||
<param name="executor"></param>
|
||
<param name="history"></param>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.WithHistoryTransform(LLama.Abstractions.IHistoryTransform)">
|
||
<summary>
|
||
Use a custom history transform.
|
||
</summary>
|
||
<param name="transform"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.AddInputTransform(LLama.Abstractions.ITextTransform)">
|
||
<summary>
|
||
Add a text transform to the input transform pipeline.
|
||
</summary>
|
||
<param name="transform"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.WithOutputTransform(LLama.Abstractions.ITextStreamTransform)">
|
||
<summary>
|
||
Use a custom output transform.
|
||
</summary>
|
||
<param name="transform"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.SaveSession(System.String)">
|
||
<summary>
|
||
Save a session to a directory.
|
||
</summary>
|
||
<param name="path"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.GetSessionState">
|
||
<summary>
|
||
Get the session state.
|
||
</summary>
|
||
<returns>SessionState object representing session state in-memory</returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.LoadSession(LLama.SessionState,System.Boolean)">
|
||
<summary>
|
||
Load a session from a session state.
|
||
</summary>
|
||
<param name="state"></param>
|
||
<param name="loadTransforms">If true loads transforms saved in the session state.</param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.LoadSession(System.String,System.Boolean)">
|
||
<summary>
|
||
Load a session from a directory.
|
||
</summary>
|
||
<param name="path"></param>
|
||
<param name="loadTransforms">If true loads transforms saved in the session state.</param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.AddMessage(LLama.Common.ChatHistory.Message)">
|
||
<summary>
|
||
Add a message to the chat history.
|
||
</summary>
|
||
<param name="message"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.AddSystemMessage(System.String)">
|
||
<summary>
|
||
Add a system message to the chat history.
|
||
</summary>
|
||
<param name="content"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.AddAssistantMessage(System.String)">
|
||
<summary>
|
||
Add an assistant message to the chat history.
|
||
</summary>
|
||
<param name="content"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.AddUserMessage(System.String)">
|
||
<summary>
|
||
Add a user message to the chat history.
|
||
</summary>
|
||
<param name="content"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.RemoveLastMessage">
|
||
<summary>
|
||
Remove the last message from the chat history.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.AddAndProcessMessage(LLama.Common.ChatHistory.Message)">
|
||
<summary>
|
||
Compute KV cache for the message and add it to the chat history.
|
||
</summary>
|
||
<param name="message"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.AddAndProcessSystemMessage(System.String)">
|
||
<summary>
|
||
Compute KV cache for the system message and add it to the chat history.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.AddAndProcessUserMessage(System.String)">
|
||
<summary>
|
||
Compute KV cache for the user message and add it to the chat history.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.AddAndProcessAssistantMessage(System.String)">
|
||
<summary>
|
||
Compute KV cache for the assistant message and add it to the chat history.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.ReplaceUserMessage(LLama.Common.ChatHistory.Message,LLama.Common.ChatHistory.Message)">
|
||
<summary>
|
||
Replace a user message with a new message and remove all messages after the new message.
|
||
This is useful when the user wants to edit a message and regenerate the response.
|
||
</summary>
|
||
<param name="oldMessage"></param>
|
||
<param name="newMessage"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.ChatAsync(LLama.Common.ChatHistory.Message,System.Boolean,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
|
||
<summary>
|
||
Chat with the model.
|
||
</summary>
|
||
<param name="message"></param>
|
||
<param name="inferenceParams"></param>
|
||
<param name="applyInputTransformPipeline"></param>
|
||
<param name="cancellationToken"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.ChatAsync(LLama.Common.ChatHistory.Message,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
|
||
<summary>
|
||
Chat with the model.
|
||
</summary>
|
||
<param name="message"></param>
|
||
<param name="inferenceParams"></param>
|
||
<param name="cancellationToken"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.ChatAsync(LLama.Common.ChatHistory,System.Boolean,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
|
||
<summary>
|
||
Chat with the model.
|
||
</summary>
|
||
<param name="history"></param>
|
||
<param name="applyInputTransformPipeline"></param>
|
||
<param name="inferenceParams"></param>
|
||
<param name="cancellationToken"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.ChatAsync(LLama.Common.ChatHistory,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
|
||
<summary>
|
||
Chat with the model.
|
||
</summary>
|
||
<param name="history"></param>
|
||
<param name="inferenceParams"></param>
|
||
<param name="cancellationToken"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.ChatSession.RegenerateAssistantMessageAsync(LLama.Common.InferenceParams,System.Threading.CancellationToken)">
|
||
<summary>
|
||
Regenerate the last assistant message.
|
||
</summary>
|
||
<param name="inferenceParams"></param>
|
||
<param name="cancellationToken"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.InvalidOperationException"></exception>
|
||
</member>
|
||
<member name="T:LLama.SessionState">
|
||
<summary>
|
||
The state of a chat session in-memory.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.SessionState.ExecutorState">
|
||
<summary>
|
||
Saved executor state for the session in JSON format.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.SessionState.ContextState">
|
||
<summary>
|
||
Saved context state (KV cache) for the session.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.SessionState.InputTransformPipeline">
|
||
<summary>
|
||
The input transform pipeline used in this session.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.SessionState.OutputTransform">
|
||
<summary>
|
||
The output transform used in this session.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.SessionState.HistoryTransform">
|
||
<summary>
|
||
The history transform used in this session.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.SessionState.History">
|
||
<summary>
|
||
The chat history messages for this session.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.SessionState.#ctor(LLama.LLamaContext.State,LLama.StatefulExecutorBase.ExecutorBaseState,LLama.Common.ChatHistory,System.Collections.Generic.List{LLama.Abstractions.ITextTransform},LLama.Abstractions.ITextStreamTransform,LLama.Abstractions.IHistoryTransform)">
|
||
<summary>
|
||
Create a new session state.
|
||
</summary>
|
||
<param name="contextState"></param>
|
||
<param name="executorState"></param>
|
||
<param name="history"></param>
|
||
<param name="inputTransformPipeline"></param>
|
||
<param name="outputTransform"></param>
|
||
<param name="historyTransform"></param>
|
||
</member>
|
||
<member name="M:LLama.SessionState.Save(System.String)">
|
||
<summary>
|
||
Save the session state to folder.
|
||
</summary>
|
||
<param name="path"></param>
|
||
</member>
|
||
<member name="M:LLama.SessionState.Load(System.String)">
|
||
<summary>
|
||
Load the session state from folder.
|
||
</summary>
|
||
<param name="path"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentException">Throws when session state is incorrect</exception>
|
||
</member>
|
||
<member name="T:LLama.Common.AuthorRole">
|
||
<summary>
|
||
Role of the message author, e.g. user/assistant/system
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Common.AuthorRole.Unknown">
|
||
<summary>
|
||
Role is unknown
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Common.AuthorRole.System">
|
||
<summary>
|
||
Message comes from a "system" prompt, not written by a user or language model
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Common.AuthorRole.User">
|
||
<summary>
|
||
Message comes from the user
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Common.AuthorRole.Assistant">
|
||
<summary>
|
||
Message was generated by the language model
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Common.ChatHistory">
|
||
<summary>
|
||
The chat history class
|
||
</summary>
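<example>
A minimal illustration, not part of the original docs, of building a history and round-tripping it through JSON with the members documented below. FromJson is assumed to be static here.
<code>
var history = new ChatHistory();
history.AddMessage(AuthorRole.System, "You are a helpful assistant.");
history.AddMessage(AuthorRole.User, "What is the capital of France?");

string json = history.ToJson();
var restored = ChatHistory.FromJson(json);
</code>
</example>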
|
||
</member>
|
||
<member name="T:LLama.Common.ChatHistory.Message">
|
||
<summary>
|
||
Chat message representation
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Common.ChatHistory.Message.AuthorRole">
|
||
<summary>
|
||
Role of the message author, e.g. user/assistant/system
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Common.ChatHistory.Message.Content">
|
||
<summary>
|
||
Message content
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Common.ChatHistory.Message.#ctor(LLama.Common.AuthorRole,System.String)">
|
||
<summary>
|
||
Create a new instance
|
||
</summary>
|
||
<param name="authorRole">Role of message author</param>
|
||
<param name="content">Message content</param>
|
||
</member>
|
||
<member name="P:LLama.Common.ChatHistory.Messages">
|
||
<summary>
|
||
List of messages in the chat
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Common.ChatHistory.#ctor">
|
||
<summary>
|
||
Create a new instance of the chat content class
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Common.ChatHistory.#ctor(LLama.Common.ChatHistory.Message[])">
|
||
<summary>
|
||
Create a new instance of the chat history from array of messages
|
||
</summary>
|
||
<param name="messageHistory"></param>
|
||
</member>
|
||
<member name="M:LLama.Common.ChatHistory.AddMessage(LLama.Common.AuthorRole,System.String)">
|
||
<summary>
|
||
Add a message to the chat history
|
||
</summary>
|
||
<param name="authorRole">Role of the message author</param>
|
||
<param name="content">Message content</param>
|
||
</member>
|
||
<member name="M:LLama.Common.ChatHistory.ToJson">
|
||
<summary>
|
||
Serialize the chat history to JSON
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Common.ChatHistory.FromJson(System.String)">
|
||
<summary>
|
||
Deserialize a chat history from JSON
|
||
</summary>
|
||
<param name="json"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Common.FixedSizeQueue`1">
|
||
<summary>
|
||
A queue with fixed storage size.
|
||
Currently it's only a naive implementation and needs to be further optimized in the future.
|
||
</summary>
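<example>
A minimal sketch of a bounded queue. The overflow behaviour (discarding the oldest items once Capacity is reached) is an assumption, since it is not spelled out in these docs.
<code>
using System;
using LLama.Common;

// A queue that holds at most 4 items.
var queue = new FixedSizeQueue<int>(4);
for (var i = 0; i < 6; i++)
    queue.Enqueue(i);

// Count never grows beyond Capacity.
Console.WriteLine($"{queue.Count}/{queue.Capacity}");
</code>
</example>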
|
||
</member>
|
||
<member name="P:LLama.Common.FixedSizeQueue`1.Item(System.Int32)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.FixedSizeQueue`1.Count">
|
||
<summary>
|
||
Number of items in this queue
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Common.FixedSizeQueue`1.Capacity">
|
||
<summary>
|
||
Maximum number of items allowed in this queue
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Common.FixedSizeQueue`1.#ctor(System.Int32)">
|
||
<summary>
|
||
Create a new queue
|
||
</summary>
|
||
<param name="size">the maximum number of items to store in this queue</param>
|
||
</member>
|
||
<member name="M:LLama.Common.FixedSizeQueue`1.#ctor(System.Int32,System.Collections.Generic.IEnumerable{`0})">
|
||
<summary>
|
||
Fill the queue with the given data. Please ensure that data.Count <= size
|
||
</summary>
|
||
<param name="size"></param>
|
||
<param name="data"></param>
|
||
</member>
|
||
<member name="M:LLama.Common.FixedSizeQueue`1.Enqueue(`0)">
|
||
<summary>
|
||
Enqueue an element.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Common.FixedSizeQueue`1.GetEnumerator">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Common.FixedSizeQueue`1.System#Collections#IEnumerable#GetEnumerator">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Common.InferenceParams">
|
||
<summary>
|
||
The parameters used for inference.
|
||
</summary>
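<example>
A minimal sketch of typical settings. Object-initializer syntax assumes these properties have public setters, and assigning an array to AntiPrompts assumes the property accepts one.
<code>
using LLama.Common;

// Stop after 256 new tokens, or as soon as "User:" is generated.
var inferenceParams = new InferenceParams
{
    MaxTokens = 256,
    AntiPrompts = new[] { "User:" }
};
</code>
</example>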
|
||
</member>
|
||
<member name="P:LLama.Common.InferenceParams.TokensKeep">
|
||
<summary>
|
||
number of tokens to keep from initial prompt when applying context shifting
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Common.InferenceParams.MaxTokens">
|
||
<summary>
|
||
How many new tokens to predict (n_predict). Set to -1 to generate indefinitely
until the response is complete.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Common.InferenceParams.AntiPrompts">
|
||
<summary>
|
||
Sequences where the model will stop generating further tokens.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Common.InferenceParams.SamplingPipeline">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Common.MirostatType">
|
||
<summary>
|
||
Type of "mirostat" sampling to use.
|
||
https://github.com/basusourya/mirostat
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Common.MirostatType.Disable">
|
||
<summary>
|
||
Disable Mirostat sampling
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Common.MirostatType.Mirostat">
|
||
<summary>
|
||
Original mirostat algorithm
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Common.MirostatType.Mirostat2">
|
||
<summary>
|
||
Mirostat 2.0 algorithm
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Common.ModelParams">
|
||
<summary>
|
||
The parameters for initializing a LLama model.
|
||
</summary>
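<example>
A minimal sketch. The property values here are illustrative only, and object-initializer syntax assumes the properties have public setters.
<code>
using LLama.Common;

var parameters = new ModelParams("path/to/model.gguf")
{
    ContextSize = 4096,   // tokens of context to allocate
    GpuLayerCount = 20    // number of layers to offload to the GPU
};
</code>
</example>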
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.ContextSize">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.MainGpu">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.SplitMode">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.GpuLayerCount">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.SeqMax">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.UseMemorymap">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.UseMemoryLock">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.ModelPath">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.Threads">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.BatchThreads">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.BatchSize">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.UBatchSize">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.Embeddings">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.TensorSplits">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.CheckTensors">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.MetadataOverrides">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.RopeFrequencyBase">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.RopeFrequencyScale">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.YarnExtrapolationFactor">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.YarnAttentionFactor">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.YarnBetaFast">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.YarnBetaSlow">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.YarnOriginalContext">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.YarnScalingType">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.TypeK">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.TypeV">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.NoKqvOffload">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.FlashAttention">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.DefragThreshold">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.PoolingType">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.AttentionType">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.VocabOnly">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.EncodingName">
|
||
<summary>
|
||
`Encoding` cannot be directly JSON serialized; instead, store the name as a string, which can be.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Common.ModelParams.Encoding">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Common.ModelParams.#ctor(System.String)">
|
||
<summary>
Create a new set of model parameters for the model at the given path.
</summary>
|
||
<param name="modelPath">The model path.</param>
|
||
</member>
|
||
<member name="T:LLama.Exceptions.RuntimeError">
|
||
<summary>
|
||
Base class for LLamaSharp runtime errors (i.e. errors produced by llama.cpp, converted into exceptions)
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Exceptions.RuntimeError.#ctor(System.String)">
|
||
<summary>
|
||
Create a new RuntimeError
|
||
</summary>
|
||
<param name="message"></param>
|
||
</member>
|
||
<member name="T:LLama.Exceptions.LoadWeightsFailedException">
|
||
<summary>
|
||
Loading model weights failed
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Exceptions.LoadWeightsFailedException.ModelPath">
|
||
<summary>
|
||
The model path which failed to load
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Exceptions.LoadWeightsFailedException.#ctor(System.String)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Exceptions.LLamaDecodeError">
|
||
<summary>
|
||
`llama_decode` returned a non-zero status code
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Exceptions.LLamaDecodeError.ReturnCode">
|
||
<summary>
|
||
The return status code
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Exceptions.LLamaDecodeError.#ctor(LLama.Native.DecodeResult)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Exceptions.MissingTemplateException">
|
||
<summary>
|
||
Thrown when no chat template could be found for the model
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Exceptions.MissingTemplateException.#ctor">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Exceptions.MissingTemplateException.#ctor(System.String)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Exceptions.GetLogitsInvalidIndexException">
|
||
<summary>
|
||
`llama_get_logits_ith` returned null, indicating that the index was invalid
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Exceptions.GetLogitsInvalidIndexException.Index">
|
||
<summary>
|
||
The incorrect index passed to the `llama_get_logits_ith` call
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Exceptions.GetLogitsInvalidIndexException.#ctor(System.Int32)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Extensions.IContextParamsExtensions">
|
||
<summary>
|
||
Extension methods to the IContextParams interface
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Extensions.IContextParamsExtensions.ToLlamaContextParams(LLama.Abstractions.IContextParams,LLama.Native.LLamaContextParams@)">
|
||
<summary>
|
||
Convert the given `IContextParams` into a `LLamaContextParams`
|
||
</summary>
|
||
<param name="params"></param>
|
||
<param name="result"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.IO.FileNotFoundException"></exception>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
</member>
|
||
<member name="T:LLama.Extensions.IModelParamsExtensions">
|
||
<summary>
|
||
Extension methods to the IModelParams interface
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Extensions.IModelParamsExtensions.ToLlamaModelParams(LLama.Abstractions.IModelParams,LLama.Native.LLamaModelParams@)">
|
||
<summary>
|
||
Convert the given `IModelParams` into a `LLamaModelParams`
|
||
</summary>
|
||
<param name="params"></param>
|
||
<param name="result"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.IO.FileNotFoundException"></exception>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Extensions.IReadOnlyListExtensions.IndexOf``1(System.Collections.Generic.IReadOnlyList{``0},``0)">
|
||
<summary>
|
||
Find the index of `item` in `list`
|
||
</summary>
|
||
<typeparam name="T"></typeparam>
|
||
<param name="list">list to search</param>
|
||
<param name="item">item to search for</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Extensions.IReadOnlyListExtensions.TokensEndsWithAnyString``2(``0,``1,LLama.Native.SafeLlamaModelHandle,System.Text.Encoding)">
|
||
<summary>
|
||
Check if the given set of tokens ends with any of the given strings
|
||
</summary>
|
||
<param name="tokens">Tokens to check</param>
|
||
<param name="queries">Strings to search for</param>
|
||
<param name="model">Model to use to convert tokens into bytes</param>
|
||
<param name="encoding">Encoding to use to convert bytes into characters</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Extensions.IReadOnlyListExtensions.TokensEndsWithAnyString``1(``0,System.Collections.Generic.IList{System.String},LLama.Native.SafeLlamaModelHandle,System.Text.Encoding)">
|
||
<summary>
|
||
Check if the given set of tokens ends with any of the given strings
|
||
</summary>
|
||
<param name="tokens">Tokens to check</param>
|
||
<param name="queries">Strings to search for</param>
|
||
<param name="model">Model to use to convert tokens into bytes</param>
|
||
<param name="encoding">Encoding to use to convert bytes into characters</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Extensions.KeyValuePairExtensions">
|
||
<summary>
|
||
Extensions to the KeyValuePair struct
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Extensions.KeyValuePairExtensions.Deconstruct``2(System.Collections.Generic.KeyValuePair{``0,``1},``0@,``1@)">
|
||
<summary>
|
||
Deconstruct a KeyValuePair into its constituent parts.
|
||
</summary>
|
||
<param name="pair">The KeyValuePair to deconstruct</param>
|
||
<param name="first">First element, the Key</param>
|
||
<param name="second">Second element, the Value</param>
|
||
<typeparam name="TKey">Type of the Key</typeparam>
|
||
<typeparam name="TValue">Type of the Value</typeparam>
|
||
</member>
|
||
<member name="M:LLama.Extensions.ProcessExtensions.SafeRun(System.Diagnostics.Process,System.TimeSpan)">
|
||
<summary>
|
||
Run a process for a certain amount of time and then terminate it
|
||
</summary>
|
||
<param name="process"></param>
|
||
<param name="timeout"></param>
|
||
<returns>return code, standard output, standard error, flag indicating if process exited or was terminated</returns>
|
||
</member>
|
||
<member name="T:LLama.Extensions.SpanNormalizationExtensions">
|
||
<summary>
|
||
Extensions to span which apply <b>in-place</b> normalization
|
||
</summary>
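<example>
A small sketch of in-place normalization, using a 3-4-5 triangle so the arithmetic is easy to check.
<code>
using LLama.Extensions;

var vector = new[] { 3f, 4f };

// Euclidean (L2) normalization divides by the vector length (5 here),
// leaving { 0.6, 0.8 }.
vector.EuclideanNormalization();
</code>
</example>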
|
||
</member>
|
||
<member name="M:LLama.Extensions.SpanNormalizationExtensions.MaxAbsoluteNormalization(System.Single[])">
|
||
<summary>
|
||
<b>In-place</b> multiply every element by 32760 and divide every element in the array by the max absolute value in the array
|
||
</summary>
|
||
<param name="vector"></param>
|
||
<returns>The same array</returns>
|
||
</member>
|
||
<member name="M:LLama.Extensions.SpanNormalizationExtensions.MaxAbsoluteNormalization(System.Span{System.Single})">
|
||
<summary>
|
||
<b>In-place</b> multiply every element by 32760 and divide every element in the span by the max absolute value in the span
|
||
</summary>
|
||
<param name="vector"></param>
|
||
<returns>The same span</returns>
|
||
</member>
|
||
<member name="M:LLama.Extensions.SpanNormalizationExtensions.TaxicabNormalization(System.Single[])">
|
||
<summary>
|
||
<b>In-place</b> divide every element in the array by the sum of absolute values in the array
|
||
</summary>
|
||
<remarks>Also known as "Manhattan normalization".</remarks>
|
||
<param name="vector"></param>
|
||
<returns>The same array</returns>
|
||
</member>
|
||
<member name="M:LLama.Extensions.SpanNormalizationExtensions.TaxicabNormalization(System.Span{System.Single})">
|
||
<summary>
|
||
<b>In-place</b> divide every element in the span by the sum of absolute values in the span
|
||
</summary>
|
||
<remarks>Also known as "Manhattan normalization".</remarks>
|
||
<param name="vector"></param>
|
||
<returns>The same span</returns>
|
||
</member>
|
||
<member name="M:LLama.Extensions.SpanNormalizationExtensions.EuclideanNormalization(System.Single[])">
|
||
<summary>
|
||
<b>In-place</b> divide every element by the euclidean length of the vector
|
||
</summary>
|
||
<remarks>Also known as "L2 normalization".</remarks>
|
||
<param name="vector"></param>
|
||
<returns>The same array</returns>
|
||
</member>
|
||
<member name="M:LLama.Extensions.SpanNormalizationExtensions.EuclideanNormalization(System.Span{System.Single})">
|
||
<summary>
|
||
<b>In-place</b> divide every element by the euclidean length of the vector
|
||
</summary>
|
||
<remarks>Also known as "L2 normalization".</remarks>
|
||
<param name="vector"></param>
|
||
<returns>The same span</returns>
|
||
</member>
|
||
<member name="M:LLama.Extensions.SpanNormalizationExtensions.EuclideanNormalization(System.ReadOnlySpan{System.Single})">
|
||
<summary>
|
||
Creates a new array containing an L2 normalization of the input vector.
|
||
</summary>
|
||
<param name="vector"></param>
|
||
<returns>A new array containing the normalized vector</returns>
|
||
</member>
|
||
<member name="M:LLama.Extensions.SpanNormalizationExtensions.PNormalization(System.Single[],System.Int32)">
|
||
<summary>
|
||
<b>In-place</b> apply p-normalization. https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm
|
||
<list type="bullet">
|
||
<item>For p = 1, this is taxicab normalization</item>
|
||
<item>For p = 2, this is euclidean normalization</item>
|
||
<item>As p => infinity, this approaches infinity norm or maximum norm</item>
|
||
</list>
|
||
</summary>
|
||
<param name="vector"></param>
|
||
<param name="p"></param>
|
||
<returns>The same array</returns>
|
||
</member>
|
||
<member name="M:LLama.Extensions.SpanNormalizationExtensions.PNormalization(System.Span{System.Single},System.Int32)">
|
||
<summary>
|
||
<b>In-place</b> apply p-normalization. https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm
|
||
<list type="bullet">
|
||
<item>For p = 1, this is taxicab normalization</item>
|
||
<item>For p = 2, this is euclidean normalization</item>
|
||
<item>As p => infinity, this approaches infinity norm or maximum norm</item>
|
||
</list>
|
||
</summary>
|
||
<param name="vector"></param>
|
||
<param name="p"></param>
|
||
<returns>The same span</returns>
|
||
</member>
|
||
<member name="T:LLama.LLamaContext">
|
||
<summary>
|
||
A llama_context, which holds all the context required to interact with a model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.ContextSize">
|
||
<summary>
|
||
Total number of tokens in the context
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.EmbeddingSize">
|
||
<summary>
|
||
Dimension of embedding vectors
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.Params">
|
||
<summary>
|
||
The context params set for this context
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.NativeHandle">
|
||
<summary>
|
||
The native handle, which is used to be passed to the native APIs
|
||
</summary>
|
||
<remarks>Be careful how you use this!</remarks>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.Encoding">
|
||
<summary>
|
||
The encoding set for this model to deal with text input.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.GenerationThreads">
|
||
<summary>
|
||
Get or set the number of threads to use for generation
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.BatchThreads">
|
||
<summary>
|
||
Get or set the number of threads to use for batch processing
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.BatchSize">
|
||
<summary>
|
||
Get the maximum batch size for this context
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.Vocab">
|
||
<summary>
|
||
Get the special tokens for the model associated with this context
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.#ctor(LLama.LLamaWeights,LLama.Abstractions.IContextParams,Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
Create a new LLamaContext for the given LLamaWeights
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="params"></param>
|
||
<param name="logger"></param>
|
||
<exception cref="T:System.ObjectDisposedException"></exception>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.Tokenize(System.String,System.Boolean,System.Boolean)">
|
||
<summary>
|
||
Tokenize a string.
|
||
</summary>
|
||
<param name="text"></param>
|
||
<param name="addBos">Whether to add a bos to the text.</param>
|
||
<param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.DeTokenize(System.Collections.Generic.IReadOnlyList{LLama.Native.LLamaToken})">
|
||
<summary>
|
||
Detokenize the tokens to text.
|
||
</summary>
|
||
<param name="tokens"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.SaveState(System.String)">
|
||
<summary>
|
||
Save the state to specified path.
|
||
</summary>
|
||
<param name="filename"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.SaveState(System.String,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Save the state of a particular sequence to specified path.
|
||
</summary>
|
||
<param name="filename"></param>
|
||
<param name="sequence"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.GetState">
|
||
<summary>
|
||
Get the state data as an opaque handle, which can be loaded later using <see cref="M:LLama.LLamaContext.LoadState(LLama.LLamaContext.State)"/>
|
||
</summary>
|
||
<remarks>Use <see cref="M:LLama.LLamaContext.SaveState(System.String)"/> if you intend to save this state to disk.</remarks>
|
||
<returns></returns>
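<example>
A minimal sketch of snapshotting and restoring state. It assumes an existing <see cref="T:LLama.LLamaContext"/> named <c>context</c>, and that disposing the returned state is the caller's responsibility since it wraps a native handle.
<code>
// Snapshot the current in-memory state.
using var snapshot = context.GetState();

// ... run some inference that modifies the context ...

// Roll the context back to the snapshot.
context.LoadState(snapshot);
</code>
</example>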
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.GetState(LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Get the state data as an opaque handle, which can be loaded later using <see cref="M:LLama.LLamaContext.LoadState(LLama.LLamaContext.State)"/>
|
||
</summary>
|
||
<remarks>Use <see cref="M:LLama.LLamaContext.SaveState(System.String,LLama.Native.LLamaSeqId)"/> if you intend to save this state to disk.</remarks>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.LoadState(System.String)">
|
||
<summary>
|
||
Load the state from specified path.
|
||
</summary>
|
||
<param name="filename"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.LoadState(System.String,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Load the state from specified path into a particular sequence
|
||
</summary>
|
||
<param name="filename"></param>
|
||
<param name="sequence"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.LoadState(LLama.LLamaContext.State)">
|
||
<summary>
|
||
Load the state from memory.
|
||
</summary>
|
||
<param name="state"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.LoadState(LLama.LLamaContext.SequenceState,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Load the state from memory into a particular sequence
|
||
</summary>
|
||
<param name="state"></param>
|
||
<param name="sequence"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.Encode(LLama.Native.LLamaBatch)">
|
||
<summary>
|
||
</summary>
|
||
<param name="batch"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.EncodeAsync(LLama.Native.LLamaBatch,System.Threading.CancellationToken)">
|
||
<summary>
|
||
</summary>
|
||
<param name="batch"></param>
|
||
<param name="cancellationToken"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.Decode(LLama.Native.LLamaBatch)">
|
||
<summary>
|
||
</summary>
|
||
<param name="batch"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.DecodeAsync(LLama.Native.LLamaBatch,System.Threading.CancellationToken)">
|
||
<summary>
|
||
</summary>
|
||
<param name="batch"></param>
|
||
<param name="cancellationToken"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.Decode(LLama.Native.LLamaBatchEmbeddings)">
|
||
<summary>
|
||
</summary>
|
||
<param name="batch"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.DecodeAsync(LLama.Native.LLamaBatchEmbeddings,System.Threading.CancellationToken)">
|
||
<summary>
|
||
</summary>
|
||
<param name="batch"></param>
|
||
<param name="cancellationToken"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.DecodeAsync(System.Collections.Generic.List{LLama.Native.LLamaToken},LLama.Native.LLamaSeqId,LLama.Native.LLamaBatch,System.Int32)">
|
||
<summary>
|
||
</summary>
|
||
<param name="tokens"></param>
|
||
<param name="id"></param>
|
||
<param name="batch"></param>
|
||
<param name="n_past"></param>
|
||
<returns>A tuple, containing the decode result, the number of tokens that have <b>not</b> been decoded yet and the total number of tokens that have been decoded.</returns>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.Dispose">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.LLamaContext.State">
|
||
<summary>
|
||
The state of this context, which can be reloaded later
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.State.Size">
|
||
<summary>
|
||
Get the size in bytes of this state object
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.State.ReleaseHandle">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.State.SaveAsync(System.IO.Stream)">
|
||
<summary>
|
||
Write all the bytes of this state to the given stream
|
||
</summary>
|
||
<param name="stream"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.State.Save(System.IO.Stream)">
|
||
<summary>
|
||
Write all the bytes of this state to the given stream
|
||
</summary>
|
||
<param name="stream"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.State.LoadAsync(System.IO.Stream)">
|
||
<summary>
|
||
Load a state from a stream
|
||
</summary>
|
||
<param name="stream"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.State.Load(System.IO.Stream)">
|
||
<summary>
|
||
Load a state from a stream
|
||
</summary>
|
||
<param name="stream"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.LLamaContext.SequenceState">
|
||
<summary>
|
||
The state of a single sequence, which can be reloaded later
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaContext.SequenceState.Size">
|
||
<summary>
|
||
Get the size in bytes of this state object
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.SequenceState.ReleaseHandle">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaContext.SequenceState.CopyTo(System.Byte*,System.UInt64,System.UInt64)">
|
||
<summary>
|
||
Copy bytes to a destination pointer.
|
||
</summary>
|
||
<param name="dst">Destination to write to</param>
|
||
<param name="length">Length of the destination buffer</param>
|
||
<param name="offset">Offset from start of src to start copying from</param>
|
||
<returns>Number of bytes written to destination</returns>
|
||
</member>
|
||
<member name="T:LLama.LLamaEmbedder">
|
||
<summary>
|
||
Generate high dimensional embedding vectors from text
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaEmbedder.EmbeddingSize">
|
||
<summary>
|
||
Dimension of embedding vectors
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaEmbedder.Context">
|
||
<summary>
|
||
LLama Context
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaEmbedder.#ctor(LLama.LLamaWeights,LLama.Abstractions.IContextParams,Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
Create a new embedder, using the given LLamaWeights
|
||
</summary>
|
||
<param name="weights"></param>
|
||
<param name="params"></param>
|
||
<param name="logger"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaEmbedder.Dispose">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaEmbedder.GetEmbeddings(System.String,System.Threading.CancellationToken)">
|
||
<summary>
|
||
Get high dimensional embedding vectors for the given text. Depending on the pooling type used when constructing
|
||
this <see cref="T:LLama.LLamaEmbedder"/> this may return an embedding vector per token, or one single embedding vector for the entire string.
|
||
</summary>
|
||
<remarks>Embedding vectors are not normalized, consider using one of the extensions in <see cref="T:LLama.Extensions.SpanNormalizationExtensions"/>.</remarks>
|
||
<param name="input"></param>
|
||
<param name="cancellationToken"></param>
|
||
<returns></returns>
|
||
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
|
||
<exception cref="T:System.NotSupportedException"></exception>
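<example>
A hedged sketch. It assumes an existing <see cref="T:LLama.LLamaEmbedder"/> named <c>embedder</c>; the exact return type is not shown in these docs, so it is treated here as an awaitable sequence of float vectors.
<code>
using LLama.Extensions;

// Embed a sentence; depending on the pooling type this may yield one vector
// per token or a single vector for the whole string.
var embeddings = await embedder.GetEmbeddings("The quick brown fox");

// Vectors are not normalized by default.
foreach (var vector in embeddings)
    vector.EuclideanNormalization();
</code>
</example>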
|
||
</member>
|
||
<member name="P:LLama.LLamaEmbedder.Microsoft#Extensions#AI#IEmbeddingGenerator{System#String,Microsoft#Extensions#AI#Embedding{System#Single}}#Metadata">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaEmbedder.Microsoft#Extensions#AI#IEmbeddingGenerator{System#String,Microsoft#Extensions#AI#Embedding{System#Single}}#GetService(System.Type,System.Object)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaEmbedder.Microsoft#Extensions#AI#IEmbeddingGenerator{System#String,Microsoft#Extensions#AI#Embedding{System#Single}}#GenerateAsync(System.Collections.Generic.IEnumerable{System.String},Microsoft.Extensions.AI.EmbeddingGenerationOptions,System.Threading.CancellationToken)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.StatefulExecutorBase">
|
||
<summary>
|
||
The base class for stateful LLama executors.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.StatefulExecutorBase._logger">
|
||
<summary>
|
||
The logger used by this executor.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.StatefulExecutorBase._pastTokensCount">
|
||
<summary>
|
||
The number of tokens already processed by the model.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.StatefulExecutorBase._consumedTokensCount">
|
||
<summary>
|
||
The number of tokens consumed by the model during the current inference.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.StatefulExecutorBase._n_session_consumed">
|
||
<summary>
|
||
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.StatefulExecutorBase._n_matching_session_tokens">
|
||
<summary>
|
||
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.StatefulExecutorBase._pathSession">
|
||
<summary>
|
||
The path of the session file.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.StatefulExecutorBase._embeds">
|
||
<summary>
|
||
A container for the tokens to be processed and those already processed.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.StatefulExecutorBase._embed_inps">
|
||
<summary>
|
||
A container for the input tokens.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.StatefulExecutorBase._session_tokens">
|
||
<summary>
|
||
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.StatefulExecutorBase._last_n_tokens">
|
||
<summary>
|
||
The last tokens generated by the model.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.StatefulExecutorBase.Context">
|
||
<summary>
|
||
The context used by the executor.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.StatefulExecutorBase.IsMultiModal">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.StatefulExecutorBase.ClipModel">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.StatefulExecutorBase.Images">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.#ctor(LLama.LLamaContext,Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
|
||
</summary>
|
||
<param name="context"></param>
|
||
<param name="logger"></param>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.#ctor(LLama.LLamaContext,LLama.LLavaWeights,Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
|
||
</summary>
|
||
<param name="context"></param>
|
||
<param name="lLavaWeights"></param>
|
||
<param name="logger"></param>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.WithSessionFile(System.String)">
|
||
<summary>
|
||
This API is currently not verified.
|
||
</summary>
|
||
<param name="filename"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentNullException"></exception>
|
||
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.SaveSessionFile(System.String)">
|
||
<summary>
|
||
This API has not been verified currently.
|
||
</summary>
|
||
<param name="filename"></param>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.HandleRunOutOfContext(System.Int32)">
|
||
<summary>
|
||
After running out of the context, take some tokens from the original prompt and recompute the logits in batches.
|
||
</summary>
|
||
<param name="tokensToKeep"></param>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.TryReuseMatchingPrefix">
|
||
<summary>
|
||
Try to reuse the matching prefix from the session file.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.GetLoopCondition(LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<summary>
|
||
Decide whether to continue the loop.
|
||
</summary>
|
||
<param name="args"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.PreprocessInputs(System.String,LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<summary>
|
||
Preprocess the inputs before the inference.
|
||
</summary>
|
||
<param name="text"></param>
|
||
<param name="args"></param>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.PostProcess(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<summary>
|
||
Do some post processing after the inference.
|
||
</summary>
|
||
<param name="inferenceParams"></param>
|
||
<param name="args"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.InferInternal(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<summary>
|
||
The core inference logic.
|
||
</summary>
|
||
<param name="inferenceParams"></param>
|
||
<param name="args"></param>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.SaveState(System.String)">
|
||
<summary>
|
||
Save the current state to a file.
|
||
</summary>
|
||
<param name="filename"></param>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.GetStateData">
|
||
<summary>
|
||
Get the current state data.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.LoadState(LLama.StatefulExecutorBase.ExecutorBaseState)">
|
||
<summary>
|
||
Load the state from data.
|
||
</summary>
|
||
<param name="data"></param>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.LoadState(System.String)">
|
||
<summary>
|
||
Load the state from a file.
|
||
</summary>
|
||
<param name="filename"></param>
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.InferAsync(System.String,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
|
||
<summary>
|
||
Execute the inference.
|
||
</summary>
|
||
<param name="text">The prompt. If null, generation will continue where it left off previously.</param>
|
||
<param name="inferenceParams"></param>
|
||
<param name="cancellationToken"></param>
|
||
<returns></returns>
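<example>
A minimal sketch. It assumes existing <c>executor</c> and <c>inferenceParams</c> instances, and that the returned sequence streams text pieces as they are generated.
<code>
using System;

await foreach (var piece in executor.InferAsync("Hello!", inferenceParams))
    Console.Write(piece);

// Passing null continues generation from where the previous call stopped.
await foreach (var piece in executor.InferAsync(null, inferenceParams))
    Console.Write(piece);
</code>
</example>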
|
||
</member>
|
||
<member name="M:LLama.StatefulExecutorBase.PrefillPromptAsync(System.String)">
|
||
<summary>
|
||
Asynchronously runs a prompt through the model to populate the KV cache, without generating any new tokens.
This can reduce the latency of the first response when the user's first input is not immediately available.
|
||
</summary>
|
||
<param name="prompt">Prompt to process</param>
|
||
<returns></returns>
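<example>
A minimal sketch. It assumes an existing stateful executor named <c>executor</c> and that the method returns an awaitable task.
<code>
// Warm up the KV cache with the fixed part of the prompt while waiting
// for the user's first message.
await executor.PrefillPromptAsync("You are a helpful assistant.");

// A later InferAsync call then starts from this prefilled state.
</code>
</example>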
|
||
</member>
|
||
<member name="T:LLama.StatefulExecutorBase.InferStateArgs">
|
||
<summary>
|
||
State arguments that are used in single inference
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.StatefulExecutorBase.InferStateArgs.Antiprompts">
|
||
<summary>
|
||
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.StatefulExecutorBase.InferStateArgs.RemainedTokens">
|
||
<summary>
|
||
Number of tokens remaining to be generated (n_remain).
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.StatefulExecutorBase.InferStateArgs.ReturnValue">
|
||
<summary>
|
||
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.StatefulExecutorBase.InferStateArgs.WaitForInput">
|
||
<summary>
|
||
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.StatefulExecutorBase.InferStateArgs.NeedToSaveSession">
|
||
<summary>
|
||
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.InstructExecutor">
|
||
<summary>
|
||
The LLama executor for instruct mode.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.InstructExecutor.#ctor(LLama.LLamaContext,System.String,System.String,Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
|
||
</summary>
|
||
<param name="context"></param>
|
||
<param name="instructionPrefix"></param>
|
||
<param name="instructionSuffix"></param>
|
||
<param name="logger"></param>
|
||
</member>
|
||
<member name="M:LLama.InstructExecutor.GetStateData">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InstructExecutor.LoadState(LLama.StatefulExecutorBase.ExecutorBaseState)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InstructExecutor.SaveState(System.String)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InstructExecutor.LoadState(System.String)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InstructExecutor.GetLoopCondition(LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InstructExecutor.PreprocessInputs(System.String,LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InstructExecutor.PostProcess(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InstructExecutor.InferInternal(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.InstructExecutor.InstructExecutorState">
|
||
<summary>
|
||
The descriptor of the state of the instruct executor.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.InstructExecutor.InstructExecutorState.IsPromptRun">
|
||
<summary>
|
||
Whether the executor is running for the first time (running the prompt).
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.InstructExecutor.InstructExecutorState.InputPrefixTokens">
|
||
<summary>
|
||
Instruction prefix tokens.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.InstructExecutor.InstructExecutorState.InputSuffixTokens">
|
||
<summary>
|
||
Instruction suffix tokens.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.InteractiveExecutor">
|
||
<summary>
|
||
The LLama executor for interactive mode.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.InteractiveExecutor.#ctor(LLama.LLamaContext,Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
|
||
</summary>
|
||
<param name="context"></param>
|
||
<param name="logger"></param>
|
||
</member>
|
||
<member name="M:LLama.InteractiveExecutor.GetStateData">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InteractiveExecutor.LoadState(LLama.StatefulExecutorBase.ExecutorBaseState)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InteractiveExecutor.SaveState(System.String)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InteractiveExecutor.LoadState(System.String)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InteractiveExecutor.GetLoopCondition(LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<summary>
|
||
Define whether to continue the loop to generate responses.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.InteractiveExecutor.PreprocessInputs(System.String,LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InteractiveExecutor.PreprocessLlava(System.String,LLama.StatefulExecutorBase.InferStateArgs,System.Boolean)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.InteractiveExecutor.PostProcess(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<summary>
|
||
Return whether to break the generation.
|
||
</summary>
|
||
<param name="inferenceParams"></param>
|
||
<param name="args"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.InteractiveExecutor.InferInternal(LLama.Abstractions.IInferenceParams,LLama.StatefulExecutorBase.InferStateArgs)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.InteractiveExecutor.InteractiveExecutorState">
|
||
<summary>
|
||
The descriptor of the state of the interactive executor.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.InteractiveExecutor.InteractiveExecutorState.IsPromptRun">
|
||
<summary>
|
||
Whether the executor is running for the first time (running the prompt).
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.LLamaQuantizer">
|
||
<summary>
|
||
The quantizer to quantize the model.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaQuantizer.Quantize(System.String,System.String,LLama.Native.LLamaFtype,System.Int32,System.Boolean,System.Boolean)">
|
||
<summary>
|
||
Quantize the model.
|
||
</summary>
|
||
<param name="srcFileName">The model file to be quantized.</param>
|
||
<param name="dstFilename">The path to save the quantized model.</param>
|
||
<param name="ftype">The type of quantization.</param>
|
||
<param name="nthread">Thread to be used during the quantization. By default it's the physical core number.</param>
|
||
<param name="allowRequantize"></param>
|
||
<param name="quantizeOutputTensor"></param>
|
||
<returns>Whether the quantization is successful.</returns>
|
||
<exception cref="T:System.ArgumentException"></exception>
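<example>
A minimal sketch. All arguments are passed explicitly because any defaults are not shown in these docs; the thread count and boolean values are illustrative.
<code>
using System;
using LLama;
using LLama.Native;

var ok = LLamaQuantizer.Quantize(
    "model-f16.gguf",                        // source model
    "model-q5_k_m.gguf",                     // destination
    LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_M,    // quantization type
    nthread: Environment.ProcessorCount,
    allowRequantize: true,
    quantizeOutputTensor: false);
</code>
</example>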
|
||
</member>
|
||
<member name="M:LLama.LLamaQuantizer.Quantize(System.String,System.String,System.String,System.Int32,System.Boolean,System.Boolean)">
|
||
<summary>
|
||
Quantize the model.
|
||
</summary>
|
||
<param name="srcFileName">The model file to be quantized.</param>
|
||
<param name="dstFilename">The path to save the quantized model.</param>
|
||
<param name="ftype">The type of quantization.</param>
|
||
<param name="nthread">Thread to be used during the quantization. By default it's the physical core number.</param>
|
||
<param name="allowRequantize"></param>
|
||
<param name="quantizeOutputTensor"></param>
|
||
<returns>Whether the quantization is successful.</returns>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
</member>
|
||
<member name="M:LLama.LLamaQuantizer.StringToFtype(System.String)">
|
||
<summary>
|
||
Parse a string into a LLamaFtype. This is a "relaxed" parsing, which allows any string which is contained within
|
||
the enum name to be used.
|
||
|
||
For example "Q5_K_M" will convert to "LLAMA_FTYPE_MOSTLY_Q5_K_M"
|
||
</summary>
|
||
<param name="str"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
</member>
|
||
<member name="T:LLama.StatelessExecutor">
|
||
<summary>
|
||
This executor runs each inference as a one-off job. Previous inputs won't impact the
response to the current input.
|
||
</summary>
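<example>
A minimal sketch. It assumes ModelParams can also be used as the context parameters (it exposes the same properties), that the logger argument is optional, that InferenceParams properties have public setters, and that InferAsync streams text pieces.
<code>
using System;
using LLama;
using LLama.Common;

var parameters = new ModelParams("path/to/model.gguf");
using var weights = LLamaWeights.LoadFromFile(parameters);
var executor = new StatelessExecutor(weights, parameters);

// Each call is completely independent of previous calls.
var inferenceParams = new InferenceParams { MaxTokens = 32 };
await foreach (var piece in executor.InferAsync("Q: What is 2 + 2? A:", inferenceParams))
    Console.Write(piece);
</code>
</example>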
|
||
</member>
|
||
<member name="P:LLama.StatelessExecutor.IsMultiModal">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.StatelessExecutor.ClipModel">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.StatelessExecutor.Images">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="P:LLama.StatelessExecutor.Context">
|
||
<summary>
|
||
The context used by the executor when running the inference.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.StatelessExecutor.ApplyTemplate">
|
||
<summary>
|
||
If true, applies the default template to the prompt, as defined by the rules for the <a href="https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template">llama_chat_apply_template</a> function.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.StatelessExecutor.SystemMessage">
|
||
<summary>
|
||
The system message to use with the prompt. Only used when <see cref="P:LLama.StatelessExecutor.ApplyTemplate" /> is true.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.StatelessExecutor.#ctor(LLama.LLamaWeights,LLama.Abstractions.IContextParams,Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
Create a new stateless executor which will use the given model
|
||
</summary>
|
||
<param name="weights"></param>
|
||
<param name="params"></param>
|
||
<param name="logger"></param>
|
||
</member>
|
||
<member name="M:LLama.StatelessExecutor.InferAsync(System.String,LLama.Abstractions.IInferenceParams,System.Threading.CancellationToken)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.LLamaTemplate">
|
||
<summary>
|
||
Converts a sequence of messages into text according to a model template
|
||
</summary>
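<example>
A minimal sketch. It assumes an existing <see cref="T:LLama.LLamaWeights"/> named <c>weights</c>, that <see cref="P:LLama.LLamaTemplate.AddAssistant"/> is settable, and that the applied template bytes are UTF-8.
<code>
using System.Text;
using LLama;

var template = new LLamaTemplate(weights)
{
    AddAssistant = true
};

template.Add("system", "You are a helpful assistant.")
        .Add("user", "Hello!");

// Apply the template and decode the resulting bytes into the prompt text.
var prompt = Encoding.UTF8.GetString(template.Apply());
</code>
</example>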
|
||
</member>
|
||
<member name="F:LLama.LLamaTemplate._customTemplate">
|
||
<summary>
|
||
Custom template. May be null if a model was supplied to the constructor.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.LLamaTemplate._roleCache">
|
||
<summary>
|
||
Keep a cache of roles converted into bytes. Roles are very frequently re-used, so this saves converting them many times.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.LLamaTemplate._messages">
|
||
<summary>
|
||
Array of messages. The <see cref="P:LLama.LLamaTemplate.Count"/> property indicates how many messages there are
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.LLamaTemplate._addAssistant">
|
||
<summary>
|
||
Backing field for <see cref="P:LLama.LLamaTemplate.AddAssistant"/>
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.LLamaTemplate._nativeChatMessages">
|
||
<summary>
|
||
Temporary array of messages in the format llama.cpp needs, used when applying the template
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.LLamaTemplate._resultLength">
|
||
<summary>
|
||
Indicates how many bytes are in <see cref="F:LLama.LLamaTemplate._result"/> array
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.LLamaTemplate._result">
|
||
<summary>
|
||
Result bytes of last call to <see cref="M:LLama.LLamaTemplate.Apply"/>
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.LLamaTemplate._dirty">
|
||
<summary>
|
||
Indicates if this template has been modified and needs regenerating
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.LLamaTemplate.Encoding">
|
||
<summary>
|
||
The encoding algorithm to use
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaTemplate.Count">
|
||
<summary>
|
||
Number of messages added to this template
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaTemplate.Item(System.Int32)">
|
||
<summary>
|
||
Get the message at the given index
|
||
</summary>
|
||
<param name="index"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentOutOfRangeException">Thrown if index is less than zero or greater than or equal to <see cref="P:LLama.LLamaTemplate.Count"/></exception>
|
||
</member>
|
||
<member name="P:LLama.LLamaTemplate.AddAssistant">
|
||
<summary>
|
||
Whether to end the prompt with the token(s) that indicate the start of an assistant message.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaTemplate.#ctor(LLama.Native.SafeLlamaModelHandle,System.String)">
|
||
<summary>
|
||
Construct a new template, using the default model template
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="name"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaTemplate.#ctor(LLama.LLamaWeights)">
|
||
<summary>
|
||
Construct a new template, using the default model template
|
||
</summary>
|
||
<param name="weights"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaTemplate.#ctor(System.String)">
|
||
<summary>
|
||
Construct a new template, using a custom template.
|
||
</summary>
|
||
<remarks>Only a pre-defined list of templates is supported. See: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template</remarks>
|
||
<param name="customTemplate"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaTemplate.Add(System.String,System.String)">
|
||
<summary>
|
||
Add a new message to the end of this template
|
||
</summary>
|
||
<param name="role"></param>
|
||
<param name="content"></param>
|
||
<returns>This template, for chaining calls.</returns>
|
||
</member>
|
||
<member name="M:LLama.LLamaTemplate.Add(LLama.LLamaTemplate.TextMessage)">
|
||
<summary>
|
||
Add a new message to the end of this template
|
||
</summary>
|
||
<param name="message"></param>
|
||
<returns>This template, for chaining calls.</returns>
|
||
</member>
|
||
<member name="M:LLama.LLamaTemplate.RemoveAt(System.Int32)">
|
||
<summary>
|
||
Remove a message at the given index
|
||
</summary>
|
||
<param name="index"></param>
|
||
<returns>This template, for chaining calls.</returns>
|
||
</member>
|
||
<member name="M:LLama.LLamaTemplate.Clear">
|
||
<summary>
|
||
Remove all messages from the template and reset the internal state to accept/generate new messages
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaTemplate.Apply">
|
||
<summary>
|
||
Apply the template to the messages and return a span containing the results
|
||
</summary>
|
||
<returns>A span over the buffer that holds the applied template</returns>
|
||
</member>
|
||
<member name="T:LLama.LLamaTemplate.TextMessage">
|
||
<summary>
|
||
A message that has been added to a template
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaTemplate.TextMessage.Role">
|
||
<summary>
|
||
The "role" string for this message
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaTemplate.TextMessage.Content">
|
||
<summary>
|
||
The text content of this message
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaTemplate.TextMessage.Deconstruct(System.String@,System.String@)">
|
||
<summary>
|
||
Deconstruct this message into role and content
|
||
</summary>
|
||
<param name="role"></param>
|
||
<param name="content"></param>
|
||
</member>
|
||
<member name="T:LLama.LLamaTransforms">
|
||
<summary>
|
||
A class that contains all the transforms provided internally by LLama.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.LLamaTransforms.DefaultHistoryTransform">
|
||
<summary>
|
||
The default history transform.
|
||
Uses plain text with the following format:
|
||
[Author]: [Message]
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.DefaultHistoryTransform.#ctor(System.String,System.String,System.String,System.String,System.Boolean)">
|
||
<summary>
|
||
|
||
</summary>
|
||
<param name="userName"></param>
|
||
<param name="assistantName"></param>
|
||
<param name="systemName"></param>
|
||
<param name="unknownName"></param>
|
||
<param name="isInstructMode"></param>
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.DefaultHistoryTransform.Clone">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.DefaultHistoryTransform.HistoryToText(LLama.Common.ChatHistory)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.DefaultHistoryTransform.TextToHistory(LLama.Common.AuthorRole,System.String)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.DefaultHistoryTransform.TrimNamesFromText(System.String,LLama.Common.AuthorRole)">
|
||
<summary>
|
||
Drop the name at the beginning and the end of the text.
|
||
</summary>
|
||
<param name="text"></param>
|
||
<param name="role"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.LLamaTransforms.NaiveTextInputTransform">
|
||
<summary>
|
||
A text input transform that only trims the text.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.NaiveTextInputTransform.Transform(System.String)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.NaiveTextInputTransform.Clone">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.LLamaTransforms.EmptyTextOutputStreamTransform">
|
||
<summary>
|
||
A no-op text output stream transform.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.EmptyTextOutputStreamTransform.TransformAsync(System.Collections.Generic.IAsyncEnumerable{System.String})">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.EmptyTextOutputStreamTransform.Clone">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.LLamaTransforms.KeywordTextOutputStreamTransform">
|
||
<summary>
|
||
A text output transform that removes the keywords from the response.
|
||
</summary>
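<example>
A minimal sketch. The redundancyLength and removeAllMatchedTokens arguments are passed explicitly because their defaults are not shown in these docs, and the values here are illustrative.
<code>
using LLama;

// Strip the role markers from streamed output.
var transform = new LLamaTransforms.KeywordTextOutputStreamTransform(
    new[] { "User:", "Assistant:" },
    redundancyLength: 3,
    removeAllMatchedTokens: false);
</code>
</example>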
|
||
</member>
|
||
<member name="P:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.Keywords">
|
||
<summary>
|
||
Keywords that you want to remove from the response.
|
||
This property is used for JSON serialization.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.MaxKeywordLength">
|
||
<summary>
|
||
Maximum length of the keywords.
|
||
This property is used for JSON serialization.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.RemoveAllMatchedTokens">
|
||
<summary>
|
||
If set to true, all tokens related to a matched keyword will be removed.
Otherwise, only the keyword text itself will be removed.
|
||
This property is used for JSON serialization.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.#ctor(System.Collections.Generic.HashSet{System.String},System.Int32,System.Boolean)">
|
||
<summary>
|
||
JSON constructor.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.#ctor(System.Collections.Generic.IEnumerable{System.String},System.Int32,System.Boolean)">
|
||
<summary>
|
||
|
||
</summary>
|
||
<param name="keywords">Keywords that you want to remove from the response.</param>
|
||
<param name="redundancyLength">The extra length when searching for the keyword. For example, if your only keyword is "highlight",
|
||
maybe the token you get is "\r\nhighligt". In this condition, if redundancyLength=0, the token cannot be successfully matched because the length of "\r\nhighligt" (10)
|
||
has already exceeded the maximum length of the keywords (8). On the contrary, setting redundancyLengyh >= 2 leads to successful match.
|
||
The larger the redundancyLength is, the lower the processing speed. But as an experience, it won't introduce too much performance impact when redundancyLength <= 5 </param>
|
||
<param name="removeAllMatchedTokens">If set to true, when getting a matched keyword, all the related tokens will be removed. Otherwise only the part of keyword will be removed.</param>
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.Clone">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaTransforms.KeywordTextOutputStreamTransform.TransformAsync(System.Collections.Generic.IAsyncEnumerable{System.String})">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.LLamaWeights">
|
||
<summary>
|
||
A set of model weights, loaded into memory.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaWeights.NativeHandle">
|
||
<summary>
|
||
The native handle, which is used in the native APIs
|
||
</summary>
|
||
<remarks>Be careful how you use this!</remarks>
|
||
</member>
|
||
<member name="P:LLama.LLamaWeights.ContextSize">
|
||
<summary>
|
||
Total number of tokens in the context
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaWeights.SizeInBytes">
|
||
<summary>
|
||
Get the size of this model in bytes
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaWeights.ParameterCount">
|
||
<summary>
|
||
Get the number of parameters in this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaWeights.EmbeddingSize">
|
||
<summary>
|
||
Dimension of embedding vectors
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaWeights.Vocab">
|
||
<summary>
|
||
Get the special tokens of this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLamaWeights.Metadata">
|
||
<summary>
|
||
All metadata keys in this model
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.LLamaWeights.LoadFromFile(LLama.Abstractions.IModelParams)">
|
||
<summary>
|
||
Load weights into memory
|
||
</summary>
|
||
<param name="params"></param>
|
||
<returns></returns>
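<example>
A minimal sketch. It assumes ModelParams can also supply the context parameters, and that the logger argument of CreateContext is optional.
<code>
using LLama;
using LLama.Common;

var parameters = new ModelParams("path/to/model.gguf");
using var weights = LLamaWeights.LoadFromFile(parameters);
using var context = weights.CreateContext(parameters);
</code>
</example>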
|
||
</member>
|
||
<member name="M:LLama.LLamaWeights.LoadFromFileAsync(LLama.Abstractions.IModelParams,System.Threading.CancellationToken,System.IProgress{System.Single})">
|
||
<summary>
|
||
Load weights into memory
|
||
</summary>
|
||
<param name="params">Parameters to use to load the model</param>
|
||
<param name="token">A cancellation token that can interrupt model loading</param>
|
||
<param name="progressReporter">Receives progress updates as the model loads (0 to 1)</param>
|
||
<returns></returns>
|
||
<exception cref="T:LLama.Exceptions.LoadWeightsFailedException">Thrown if weights failed to load for any reason. e.g. Invalid file format or loading cancelled.</exception>
|
||
<exception cref="T:System.OperationCanceledException">Thrown if the cancellation token is cancelled.</exception>
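<example>
A hedged sketch of asynchronous loading with cancellation and progress reporting. <c>ModelParams</c> is assumed to be the stock <see cref="T:LLama.Abstractions.IModelParams"/> implementation, and the model path is a placeholder.
<code>
var parameters = new ModelParams("path/to/model.gguf");
using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(5));

// Progress is reported as a value between 0 and 1 while the weights load.
var progress = new Progress&lt;float&gt;(p => Console.WriteLine($"Loading: {p:P0}"));

using var weights = await LLamaWeights.LoadFromFileAsync(parameters, cts.Token, progress);
</code>
</example>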
</member>
|
||
<member name="M:LLama.LLamaWeights.Dispose">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.LLamaWeights.CreateContext(LLama.Abstractions.IContextParams,Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
Create a llama_context using this model
|
||
</summary>
|
||
<param name="params"></param>
|
||
<param name="logger"></param>
|
||
<returns></returns>
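<example>
A minimal sketch, assuming <c>ModelParams</c> (which supplies both model and context parameters) and a placeholder model path.
<code>
var parameters = new ModelParams("path/to/model.gguf") { ContextSize = 4096 };

using var weights = LLamaWeights.LoadFromFile(parameters);

// The same parameter object supplies the context settings; pass an ILogger or null.
using var context = weights.CreateContext(parameters, null);
</code>
</example>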
</member>
|
||
<member name="M:LLama.LLamaWeights.Tokenize(System.String,System.Boolean,System.Boolean,System.Text.Encoding)">
|
||
<summary>
|
||
Convert a string of text into tokens
|
||
</summary>
|
||
<param name="text"></param>
|
||
<param name="add_bos"></param>
|
||
<param name="encoding"></param>
|
||
<param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
|
||
<returns></returns>
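<example>
A small sketch. Named arguments are used so the intent of each flag is clear; UTF-8 is an assumption about the model's text encoding, and <c>weights</c> is an already-loaded instance.
<code>
// Tokenize a prompt, prepending BOS and treating special tokens as plain text.
var tokens = weights.Tokenize("Hello, world!", add_bos: true, special: false,
                              encoding: System.Text.Encoding.UTF8);
Console.WriteLine($"Token count: {tokens.Length}");
</code>
</example>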
</member>
|
||
<member name="T:LLama.LLavaWeights">
|
||
<summary>
|
||
A set of llava model weights (mmproj), loaded into memory.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.LLavaWeights.NativeHandle">
|
||
<summary>
|
||
The native handle, which is used in the native APIs
|
||
</summary>
|
||
<remarks>Be careful how you use this!</remarks>
|
||
</member>
|
||
<member name="M:LLama.LLavaWeights.LoadFromFile(System.String)">
|
||
<summary>
|
||
Load weights into memory
|
||
</summary>
|
||
<param name="mmProject">path to the "mmproj" model file</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.LLavaWeights.LoadFromFileAsync(System.String,System.Threading.CancellationToken)">
|
||
<summary>
|
||
Load weights into memory
|
||
</summary>
|
||
<param name="mmProject">path to the "mmproj" model file</param>
|
||
<param name="token"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.LLavaWeights.CreateImageEmbeddings(LLama.LLamaContext,System.Byte[])">
|
||
<summary>
|
||
Create the Image Embeddings from the bytes of an image.
|
||
</summary>
|
||
<param name="ctxLlama"></param>
|
||
<param name="image">Image bytes. Supported formats:
|
||
<list type="bullet">
|
||
<item>JPG</item>
|
||
<item>PNG</item>
|
||
<item>BMP</item>
|
||
<item>TGA</item>
|
||
</list>
|
||
</param>
|
||
<returns></returns>
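<example>
A hedged end-to-end sketch. File paths are placeholders and <c>context</c> is assumed to be an existing <see cref="T:LLama.LLamaContext"/> created from the main model weights.
<code>
using var llava = LLavaWeights.LoadFromFile("path/to/mmproj.gguf");

// Any JPG/PNG/BMP/TGA image can be supplied as raw bytes.
var imageBytes = System.IO.File.ReadAllBytes("path/to/image.jpg");
using var embeddings = llava.CreateImageEmbeddings(context, imageBytes);

// Feed the image embeddings into the context before decoding the text prompt.
var n_past = 0;
llava.EvalImageEmbed(context, embeddings, ref n_past);
</code>
</example>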
</member>
|
||
<member name="M:LLama.LLavaWeights.CreateImageEmbeddings(System.Byte[],System.Int32)">
|
||
<summary>
|
||
Create the Image Embeddings.
|
||
</summary>
|
||
<param name="image">Image in binary format (it supports jpeg format only)</param>
|
||
<param name="threads">Number of threads to use</param>
|
||
<returns>return the SafeHandle of these embeddings</returns>
|
||
</member>
|
||
<member name="M:LLama.LLavaWeights.CreateImageEmbeddings(LLama.LLamaContext,System.String)">
|
||
<summary>
|
||
Create the Image Embeddings from an image file.
|
||
</summary>
|
||
<param name="ctxLlama"></param>
|
||
<param name="image">Path to the image file. Supported formats:
|
||
<list type="bullet">
|
||
<item>JPG</item>
|
||
<item>PNG</item>
|
||
<item>BMP</item>
|
||
<item>TGA</item>
|
||
</list>
|
||
</param>
|
||
<returns></returns>
|
||
<exception cref="T:System.InvalidOperationException"></exception>
|
||
</member>
|
||
<member name="M:LLama.LLavaWeights.CreateImageEmbeddings(System.String,System.Int32)">
|
||
<summary>
|
||
Create the Image Embeddings from an image file.
|
||
</summary>
|
||
<param name="image">Path to the image file. Supported formats:
|
||
<list type="bullet">
|
||
<item>JPG</item>
|
||
<item>PNG</item>
|
||
<item>BMP</item>
|
||
<item>TGA</item>
|
||
</list>
|
||
</param>
|
||
<param name="threads"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.InvalidOperationException"></exception>
|
||
</member>
|
||
<member name="M:LLama.LLavaWeights.EvalImageEmbed(LLama.LLamaContext,LLama.Native.SafeLlavaImageEmbedHandle,System.Int32@)">
|
||
<summary>
|
||
Eval the image embeddings
|
||
</summary>
|
||
<param name="ctxLlama"></param>
|
||
<param name="imageEmbed"></param>
|
||
<param name="n_past"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.LLavaWeights.Dispose">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Native.DecodeResult">
|
||
<summary>
|
||
Return codes from llama_decode
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.DecodeResult.Error">
|
||
<summary>
|
||
An unspecified error
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.DecodeResult.Ok">
|
||
<summary>
|
||
Ok.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.DecodeResult.NoKvSlot">
|
||
<summary>
|
||
Could not find a KV slot for the batch (try reducing the size of the batch or increasing the context size)
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.EncodeResult">
|
||
<summary>
|
||
Return codes from llama_encode
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.EncodeResult.Error">
|
||
<summary>
|
||
An unspecified error
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.EncodeResult.Ok">
|
||
<summary>
|
||
Ok.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.GGMLType">
|
||
<summary>
|
||
Possible GGML quantisation types
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_F32">
|
||
<summary>
|
||
Full 32 bit float
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_F16">
|
||
<summary>
|
||
16 bit float
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q4_0">
|
||
<summary>
|
||
4 bit quantised format
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q4_1">
|
||
<summary>
|
||
4 bit quantised format
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q5_0">
|
||
<summary>
|
||
5 bit quantised format
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q5_1">
|
||
<summary>
|
||
5 bit quantised format
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q8_0">
|
||
<summary>
|
||
8 bit quantised format
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q8_1">
|
||
<summary>
|
||
8 bit quantised format
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q2_K">
|
||
<summary>
"type-1" 2-bit quantization in super-blocks containing 16 blocks, each block having 16 weights.
|
||
Block scales and mins are quantized with 4 bits. This ends up effectively using 2.5625 bits per weight (bpw)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q3_K">
|
||
<summary>
|
||
"type-0" 3-bit quantization in super-blocks containing 16 blocks, each block having 16 weights.
|
||
Scales are quantized with 6 bits. This ends up using 3.4375 bpw.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q4_K">
|
||
<summary>
|
||
"type-1" 4-bit quantization in super-blocks containing 8 blocks, each block having 32 weights.
|
||
Scales and mins are quantized with 6 bits. This ends up using 4.5 bpw.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q5_K">
|
||
<summary>
|
||
"type-1" 5-bit quantization. Same super-block structure as GGML_TYPE_Q4_K resulting in 5.5 bpw
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q6_K">
|
||
<summary>
|
||
"type-0" 6-bit quantization. Super-blocks with 16 blocks, each block having 16 weights.
|
||
Scales are quantized with 8 bits. This ends up using 6.5625 bpw
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_Q8_K">
|
||
<summary>
|
||
"type-0" 8-bit quantization. Only used for quantizing intermediate results.
|
||
The difference to the existing Q8_0 is that the block size is 256. All 2-6 bit dot products are implemented for this quantization type.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_I8">
|
||
<summary>
|
||
Integer, 8 bit
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_I16">
|
||
<summary>
|
||
Integer, 16 bit
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_I32">
|
||
<summary>
|
||
Integer, 32 bit
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GGMLType.GGML_TYPE_COUNT">
|
||
<summary>
|
||
The value of this entry is the count of the number of possible quant types.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.GPUSplitMode">
|
||
<summary>
How to split the model across multiple GPUs
</summary>
|
||
<remarks>llama_split_mode</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.GPUSplitMode.None">
|
||
<summary>
|
||
Single GPU
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GPUSplitMode.Layer">
|
||
<summary>
|
||
Split layers and KV across GPUs
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.GPUSplitMode.Row">
|
||
<summary>
|
||
split layers and KV across GPUs, use tensor parallelism if supported
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.GroupDisposable">
|
||
<summary>
|
||
Disposes all contained disposables when this class is disposed
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.GroupDisposable.Finalize">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Native.GroupDisposable.Dispose">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaAttentionType">
|
||
<summary>
|
||
|
||
</summary>
|
||
<remarks>llama_attention_type</remarks>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaBatch">
|
||
<summary>
|
||
A batch allows submitting multiple tokens to multiple sequences simultaneously
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaBatch._logitPositions">
|
||
<summary>
|
||
Keep a list of where logits can be sampled from
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaBatch.LogitPositionCount">
|
||
<summary>
|
||
Get the number of logit positions that will be generated from this batch
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaBatch.TokenCount">
|
||
<summary>
|
||
The number of tokens in this batch
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaBatch.TokenCapacity">
|
||
<summary>
|
||
Maximum number of tokens that can be added to this batch (automatically grows if exceeded)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaBatch.SequenceCapacity">
|
||
<summary>
|
||
Maximum number of sequences a token can be assigned to (automatically grows if exceeded)
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatch.#ctor">
|
||
<summary>
|
||
Create a new batch for submitting inputs to llama.cpp
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatch.Add(LLama.Native.LLamaToken,LLama.Native.LLamaPos,System.ReadOnlySpan{LLama.Native.LLamaSeqId},System.Boolean)">
|
||
<summary>
|
||
Add a single token to the batch at the same position in several sequences
|
||
</summary>
|
||
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
|
||
<param name="token">The token to add</param>
|
||
<param name="pos">The position to add it at</param>
|
||
<param name="sequences">The set of sequences to add this token to</param>
|
||
<param name="logits"></param>
|
||
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatch.Add(LLama.Native.LLamaToken,LLama.Native.LLamaPos,System.Collections.Generic.List{LLama.Native.LLamaSeqId},System.Boolean)">
|
||
<summary>
|
||
Add a single token to the batch at the same position in several sequences
|
||
</summary>
|
||
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
|
||
<param name="token">The token to add</param>
|
||
<param name="pos">The position to add it at</param>
|
||
<param name="sequences">The set of sequences to add this token to</param>
|
||
<param name="logits"></param>
|
||
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatch.Add(LLama.Native.LLamaToken,LLama.Native.LLamaPos,LLama.Native.LLamaSeqId,System.Boolean)">
|
||
<summary>
|
||
Add a single token to the batch at a certain position for a single sequence
|
||
</summary>
|
||
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
|
||
<param name="token">The token to add</param>
|
||
<param name="pos">The position to add it at</param>
|
||
<param name="sequence">The sequence to add this token to</param>
|
||
<param name="logits"></param>
|
||
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatch.AddRange(System.ReadOnlySpan{LLama.Native.LLamaToken},LLama.Native.LLamaPos,LLama.Native.LLamaSeqId,System.Boolean)">
|
||
<summary>
|
||
Add a range of tokens to a single sequence, starting at the given position.
|
||
</summary>
|
||
<param name="tokens">The tokens to add</param>
|
||
<param name="start">The starting position to add tokens at</param>
|
||
<param name="sequence">The sequence to add this token to</param>
|
||
<param name="logitsLast">Whether the final token should generate logits</param>
|
||
<returns>The index that the final token was added at. Use this for GetLogitsIth</returns>
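<example>
A short sketch of submitting a prompt. <c>promptTokens</c> is assumed to be a LLamaToken array produced by a tokenizer.
<code>
var batch = new LLamaBatch();

// Add the whole prompt to sequence 0, requesting logits only for the final token.
var logitsIndex = batch.AddRange(promptTokens, start: 0, sequence: LLamaSeqId.Zero, logitsLast: true);

// After the batch has been decoded, 'logitsIndex' identifies where to read logits from.
</code>
</example>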
</member>
|
||
<member name="M:LLama.Native.LLamaBatch.Clear">
|
||
<summary>
|
||
Set TokenCount to zero for this batch
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatch.GetLogitPositions">
|
||
<summary>
|
||
Get the positions where logits can be sampled from
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaBatchEmbeddings">
|
||
<summary>
|
||
An embeddings batch allows submitting embeddings to multiple sequences simultaneously
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaBatchEmbeddings._logitPositions">
|
||
<summary>
|
||
Keep a list of where logits can be sampled from
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaBatchEmbeddings.LogitPositionCount">
|
||
<summary>
|
||
Get the number of logit positions that will be generated from this batch
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaBatchEmbeddings.EmbeddingDimensions">
|
||
<summary>
|
||
Size of an individual embedding
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaBatchEmbeddings.EmbeddingsCount">
|
||
<summary>
|
||
The number of items in this batch
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaBatchEmbeddings.EmbeddingsCapacity">
|
||
<summary>
|
||
Maximum number of items that can be added to this batch (automatically grows if exceeded)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaBatchEmbeddings.SequenceCapacity">
|
||
<summary>
|
||
Maximum number of sequences an item can be assigned to (automatically grows if exceeded)
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatchEmbeddings.#ctor(System.Int32)">
|
||
<summary>
|
||
Create a new batch for submitting inputs to llama.cpp
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatchEmbeddings.Add(System.ReadOnlySpan{System.Single},LLama.Native.LLamaPos,System.ReadOnlySpan{LLama.Native.LLamaSeqId},System.Boolean)">
|
||
<summary>
|
||
Add a single embedding to the batch at the same position in several sequences
|
||
</summary>
|
||
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
|
||
<param name="embedding">The embedding to add</param>
|
||
<param name="pos">The position to add it at</param>
|
||
<param name="sequences">The set of sequences to add this token to</param>
|
||
<param name="logits"></param>
|
||
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatchEmbeddings.Add(System.ReadOnlySpan{System.Single},LLama.Native.LLamaPos,LLama.Native.LLamaSeqId,System.Boolean)">
|
||
<summary>
|
||
Add a single embedding to the batch for a single sequence
|
||
</summary>
|
||
<param name="embedding"></param>
|
||
<param name="pos"></param>
|
||
<param name="sequence"></param>
|
||
<param name="logits"></param>
|
||
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaBatchEmbeddings.WriteEmbeddingsDelegate`1">
|
||
<summary>
|
||
Called by embeddings batch to write embeddings into a destination span
|
||
</summary>
|
||
<typeparam name="TParam">Type of user data parameter passed in</typeparam>
|
||
<param name="dest">Destination to write data to. Entire destination must be filled!</param>
|
||
<param name="parameter">User data parameter passed in</param>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatchEmbeddings.Add``1(``0,LLama.Native.LLamaBatchEmbeddings.WriteEmbeddingsDelegate{``0},LLama.Native.LLamaPos,System.ReadOnlySpan{LLama.Native.LLamaSeqId},System.Boolean)">
|
||
<summary>
|
||
Add a single embedding to the batch at the same position in several sequences
|
||
</summary>
|
||
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
|
||
<typeparam name="TParam">Type of userdata passed to write delegate</typeparam>
|
||
<param name="parameter">Userdata passed to write delegate</param>
|
||
<param name="write">Delegate called once to write data into a span</param>
|
||
<param name="pos">Position to write this embedding to</param>
|
||
<param name="sequences">All sequences to assign this embedding to</param>
|
||
<param name="logits">Whether logits should be generated for this embedding</param>
|
||
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatchEmbeddings.Add``1(``0,LLama.Native.LLamaBatchEmbeddings.WriteEmbeddingsDelegate{``0},LLama.Native.LLamaPos,LLama.Native.LLamaSeqId,System.Boolean)">
|
||
<summary>
|
||
Add a single embedding to the batch at a position for one sequence
|
||
</summary>
|
||
<remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
|
||
<typeparam name="TParam">Type of userdata passed to write delegate</typeparam>
|
||
<param name="parameter">Userdata passed to write delegate</param>
|
||
<param name="write">Delegate called once to write data into a span</param>
|
||
<param name="pos">Position to write this embedding to</param>
|
||
<param name="sequence">Sequence to assign this embedding to</param>
|
||
<param name="logits">Whether logits should be generated for this embedding</param>
|
||
<returns>The index that the token was added at. Use this for GetLogitsIth</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatchEmbeddings.Clear">
|
||
<summary>
|
||
Set EmbeddingsCount to zero for this batch
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaBatchEmbeddings.GetLogitPositions(System.Span{System.ValueTuple{LLama.Native.LLamaSeqId,System.Int32}})">
|
||
<summary>
|
||
Get the positions where logits can be sampled from
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaChatMessage">
|
||
<summary>
A single chat message, consisting of a role string and a content string
</summary>
|
||
<remarks>llama_chat_message</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaChatMessage.role">
|
||
<summary>
|
||
Pointer to the null terminated bytes that make up the role string
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaChatMessage.content">
|
||
<summary>
|
||
Pointer to the null terminated bytes that make up the content string
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LlamaProgressCallback">
|
||
<summary>
|
||
Called by llama.cpp with a progress value between 0 and 1
|
||
</summary>
|
||
<param name="progress"></param>
|
||
<param name="ctx"></param>
|
||
<returns>If the provided progress_callback returns true, model loading continues.
|
||
If it returns false, model loading is immediately aborted.</returns>
|
||
<remarks>llama_progress_callback</remarks>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaContextParams">
|
||
<summary>
|
||
A C# representation of the llama.cpp `llama_context_params` struct
|
||
</summary>
|
||
<remarks>changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
|
||
https://github.com/ggerganov/llama.cpp/pull/7544</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.n_ctx">
|
||
<summary>
|
||
text context, 0 = from model
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.n_batch">
|
||
<summary>
|
||
logical maximum batch size that can be submitted to llama_decode
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.n_ubatch">
|
||
<summary>
|
||
physical maximum batch size
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.n_seq_max">
|
||
<summary>
|
||
max number of sequences (i.e. distinct states for recurrent models)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.n_threads">
|
||
<summary>
|
||
number of threads to use for generation
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.n_threads_batch">
|
||
<summary>
|
||
number of threads to use for batch processing
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.rope_scaling_type">
|
||
<summary>
|
||
RoPE scaling type, from `enum llama_rope_scaling_type`
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.llama_pooling_type">
|
||
<summary>
|
||
whether to pool (sum) embedding results by sequence id
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.attention_type">
|
||
<summary>
|
||
Attention type to use for embeddings
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.rope_freq_base">
|
||
<summary>
|
||
RoPE base frequency, 0 = from model
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.rope_freq_scale">
|
||
<summary>
|
||
RoPE frequency scaling factor, 0 = from model
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.yarn_ext_factor">
|
||
<summary>
|
||
YaRN extrapolation mix factor, negative = from model
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.yarn_attn_factor">
|
||
<summary>
|
||
YaRN magnitude scaling factor
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.yarn_beta_fast">
|
||
<summary>
|
||
YaRN low correction dim
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.yarn_beta_slow">
|
||
<summary>
|
||
YaRN high correction dim
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.yarn_orig_ctx">
|
||
<summary>
|
||
YaRN original context size
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.defrag_threshold">
|
||
<summary>
|
||
defragment the KV cache if holes/size > defrag_threshold, Set to < 0 to disable (default)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.cb_eval">
|
||
<summary>
|
||
ggml_backend_sched_eval_callback
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.cb_eval_user_data">
|
||
<summary>
|
||
User data passed into cb_eval
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.type_k">
|
||
<summary>
|
||
data type for K cache. <b>EXPERIMENTAL</b>
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.type_v">
|
||
<summary>
|
||
data type for V cache. <b>EXPERIMENTAL</b>
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams._logits_all">
|
||
<summary>
|
||
Deprecated!
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaContextParams.embeddings">
|
||
<summary>
|
||
if true, extract embeddings (together with logits)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaContextParams.offload_kqv">
|
||
<summary>
|
||
whether to offload the KQV ops (including the KV cache) to GPU
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaContextParams.flash_attention">
|
||
<summary>
|
||
whether to use flash attention. <b>EXPERIMENTAL</b>
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaContextParams.no_perf">
|
||
<summary>
|
||
whether to measure performance timings
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.abort_callback">
|
||
<summary>
|
||
ggml_abort_callback
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaContextParams.abort_callback_user_data">
|
||
<summary>
|
||
User data passed into abort_callback
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaContextParams.Default">
|
||
<summary>
|
||
Get the default LLamaContextParams
|
||
</summary>
|
||
<returns></returns>
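<example>
A tiny sketch of fetching the native defaults and overriding a couple of fields before passing the struct to the native API. The field values shown are illustrative.
<code>
var cparams = LLamaContextParams.Default();

// 0 means "take the value from the model"; override with explicit sizes instead.
cparams.n_ctx = 4096;
cparams.n_batch = 512;
</code>
</example>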
</member>
|
||
<member name="T:LLama.Native.LLamaFtype">
|
||
<summary>
|
||
Supported model file types
|
||
</summary>
|
||
<remarks>C# representation of llama_ftype</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.ALL_F32">
|
||
<summary>
|
||
All f32
|
||
</summary>
|
||
<remarks>Benchmark@7B: 26GB</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_F16">
|
||
<summary>
|
||
Mostly f16
|
||
</summary>
|
||
<remarks>Benchmark@7B: 13GB</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q8_0">
|
||
<summary>
|
||
Mostly 8 bit
|
||
</summary>
|
||
<remarks>Benchmark@7B: 6.7GB, +0.0004ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q4_0">
|
||
<summary>
|
||
Mostly 4 bit
|
||
</summary>
|
||
<remarks>Benchmark@7B: 3.50GB, +0.2499 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q4_1">
|
||
<summary>
|
||
Mostly 4 bit
|
||
</summary>
|
||
<remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q5_0">
|
||
<summary>
|
||
Mostly 5 bit
|
||
</summary>
|
||
<remarks>Benchmark@7B: 4.30GB @ 7B tokens, +0.0796 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q5_1">
|
||
<summary>
|
||
Mostly 5 bit
|
||
</summary>
|
||
<remarks>Benchmark@7B: 4.70GB, +0.0415 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q2_K">
|
||
<summary>
|
||
K-Quant 2 bit
|
||
</summary>
|
||
<remarks>Benchmark@7B: 2.67GB @ 7B parameters, +0.8698 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q3_K_S">
|
||
<summary>
|
||
K-Quant 3 bit (Small)
|
||
</summary>
|
||
<remarks>Benchmark@7B: 2.75GB, +0.5505 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q3_K_M">
|
||
<summary>
|
||
K-Quant 3 bit (Medium)
|
||
</summary>
|
||
<remarks>Benchmark@7B: 3.06GB, +0.2437 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q3_K_L">
|
||
<summary>
|
||
K-Quant 3 bit (Large)
|
||
</summary>
|
||
<remarks>Benchmark@7B: 3.35GB, +0.1803 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q4_K_S">
|
||
<summary>
|
||
K-Quant 4 bit (Small)
|
||
</summary>
|
||
<remarks>Benchmark@7B: 3.56GB, +0.1149 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q4_K_M">
|
||
<summary>
|
||
K-Quant 4 bit (Medium)
|
||
</summary>
|
||
<remarks>Benchmark@7B: 3.80GB, +0.0535 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q5_K_S">
|
||
<summary>
|
||
K-Quant 5 bit (Small)
|
||
</summary>
|
||
<remarks>Benchmark@7B: 4.33GB, +0.0353 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q5_K_M">
|
||
<summary>
|
||
K-Quant 5 bit (Medium)
|
||
</summary>
|
||
<remarks>Benchmark@7B: 4.45GB, +0.0142 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q6_K">
|
||
<summary>
|
||
K-Quant 6 bit
|
||
</summary>
|
||
<remarks>Benchmark@7B: 5.15GB, +0.0044 ppl</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ2_XXS">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ2_XS">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_Q2_K_S">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ3_K_XS">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ3_XXS">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ1_S">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ4_NL">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ3_S">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ3_M">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ2_S">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ2_M">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ4_XS">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_IQ1_M">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.MOSTLY_BF16">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.LLAMA_FTYPE_MOSTLY_TQ1_0">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.LLAMA_FTYPE_MOSTLY_TQ2_0">
|
||
<summary>
|
||
except 1d tensors
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaFtype.GUESSED">
|
||
<summary>
|
||
File type was not specified
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaKvCacheViewSafeHandle">
|
||
<summary>
|
||
A safe handle for a LLamaKvCacheView
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.CellCount">
|
||
<summary>
|
||
Number of KV cache cells. This will be the same as the context size.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.TokenCount">
|
||
<summary>
|
||
Get the total number of tokens in the KV cache.
|
||
|
||
For example, if there are two populated
|
||
cells, the first with 1 sequence id in it and the second with 2 sequence
|
||
ids then you'll have 3 tokens.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.MaxSequenceCount">
|
||
<summary>
|
||
Maximum number of sequences visible for a cell. There may be more sequences than this
|
||
in reality, this is simply the maximum number this view can see.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.UsedCellCount">
|
||
<summary>
|
||
Number of populated cache cells
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.MaxContiguous">
|
||
<summary>
|
||
Maximum contiguous empty slots in the cache.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaKvCacheViewSafeHandle.MaxContiguousIdx">
|
||
<summary>
|
||
Index to the start of the MaxContiguous slot range. Can be negative when cache is full.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.#ctor(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView)">
|
||
<summary>
|
||
Initialize a LLamaKvCacheViewSafeHandle which will call `llama_kv_cache_view_free` when disposed
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="view"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.Allocate(LLama.Native.SafeLLamaContextHandle,System.Int32)">
|
||
<summary>
|
||
Allocate a new KV cache view which can be used to inspect the KV cache
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="maxSequences">The maximum number of sequences visible in this view per cell</param>
|
||
<returns></returns>
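<example>
A hedged sketch of inspecting the KV cache of an existing context. <c>context</c> is assumed to be a <see cref="T:LLama.LLamaContext"/>, whose NativeHandle property exposes the SafeLLamaContextHandle.
<code>
using var view = LLamaKvCacheViewSafeHandle.Allocate(context.NativeHandle, maxSequences: 4);

// Refresh the view, then report how much of the cache is in use.
view.Update();
Console.WriteLine($"Cells used: {view.UsedCellCount}/{view.CellCount}, tokens: {view.TokenCount}");
</code>
</example>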
</member>
|
||
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.ReleaseHandle">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.Update">
|
||
<summary>
|
||
Read the current KV cache state into this view.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.GetNativeView">
|
||
<summary>
|
||
Get the raw KV cache view
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.GetCell(System.Int32)">
|
||
<summary>
|
||
Get the cell at the given index
|
||
</summary>
|
||
<param name="index">The index of the cell [0, CellCount)</param>
|
||
<returns>Data about the cell at the given index</returns>
|
||
<exception cref="T:System.ArgumentOutOfRangeException">Thrown if index is out of range (0 <= index < CellCount)</exception>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.GetCellSequences(System.Int32)">
|
||
<summary>
|
||
Get all of the sequences assigned to the cell at the given index. This will contain at most <see cref="P:LLama.Native.LLamaKvCacheViewSafeHandle.MaxSequenceCount"/> entries;
if the cell actually has more sequences than that, allocate a new view with a larger maxSequences parameter to see them all.
Invalid sequences will be negative values.
|
||
</summary>
|
||
<param name="index">The index of the cell [0, CellCount)</param>
|
||
<returns>A span containing the sequences assigned to this cell</returns>
|
||
<exception cref="T:System.ArgumentOutOfRangeException">Thrown if index is out of range (0 <= index < CellCount)</exception>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.llama_kv_cache_view_init(LLama.Native.SafeLLamaContextHandle,System.Int32)">
|
||
<summary>
|
||
Create an empty KV cache view. (use only for debugging purposes)
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="n_seq_max"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.llama_kv_cache_view_free(LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView@)">
|
||
<summary>
|
||
Free a KV cache view. (use only for debugging purposes)
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaKvCacheViewSafeHandle.llama_kv_cache_view_update(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView@)">
|
||
<summary>
|
||
Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="view"></param>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheViewCell">
|
||
<summary>
|
||
Information associated with an individual cell in the KV cache view (llama_kv_cache_view_cell)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheViewCell.pos">
|
||
<summary>
|
||
The position for this cell. Takes KV cache shifts into account.
|
||
May be negative if the cell is not populated.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView">
|
||
<summary>
|
||
An updateable view of the KV cache (llama_kv_cache_view)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.n_cells">
|
||
<summary>
|
||
Number of KV cache cells. This will be the same as the context size.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.n_seq_max">
|
||
<summary>
|
||
Maximum number of sequences that can exist in a cell. It's not an error
|
||
if there are more sequences in a cell than this value, however they will
|
||
not be visible in the view cells_sequences.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.token_count">
|
||
<summary>
|
||
Number of tokens in the cache. For example, if there are two populated
|
||
cells, the first with 1 sequence id in it and the second with 2 sequence
|
||
ids then you'll have 3 tokens.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.used_cells">
|
||
<summary>
|
||
Number of populated cache cells.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.max_contiguous">
|
||
<summary>
|
||
Maximum contiguous empty slots in the cache.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.max_contiguous_idx">
|
||
<summary>
|
||
Index to the start of the max_contiguous slot range. Can be negative
|
||
when cache is full.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.cells">
|
||
<summary>
|
||
Information for an individual cell.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaKvCacheViewSafeHandle.NativeLLamaKvCacheView.cells_sequences">
|
||
<summary>
|
||
The sequences for each cell. There will be n_seq_max items per cell.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaLogLevel">
|
||
<summary>
|
||
Severity level of a log message. This enum should always be aligned with
|
||
the one defined on llama.cpp side at
|
||
https://github.com/ggerganov/llama.cpp/blob/0eb4e12beebabae46d37b78742f4c5d4dbe52dc1/ggml/include/ggml.h#L559
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaLogLevel.None">
|
||
<summary>
|
||
Logs are never written.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaLogLevel.Debug">
|
||
<summary>
|
||
Logs that are used for interactive investigation during development.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaLogLevel.Info">
|
||
<summary>
|
||
Logs that track the general flow of the application.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaLogLevel.Warning">
|
||
<summary>
|
||
Logs that highlight an abnormal or unexpected event in the application flow, but do not otherwise cause the application execution to stop.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaLogLevel.Error">
|
||
<summary>
|
||
Logs that highlight when the current flow of execution is stopped due to a failure.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaLogLevel.Continue">
|
||
<summary>
|
||
Continue log level is equivalent to None in the way it is used in llama.cpp.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaLogLevelExtensions._previous">
|
||
<summary>
|
||
Keeps track of the previous log level to be able to handle the log level <see cref="F:LLama.Native.LLamaLogLevel.Continue"/>.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaModelMetadataOverride">
|
||
<summary>
|
||
Override a key/value pair in the llama model metadata (llama_model_kv_override)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelMetadataOverride.key">
|
||
<summary>
|
||
Key to override
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelMetadataOverride.Tag">
|
||
<summary>
|
||
Type of value
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelMetadataOverride.PADDING">
|
||
<summary>
|
||
Add 4 bytes of padding, to align the next fields to 8 bytes
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelMetadataOverride.IntValue">
|
||
<summary>
|
||
Value, **must** only be used if Tag == LLAMA_KV_OVERRIDE_INT
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelMetadataOverride.FloatValue">
|
||
<summary>
|
||
Value, **must** only be used if Tag == LLAMA_KV_OVERRIDE_FLOAT
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelMetadataOverride.BoolValue">
|
||
<summary>
|
||
Value, **must** only be used if Tag == LLAMA_KV_OVERRIDE_BOOL
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelMetadataOverride.StringValue">
|
||
<summary>
|
||
Value, **must** only be used if Tag == String
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaModelKvOverrideType">
|
||
<summary>
|
||
Specifies what type of value is being overridden by LLamaModelKvOverride
|
||
</summary>
|
||
<remarks>llama_model_kv_override_type</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelKvOverrideType.Int">
|
||
<summary>
|
||
Overriding an int value
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelKvOverrideType.Float">
|
||
<summary>
|
||
Overriding a float value
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelKvOverrideType.Bool">
|
||
<summary>
|
||
Overriding a bool value
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelKvOverrideType.String">
|
||
<summary>
|
||
Overriding a string value
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaModelParams">
|
||
<summary>
|
||
A C# representation of the llama.cpp `llama_model_params` struct
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelParams.devices">
|
||
<summary>
|
||
NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
|
||
todo: add support for llama_model_params.devices
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelParams.n_gpu_layers">
|
||
<summary>
|
||
Number of layers to store in VRAM
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelParams.split_mode">
|
||
<summary>
|
||
how to split the model across multiple GPUs
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelParams.main_gpu">
|
||
<summary>
|
||
the GPU that is used for the entire model when split_mode is LLAMA_SPLIT_MODE_NONE
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelParams.tensor_split">
|
||
<summary>
|
||
how to split layers across multiple GPUs (size: <see cref="M:LLama.Native.NativeApi.llama_max_devices"/>)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelParams.progress_callback">
|
||
<summary>
|
||
called with a progress value between 0 and 1, pass NULL to disable. If the provided progress_callback
|
||
returns true, model loading continues. If it returns false, model loading is immediately aborted.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelParams.progress_callback_user_data">
|
||
<summary>
|
||
context pointer passed to the progress callback
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelParams.kv_overrides">
|
||
<summary>
|
||
override key-value pairs of the model meta data
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaModelParams.vocab_only">
|
||
<summary>
|
||
only load the vocabulary, no weights
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaModelParams.use_mmap">
|
||
<summary>
|
||
use mmap if possible
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaModelParams.use_mlock">
|
||
<summary>
|
||
force system to keep model in RAM
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaModelParams.check_tensors">
|
||
<summary>
|
||
validate model tensor data
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaModelParams.Default">
|
||
<summary>
|
||
Create a LLamaModelParams with default values
|
||
</summary>
|
||
<returns></returns>
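<example>
A small sketch of taking the native defaults and adjusting them before loading a model. The layer count shown is illustrative.
<code>
var mparams = LLamaModelParams.Default();

// Offload 20 layers to the GPU, splitting layers across devices if there are several.
mparams.n_gpu_layers = 20;
mparams.split_mode = GPUSplitMode.Layer;
</code>
</example>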
</member>
|
||
<member name="T:LLama.Native.LLamaModelQuantizeParams">
|
||
<summary>
|
||
Quantizer parameters used in the native API
|
||
</summary>
|
||
<remarks>llama_model_quantize_params</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelQuantizeParams.nthread">
|
||
<summary>
|
||
number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelQuantizeParams.ftype">
|
||
<summary>
|
||
quantize to this llama_ftype
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelQuantizeParams.output_tensor_type">
|
||
<summary>
|
||
output tensor type
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelQuantizeParams.token_embedding_type">
|
||
<summary>
|
||
token embeddings tensor type
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaModelQuantizeParams.allow_requantize">
|
||
<summary>
|
||
allow quantizing non-f32/f16 tensors
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaModelQuantizeParams.quantize_output_tensor">
|
||
<summary>
|
||
quantize output.weight
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaModelQuantizeParams.only_copy">
|
||
<summary>
|
||
only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaModelQuantizeParams.pure">
|
||
<summary>
|
||
quantize all tensors to the default type
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaModelQuantizeParams.keep_split">
|
||
<summary>
|
||
quantize to the same number of shards
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelQuantizeParams.imatrix">
|
||
<summary>
|
||
pointer to importance matrix data
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaModelQuantizeParams.kv_overrides">
|
||
<summary>
|
||
pointer to vector containing overrides
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaModelQuantizeParams.Default">
|
||
<summary>
|
||
Create a LLamaModelQuantizeParams with default values
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaNativeBatch">
|
||
<summary>
|
||
Input data for llama_decode
|
||
A llama_batch object can contain input about one or many sequences
|
||
The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaNativeBatch.n_tokens">
|
||
<summary>
|
||
The number of items pointed at by pos, seq_id and logits.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaNativeBatch.tokens">
|
||
<summary>
|
||
Either `n_tokens` of `llama_token`, or `NULL`, depending on how this batch was created
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaNativeBatch.embd">
|
||
<summary>
|
||
Either `n_tokens * embd * sizeof(float)` or `NULL`, depending on how this batch was created
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaNativeBatch.pos">
|
||
<summary>
|
||
the positions of the respective token in the sequence
|
||
(if set to NULL, the token position will be tracked automatically by llama_decode)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaNativeBatch.n_seq_id">
|
||
<summary>
|
||
Number of sequence ids for each respective token. See https://github.com/ggerganov/llama.cpp/blob/master/llama.h#L139
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaNativeBatch.seq_id">
|
||
<summary>
|
||
the sequence to which the respective token belongs
|
||
(if set to NULL, the sequence ID will be assumed to be 0)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaNativeBatch.logits">
|
||
<summary>
|
||
if zero, the logits for the respective token will not be output
|
||
(if set to NULL, only the logits for last token will be returned)
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaPoolingType">
|
||
<summary>
The pooling method used for embeddings
</summary>
|
||
<remarks>llama_pooling_type</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPoolingType.Unspecified">
|
||
<summary>
|
||
No specific pooling type. Use the model default if this is specified in <see cref="P:LLama.Abstractions.IContextParams.PoolingType"/>
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPoolingType.None">
|
||
<summary>
|
||
Do not pool embeddings (per-token embeddings)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPoolingType.Mean">
|
||
<summary>
|
||
Take the mean of every token embedding
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPoolingType.CLS">
|
||
<summary>
|
||
Return the embedding for the special "CLS" token
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPoolingType.Rank">
|
||
<summary>
|
||
Used by reranking models to attach the classification head to the graph
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaPos">
|
||
<summary>
|
||
Indicates position in a sequence
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPos.Value">
|
||
<summary>
|
||
The raw value
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaPos.#ctor(System.Int32)">
|
||
<summary>
|
||
Create a new LLamaPos
|
||
</summary>
|
||
<param name="value"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaPos.op_Explicit(LLama.Native.LLamaPos)~System.Int32">
|
||
<summary>
|
||
Convert a LLamaPos into an integer (extract the raw value)
|
||
</summary>
|
||
<param name="pos"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaPos.op_Implicit(System.Int32)~LLama.Native.LLamaPos">
|
||
<summary>
|
||
Convert an integer into a LLamaPos
|
||
</summary>
|
||
<param name="value"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaPos.op_Increment(LLama.Native.LLamaPos)">
|
||
<summary>
|
||
Increment this position
|
||
</summary>
|
||
<param name="pos"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaPos.op_Decrement(LLama.Native.LLamaPos)">
|
||
<summary>
|
||
Decrement this position
|
||
</summary>
|
||
<param name="pos"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaRopeType">
|
||
<summary>
|
||
|
||
</summary>
|
||
<remarks>llama_rope_type</remarks>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSeqId">
|
||
<summary>
|
||
ID for a sequence in a batch
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaSeqId.Zero">
|
||
<summary>
|
||
LLamaSeqId with value 0
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaSeqId.Value">
|
||
<summary>
|
||
The raw value
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaSeqId.#ctor(System.Int32)">
|
||
<summary>
|
||
Create a new LLamaSeqId
|
||
</summary>
|
||
<param name="value"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaSeqId.op_Explicit(LLama.Native.LLamaSeqId)~System.Int32">
|
||
<summary>
|
||
Convert a LLamaSeqId into an integer (extract the raw value)
|
||
</summary>
|
||
<param name="pos"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaSeqId.op_Explicit(System.Int32)~LLama.Native.LLamaSeqId">
|
||
<summary>
|
||
Convert an integer into a LLamaSeqId
|
||
</summary>
|
||
<param name="value"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaSeqId.ToString">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaPerfContextTimings">
|
||
<summary>
|
||
LLama performance information
|
||
</summary>
|
||
<remarks>llama_perf_context_data</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPerfContextTimings.t_start_ms">
|
||
<summary>
|
||
Timestamp when reset was last called
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPerfContextTimings.t_load_ms">
|
||
<summary>
|
||
Loading milliseconds
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPerfContextTimings.t_p_eval_ms">
|
||
<summary>
|
||
total milliseconds spent prompt processing
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPerfContextTimings.t_eval_ms">
|
||
<summary>
|
||
Total milliseconds in eval/decode calls
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPerfContextTimings.n_p_eval">
|
||
<summary>
|
||
number of tokens in eval calls for the prompt (with batch size > 1)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaPerfContextTimings.n_eval">
|
||
<summary>
|
||
number of eval calls
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaPerfContextTimings.ResetTimestamp">
|
||
<summary>
|
||
Timestamp when reset was last called
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaPerfContextTimings.Loading">
|
||
<summary>
|
||
Time spent loading
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaPerfContextTimings.PromptEval">
|
||
<summary>
|
||
total milliseconds spent prompt processing
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaPerfContextTimings.Eval">
|
||
<summary>
|
||
Total milliseconds in eval/decode calls
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaPerfContextTimings.PrompTokensEvaluated">
|
||
<summary>
|
||
number of tokens in eval calls for the prompt (with batch size > 1)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaPerfContextTimings.TokensEvaluated">
|
||
<summary>
|
||
number of eval calls
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSamplingTimings">
|
||
<summary>
|
||
LLama performance information
|
||
</summary>
|
||
<remarks>llama_perf_sampler_data</remarks>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaToken">
|
||
<summary>
|
||
A single token
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaToken.InvalidToken">
|
||
<summary>
|
||
The value used when a token is inherently null or invalid
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaToken.Value">
|
||
<summary>
|
||
The raw value
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.#ctor(System.Int32)">
|
||
<summary>
|
||
Create a new LLamaToken
|
||
</summary>
|
||
<param name="value"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.op_Explicit(LLama.Native.LLamaToken)~System.Int32">
|
||
<summary>
|
||
Convert a LLamaToken into an integer (extract the raw value)
|
||
</summary>
|
||
<param name="pos"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.op_Implicit(System.Int32)~LLama.Native.LLamaToken">
|
||
<summary>
|
||
Convert an integer into a LLamaToken
|
||
</summary>
|
||
<param name="value"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.GetAttributes(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Get attributes for this token
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.GetAttributes(LLama.Native.SafeLlamaModelHandle.Vocabulary)">
|
||
<summary>
|
||
Get attributes for this token
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.GetScore(LLama.Native.SafeLlamaModelHandle.Vocabulary)">
|
||
<summary>
|
||
Get score for this token
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.IsControl(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Check if this is a control token
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.IsControl(LLama.Native.SafeLlamaModelHandle.Vocabulary)">
|
||
<summary>
|
||
Check if this is a control token
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.IsEndOfGeneration(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Check if this token should end generation
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.IsEndOfGeneration(LLama.Native.SafeLlamaModelHandle.Vocabulary)">
|
||
<summary>
|
||
Check if this token should end generation
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaToken.ToString">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaTokenAttr">
|
||
<summary>
|
||
Token attributes
|
||
</summary>
|
||
<remarks>C# equivalent of llama_token_attr</remarks>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaTokenData">
|
||
<summary>
|
||
A single token along with the probability of this token being selected
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaTokenData.ID">
|
||
<summary>
|
||
token id
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaTokenData.Logit">
|
||
<summary>
|
||
log-odds of the token
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaTokenData.Probability">
|
||
<summary>
|
||
probability of the token
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaTokenData.#ctor(LLama.Native.LLamaToken,System.Single,System.Single)">
|
||
<summary>
|
||
Create a new LLamaTokenData
|
||
</summary>
|
||
<param name="id"></param>
|
||
<param name="logit"></param>
|
||
<param name="probability"></param>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaTokenDataArray">
|
||
<summary>
|
||
Contains an array of LLamaTokenData, potentially sorted.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaTokenDataArray.Data">
|
||
<summary>
|
||
The LLamaTokenData
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaTokenDataArray.Sorted">
|
||
<summary>
|
||
Indicates if `data` is sorted by logits in descending order. If this is false the token data is in _no particular order_.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaTokenDataArray.#ctor(System.Memory{LLama.Native.LLamaTokenData},System.Boolean)">
|
||
<summary>
|
||
Create a new LLamaTokenDataArray
|
||
</summary>
|
||
<param name="tokens"></param>
|
||
<param name="isSorted"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaTokenDataArray.Create(System.ReadOnlySpan{System.Single})">
|
||
<summary>
|
||
Create a new LLamaTokenDataArray, copying the data from the given logits
|
||
</summary>
|
||
<param name="logits"></param>
|
||
<returns></returns>
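<example>
A minimal sketch combining this method with <see cref="M:LLama.Native.LLamaTokenDataArray.Softmax"/>; the logits values are placeholders standing in for real model output.
<code>
// Copy raw logits (one float per vocabulary entry) into a token data array
float[] logits = { 0.1f, 2.5f, -1.0f };
var candidates = LLamaTokenDataArray.Create(logits);

// Sort by logit (descending) and compute probabilities from the logits
candidates.Softmax();
</code>
</example>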
</member>
|
||
<member name="M:LLama.Native.LLamaTokenDataArray.Create(System.ReadOnlySpan{System.Single},System.Memory{LLama.Native.LLamaTokenData})">
|
||
<summary>
|
||
Create a new LLamaTokenDataArray, copying the data from the given logits into temporary memory.
|
||
</summary>
|
||
<remarks>The memory must not be modified while this <see cref="T:LLama.Native.LLamaTokenDataArray"/> is in use.</remarks>
|
||
<param name="logits"></param>
|
||
<param name="buffer">Temporary memory which will be used to work on these logits. Must be at least as large as logits array</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaTokenDataArray.OverwriteLogits(System.ReadOnlySpan{System.ValueTuple{LLama.Native.LLamaToken,System.Single}})">
|
||
<summary>
|
||
Overwrite the logit values for all given tokens
|
||
</summary>
|
||
<param name="values">tuples of token and logit value to overwrite</param>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaTokenDataArray.Softmax">
|
||
<summary>
|
||
Sorts candidate tokens by their logits in descending order and calculates probabilities based on the logits.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaTokenDataArrayNative">
|
||
<summary>
|
||
Contains a pointer to an array of LLamaTokenData which is pinned in memory.
|
||
</summary>
|
||
<remarks>C# equivalent of llama_token_data_array</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaTokenDataArrayNative._data">
|
||
<summary>
|
||
A pointer to an array of LlamaTokenData
|
||
</summary>
|
||
<remarks>Memory must be pinned in place for all the time this LLamaTokenDataArrayNative is in use (i.e. `fixed` or `.Pin()`)</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaTokenDataArrayNative._size">
|
||
<summary>
|
||
Number of LLamaTokenData in the array
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaTokenDataArrayNative._selected">
|
||
<summary>
|
||
The index of the selected token in the array (i.e. not the token id)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaTokenDataArrayNative.Data">
|
||
<summary>
|
||
A pointer to an array of LlamaTokenData
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaTokenDataArrayNative.Sorted">
|
||
<summary>
|
||
Indicates if the items in the array are sorted, so the most likely token is first
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaTokenDataArrayNative.Selected">
|
||
<summary>
|
||
The index of the selected token (i.e. <b>not the token id</b>)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaTokenDataArrayNative.Size">
|
||
<summary>
|
||
Number of LLamaTokenData in the array. Set this to shrink the array
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaTokenDataArrayNative.Create(LLama.Native.LLamaTokenDataArray,LLama.Native.LLamaTokenDataArrayNative@)">
|
||
<summary>
|
||
Create a new LLamaTokenDataArrayNative around the data in the LLamaTokenDataArray
|
||
</summary>
|
||
<param name="array">Data source</param>
|
||
<param name="native">Created native array</param>
|
||
<returns>A memory handle, pinning the data in place until disposed</returns>
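<example>
A minimal sketch of the pinning contract described above, assuming <c>candidates</c> is an existing <see cref="T:LLama.Native.LLamaTokenDataArray"/>; the native view is only valid while the returned handle is alive.
<code>
// Pin the managed token data and expose a native view over it
using (LLamaTokenDataArrayNative.Create(candidates, out var native))
{
    // `native` may be passed to native sampling calls here.
    // The memory stays pinned until the handle is disposed at the end of this block.
}
</code>
</example>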
</member>
|
||
<member name="T:LLama.Native.LLamaVocabNative">
|
||
<summary>
|
||
C# equivalent of llama_vocab struct. This struct is an opaque type, with no fields in the API and is only used for typed pointers.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_get_attr(LLama.Native.LLamaVocabNative*,LLama.Native.LLamaToken)">
|
||
<summary>
|
||
Get attributes for a specific token
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<param name="token"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_is_eog(LLama.Native.LLamaVocabNative*,LLama.Native.LLamaToken)">
|
||
<summary>
|
||
Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<param name="token"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_is_control(LLama.Native.LLamaVocabNative*,LLama.Native.LLamaToken)">
|
||
<summary>
|
||
Identify whether a token id is a control token or a renderable token
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<param name="token"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_bos(LLama.Native.LLamaVocabNative*)">
|
||
<summary>
|
||
beginning-of-sentence
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_eos(LLama.Native.LLamaVocabNative*)">
|
||
<summary>
|
||
end-of-sentence
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_eot(LLama.Native.LLamaVocabNative*)">
|
||
<summary>
|
||
end-of-turn
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_sep(LLama.Native.LLamaVocabNative*)">
|
||
<summary>
|
||
sentence separator
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_nl(LLama.Native.LLamaVocabNative*)">
|
||
<summary>
|
||
next-line
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaVocabNative.llama_vocab_pad(LLama.Native.LLamaVocabNative*)">
|
||
<summary>
|
||
padding
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaVocabPreType">
|
||
<summary>
|
||
|
||
</summary>
|
||
<remarks>llama_vocab_pre_type</remarks>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaVocabType">
|
||
<summary>
|
||
|
||
</summary>
|
||
<remarks>llama_vocab_type</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaVocabType.None">
|
||
<summary>
|
||
For models without vocab
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaVocabType.SentencePiece">
|
||
<summary>
|
||
LLaMA tokenizer based on byte-level BPE with byte fallback
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaVocabType.BytePairEncoding">
|
||
<summary>
|
||
GPT-2 tokenizer based on byte-level BPE
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaVocabType.WordPiece">
|
||
<summary>
|
||
BERT tokenizer based on WordPiece
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaVocabType.Unigram">
|
||
<summary>
|
||
T5 tokenizer based on Unigram
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaVocabType.RWKV">
|
||
<summary>
|
||
RWKV tokenizer based on greedy tokenization
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLavaImageEmbed">
|
||
<summary>
|
||
LLaVa Image embeddings
|
||
</summary>
|
||
<remarks>llava_image_embed</remarks>
|
||
</member>
|
||
<member name="P:LLama.Native.NativeLibraryConfig.Instance">
|
||
<summary>
|
||
Set configurations for all the native libraries, including LLama and LLava
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.NativeLibraryConfig.All">
|
||
<summary>
|
||
Set configurations for all the native libraries, including LLama and LLava
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.NativeLibraryConfig.LLama">
|
||
<summary>
|
||
Configuration for LLama native library
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.NativeLibraryConfig.LLava">
|
||
<summary>
|
||
Configuration for LLava native library
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.NativeLibraryConfig.LibraryHasLoaded">
|
||
<summary>
|
||
Check if the native library has already been loaded. Configuration cannot be modified if this is true.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLibraryConfig.WithLogCallback(LLama.Native.NativeLogConfig.LLamaLogCallback)">
|
||
<summary>
|
||
Set the log callback that will be used for all llama.cpp log messages
|
||
</summary>
|
||
<param name="callback"></param>
|
||
<exception cref="T:System.NotImplementedException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLibraryConfig.WithLogCallback(Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
Set the log callback that will be used for all llama.cpp log messages
|
||
</summary>
|
||
<param name="logger"></param>
|
||
<exception cref="T:System.NotImplementedException"></exception>
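<example>
A minimal sketch, assuming an <c>ILogger</c> instance named <c>logger</c> is already available; this must be done before the native library is loaded.
<code>
// Route llama.cpp log messages for the LLama library through an ILogger
NativeLibraryConfig.LLama.WithLogCallback(logger);

// Or apply the same callback to both LLama and LLava at once
NativeLibraryConfig.All.WithLogCallback(logger);
</code>
</example>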
</member>
|
||
<member name="M:LLama.Native.NativeLibraryConfig.DryRun(LLama.Abstractions.INativeLibrary@)">
|
||
<summary>
|
||
Try to load the native library with the current configurations,
|
||
but do not actually set it to <see cref="T:LLama.Native.NativeApi"/>.
|
||
|
||
You can still modify the configuration after calling this method, but only before any call to <see cref="T:LLama.Native.NativeApi"/>.
|
||
</summary>
|
||
<param name="loadedLibrary">
|
||
The loaded livrary. When the loading failed, this will be null.
|
||
However if you are using .NET standard2.0, this will never return null.
|
||
</param>
|
||
<returns>Whether the library was loaded successfully.</returns>
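<example>
A minimal sketch of how a dry run might be used; the branch bodies are illustrative only.
<code>
// Attempt to resolve a native library without committing it to NativeApi
if (NativeLibraryConfig.LLama.DryRun(out var loadedLibrary))
{
    // Inspect what would be loaded, e.g. via loadedLibrary.Metadata
}
else
{
    // Loading failed: adjust the configuration and try again
}
</code>
</example>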
</member>
|
||
<member name="T:LLama.Native.NativeLibraryConfigContainer">
|
||
<summary>
|
||
A class to apply the same configuration to multiple native libraries at the same time.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLibraryConfigContainer.ForEach(System.Action{LLama.Native.NativeLibraryConfig})">
|
||
<summary>
|
||
Do an action for all the configs in this container.
|
||
</summary>
|
||
<param name="action"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLibraryConfigContainer.WithLogCallback(LLama.Native.NativeLogConfig.LLamaLogCallback)">
|
||
<summary>
|
||
Set the log callback that will be used for all llama.cpp log messages
|
||
</summary>
|
||
<param name="callback"></param>
|
||
<exception cref="T:System.NotImplementedException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLibraryConfigContainer.WithLogCallback(Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
Set the log callback that will be used for all llama.cpp log messages
|
||
</summary>
|
||
<param name="logger"></param>
|
||
<exception cref="T:System.NotImplementedException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLibraryConfigContainer.DryRun(LLama.Abstractions.INativeLibrary@,LLama.Abstractions.INativeLibrary@)">
|
||
<summary>
|
||
Try to load the native library with the current configurations,
|
||
but do not actually set it to <see cref="T:LLama.Native.NativeApi"/>.
|
||
|
||
You can still modify the configuration after calling this method, but only before any call to <see cref="T:LLama.Native.NativeApi"/>.
|
||
</summary>
|
||
<returns>Whether the libraries were loaded successfully.</returns>
|
||
</member>
|
||
<member name="T:LLama.Native.NativeLibraryName">
|
||
<summary>
|
||
The name of the native library
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.NativeLibraryName.LLama">
|
||
<summary>
|
||
The native library compiled from llama.cpp.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.NativeLibraryName.LLava">
|
||
<summary>
|
||
The native library compiled from the LLaVA example of llama.cpp.
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.NativeLibraryFromPath">
|
||
<summary>
|
||
A native library specified with a local file path.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.NativeLibraryFromPath.Metadata">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLibraryFromPath.#ctor(System.String)">
|
||
<summary>
|
||
|
||
</summary>
|
||
<param name="path"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLibraryFromPath.Prepare(LLama.Native.SystemInfo,LLama.Native.NativeLogConfig.LLamaLogCallback)">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="T:LLama.Native.NativeLibraryMetadata">
|
||
<summary>
|
||
Information of a native library file.
|
||
</summary>
|
||
<param name="NativeLibraryName">Which kind of library it is.</param>
|
||
<param name="UseCuda">Whether it's compiled with cublas.</param>
|
||
<param name="UseVulkan">Whether it's compiled with vulkan.</param>
|
||
<param name="AvxLevel">Which AvxLevel it's compiled with.</param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLibraryMetadata.#ctor(LLama.Native.NativeLibraryName,System.Boolean,System.Boolean,LLama.Native.AvxLevel)">
|
||
<summary>
|
||
Information of a native library file.
|
||
</summary>
|
||
<param name="NativeLibraryName">Which kind of library it is.</param>
|
||
<param name="UseCuda">Whether it's compiled with cublas.</param>
|
||
<param name="UseVulkan">Whether it's compiled with vulkan.</param>
|
||
<param name="AvxLevel">Which AvxLevel it's compiled with.</param>
|
||
</member>
|
||
<member name="P:LLama.Native.NativeLibraryMetadata.NativeLibraryName">
|
||
<summary>Which kind of library it is.</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.NativeLibraryMetadata.UseCuda">
|
||
<summary>Whether it's compiled with cublas.</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.NativeLibraryMetadata.UseVulkan">
|
||
<summary>Whether it's compiled with vulkan.</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.NativeLibraryMetadata.AvxLevel">
|
||
<summary>Which AvxLevel it's compiled with.</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.AvxLevel">
|
||
<summary>
|
||
Avx support configuration
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.AvxLevel.None">
|
||
<summary>
|
||
No AVX
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.AvxLevel.Avx">
|
||
<summary>
|
||
Advanced Vector Extensions (supported by most processors after 2011)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.AvxLevel.Avx2">
|
||
<summary>
|
||
AVX2 (supported by most processors after 2013)
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.AvxLevel.Avx512">
|
||
<summary>
|
||
AVX512 (supported by some processors after 2016, not widely supported)
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLibraryUtils.TryLoadLibrary(LLama.Native.NativeLibraryConfig,LLama.Abstractions.INativeLibrary@)">
|
||
<summary>
|
||
Try to load libllama/llava_shared, using CPU feature detection to try and load a more specialised DLL if possible
|
||
</summary>
|
||
<returns>The library handle to unload later, or IntPtr.Zero if no library was loaded</returns>
|
||
</member>
|
||
<member name="T:LLama.Native.SystemInfo">
|
||
<summary>
|
||
Operating system information.
|
||
</summary>
|
||
<param name="OSPlatform"></param>
|
||
<param name="CudaMajorVersion"></param>
|
||
<param name="VulkanVersion"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SystemInfo.#ctor(System.Runtime.InteropServices.OSPlatform,System.Int32,System.String)">
|
||
<summary>
|
||
Operating system information.
|
||
</summary>
|
||
<param name="OSPlatform"></param>
|
||
<param name="CudaMajorVersion"></param>
|
||
<param name="VulkanVersion"></param>
|
||
</member>
|
||
<member name="P:LLama.Native.SystemInfo.OSPlatform">
|
||
<summary></summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SystemInfo.CudaMajorVersion">
|
||
<summary></summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SystemInfo.VulkanVersion">
|
||
<summary></summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SystemInfo.Get">
|
||
<summary>
|
||
Get the system information of the current machine.
|
||
</summary>
|
||
<returns></returns>
|
||
<exception cref="T:System.PlatformNotSupportedException"></exception>
|
||
</member>
|
||
<member name="T:LLama.Native.UnknownNativeLibrary">
|
||
<summary>
|
||
When you are using .NET Standard 2.0, dynamic native library loading is not supported.
|
||
This class will be returned in <see cref="M:LLama.Native.NativeLibraryConfig.DryRun(LLama.Abstractions.INativeLibrary@)"/>.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.UnknownNativeLibrary.Metadata">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="M:LLama.Native.UnknownNativeLibrary.Prepare(LLama.Native.SystemInfo,LLama.Native.NativeLogConfig.LLamaLogCallback)">
|
||
<inheritdoc/>
|
||
</member>
|
||
<member name="T:LLama.Native.LoraAdapter">
|
||
<summary>
|
||
A LoRA adapter which can be applied to a context for a specific model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LoraAdapter.Model">
|
||
<summary>
|
||
The model which this LoRA adapter was loaded with.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LoraAdapter.Path">
|
||
<summary>
|
||
The full path of the file this adapter was loaded from
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LoraAdapter.Pointer">
|
||
<summary>
|
||
Native pointer of the loaded adapter, will be automatically freed when the model is unloaded
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.LoraAdapter.Loaded">
|
||
<summary>
|
||
Indicates if this adapter has been unloaded
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LoraAdapter.Unload">
|
||
<summary>
|
||
Unload this adapter
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.NativeApi">
|
||
<summary>
|
||
Direct translation of the llama.cpp API
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_empty_call">
|
||
<summary>
|
||
A method that does nothing. This is a native method; calling it will force the llama native dependencies to be loaded.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_backend_free">
|
||
<summary>
|
||
Call once at the end of the program - currently only used for MPI
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_max_devices">
|
||
<summary>
|
||
Get the maximum number of devices supported by llama.cpp
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_supports_mmap">
|
||
<summary>
|
||
Check if memory mapping is supported
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_supports_mlock">
|
||
<summary>
|
||
Check if memory locking is supported
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_supports_gpu_offload">
|
||
<summary>
|
||
Check if GPU offload is supported
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_supports_rpc">
|
||
<summary>
|
||
Check if RPC offload is supported
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_backend_init">
|
||
<summary>
|
||
Initialize the llama + ggml backend. Call once at the start of the program.
|
||
|
||
This is private because LLamaSharp automatically calls it, and it's only valid to call it once!
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_state_load_file(LLama.Native.SafeLLamaContextHandle,System.String,LLama.Native.LLamaToken[],System.UInt64,System.UInt64@)">
|
||
<summary>
|
||
Load session file
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="path_session"></param>
|
||
<param name="tokens_out"></param>
|
||
<param name="n_token_capacity"></param>
|
||
<param name="n_token_count_out"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_state_save_file(LLama.Native.SafeLLamaContextHandle,System.String,LLama.Native.LLamaToken[],System.UInt64)">
|
||
<summary>
|
||
Save session file
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="path_session"></param>
|
||
<param name="tokens"></param>
|
||
<param name="n_token_count"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_set_causal_attn(LLama.Native.SafeLLamaContextHandle,System.Boolean)">
|
||
<summary>
|
||
Set whether to use causal attention or not. If set to true, the model will only attend to the past tokens
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_set_embeddings(LLama.Native.SafeLLamaContextHandle,System.Boolean)">
|
||
<summary>
|
||
Set whether the model is in embeddings mode or not.
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="embeddings">If true, embeddings will be returned but logits will not</param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_set_abort_callback(LLama.Native.SafeLlamaModelHandle,System.IntPtr,System.IntPtr)">
|
||
<summary>
|
||
Set abort callback
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_n_seq_max(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Get the n_seq_max for this context
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_get_embeddings(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Get all output token embeddings.
|
||
When pooling_type == LLAMA_POOLING_TYPE_NONE or when using a generative model, the embeddings for which
|
||
llama_batch.logits[i] != 0 are stored contiguously in the order they have appeared in the batch.
|
||
shape: [n_outputs*n_embd]
|
||
Otherwise, returns an empty span.
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_chat_apply_template(System.Byte*,LLama.Native.LLamaChatMessage*,System.UIntPtr,System.Boolean,System.Byte*,System.Int32)">
|
||
<summary>
|
||
Apply chat template. Inspired by hf apply_chat_template() on python.
|
||
</summary>
|
||
<param name="tmpl">A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead.</param>
|
||
<param name="chat">Pointer to a list of multiple llama_chat_message</param>
|
||
<param name="n_msg">Number of llama_chat_message in this chat</param>
|
||
<param name="add_ass">Whether to end the prompt with the token(s) that indicate the start of an assistant message.</param>
|
||
<param name="buf">A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages)</param>
|
||
<param name="length">The size of the allocated buffer</param>
|
||
<returns>The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-allocate the buffer and then re-apply the template.</returns>
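<example>
A hypothetical sketch of the re-allocate-and-retry contract described by the return value; <c>apply</c> stands in for a call to this native method and is not a real LLamaSharp API.
<code>
// `apply` wraps the native call: it writes into the buffer it is given and
// returns the total number of bytes the formatted prompt requires.
byte[] FormatPrompt(Func<byte[], int> apply, int totalMessageChars)
{
    // Recommended starting size: 2 * (total number of characters of all messages)
    var buffer = new byte[2 * totalMessageChars];
    var required = apply(buffer);

    if (required > buffer.Length)
    {
        // The buffer was too small: re-allocate to the reported size and re-apply
        buffer = new byte[required];
        required = apply(buffer);
    }

    return buffer.AsSpan(0, required).ToArray();
}
</code>
</example>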
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_chat_builtin_templates(System.Char**,System.UIntPtr)">
|
||
<summary>
|
||
Get list of built-in chat templates
|
||
</summary>
|
||
<param name="output"></param>
|
||
<param name="len"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_print_timings(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Print out timing information for this context
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_print_system_info">
|
||
<summary>
|
||
Print system information
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_token_to_piece(LLama.Native.SafeLlamaModelHandle.Vocabulary,LLama.Native.LLamaToken,System.Span{System.Byte},System.Int32,System.Boolean)">
|
||
<summary>
|
||
Convert a single token into text
|
||
</summary>
|
||
<param name="vocab"></param>
|
||
<param name="llamaToken"></param>
|
||
<param name="buffer">buffer to write string into</param>
|
||
<param name="lstrip">User can skip up to 'lstrip' leading spaces before copying (useful when encoding/decoding multiple tokens with 'add_space_prefix')</param>
|
||
<param name="special">If true, special tokens are rendered in the output</param>
|
||
<returns>The length written, or if the buffer is too small a negative that indicates the length required</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_tokenize(LLama.Native.LLamaVocabNative*,System.Byte*,System.Int32,LLama.Native.LLamaToken*,System.Int32,System.Boolean,System.Boolean)">
|
||
<summary>
|
||
Convert text into tokens
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="text"></param>
|
||
<param name="text_len"></param>
|
||
<param name="tokens">The tokens pointer must be large enough to hold the resulting tokens.</param>
|
||
<param name="n_max_tokens"></param>
|
||
<param name="add_special">add_special Allow to add BOS and EOS tokens if model is configured to do so.</param>
|
||
<param name="parse_special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.</param>
|
||
<returns>Returns the number of tokens on success, no more than n_max_tokens.
|
||
Returns a negative number on failure - the number of tokens that would have been returned
|
||
</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_detokenize(LLama.Native.LLamaVocabNative*,LLama.Native.LLamaToken*,System.Int32,System.Byte*,System.Int32,System.Boolean,System.Boolean)">
|
||
<summary>
|
||
Convert the provided tokens into text (inverse of llama_tokenize()).
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="tokens"></param>
|
||
<param name="nTokens"></param>
|
||
<param name="textOut">The char pointer must be large enough to hold the resulting text.</param>
|
||
<param name="textLengthMax"></param>
|
||
<param name="removeSpecial">remove_special Allow to remove BOS and EOS tokens if model is configured to do so.</param>
|
||
<param name="unparseSpecial">unparse_special If true, special tokens are rendered in the output.</param>
|
||
<returns>Returns the number of chars/bytes on success, no more than textLengthMax. Returns a negative number on failure - the number of chars/bytes that would have been returned.</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_log_set(LLama.Native.NativeLogConfig.LLamaLogCallback)">
|
||
<summary>
|
||
Register a callback to receive llama log messages
|
||
</summary>
|
||
<param name="logCallback"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_get_kv_cache_token_count(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Returns the number of tokens in the KV cache (slow, use only for debug)
|
||
If a KV cell has multiple sequences assigned to it, it will be counted multiple times
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_get_kv_cache_used_cells(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_kv_cache_clear(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Clear the KV cache. Both cell info is erased and KV data is zeroed
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_rm(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos)">
|
||
<summary>
|
||
Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="seq"></param>
|
||
<param name="p0"></param>
|
||
<param name="p1"></param>
|
||
<returns>Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_cp(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId,LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos)">
|
||
<summary>
|
||
Copy all tokens that belong to the specified sequence to another sequence
|
||
Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="src"></param>
|
||
<param name="dest"></param>
|
||
<param name="p0"></param>
|
||
<param name="p1"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_keep(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Removes all tokens that do not belong to the specified sequence
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="seq"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_add(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
|
||
<summary>
|
||
Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
|
||
If the KV cache is RoPEd, the KV data is updated accordingly:
|
||
- lazily on next llama_decode()
|
||
- explicitly with llama_kv_cache_update()
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="seq"></param>
|
||
<param name="p0"></param>
|
||
<param name="p1"></param>
|
||
<param name="delta"></param>
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_div(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
|
||
<summary>
|
||
Integer division of the positions by factor of `d > 1`
|
||
If the KV cache is RoPEd, the KV data is updated accordingly:
|
||
- lazily on next llama_decode()
|
||
- explicitly with llama_kv_cache_update()
|
||
<br />
|
||
p0 < 0 : [0, p1]
|
||
<br />
|
||
p1 < 0 : [p0, inf)
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="seq"></param>
|
||
<param name="p0"></param>
|
||
<param name="p1"></param>
|
||
<param name="d"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_kv_cache_seq_pos_max(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Returns the largest position present in the KV cache for the specified sequence
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="seq"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_batch_init(System.Int32,System.Int32,System.Int32)">
|
||
<summary>
|
||
Allocates a batch of tokens on the heap
|
||
Each token can be assigned up to n_seq_max sequence ids
|
||
The batch has to be freed with llama_batch_free()
|
||
If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
|
||
Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
|
||
The rest of the llama_batch members are allocated with size n_tokens
|
||
All members are left uninitialized
|
||
</summary>
|
||
<param name="n_tokens"></param>
|
||
<param name="embd"></param>
|
||
<param name="n_seq_max">Each token can be assigned up to n_seq_max sequence ids</param>
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_batch_free(LLama.Native.LLamaNativeBatch)">
|
||
<summary>
|
||
Frees a batch of tokens allocated with llama_batch_init()
|
||
</summary>
|
||
<param name="batch"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_apply_adapter_cvec(LLama.Native.SafeLLamaContextHandle,System.Single*,System.UIntPtr,System.Int32,System.Int32,System.Int32)">
|
||
<summary>
|
||
Apply a loaded control vector to a llama_context, or if data is NULL, clear
|
||
the currently loaded vector.
|
||
n_embd should be the size of a single layer's control, and data should point
|
||
to an n_embd x n_layers buffer starting from layer 1.
|
||
il_start and il_end are the layer range the vector should apply to (both inclusive)
|
||
See llama_control_vector_load in common to load a control vector.
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="data"></param>
|
||
<param name="len"></param>
|
||
<param name="n_embd"></param>
|
||
<param name="il_start"></param>
|
||
<param name="il_end"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_split_path(System.String,System.UIntPtr,System.String,System.Int32,System.Int32)">
|
||
<summary>
|
||
Build a split GGUF final path for this chunk.
|
||
llama_split_path(split_path, sizeof(split_path), "/models/ggml-model-q4_0", 2, 4) => split_path = "/models/ggml-model-q4_0-00002-of-00004.gguf"
|
||
</summary>
|
||
<param name="split_path"></param>
|
||
<param name="maxlen"></param>
|
||
<param name="path_prefix"></param>
|
||
<param name="split_no"></param>
|
||
<param name="split_count"></param>
|
||
<returns>Returns the split_path length.</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_split_prefix(System.String,System.UIntPtr,System.String,System.Int32,System.Int32)">
|
||
<summary>
|
||
Extract the path prefix from the split_path if and only if the split_no and split_count match.
|
||
llama_split_prefix(split_prefix, 64, "/models/ggml-model-q4_0-00002-of-00004.gguf", 2, 4) => split_prefix = "/models/ggml-model-q4_0"
|
||
</summary>
|
||
<param name="split_prefix"></param>
|
||
<param name="maxlen"></param>
|
||
<param name="split_path"></param>
|
||
<param name="split_no"></param>
|
||
<param name="split_count"></param>
|
||
<returns>Returns the split_prefix length.</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llava_validate_embed_size(LLama.Native.SafeLLamaContextHandle,LLama.Native.SafeLlavaModelHandle)">
|
||
<summary>
|
||
Sanity check for clip <-> llava embed size match
|
||
</summary>
|
||
<param name="ctxLlama">LLama Context</param>
|
||
<param name="ctxClip">Llava Model</param>
|
||
<returns>True if validated successfully</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llava_image_embed_make_with_bytes(LLama.Native.SafeLlavaModelHandle,System.Int32,System.Byte[],System.Int32)">
|
||
<summary>
|
||
Build an image embed from image file bytes
|
||
</summary>
|
||
<param name="ctx_clip">SafeHandle to the Clip Model</param>
|
||
<param name="n_threads">Number of threads</param>
|
||
<param name="image_bytes">Binary image in jpeg format</param>
|
||
<param name="image_bytes_length">Bytes length of the image</param>
|
||
<returns>SafeHandle to the Embeddings</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llava_image_embed_make_with_filename(LLama.Native.SafeLlavaModelHandle,System.Int32,System.String)">
|
||
<summary>
|
||
Build an image embed from a path to an image filename
|
||
</summary>
|
||
<param name="ctx_clip">SafeHandle to the Clip Model</param>
|
||
<param name="n_threads">Number of threads</param>
|
||
<param name="image_path">Image filename (jpeg) to generate embeddings</param>
|
||
<returns>SafeHandle to the embeddings</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llava_image_embed_free(System.IntPtr)">
|
||
<summary>
|
||
Free an embedding made with llava_image_embed_make_*
|
||
</summary>
|
||
<param name="embed">Embeddings to release</param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llava_eval_image_embed(LLama.Native.SafeLLamaContextHandle,LLama.Native.SafeLlavaImageEmbedHandle,System.Int32,System.Int32@)">
|
||
<summary>
|
||
Write the image represented by embed into the llama context with batch size n_batch, starting at context
pos n_past. On completion, n_past points to the next position in the context after the image embed.
|
||
</summary>
|
||
<param name="ctx_llama">Llama Context</param>
|
||
<param name="embed">Embedding handle</param>
|
||
<returns>True on success</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.GetLoadedNativeLibrary(LLama.Native.NativeLibraryName)">
|
||
<summary>
|
||
Get the loaded native library. If you are using netstandard2.0, it will always return null.
|
||
</summary>
|
||
<param name="name"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeApi.llama_model_quantize(System.String,System.String,LLama.Native.LLamaModelQuantizeParams@)">
|
||
<summary>
|
||
Returns 0 on success
|
||
</summary>
|
||
<param name="fname_inp"></param>
|
||
<param name="fname_out"></param>
|
||
<param name="param"></param>
|
||
<returns>Returns 0 on success</returns>
|
||
</member>
|
||
<member name="T:LLama.Native.NativeLogConfig">
|
||
<summary>
|
||
Configure llama.cpp logging
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.NativeLogConfig.LLamaLogCallback">
|
||
<summary>
|
||
Callback from llama.cpp with log messages
|
||
</summary>
|
||
<param name="level"></param>
|
||
<param name="message"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLogConfig.native_llama_log_set(LLama.Native.NativeLogConfig.LLamaLogCallback)">
|
||
<summary>
|
||
Register a callback to receive llama log messages
|
||
</summary>
|
||
<param name="logCallback"></param>
|
||
</member>
|
||
<member name="F:LLama.Native.NativeLogConfig._currentLogCallbackHandle">
|
||
<summary>
|
||
A GC handle for the current log callback to ensure the callback is not collected
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLogConfig.llama_log_set(LLama.Native.NativeLogConfig.LLamaLogCallback)">
|
||
<summary>
|
||
Register a callback to receive llama log messages
|
||
</summary>
|
||
<param name="logCallback"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.NativeLogConfig.llama_log_set(Microsoft.Extensions.Logging.ILogger)">
|
||
<summary>
|
||
Register a callback to receive llama log messages
|
||
</summary>
|
||
<param name="logger"></param>
|
||
</member>
|
||
<member name="T:LLama.Native.RopeScalingType">
|
||
<summary>
|
||
RoPE scaling type.
|
||
</summary>
|
||
<remarks>C# equivalent of llama_rope_scaling_type</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.RopeScalingType.Unspecified">
|
||
<summary>
|
||
No particular scaling type has been specified
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.RopeScalingType.None">
|
||
<summary>
|
||
Do not apply any RoPE scaling
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.RopeScalingType.Linear">
|
||
<summary>
|
||
Positional linear interpolation, as described by kaikendev: https://kaiokendev.github.io/til#extending-context-to-8k
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.RopeScalingType.Yarn">
|
||
<summary>
|
||
YaRN scaling: https://arxiv.org/pdf/2309.00071.pdf
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.RopeScalingType.LongRope">
|
||
<summary>
|
||
LongRope scaling
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.SafeLLamaContextHandle">
|
||
<summary>
|
||
A safe wrapper around a llama_context
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaContextHandle.ContextSize">
|
||
<summary>
|
||
Total number of tokens in the context
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaContextHandle.EmbeddingSize">
|
||
<summary>
|
||
Dimension of embedding vectors
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaContextHandle.BatchSize">
|
||
<summary>
|
||
Get the maximum batch size for this context
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaContextHandle.UBatchSize">
|
||
<summary>
|
||
Get the physical maximum batch size for this context
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaContextHandle.GenerationThreads">
|
||
<summary>
|
||
Get or set the number of threads used for generation of a single token.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaContextHandle.BatchThreads">
|
||
<summary>
|
||
Get or set the number of threads used for prompt and batch processing (multiple token).
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaContextHandle.PoolingType">
|
||
<summary>
|
||
Get the pooling type for this context
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaContextHandle.ModelHandle">
|
||
<summary>
|
||
Get the model which this context is using
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaContextHandle.Vocab">
|
||
<summary>
|
||
Get the vocabulary for the model this context is using
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.ReleaseHandle">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.Create(LLama.Native.SafeLlamaModelHandle,LLama.Native.LLamaContextParams)">
|
||
<summary>
|
||
Create a new llama_state for the given model
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="lparams"></param>
|
||
<returns></returns>
|
||
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_init_from_model(LLama.Native.SafeLlamaModelHandle,LLama.Native.LLamaContextParams)">
|
||
<summary>
|
||
Create a new llama_context with the given model. **This should never be called directly! Always use SafeLLamaContextHandle.Create**!
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="params"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_free(System.IntPtr)">
|
||
<summary>
|
||
Frees all allocated memory in the given llama_context
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_set_abort_callback(LLama.Native.SafeLLamaContextHandle,LLama.Native.SafeLLamaContextHandle.GgmlAbortCallback,System.Void*)">
|
||
<summary>
|
||
Set a callback which can abort computation
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="abort_callback"></param>
|
||
<param name="abort_callback_data"></param>
|
||
</member>
|
||
<member name="T:LLama.Native.SafeLLamaContextHandle.GgmlAbortCallback">
|
||
<summary>
|
||
If this returns true computation is cancelled
|
||
</summary>
|
||
<param name="data"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_decode(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaNativeBatch)">
|
||
<summary>
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="batch"></param>
|
||
<returns>Positive return values do not mean a fatal error, but rather a warning:<br />
|
||
- 0: success<br />
|
||
- 1: could not find a KV slot for the batch (try reducing the size of the batch or increase the context)<br />
|
||
- < 0: error<br />
|
||
</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_encode(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaNativeBatch)">
|
||
<summary>
|
||
Processes a batch of tokens with the encoder part of the encoder-decoder model. Stores the encoder output
|
||
internally for later use by the decoder cross-attention layers.
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="batch"></param>
|
||
<returns>0 = success <br />< 0 = error</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_set_n_threads(LLama.Native.SafeLLamaContextHandle,System.Int32,System.Int32)">
|
||
<summary>
|
||
Set the number of threads used for decoding
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="n_threads">n_threads is the number of threads used for generation (single token)</param>
|
||
<param name="n_threads_batch">n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_n_threads(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Get the number of threads used for generation of a single token.
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_n_threads_batch(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Get the number of threads used for prompt and batch processing (multiple token).
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_get_logits(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Token logits obtained from the last call to llama_decode
|
||
The logits for the last token are stored in the last row
|
||
Can be mutated in order to change the probabilities of the next token.<br />
|
||
Rows: n_tokens<br />
|
||
Cols: n_vocab
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_get_logits_ith(LLama.Native.SafeLLamaContextHandle,System.Int32)">
|
||
<summary>
|
||
Logits for the ith token. Equivalent to: llama_get_logits(ctx) + i*n_vocab
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="i"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_n_ctx(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Get the size of the context window for the model for this context
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_n_batch(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Get the batch size for this context
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_n_ubatch(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Get the ubatch size for this context
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_get_size(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Returns the **actual** size in bytes of the state (logits, embedding and kv_cache).
|
||
Only use when saving the state, not when restoring it, otherwise the size may be too small.
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_get_data(LLama.Native.SafeLLamaContextHandle,System.Byte*,System.UIntPtr)">
|
||
<summary>
|
||
Copies the state to the specified destination address.
|
||
Destination needs to have allocated enough memory.
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="dest"></param>
|
||
<param name="size"></param>
|
||
<returns>the number of bytes copied</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_set_data(LLama.Native.SafeLLamaContextHandle,System.Byte*,System.UIntPtr)">
|
||
<summary>
|
||
Set the state reading from the specified address
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="src"></param>
|
||
<param name="size"></param>
|
||
<returns>the number of bytes read</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_seq_get_size(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Get the exact size needed to copy the KV cache of a single sequence
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_seq_get_data(LLama.Native.SafeLLamaContextHandle,System.Byte*,System.UIntPtr,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Copy the KV cache of a single sequence into the specified buffer
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="dst"></param>
|
||
<param name="size"></param>
|
||
<param name="seqId"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_state_seq_set_data(LLama.Native.SafeLLamaContextHandle,System.Byte*,System.UIntPtr,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<param name="src"></param>
|
||
<param name="size"></param>
|
||
<param name="destSeqId"></param>
|
||
<returns>
|
||
- Positive: Ok
|
||
- Zero: Failed to load
|
||
</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_kv_cache_defrag(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Defragment the KV cache. This will be applied:
|
||
- lazily on next llama_decode()
|
||
- explicitly with llama_kv_cache_update()
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_kv_cache_update(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_kv_cache_can_shift(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Check if the context supports KV cache shifting
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_synchronize(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Wait until all computations are finished. This is automatically done when using any of the functions to obtain computation results,
so it is not necessary to call this explicitly in most cases.
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_pooling_type(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Get the pooling type for this context
|
||
</summary>
|
||
<param name="ctx"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_get_embeddings_seq(LLama.Native.SafeLLamaContextHandle,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Get the embeddings for a sequence id.
|
||
Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
|
||
when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[1] with the rank of the sequence
|
||
otherwise: float[n_embd] (1-dimensional)
|
||
</summary>
|
||
<returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.llama_get_embeddings_ith(LLama.Native.SafeLLamaContextHandle,System.Int32)">
|
||
<summary>
|
||
Get the embeddings for the ith sequence.
|
||
Equivalent to: llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
|
||
</summary>
|
||
<returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.AddLoraAdapter(LLama.Native.LoraAdapter,System.Single)">
|
||
<summary>
|
||
Add a LoRA adapter to this context
|
||
</summary>
|
||
<param name="lora"></param>
|
||
<param name="scale"></param>
|
||
<exception cref="T:System.ArgumentException"></exception>
|
||
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.RemoveLoraAdapter(LLama.Native.LoraAdapter)">
|
||
<summary>
|
||
Remove a LoRA adapter from this context
|
||
</summary>
|
||
<param name="lora"></param>
|
||
<returns>Indicates whether the LoRA was in this context and was removed</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.ClearLoraAdapters">
|
||
<summary>
|
||
Remove all LoRA adapters from this context
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.GetLogits(System.Int32)">
|
||
<summary>
|
||
Token logits obtained from the last call to llama_decode.
|
||
The logits for the last token are stored in the last row.
|
||
Only tokens for which logits were requested (`logits = true`) are present.<br/>
|
||
Can be mutated in order to change the probabilities of the next token.<br />
|
||
Rows: n_tokens<br />
|
||
Cols: n_vocab
|
||
</summary>
|
||
<param name="numTokens">
|
||
The number of tokens whose logits should be retrieved, in <b>[numTokens X n_vocab]</b> format.<br/>
The tokens' order is based on their order in the LLamaBatch (so, first tokens are first, etc.).<br/>
This is helpful when requesting logits for many tokens in a sequence, or when decoding multiple sequences in one go.
|
||
</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.GetLogitsIth(System.Int32)">
|
||
<summary>
|
||
Logits for the ith token. Equivalent to: llama_get_logits(ctx) + i*n_vocab
|
||
</summary>
|
||
<param name="i"></param>
|
||
<returns></returns>
|
||
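<remarks>
A minimal sketch, assuming <c>ctx</c> is a <see cref="T:LLama.Native.SafeLLamaContextHandle"/> and a batch with logits requested at index <c>lastIndex</c> has just been decoded:
<code>
int lastIndex = 0;                        // index of the token whose logits were requested
var logits = ctx.GetLogitsIth(lastIndex);

// Example: make token id 123 impossible to sample by pushing its logit to -infinity.
logits[123] = float.NegativeInfinity;
</code>
</remarks>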
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.GetEmbeddingsIth(LLama.Native.LLamaPos)">
|
||
<summary>
|
||
Get the embeddings for the ith sequence.
|
||
Equivalent to: llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
|
||
</summary>
|
||
<returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.GetEmbeddingsSeq(LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Get the embeddings for a specific sequence.
|
||
Equivalent to: llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
|
||
</summary>
|
||
<returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.Tokenize(System.String,System.Boolean,System.Boolean,System.Text.Encoding)">
|
||
<summary>
|
||
Convert the given text into tokens
|
||
</summary>
|
||
<param name="text">The text to tokenize</param>
|
||
<param name="add_bos">Whether the "BOS" token should be added</param>
|
||
<param name="encoding">Encoding to use for the text</param>
|
||
<param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
|
||
<returns></returns>
|
||
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
|
||
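<remarks>
A minimal usage sketch, assuming <c>ctx</c> is an initialised <see cref="T:LLama.Native.SafeLLamaContextHandle"/> (parameter order shown as text, add_bos, special, encoding):
<code>
using System.Text;

// Tokenize a prompt, adding the BOS token and treating special tokens as plain text.
var tokens = ctx.Tokenize("Hello, world!", true, false, Encoding.UTF8);
Console.WriteLine($"Prompt is {tokens.Length} tokens long");
</code>
</remarks>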
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.TokenToSpan(LLama.Native.LLamaToken,System.Span{System.Byte})">
|
||
<summary>
|
||
Convert a single llama token into bytes
|
||
</summary>
|
||
<param name="token">Token to decode</param>
|
||
<param name="dest">A span to attempt to write into. If this is too small nothing will be written</param>
|
||
<returns>The size of this token. **nothing will be written** if this is larger than `dest`</returns>
|
||
</member>
|
||
<member name="F:LLama.Native.SafeLLamaContextHandle.GlobalInferenceLock">
|
||
<summary>
|
||
This object exists to ensure there is only ever 1 inference running at a time. This is a workaround for thread safety issues in llama.cpp itself.
|
||
Most notably CUDA, which seems to use some global singleton resources and will crash if multiple inferences are run (even against different models).
|
||
|
||
For more information see these issues:
|
||
- https://github.com/SciSharp/LLamaSharp/issues/596
|
||
- https://github.com/ggerganov/llama.cpp/issues/3960
|
||
|
||
If these are ever resolved this lock can probably be removed.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.Synchronize">
<summary>
Wait until all computations are finished. This is automatically done when using any of the functions to obtain computation results,
so it is not necessary to call it explicitly in most cases.
</summary>
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.Encode(LLama.Native.LLamaBatch)">
|
||
<summary>
|
||
Processes a batch of tokens with the encoder part of the encoder-decoder model. Stores the encoder output
|
||
internally for later use by the decoder cross-attention layers.
|
||
</summary>
|
||
<param name="batch"></param>
|
||
<returns>0 = success <br />< 0 = error (the KV cache state is restored to the state before this call)</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.Decode(LLama.Native.LLamaBatch)">
|
||
<summary>
|
||
</summary>
|
||
<param name="batch"></param>
|
||
<returns>Positive return values do not mean a fatal error, but rather a warning:<br />
|
||
- 0: success<br />
|
||
- 1: could not find a KV slot for the batch (try reducing the size of the batch or increasing the context size)<br />
|
||
- < 0: error (the KV cache state is restored to the state before this call)<br />
|
||
</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.Decode(System.Collections.Generic.List{LLama.Native.LLamaToken},LLama.Native.LLamaSeqId,LLama.Native.LLamaBatch,System.Int32@)">
|
||
<summary>
|
||
Decode a set of tokens in batch-size chunks.
|
||
</summary>
|
||
<param name="tokens"></param>
|
||
<param name="id"></param>
|
||
<param name="batch"></param>
|
||
<param name="n_past"></param>
|
||
<returns>A tuple, containing the decode result and the number of tokens that have <b>not</b> been decoded yet.</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.Decode(LLama.Native.LLamaBatchEmbeddings)">
|
||
<summary>
|
||
</summary>
|
||
<param name="batch"></param>
|
||
<returns>Positive return values do not mean a fatal error, but rather a warning:<br />
|
||
- 0: success<br />
|
||
- 1: could not find a KV slot for the batch (try reducing the size of the batch or increasing the context size)<br />
|
||
- < 0: error<br />
|
||
</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.GetStateSize">
|
||
<summary>
|
||
Get the size of the state, when saved as bytes
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.GetStateSize(LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Get the size of the KV cache for a single sequence ID, when saved as bytes
|
||
</summary>
|
||
<param name="sequence"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.GetState(System.Byte*,System.UIntPtr)">
|
||
<summary>
|
||
Get the raw state of this context, encoded as bytes. Data is written into the `dest` pointer.
|
||
</summary>
|
||
<param name="dest">Destination to write to</param>
|
||
<param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
|
||
<returns>The number of bytes written to dest</returns>
|
||
<exception cref="T:System.ArgumentOutOfRangeException">Thrown if dest is too small</exception>
|
||
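<remarks>
A minimal sketch of a state round-trip, assuming <c>ctx</c> is a <see cref="T:LLama.Native.SafeLLamaContextHandle"/> (requires an <c>unsafe</c> context; the cast of the size is an assumption about the return type):
<code>
ulong size = (ulong)ctx.GetStateSize();
var buffer = new byte[size];

unsafe
{
    fixed (byte* ptr = buffer)
    {
        // Snapshot the current context state into the managed buffer...
        ctx.GetState(ptr, (UIntPtr)size);

        // ...and restoring it later is the mirror operation.
        ctx.SetState(ptr, (UIntPtr)size);
    }
}
</code>
</remarks>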
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.GetState(System.Byte*,System.UIntPtr,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Get the raw state of a single sequence from this context, encoded as bytes. Data is written into the `dest` pointer.
|
||
</summary>
|
||
<param name="dest">Destination to write to</param>
|
||
<param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
|
||
<param name="sequence">The sequence to get state data for</param>
|
||
<returns>The number of bytes written to dest</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.SetState(System.Byte*,System.UIntPtr)">
|
||
<summary>
|
||
Set the raw state of this context
|
||
</summary>
|
||
<param name="src">The pointer to read the state from</param>
|
||
<param name="size">Number of bytes that can be safely read from the pointer</param>
|
||
<returns>Number of bytes read from the src pointer</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.SetState(System.Byte*,System.UIntPtr,LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Set the raw state of a single sequence
|
||
</summary>
|
||
<param name="src">The pointer to read the state from</param>
|
||
<param name="sequence">Sequence ID to set</param>
|
||
<param name="size">Number of bytes that can be safely read from the pointer</param>
|
||
<returns>Number of bytes read from the src pointer</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.GetTimings">
|
||
<summary>
|
||
Get performance information
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.ResetTimings">
|
||
<summary>
|
||
Reset all performance information for this context
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaContextHandle.KvCacheCanShift">
|
||
<summary>
|
||
Check if the context supports KV cache shifting
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheUpdate">
|
||
<summary>
|
||
Apply KV cache updates (such as K-shifts, defragmentation, etc.)
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheDefrag">
|
||
<summary>
|
||
Defragment the KV cache. This will be applied:
|
||
- lazily on next llama_decode()
|
||
- explicitly with llama_kv_cache_update()
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheGetDebugView(System.Int32)">
|
||
<summary>
|
||
Get a new KV cache view that can be used to debug the KV cache
|
||
</summary>
|
||
<param name="maxSequences"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheCountCells">
|
||
<summary>
|
||
Count the number of used cells in the KV cache (i.e. have at least one sequence assigned to them)
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheCountTokens">
|
||
<summary>
|
||
Returns the number of tokens in the KV cache (slow, use only for debug)
|
||
If a KV cell has multiple sequences assigned to it, it will be counted multiple times
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheClear">
|
||
<summary>
|
||
Clear the KV cache - both cell info is erased and KV data is zeroed
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheRemove(LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos)">
|
||
<summary>
|
||
Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
|
||
</summary>
|
||
<param name="seq"></param>
|
||
<param name="p0"></param>
|
||
<param name="p1"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheSequenceCopy(LLama.Native.LLamaSeqId,LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos)">
|
||
<summary>
|
||
Copy all tokens that belong to the specified sequence to another sequence. Note that
|
||
this does not allocate extra KV cache memory - it simply assigns the tokens to the
|
||
new sequence
|
||
</summary>
|
||
<param name="src"></param>
|
||
<param name="dest"></param>
|
||
<param name="p0"></param>
|
||
<param name="p1"></param>
|
||
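<remarks>
An illustrative sketch, assuming <c>ctx</c> already holds decoded tokens in sequence 0 and that LLamaSeqId/LLamaPos values can be created from integers as shown:
<code>
var src = (LLamaSeqId)0;
var dst = (LLamaSeqId)1;

// Share the whole of sequence 0 with sequence 1 (a negative p1 means "to the end").
ctx.KvCacheSequenceCopy(src, dst, 0, -1);

// Later, drop everything from position 128 onwards in the copy only.
ctx.KvCacheRemove(dst, 128, -1);
</code>
</remarks>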
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheSequenceKeep(LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Removes all tokens that do not belong to the specified sequence
|
||
</summary>
|
||
<param name="seq"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheSequenceAdd(LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
|
||
<summary>
|
||
Adds relative position "delta" to all tokens that belong to the specified sequence
|
||
and have positions in [p0, p1). If the KV cache is RoPEd, the KV data is updated
|
||
accordingly
|
||
</summary>
|
||
<param name="seq"></param>
|
||
<param name="p0"></param>
|
||
<param name="p1"></param>
|
||
<param name="delta"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheSequenceDivide(LLama.Native.LLamaSeqId,LLama.Native.LLamaPos,LLama.Native.LLamaPos,System.Int32)">
|
||
<summary>
|
||
Integer division of the positions by factor of `d > 1`.
|
||
If the KV cache is RoPEd, the KV data is updated accordingly.<br />
|
||
p0 < 0 : [0, p1]<br />
|
||
p1 < 0 : [p0, inf)
|
||
</summary>
|
||
<param name="seq"></param>
|
||
<param name="p0"></param>
|
||
<param name="p1"></param>
|
||
<param name="divisor"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaContextHandle.KvCacheMaxPosition(LLama.Native.LLamaSeqId)">
|
||
<summary>
|
||
Returns the largest position present in the KV cache for the specified sequence
|
||
</summary>
|
||
<param name="seq"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.SafeLLamaHandleBase">
|
||
<summary>
|
||
Base class for all llama handles to native resources
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaHandleBase.IsInvalid">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaHandleBase.ToString">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Native.SafeLlamaModelHandle">
|
||
<summary>
|
||
A reference to a set of llama model weights
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.RopeType">
|
||
<summary>
|
||
Get the rope (positional embedding) type for this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.ContextSize">
|
||
<summary>
|
||
The number of tokens in the context that this model was trained for
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.RopeFrequency">
|
||
<summary>
|
||
Get the rope frequency this model was trained with
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.EmbeddingSize">
|
||
<summary>
|
||
Dimension of embedding vectors
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.SizeInBytes">
|
||
<summary>
|
||
Get the size of this model in bytes
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.ParameterCount">
|
||
<summary>
|
||
Get the number of parameters in this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.LayerCount">
|
||
<summary>
|
||
Get the number of layers in this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.HeadCount">
|
||
<summary>
|
||
Get the number of heads in this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.HasEncoder">
|
||
<summary>
|
||
Returns true if the model contains an encoder that requires llama_encode() call
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.HasDecoder">
|
||
<summary>
|
||
Returns true if the model contains a decoder that requires llama_decode() call
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.IsRecurrent">
|
||
<summary>
|
||
Returns true if the model is recurrent (like Mamba, RWKV, etc.)
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Description">
|
||
<summary>
|
||
Get a description of this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.MetadataCount">
|
||
<summary>
|
||
Get the number of metadata key/value pairs
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocab">
|
||
<summary>
|
||
Get the vocabulary of this model
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.ReleaseHandle">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.LoadFromFile(System.String,LLama.Native.LLamaModelParams)">
|
||
<summary>
|
||
Load a model from the given file path into memory
|
||
</summary>
|
||
<param name="modelPath"></param>
|
||
<param name="lparams"></param>
|
||
<returns></returns>
|
||
<exception cref="T:LLama.Exceptions.RuntimeError"></exception>
|
||
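<remarks>
A minimal sketch, assuming "model.gguf" exists and that the default-parameter factory methods shown are available:
<code>
using LLama.Native;

var modelParams = LLamaModelParams.Default();
using var model = SafeLlamaModelHandle.LoadFromFile("model.gguf", modelParams);

var contextParams = LLamaContextParams.Default();
using var ctx = model.CreateContext(contextParams);
</code>
</remarks>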
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_load_from_file(System.String,LLama.Native.LLamaModelParams)">
|
||
<summary>
|
||
Load the model from a file
|
||
If the file is split into multiple parts, the file name must follow this pattern: {name}-%05d-of-%05d.gguf
|
||
If the split file name does not follow this pattern, use llama_model_load_from_splits
|
||
</summary>
|
||
<param name="path"></param>
|
||
<param name="params"></param>
|
||
<returns>The loaded model, or null on failure.</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_load_from_splits(System.Char**,System.UIntPtr,LLama.Native.LLamaModelParams)">
|
||
<summary>
|
||
Load the model from multiple splits (support custom naming scheme)
|
||
The paths must be in the correct order
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_apply_lora_from_file(LLama.Native.SafeLlamaModelHandle,System.String,System.Single,System.String,System.Int32)">
|
||
<summary>
|
||
Apply a LoRA adapter to a loaded model
|
||
path_base_model is the path to a higher quality model to use as a base for
|
||
the layers modified by the adapter. Can be NULL to use the current loaded model.
|
||
The model needs to be reloaded before applying a new adapter, otherwise the adapter
|
||
will be applied on top of the previous one
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="path"></param>
|
||
<param name="scale"></param>
|
||
<param name="pathBase"></param>
|
||
<param name="threads"></param>
|
||
<returns>Returns 0 on success</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_free(System.IntPtr)">
|
||
<summary>
|
||
Frees all allocated memory associated with a model
|
||
</summary>
|
||
<param name="model"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_meta_count(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Get the number of metadata key/value pairs
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_meta_key_by_index(LLama.Native.SafeLlamaModelHandle,System.Int32,System.Span{System.Byte})">
|
||
<summary>
|
||
Get metadata key name by index
|
||
</summary>
|
||
<param name="model">Model to fetch from</param>
|
||
<param name="index">Index of key to fetch</param>
|
||
<param name="dest">buffer to write result into</param>
|
||
<returns>The length of the string on success (even if the buffer is too small), or -1 if the key does not exist.</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_meta_val_str_by_index(LLama.Native.SafeLlamaModelHandle,System.Int32,System.Span{System.Byte})">
|
||
<summary>
|
||
Get metadata value as a string by index
|
||
</summary>
|
||
<param name="model">Model to fetch from</param>
|
||
<param name="index">Index of val to fetch</param>
|
||
<param name="dest">Buffer to write result into</param>
|
||
<returns>The length of the string on success (even if the buffer is too small), or -1 if the key does not exist.</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_meta_val_str(LLama.Native.SafeLlamaModelHandle,System.String,System.Span{System.Byte})">
|
||
<summary>
|
||
Get metadata value as a string by key name
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="key"></param>
|
||
<param name="dest"></param>
|
||
<returns>The length of the string on success, or -1 on failure</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_n_vocab(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Get the number of tokens in the model vocabulary
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_n_ctx_train(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Get the size of the context window for the model
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_n_embd(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Get the dimension of embedding vectors from this model
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_n_layer(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Get the number of layers in this model
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_n_head(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Get the number of heads in this model
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_desc(LLama.Native.SafeLlamaModelHandle,System.Byte*,System.IntPtr)">
|
||
<summary>
|
||
Get a string describing the model type
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="buf"></param>
|
||
<param name="bufSize"></param>
|
||
<returns>The length of the string on success (even if the buffer is too small), or -1 on failure</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_size(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Get the size of the model in bytes
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns>The size of the model</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_n_params(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Get the number of parameters in this model
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns>The number of parameters in the model</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_rope_freq_scale_train(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Get the model's RoPE frequency scaling factor
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_decoder_start_token(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
For encoder-decoder models, this function returns id of the token that must be provided
|
||
to the decoder to start generating output sequence. For other models, it returns -1.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_has_encoder(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Returns true if the model contains an encoder that requires llama_encode() call
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_has_decoder(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Returns true if the model contains a decoder that requires llama_decode() call
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.llama_model_is_recurrent(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
Returns true if the model is recurrent (like Mamba, RWKV, etc.)
|
||
</summary>
|
||
<param name="model"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.LoadLoraFromFile(System.String)">
|
||
<summary>
|
||
Load a LoRA adapter from file. The adapter will be associated with this model but will not be applied
|
||
</summary>
|
||
<param name="path"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.InvalidOperationException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.TokenToSpan(LLama.Native.LLamaToken,System.Span{System.Byte},System.Int32,System.Boolean)">
|
||
<summary>
|
||
Convert a single llama token into bytes
|
||
</summary>
|
||
<param name="token">Token to decode</param>
|
||
<param name="dest">A span to attempt to write into. If this is too small nothing will be written</param>
|
||
<param name="lstrip">User can skip up to 'lstrip' leading spaces before copying (useful when encoding/decoding multiple tokens with 'add_space_prefix')</param>
|
||
<param name="special">If true, special characters will be converted to text. If false they will be invisible.</param>
|
||
<returns>The size of this token. **nothing will be written** if this is larger than `dest`</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.TokensToSpan(System.Collections.Generic.IReadOnlyList{LLama.Native.LLamaToken},System.Span{System.Char},System.Text.Encoding)">
|
||
<summary>
|
||
Convert a sequence of tokens into characters.
|
||
</summary>
|
||
<param name="tokens"></param>
|
||
<param name="dest"></param>
|
||
<param name="encoding"></param>
|
||
<returns>The section of the span which has valid data in it.
|
||
If there was insufficient space in the output span this will be
|
||
filled with as many characters as possible, starting from the _last_ token.
|
||
</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.Tokenize(System.String,System.Boolean,System.Boolean,System.Text.Encoding)">
|
||
<summary>
|
||
Convert a string of text into tokens
|
||
</summary>
|
||
<param name="text"></param>
|
||
<param name="addBos"></param>
|
||
<param name="encoding"></param>
|
||
<param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.CreateContext(LLama.Native.LLamaContextParams)">
|
||
<summary>
|
||
Create a new context for this model
|
||
</summary>
|
||
<param name="params"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.MetadataValueByKey(System.String)">
|
||
<summary>
|
||
Get the metadata value for the given key
|
||
</summary>
|
||
<param name="key">The key to fetch</param>
|
||
<returns>The value, null if there is no such key</returns>
|
||
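<remarks>
A minimal sketch, assuming <c>model</c> is an already-loaded <see cref="T:LLama.Native.SafeLlamaModelHandle"/>:
<code>
// "general.architecture" is a standard GGUF metadata key.
var arch = model.MetadataValueByKey("general.architecture");
Console.WriteLine(arch ?? "(key not present)");
</code>
</remarks>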
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.MetadataKeyByIndex(System.Int32)">
|
||
<summary>
|
||
Get the metadata key for the given index
|
||
</summary>
|
||
<param name="index">The index to get</param>
|
||
<returns>The key, null if there is no such key or if the buffer was too small</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.MetadataValueByIndex(System.Int32)">
|
||
<summary>
|
||
Get the metadata value for the given index
|
||
</summary>
|
||
<param name="index">The index to get</param>
|
||
<returns>The value, null if there is no such value or if the buffer was too small</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlamaModelHandle.GetTemplate(System.String)">
|
||
<summary>
|
||
Get a chat template from the model by name. Returns null if no such template is available.
If the name is null, the default chat template is returned.
|
||
</summary>
|
||
<param name="name"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.SafeLlamaModelHandle.Vocabulary">
|
||
<summary>
|
||
Get tokens for a model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.Count">
|
||
<summary>
|
||
Total number of tokens in this vocabulary
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.Type">
|
||
<summary>
|
||
Get the type of this vocabulary
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.BOS">
|
||
<summary>
|
||
Get the Beginning of Sentence token for this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.EOS">
|
||
<summary>
|
||
Get the End of Sentence token for this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.Newline">
|
||
<summary>
|
||
Get the newline token for this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.Pad">
|
||
<summary>
|
||
Get the padding token for this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.SEP">
|
||
<summary>
|
||
Get the sentence separator token for this model
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillPrefix">
|
||
<summary>
|
||
Codellama beginning of infill prefix
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillMiddle">
|
||
<summary>
|
||
Codellama beginning of infill middle
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillSuffix">
|
||
<summary>
|
||
Codellama beginning of infill suffix
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillPad">
|
||
<summary>
|
||
Codellama pad
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillRep">
|
||
<summary>
|
||
Codellama rep
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.InfillSep">
|
||
<summary>
|
||
Codellama infill separator
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.EOT">
|
||
<summary>
|
||
end-of-turn token
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.DecoderStartToken">
|
||
<summary>
|
||
For encoder-decoder models, this function returns id of the token that must be provided
|
||
to the decoder to start generating output sequence.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.ShouldAddBOS">
|
||
<summary>
|
||
Check if the current model requires a BOS token to be added
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlamaModelHandle.Vocabulary.ShouldAddEOS">
|
||
<summary>
|
||
Check if the current model requires an EOS token to be added
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.SafeLLamaSamplerChainHandle">
|
||
<summary>
|
||
A chain of sampler stages that can be used to select tokens from logits.
|
||
</summary>
|
||
<remarks>Wraps a handle returned from `llama_sampler_chain_init`. Other samplers are owned by this chain and are never directly exposed.</remarks>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLLamaSamplerChainHandle.Count">
|
||
<summary>
|
||
Get the number of samplers in this chain
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.ReleaseHandle">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Apply(LLama.Native.LLamaTokenDataArrayNative@)">
|
||
<summary>
|
||
Apply this sampler to a set of candidates
|
||
</summary>
|
||
<param name="candidates"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Sample(LLama.Native.SafeLLamaContextHandle,System.Int32)">
|
||
<summary>
|
||
Sample and accept a token from the idx-th output of the last evaluation. Shorthand for:
|
||
|
||
<code>
|
||
var logits = ctx.GetLogitsIth(idx);
|
||
var token_data_array = LLamaTokenDataArray.Create(logits);
|
||
using var _ = LLamaTokenDataArrayNative.Create(token_data_array, out var native_token_data);
|
||
sampler_chain.Apply(native_token_data);
|
||
var token = native_token_data.Data.Span[native_token_data.Selected];
|
||
sampler_chain.Accept(token);
|
||
return token;
|
||
</code>
|
||
</summary>
|
||
<param name="context"></param>
|
||
<param name="index"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Reset">
|
||
<summary>
|
||
Reset the state of this sampler
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Accept(LLama.Native.LLamaToken)">
|
||
<summary>
|
||
Accept a token and update the internal state of this sampler
|
||
</summary>
|
||
<param name="token"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.GetName(System.Int32)">
|
||
<summary>
|
||
Get the name of the sampler at the given index
|
||
</summary>
|
||
<param name="index"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.GetSeed(System.Int32)">
|
||
<summary>
|
||
Get the seed of the sampler at the given index, if applicable. Returns LLAMA_DEFAULT_SEED otherwise.
|
||
</summary>
|
||
<param name="index"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Create(LLama.Native.LLamaSamplerChainParams)">
|
||
<summary>
|
||
Create a new sampler chain
|
||
</summary>
|
||
<param name="params"></param>
|
||
<returns></returns>
|
||
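<remarks>
A minimal sketch of building a chain (the Add* stages are documented on this class; the numeric values are arbitrary):
<code>
using var chain = SafeLLamaSamplerChainHandle.Create(LLamaSamplerChainParams.Default());

chain.AddTopK(40);
chain.AddTopP(0.9f, (nint)1);        // keep at least 1 candidate
chain.AddTemperature(0.8f);
chain.AddDistributionSampler(42u);   // seeded random selection

// After decoding a batch with `ctx`:
// LLamaToken token = chain.Sample(ctx, index);
</code>
</remarks>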
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddClone(LLama.Native.SafeLLamaSamplerChainHandle,System.Int32)">
|
||
<summary>
|
||
Clone a sampler stage from another chain and add it to this chain
|
||
</summary>
|
||
<param name="src">The chain to clone a stage from</param>
|
||
<param name="index">The index of the stage to clone</param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.Remove(System.Int32)">
|
||
<summary>
|
||
Remove a sampler stage from this chain
|
||
</summary>
|
||
<param name="index"></param>
|
||
<exception cref="T:System.ArgumentOutOfRangeException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddCustom``1(``0)">
|
||
<summary>
|
||
Add a custom sampler stage
|
||
</summary>
|
||
<typeparam name="TSampler"></typeparam>
|
||
<param name="sampler"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddGreedySampler">
|
||
<summary>
|
||
Add a sampler which picks the most likely token.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddDistributionSampler(System.UInt32)">
|
||
<summary>
|
||
Add a sampler which picks from the probability distribution of all tokens
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddMirostat1Sampler(System.Int32,System.UInt32,System.Single,System.Single,System.Int32)">
|
||
<summary>
|
||
Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
|
||
</summary>
|
||
<param name="seed"></param>
|
||
<param name="tau">The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.</param>
|
||
<param name="eta">The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates.</param>
|
||
<param name="m">The number of tokens considered in the estimation of `s_hat`. This is an arbitrary value that is used to calculate `s_hat`, which in turn helps to calculate the value of `k`. In the paper, they use `m = 100`, but you can experiment with different values to see how it affects the performance of the algorithm.</param>
|
||
<param name="vocabCount"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddMirostat2Sampler(System.UInt32,System.Single,System.Single)">
|
||
<summary>
|
||
Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
|
||
</summary>
|
||
<param name="seed"></param>
|
||
<param name="tau">The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.</param>
|
||
<param name="eta">The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates.</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddTopK(System.Int32)">
|
||
<summary>
|
||
Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddTopP(System.Single,System.IntPtr)">
|
||
<summary>
|
||
Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddMinP(System.Single,System.IntPtr)">
|
||
<summary>
|
||
Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddTypical(System.Single,System.IntPtr)">
|
||
<summary>
|
||
Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddTemperature(System.Single)">
|
||
<summary>
|
||
Apply temperature to the logits.
|
||
If temperature is less than zero the maximum logit is left unchanged and the rest are set to -infinity
|
||
</summary>
|
||
<param name="t"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddDynamicTemperature(System.Single,System.Single,System.Single)">
|
||
<summary>
|
||
Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
|
||
</summary>
|
||
<param name="t"></param>
|
||
<param name="delta"></param>
|
||
<param name="exponent"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddXTC(System.Single,System.Single,System.Int32,System.UInt32)">
|
||
<summary>
|
||
XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
|
||
</summary>
|
||
<param name="p"></param>
|
||
<param name="t"></param>
|
||
<param name="minKeep"></param>
|
||
<param name="seed"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddFillInMiddleInfill(LLama.Native.SafeLlamaModelHandle)">
|
||
<summary>
|
||
This sampler is meant to be used for fill-in-the-middle infilling, after top_k + top_p sampling
|
||
<br />
|
||
1. if the sum of the EOG probs times the number of candidates is higher than the sum of the other probs -> pick EOG<br />
|
||
2. combine probs of tokens that have the same prefix<br />
|
||
<br />
|
||
example:<br />
|
||
<br />
|
||
- before:<br />
|
||
"abc": 0.5<br />
|
||
"abcd": 0.2<br />
|
||
"abcde": 0.1<br />
|
||
"dummy": 0.1<br />
|
||
<br />
|
||
- after:<br />
|
||
"abc": 0.8<br />
|
||
"dummy": 0.1<br />
|
||
<br />
|
||
3. discard non-EOG tokens with low prob<br />
|
||
4. if no tokens are left -> pick EOT
|
||
</summary>
|
||
<param name="model"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddGrammar(LLama.Native.SafeLlamaModelHandle,System.String,System.String)">
|
||
<summary>
|
||
Create a sampler which makes tokens impossible unless they match the grammar
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="grammar"></param>
|
||
<param name="root">Root rule of the grammar</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddLazyGrammar(LLama.Native.SafeLlamaModelHandle,System.String,System.String,System.ReadOnlySpan{System.String},System.ReadOnlySpan{LLama.Native.LLamaToken})">
|
||
<summary>
|
||
Create a sampler using lazy grammar sampling: https://github.com/ggerganov/llama.cpp/pull/9639
|
||
</summary>
|
||
<param name="model"></param>
|
||
<param name="grammar">Grammar in GBNF form</param>
|
||
<param name="root">Root rule of the grammar</param>
|
||
<param name="triggerTokens">A list of tokens that will trigger the grammar sampler.</param>
|
||
<param name="triggerWords">A list of words that will trigger the grammar sampler.</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddPenalties(System.Int32,System.Single,System.Single,System.Single)">
|
||
<summary>
|
||
Create a sampler that applies various repetition penalties.
|
||
|
||
Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
|
||
</summary>
|
||
<param name="penaltyCount">How many tokens of history to consider when calculating penalties</param>
|
||
<param name="repeat">Repetition penalty</param>
|
||
<param name="freq">Frequency penalty</param>
|
||
<param name="presence">Presence penalty</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddDry(LLama.Native.SafeLlamaModelHandle,System.ReadOnlySpan{System.String},System.Single,System.Single,System.Int32,System.Int32)">
|
||
<summary>
|
||
DRY sampler, designed by p-e-w, as described in: <a href="https://github.com/oobabooga/text-generation-webui/pull/5677">https://github.com/oobabooga/text-generation-webui/pull/5677</a>.
|
||
Porting Koboldcpp implementation authored by pi6am: <a href="https://github.com/LostRuins/koboldcpp/pull/982">https://github.com/LostRuins/koboldcpp/pull/982</a>
|
||
</summary>
|
||
<param name="model">The model this sampler will be used with</param>
|
||
<param name="sequenceBreakers"></param>
|
||
<param name="multiplier">penalty multiplier, 0.0 = disabled</param>
|
||
<param name="base">exponential base</param>
|
||
<param name="allowedLength">repeated sequences longer than this are penalized</param>
|
||
<param name="penaltyLastN">how many tokens to scan for repetitions (0 = entire context)</param>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLLamaSamplerChainHandle.AddLogitBias(System.Int32,System.Span{LLama.Native.LLamaLogitBias})">
|
||
<summary>
|
||
Create a sampler that applies a bias directly to the logits
|
||
</summary>
|
||
<param name="vocabSize"></param>
|
||
<param name="biases"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSamplerChainParams">
|
||
<summary>
|
||
|
||
</summary>
|
||
<remarks>llama_sampler_chain_params</remarks>
|
||
</member>
|
||
<member name="P:LLama.Native.LLamaSamplerChainParams.NoPerf">
|
||
<summary>
|
||
whether to measure performance timings
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.LLamaSamplerChainParams.Default">
|
||
<summary>
|
||
Get the default LLamaSamplerChainParams
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaLogitBias">
|
||
<summary>
|
||
A bias to apply directly to a logit
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaLogitBias.Token">
|
||
<summary>
|
||
The token to apply the bias to
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaLogitBias.Bias">
|
||
<summary>
|
||
The bias to add
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSamplerINative">
|
||
<summary>
|
||
|
||
</summary>
|
||
<remarks>llama_sampler_i</remarks>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSamplerINative.NameDelegate">
|
||
<summary>
|
||
Get the name of this sampler
|
||
</summary>
|
||
<param name="smpl"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSamplerINative.AcceptDelegate">
|
||
<summary>
|
||
Update internal sampler state after a token has been chosen
|
||
</summary>
|
||
<param name="smpl"></param>
|
||
<param name="token"></param>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSamplerINative.ApplyDelegate">
|
||
<summary>
|
||
Apply this sampler to a set of logits
|
||
</summary>
|
||
<param name="smpl"></param>
|
||
<param name="logits"></param>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSamplerINative.ResetDelegate">
|
||
<summary>
|
||
Reset the internal state of this sampler
|
||
</summary>
|
||
<param name="smpl"></param>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSamplerINative.CloneDelegate">
|
||
<summary>
|
||
Create a clone of this sampler
|
||
</summary>
|
||
<param name="smpl"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSamplerINative.FreeDelegate">
|
||
<summary>
|
||
Free all resources held by this sampler
|
||
</summary>
|
||
<param name="smpl"></param>
|
||
</member>
|
||
<member name="T:LLama.Native.LLamaSamplerNative">
|
||
<summary>
|
||
|
||
</summary>
|
||
<remarks>llama_sampler</remarks>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaSamplerNative.Interface">
|
||
<summary>
|
||
Holds the function pointers which make up the actual sampler
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.LLamaSamplerNative.Context">
|
||
<summary>
|
||
Any additional context this sampler needs; it may be anything. We use it
to hold a GCHandle.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.CustomSamplerHandle._gcHandle">
|
||
<summary>
|
||
This GCHandle roots this object, preventing it from being freed.
|
||
</summary>
|
||
</member>
|
||
<member name="F:LLama.Native.CustomSamplerHandle._sampler">
|
||
<summary>
|
||
A reference to the user code which implements the custom sampler
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.CustomSamplerHandle.GetLLamaSamplerPointer">
|
||
<summary>
|
||
Get a pointer to a `llama_sampler` (LLamaSamplerNative) struct, suitable for passing to `llama_sampler_chain_add`
|
||
</summary>
|
||
<returns></returns>
|
||
<exception cref="T:System.NotImplementedException"></exception>
|
||
</member>
|
||
<member name="T:LLama.Native.ICustomSampler">
|
||
<summary>
|
||
A custom sampler stage for modifying logits or selecting a token
|
||
</summary>
|
||
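<remarks>
A do-nothing stage as a starting point (a sketch assuming only the members documented here; add or adjust members if your version of the interface differs):
<code>
sealed class NoOpSampler : ICustomSampler
{
    public string Name => "no-op";

    public void Apply(ref LLamaTokenDataArrayNative tokenData)
    {
        // Modify the candidate logits here. If the ordering may have changed,
        // remember to mark the data as unsorted.
    }

    public void Accept(LLamaToken token) { }
    public void Reset() { }
    public ICustomSampler Clone() => new NoOpSampler();
    public void Dispose() { }
}
</code>
</remarks>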
</member>
|
||
<member name="P:LLama.Native.ICustomSampler.Name">
|
||
<summary>
|
||
The human-readable name of this stage
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.ICustomSampler.Apply(LLama.Native.LLamaTokenDataArrayNative@)">
|
||
<summary>
|
||
Apply this stage to a set of logits.
|
||
This can modify logits or select a token (or both).
|
||
If logits are modified the Sorted flag <b>must</b> be set to false.
|
||
</summary>
|
||
<remarks>
|
||
If the logits are no longer sorted after the custom sampler has run it is <b>critically</b> important to
|
||
set <i>Sorted=false</i>. If unsure, always set it to false, this is a safe default.
|
||
</remarks>
|
||
<param name="tokenData"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.ICustomSampler.Accept(LLama.Native.LLamaToken)">
|
||
<summary>
|
||
Update the internal state of the sampler when a token is chosen
|
||
</summary>
|
||
<param name="token"></param>
|
||
</member>
|
||
<member name="M:LLama.Native.ICustomSampler.Reset">
|
||
<summary>
|
||
Reset the internal state of this sampler
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.ICustomSampler.Clone">
|
||
<summary>
|
||
Create a clone of this sampler
|
||
</summary>
|
||
</member>
|
||
<member name="T:LLama.Native.SafeLlavaImageEmbedHandle">
|
||
<summary>
|
||
A reference to a llava image embed handle
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlavaImageEmbedHandle.Model">
|
||
<summary>
|
||
Get the model used to create this image embedding
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlavaImageEmbedHandle.EmbeddingDimensions">
|
||
<summary>
|
||
Get the number of dimensions in an embedding
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlavaImageEmbedHandle.PatchCount">
|
||
<summary>
|
||
Get the number of "patches" in an image embedding
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.CreateFromFileName(LLama.Native.SafeLlavaModelHandle,LLama.LLamaContext,System.String)">
|
||
<summary>
|
||
Create an image embed from an image file
|
||
</summary>
|
||
<param name="clip"></param>
|
||
<param name="ctx"></param>
|
||
<param name="image">Path to the image file. Supported formats:
|
||
<list type="bullet">
|
||
<item>JPG</item>
|
||
<item>PNG</item>
|
||
<item>BMP</item>
|
||
<item>TGA</item>
|
||
</list>
|
||
</param>
|
||
<returns></returns>
|
||
<exception cref="T:System.InvalidOperationException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.CreateFromFileName(LLama.Native.SafeLlavaModelHandle,System.String,System.Int32)">
|
||
<summary>
|
||
Create an image embed from an image file
|
||
</summary>
|
||
<param name="clip"></param>
|
||
<param name="image">Path to the image file. Supported formats:
|
||
<list type="bullet">
|
||
<item>JPG</item>
|
||
<item>PNG</item>
|
||
<item>BMP</item>
|
||
<item>TGA</item>
|
||
</list>
|
||
</param>
|
||
<param name="threads"></param>
|
||
<returns></returns>
|
||
<exception cref="T:System.InvalidOperationException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.CreateFromMemory(LLama.Native.SafeLlavaModelHandle,LLama.LLamaContext,System.Byte[])">
|
||
<summary>
|
||
Create an image embed from the bytes of an image.
|
||
</summary>
|
||
<param name="clip"></param>
|
||
<param name="ctx"></param>
|
||
<param name="image">Image bytes. Supported formats:
|
||
<list type="bullet">
|
||
<item>JPG</item>
|
||
<item>PNG</item>
|
||
<item>BMP</item>
|
||
<item>TGA</item>
|
||
</list>
|
||
</param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.CreateFromMemory(LLama.Native.SafeLlavaModelHandle,System.Byte[],System.Int32)">
|
||
<summary>
|
||
Create an image embed from the bytes of an image.
|
||
</summary>
|
||
<param name="clip"></param>
|
||
<param name="image">Image bytes. Supported formats:
|
||
<list type="bullet">
|
||
<item>JPG</item>
|
||
<item>PNG</item>
|
||
<item>BMP</item>
|
||
<item>TGA</item>
|
||
</list>
|
||
</param>
|
||
<param name="threads"></param>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.ReleaseHandle">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaImageEmbedHandle.GetEmbedding(System.Span{System.Single},System.Int32)">
|
||
<summary>
|
||
Copy the embeddings data to the destination span
|
||
</summary>
|
||
<param name="dest"></param>
|
||
<param name="index"></param>
|
||
</member>
|
||
<member name="T:LLama.Native.SafeLlavaModelHandle">
|
||
<summary>
|
||
A reference to a set of llava model weights.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlavaModelHandle.EmbeddingDimensions">
|
||
<summary>
|
||
Get the number of dimensions in an embedding
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Native.SafeLlavaModelHandle.PatchCount">
|
||
<summary>
|
||
Get the number of "patches" in an image embedding
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaModelHandle.ReleaseHandle">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaModelHandle.LoadFromFile(System.String,System.Int32)">
|
||
<summary>
|
||
Load a model from the given file path into memory
|
||
</summary>
|
||
<param name="modelPath">MMP File (Multi-Modal Projections)</param>
|
||
<param name="verbosity">Verbosity level</param>
|
||
<returns>SafeHandle of the Clip Model</returns>
|
||
<exception cref="T:System.InvalidOperationException"></exception>
|
||
<exception cref="T:LLama.Exceptions.LoadWeightsFailedException"></exception>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaModelHandle.CreateImageEmbeddings(LLama.LLamaContext,System.String)">
|
||
<summary>
|
||
Create the Image Embeddings.
|
||
</summary>
|
||
<param name="ctxLlama">LLama Context</param>
|
||
<param name="image">Image filename (it supports jpeg format only)</param>
|
||
<returns>return the SafeHandle of these embeddings</returns>
|
||
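<remarks>
A minimal sketch, assuming <c>context</c> is an initialised LLamaContext and the file paths below exist:
<code>
using var clip = SafeLlavaModelHandle.LoadFromFile("mmproj.gguf", 1);   // path, verbosity
using var embed = clip.CreateImageEmbeddings(context, "photo.jpg");

var nPast = 0;
clip.EvalImageEmbed(context, embed, ref nPast);
</code>
</remarks>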
</member>
|
||
<member name="M:LLama.Native.SafeLlavaModelHandle.CreateImageEmbeddings(System.String,System.Int32)">
|
||
<summary>
|
||
Create the Image Embeddings.
|
||
</summary>
|
||
<param name="image">Image in binary format (it supports jpeg format only)</param>
|
||
<param name="threads">Number of threads to use</param>
|
||
<returns>return the SafeHandle of these embeddings</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaModelHandle.CreateImageEmbeddings(LLama.LLamaContext,System.Byte[])">
|
||
<summary>
|
||
Create the Image Embeddings.
|
||
</summary>
|
||
<param name="ctxLlama">LLama Context</param>
|
||
<param name="image">Image in binary format (it supports jpeg format only)</param>
|
||
<returns>return the SafeHandle of these embeddings</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaModelHandle.CreateImageEmbeddings(System.Byte[],System.Int32)">
|
||
<summary>
|
||
Create the Image Embeddings.
|
||
</summary>
|
||
<param name="image">Image in binary format (it supports jpeg format only)</param>
|
||
<param name="threads">Number of threads to use</param>
|
||
<returns>return the SafeHandle of these embeddings</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaModelHandle.EvalImageEmbed(LLama.LLamaContext,LLama.Native.SafeLlavaImageEmbedHandle,System.Int32@)">
|
||
<summary>
|
||
Evaluates the image embeddings.
|
||
</summary>
|
||
<param name="ctxLlama">Llama Context</param>
|
||
<param name="imageEmbed">The current embeddings to evaluate</param>
|
||
<param name="n_past"></param>
|
||
<returns>True on success</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaModelHandle.clip_model_load(System.String,System.Int32)">
|
||
<summary>
|
||
Load MULTI MODAL PROJECTIONS model / Clip Model
|
||
</summary>
|
||
<param name="mmProj"> Model path/file</param>
|
||
<param name="verbosity">Verbosity level</param>
|
||
<returns>SafeLlavaModelHandle</returns>
|
||
</member>
|
||
<member name="M:LLama.Native.SafeLlavaModelHandle.clip_free(System.IntPtr)">
|
||
<summary>
|
||
Frees MULTI MODAL PROJECTIONS model / Clip Model
|
||
</summary>
|
||
<param name="ctx">Internal Pointer to the model</param>
|
||
</member>
|
||
<member name="T:LLama.Sampling.BaseSamplingPipeline">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Sampling.BaseSamplingPipeline.#ctor">
|
||
<summary>
|
||
Create a new sampler wrapping a llama.cpp sampler chain
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Sampling.BaseSamplingPipeline.CreateChain(LLama.Native.SafeLLamaContextHandle)">
|
||
<summary>
|
||
Create a sampling chain. This will be called once, the base class will automatically dispose the chain.
|
||
</summary>
|
||
<returns></returns>
|
||
</member>
|
||
<member name="M:LLama.Sampling.BaseSamplingPipeline.Dispose">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Sampling.BaseSamplingPipeline.Sample(LLama.Native.SafeLLamaContextHandle,System.Int32)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Sampling.BaseSamplingPipeline.Reset">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="M:LLama.Sampling.BaseSamplingPipeline.Accept(LLama.Native.LLamaToken)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Sampling.DefaultSamplingPipeline">
|
||
<summary>
|
||
An implementation of ISamplingPipeline which mimics the default llama.cpp sampling
|
||
</summary>
|
||
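<remarks>
A minimal configuration sketch (the values shown are arbitrary, not recommended defaults):
<code>
using LLama.Sampling;

var pipeline = new DefaultSamplingPipeline
{
    Temperature = 0.7f,
    TopK = 40,
    TopP = 0.9f,
    RepeatPenalty = 1.1f,
    Seed = 1234,
};

// var token = pipeline.Sample(ctx, index);   // ctx is a SafeLLamaContextHandle
</code>
</remarks>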
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.LogitBias">
|
||
<summary>
|
||
Bias values to add to certain logits
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.RepeatPenalty">
|
||
<summary>
|
||
Repetition penalty, as described in https://arxiv.org/abs/1909.05858
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.FrequencyPenalty">
|
||
<summary>
|
||
Frequency penalty as described by OpenAI: https://platform.openai.com/docs/api-reference/chat/create<br />
|
||
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text
|
||
so far, decreasing the model's likelihood to repeat the same line verbatim.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.PresencePenalty">
|
||
<summary>
|
||
Presence penalty as described by OpenAI: https://platform.openai.com/docs/api-reference/chat/create<br />
|
||
Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the
|
||
text so far, increasing the model's likelihood to talk about new topics.
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.PenaltyCount">
|
||
<summary>
|
||
How many tokens should be considered for penalties
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.PenalizeNewline">
|
||
<summary>
|
||
Whether the newline token should be protected from being modified by penalty
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.PreventEOS">
|
||
<summary>
|
||
Whether the EOS token should be suppressed. Setting this to 'true' prevents EOS from being sampled
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.Temperature">
|
||
<summary>
|
||
Temperature to apply (higher temperature is more "creative")
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.TopK">
|
||
<summary>
|
||
Number of tokens to keep in TopK sampling
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.TypicalP">
|
||
<summary>
|
||
P value for locally typical sampling
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.TopP">
|
||
<summary>
|
||
P value for TopP sampling
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.MinP">
|
||
<summary>
|
||
P value for MinP sampling
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.Grammar">
|
||
<summary>
|
||
Grammar to apply to constrain possible tokens
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.MinKeep">
|
||
<summary>
|
||
The minimum number of tokens to keep for samplers which remove tokens
|
||
</summary>
|
||
</member>
|
||
<member name="P:LLama.Sampling.DefaultSamplingPipeline.Seed">
|
||
<summary>
|
||
Seed to use for random sampling
|
||
</summary>
|
||
</member>
|
||
<member name="M:LLama.Sampling.DefaultSamplingPipeline.CreateChain(LLama.Native.SafeLLamaContextHandle)">
|
||
<inheritdoc />
|
||
</member>
|
||
<member name="T:LLama.Sampling.Grammar">
|
||
<summary>
|
||
A grammar in GBNF form
|
||
</summary>
|
||
<param name="Gbnf"></param>
|
||
<param name="Root"></param>
</member>
<member name="M:LLama.Sampling.Grammar.#ctor(System.String,System.String)">
<summary>
A grammar in GBNF form
</summary>
<param name="Gbnf"></param>
<param name="Root"></param>
</member>
<member name="P:LLama.Sampling.Grammar.Gbnf">
<summary>The grammar source text, in GBNF format</summary>
</member>
<member name="P:LLama.Sampling.Grammar.Root">
<summary>The name of the root rule of the grammar</summary>
</member>
<member name="T:LLama.Sampling.GreedySamplingPipeline">
<summary>
A sampling pipeline which always selects the most likely token
</summary>
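<example>
A minimal, hedged sketch. Greedy sampling needs no tuning; attaching a grammar is optional, and the GBNF shown is illustrative.
<code>
var pipeline = new LLama.Sampling.GreedySamplingPipeline
{
    // Optionally constrain the output, here to one or more digits.
    Grammar = new LLama.Sampling.Grammar("root ::= [0-9]+", "root"),
};
</code>
</example>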
</member>
<member name="P:LLama.Sampling.GreedySamplingPipeline.Grammar">
<summary>
Grammar to apply to constrain possible tokens
</summary>
</member>
<member name="M:LLama.Sampling.GreedySamplingPipeline.CreateChain(LLama.Native.SafeLLamaContextHandle)">
<inheritdoc />
</member>
<member name="T:LLama.Sampling.ISamplingPipeline">
<summary>
Convert a span of logits into a single sampled token. This interface can be implemented to completely customise the sampling process.
</summary>
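<example>
A hedged sketch of driving a pipeline by hand using only the members documented here. How the context handle and the logits index are obtained (a prior decode step) is assumed and not shown.
<code>
// 'pipeline' is any ISamplingPipeline implementation, e.g. DefaultSamplingPipeline.
// 'ctx' is a SafeLLamaContextHandle and 'index' the position to sample logits from.
LLama.Native.LLamaToken token = pipeline.Sample(ctx, index);

// Inform the pipeline that the token was used, so stateful samplers can update.
pipeline.Accept(token);

// Clear all internal state before starting an unrelated generation.
pipeline.Reset();
</code>
</example>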
</member>
<member name="M:LLama.Sampling.ISamplingPipeline.Sample(LLama.Native.SafeLLamaContextHandle,System.Int32)">
<summary>
Sample a single token from the given context at the given position
</summary>
<param name="ctx">The context being sampled from</param>
<param name="index">Position to sample logits from</param>
<returns>The sampled token</returns>
</member>
<member name="M:LLama.Sampling.ISamplingPipeline.Reset">
<summary>
Reset all internal state of the sampling pipeline
</summary>
</member>
<member name="M:LLama.Sampling.ISamplingPipeline.Accept(LLama.Native.LLamaToken)">
<summary>
Update the pipeline with the knowledge that a particular token was just accepted
</summary>
<param name="token">The token which was accepted</param>
</member>
<member name="T:LLama.Sampling.ISamplingPipelineExtensions">
<summary>
Extension methods for <see cref="T:LLama.Sampling.ISamplingPipeline"/>
</summary>
</member>
<member name="M:LLama.Sampling.ISamplingPipelineExtensions.Sample(LLama.Sampling.ISamplingPipeline,LLama.LLamaContext,System.Int32)">
<summary>
Sample a single token from the given context at the given position
</summary>
<param name="pipe">The pipeline to sample with</param>
<param name="ctx">The context being sampled from</param>
<param name="index">Position to sample logits from</param>
<returns>The sampled token</returns>
</member>
<member name="T:LLama.StreamingTokenDecoder">
<summary>
Decodes a stream of tokens into a stream of characters
</summary>
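<example>
A hedged sketch of incremental decoding using the constructors and methods documented below. The loaded model weights and the source of the sampled tokens are assumptions and not shown.
<code>
// 'weights' is an already-loaded LLamaWeights instance (assumption).
var decoder = new LLama.StreamingTokenDecoder(System.Text.Encoding.UTF8, weights);

foreach (var token in sampledTokens) // 'sampledTokens' is an illustrative token source
{
    decoder.Add(token);                   // feed each token as it is produced
    System.Console.Write(decoder.Read()); // Read() returns the new characters and clears the buffer
}
</code>
</example>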
</member>
<member name="P:LLama.StreamingTokenDecoder.AvailableCharacters">
<summary>
The number of decoded characters waiting to be read
</summary>
</member>
<member name="P:LLama.StreamingTokenDecoder.DecodeSpecialTokens">
<summary>
If true, special tokens will be converted to text. If false they will be invisible.
</summary>
</member>
<member name="M:LLama.StreamingTokenDecoder.#ctor(System.Text.Encoding,LLama.LLamaWeights)">
<summary>
Create a new decoder
</summary>
<param name="encoding">Text encoding to use</param>
<param name="weights">Model weights</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.#ctor(LLama.LLamaContext)">
<summary>
Create a new decoder
</summary>
<param name="context">Context to retrieve encoding and model weights from</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.#ctor(System.Text.Encoding,LLama.Native.SafeLLamaContextHandle)">
<summary>
Create a new decoder
</summary>
<param name="encoding">Text encoding to use</param>
<param name="context">Context to retrieve model weights from</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.#ctor(System.Text.Encoding,LLama.Native.SafeLlamaModelHandle)">
<summary>
Create a new decoder
</summary>
<param name="encoding">Text encoding to use</param>
<param name="weights">Model weights to use</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.Add(LLama.Native.LLamaToken)">
<summary>
Add a single token to the decoder
</summary>
<param name="token">The token to add</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.Add(System.Int32)">
<summary>
Add a single token to the decoder
</summary>
<param name="token">The token to add</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.AddRange``1(``0)">
<summary>
Add all tokens in the given enumerable
</summary>
<param name="tokens">The tokens to add</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.AddRange(System.ReadOnlySpan{LLama.Native.LLamaToken})">
<summary>
Add all tokens in the given span
</summary>
<param name="tokens">The tokens to add</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.Read(System.Collections.Generic.List{System.Char})">
<summary>
Read all decoded characters and clear the buffer
</summary>
<param name="dest">List to append the decoded characters to</param>
</member>
<member name="M:LLama.StreamingTokenDecoder.Read">
<summary>
Read all decoded characters as a string and clear the buffer
</summary>
<returns>All decoded characters as a string</returns>
</member>
<member name="M:LLama.StreamingTokenDecoder.Reset">
<summary>
Set the decoder back to its initial state
</summary>
</member>
<member name="T:LLama.Transformers.PromptTemplateTransformer">
<summary>
A prompt formatter that will use llama.cpp's template formatter.
If your model is not supported, you will need to define your own formatter according to the chat prompt specification for your model
</summary>
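<example>
A hedged usage sketch. The ChatHistory calls assume the LLama.Common.ChatHistory API with an AddMessage method; the boolean constructor argument and the loaded weights are likewise assumptions.
<code>
// 'weights' is an already-loaded LLamaWeights instance (assumption).
var transformer = new LLama.Transformers.PromptTemplateTransformer(weights, true);

var history = new LLama.Common.ChatHistory();
history.AddMessage(LLama.Common.AuthorRole.System, "You are a helpful assistant.");
history.AddMessage(LLama.Common.AuthorRole.User, "Hello!");

// Produce the prompt string in the format the model's chat template expects.
string prompt = transformer.HistoryToText(history);
</code>
</example>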
</member>
<member name="M:LLama.Transformers.PromptTemplateTransformer.#ctor(LLama.LLamaWeights,System.Boolean)">
<summary>
A prompt formatter that will use llama.cpp's template formatter.
If your model is not supported, you will need to define your own formatter according to the chat prompt specification for your model
</summary>
</member>
<member name="M:LLama.Transformers.PromptTemplateTransformer.HistoryToText(LLama.Common.ChatHistory)">
<inheritdoc />
</member>
<member name="M:LLama.Transformers.PromptTemplateTransformer.TextToHistory(LLama.Common.AuthorRole,System.String)">
<inheritdoc />
</member>
<member name="M:LLama.Transformers.PromptTemplateTransformer.Clone">
<inheritdoc />
</member>
<member name="M:LLama.Transformers.PromptTemplateTransformer.ToModelPrompt(LLama.LLamaTemplate)">
<summary>
Apply the template to the messages and return the resulting prompt as a string
</summary>
<returns>The formatted template string as defined by the model</returns>
</member>
</members>
</doc>