diff --git a/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs b/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs index fdccf8facf..8963a0c180 100644 --- a/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs +++ b/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs @@ -1044,6 +1044,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo ( "o4-mini-", ModelEncoding.O200kBase ), // e.g. o4-mini // chat + ( "gpt-5.3-", ModelEncoding.O200kBase ), ( "gpt-5.2-", ModelEncoding.O200kBase ), ( "gpt-5.1-", ModelEncoding.O200kBase ), ( "gpt-5-", ModelEncoding.O200kBase ), @@ -1073,6 +1074,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo { "o4-mini", ModelEncoding.O200kBase }, // chat + { "gpt-5.3", ModelEncoding.O200kBase }, { "gpt-5.2", ModelEncoding.O200kBase }, { "gpt-5.1", ModelEncoding.O200kBase }, { "gpt-5", ModelEncoding.O200kBase }, diff --git a/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs b/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs index c7c1e342d8..2a7f0fc3a9 100644 --- a/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs +++ b/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs @@ -38,6 +38,7 @@ public class TiktokenTests public static Tokenizer GPT5 { get; } = TiktokenTokenizer.CreateForModel("gpt-5"); public static Tokenizer GPT5_1 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.1"); public static Tokenizer GPT5_2 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.2"); + public static Tokenizer GPT5_3 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.3"); public static Tokenizer Phi4 { get; } = TiktokenTokenizer.CreateForModel("phi-4"); public static TiktokenTokenizer GptOss { get; } = TiktokenTokenizer.CreateForModel("gpt-oss-20b"); @@ -421,6 +422,8 @@ public void TestEncodeR50kBase() [InlineData("gpt-5.1-mini")] [InlineData("gpt-5.2")] [InlineData("gpt-5.2-mini")] + [InlineData("gpt-5.3")] + [InlineData("gpt-5.3-mini")] [InlineData("chatgpt-4o-")] [InlineData("gpt-4")] [InlineData("gpt-4-")] @@ -541,6 +544,7 @@ public void TestEncodingNamesNegativeCases() [InlineData("gpt-5")] [InlineData("gpt-5.1")] [InlineData("gpt-5.2")] + [InlineData("gpt-5.3")] [InlineData("o1")] [InlineData("o3")] [InlineData("o4-mini")]