From 30c173b39edbd24219932c7644f119d001253a55 Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Mon, 5 Feb 2024 09:16:04 +0100 Subject: [PATCH 01/15] Disable pre-releases in global.json and require at least 8.0.1 security patch --- global.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/global.json b/global.json index dad2db5..faf133a 100644 --- a/global.json +++ b/global.json @@ -1,7 +1,7 @@ { "sdk": { - "version": "8.0.0", + "version": "8.0.1", "rollForward": "latestMajor", - "allowPrerelease": true + "allowPrerelease": false } } \ No newline at end of file From 98a3d369d2a3eeb7bb068970b93f8ec8872964ff Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Mon, 5 Feb 2024 09:53:10 +0100 Subject: [PATCH 02/15] Bump package and assembly version --- MagicFileEncoding/MagicFileEncoding.csproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MagicFileEncoding/MagicFileEncoding.csproj b/MagicFileEncoding/MagicFileEncoding.csproj index 1eabf03..00e9ebe 100644 --- a/MagicFileEncoding/MagicFileEncoding.csproj +++ b/MagicFileEncoding/MagicFileEncoding.csproj @@ -2,7 +2,7 @@ net8.0 - 3.0.0 + 3.0.1 Magic File Encoding Jan Schwien by Jan Schwien @@ -19,7 +19,7 @@ Be aware of possible transformation issues if the target encoding is simpler tha It is strongly recommended to write unit tests for your use case to ensure the load and transformation works as expected. 
enable 11 - 3.0.0 + 3.0.1 From 842440df63d5cee5397501ef44f1fcceb5b10d7a Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Mon, 5 Feb 2024 10:26:05 +0100 Subject: [PATCH 03/15] Fix readme example 7 and 8; Prepare upcoming readme example tests --- CHANGELOG.md | 5 +- MagicFileEncoding.sln | 44 ++++----- README.md | 8 +- UnitTests/ReadmeExampleTest.cs | 157 +++++++++++++++++++++++++++++++++ 4 files changed, 187 insertions(+), 27 deletions(-) create mode 100644 UnitTests/ReadmeExampleTest.cs diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ada8e3..8abf5d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,7 @@ -## v3.0.0 - 5 Feb 2024 +## v3.0.1 +* Fix code example 7 and 8 in readme + +## v3.0.0 - 5 Feb 2024 * Switch to .Net 8.0 * Updated dependencies * Improved error handling for empty and whitespace path diff --git a/MagicFileEncoding.sln b/MagicFileEncoding.sln index c090d89..cf760ed 100644 --- a/MagicFileEncoding.sln +++ b/MagicFileEncoding.sln @@ -1,22 +1,22 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MagicFileEncoding", "MagicFileEncoding\MagicFileEncoding.csproj", "{89C8EAD5-218B-46A8-8DE9-93783166F9FC}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnitTests", "UnitTests\UnitTests.csproj", "{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.Build.0 = Debug|Any CPU - {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.ActiveCfg = Release|Any CPU - {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.Build.0 = Release|Any CPU - {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - 
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.Build.0 = Debug|Any CPU - {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.ActiveCfg = Release|Any CPU - {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection -EndGlobal + +Microsoft Visual Studio Solution File, Format Version 12.00 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MagicFileEncoding", "MagicFileEncoding\MagicFileEncoding.csproj", "{89C8EAD5-218B-46A8-8DE9-93783166F9FC}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnitTests", "UnitTests\UnitTests.csproj", "{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.Build.0 = Debug|Any CPU + {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.ActiveCfg = Debug|Any CPU + {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.Build.0 = Debug|Any CPU + {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.Build.0 = Debug|Any CPU + {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.ActiveCfg = Debug|Any CPU + {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.Build.0 = Debug|Any CPU + EndGlobalSection +EndGlobal diff --git a/README.md b/README.md index f5d2554..dab414c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![Actions Status](https://github.com/Jan5366x/MagicFileEncoding/workflows/Build%20and%20Test/badge.svg)](https://github.com/Jan5366x/MagicFileEncoding/actions) ![GitHub](https://img.shields.io/github/license/jan5366x/MagicFileEncoding) -The Magic File Encoding Library is a powerful tool designed to assist you in loading and 
transforming simple and closed scope +The Magic File Encoding Library is designed to assist you in loading and transforming simple and closed scope character set text files. Whether you're working with txt, xml, json, EDIFACT or similar text formats, this library provides a comprehensive solution to handle various encoding scenarios effortlessly. @@ -13,7 +13,7 @@ comprehensive solution to handle various encoding scenarios effortlessly. ## .Net Version - **.Net 8:** Magic File Encoding **3.0.0 and newer** -- **.Net 6:** Magic File Encoding **2.0.1 and older** +- **.Net 6:** Magic File Encoding **2.0.1** ## Transformation Considerations When performing encoding transformations, it is important to be mindful of potential issues @@ -100,7 +100,7 @@ Console.WriteLine("Acceptable encoding: " + acceptableEncoding.EncodingName); ```csharp byte[] bytes = File.ReadAllBytes("example.txt"); -string text = FileEncoding.ReadAllText(bytes); +string text = FileEncoding.ReadAllBytes(bytes); Console.WriteLine("Text: " + text); ``` @@ -110,7 +110,7 @@ byte[] bytes = File.ReadAllBytes("example.txt"); Encoding targetEncoding = Encoding.UTF8; Encoding fallbackEncoding = Encoding.GetEncoding("ISO-8859-1"); -string text = FileEncoding.ReadAllText(bytes, targetEncoding, fallbackEncoding); +string text = FileEncoding.ReadAllBytes(bytes, targetEncoding, fallbackEncoding); Console.WriteLine("Text: " + text); ``` diff --git a/UnitTests/ReadmeExampleTest.cs b/UnitTests/ReadmeExampleTest.cs new file mode 100644 index 0000000..841396b --- /dev/null +++ b/UnitTests/ReadmeExampleTest.cs @@ -0,0 +1,157 @@ +using System; +using System.IO; +using System.Text; +using MagicFileEncoding; +using NUnit.Framework; + +namespace UnitTests; + +[TestFixture] +public class ReadmeExampleTest +{ + // TODO adjust files and assert result + + [Test] + public void Example1() + { + // Arrange + + // Act + // -> Readme Code + string filename = "example.txt"; + Encoding fallbackEncoding = Encoding.UTF8; + + Encoding 
acceptableEncoding = FileEncoding.GetAcceptableEncoding(filename, fallbackEncoding); + Console.WriteLine("Acceptable encoding: " + acceptableEncoding.EncodingName); + // <- + + // Assert + } + + [Test] + public void Example2() + { + // Arrange + + // Act + // -> Readme Code + string filename = "example.txt"; + + string text = FileEncoding.ReadAllText(filename); + Console.WriteLine("Text: " + text); + // <- + + // Assert + } + + [Test] + public void Example3() + { + // Arrange + + // Act + // -> Readme Code + string filename = "example.txt"; + Encoding targetEncoding = Encoding.UTF8; + Encoding fallbackEncoding = Encoding.GetEncoding("ISO-8859-1"); + + string text = FileEncoding.ReadAllText(filename, targetEncoding, fallbackEncoding); + Console.WriteLine("Text: " + text); + // <- + + // Assert + } + + [Test] + public void Example4() + { + // Arrange + + // Act + // -> Readme Code + string path = "output.txt"; + Encoding targetEncoding = Encoding.UTF8; + string text = "Hello, world!"; + + FileEncoding.WriteAllText(path, targetEncoding, text); + Console.WriteLine("Text written to file."); + // <- + + // Assert + } + + [Test] + public void Example5() + { + // Arrange + + // Act + // -> Readme Code + string path = "output.txt"; + Encoding targetEncoding = Encoding.UTF8; + + FileEncoding.Write(path, targetEncoding, writer => + { + writer.WriteLine("Line 1"); + writer.WriteLine("Line 2"); + writer.WriteLine("Line 3"); + }); + + Console.WriteLine("Text written to file."); + // <- + + // Assert + } + + [Test] + public void Example6() + { + // Arrange + + // Act + // -> Readme Code + byte[] bytes = File.ReadAllBytes("example.txt"); + Encoding fallbackEncoding = Encoding.UTF8; + + Encoding acceptableEncoding = FileEncoding.GetAcceptableEncoding(bytes, fallbackEncoding); + Console.WriteLine("Acceptable encoding: " + acceptableEncoding.EncodingName); + // <- + + // Assert + } + + [Test] + public void Example7() + { + // Arrange + + // Act + // -> Readme Code + byte[] bytes = 
File.ReadAllBytes("example.txt"); + + string text = FileEncoding.ReadAllBytes(bytes); + Console.WriteLine("Text: " + text); + // <- + + // Assert + } + + [Test] + public void Example8() + { + // Arrange + + // Act + // -> Readme Code + byte[] bytes = File.ReadAllBytes("example.txt"); + Encoding targetEncoding = Encoding.UTF8; + Encoding fallbackEncoding = Encoding.GetEncoding("ISO-8859-1"); + + string text = FileEncoding.ReadAllBytes(bytes, targetEncoding, fallbackEncoding); + Console.WriteLine("Text: " + text); + // <- + + // Assert + } + +} \ No newline at end of file From bd3e85adbd469331be11ce5ea51b43a2e0d89de2 Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Mon, 5 Feb 2024 11:35:06 +0100 Subject: [PATCH 04/15] Implement basic readme example tests; Update readme code examples --- CHANGELOG.md | 3 +- README.md | 35 +++++++------- UnitTests/ReadmeExampleTest.cs | 88 +++++++++++++++++++++++++++------- 3 files changed, 92 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8abf5d1..4743b5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## v3.0.1 -* Fix code example 7 and 8 in readme +* Fix code example 7 and 8 in readme +* Adjust readme code example sto use filePath instead of filename ## v3.0.0 - 5 Feb 2024 * Switch to .Net 8.0 diff --git a/README.md b/README.md index dab414c..85d73be 100644 --- a/README.md +++ b/README.md @@ -36,47 +36,48 @@ Here are some code examples demonstrating the usage of the code library: ### File System ### Example 1: Getting the acceptable encoding of a file ```csharp -string filename = "example.txt"; +string filePath = "~/example.txt"; Encoding fallbackEncoding = Encoding.UTF8; -Encoding acceptableEncoding = FileEncoding.GetAcceptableEncoding(filename, fallbackEncoding); +Encoding acceptableEncoding = FileEncoding.GetAcceptableEncoding(filePath, fallbackEncoding); + Console.WriteLine("Acceptable encoding: " + acceptableEncoding.EncodingName); ``` ### Example 2: Reading all text from a file using 
automatic encoding detection ```csharp -string filename = "example.txt"; +string filePath = "~/example.txt"; -string text = FileEncoding.ReadAllText(filename); -Console.WriteLine("Text: " + text); +string text = FileEncoding.ReadAllText(filePath); + Console.WriteLine("Text: " + text); ``` ### Example 3: Reading all text from a file and transforming it into a target encoding ```csharp -string filename = "example.txt"; +string filePath = "~/example.txt"; Encoding targetEncoding = Encoding.UTF8; Encoding fallbackEncoding = Encoding.GetEncoding("ISO-8859-1"); -string text = FileEncoding.ReadAllText(filename, targetEncoding, fallbackEncoding); +string text = FileEncoding.ReadAllText(filePath, targetEncoding, fallbackEncoding); Console.WriteLine("Text: " + text); ``` ### Example 4: Writing text to a file in a specific encoding ```csharp -string path = "output.txt"; -Encoding targetEncoding = Encoding.UTF8; -string text = "Hello, world!"; +string filePath = "~/output.txt"; +Encoding targetEncoding = Encoding.Unicode; +string text = "\u2387 Hello, world!"; -FileEncoding.WriteAllText(path, targetEncoding, text); +FileEncoding.WriteAllText(filePath, targetEncoding, text); Console.WriteLine("Text written to file."); ``` ### Example 5: Providing writer access to a file in a specific encoding ```csharp -string path = "output.txt"; +string filePath = "~/output.txt"; Encoding targetEncoding = Encoding.UTF8; -FileEncoding.Write(path, targetEncoding, writer => +FileEncoding.Write(filePath, targetEncoding, writer => { writer.WriteLine("Line 1"); writer.WriteLine("Line 2"); @@ -89,7 +90,7 @@ Console.WriteLine("Text written to file."); ### Example 6: Getting the acceptable encoding of a byte array ```csharp -byte[] bytes = File.ReadAllBytes("example.txt"); +byte[] bytes = File.ReadAllBytes(filePath); Encoding fallbackEncoding = Encoding.UTF8; Encoding acceptableEncoding = FileEncoding.GetAcceptableEncoding(bytes, fallbackEncoding); @@ -98,7 +99,8 @@ Console.WriteLine("Acceptable 
encoding: " + acceptableEncoding.EncodingName); ### Example 7: Reading all text from a byte array using automatic encoding detection ```csharp -byte[] bytes = File.ReadAllBytes("example.txt"); +string filePath = "~/example.txt"; +byte[] bytes = File.ReadAllBytes(filePath); string text = FileEncoding.ReadAllBytes(bytes); Console.WriteLine("Text: " + text); @@ -106,7 +108,8 @@ Console.WriteLine("Text: " + text); ### Example 8: Reading all text from a byte array and transforming it into a target encoding ```csharp -byte[] bytes = File.ReadAllBytes("example.txt"); +string filePath = "~/example.txt"; +byte[] bytes = File.ReadAllBytes(filePath); Encoding targetEncoding = Encoding.UTF8; Encoding fallbackEncoding = Encoding.GetEncoding("ISO-8859-1"); diff --git a/UnitTests/ReadmeExampleTest.cs b/UnitTests/ReadmeExampleTest.cs index 841396b..4fdd38d 100644 --- a/UnitTests/ReadmeExampleTest.cs +++ b/UnitTests/ReadmeExampleTest.cs @@ -3,6 +3,7 @@ using System.Text; using MagicFileEncoding; using NUnit.Framework; +using UnitTests.TestHelper; namespace UnitTests; @@ -11,86 +12,116 @@ public class ReadmeExampleTest { // TODO adjust files and assert result + /// + /// Example 1: Getting the acceptable encoding of a file + /// [Test] public void Example1() { // Arrange + var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_UTF-8.txt" + .Replace('/', Path.DirectorySeparatorChar); // Act // -> Readme Code - string filename = "example.txt"; + //string filePath = "~/example.txt"; Encoding fallbackEncoding = Encoding.UTF8; - Encoding acceptableEncoding = FileEncoding.GetAcceptableEncoding(filename, fallbackEncoding); + Encoding acceptableEncoding = FileEncoding.GetAcceptableEncoding(filePath, fallbackEncoding); + Console.WriteLine("Acceptable encoding: " + acceptableEncoding.EncodingName); // <- // Assert + Assert.That(acceptableEncoding.EncodingName, Is.EqualTo(Encoding.UTF8.EncodingName)); } + /// + /// Example 2: Reading all text from a file using 
automatic encoding detection + /// [Test] public void Example2() { // Arrange + var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_UTF-8.txt" + .Replace('/', Path.DirectorySeparatorChar); // Act // -> Readme Code - string filename = "example.txt"; + //string filePath = "~/example.txt"; - string text = FileEncoding.ReadAllText(filename); + string text = FileEncoding.ReadAllText(filePath); Console.WriteLine("Text: " + text); // <- // Assert + Assert.That(text, Is.EqualTo("Kleiner Test äöüÄÖÜ?ß")); } + /// + /// Example 3: Reading all text from a file and transforming it into a target encoding + /// [Test] public void Example3() { // Arrange - + var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_ANSI.txt" + .Replace('/', Path.DirectorySeparatorChar); + // Act // -> Readme Code - string filename = "example.txt"; + // string filePath = "~/example.txt"; Encoding targetEncoding = Encoding.UTF8; Encoding fallbackEncoding = Encoding.GetEncoding("ISO-8859-1"); - string text = FileEncoding.ReadAllText(filename, targetEncoding, fallbackEncoding); + string text = FileEncoding.ReadAllText(filePath, targetEncoding, fallbackEncoding); Console.WriteLine("Text: " + text); // <- // Assert + Assert.That(text, Is.EqualTo("Kleiner Test äöüÄÖÜ?ß")); } + /// + /// Example 4: Writing text to a file in a specific encoding + /// [Test] public void Example4() { // Arrange + using var tmpFile = new TempFile(); + var filePath = tmpFile.Path; // Act // -> Readme Code - string path = "output.txt"; - Encoding targetEncoding = Encoding.UTF8; - string text = "Hello, world!"; + //string filePath = "~/output.txt"; + Encoding targetEncoding = Encoding.Unicode; + string text = "\u2387 Hello, world!"; - FileEncoding.WriteAllText(path, targetEncoding, text); + FileEncoding.WriteAllText(filePath, targetEncoding, text); Console.WriteLine("Text written to file."); // <- // Assert + 
Assert.That(FileEncoding.GetAcceptableEncoding(filePath).EncodingName, Is.EqualTo(Encoding.Unicode.EncodingName)); } + /// + /// Example 5: Providing writer access to a file in a specific encoding + /// [Test] public void Example5() { // Arrange + using var tmpFile = new TempFile(); + var filePath = tmpFile.Path; // Act // -> Readme Code - string path = "output.txt"; + // string filePath = "~/output.txt"; Encoding targetEncoding = Encoding.UTF8; - FileEncoding.Write(path, targetEncoding, writer => + FileEncoding.Write(filePath, targetEncoding, writer => { writer.WriteLine("Line 1"); writer.WriteLine("Line 2"); @@ -101,16 +132,23 @@ public void Example5() // <- // Assert + Assert.That(FileEncoding.ReadAllText(filePath, Encoding.UTF8), Is.EqualTo("Line 1\nLine 2\nLine 3\n")); } + /// + /// Example 6: Getting the acceptable encoding of a byte array + /// [Test] public void Example6() { // Arrange - + var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_UTF-8.txt" + .Replace('/', Path.DirectorySeparatorChar); + // Act // -> Readme Code - byte[] bytes = File.ReadAllBytes("example.txt"); + // string filePath = "~/example.txt"; + byte[] bytes = File.ReadAllBytes(filePath); Encoding fallbackEncoding = Encoding.UTF8; Encoding acceptableEncoding = FileEncoding.GetAcceptableEncoding(bytes, fallbackEncoding); @@ -118,32 +156,47 @@ public void Example6() // <- // Assert + Assert.That(acceptableEncoding.EncodingName, Is.EqualTo(Encoding.UTF8.EncodingName)); } + /// + /// Example 7: Reading all text from a byte array using automatic encoding detection + /// [Test] public void Example7() { // Arrange + var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_UTF-8.txt" + .Replace('/', Path.DirectorySeparatorChar); // Act // -> Readme Code - byte[] bytes = File.ReadAllBytes("example.txt"); + // string filePath = "~/example.txt"; + byte[] bytes = File.ReadAllBytes(filePath); string text = FileEncoding.ReadAllBytes(bytes); 
Console.WriteLine("Text: " + text); // <- // Assert + Assert.That(text, Is.EqualTo("Kleiner Test äöüÄÖÜ?ß")); } + /// + /// Example 8: Reading all text from a byte array and transforming it into a target encoding + /// [Test] public void Example8() { // Arrange + var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_ANSI.txt" + .Replace('/', Path.DirectorySeparatorChar); + // Act // -> Readme Code - byte[] bytes = File.ReadAllBytes("example.txt"); + // string filePath = "~/example.txt"; + byte[] bytes = File.ReadAllBytes(filePath); Encoding targetEncoding = Encoding.UTF8; Encoding fallbackEncoding = Encoding.GetEncoding("ISO-8859-1"); @@ -152,6 +205,7 @@ public void Example8() // <- // Assert + Assert.That(text, Is.EqualTo("Kleiner Test äöüÄÖÜ?ß")); } } \ No newline at end of file From c9be89f5321e6f86ce478cde7ce96a1002be2421 Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Mon, 5 Feb 2024 11:41:17 +0100 Subject: [PATCH 05/15] Fix multi platform issue in readme Example5 test Use Environment.NewLine instead of Unix new line --- CHANGELOG.md | 2 +- UnitTests/ReadmeExampleTest.cs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4743b5c..1e60407 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## v3.0.1 * Fix code example 7 and 8 in readme -* Adjust readme code example sto use filePath instead of filename +* Adjust readme code example sto use filePath instead of filename or path ## v3.0.0 - 5 Feb 2024 * Switch to .Net 8.0 diff --git a/UnitTests/ReadmeExampleTest.cs b/UnitTests/ReadmeExampleTest.cs index 4fdd38d..8db8a84 100644 --- a/UnitTests/ReadmeExampleTest.cs +++ b/UnitTests/ReadmeExampleTest.cs @@ -115,6 +115,8 @@ public void Example5() // Arrange using var tmpFile = new TempFile(); var filePath = tmpFile.Path; + var expectedResult = "Line 1" + Environment.NewLine + "Line 2" + Environment.NewLine + "Line 3" + + Environment.NewLine; // Act // -> Readme Code @@ -132,7 
+134,7 @@ public void Example5() // <- // Assert - Assert.That(FileEncoding.ReadAllText(filePath, Encoding.UTF8), Is.EqualTo("Line 1\nLine 2\nLine 3\n")); + Assert.That(FileEncoding.ReadAllText(filePath, Encoding.UTF8), Is.EqualTo(expectedResult)); } /// From c70fa4a6f6a837b5a193d938471a5d0c855bf7f1 Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Mon, 5 Feb 2024 13:51:28 +0100 Subject: [PATCH 06/15] Start to reduce cognitive complexity csharpsquid:S3776 --- MagicFileEncoding/ByteOrderMask.cs | 4 +- .../EncodingSecurityException.cs | 1 - MagicFileEncoding/Tools/EncodingTools.cs | 117 ++++++++++-------- UnitTests/ReadmeExampleTest.cs | 2 - 4 files changed, 70 insertions(+), 54 deletions(-) diff --git a/MagicFileEncoding/ByteOrderMask.cs b/MagicFileEncoding/ByteOrderMask.cs index ef9eebd..7ae7060 100644 --- a/MagicFileEncoding/ByteOrderMask.cs +++ b/MagicFileEncoding/ByteOrderMask.cs @@ -24,9 +24,9 @@ public static readonly ByteOrderMaskInfo UTF8 public static readonly ByteOrderMaskInfo UTF7 = new (Encoding.UTF7, 0x2b, 0x2f, 0x76); - public static readonly List List = new () + public static readonly IList List = new List() { UTF32BE, UTF32, UTF16BE, UTF16, UTF8, UTF7 - }; + }.AsReadOnly(); } #pragma warning restore SYSLIB0001 \ No newline at end of file diff --git a/MagicFileEncoding/EncodingSecurityException.cs b/MagicFileEncoding/EncodingSecurityException.cs index 8c19d35..a96c434 100644 --- a/MagicFileEncoding/EncodingSecurityException.cs +++ b/MagicFileEncoding/EncodingSecurityException.cs @@ -2,7 +2,6 @@ namespace MagicFileEncoding; -[Serializable] public class EncodingSecurityException : Exception { public EncodingSecurityException(string message) : base(message) diff --git a/MagicFileEncoding/Tools/EncodingTools.cs b/MagicFileEncoding/Tools/EncodingTools.cs index a43362c..6f90fe9 100644 --- a/MagicFileEncoding/Tools/EncodingTools.cs +++ b/MagicFileEncoding/Tools/EncodingTools.cs @@ -53,49 +53,9 @@ internal static byte[] AutomaticTransformBytes(byte[] bytes, 
Encoding targetEnco // For the below, false positives should be exceedingly rare (and would // be either slightly malformed UTF-8 (which would suit our purposes // anyway) or 8-bit extended ASCII/UTF-16/32 at a vanishingly long shot). - var i = 0; - var utf8 = false; - while (i < taster - 4) - { - if (bytes[i] <= 0x7F) - { - i += 1; - continue; - } + - // If all characters are below 0x80, then it is valid UTF8, - // but UTF8 is not 'required' (and therefore the text is more desirable to be treated as - // the default codepage of the computer). Hence, there's no "utf8 = true;" - // code unlike the next three checks. - - if (bytes[i] >= 0xC2 && bytes[i] <= 0xDF && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0) - { - i += 2; - utf8 = true; - continue; - } - - if (bytes[i] >= 0xE0 && bytes[i] <= 0xF0 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 && bytes[i + 2] >= 0x80 && - bytes[i + 2] < 0xC0) - { - i += 3; - utf8 = true; - continue; - } - - if (bytes[i] >= 0xF0 && bytes[i] <= 0xF4 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 && - bytes[i + 2] >= 0x80 && bytes[i + 2] < 0xC0 && bytes[i + 3] >= 0x80 && bytes[i + 3] < 0xC0) - { - i += 4; - utf8 = true; - continue; - } - - utf8 = false; - break; - } - - if (utf8) + if (CheckForUtf8(bytes, taster)) { text = provideText ? Encoding.UTF8.GetString(bytes) : null; return Encoding.UTF8; @@ -138,6 +98,53 @@ internal static byte[] AutomaticTransformBytes(byte[] bytes, Encoding targetEnco return fallbackEncoding ?? FileEncoding.DefaultFallback; } + private static bool CheckForUtf8(byte[] bytes, int taster) + { + var utf8 = false; + var i = 0; + while (i < taster - 4) + { + if (bytes[i] <= 0x7F) + { + i += 1; + continue; + } + + // If all characters are below 0x80, then it is valid UTF8, + // but UTF8 is not 'required' (and therefore the text is more desirable to be treated as + // the default codepage of the computer). Hence, there's no "utf8 = true;" + // code unlike the next three checks. 
+ + if (bytes[i] >= 0xC2 && bytes[i] <= 0xDF && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0) + { + i += 2; + utf8 = true; + continue; + } + + if (bytes[i] >= 0xE0 && bytes[i] <= 0xF0 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 && bytes[i + 2] >= 0x80 && + bytes[i + 2] < 0xC0) + { + i += 3; + utf8 = true; + continue; + } + + if (bytes[i] >= 0xF0 && bytes[i] <= 0xF4 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 && + bytes[i + 2] >= 0x80 && bytes[i + 2] < 0xC0 && bytes[i + 3] >= 0x80 && bytes[i + 3] < 0xC0) + { + i += 4; + utf8 = true; + continue; + } + + utf8 = false; + break; + } + + return utf8; + } + /// /// A long shot - let's see if we can find "charset=xyz" or /// "encoding=xyz" to identify the encoding: @@ -153,17 +160,30 @@ private static bool LongShot(ref string? text, bool provideText, int taster, byt for (var n = 0; n < taster - 9; n++) { if (!IsCharsetMarker(bytes, n) && !IsEncodingMarker(bytes, n)) + { continue; + } - if (bytes[n + 0] == 'c' || bytes[n + 0] == 'C') n += 8; - else n += 9; + if (bytes[n + 0] == 'c' || bytes[n + 0] == 'C') + { + n += 8; + } + else + { + n += 9; + } - if (bytes[n] == '"' || bytes[n] == '\'') n++; + if (bytes[n] == '"' || bytes[n] == '\'') + { + n++; + } var oldN = n; while (IsCharsetNameRange(taster, bytes, n)) + { n++; + } var nb = new byte[n - oldN]; Array.Copy(bytes, oldN, nb, 0, n - oldN); @@ -171,10 +191,9 @@ private static bool LongShot(ref string? text, bool provideText, int taster, byt { var internalEnc = Encoding.ASCII.GetString(nb); text = provideText ? 
Encoding.GetEncoding(internalEnc).GetString(bytes) : null; - { - encoding = Encoding.GetEncoding(internalEnc); - return true; - } + + encoding = Encoding.GetEncoding(internalEnc); + return true; } catch { diff --git a/UnitTests/ReadmeExampleTest.cs b/UnitTests/ReadmeExampleTest.cs index 8db8a84..554c891 100644 --- a/UnitTests/ReadmeExampleTest.cs +++ b/UnitTests/ReadmeExampleTest.cs @@ -10,8 +10,6 @@ namespace UnitTests; [TestFixture] public class ReadmeExampleTest { - // TODO adjust files and assert result - /// /// Example 1: Getting the acceptable encoding of a file /// From 339304fdc3c7633340839a1e65ec0566e6124951 Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Fri, 19 Dec 2025 14:29:06 +0100 Subject: [PATCH 07/15] Upgrade to .NET 10.0, bump version to 4.0.0, and update documentation --- .../.idea/projectSettingsUpdater.xml | 4 +++- CHANGELOG.md | 3 ++- Documentation/upgrade_guide_3_0_0_to_4_0_x.md | 8 ++++++++ MagicFileEncoding/MagicFileEncoding.csproj | 4 ++-- README.md | 3 ++- UnitTests/UnitTests.csproj | 4 ++-- 6 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 Documentation/upgrade_guide_3_0_0_to_4_0_x.md diff --git a/.idea/.idea.MagicFileEncoding/.idea/projectSettingsUpdater.xml b/.idea/.idea.MagicFileEncoding/.idea/projectSettingsUpdater.xml index bd1d7c7..3e83363 100644 --- a/.idea/.idea.MagicFileEncoding/.idea/projectSettingsUpdater.xml +++ b/.idea/.idea.MagicFileEncoding/.idea/projectSettingsUpdater.xml @@ -1,6 +1,8 @@ - \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e60407..5411833 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ -## v3.0.1 +## v4.0.0 +* Switch to .Net 10.0 * Fix code example 7 and 8 in readme * Adjust readme code example sto use filePath instead of filename or path diff --git a/Documentation/upgrade_guide_3_0_0_to_4_0_x.md b/Documentation/upgrade_guide_3_0_0_to_4_0_x.md new file mode 100644 index 0000000..f59665c --- /dev/null +++ 
b/Documentation/upgrade_guide_3_0_0_to_4_0_x.md @@ -0,0 +1,8 @@ +# Upgrade Guide v3.0.0 to v4.0.* + +No code changes required! + +### Dependency Change + +Now requires .Net 10.0 + diff --git a/MagicFileEncoding/MagicFileEncoding.csproj b/MagicFileEncoding/MagicFileEncoding.csproj index 00e9ebe..14f7e04 100644 --- a/MagicFileEncoding/MagicFileEncoding.csproj +++ b/MagicFileEncoding/MagicFileEncoding.csproj @@ -1,7 +1,7 @@ - net8.0 + net10.0 3.0.1 Magic File Encoding Jan Schwien @@ -18,7 +18,7 @@ Be aware of possible transformation issues if the target encoding is simpler tha It is strongly recommended to write unit tests for your use case to ensure the load and transformation works as expected. enable - 11 + 14 3.0.1 diff --git a/README.md b/README.md index 87097d8..c6e75f5 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ comprehensive solution to handle various encoding scenarios effortlessly. [MagicFileEncoding at nuget.org](https://www.nuget.org/packages/MagicFileEncoding/) ## .Net Version -- **.Net 8:** Magic File Encoding **3.0.0 and newer** +- **.Net 10:** Magic File Encoding **4.0.0 and newer** +- **.Net 8:** Magic File Encoding **3.0.0** - **.Net 6:** Magic File Encoding **2.0.1** ## Transformation Considerations diff --git a/UnitTests/UnitTests.csproj b/UnitTests/UnitTests.csproj index 3146ca3..61d2b84 100644 --- a/UnitTests/UnitTests.csproj +++ b/UnitTests/UnitTests.csproj @@ -1,10 +1,10 @@ - net8.0 + net10.0 false enable - 11 + 14 From 43a9ebe91d04c4e69ca1baed999a2cddbceb660c Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Fri, 19 Dec 2025 14:38:46 +0100 Subject: [PATCH 08/15] Suppress warnings CA2022 and S2674 for BOM reading in EncodingTools due to expected dynamic length read operation --- MagicFileEncoding/Tools/EncodingTools.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/MagicFileEncoding/Tools/EncodingTools.cs b/MagicFileEncoding/Tools/EncodingTools.cs index 6f90fe9..56d5575 100644 --- 
a/MagicFileEncoding/Tools/EncodingTools.cs +++ b/MagicFileEncoding/Tools/EncodingTools.cs @@ -271,9 +271,11 @@ private static bool IsCharsetNameRange(int taster, byte[] bytes, int n) var bom = new byte[4]; fileStream.Position = 0; - - // ReSharper disable once MustUseReturnValue + + // read the BOM with dynamical length +#pragma warning disable CA2022, S2674 fileStream.Read(bom, 0, 4); +#pragma warning restore CA2022 return GetEncodingByBom(bom, fallbackEncoding, out _, false); } From c1ac863bd3ae8bc27f901f00a12f738fb4d83736 Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Fri, 19 Dec 2025 14:40:47 +0100 Subject: [PATCH 09/15] Update changelog: fix typo in readme example note and document suppressed warnings --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5411833..0a81a98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ ## v4.0.0 * Switch to .Net 10.0 * Fix code example 7 and 8 in readme -* Adjust readme code example sto use filePath instead of filename or path +* Adjust readme code example to use filePath instead of filename or path +* Suppress warnings CA2022 and S2674 due to expected dynamic array length ## v3.0.0 - 5 Feb 2024 * Switch to .Net 8.0 From ae4d520b423a7e8098210a8396c75fe0e40acf97 Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Fri, 19 Dec 2025 15:10:37 +0100 Subject: [PATCH 10/15] Bump package and assembly version to 4.0.0, enable package generation on build --- MagicFileEncoding/MagicFileEncoding.csproj | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MagicFileEncoding/MagicFileEncoding.csproj b/MagicFileEncoding/MagicFileEncoding.csproj index 14f7e04..1fe035d 100644 --- a/MagicFileEncoding/MagicFileEncoding.csproj +++ b/MagicFileEncoding/MagicFileEncoding.csproj @@ -2,7 +2,7 @@ net10.0 - 3.0.1 + 4.0.0 Magic File Encoding Jan Schwien by Jan Schwien @@ -19,7 +19,8 @@ Be aware of possible transformation issues if the target encoding is simpler 
tha It is strongly recommended to write unit tests for your use case to ensure the load and transformation works as expected. enable 14 - 3.0.1 + 4.0.0 + true From 658dedfdc46687f7e545fcc1ce56dfc0f7f6d551 Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Fri, 19 Dec 2025 15:18:41 +0100 Subject: [PATCH 11/15] Fix solution configuration to correctly build Release mode for all projects --- MagicFileEncoding.sln | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MagicFileEncoding.sln b/MagicFileEncoding.sln index cf760ed..9566acb 100644 --- a/MagicFileEncoding.sln +++ b/MagicFileEncoding.sln @@ -12,11 +12,11 @@ Global GlobalSection(ProjectConfigurationPlatforms) = postSolution {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.Build.0 = Debug|Any CPU - {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.ActiveCfg = Debug|Any CPU - {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.Build.0 = Debug|Any CPU + {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.ActiveCfg = Release|Any CPU + {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.Build.0 = Release|Any CPU {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.Build.0 = Debug|Any CPU - {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.ActiveCfg = Debug|Any CPU - {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.Build.0 = Debug|Any CPU + {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.ActiveCfg = Release|Any CPU + {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal From f4e76e12d1a868f528a376d22d187f5917329734 Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Fri, 19 Dec 2025 15:21:02 +0100 Subject: [PATCH 12/15] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a81a98..0200862 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ ## v4.0.0 * Switch to .Net 10.0 +* Code refactoring * Fix code example 7 and 8 in readme +* Add tests for readme code examples * Adjust readme code example to use filePath instead of filename or path * Suppress warnings CA2022 and S2674 due to expected dynamic array length From 83927485018e9fbea79c6e5767ffcf74c4652f28 Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Fri, 19 Dec 2025 15:24:52 +0100 Subject: [PATCH 13/15] Update changelog and global.json for .NET 10.0 and C# 14 upgrade --- CHANGELOG.md | 2 ++ global.json | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0200862..ebf6282 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,12 @@ ## v4.0.0 * Switch to .Net 10.0 +* Switch to C# 14 * Code refactoring * Fix code example 7 and 8 in readme * Add tests for readme code examples * Adjust readme code example to use filePath instead of filename or path * Suppress warnings CA2022 and S2674 due to expected dynamic array length +* Remove Serializable annotation from EncodingSecurityException ## v3.0.0 - 5 Feb 2024 * Switch to .Net 8.0 diff --git a/global.json b/global.json index faf133a..9a523dc 100644 --- a/global.json +++ b/global.json @@ -1,6 +1,6 @@ { "sdk": { - "version": "8.0.1", + "version": "10.0.0", "rollForward": "latestMajor", "allowPrerelease": false } From 26396861167bc6b93281725e03a78628782ff4fa Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Fri, 19 Dec 2025 20:05:40 +0100 Subject: [PATCH 14/15] Add release date to changelog for v4.0.0 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebf6282..216297f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## v4.0.0 +## v4.0.0 - 19 Dec 2025 * Switch to .Net 10.0 * Switch to C# 14 * Code refactoring From 0b371d81e94802c621f9fcf7c6b31d64e9bc0f3a Mon Sep 17 00:00:00 2001 From: Jan Schwien Date: Fri, 19 Dec 2025
20:07:45 +0100 Subject: [PATCH 15/15] Upgrade GitHub Actions workflow to use .NET 10.0 --- .github/workflows/dotnet-core.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dotnet-core.yml b/.github/workflows/dotnet-core.yml index 3ee686f..411a45b 100644 --- a/.github/workflows/dotnet-core.yml +++ b/.github/workflows/dotnet-core.yml @@ -15,10 +15,10 @@ jobs: runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 - - name: Setup .NET 8.0 + - name: Setup .NET 10.0 uses: actions/setup-dotnet@v3 with: - dotnet-version: 8.0.x + dotnet-version: 10.0.x include-prerelease: false - name: Install dependencies run: dotnet restore