diff --git a/.github/workflows/dotnet-core.yml b/.github/workflows/dotnet-core.yml
index 3ee686f..411a45b 100644
--- a/.github/workflows/dotnet-core.yml
+++ b/.github/workflows/dotnet-core.yml
@@ -15,10 +15,10 @@ jobs:
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- - name: Setup .NET 8.0
+ - name: Setup .NET 10.0
uses: actions/setup-dotnet@v3
with:
- dotnet-version: 8.0.x
+ dotnet-version: 10.0.x
include-prerelease: false
- name: Install dependencies
run: dotnet restore
diff --git a/.idea/.idea.MagicFileEncoding/.idea/projectSettingsUpdater.xml b/.idea/.idea.MagicFileEncoding/.idea/projectSettingsUpdater.xml
index bd1d7c7..3e83363 100644
--- a/.idea/.idea.MagicFileEncoding/.idea/projectSettingsUpdater.xml
+++ b/.idea/.idea.MagicFileEncoding/.idea/projectSettingsUpdater.xml
@@ -1,6 +1,8 @@
-
+
+
+
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2ada8e3..216297f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,14 @@
-## v3.0.0 - 5 Feb 2024
+## v4.0.0 - 19 Dez 2025
+* Switch to .Net 10.0
+* Switch to C# 14
+* Code refactoring
+* Fix code example 7 and 8 in readme
+* Add tests for readme code examples
+* Adjust readme code example to use filePath instead of filename or path
+* Suppress warnings CA2022 and S2674 due to expected dynamic array length
+* Remove Serializable annotation from EncodingSecurityException
+
+## v3.0.0 - 5 Feb 2024
* Switch to .Net 8.0
* Updated dependencies
* Improved error handling for empty and whitespace path
diff --git a/Documentation/upgrade_guide_3_0_0_to_4_0_x.md b/Documentation/upgrade_guide_3_0_0_to_4_0_x.md
new file mode 100644
index 0000000..f59665c
--- /dev/null
+++ b/Documentation/upgrade_guide_3_0_0_to_4_0_x.md
@@ -0,0 +1,8 @@
+# Upgrade Guide v3.0.0 to v4.0.*
+
+No code changes required!
+
+### Dependency Change
+
+Now requires .Net 10.0
+
diff --git a/MagicFileEncoding.sln b/MagicFileEncoding.sln
index c090d89..9566acb 100644
--- a/MagicFileEncoding.sln
+++ b/MagicFileEncoding.sln
@@ -1,22 +1,22 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MagicFileEncoding", "MagicFileEncoding\MagicFileEncoding.csproj", "{89C8EAD5-218B-46A8-8DE9-93783166F9FC}"
-EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnitTests", "UnitTests\UnitTests.csproj", "{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}"
-EndProject
-Global
- GlobalSection(SolutionConfigurationPlatforms) = preSolution
- Debug|Any CPU = Debug|Any CPU
- Release|Any CPU = Release|Any CPU
- EndGlobalSection
- GlobalSection(ProjectConfigurationPlatforms) = postSolution
- {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.Build.0 = Release|Any CPU
- {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.Build.0 = Release|Any CPU
- EndGlobalSection
-EndGlobal
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MagicFileEncoding", "MagicFileEncoding\MagicFileEncoding.csproj", "{89C8EAD5-218B-46A8-8DE9-93783166F9FC}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnitTests", "UnitTests\UnitTests.csproj", "{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.Build.0 = Release|Any CPU
+ {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.Build.0 = Release|Any CPU
+ EndGlobalSection
+EndGlobal
diff --git a/MagicFileEncoding/ByteOrderMask.cs b/MagicFileEncoding/ByteOrderMask.cs
index ef9eebd..7ae7060 100644
--- a/MagicFileEncoding/ByteOrderMask.cs
+++ b/MagicFileEncoding/ByteOrderMask.cs
@@ -24,9 +24,9 @@ public static readonly ByteOrderMaskInfo UTF8
public static readonly ByteOrderMaskInfo UTF7
= new (Encoding.UTF7, 0x2b, 0x2f, 0x76);
- public static readonly List List = new ()
+ public static readonly IList List = new List()
{
UTF32BE, UTF32, UTF16BE, UTF16, UTF8, UTF7
- };
+ }.AsReadOnly();
}
#pragma warning restore SYSLIB0001
\ No newline at end of file
diff --git a/MagicFileEncoding/EncodingSecurityException.cs b/MagicFileEncoding/EncodingSecurityException.cs
index 8c19d35..a96c434 100644
--- a/MagicFileEncoding/EncodingSecurityException.cs
+++ b/MagicFileEncoding/EncodingSecurityException.cs
@@ -2,7 +2,6 @@
namespace MagicFileEncoding;
-[Serializable]
public class EncodingSecurityException : Exception
{
public EncodingSecurityException(string message) : base(message)
diff --git a/MagicFileEncoding/MagicFileEncoding.csproj b/MagicFileEncoding/MagicFileEncoding.csproj
index 1eabf03..1fe035d 100644
--- a/MagicFileEncoding/MagicFileEncoding.csproj
+++ b/MagicFileEncoding/MagicFileEncoding.csproj
@@ -1,8 +1,8 @@
- net8.0
- 3.0.0
+ net10.0
+ 4.0.0
Magic File Encoding
Jan Schwien
by Jan Schwien
@@ -18,8 +18,9 @@ Be aware of possible transformation issues if the target encoding is simpler tha
It is strongly recommended to write unit tests for your use case to ensure the load and transformation works as expected.
enable
- 11
- 3.0.0
+ 14
+ 4.0.0
+ true
diff --git a/MagicFileEncoding/Tools/EncodingTools.cs b/MagicFileEncoding/Tools/EncodingTools.cs
index a43362c..56d5575 100644
--- a/MagicFileEncoding/Tools/EncodingTools.cs
+++ b/MagicFileEncoding/Tools/EncodingTools.cs
@@ -53,49 +53,9 @@ internal static byte[] AutomaticTransformBytes(byte[] bytes, Encoding targetEnco
// For the below, false positives should be exceedingly rare (and would
// be either slightly malformed UTF-8 (which would suit our purposes
// anyway) or 8-bit extended ASCII/UTF-16/32 at a vanishingly long shot).
- var i = 0;
- var utf8 = false;
- while (i < taster - 4)
- {
- if (bytes[i] <= 0x7F)
- {
- i += 1;
- continue;
- }
+
- // If all characters are below 0x80, then it is valid UTF8,
- // but UTF8 is not 'required' (and therefore the text is more desirable to be treated as
- // the default codepage of the computer). Hence, there's no "utf8 = true;"
- // code unlike the next three checks.
-
- if (bytes[i] >= 0xC2 && bytes[i] <= 0xDF && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0)
- {
- i += 2;
- utf8 = true;
- continue;
- }
-
- if (bytes[i] >= 0xE0 && bytes[i] <= 0xF0 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 && bytes[i + 2] >= 0x80 &&
- bytes[i + 2] < 0xC0)
- {
- i += 3;
- utf8 = true;
- continue;
- }
-
- if (bytes[i] >= 0xF0 && bytes[i] <= 0xF4 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 &&
- bytes[i + 2] >= 0x80 && bytes[i + 2] < 0xC0 && bytes[i + 3] >= 0x80 && bytes[i + 3] < 0xC0)
- {
- i += 4;
- utf8 = true;
- continue;
- }
-
- utf8 = false;
- break;
- }
-
- if (utf8)
+ if (CheckForUtf8(bytes, taster))
{
text = provideText ? Encoding.UTF8.GetString(bytes) : null;
return Encoding.UTF8;
@@ -138,6 +98,53 @@ internal static byte[] AutomaticTransformBytes(byte[] bytes, Encoding targetEnco
return fallbackEncoding ?? FileEncoding.DefaultFallback;
}
+    /// <summary>
+    /// Heuristic check whether the sampled bytes look like text that needs UTF-8:
+    /// at least one multi-byte sequence matching the UTF-8 lead/continuation byte
+    /// pattern was found and no byte violated that pattern.
+    /// </summary>
+    /// <param name="bytes">Raw content to inspect.</param>
+    /// <param name="taster">
+    /// Size of the sample. Only positions below <c>taster - 4</c> start a check, so the
+    /// look-ahead of up to three bytes stays below <c>taster</c>
+    /// (assumes <c>taster</c> does not exceed <c>bytes.Length</c> - confirm against callers).
+    /// </param>
+    /// <returns>
+    /// <c>true</c> if at least one 2-, 3- or 4-byte sequence was recognised and no
+    /// unexpected byte was hit; <c>false</c> for pure ASCII samples, for samples too short
+    /// to be inspected, or as soon as a byte does not fit the pattern.
+    /// </returns>
+    private static bool CheckForUtf8(byte[] bytes, int taster)
+    {
+        var utf8 = false;
+        var i = 0;
+        while (i < taster - 4)
+        {
+            // If all characters are below 0x80, then it is valid UTF8,
+            // but UTF8 is not 'required' (and therefore the text is more desirable to be treated as
+            // the default codepage of the computer). Hence, there's no "utf8 = true;"
+            // code unlike the next three checks.
+            if (bytes[i] <= 0x7F)
+            {
+                i += 1;
+                continue;
+            }
+
+            // 2-byte sequence: lead byte 0xC2-0xDF followed by one continuation byte.
+            if (bytes[i] >= 0xC2 && bytes[i] <= 0xDF && IsContinuation(bytes[i + 1]))
+            {
+                i += 2;
+                utf8 = true;
+                continue;
+            }
+
+            // 3-byte sequence: lead byte 0xE0-0xEF followed by two continuation bytes.
+            // The upper bound must stop at 0xEF: 0xF0 is a 4-byte lead, and accepting it here
+            // would consume only three bytes of e.g. F0 9F 98 80 and then reject the
+            // remaining continuation byte, reporting valid UTF-8 as non-UTF-8.
+            if (bytes[i] >= 0xE0 && bytes[i] <= 0xEF && IsContinuation(bytes[i + 1]) &&
+                IsContinuation(bytes[i + 2]))
+            {
+                i += 3;
+                utf8 = true;
+                continue;
+            }
+
+            // 4-byte sequence: lead byte 0xF0-0xF4 followed by three continuation bytes.
+            if (bytes[i] >= 0xF0 && bytes[i] <= 0xF4 && IsContinuation(bytes[i + 1]) &&
+                IsContinuation(bytes[i + 2]) && IsContinuation(bytes[i + 3]))
+            {
+                i += 4;
+                utf8 = true;
+                continue;
+            }
+
+            // Any other byte does not fit the UTF-8 pattern: give up on UTF-8.
+            utf8 = false;
+            break;
+        }
+
+        return utf8;
+
+        // Continuation bytes are 0x80-0xBF.
+        static bool IsContinuation(byte value) => value >= 0x80 && value < 0xC0;
+    }
+
///
/// A long shot - let's see if we can find "charset=xyz" or
/// "encoding=xyz" to identify the encoding:
@@ -153,17 +160,30 @@ private static bool LongShot(ref string? text, bool provideText, int taster, byt
for (var n = 0; n < taster - 9; n++)
{
if (!IsCharsetMarker(bytes, n) && !IsEncodingMarker(bytes, n))
+ {
continue;
+ }
- if (bytes[n + 0] == 'c' || bytes[n + 0] == 'C') n += 8;
- else n += 9;
+ if (bytes[n + 0] == 'c' || bytes[n + 0] == 'C')
+ {
+ n += 8;
+ }
+ else
+ {
+ n += 9;
+ }
- if (bytes[n] == '"' || bytes[n] == '\'') n++;
+ if (bytes[n] == '"' || bytes[n] == '\'')
+ {
+ n++;
+ }
var oldN = n;
while (IsCharsetNameRange(taster, bytes, n))
+ {
n++;
+ }
var nb = new byte[n - oldN];
Array.Copy(bytes, oldN, nb, 0, n - oldN);
@@ -171,10 +191,9 @@ private static bool LongShot(ref string? text, bool provideText, int taster, byt
{
var internalEnc = Encoding.ASCII.GetString(nb);
text = provideText ? Encoding.GetEncoding(internalEnc).GetString(bytes) : null;
- {
- encoding = Encoding.GetEncoding(internalEnc);
- return true;
- }
+
+ encoding = Encoding.GetEncoding(internalEnc);
+ return true;
}
catch
{
@@ -252,9 +271,11 @@ private static bool IsCharsetNameRange(int taster, byte[] bytes, int n)
var bom = new byte[4];
fileStream.Position = 0;
-
- // ReSharper disable once MustUseReturnValue
+
+ // read the BOM with dynamical length
+#pragma warning disable CA2022, S2674
fileStream.Read(bom, 0, 4);
+#pragma warning restore CA2022
return GetEncodingByBom(bom, fallbackEncoding, out _, false);
}
diff --git a/README.md b/README.md
index 87097d8..c6e75f5 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,8 @@ comprehensive solution to handle various encoding scenarios effortlessly.
[MagicFileEncoding at nuget.org](https://www.nuget.org/packages/MagicFileEncoding/)
## .Net Version
-- **.Net 8:** Magic File Encoding **3.0.0 and newer**
+- **.Net 10:** Magic File Encoding **4.0.0 and newer**
+- **.Net 8:** Magic File Encoding **3.0.0**
- **.Net 6:** Magic File Encoding **2.0.1**
## Transformation Considerations
diff --git a/UnitTests/ReadmeExampleTest.cs b/UnitTests/ReadmeExampleTest.cs
new file mode 100644
index 0000000..554c891
--- /dev/null
+++ b/UnitTests/ReadmeExampleTest.cs
@@ -0,0 +1,211 @@
+using System;
+using System.IO;
+using System.Text;
+using MagicFileEncoding;
+using NUnit.Framework;
+using UnitTests.TestHelper;
+
+namespace UnitTests;
+
+/// <summary>
+/// Runs the code examples from the readme as NUnit tests.
+/// In every test the statements between the "-> Readme Code" and "<-" markers mirror the
+/// readme snippet; the snippet's own filePath declaration is kept as a comment and the
+/// path is supplied by the Arrange section instead.
+/// </summary>
+[TestFixture]
+public class ReadmeExampleTest
+{
+ /// <summary>
+ /// Example 1: Getting the acceptable encoding of a file
+ /// </summary>
+ [Test]
+ public void Example1()
+ {
+ // Arrange
+ // Member access binds tighter than '+', so Replace() only normalizes the
+ // separators of the relative path literal, not of the work directory.
+ var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_UTF-8.txt"
+ .Replace('/', Path.DirectorySeparatorChar);
+ 
+ // Act
+ // -> Readme Code
+ //string filePath = "~/example.txt";
+ Encoding fallbackEncoding = Encoding.UTF8;
+
+ Encoding acceptableEncoding = FileEncoding.GetAcceptableEncoding(filePath, fallbackEncoding);
+
+ Console.WriteLine("Acceptable encoding: " + acceptableEncoding.EncodingName);
+ // <-
+ 
+ // Assert
+ Assert.That(acceptableEncoding.EncodingName, Is.EqualTo(Encoding.UTF8.EncodingName));
+ }
+ 
+ /// <summary>
+ /// Example 2: Reading all text from a file using automatic encoding detection
+ /// </summary>
+ [Test]
+ public void Example2()
+ {
+ // Arrange
+ var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_UTF-8.txt"
+ .Replace('/', Path.DirectorySeparatorChar);
+ 
+ // Act
+ // -> Readme Code
+ //string filePath = "~/example.txt";
+
+ string text = FileEncoding.ReadAllText(filePath);
+ Console.WriteLine("Text: " + text);
+ // <-
+ 
+ // Assert
+ Assert.That(text, Is.EqualTo("Kleiner Test äöüÄÖÜ?ß"));
+ }
+
+ /// <summary>
+ /// Example 3: Reading all text from a file and transforming it into a target encoding
+ /// </summary>
+ [Test]
+ public void Example3()
+ {
+ // Arrange
+ var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_ANSI.txt"
+ .Replace('/', Path.DirectorySeparatorChar);
+ 
+ // Act
+ // -> Readme Code
+ // string filePath = "~/example.txt";
+ Encoding targetEncoding = Encoding.UTF8;
+ Encoding fallbackEncoding = Encoding.GetEncoding("ISO-8859-1");
+
+ string text = FileEncoding.ReadAllText(filePath, targetEncoding, fallbackEncoding);
+ Console.WriteLine("Text: " + text);
+ // <-
+ 
+ // Assert
+ Assert.That(text, Is.EqualTo("Kleiner Test äöüÄÖÜ?ß"));
+ }
+ 
+ /// <summary>
+ /// Example 4: Writing text to a file in a specific encoding
+ /// </summary>
+ [Test]
+ public void Example4()
+ {
+ // Arrange
+ // TempFile is declared outside this file (presumably in UnitTests.TestHelper);
+ // 'using var' disposes it when the test method ends.
+ using var tmpFile = new TempFile();
+ var filePath = tmpFile.Path;
+ 
+ // Act
+ // -> Readme Code
+ //string filePath = "~/output.txt";
+ Encoding targetEncoding = Encoding.Unicode;
+ string text = "\u2387 Hello, world!";
+
+ FileEncoding.WriteAllText(filePath, targetEncoding, text);
+ Console.WriteLine("Text written to file.");
+ // <-
+ 
+ // Assert
+ // Detect the encoding of the written file again and compare it with the requested one.
+ Assert.That(FileEncoding.GetAcceptableEncoding(filePath).EncodingName, Is.EqualTo(Encoding.Unicode.EncodingName));
+ }
+ 
+ /// <summary>
+ /// Example 5: Providing writer access to a file in a specific encoding
+ /// </summary>
+ [Test]
+ public void Example5()
+ {
+ // Arrange
+ using var tmpFile = new TempFile();
+ var filePath = tmpFile.Path;
+ // Three lines, each followed by Environment.NewLine - assumes the provided writer
+ // terminates lines with Environment.NewLine (TODO confirm).
+ var expectedResult = "Line 1" + Environment.NewLine + "Line 2" + Environment.NewLine + "Line 3" +
+ Environment.NewLine;
+ 
+ // Act
+ // -> Readme Code
+ // string filePath = "~/output.txt";
+ Encoding targetEncoding = Encoding.UTF8;
+
+ FileEncoding.Write(filePath, targetEncoding, writer =>
+ {
+ writer.WriteLine("Line 1");
+ writer.WriteLine("Line 2");
+ writer.WriteLine("Line 3");
+ });
+
+ Console.WriteLine("Text written to file.");
+ // <-
+ 
+ // Assert
+ Assert.That(FileEncoding.ReadAllText(filePath, Encoding.UTF8), Is.EqualTo(expectedResult));
+ }
+ 
+ /// <summary>
+ /// Example 6: Getting the acceptable encoding of a byte array
+ /// </summary>
+ [Test]
+ public void Example6()
+ {
+ // Arrange
+ var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_UTF-8.txt"
+ .Replace('/', Path.DirectorySeparatorChar);
+ 
+ // Act
+ // -> Readme Code
+ // string filePath = "~/example.txt";
+ byte[] bytes = File.ReadAllBytes(filePath);
+ Encoding fallbackEncoding = Encoding.UTF8;
+
+ Encoding acceptableEncoding = FileEncoding.GetAcceptableEncoding(bytes, fallbackEncoding);
+ Console.WriteLine("Acceptable encoding: " + acceptableEncoding.EncodingName);
+ // <-
+ 
+ // Assert
+ Assert.That(acceptableEncoding.EncodingName, Is.EqualTo(Encoding.UTF8.EncodingName));
+ }
+ 
+ /// <summary>
+ /// Example 7: Reading all text from a byte array using automatic encoding detection
+ /// </summary>
+ [Test]
+ public void Example7()
+ {
+ // Arrange
+ var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_UTF-8.txt"
+ .Replace('/', Path.DirectorySeparatorChar);
+ 
+ // Act
+ // -> Readme Code
+ // string filePath = "~/example.txt";
+ byte[] bytes = File.ReadAllBytes(filePath);
+
+ string text = FileEncoding.ReadAllBytes(bytes);
+ Console.WriteLine("Text: " + text);
+ // <-
+ 
+ // Assert
+ Assert.That(text, Is.EqualTo("Kleiner Test äöüÄÖÜ?ß"));
+ }
+ 
+ /// <summary>
+ /// Example 8: Reading all text from a byte array and transforming it into a target encoding
+ /// </summary>
+ [Test]
+ public void Example8()
+ {
+ // Arrange
+ var filePath = TestContext.CurrentContext.WorkDirectory + "/TestFiles/SimpleFiles/A_ANSI.txt"
+ .Replace('/', Path.DirectorySeparatorChar);
+
+ 
+ // Act
+ // -> Readme Code
+ // string filePath = "~/example.txt";
+ byte[] bytes = File.ReadAllBytes(filePath);
+ Encoding targetEncoding = Encoding.UTF8;
+ Encoding fallbackEncoding = Encoding.GetEncoding("ISO-8859-1");
+
+ string text = FileEncoding.ReadAllBytes(bytes, targetEncoding, fallbackEncoding);
+ Console.WriteLine("Text: " + text);
+ // <-
+ 
+ // Assert
+ Assert.That(text, Is.EqualTo("Kleiner Test äöüÄÖÜ?ß"));
+ }
+ 
+}
\ No newline at end of file
diff --git a/UnitTests/UnitTests.csproj b/UnitTests/UnitTests.csproj
index 3146ca3..61d2b84 100644
--- a/UnitTests/UnitTests.csproj
+++ b/UnitTests/UnitTests.csproj
@@ -1,10 +1,10 @@
- net8.0
+ net10.0
false
enable
- 11
+ 14
diff --git a/global.json b/global.json
index dad2db5..9a523dc 100644
--- a/global.json
+++ b/global.json
@@ -1,7 +1,7 @@
{
"sdk": {
- "version": "8.0.0",
+ "version": "10.0.0",
"rollForward": "latestMajor",
- "allowPrerelease": true
+ "allowPrerelease": false
}
}
\ No newline at end of file