diff --git a/CHANGELOG.md b/CHANGELOG.md index b6d1b41..0ed47bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.0.0] – unreleased + +- Mb: Make encoding detection stricter: when the source encoding is not given or is `auto`, it is now detected with `mb_detect_encoding()` in strict mode, and `UndetectableEncodingException` is thrown if detection fails. + + ## [2.0.0] – 2023-03-07 - Iconv: Fix warning on PHP 8.2 when passing `null` as source encoding. @@ -23,5 +28,6 @@ The project has been revived and is now available under the name [`fossar/transc - Added Nix expression for easier development and sharing the environment with CI. - Switched to GitHub Actions for CI and added more PHP versions. -[2.0.0]: https://github.com/fossar/transcoder/compare/1.0.1...v2.0.0 +[3.0.0]: https://github.com/fossar/transcoder/compare/v2.0.0...v3.0.0 +[2.0.0]: https://github.com/fossar/transcoder/compare/v1.0.1...v2.0.0 [1.0.1]: https://github.com/fossar/transcoder/compare/1.0.0...v1.0.1 diff --git a/src/MbTranscoder.php b/src/MbTranscoder.php index 053fb36..232f44e 100644 --- a/src/MbTranscoder.php +++ b/src/MbTranscoder.php @@ -55,34 +55,31 @@ public function transcode(string $string, $from = null, ?string $to = null): str } else { $this->assertSupported($from); } + } else { + $from = 'auto'; } if ($to) { $this->assertSupported($to, false); } - $handleErrors = !$from || 'auto' === $from; - if ($handleErrors) { - set_error_handler( - function ($no, $warning) use ($string): void { - throw new UndetectableEncodingException($string, $warning); - }, - E_WARNING - ); + if ($from === 'auto') { + $from = mb_detect_encoding($string, 'auto', true); } - try { - $result = mb_convert_encoding( - $string, - $to ?: $this->defaultEncoding, - $from ?: 'auto' - ); - } finally { - if ($handleErrors) { - restore_error_handler(); - } + if ($from === false) { + throw new UndetectableEncodingException($string, 'Unable to detect character encoding'); } + $result = mb_convert_encoding( + $string, + $to ?: $this->defaultEncoding, + $from + ); + + // For PHPStan: We check the encoding 
is valid. + assert($result !== false); + return $result; } diff --git a/tests/MbTranscoderTest.php b/tests/MbTranscoderTest.php index 422fdbe..97e9077 100644 --- a/tests/MbTranscoderTest.php +++ b/tests/MbTranscoderTest.php @@ -55,7 +55,7 @@ public function testUndetectableEncoding(): void $this->expectException(\Ddeboer\Transcoder\Exception\UndetectableEncodingException::class); $this->expectExceptionMessage('is undetectable'); $result = $this->transcoder->transcode( - '‘curly quotes make this incompatible with 1252’', + '‘Windows-1252 encodes curly quotes as 0x91 and 0x92, which are indistinguishable from any other single-byte encoding’', null, 'windows-1252' );