diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4e22034fe..c175fbee2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 ### Deprecated
 ### Removed
 ### Fixed
+- Fix `SmartSerializer` throwing `JsonException` on JSON with unpaired UTF-16 surrogate escape sequences produced by search highlighting ([#403](https://github.com/opensearch-project/opensearch-php/pull/403))
 ### Security
 
 ### Updated APIs
diff --git a/src/OpenSearch/Serializers/SmartSerializer.php b/src/OpenSearch/Serializers/SmartSerializer.php
index 79f70d08d..cf0725583 100644
--- a/src/OpenSearch/Serializers/SmartSerializer.php
+++ b/src/OpenSearch/Serializers/SmartSerializer.php
@@ -76,6 +76,27 @@ private function decode(?string $data): array
         try {
             return json_decode($data, true, 512, JSON_THROW_ON_ERROR);
         } catch (\JsonException $e) {
+            if ($e->getCode() === JSON_ERROR_UTF16) {
+                // Search highlighting can leave lone \uD800-\uDFFF escapes that
+                // json_decode() rejects. Double the backslash of those escapes only,
+                // so they decode as literal text. The alternation consumes valid
+                // surrogate pairs and every other escape (\", \\, \n, ...) first and
+                // passes them through unchanged, so the rest of the payload is intact.
+                $escaped = preg_replace_callback(
+                    '/\\\\(?:u[dD][89abAB][0-9a-fA-F]{2}\\\\u[dD][c-fC-F][0-9a-fA-F]{2}'
+                    . '|(u[dD][89a-fA-F][0-9a-fA-F]{2})|.)/s',
+                    static function (array $match): string {
+                        return isset($match[1]) ? '\\' . $match[0] : $match[0];
+                    },
+                    $data
+                ) ?? $data;
+                try {
+                    return json_decode($escaped, true, 512, JSON_THROW_ON_ERROR);
+                } catch (\JsonException $e) {
+                    throw new JsonException($e->getCode(), $escaped, $e);
+                }
+            }
+
             throw new JsonException($e->getCode(), $data, $e);
         }
     }
diff --git a/tests/Serializers/SmartSerializerTest.php b/tests/Serializers/SmartSerializerTest.php
index 0080c1aaa..f5f06caf7 100644
--- a/tests/Serializers/SmartSerializerTest.php
+++ b/tests/Serializers/SmartSerializerTest.php
@@ -55,6 +55,38 @@ public function testThrowJsonException(): void
         }
     }
 
+    public function testDeserializeWithUnpairedUtf16Surrogates(): void
+    {
+        $data = '{ "data": "ud83d\\ude4f" }';
+
+        $result = $this->serializer->deserialize($data, []);
+
+        $this->assertIsArray($result);
+        $this->assertArrayHasKey('data', $result);
+        $this->assertSame('ud83d\\ude4f', $result['data']);
+    }
+
+    public function testDeserializeWithHighlightedSurrogatePair(): void
+    {
+        $data = '{ "data": "\\ud83c\\udd11" }';
+
+        $result = $this->serializer->deserialize($data, []);
+
+        $this->assertIsArray($result);
+        $this->assertArrayHasKey('data', $result);
+        // A complete pair is valid JSON and decodes to the real code point (U+1F111).
+        $this->assertSame("\u{1F111}", $result['data']);
+    }
+
+    public function testDeserializeKeepsOtherEscapesNextToUnpairedSurrogates(): void
+    {
+        $data = '{ "data": "say \\"hi\\"\\n\\ud83d<em>\\ude4f</em> \\ud83c\\udd11" }';
+
+        $result = $this->serializer->deserialize($data, []);
+
+        $this->assertSame("say \"hi\"\n\\ud83d<em>\\ude4f</em> \u{1F111}", $result['data']);
+    }
+
     public function testDeserialize(): void
     {
         $data = '{ "foo" : "bar" }';