diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4e22034fe..c175fbee2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
### Deprecated
### Removed
### Fixed
+- Fix `SmartSerializer` throwing `JsonException` on JSON with unpaired UTF-16 surrogate escape sequences produced by search highlighting ([#403](https://github.com/opensearch-project/opensearch-php/pull/403))
### Security
### Updated APIs
diff --git a/src/OpenSearch/Serializers/SmartSerializer.php b/src/OpenSearch/Serializers/SmartSerializer.php
index 79f70d08d..cf0725583 100644
--- a/src/OpenSearch/Serializers/SmartSerializer.php
+++ b/src/OpenSearch/Serializers/SmartSerializer.php
@@ -76,6 +76,26 @@ private function decode(?string $data): array
         try {
             return json_decode($data, true, 512, JSON_THROW_ON_ERROR);
         } catch (\JsonException $e) {
+            if ($e->getCode() === JSON_ERROR_UTF16) {
+                // json_decode() rejects lone UTF-16 surrogate escapes (e.g. a pair split by
+                // highlight tags). Double the backslash of those escapes only, so they decode
+                // to their literal text. Valid pairs and every other escape (\", \\, \n, ...)
+                // are matched and copied through unchanged so the rest of the document is intact.
+                $escaped = preg_replace_callback(
+                    '/\\\\(?:u[dD][89abAB][0-9a-fA-F]{2}\\\\u[dD][c-fC-F][0-9a-fA-F]{2}'
+                    . '|(u[dD][89a-fA-F][0-9a-fA-F]{2})|.)/s',
+                    static function (array $match): string {
+                        return ($match[1] ?? '') !== '' ? '\\' . $match[0] : $match[0];
+                    },
+                    $data
+                ) ?? $data; // null means PCRE failed; retry with the unmodified payload
+                try {
+                    return json_decode($escaped, true, 512, JSON_THROW_ON_ERROR);
+                } catch (\JsonException $e) {
+                    throw new JsonException($e->getCode(), $escaped, $e);
+                }
+            }
+
             throw new JsonException($e->getCode(), $data, $e);
         }
     }
diff --git a/tests/Serializers/SmartSerializerTest.php b/tests/Serializers/SmartSerializerTest.php
index 0080c1aaa..f5f06caf7 100644
--- a/tests/Serializers/SmartSerializerTest.php
+++ b/tests/Serializers/SmartSerializerTest.php
@@ -55,6 +55,28 @@ public function testThrowJsonException(): void
}
}
+    public function testDeserializeWithUnpairedUtf16Surrogates(): void
+    {
+        // The payload carries a lone low-surrogate escape (\ude4f) with no preceding high surrogate.
+        $payload = '{ "data": "ud83d\\ude4f" }';
+        $decoded = $this->serializer->deserialize($payload, []);
+        // The lone escape is expected to come back as its literal text rather than raising.
+        $this->assertIsArray($decoded);
+        $this->assertArrayHasKey('data', $decoded);
+        $this->assertSame('ud83d\\ude4f', $decoded['data']);
+    }
+
+    public function testDeserializeWithHighlightedSurrogatePair(): void
+    {
+        // The <em> tag splits the pair into two lone surrogate escapes; an intact pair would decode to U+1F111.
+        $data = '{ "data": "\\ud83c<em>\\udd11</em>" }';
+        $result = $this->serializer->deserialize($data, []);
+        // Both halves must survive as literal escape text, with the highlight markup untouched.
+        $this->assertIsArray($result);
+        $this->assertArrayHasKey('data', $result);
+        $this->assertSame('\\ud83c<em>\\udd11</em>', $result['data']);
+    }
+
public function testDeserialize(): void
{
$data = '{ "foo" : "bar" }';