diff --git a/pr_agent/git_providers/gitea_provider.py b/pr_agent/git_providers/gitea_provider.py index 89a6248e9b..bda89796b8 100644 --- a/pr_agent/git_providers/gitea_provider.py +++ b/pr_agent/git_providers/gitea_provider.py @@ -813,10 +813,10 @@ def get_pull_request_diff(self, owner: str, repo: str, pr_number: int) -> str: if hasattr(response, 'data'): raw_data = response.data.read() - return raw_data.decode('utf-8') + return raw_data.decode('utf-8', errors='replace') elif isinstance(response, tuple): raw_data = response[0].read() - return raw_data.decode('utf-8') + return raw_data.decode('utf-8', errors='replace') else: error_msg = f"Unexpected response format received from API: {type(response)}" self.logger.error(error_msg) @@ -946,6 +946,11 @@ def get_file_content(self, owner: str, repo: str, commit_sha: str, filepath: str except ApiException as e: self.logger.error(f"Error getting file: {filepath}, content: {e}") return "" + except UnicodeDecodeError as e: + self.logger.warning( + f"Skipping non-UTF-8 file content for {filepath}: {e}" + ) + return "" except Exception as e: self.logger.error(f"Unexpected error: {e}") return "" diff --git a/tests/unittest/test_gitea_provider.py b/tests/unittest/test_gitea_provider.py index 4174b398d0..4302a7607d 100644 --- a/tests/unittest/test_gitea_provider.py +++ b/tests/unittest/test_gitea_provider.py @@ -103,3 +103,65 @@ def call_api_side_effect(path, method, **kwargs): args, kwargs = mock_api_client.call_api.call_args assert args[0] == '/repos/owner/repo/pulls/123/commits' assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken'] + + + @patch('pr_agent.git_providers.gitea_provider.get_settings') + @patch('pr_agent.git_providers.gitea_provider.giteapy.ApiClient') + def test_gitea_provider_skips_non_utf8_file_content(self, mock_api_client_cls, mock_get_settings): + settings = MagicMock() + settings.get.side_effect = lambda k, d=None: { + 'GITEA.URL': 'https://gitea.example.com', + 'GITEA.PERSONAL_ACCESS_TOKEN': 'test-token', + 'GITEA.REPO_SETTING': None, + 'GITEA.SKIP_SSL_VERIFICATION': False, + 'GITEA.SSL_CA_CERT': None + }.get(k, d) + mock_get_settings.return_value = settings + + mock_api_client = mock_api_client_cls.return_value + mock_api_client.configuration.api_key = {'Authorization': 'token test-token'} + mock_resp = MagicMock() + mock_resp.data = BytesIO(b'\xff\xfe\x00binary') + mock_api_client.call_api.return_value = mock_resp + + from pr_agent.git_providers.gitea_provider import RepoApi + + repo_api = RepoApi(mock_api_client) + + assert repo_api.get_file_content('owner', 'repo', 'sha1', 'assets/image.webp') == '' + args, kwargs = mock_api_client.call_api.call_args + assert args[0] == '/repos/owner/repo/raw/assets/image.webp' + assert kwargs.get('query_params') == [('ref', 'sha1')] + assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken'] + + + @patch('pr_agent.git_providers.gitea_provider.get_settings') + @patch('pr_agent.git_providers.gitea_provider.giteapy.ApiClient') + def test_gitea_provider_decodes_non_utf8_diff_with_replacement(self, mock_api_client_cls, mock_get_settings): + settings = MagicMock() + settings.get.side_effect = lambda k, d=None: { + 'GITEA.URL': 'https://gitea.example.com', + 'GITEA.PERSONAL_ACCESS_TOKEN': 'test-token', + 'GITEA.REPO_SETTING': None, + 'GITEA.SKIP_SSL_VERIFICATION': False, + 'GITEA.SSL_CA_CERT': None + }.get(k, d) + mock_get_settings.return_value = settings + + mock_api_client = mock_api_client_cls.return_value + mock_api_client.configuration.api_key = {'Authorization': 'token test-token'} + mock_resp = MagicMock() + mock_resp.data = BytesIO(b'diff --git a/image.png b/image.webp\n+' + bytes([0xff]) + b'binary') + mock_api_client.call_api.return_value = mock_resp + + from pr_agent.git_providers.gitea_provider import RepoApi + + repo_api = RepoApi(mock_api_client) + + diff = repo_api.get_pull_request_diff('owner', 'repo', 123) + + assert 'diff --git a/image.png b/image.webp' in diff + assert '�' in diff + args, kwargs = mock_api_client.call_api.call_args + assert args[0] == '/repos/owner/repo/pulls/123.diff' + assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']