diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..846280fe --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +# Ignore all __pycache__ directories +**/__pycache__/ + +# Ignore all .egg-info directories or files +*.egg-info/ \ No newline at end of file diff --git a/src/ember/__init__.py b/src/ember/__init__.py index b7f18d34..3cdea817 100644 --- a/src/ember/__init__.py +++ b/src/ember/__init__.py @@ -145,7 +145,7 @@ def initialize_ember( model = registry.get_model("openai:gpt-4") """ # Import here to avoid circular imports - from ember.core.utils.logging import configure_logging + from ember.core.utils.ember_logging import configure_logging from ember.core.config.manager import create_config_manager # 0. Configure logging first diff --git a/src/ember/core/app_context.py b/src/ember/core/app_context.py index 10d2ee45..b42c1f6f 100644 --- a/src/ember/core/app_context.py +++ b/src/ember/core/app_context.py @@ -21,7 +21,7 @@ from ember.core.registry.model.base.registry.model_registry import ModelRegistry from ember.core.registry.model.base.services.usage_service import UsageService from ember.core.registry.model.initialization import initialize_registry -from ember.core.utils.logging import configure_logging +from ember.core.utils.ember_logging import configure_logging # Re-import for patching to work correctly import logging diff --git a/src/ember/core/registry/model/base/schemas/chat_schemas.py b/src/ember/core/registry/model/base/schemas/chat_schemas.py index 61d64699..fb4649b3 100644 --- a/src/ember/core/registry/model/base/schemas/chat_schemas.py +++ b/src/ember/core/registry/model/base/schemas/chat_schemas.py @@ -115,5 +115,6 @@ class ChatResponse(BaseModel): """ data: str + embedding: list[float] = None raw_output: Any = None usage: Optional[UsageStats] = None diff --git a/src/ember/core/registry/model/providers/openai/openai_provider.py b/src/ember/core/registry/model/providers/openai/openai_provider.py index b9cdd34f..5b45819e 100644 --- 
a/src/ember/core/registry/model/providers/openai/openai_provider.py +++ b/src/ember/core/registry/model/providers/openai/openai_provider.py @@ -71,10 +71,10 @@ """ import logging -from typing import Any, Dict, Final, List, Optional, cast +from typing import Any, Dict, Final, List, Optional, cast, ClassVar import openai -from pydantic import Field, field_validator +from pydantic import Field, field_validator, ConfigDict, BaseModel from requests.exceptions import HTTPError from tenacity import retry, stop_after_attempt, wait_exponential @@ -84,7 +84,7 @@ ChatResponse, ProviderParams, ) -from ember.core.registry.model.base.schemas.model_info import ModelInfo +from ember.core.registry.model.base.schemas.model_info import ModelInfo, ProviderInfo from ember.core.registry.model.base.utils.model_registry_exceptions import ( InvalidPromptError, ProviderAPIError, @@ -96,6 +96,15 @@ ) from ember.plugin_system import provider +from ember.core.registry.model.providers.provider_capability import ( + EmbeddingRequest, + EmbeddingResponse, + EmbeddingProviderModel, + CompletionRequest, + CompletionResponse, + TextCompletionProviderModel, +) +import os class OpenAIProviderParams(ProviderParams): """OpenAI-specific provider parameters for fine-tuning API requests. @@ -438,3 +447,283 @@ def forward(self, request: ChatRequest) -> ChatResponse: message=f"API error: {str(exc)}", cause=exc, ) + +class OpenAICompletionParameters(BaseModel): + """Parameter conversion for OpenAI, specifically text completion requests. + + Handles parameter validation and conversion between Ember's universal format + and OpenAI's specific API requirements. + + Attributes: + prompt: The text prompt to complete. + max_tokens: Maximum number of tokens to generate. + temperature: Controls randomness (0.0-2.0). + stop_sequences: Sequences that signal end of generation. 
+ """ + + model_config = ConfigDict( + protected_namespaces=(), # Disable Pydantic's protected namespace checks + ) + + prompt: str + max_tokens: Optional[int] = Field(default=50) + temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0) + stop_sequences: Optional[List[str]] = None + + def to_openai_kwargs(self) -> Dict[str, Any]: + """Converting parameters to OpenAI API format. + + Returns: + Dictionary of parameters for the OpenAI API. + """ + kwargs: Dict[str, Any] = { + "prompt": self.prompt, + "max_tokens": self.max_tokens, + "temperature": self.temperature, + } + + if self.stop_sequences: + kwargs["stop"] = self.stop_sequences + + return kwargs + +@provider("OpenAIExtended") +class OpenAIExtendedModel(TextCompletionProviderModel, EmbeddingProviderModel): + """Extended OpenAI provider supporting chat, text completion, and embeddings. + + This class implements a provider that supports multiple model types through + capability interfaces. + + Attributes: + PROVIDER_NAME: Provider name for registration with the plugin system. + CAPABILITIES: Capability flags showing supported model types. + """ + + PROVIDER_NAME: ClassVar[str] = "OpenAIExtended" + CAPABILITIES: ClassVar[Dict[str, bool]] = { + "chat": True, + "completion": True, + "embedding": True, + } + + def create_client(self) -> Any: + """Creating and configuring the OpenAI client. + + Retrieves the API key from the model information and configures the client. + + Returns: + The configured OpenAI client. + + Raises: + ProviderAPIError: If API key is missing or invalid. + """ + import openai + + api_key: Optional[str] = self.model_info.get_api_key() + if not api_key: + raise ProviderAPIError("OpenAI API key is missing or invalid.") + + openai.api_key = api_key + return openai + + def forward(self, request: ChatRequest) -> ChatResponse: + """Processing a chat request (implementing BaseProviderModel). 
+ + This method provides the standard chat functionality required by + the BaseProviderModel interface. + + Args: + request: Chat request to process. + + Returns: + Chat response from the model. + + Raises: + InvalidPromptError: If prompt is empty. + ProviderAPIError: For unexpected errors during API calls. + """ + # Implementation would match OpenAIModel's forward method + # This is a simplified placeholder + if not request.prompt: + raise InvalidPromptError("OpenAI prompt cannot be empty.") + + # Implementation details would mirror the standard OpenAIModel + # Return placeholder + return ChatResponse(data="Chat implementation placeholder") + + def complete(self, request: CompletionRequest) -> CompletionResponse: + """Processing a text completion request. + + Implements text completion capabilities using the OpenAI completions API. + + Args: + request: Text completion request. + + Returns: + Completion response from the model. + + Raises: + InvalidPromptError: If prompt is empty. + ProviderAPIError: For unexpected errors during API calls. 
+ """ + if not request.prompt: + raise InvalidPromptError("OpenAI completion prompt cannot be empty.") + + logger.info( + "OpenAI completion invoked", + extra={ + "provider": self.PROVIDER_NAME, + "model_name": self.model_info.name, + "prompt_length": len(request.prompt), + }, + ) + + # Convert universal parameters to OpenAI format + openai_parameters = OpenAICompletionParameters( + prompt=request.prompt, + max_tokens=request.max_tokens, + temperature=request.temperature, + stop_sequences=request.stop_sequences, + ) + openai_kwargs = openai_parameters.to_openai_kwargs() + + # Add provider-specific parameters + provider_params = cast(OpenAICompletionParameters, request.provider_params) + openai_kwargs.update( + {k: v for k, v in provider_params.items() if v is not None} + ) + + try: + # Request timeout from parameters or default + timeout = openai_kwargs.pop("timeout", 30) + + # Make the API call + response = self.client.completions.create( + model=self.model_info.name, + timeout=timeout, + **openai_kwargs, + ) + + # Extract completion text + text = response.choices[0].text.strip() + + # Calculate usage statistics + # For simplicity, we assume a usage calculator is implemented elsewhere + usage_stats = ( + None # self.usage_calculator.calculate(response, self.model_info) + ) + + return CompletionResponse( + text=text, + raw_output=response, + usage=usage_stats, + ) + + except Exception as exc: + logger.exception("Unexpected error in OpenAIExtendedModel.complete()") + raise ProviderAPIError(str(exc)) from exc + + def embed(self, request: EmbeddingRequest) -> EmbeddingResponse: + """Generating embeddings for the input text(s). + + Implements embedding capabilities using the OpenAI embeddings API. + + Args: + request: Embedding request with input text(s). + + Returns: + Embedding response with vector representations. + + Raises: + InvalidPromptError: If input is empty. + ProviderAPIError: For unexpected errors during API calls. 
+ """ + # Use the provided model or default to the model in model_info + model_name = request.model or self.model_info.name + + input_text = request.input + if not input_text: + raise InvalidPromptError("Input text for embeddings cannot be empty.") + + logger.info( + "OpenAI embeddings invoked", + extra={ + "provider": self.PROVIDER_NAME, + "model_name": model_name, + "input_type": "batch" if isinstance(input_text, list) else "single", + }, + ) + + try: + # Make the API call + response = self.client.embeddings.create( + model=model_name, + input=input_text, + timeout=30, + ) + + # Extract embeddings + if isinstance(input_text, list): + print(f"batch processing") + # For batch processing + embeddings = [item.embedding for item in response.data] + else: + # For single text input + embeddings = response.data[0].embedding + + # Get dimensions from the first embedding + if isinstance(embeddings, list) and isinstance(embeddings[0], list): + dimensions = len(embeddings[0]) + else: + dimensions = len(embeddings) + + # Calculate usage statistics (implementation would depend on your system) + usage_stats = ( + None # self.usage_calculator.calculate(response, self.model_info) + ) + + return EmbeddingResponse( + embeddings=embeddings, + model=model_name, + dimensions=dimensions, + raw_output=response, + usage=usage_stats, + ) + + except Exception as exc: + logger.exception("Unexpected error in OpenAIExtendedModel.embed()") + raise ProviderAPIError(str(exc)) from exc + + +def create_openai_embedding_model(model_name: str = "text-embedding-ada-002") -> OpenAIExtendedModel: + """ + Tool for creating an OpenAI embedding model by passing the embedding model name. + + Args: + model_name: Name of particular embedding model endpoint as specified by the OpenAI API + + Returns: + OpenAIExtendedModel initialized to serve model_name; None if model could not + be created + + Raises: + InvalidPromptError: If input is empty. + ProviderAPIError: For unexpected errors during API calls. 
+ """ + # All OpenAI embedding models contain "text-embedding" in their model name + if "text-embedding" not in model_name: + return None + + model_info = ModelInfo( + id="openai:gpt-4o", + name=model_name, + provider=ProviderInfo( + name="OpenAI", + default_api_key=os.environ.get("OPENAI_API_KEY"), + base_url="https://api.openai.com/v1", + ) + ) + + embedding_model = OpenAIExtendedModel(model_info) + + return embedding_model \ No newline at end of file diff --git a/src/ember/core/registry/model/providers/provider_capability.py b/src/ember/core/registry/model/providers/provider_capability.py new file mode 100644 index 00000000..169a4c64 --- /dev/null +++ b/src/ember/core/registry/model/providers/provider_capability.py @@ -0,0 +1,310 @@ +import logging +from typing import Any, ClassVar, Dict, List, Optional, TypeVar, Union + +import numpy as np +from pydantic import BaseModel, ConfigDict, Field, field_validator +from typing_extensions import Protocol, TypedDict + +from ember.core.registry.model.base.schemas.chat_schemas import ( + ProviderParams, +) +from ember.core.registry.model.base.schemas.usage import UsageStats +from ember.core.registry.model.base.utils.model_registry_exceptions import ( + InvalidPromptError, + ProviderAPIError, +) +from ember.core.registry.model.providers.base_provider import BaseProviderModel + +logger = logging.getLogger(__name__) + +class CompletionRequest(BaseModel): + """Universal text completion request model. + + Similar to ChatRequest but designed for single-turn text completion. + Used for traditional completion models that predate chat-oriented models. + + Attributes: + prompt: The text prompt to complete. + max_tokens: Optional maximum number of tokens to generate. + temperature: Optional sampling temperature controlling randomness. + stop_sequences: Optional list of sequences that signal the end of generation. + provider_params: Provider-specific parameters as a flexible dictionary. 
+ """ + + model_config = ConfigDict( + protected_namespaces=(), # Disable Pydantic's protected namespace checks + ) + + prompt: str + max_tokens: Optional[int] = None + temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0) + stop_sequences: Optional[List[str]] = None + provider_params: ProviderParams = Field(default_factory=dict) + + +class CompletionResponse(BaseModel): + """Universal text completion response model. + + Standardizes the response format for text completion models. + + Attributes: + text: The generated completion text. + raw_output: The unprocessed provider-specific response data. + usage: Optional usage statistics for token counting and cost tracking. + """ + + model_config = ConfigDict( + protected_namespaces=(), # Disable Pydantic's protected namespace checks + ) + + text: str + raw_output: Any = None + usage: Optional[UsageStats] = None + + +class EmbeddingRequest(BaseModel): + """Request model for generating vector embeddings from text. + + Used to generate semantic vector representations that capture the meaning + of input text, suitable for similarity comparisons, clustering, and search. + + Attributes: + input: Text input(s) to embed - can be a single string or list of strings. + model: Optional specific embedding model to use when the provider has multiple. + provider_params: Provider-specific parameters as a flexible dictionary. + """ + + model_config = ConfigDict( + protected_namespaces=(), # Disable Pydantic's protected namespace checks + ) + + input: Union[str, List[str]] + model: Optional[str] = None + provider_params: ProviderParams = Field(default_factory=dict) + + @field_validator("input") + def validate_input(cls, value: Union[str, List[str]]) -> Union[str, List[str]]: + """Validating the input text is not empty. + + Args: + value: The input text(s) to validate. + + Returns: + The validated input value. + + Raises: + ValueError: If input is empty string or empty list. 
+ """ + if isinstance(value, str) and not value.strip(): + raise ValueError("Input text cannot be empty") + if isinstance(value, list) and ( + len(value) == 0 or all(not t.strip() for t in value) + ): + raise ValueError("Input list cannot be empty or contain only empty strings") + return value + + +class EmbeddingResponse(BaseModel): + """Response model containing vector embeddings. + + Contains numerical vector representations of input text that capture semantic meaning. + + Attributes: + embeddings: Vector representation(s) of the input text(s). + model: Name of the embedding model used. + dimensions: The dimensionality of the embedding vectors. + raw_output: The unprocessed provider-specific response data. + usage: Optional usage statistics for token counting and cost tracking. + """ + + model_config = ConfigDict( + protected_namespaces=(), # Disable Pydantic's protected namespace checks + ) + + embeddings: Union[List[float], List[List[float]]] + model: str + dimensions: int + raw_output: Any = None + usage: Optional[UsageStats] = None + + +# Type variable for implementation-specific typing +ModelT = TypeVar("ModelT", bound="CapabilityModel") + +class TextCompletionCapable(Protocol): + """Protocol defining the interface for text completion models. + + Provider implementations supporting text completion should implement this protocol. + """ + + def complete(self, request: CompletionRequest) -> CompletionResponse: + """Processing a text completion request. + + Args: + request: The text completion request. + + Returns: + The text completion response. + """ + ... + + def complete_text(self, prompt: str, **kwargs: Any) -> CompletionResponse: + """Convenience method for simple text completion. + + Args: + prompt: The text to complete. + **kwargs: Additional parameters for the completion request. + + Returns: + The text completion response. + """ + ... + + +class EmbeddingCapable(Protocol): + """Protocol defining the interface for embedding models. 
+ + Provider implementations supporting embeddings should implement this protocol. + """ + + def embed(self, request: EmbeddingRequest) -> EmbeddingResponse: + """Generating embeddings for the input text(s). + + Args: + request: The embedding request. + + Returns: + The embedding response containing vector representations. + """ + ... + + def embed_text( + self, input_text: Union[str, List[str]], **kwargs: Any + ) -> EmbeddingResponse: + """Convenience method for simple embedding generation. + + Args: + input_text: The text(s) to embed. + **kwargs: Additional parameters for the embedding request. + + Returns: + The embedding response with vector representations. + """ + ... + + +# Base class for capability-aware models +class CapabilityModel(BaseProviderModel): + """Extended base provider model with capability flags. + + This class extends BaseProviderModel with explicit capability tracking + to allow runtime capability detection for different model types. + + Attributes: + CAPABILITIES: Class variable mapping capability names to support flags. + """ + + CAPABILITIES: ClassVar[Dict[str, bool]] = { + "chat": True, + "completion": False, + "embedding": False, + } + + +# ----------------------------------------------------------------------------- +# PART 3: Extended Provider Base Classes +# ----------------------------------------------------------------------------- + + +class TextCompletionProviderModel(CapabilityModel, TextCompletionCapable): + """Base class for text completion model providers. + + Extends the BaseProviderModel to support text completion capabilities. + Providers supporting text completion should inherit from this class. + """ + + CAPABILITIES: ClassVar[Dict[str, bool]] = { + "chat": True, + "completion": True, + "embedding": False, + } + + def complete(self, request: CompletionRequest) -> CompletionResponse: + """Processing a text completion request. + + Args: + request: The text completion request. + + Returns: + The text completion response. 
+ + Raises: + NotImplementedError: If the provider has not implemented this capability. + """ + raise NotImplementedError( + f"Provider {self.__class__.__name__} does not support text completion" + ) + + def complete_text(self, prompt: str, **kwargs: Any) -> CompletionResponse: + """Convenience method for text completion. + + Creates a CompletionRequest from the prompt and additional parameters, + then delegates to the complete() method for processing. + + Args: + prompt: The text to complete. + **kwargs: Additional parameters for the completion request. + + Returns: + The text completion response. + """ + request = CompletionRequest(prompt=prompt, **kwargs) + return self.complete(request=request) + + +class EmbeddingProviderModel(CapabilityModel, EmbeddingCapable): + """Base class for embedding model providers. + + Extends the BaseProviderModel to support embedding capabilities. + Providers supporting embeddings should inherit from this class. + """ + + CAPABILITIES: ClassVar[Dict[str, bool]] = { + "chat": True, + "completion": False, + "embedding": True, + } + + def embed(self, request: EmbeddingRequest) -> EmbeddingResponse: + """Generating embeddings for the input text(s). + + Args: + request: The embedding request. + + Returns: + The embedding response containing vector representations. + + Raises: + NotImplementedError: If the provider has not implemented this capability. + """ + raise NotImplementedError( + f"Provider {self.__class__.__name__} does not support embeddings" + ) + + def embed_text( + self, input_text: Union[str, List[str]], **kwargs: Any + ) -> EmbeddingResponse: + """Convenience method for generating embeddings. + + Creates an EmbeddingRequest from the input text and additional parameters, + then delegates to the embed() method for processing. + + Args: + input_text: The text(s) to embed. + **kwargs: Additional parameters for the embedding request. + + Returns: + The embedding response with vector representations. 
+ """ + request = EmbeddingRequest(input=input_text, **kwargs) + return self.embed(request=request) \ No newline at end of file diff --git a/src/ember/core/registry/operator/core/diversity_scorer.py b/src/ember/core/registry/operator/core/diversity_scorer.py new file mode 100644 index 00000000..7d7c0300 --- /dev/null +++ b/src/ember/core/registry/operator/core/diversity_scorer.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from collections import Counter +from typing import List, Optional + +from ember.core.registry.operator.base.operator_base import Operator +from ember.core.registry.specification.specification import Specification +from ember.core.types import EmberModel + +from ember.core.utils.eval.evaluators import DiversityEnsembledEvaluator +from ember.core.registry.model.examples.provider_extension_guide import EmbeddingProviderModel + +import logging + +class DiversityScoringOperatorInputs(EmberModel): + """Input model for DiversityScoringOperator. + + Attributes: + responses (List[str]): A list of response strings. + """ + + responses: List[str] + +class DiversityScoringOperatorOutputs(EmberModel): + """Output model for DiversityScoringOperator. + + Attributes: + responses (List[str]): A list of response strings. + diversity score (int): A score representing the diversity between all responses. 
+ + """ + + responses: List[str] + diversity_score: int + + +class DiversityScoringOperator( + Operator[DiversityScoringOperatorInputs, DiversityScoringOperatorOutputs] +): + """Operator to aggregate all responses and run a score of a diversity-based metric.""" + + specification: Specification = Specification( + input_model=DiversityScoringOperatorInputs, + structured_output=DiversityScoringOperatorOutputs, + ) + def __init__(self, *, embedding_model: EmbeddingProviderModel) -> None: + self.embedding_model = embedding_model + if self.embedding_model is None: + logging.warning("DiversityScoringEvaluator isn't initialized with an embedding model") + + + def forward( + self, *, inputs: DiversityScoringOperatorInputs + ) -> DiversityScoringOperatorOutputs: + if not inputs.responses or not inputs.model_service: + return {"responses": None, "diversity_score": 0} + + score = DiversityEnsembledEvaluator(embedding_model=self.embedding_model).evaluate(inputs.responses).score + # logger instead + logging.info(f"DiversityScoringOperator's score from {len(inputs.responses)} responses: {score}") + + return {"responses": inputs.responses, "diversity_score": score} \ No newline at end of file diff --git a/src/ember/core/utils/embedding_utils.py b/src/ember/core/utils/embedding_utils.py index a02592f1..48de1b1b 100644 --- a/src/ember/core/utils/embedding_utils.py +++ b/src/ember/core/utils/embedding_utils.py @@ -1,13 +1,18 @@ from __future__ import annotations -import math from abc import ABC, abstractmethod from typing import List, Protocol +import numpy as np + +from ember.core.registry.model.providers.provider_capability import (EmbeddingProviderModel, + EmbeddingResponse) +from ember.core.registry.model.providers.openai.openai_provider import create_openai_embedding_model ################################################################ # 1) Embedding Model Interfaces & Implementations ################################################################ +# NOTE: These protocols are 
now outdated by the EmbeddingProviderModel/CapabilityModel interfaces class EmbeddingModel(Protocol): """Interface for embedding models. @@ -56,7 +61,6 @@ def embed_text(self, text: str) -> List[float]: return [] return [ord(ch) / 256.0 for ch in text] - ################################################################ # 2) Similarity Metric Interface & Implementations ################################################################ @@ -105,13 +109,16 @@ def similarity(self, vec_a: List[float], vec_b: List[float]) -> float: if not vec_a or not vec_b: return 0.0 - dot_product: float = sum(a * b for a, b in zip(vec_a, vec_b)) - norm_a: float = math.sqrt(sum(a * a for a in vec_a)) - norm_b: float = math.sqrt(sum(b * b for b in vec_b)) + a = np.array(vec_a) + b = np.array(vec_b) + + norm_a = np.linalg.norm(a) + norm_b = np.linalg.norm(b) + if norm_a == 0 or norm_b == 0: return 0.0 - return dot_product / (norm_a * norm_b) + return float(np.dot(a, b) / (norm_a * norm_b)) ################################################################ @@ -120,7 +127,7 @@ def similarity(self, vec_a: List[float], vec_b: List[float]) -> float: def calculate_text_similarity( - text1: str, text2: str, model: EmbeddingModel, metric: SimilarityMetric + text1: str, text2: str, model: EmbeddingProviderModel, metric: SimilarityMetric ) -> float: """Calculates text similarity using an embedding model and a similarity metric. @@ -136,22 +143,27 @@ def calculate_text_similarity( Returns: float: The computed similarity score. 
""" - embedding1: List[float] = model.embed_text(text=text1) - embedding2: List[float] = model.embed_text(text=text2) - return metric.similarity(vec_a=embedding1, vec_b=embedding2) + response1: EmbeddingResponse = model.embed_text(input_text=text1) + response2: EmbeddingResponse = model.embed_text(input_text=text2) + + embeddings1: List[float] = response1.embeddings + embeddings2: List[float] = response2.embeddings + + return metric.similarity(vec_a=embeddings1, + vec_b=embeddings2) ################################################################ # 4) Example Usage (Executable as Script) ################################################################ if __name__ == "__main__": - mock_model: MockEmbeddingModel = MockEmbeddingModel() - cosine: CosineSimilarity = CosineSimilarity() + openai_embedding_model = create_openai_embedding_model() + cosine_simlarity = CosineSimilarity() text_a: str = "Hello world!" text_b: str = "Hello, world??" score: float = calculate_text_similarity( - text1=text_a, text2=text_b, model=mock_model, metric=cosine + text1=text_a, text2=text_b, model=openai_embedding_model, metric=cosine_simlarity ) print(f"Similarity between '{text_a}' and '{text_b}': {score}") diff --git a/src/ember/core/utils/logging.py b/src/ember/core/utils/ember_logging.py similarity index 97% rename from src/ember/core/utils/logging.py rename to src/ember/core/utils/ember_logging.py index 25de7aa2..4b7be51c 100644 --- a/src/ember/core/utils/logging.py +++ b/src/ember/core/utils/ember_logging.py @@ -12,11 +12,11 @@ Usage: # To apply standard configuration with reduced verbosity: - from ember.core.utils.logging import configure_logging + from ember.core.utils.ember_logging import configure_logging configure_logging(verbose=False) # To adjust specific component verbosity: - from ember.core.utils.logging import set_component_level + from ember.core.utils.ember_logging import set_component_level set_component_level("model_discovery", logging.DEBUG) """ diff --git 
a/src/ember/core/utils/eval/__init__.py b/src/ember/core/utils/eval/__init__.py index b3cba4a4..33304c9c 100644 --- a/src/ember/core/utils/eval/__init__.py +++ b/src/ember/core/utils/eval/__init__.py @@ -12,6 +12,13 @@ NumericToleranceEvaluator, PartialRegexEvaluator, ) +from .diversity_evaluators import ( + DiversityEnsembledEvaluator, + DiversityCosineSimilarityEvaluator, + DiversityEditDistanceEvaluator, + DiversityCompressionEvaluator, + DiversityNoveltyEvaluator, +) from .pipeline import ( BatchEvaluationSummary, PipelineEvaluator, diff --git a/src/ember/core/utils/eval/diversity_evaluators.py b/src/ember/core/utils/eval/diversity_evaluators.py new file mode 100644 index 00000000..1830e68b --- /dev/null +++ b/src/ember/core/utils/eval/diversity_evaluators.py @@ -0,0 +1,373 @@ +from __future__ import annotations + +from typing import Any, List, Tuple + +from .base_evaluator import EvaluationResult, IEvaluator + +# diversity imports +from diversity import compression_ratio +import Levenshtein +import numpy as np +from ember.core.utils.embedding_utils import (CosineSimilarity, + calculate_text_similarity) +from ember.core.registry.model.providers.provider_capability import EmbeddingProviderModel +from ember.core.registry.model.providers.openai.openai_provider import create_openai_embedding_model + +import logging + +# Composite Evaluator Example +class DiversityEnsembledEvaluator(IEvaluator[List[str], None]): + """Evaluator that combines multiple diversity metrics to assess ensemble output diversity. + + Computes diversity as an average of cosine similarity, compression ratio, and edit distance. + The higher this score is, the more diverse your text. + + Args: + system_output (List[str]): List of generated outputs from the system. + embedding_model (EmbeddingModel): The embedding model to compute cosine similarity. + + Returns: + EvaluationResult: Average of the three diversity scores with `is_correct=True`. 
+ """ + def __init__(self, embedding_model: EmbeddingProviderModel): + self.embedding_model = embedding_model + if self.embedding_model is None: + logging.warning("DiversityEnsembledEvaluator isn't initialized with an embedding model") + + def evaluate( + self, + system_output: List[str], + **kwargs + ) -> EvaluationResult: + if not system_output: + logging.debug("DiversityEnsembledEvaluator didn't receive an output") + return EvaluationResult(is_correct=False, score=-1) + if self.embedding_model is None: + logging.debug("DiversityEnsembledEvaluator wasn't initialized with an embedding model") + return EvaluationResult(is_correct=False, score=-1) + if len(system_output) == 1: + logging.debug("DiversityEnsembledEvaluator only received one string of text") + return EvaluationResult(is_correct=True, score=0) + + # Lower cosine similarity --> more diverse + cosine_score = 1.0 - DiversityCosineSimilarityEvaluator(embedding_model=self.embedding_model).evaluate(system_output).score + # higher compression score --> more diverse + compression_score = DiversityCompressionEvaluator().evaluate(system_output).score + # higher edit distance --> more diverse + edit_score = DiversityEditDistanceEvaluator().evaluate(system_output).score + + avg_diversity = (cosine_score + compression_score + edit_score) / 3 + + return EvaluationResult( + is_correct=True, + score=avg_diversity, + metadata={"responses": system_output} + ) + + +class DiversityCosineSimilarityEvaluator(IEvaluator[List[str], None]): + """Evaluator that computes average pairwise cosine similarity between outputs. + + Lower average cosine similarity implies greater semantic diversity. + + Args: + system_output (List[str]): List of generated outputs from the system. + embedding_model (EmbeddingModel): The embedding model used to compute cosine similarity. + + Returns: + EvaluationResult: Result with average similarity score and output metadata. 
+ """ + def __init__(self, embedding_model: EmbeddingProviderModel = None): + self.embedding_model = embedding_model + if self.embedding_model is None: + logging.warning("DiversityCosineEvaluator isn't initialized with an embedding model " + + "Using default OpenAI embedding model instead") + self.embedding_model = create_openai_embedding_model() + + def evaluate( + self, + system_output: List[str], + **kwargs + ) -> EvaluationResult: + if not system_output: + logging.debug("DiversityCosineEvaluator didn't receive an output") + return EvaluationResult(is_correct=False, score=-1) + if self.embedding_model is None: + logging.debug("DiversityCosineEvaluator wasn't initialized with an embedding model") + return EvaluationResult(is_correct=False, score=-1) + if len(system_output) == 1: + logging.deubg("DiversityCosineEvaluator only received one string of text") + return EvaluationResult(is_correct=True, score=0) + + cosine_similarity = CosineSimilarity() + scores = [] + + # TODO IDEA: Compute embedding vectors for all system_output --> get the average + # Then compute cosine similarity between all other outputs + + # Compare every possible combination of system_output vectors + for i in range(len(system_output)): + for j in range(i + 1, len(system_output)): + sim = calculate_text_similarity( + system_output[i], system_output[j], self.embedding_model, metric=cosine_similarity + ) + scores.append(sim) + + avg_score = float(np.average(scores)) + + return EvaluationResult( + is_correct=True, + score=avg_score, + metadata={"responses": system_output} + ) + + +class DiversityCompressionEvaluator(IEvaluator[List[str], None]): + """Evaluator that measures diversity using a compression ratio heuristic. + + Lower compression ratio indicates higher textual diversity. The final score is scaled + based on a minimum number of responses (5) and minimum total character count (100). + + Args: + system_output (List[str]): List of generated responses. 
class DiversityCompressionEvaluator(IEvaluator[List[str], None]):
    """Evaluator that scores diversity with a compression-ratio heuristic.

    A corpus that compresses poorly is more textually diverse. The raw ratio
    is damped for small samples: scores are scaled down below 5 responses or
    below 100 total characters, where the heuristic is unreliable.

    Returns:
        EvaluationResult: Scaled diversity score based on compression.
    """

    def evaluate(
        self,
        system_output: List[str],
        **kwargs
    ) -> EvaluationResult:
        if not system_output:
            return EvaluationResult(is_correct=False, score=-1)

        total_chars = sum(len(response) for response in system_output)

        # NOTE(review): the original comment says compression_ratio returns
        # (compressed size / uncompressed size), so its reciprocal grows with
        # diversity — confirm against the `diversity` package's definition.
        diversity_ratio = 1 / compression_ratio(system_output)

        # Temporary patch: compression_ratio does not normalize over text
        # length, so penalize small response counts and very short inputs.
        count_penalty = min(1, len(system_output) / 5)
        length_penalty = min(1, total_chars / 100)
        scaled = diversity_ratio * count_penalty * length_penalty

        return EvaluationResult(
            is_correct=True,
            score=scaled,
            metadata={"responses": system_output}
        )
+ """ + + def evaluate(self, system_output: List[str], **kwargs) -> EvaluationResult: + if not system_output: + return EvaluationResult(is_correct=False, score=-1, metadata={}) + + score = self.compute_distance(system_output) + + return EvaluationResult( + is_correct=True, + score=score, + metadata={"responses": system_output} + ) + + def compute_distance(self, outputs: List[str]) -> float: + n = len(outputs) + if n < 2: + return 0.0 + + total_distance = 0.0 + num_pairs = 0 + + for i in range(n): + for j in range(i + 1, n): + dist = Levenshtein.distance(outputs[i], outputs[j]) + max_len = max(len(outputs[i]), len(outputs[j])) + norm_dist = dist / max_len if max_len > 0 else 0 + total_distance += norm_dist + num_pairs += 1 + + return total_distance / num_pairs if num_pairs > 0 else 0.0 + + +class DiversityNoveltyEvaluator: + """Evaluator that measures novelty of each output relative to previously generated ones. + + For each response, computes its cosine distance from all prior responses. + Higher novelty implies lower similarity to prior outputs. + + Args: + model (EmbeddingModel): Embedding model used for computing cosine similarity. + system_output (List[str]): List of outputs ordered by generation. + + Returns: + EvaluationResult: Average novelty score across the sequence. + """ + + def __init__(self, embedding_model: EmbeddingProviderModel = None): + self.embedding_model = embedding_model + if self.embedding_model is None: + logging.warning("DiversityNoveltyEvaluator isn't initialized with an embedding model " + + "Using default OpenAI embedding model instead") + self.embedding_model = create_openai_embedding_model() + + def evaluate(self, + system_output: List[str], + **kwargs + ) -> EvaluationResult: + """ + Evaluates the novelty of each response in a sequence relative to the responses that came before it, + using cosine similarity of embeddings. + + For each response, an embedding is computed and compared against embeddings of all prior responses. 
class DiversityNoveltyEvaluator:
    """Evaluator that measures novelty of each output relative to earlier ones.

    For each response (in generation order), an embedding is computed and
    compared against the embeddings of all prior responses; its novelty is
    1.0 minus the maximum cosine similarity with any prior response. The
    overall score is the mean novelty across the sequence.

    Note:
        If all responses are identical, the first response scores 1.0 and all
        others 0.0, so the average (and minimum) score is 1/len(system_output).

    Args:
        embedding_model (EmbeddingProviderModel): Model used to embed each
            response. Falls back to the default OpenAI embedding model when
            omitted.
    """

    def __init__(self, embedding_model: EmbeddingProviderModel = None):
        self.embedding_model = embedding_model
        if self.embedding_model is None:
            logging.warning("DiversityNoveltyEvaluator isn't initialized with an embedding model "
                            + "Using default OpenAI embedding model instead")
            self.embedding_model = create_openai_embedding_model()

    def evaluate(self,
                 system_output: List[str],
                 **kwargs
                 ) -> EvaluationResult:
        """Score each response's novelty against all responses before it.

        Returns:
            EvaluationResult:
                - is_correct: True if evaluation ran successfully.
                - score: Average novelty score across all responses; -1 with
                  is_correct=False when the input is empty.
                - metadata: Raw responses and their individual novelty scores.
        """
        if len(system_output) == 0:
            logging.warning("Length of inputs to evaluate function is zero")
            return EvaluationResult(is_correct=False, score=-1, metadata={})

        novelty_scores = []
        prior_embeddings = []

        for response in system_output:
            embedding, novelty = self._compute_novelty(self.embedding_model, response, prior_embeddings)
            novelty_scores.append(novelty)
            prior_embeddings.append(embedding)

        avg_score = float(np.mean(novelty_scores)) if novelty_scores else 0.0

        return EvaluationResult(
            is_correct=True,
            score=avg_score,
            metadata={
                "responses": system_output,
                "novelty_scores": novelty_scores
            }
        )

    def _compute_novelty(self,
                         model: EmbeddingProviderModel,
                         response: str,
                         # BUG FIX: was annotated List[str] although the list
                         # holds embedding vectors appended by evaluate().
                         prior_embeddings: List[np.ndarray]
                         ) -> Tuple[np.ndarray, float]:
        """Embed `response` and return (embedding, novelty vs. prior embeddings)."""
        new_emb = model.embed_text(response).embeddings

        if not prior_embeddings:
            # Nothing to compare against: the first response is fully novel.
            return new_emb, 1.0

        similarities = [
            np.dot(new_emb, prior) / (np.linalg.norm(new_emb) * np.linalg.norm(prior))
            for prior in prior_embeddings
        ]

        return new_emb, 1.0 - max(similarities)
if __name__ == "__main__":
    # Demo: run each diversity evaluator over three sample corpora with
    # decreasing diversity. Requires network access for OpenAI embeddings.
    text_embedding_ada_002 = create_openai_embedding_model("text-embedding-ada-002")

    # Completely unrelated sentences: high diversity.
    very_diverse_text = ["Bananas don't belong in briefcases, but socks and t-shirts do!",
                         "Abraham Lincoln was the 16th president of the United States of America",
                         "ERROR 404: Index Not Found"]

    # These all rephrase the same request with different wording: low
    # semantic diversity despite different words.
    different_words_not_diverse_strs = ["Could you please lend me a hand with this?",
                                        "Might you assist me with a task?",
                                        "Can you spare a second to help me do something?"]

    # Exact repetition: minimal diversity.
    repetition_strs = ["This is a sample text with lots of repetition.",
                       "This is a sample text with lots of repetition.",
                       "This is a sample text with lots of repetition."]

    # Sample string lists with varying levels of diversity.
    test_strings = [very_diverse_text, different_words_not_diverse_strs, repetition_strs]

    # Measure cosine similarity.
    cosine_similarity_evaluator = DiversityCosineSimilarityEvaluator(text_embedding_ada_002)

    print("\n" + "=" * 50)
    print("Cosine Similarity Evaluator\n")
    for strings in test_strings:
        print("Computing cosine-similarity for the following strings: ")
        for idx, text in enumerate(strings):
            print(f"String {idx + 1}: {text}")
        score: float = cosine_similarity_evaluator.evaluate(system_output=strings).score
        print(f"Diversity score: {score}\n")

    # Measure edit distance.
    print("=" * 50 + "\nEdit Distance Evaluator\n")
    edit_distance_evaluator = DiversityEditDistanceEvaluator()
    for strings in test_strings:
        print("Computing Edit-Distance for the following strings: ")
        for idx, text in enumerate(strings):
            print(f"String {idx + 1}: {text}")
        score: float = edit_distance_evaluator.evaluate(system_output=strings).score
        print(f"Edit-Distance score: {score}\n")
    print("=" * 50 + "\n")

    # Measure novelty.
    print("=" * 50 + "\nNovelty Evaluator\n")
    novelty_evaluator = DiversityNoveltyEvaluator()
    for strings in test_strings:
        print("Computing Novelty for the following strings: ")
        for idx, text in enumerate(strings):
            print(f"String {idx + 1}: {text}")
        score: float = novelty_evaluator.evaluate(system_output=strings).score
        print(f"Novelty score: {score}\n")
    print("=" * 50 + "\n")

    # Measure compression ratio.
    print("=" * 50 + "\nCompression Ratio Evaluator\n")
    compression_evaluator = DiversityCompressionEvaluator()
    for strings in test_strings:
        print("Computing Compression Ratio for the following strings: ")
        for idx, text in enumerate(strings):
            print(f"String {idx + 1}: {text}")
        score: float = compression_evaluator.evaluate(system_output=strings).score
        print(f"Compression Ratio: {score}\n")
    print("=" * 50 + "\n")

    # Measure ensembled diversity.
    # BUG FIX: the original re-instantiated DiversityCompressionEvaluator
    # here, so the "Ensembled Diversity" section just re-ran the compression
    # metric instead of the ensemble of all three metrics.
    print("=" * 50 + "\nEnsembled Diversity Evaluator\n")
    ensembled_evaluator = DiversityEnsembledEvaluator(embedding_model=text_embedding_ada_002)
    for strings in test_strings:
        print("Computing Ensembled Diversity Score for the following strings: ")
        for idx, text in enumerate(strings):
            print(f"String {idx + 1}: {text}")
        score: float = ensembled_evaluator.evaluate(system_output=strings).score
        print(f"Ensembled Diversity Score: {score}\n")
    print("=" * 50 + "\n")
diff --git a/src/ember/examples/advanced/example_architectures.py b/src/ember/examples/advanced/example_architectures.py index cc7f1aa8..2b8b735f 100644 --- a/src/ember/examples/advanced/example_architectures.py +++ b/src/ember/examples/advanced/example_architectures.py @@ -219,7 +219,7 @@ def create_pipeline(*, model_name: str = "gpt-4o") -> non.Sequential: if __name__ == "__main__": # Use the centralized logging configuration with reduced verbosity - from ember.core.utils.logging import configure_logging + from ember.core.utils.ember_logging import configure_logging from ember.xcs.engine.execution_options import set_execution_options configure_logging(verbose=False) diff --git a/src/ember/examples/diversity_demo.ipynb b/src/ember/examples/diversity_demo.ipynb new file mode 100644 index 00000000..277d95e7 --- /dev/null +++ b/src/ember/examples/diversity_demo.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR:ember.core.registry.model.providers.anthropic.anthropic_discovery:Error fetching Anthropic models via REST API: 401 Client Error: Unauthorized for url: https://api.anthropic.com/v1/models\n", + "/root/anaconda3/envs/ember_upgrade/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", + "I0000 00:00:1742550067.907817 410594 check_gcp_environment.cc:61] BIOS data file does not exist or cannot be opened.\n" + ] + } + ], + "source": [ + "import os \n", + "import logging\n", + "\n", + "# Set global logging level to ERROR\n", + "logging.basicConfig(level=logging.ERROR)\n", + "\n", + "os.environ[\"EMBER_LOGGING_LEVEL\"] = \"ERROR\"\n", + "\n", + "# from ember.core.registry.model.model_module.lm import LMModule, LMModuleConfig\n", + "from ember.core.registry.model.config.settings import initialize_registry\n", + "from ember.core.registry.model.base.services.model_service import ModelService" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR:ember.core.registry.model.providers.anthropic.anthropic_discovery:Error fetching Anthropic models via REST API: 401 Client Error: Unauthorized for url: https://api.anthropic.com/v1/models\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['openai:gpt-4o-mini-transcribe', 'openai:gpt-4o-audio-preview-2024-12-17', 'openai:dall-e-3', 'openai:dall-e-2', 'openai:gpt-4o-audio-preview-2024-10-01', 'openai:gpt-4o-realtime-preview-2024-10-01', 'openai:gpt-4o-audio-preview', 'openai:text-embedding-3-large', 'openai:gpt-4', 'openai:gpt-4o-mini-2024-07-18', 'openai:gpt-4o-2024-05-13', 'openai:gpt-4o-realtime-preview', 'openai:gpt-4o-mini', 'openai:gpt-4o-mini-audio-preview', 'openai:gpt-3.5-turbo-instruct-0914', 'openai:gpt-4o-mini-search-preview', 'openai:gpt-3.5-turbo-1106', 'openai:gpt-4o-search-preview', 'openai:gpt-4-turbo', 'openai:gpt-4o-realtime-preview-2024-12-17', 'openai:gpt-3.5-turbo-instruct', 'openai:gpt-3.5-turbo', 'openai:gpt-4-turbo-preview', 
'openai:gpt-4o-mini-search-preview-2025-03-11', 'openai:gpt-4o-mini-realtime-preview', 'openai:gpt-3.5-turbo-0125', 'openai:gpt-4o-2024-08-06', 'openai:gpt-4-turbo-2024-04-09', 'openai:gpt-3.5-turbo-16k', 'openai:gpt-4o', 'openai:gpt-4o-mini-realtime-preview-2024-12-17', 'openai:gpt-4-1106-preview', 'openai:text-embedding-ada-002', 'openai:gpt-4-0613', 'openai:gpt-4.5-preview', 'openai:gpt-4.5-preview-2025-02-27', 'openai:gpt-4o-search-preview-2025-03-11', 'openai:gpt-4o-2024-11-20', 'openai:gpt-4o-mini-tts', 'openai:gpt-4-0125-preview', 'openai:gpt-4o-transcribe', 'openai:text-embedding-3-small', 'openai:gpt-4o-mini-audio-preview-2024-12-17', 'anthropic:claude-3-sonnet', 'anthropic:claude-3-opus', 'anthropic:claude-3-haiku', 'anthropic:claude-3.5-sonnet', 'anthropic:claude-3.7-sonnet', 'google:models/gemini-1.0-pro-vision-latest', 'google:models/gemini-pro-vision', 'google:models/gemini-1.5-pro-latest', 'google:models/gemini-1.5-pro-001', 'google:models/gemini-1.5-pro-002', 'google:models/gemini-1.5-pro', 'google:models/gemini-1.5-flash-latest', 'google:models/gemini-1.5-flash-001', 'google:models/gemini-1.5-flash-001-tuning', 'google:models/gemini-1.5-flash', 'google:models/gemini-1.5-flash-002', 'google:models/gemini-1.5-flash-8b', 'google:models/gemini-1.5-flash-8b-001', 'google:models/gemini-1.5-flash-8b-latest', 'google:models/gemini-1.5-flash-8b-exp-0827', 'google:models/gemini-1.5-flash-8b-exp-0924', 'google:models/gemini-2.0-flash-exp', 'google:models/gemini-2.0-flash', 'google:models/gemini-2.0-flash-001', 'google:models/gemini-2.0-flash-exp-image-generation', 'google:models/gemini-2.0-flash-lite-001', 'google:models/gemini-2.0-flash-lite', 'google:models/gemini-2.0-flash-lite-preview-02-05', 'google:models/gemini-2.0-flash-lite-preview', 'google:models/gemini-2.0-pro-exp', 'google:models/gemini-2.0-pro-exp-02-05', 'google:models/gemini-exp-1206', 'google:models/gemini-2.0-flash-thinking-exp-01-21', 'google:models/gemini-2.0-flash-thinking-exp', 
'google:models/gemini-2.0-flash-thinking-exp-1219', 'google:models/learnlm-1.5-pro-experimental', 'google:models/gemma-3-27b-it']\n" + ] + } + ], + "source": [ + "model_registry = initialize_registry()\n", + "print(model_registry.list_models())\n", + "llm = ModelService(registry=model_registry)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['openai:gpt-4o-mini-transcribe',\n", + " 'openai:gpt-4o-audio-preview-2024-12-17',\n", + " 'openai:dall-e-3',\n", + " 'openai:dall-e-2',\n", + " 'openai:gpt-4o-audio-preview-2024-10-01',\n", + " 'openai:gpt-4o-realtime-preview-2024-10-01',\n", + " 'openai:gpt-4o-audio-preview',\n", + " 'openai:text-embedding-3-large',\n", + " 'openai:gpt-4',\n", + " 'openai:gpt-4o-mini-2024-07-18',\n", + " 'openai:gpt-4o-2024-05-13',\n", + " 'openai:gpt-4o-realtime-preview',\n", + " 'openai:gpt-4o-mini',\n", + " 'openai:gpt-4o-mini-audio-preview',\n", + " 'openai:gpt-3.5-turbo-instruct-0914',\n", + " 'openai:gpt-4o-mini-search-preview',\n", + " 'openai:gpt-3.5-turbo-1106',\n", + " 'openai:gpt-4o-search-preview',\n", + " 'openai:gpt-4-turbo',\n", + " 'openai:gpt-4o-realtime-preview-2024-12-17',\n", + " 'openai:gpt-3.5-turbo-instruct',\n", + " 'openai:gpt-3.5-turbo',\n", + " 'openai:gpt-4-turbo-preview',\n", + " 'openai:gpt-4o-mini-search-preview-2025-03-11',\n", + " 'openai:gpt-4o-mini-realtime-preview',\n", + " 'openai:gpt-3.5-turbo-0125',\n", + " 'openai:gpt-4o-2024-08-06',\n", + " 'openai:gpt-4-turbo-2024-04-09',\n", + " 'openai:gpt-3.5-turbo-16k',\n", + " 'openai:gpt-4o',\n", + " 'openai:gpt-4o-mini-realtime-preview-2024-12-17',\n", + " 'openai:gpt-4-1106-preview',\n", + " 'openai:text-embedding-ada-002',\n", + " 'openai:gpt-4-0613',\n", + " 'openai:gpt-4.5-preview',\n", + " 'openai:gpt-4.5-preview-2025-02-27',\n", + " 'openai:gpt-4o-search-preview-2025-03-11',\n", + " 'openai:gpt-4o-2024-11-20',\n", + " 'openai:gpt-4o-mini-tts',\n", + " 
'openai:gpt-4-0125-preview',\n", + " 'openai:gpt-4o-transcribe',\n", + " 'openai:text-embedding-3-small',\n", + " 'openai:gpt-4o-mini-audio-preview-2024-12-17',\n", + " 'anthropic:claude-3-sonnet',\n", + " 'anthropic:claude-3-opus',\n", + " 'anthropic:claude-3-haiku',\n", + " 'anthropic:claude-3.5-sonnet',\n", + " 'anthropic:claude-3.7-sonnet',\n", + " 'google:models/gemini-1.0-pro-vision-latest',\n", + " 'google:models/gemini-pro-vision',\n", + " 'google:models/gemini-1.5-pro-latest',\n", + " 'google:models/gemini-1.5-pro-001',\n", + " 'google:models/gemini-1.5-pro-002',\n", + " 'google:models/gemini-1.5-pro',\n", + " 'google:models/gemini-1.5-flash-latest',\n", + " 'google:models/gemini-1.5-flash-001',\n", + " 'google:models/gemini-1.5-flash-001-tuning',\n", + " 'google:models/gemini-1.5-flash',\n", + " 'google:models/gemini-1.5-flash-002',\n", + " 'google:models/gemini-1.5-flash-8b',\n", + " 'google:models/gemini-1.5-flash-8b-001',\n", + " 'google:models/gemini-1.5-flash-8b-latest',\n", + " 'google:models/gemini-1.5-flash-8b-exp-0827',\n", + " 'google:models/gemini-1.5-flash-8b-exp-0924',\n", + " 'google:models/gemini-2.0-flash-exp',\n", + " 'google:models/gemini-2.0-flash',\n", + " 'google:models/gemini-2.0-flash-001',\n", + " 'google:models/gemini-2.0-flash-exp-image-generation',\n", + " 'google:models/gemini-2.0-flash-lite-001',\n", + " 'google:models/gemini-2.0-flash-lite',\n", + " 'google:models/gemini-2.0-flash-lite-preview-02-05',\n", + " 'google:models/gemini-2.0-flash-lite-preview',\n", + " 'google:models/gemini-2.0-pro-exp',\n", + " 'google:models/gemini-2.0-pro-exp-02-05',\n", + " 'google:models/gemini-exp-1206',\n", + " 'google:models/gemini-2.0-flash-thinking-exp-01-21',\n", + " 'google:models/gemini-2.0-flash-thinking-exp',\n", + " 'google:models/gemini-2.0-flash-thinking-exp-1219',\n", + " 'google:models/learnlm-1.5-pro-experimental',\n", + " 'google:models/gemma-3-27b-it']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "model_registry.list_models()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n", + "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install -q openai" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Diversity Demo" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "from abc import ABC, abstractmethod\n", + "from typing import List, Protocol\n", + "import math\n", + "\n", + "\n", + "################################################################\n", + "# 1) Embedding Model Interfaces & Implementations\n", + "################################################################\n", + "\n", + "\n", + "class EmbeddingModel(Protocol):\n", + " \"\"\"Interface for embedding models.\n", + "\n", + " This protocol defines the minimal interface required to compute a text\n", + " embedding. 
Implementations may use local models, external APIs, or custom\n", + " neural networks.\n", + "\n", + " Methods:\n", + " embed_text: Compute the embedding for a given text.\n", + " \"\"\"\n", + "\n", + " def embed_text(self, text: str) -> List[float]:\n", + " \"\"\"Computes the embedding vector for the provided text.\n", + "\n", + " Args:\n", + " text (str): The text to be embedded.\n", + "\n", + " Returns:\n", + " List[float]: A list of floats representing the embedding vector.\n", + " \"\"\"\n", + " ...\n", + "\n", + "class Text_Embedding_Ada_002_Model:\n", + " \"\"\"Interface for embedding models.\n", + "\n", + " This protocol defines the minimal interface required to compute a text\n", + " embedding. Implementations may use local models, external APIs, or custom\n", + " neural networks.\n", + "\n", + " Methods:\n", + " embed_text: Compute the embedding for a given text.\n", + " \"\"\"\n", + "\n", + " def embed_text(self, text: str) -> List[float]:\n", + " \"\"\"Computes the embedding vector for the provided text.\n", + "\n", + " Args:\n", + " text (str): The text to be embedded.\n", + "\n", + " Returns:\n", + " List[float]: A list of floats representing the embedding vector.\n", + " \"\"\"\n", + " response = llm(model_id=\"openai:text-embedding-ada-002\", prompt=text)\n", + " return response.embedding\n", + "\n", + "\n", + "class MockEmbeddingModel:\n", + " \"\"\"Mock implementation of an embedding model using naive ASCII encoding.\n", + "\n", + " This simple model converts each character in the text to a normalized ASCII\n", + " value. 
It is intended solely for demonstration and testing purposes.\n", + "\n", + " Methods:\n", + " embed_text: Converts text to a sequence of normalized ASCII values.\n", + " \"\"\"\n", + "\n", + " def embed_text(self, text: str) -> List[float]:\n", + " \"\"\"Embeds text by converting each character to its normalized ASCII code.\n", + "\n", + " Args:\n", + " text (str): The input text to be embedded.\n", + "\n", + " Returns:\n", + " List[float]: A list of floats representing the embedding. Returns an\n", + " empty list if the text is empty.\n", + " \"\"\"\n", + " if not text:\n", + " return []\n", + " return [ord(ch) / 256.0 for ch in text]\n", + "\n", + "\n", + "################################################################\n", + "# 2) Similarity Metric Interface & Implementations\n", + "################################################################\n", + "\n", + "\n", + "class SimilarityMetric(ABC):\n", + " \"\"\"Abstract base class for computing similarity between embedding vectors.\n", + "\n", + " Subclasses must implement the similarity method to calculate a similarity\n", + " score between two vectors.\n", + " \"\"\"\n", + "\n", + " @abstractmethod\n", + " def similarity(self, vec_a: List[float], vec_b: List[float]) -> float:\n", + " \"\"\"Calculates the similarity between two embedding vectors.\n", + "\n", + " Args:\n", + " vec_a (List[float]): The first embedding vector.\n", + " vec_b (List[float]): The second embedding vector.\n", + "\n", + " Returns:\n", + " float: The similarity score, typically in the range [0, 1] or [-1, 1].\n", + " \"\"\"\n", + " ...\n", + "\n", + "\n", + "class CosineSimilarity(SimilarityMetric):\n", + " \"\"\"Implementation of cosine similarity for embedding vectors.\n", + "\n", + " The cosine similarity is defined as:\n", + " similarity(a, b) = (a · b) / (||a|| * ||b||)\n", + "\n", + " Returns 0.0 if either vector is empty or if any vector's norm is zero.\n", + " \"\"\"\n", + "\n", + " def similarity(self, vec_a: List[float], 
vec_b: List[float]) -> float:\n", + " \"\"\"Computes cosine similarity between two embedding vectors.\n", + "\n", + " Args:\n", + " vec_a (List[float]): The first embedding vector.\n", + " vec_b (List[float]): The second embedding vector.\n", + "\n", + " Returns:\n", + " float: The cosine similarity score.\n", + " \"\"\"\n", + " if not vec_a or not vec_b:\n", + " return 0.0\n", + "\n", + " dot_product: float = sum(a * b for a, b in zip(vec_a, vec_b))\n", + " norm_a: float = math.sqrt(sum(a * a for a in vec_a))\n", + " norm_b: float = math.sqrt(sum(b * b for b in vec_b))\n", + " if norm_a == 0 or norm_b == 0:\n", + " return 0.0\n", + "\n", + " return dot_product / (norm_a * norm_b)\n", + "\n", + "\n", + "################################################################\n", + "# 3) High-Level Utility Function\n", + "################################################################\n", + "\n", + "\n", + "def calculate_text_similarity(\n", + " text1: str, text2: str, model: EmbeddingModel, metric: SimilarityMetric\n", + ") -> float:\n", + " \"\"\"Calculates text similarity using an embedding model and a similarity metric.\n", + "\n", + " This function generates embeddings for the provided texts and then computes a\n", + " similarity score using the given similarity metric.\n", + "\n", + " Args:\n", + " text1 (str): The first text string.\n", + " text2 (str): The second text string.\n", + " model (EmbeddingModel): An instance conforming to the embedding model interface.\n", + " metric (SimilarityMetric): An instance implementing a similarity metric.\n", + "\n", + " Returns:\n", + " float: The computed similarity score.\n", + " \"\"\"\n", + " embedding1: List[float] = model.embed_text(text=text1)\n", + " embedding2: List[float] = model.embed_text(text=text2)\n", + "\n", + " return metric.similarity(vec_a=embedding1, vec_b=embedding2)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Cosine similarity Score: 0.7287\n", + "\n", + "Cosine similarity Score: 0.8205\n", + "\n", + "Cosine similarity Score: 1.0000\n", + "\n" + ] + } + ], + "source": [ + "mock_model: Text_Embedding_Ada_002_Model = Text_Embedding_Ada_002_Model()\n", + "cosine: CosineSimilarity = CosineSimilarity()\n", + "\n", + "text_a: str = \"Hello world!\"\n", + "text_b: str = \"Hello, world??\"\n", + "\n", + "diverse_text = [\"Bananas don't belong in briefcases\", \"Abraham Lincoln\", \"ERROR 404: Index Not Found\"]\n", + "\n", + "different_words_not_diverse_strs = [\"peanut butter and jelly\", \"bacon lettuce tomato\"]\n", + "\n", + "repetition_strs = [\"This is a sample text with lots of repetition.\", \n", + " \"This is a sample text with lots of repetition.\"]\n", + "\n", + "test_strings = [diverse_text, different_words_not_diverse_strs, repetition_strs]\n", + "\n", + "for test in test_strings:\n", + " score: float = calculate_text_similarity(\n", + " text1=test[0], text2=test[1], model=mock_model, metric=cosine\n", + " )\n", + "\n", + " print(f\"Cosine similarity Score: {score:.4f}\")\n", + " print(\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "---\n", + "\n", + "## Compression Ratio (WIP)\n", + "\n", + "from `src/ember/core/utils/eval/evaluators.py`" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. 
Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n", + "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n", + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n", + "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install -q diversity==0.2.0\n", + "%pip install -q spacy==3.8.4" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "from typing import Any, Dict, TypeVar, Optional, List, Generic, Callable, Union\n", + "\n", + "from ember.core.utils.eval.base_evaluator import IEvaluator, EvaluationResult\n", + "from ember.core.utils.eval.extractors import RegexExtractor\n", + "\n", + "from diversity import compression_ratio\n", + "\n", + "T_out = TypeVar(\"T_out\")\n", + "T_truth = TypeVar(\"T_truth\")\n", + "\n", + "\n", + "class ComposedEvaluator(IEvaluator[T_out, T_truth], Generic[T_out, T_truth]):\n", + " \"\"\"Combines an output extractor with an evaluator for the extracted data.\n", + "\n", + " This evaluator first transforms the system output using the provided extractor,\n", + " then evaluates the extracted value using the specified base evaluator.\n", + "\n", + " Args:\n", + " extractor: An object with an `extract` method to process the system output.\n", + " base_evaluator (IEvaluator): An evaluator that processes the extracted output.\n", + "\n", + " Returns:\n", + " EvaluationResult: The result of the evaluation.\n", + " \"\"\"\n", + "\n", + " def 
__init__(\n", + " self,\n", + " extractor: Any, # Expecting an extractor with an `extract` method.\n", + " base_evaluator: IEvaluator[Any, Any],\n", + " ) -> None:\n", + " self.extractor = extractor\n", + " self.base_evaluator = base_evaluator\n", + "\n", + " def evaluate(\n", + " self, system_output: T_out, correct_answer: Any, **kwargs: Any\n", + " ) -> EvaluationResult:\n", + " \"\"\"Evaluates the provided system output against the correct answer.\n", + "\n", + " Args:\n", + " system_output (T_out): The raw output generated by the system.\n", + " correct_answer (Any): The expected correct answer.\n", + " **kwargs: Additional keyword arguments for extraction or evaluation.\n", + "\n", + " Returns:\n", + " EvaluationResult: The result of evaluating the extracted value.\n", + " \"\"\"\n", + " extracted_value = self.extractor.extract(system_output, **kwargs)\n", + " return self.base_evaluator.evaluate(extracted_value, correct_answer, **kwargs)\n", + "\n", + "\n", + "# Basic Evaluators\n", + "\n", + "\n", + "class ExactMatchEvaluator(IEvaluator[str, str]):\n", + " \"\"\"Evaluator to check for an exact match between two strings,\n", + " ignoring differences in whitespace and case.\n", + "\n", + " Example:\n", + " evaluator = ExactMatchEvaluator()\n", + " result = evaluator.evaluate(\"Hello World\", \"hello world\")\n", + "\n", + " Args:\n", + " compare_fn (Optional[Callable[[str, str], bool]]): Optional custom comparison function.\n", + " If not provided, strings are normalized (whitespace removed, lowercase) before comparison.\n", + "\n", + " Returns:\n", + " EvaluationResult: The result containing a correctness flag and a score.\n", + " \"\"\"\n", + "\n", + " def __init__(self, compare_fn: Optional[Callable[[str, str], bool]] = None) -> None:\n", + " self.compare_fn = compare_fn or self._default_compare\n", + "\n", + " def _default_compare(self, str1: str, str2: str) -> bool:\n", + " \"\"\"Default string comparison function that ignores case and whitespace.\n", + 
"\n", + " Args:\n", + " str1 (str): First string to compare\n", + " str2 (str): Second string to compare\n", + "\n", + " Returns:\n", + " bool: True if strings match after normalization\n", + " \"\"\"\n", + " return str1.strip().lower() == str2.strip().lower()\n", + "\n", + " def evaluate(\n", + " self, system_output: str, correct_answer: str, **kwargs: Any\n", + " ) -> EvaluationResult:\n", + " \"\"\"Evaluates whether a system output exactly matches the correct answer.\n", + "\n", + " Args:\n", + " system_output (str): The system-generated string.\n", + " correct_answer (str): The expected answer string.\n", + " **kwargs: Additional keyword arguments (unused).\n", + "\n", + " Returns:\n", + " EvaluationResult: An object with `is_correct` set to True if the normalized strings match,\n", + " along with a corresponding score.\n", + " \"\"\"\n", + " is_correct = self.compare_fn(system_output, correct_answer)\n", + " score = 1.0 if is_correct else 0.0\n", + " return EvaluationResult(is_correct=is_correct, score=score)\n", + "\n", + "class DiversityScoringEvaluator(IEvaluator[List[str], None]):\n", + " \"\"\"\n", + " Evaluator to test ensemble outputs -> score them (float)\n", + " \"\"\"\n", + " def evaluate(\n", + " self, \n", + " system_output: List[str], \n", + " **kwargs) -> EvaluationResult:\n", + " if system_output is None or len(system_output) == 0:\n", + " return EvaluationResult(is_correct=False, score=-1)\n", + "\n", + "\n", + " letter_sum = sum(len(response) for response in system_output)\n", + " ratio = compression_ratio(system_output) * min(1, len(system_output)/5) * min(1, letter_sum/100)\n", + "\n", + " return EvaluationResult(is_correct=True,score=ratio,metadata = {'responses': system_output})\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "---\n", + "\n", + "## **Edit Distance**" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n", + "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install -q python-Levenshtein" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import Levenshtein\n", + "from typing import List\n", + "from dataclasses import dataclass\n", + "\n", + "@dataclass\n", + "class EvaluationResult:\n", + " is_correct: bool\n", + " score: float\n", + " metadata: dict\n", + "\n", + "class EditDistanceScoringEvaluator:\n", + "\n", + " def evaluate(self, system_output: List[str], **kwargs) -> EvaluationResult:\n", + " if system_output is None or len(system_output) == 0:\n", + " return EvaluationResult(is_correct=False, score=-1, metadata={})\n", + "\n", + " diversity_score = self.compute_distance(system_output)\n", + "\n", + " return EvaluationResult(\n", + " is_correct=True, \n", + " score=diversity_score,\n", + " metadata={'responses': system_output}\n", + " )\n", + "\n", + " def compute_distance(self, outputs: List[str]) -> float:\n", + " n = len(outputs)\n", + " if n < 2:\n", + " return 0.0\n", + "\n", + " total_distance = 0\n", + " pairs = 0\n", + "\n", + " for i in range(n):\n", + " for j in range(i + 1, n):\n", + " dist = Levenshtein.distance(outputs[i], outputs[j])\n", + " max_len = max(len(outputs[i]), len(outputs[j]))\n", + " normalized_dist = dist / max_len if max_len > 0 else 0 \n", + " total_distance += normalized_dist\n", + " pairs += 1\n", + " \n", + " return total_distance / pairs if pairs > 0 else 0.0" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Diversity Score: 0.8635\n", + "Is Correct: True\n", + "Metadata: {'responses': [\"Bananas don't belong in briefcases\", 'Abraham Lincoln', 'ERROR 404: Index Not Found']}\n", + "\n", + "Diversity Score: 0.8573\n", + "Is Correct: True\n", + "Metadata: {'responses': ['peanut butter and jelly', 'bacon lettuce tomato', 'grilled cheese', 'Banh mi']}\n", + "\n", + "Diversity Score: 0.0000\n", + "Is Correct: True\n", + "Metadata: {'responses': ['This is a sample text with lots of repetition.', 'This is a sample text with lots of repetition.', 'This is a sample text with lots of repetition.']}\n", + "\n" + ] + } + ], + "source": [ + "distance_evaluator = EditDistanceScoringEvaluator()\n", + "\n", + "# input_strs = [\n", + "# \";lkjawefopajwiefpoij23jf9aj8sdfj8903jf908j -- Understanding the importance of effective communication in the workplace cannot be overstated. Clear communication fosters a positive environment where people can express their ideas and work together efficiently. When team members understand one another, they can collaborate seamlessly, avoid misunderstandings, and achieve collective goals. Furthermore, communication skills are essential for building trust, resolving conflicts, and ensuring that expectations are clear. Whether through verbal discussions, emails, or presentations, knowing how to convey thoughts in an understandable way is key to success in any professional setting.\",\n", + "# \"fej89qw098efjq29f38j0938j20f398jqwe098fjq98wf -- In any workplace, the ability to communicate effectively is crucial for success. When individuals can clearly articulate their ideas and listen actively, it leads to a more productive and harmonious environment. Good communication prevents misunderstandings, aids in team collaboration, and helps in meeting shared objectives. 
It also plays a vital role in fostering trust among colleagues, resolving disputes, and ensuring transparency. Whether it’s through face-to-face conversations, written messages, or virtual meetings, mastering communication is essential to creating a positive, high-functioning work culture.\",\n", + "# \"Effective communication is a cornerstone of a successful work environment. When employees communicate clearly and efficiently, it improves the overall flow of work and enhances collaboration. Clear exchanges of ideas help to eliminate confusion, build mutual trust, and ensure that everyone is aligned in their goals. Additionally, strong communication skills are key to managing conflicts and setting clear expectations among teams. Whether in meetings, emails, or other formats, being able to communicate effectively contributes to a thriving and efficient workplace.\",\n", + "# \"The role of communication in the workplace cannot be overlooked. It serves as the foundation for successful teamwork and organizational growth. When team members share information clearly, it promotes a collaborative atmosphere and reduces the risk of errors or misinterpretations. Strong communication is also vital in building relationships, resolving issues, and making sure everyone is on the same page. Whether it's verbal exchanges or written correspondence, honing your ability to communicate well is vital for fostering an effective work environment.\",\n", + "# \"Communication within the workplace is a vital element for success. Clear and open communication promotes a cooperative and efficient atmosphere, helping team members to better understand each other’s ideas and work toward common goals. It reduces confusion, builds trust, and allows for smoother problem-solving when conflicts arise. By conveying thoughts and expectations effectively, individuals can create stronger working relationships and a productive team dynamic. 
Whether through emails, phone calls, or face-to-face interactions, mastering communication techniques is key for professional achievement.\",\n", + "# ]\n", + "\n", + "diverse_text = [\"Bananas don't belong in briefcases\", \"Abraham Lincoln\", \"ERROR 404: Index Not Found\"]\n", + "\n", + "different_words_not_diverse_strs = [\"peanut butter and jelly\", \"bacon lettuce tomato\", \"grilled cheese\"]\n", + "\n", + "repetition_strs = [\"This is a sample text with lots of repetition.\", \n", + " \"This is a sample text with lots of repetition.\",\n", + " \"This is a sample text with lots of repetition.\"]\n", + "\n", + "test_strings = [diverse_text, different_words_not_diverse_strs, repetition_strs]\n", + "\n", + "for test in test_strings:\n", + " edit_distance = distance_evaluator.evaluate(test)\n", + "\n", + " print(f\"Diversity Score: {edit_distance.score:.4f}\")\n", + " print(f\"Is Correct: {edit_distance.is_correct}\")\n", + " print(f\"Metadata: {edit_distance.metadata}\")\n", + " print(\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "---\n", + "\n", + "## Novelty Score\n", + "\n", + "#### (From AidanBench)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "from dataclasses import dataclass\n", + "import numpy as np\n", + "\n", + "@dataclass\n", + "class EvaluationResult:\n", + " is_correct: bool\n", + " score: float\n", + " metadata: dict\n", + "\n", + "class NoveltyScoringEvaluator:\n", + " \n", + " def evaluate(self, model: EmbeddingModel, system_output: List[str], **kwargs) -> EvaluationResult:\n", + " if not system_output or len(system_output) == 0:\n", + " return EvaluationResult(is_correct=False, score=-1, metadata={})\n", + " \n", + " self.model = model\n", + "\n", + " novelty_scores = [self.compute_novelty(r, system_output[:i]) for i, r in enumerate(system_output)]\n", + "\n", + " print(\"scores: \", 
novelty_scores)\n", + "\n", + " avg_novelty = sum(novelty_scores) / len(novelty_scores) if novelty_scores else 0.0\n", + "\n", + " return EvaluationResult(\n", + " is_correct=True,\n", + " score=avg_novelty,\n", + " metadata={'responses': system_output, 'novelty_scores': novelty_scores}\n", + " )\n", + "\n", + " def compute_novelty(self, response: str, prior_responses: List[str]) -> float:\n", + " if not prior_responses:\n", + " return 1.0\n", + "\n", + " new_embedding = self.model.embed_text(response)\n", + " prior_embeddings = [self.model.embed_text(r) for r in prior_responses]\n", + "\n", + " similarities = [\n", + " np.dot(new_embedding, prior_embedding) /\n", + " (np.linalg.norm(new_embedding) * np.linalg.norm(prior_embedding))\n", + " for prior_embedding in prior_embeddings\n", + " ]\n", + "\n", + " return 1 - max(similarities)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scores: [1.0, 0.2712776724205106, 0.259596190452704]\n", + "Diversity Score: 0.5103\n", + "Is Correct: True\n", + "Metadata: {'responses': [\"Bananas don't belong in briefcases\", 'Abraham Lincoln', 'ERROR 404: Index Not Found'], 'novelty_scores': [1.0, 0.2712776724205106, 0.259596190452704]}\n", + "\n", + "scores: [1.0, 0.17952900510509806, 0.13489158507389332]\n", + "Diversity Score: 0.4381\n", + "Is Correct: True\n", + "Metadata: {'responses': ['peanut butter and jelly', 'bacon lettuce tomato', 'grilled cheese'], 'novelty_scores': [1.0, 0.17952900510509806, 0.13489158507389332]}\n", + "\n", + "scores: [1.0, 0.0, 0.0]\n", + "Diversity Score: 0.3333\n", + "Is Correct: True\n", + "Metadata: {'responses': ['This is a sample text with lots of repetition.', 'This is a sample text with lots of repetition.', 'This is a sample text with lots of repetition.'], 'novelty_scores': [1.0, 0.0, 0.0]}\n", + "\n" + ] + } + ], + "source": [ + "novelty_evaluator = 
NoveltyScoringEvaluator()\n", + "\n", + "diverse_text = [\"Bananas don't belong in briefcases\", \"Abraham Lincoln\", \"ERROR 404: Index Not Found\"]\n", + "\n", + "different_words_not_diverse_strs = [\"peanut butter and jelly\", \"bacon lettuce tomato\", \"grilled cheese\"]\n", + "\n", + "repetition_strs = [\"This is a sample text with lots of repetition.\", \n", + " \"This is a sample text with lots of repetition.\",\n", + " \"This is a sample text with lots of repetition.\"]\n", + "\n", + "ada_002: Text_Embedding_Ada_002_Model = Text_Embedding_Ada_002_Model()\n", + "test_strings = [diverse_text, different_words_not_diverse_strs, repetition_strs]\n", + "\n", + "for test in test_strings:\n", + " results = novelty_evaluator.evaluate(ada_002, test)\n", + "\n", + " print(f\"Diversity Score: {results.score:.4f}\")\n", + " print(f\"Is Correct: {results.is_correct}\")\n", + " print(f\"Metadata: {results.metadata}\")\n", + " print(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ember_upgrade", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/ember/examples/diversity_testbench.ipynb b/src/ember/examples/diversity_testbench.ipynb new file mode 100644 index 00000000..79c09852 --- /dev/null +++ b/src/ember/examples/diversity_testbench.ipynb @@ -0,0 +1,832 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Diversity Testbench\n", + "\n", + "---\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ember Package" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "### Setup Dependencies" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NOTE: things below this are to install required dependencies (do this in the virtual env)\n", + "\n", + "This may require running these commaned to setup your venv first:\n", + "- https://github.com/jaredquincy/ember/blob/main/INSTALLATION_GUIDE.md\n", + "- `uv venv`\n", + "- `uv pip install ember-ai`\n", + "- `uv pip install pip`\n", + "- `source .venv/bin/activate`\n", + "\n", + "Plus, add your OpenAI API key -> environ\n", + "- `export OPENAI_API_KEY=` in the terminal\n", + "- `os.environ[\"OPENAI_API_KEY\"] = ` in ipynb cell" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import sys, os, logging" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# < enter api keys here! >" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "openai_key = os.getenv(\"OPENAI_API_KEY\")\n", + "\n", + "# Set global logging level to ERROR\n", + "logging.basicConfig(level=logging.ERROR)\n", + "\n", + "os.environ[\"EMBER_LOGGING_LEVEL\"] = \"ERROR\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2mUsing Python 3.11.9 environment at: /Users/concon/research/ember-branch/ember/.venv\u001b[0m\n", + "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 27ms\u001b[0m\u001b[0m\n" + ] + } + ], + "source": [ + "# !uv pip install -q -e .\n", + "\n", + "# compression ratio dependencies\n", + "!uv pip install diversity==0.2.0\n", + "!uv pip install -q spacy==3.8.4\n", + "\n", + "# edit distance\n", + "!uv pip install -q python-Levenshtein\n", + "\n", + "# 
ensemble example\n", + "!uv pip install -q matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "# basic imports & dependencies\n", + "from __future__ import annotations\n", + "\n", + "# class definitions used in ember repo\n", + "import math, re, subprocess\n", + "from typing import Dict, Any, List, Protocol, TypeVar, Optional, Generic, Callable, Union\n", + "from abc import ABC, abstractmethod\n", + "\n", + "# compression related items\n", + "from diversity import compression_ratio\n", + "import Levenshtein\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import tqdm\n", + "\n", + "# ember repo loads\n", + "from ember.core.registry.model.config.settings import initialize_registry\n", + "from ember.core.registry.model.base.services.model_service import ModelService\n", + "from ember.core.registry.model.base.schemas.model_info import ModelInfo\n", + "from ember.core.registry.model.base.schemas.cost import ModelCost, RateLimit\n", + "from ember.core.registry.model.base.schemas.provider_info import ProviderInfo\n", + "\n", + "from ember.core.registry.model import load_model, ChatResponse\n", + "from ember.core.registry.model.base.services.model_service import ModelService\n", + "\n", + "from ember.core.registry.model.providers.openai.openai_provider import create_openai_embedding_model\n", + "from ember.core.utils.embedding_utils import CosineSimilarity, calculate_text_similarity\n", + "from ember.core.utils.eval.diversity_evaluators import DiversityCosineSimilarityEvaluator, DiversityEditDistanceEvaluator, DiversityCompressionEvaluator, DiversityNoveltyEvaluator, DiversityEnsembledEvaluator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Setup model registry to run models" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "model_registry 
= initialize_registry()\n", + "llm = ModelService(registry=model_registry)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ModelRegistry checks (**OPTIONAL**)\n", + "\n", + "From the code above, it should auto add models from your config files (which can displayed from printing below), but you can also add your own models as shown below!" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['openai:gpt-4o-audio-preview-2024-12-17',\n", + " 'openai:dall-e-3',\n", + " 'openai:dall-e-2',\n", + " 'openai:gpt-4o-audio-preview-2024-10-01',\n", + " 'openai:gpt-4o-realtime-preview-2024-10-01',\n", + " 'openai:gpt-4o-transcribe',\n", + " 'openai:gpt-4o-mini-transcribe',\n", + " 'openai:gpt-4o-realtime-preview',\n", + " 'openai:gpt-4o-mini-tts',\n", + " 'openai:gpt-4o-realtime-preview-2024-12-17',\n", + " 'openai:text-embedding-3-large',\n", + " 'openai:gpt-4',\n", + " 'openai:text-embedding-ada-002',\n", + " 'openai:gpt-4o-mini-audio-preview',\n", + " 'openai:gpt-4o-audio-preview',\n", + " 'openai:gpt-4o-mini-realtime-preview',\n", + " 'openai:gpt-4o-mini-realtime-preview-2024-12-17',\n", + " 'openai:gpt-3.5-turbo-instruct-0914',\n", + " 'openai:gpt-4o-mini-search-preview',\n", + " 'openai:gpt-4-turbo-preview',\n", + " 'openai:gpt-4-0125-preview',\n", + " 'openai:gpt-3.5-turbo-1106',\n", + " 'openai:gpt-4-turbo',\n", + " 'openai:gpt-3.5-turbo-instruct',\n", + " 'openai:gpt-3.5-turbo',\n", + " 'openai:gpt-4o-mini-search-preview-2025-03-11',\n", + " 'openai:gpt-4o-2024-11-20',\n", + " 'openai:gpt-3.5-turbo-0125',\n", + " 'openai:gpt-4o-2024-05-13',\n", + " 'openai:gpt-3.5-turbo-16k',\n", + " 'openai:gpt-4-turbo-2024-04-09',\n", + " 'openai:gpt-4-1106-preview',\n", + " 'openai:gpt-4-0613',\n", + " 'openai:gpt-4o-search-preview',\n", + " 'openai:gpt-4.5-preview',\n", + " 'openai:gpt-4.5-preview-2025-02-27',\n", + " 
'openai:gpt-4o-search-preview-2025-03-11',\n", + " 'openai:text-embedding-3-small',\n", + " 'openai:gpt-4o',\n", + " 'openai:gpt-4o-mini',\n", + " 'openai:gpt-4o-2024-08-06',\n", + " 'openai:gpt-4o-mini-2024-07-18',\n", + " 'openai:gpt-4o-mini-audio-preview-2024-12-17']" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_registry.list_models()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error registering model: Model 'openai:text-embedding-3-large' is already registered.\n" + ] + } + ], + "source": [ + "# Register an OpenAI text-embedding model\n", + "try:\n", + " openai_info = ModelInfo(\n", + " id=\"openai:text-embedding-3-large\",\n", + " name=\"text-embedding-3-large\",\n", + " cost=ModelCost(input_cost_per_thousand=0.03, output_cost_per_thousand=0.06),\n", + " rate_limit=RateLimit(tokens_per_minute=80000, requests_per_minute=5000),\n", + " provider=ProviderInfo(name=\"OpenAI\", default_api_key=openai_key),\n", + " api_key=openai_key,\n", + " )\n", + " model_registry.register_model(openai_info)\n", + "except ValueError as e:\n", + " print(\"Error registering model:\", e)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test example of ModelRegistry (**OPTIONAL**)\n", + "taken from `src/ember/core/registry/model/examples/example.py`" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "model_ids: List[str] = [\n", + " \"openai:o1\",\n", + " \"openai:gpt-4o\",\n", + " \"openai:gpt-4o-mini\",\n", + " # \"anthropic:claude-3.5-sonnet\", # API key not working\n", + " # \"invalid:model\", # Expected to trigger an error.\n", + " # \"google:model/gemini-1.5-pro\", # need to fix model alignment\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + 
{ + "name": "stdout", + "output_type": "stream", + "text": [ + "➡️ Testing model: openai:o1\n", + "❌ Error with model openai:o1: [Error 3002] Model 'openai:o1' not found. Available models:\n", + "- openai:gpt-4o-audio-preview-2024-12-17\n", + "- openai:dall-e-3\n", + "- openai:dall-e-2\n", + "- openai:gpt-4o-audio-preview-2024-10-01\n", + "- openai:gpt-4o-realtime-preview-2024-10-01\n", + "- openai:gpt-4o-transcribe\n", + "- openai:gpt-4o-mini-transcribe\n", + "- openai:gpt-4o-realtime-preview\n", + "- openai:gpt-4o-mini-tts\n", + "- openai:gpt-4o-realtime-preview-2024-12-17\n", + "- openai:text-embedding-3-large\n", + "- openai:gpt-4\n", + "- openai:text-embedding-ada-002\n", + "- openai:gpt-4o-mini-audio-preview\n", + "- openai:gpt-4o-audio-preview\n", + "- openai:gpt-4o-mini-realtime-preview\n", + "- openai:gpt-4o-mini-realtime-preview-2024-12-17\n", + "- openai:gpt-3.5-turbo-instruct-0914\n", + "- openai:gpt-4o-mini-search-preview\n", + "- openai:gpt-4-turbo-preview\n", + "- openai:gpt-4-0125-preview\n", + "- openai:gpt-3.5-turbo-1106\n", + "- openai:gpt-4-turbo\n", + "- openai:gpt-3.5-turbo-instruct\n", + "- openai:gpt-3.5-turbo\n", + "- openai:gpt-4o-mini-search-preview-2025-03-11\n", + "- openai:gpt-4o-2024-11-20\n", + "- openai:gpt-3.5-turbo-0125\n", + "- openai:gpt-4o-2024-05-13\n", + "- openai:gpt-3.5-turbo-16k\n", + "- openai:gpt-4-turbo-2024-04-09\n", + "- openai:gpt-4-1106-preview\n", + "- openai:gpt-4-0613\n", + "- openai:gpt-4o-search-preview\n", + "- openai:gpt-4.5-preview\n", + "- openai:gpt-4.5-preview-2025-02-27\n", + "- openai:gpt-4o-search-preview-2025-03-11\n", + "- openai:text-embedding-3-small\n", + "- openai:gpt-4o\n", + "- openai:gpt-4o-mini\n", + "- openai:gpt-4o-2024-08-06\n", + "- openai:gpt-4o-mini-2024-07-18\n", + "- openai:gpt-4o-mini-audio-preview-2024-12-17 [Recovery: Check the model name and ensure it's correctly registered] [Context: 
caller_file='/Users/concon/research/ember-branch/ember/src/ember/core/registry/model/base/registry/model_registry.py', caller_function='get_model', caller_lineno=144]\n", + "➡️ Testing model: openai:gpt-4o\n", + "🛎️ Service response from openai:gpt-4o:\n", + "Quantum computing harnesses quantum mechanics to process information using quantum bits (qubits), which exist in multiple states simultaneously (superposition). It leverages entanglement and interference for complex computations, outperforming classical computers in certain tasks by solving problems exponentially faster, revolutionizing fields like cryptography, optimization, and drug discovery.\n", + "\n", + "🎯 Direct response from openai:gpt-4o:\n", + "The capital of France is Paris.\n", + "\n", + "➡️ Testing model: openai:gpt-4o-mini\n", + "🛎️ Service response from openai:gpt-4o-mini:\n", + "Quantum computing utilizes quantum bits, or qubits, which can exist in multiple states simultaneously due to superposition. This allows quantum computers to process complex problems at unprecedented speeds compared to classical computers. Entanglement further enhances processing power, enabling efficient solutions for tasks like cryptography, optimization, and complex simulations.\n", + "\n", + "🎯 Direct response from openai:gpt-4o-mini:\n", + "The capital of France is Paris.\n", + "\n" + ] + } + ], + "source": [ + "for model_id in model_ids:\n", + " try:\n", + " print(f\"➡️ Testing model: {model_id}\")\n", + "\n", + " # Two usage styles are demonstrated below:\n", + " # 1. Service-based invocation: Recommended for automatic usage tracking.\n", + " service_response: ChatResponse = llm.invoke_model(\n", + " model_id=model_id,\n", + " prompt=\"Explain quantum computing in 50 words\",\n", + " )\n", + " print(f\"🛎️ Service response from {model_id}:\\n{service_response.data}\\n\")\n", + "\n", + " # 2. 
Direct model instance usage: Useful for more granular or PyTorch-like workflows.\n", + " model = load_model(model_id=model_id, registry=model_registry)\n", + " direct_response: ChatResponse = model(\n", + " prompt=\"What's the capital of France?\"\n", + " )\n", + " print(f\"🎯 Direct response from {model_id}:\\n{direct_response.data}\\n\")\n", + "\n", + " except Exception as error:\n", + " print(f\"❌ Error with model {model_id}: {str(error)}\")\n", + " continue\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello! How can I assist you today?\n" + ] + } + ], + "source": [ + "response = llm(prompt=\"Hello!\", model_id=\"openai:gpt-4o\")\n", + "print(response.data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "---\n", + "\n", + "## Diversity Scores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Neural Similarity Scoring - Cosine Similarity\n", + "\n", + "- from `src/ember/core/utils/embedding_utils.py`" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cosine similarity Score: 0.7287\n", + "\n", + "Cosine similarity Score: 0.8205\n", + "\n", + "Cosine similarity Score: 1.0000\n", + "\n" + ] + } + ], + "source": [ + "embedding_model = create_openai_embedding_model()\n", + "cosine: CosineSimilarity = CosineSimilarity()\n", + "\n", + "text_a: str = \"Hello world!\"\n", + "text_b: str = \"Hello, world??\"\n", + "\n", + "diverse_text = [\"Bananas don't belong in briefcases\", \"Abraham Lincoln\", \"ERROR 404: Index Not Found\"]\n", + "\n", + "different_words_not_diverse_strs = [\"peanut butter and jelly\", \"bacon lettuce tomato\"]\n", + "\n", + "repetition_strs = [\"This is a sample text with lots of repetition.\", \n", + " \"This is a sample text with lots 
of repetition.\"]\n", + "\n", + "test_strings = [diverse_text, different_words_not_diverse_strs, repetition_strs]\n", + "\n", + "for test in test_strings:\n", + " score: float = calculate_text_similarity(\n", + " text1=test[0], text2=test[1], model=embedding_model, metric=cosine\n", + " )\n", + "\n", + " print(f\"Cosine similarity Score: {score:.4f}\")\n", + " print(\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compression Ratio\n", + "\n", + "- from `src/ember/core/utils/eval/evaluators.py`" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Compression Score: 0.5766\n", + "Is Correct: True\n", + "Metadata: {'responses': ['hi there', 'hi', 'hello', 'yo whatup']}\n" + ] + } + ], + "source": [ + "compression_evaluator = DiversityCompressionEvaluator()\n", + "\n", + "input_strs = [\"hi there\", \"hi\", \"hello\", \"yo whatup\"]\n", + "\n", + "# input_strs = [\"This is a sample text with lots of repetition.\", \n", + "# \"This is a sample text with lots of repetition.\",\n", + "# \"This is a sample text with lots of repetition.\"]\n", + "\n", + "edit_distance = compression_evaluator.evaluate(input_strs)\n", + "\n", + "print(f\"Compression Score: {edit_distance.score:.4f}\")\n", + "print(f\"Is Correct: {edit_distance.is_correct}\")\n", + "print(f\"Metadata: {edit_distance.metadata}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Edit Distance" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Edit Distance Score: 0.8301\n", + "Is Correct: True\n", + "Metadata: {'responses': ['hi there', 'hi', 'hello', 'yo whatup']}\n" + ] + } + ], + "source": [ + "distance_evaluator = DiversityEditDistanceEvaluator()\n", + "\n", + "input_strs = [\"hi there\", \"hi\", \"hello\", \"yo whatup\"]\n", 
+ "\n", + "# input_strs = [\"This is a sample text with lots of repetition.\", \n", + "# \"This is a sample text with lots of repetition.\",\n", + "# \"This is a sample text with lots of repetition.\"]\n", + "\n", + "edit_distance = distance_evaluator.evaluate(input_strs)\n", + "\n", + "print(f\"Edit Distance Score: {edit_distance.score:.4f}\")\n", + "print(f\"Is Correct: {edit_distance.is_correct}\")\n", + "print(f\"Metadata: {edit_distance.metadata}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Novelty Score (WIP)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Novelty Score: 0.3319\n", + "Is Correct: True\n", + "Metadata: {'responses': ['hi there', 'hi', 'hello', 'yo whatup'], 'novelty_scores': [1.0, 0.11153064719273964, 0.07012108639942194, 0.1458646243357714]}\n" + ] + } + ], + "source": [ + "novelty_evaluator = DiversityNoveltyEvaluator(create_openai_embedding_model())\n", + "\n", + "input_strs = [\"hi there\", \"hi\", \"hello\", \"yo whatup\"]\n", + "\n", + "novelty = novelty_evaluator.evaluate(input_strs)\n", + "\n", + "print(f\"Novelty Score: {novelty.score:.4f}\")\n", + "print(f\"Is Correct: {novelty.is_correct}\")\n", + "print(f\"Metadata: {novelty.metadata}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "---\n", + "\n", + "## Ensembled Score Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Function to combine all the scores (cosine similarity, compression ratio, edit distance)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "text_embedding_ada_002 = create_openai_embedding_model(\"text-embedding-ada-002\")\n", + "\n", + "cosine_evaluator = DiversityCosineSimilarityEvaluator(embedding_model=text_embedding_ada_002)\n", + "compression_evaluator = 
DiversityCompressionEvaluator()\n", + "edit_dist_evaluator = DiversityEditDistanceEvaluator()\n", + "diversity_evaluator = DiversityEnsembledEvaluator(embedding_model=embedding_model)\n", + "\n", + "def ensemble_diversity(strings):\n", + " div_cosine = cosine_evaluator.evaluate(strings).score\n", + " div_compression = compression_evaluator.evaluate(strings).score\n", + " div_edit = edit_dist_evaluator.evaluate(strings).score\n", + " div_ensemble_score = diversity_evaluator.evaluate(strings).score\n", + " return div_cosine, div_compression, div_edit, div_ensemble_score" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creating a list of strings" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "input_strs = []\n", + "scores = []\n", + "input_strs.append([\"This is a sample text with lots of repetition.\", \n", + " \"This is a sample text with lots of repetition.\",\n", + " \"This is a sample text with lots of repetition.\"])\n", + "\n", + "responses = []\n", + "for i in range(10):\n", + " res = llm(prompt=\"Tell me a funny joke. Keep it concise.\", model_id=\"openai:gpt-4o\").data.replace(\"\\n\", \"\")\n", + " responses.append(res)\n", + "input_strs.append(responses)\n", + "\n", + "responses = []\n", + "res = llm(prompt=\"Tell me 10 different jokes. make them split with \\'||\\'. Don't say anything else besides the joke. \", model_id=\"openai:gpt-4o\").data.replace(\"\\n\", \"\").split('||')\n", + "responses += res\n", + "input_strs.append(responses)\n", + "\n", + "responses = []\n", + "res1 = llm(prompt=\"Tell me a story about how quantum computers work. Make it 100 words. Don't say anything else besides the story. \", model_id=\"openai:gpt-4o\").data\n", + "responses.append(res1)\n", + "res2 = llm(prompt=\"Tell me a story about bunnies frolicking in the grass. Make it 100 words. Don't say anything else besides the story. 
\", model_id=\"openai:gpt-4o\").data\n", + "responses.append(res2)\n", + "res3 = llm(prompt=\"Tell me a story about the pokemon pikachu and it's adventures. Make it 100 words. Don't say anything else besides the story. \", model_id=\"openai:gpt-4o\").data\n", + "responses.append(res3)\n", + "res4 = llm(prompt=\"Tell me a story about a ramen shop. Make it 100 words. Don't say anything else besides the story. \", model_id=\"openai:gpt-4o\").data\n", + "responses.append(res4)\n", + "input_strs.append(responses)\n", + "\n", + "input_strs.append([\n", + " \"Understanding the importance of effective communication in the workplace cannot be overstated. Clear communication fosters a positive environment where people can express their ideas and work together efficiently. When team members understand one another, they can collaborate seamlessly, avoid misunderstandings, and achieve collective goals. Furthermore, communication skills are essential for building trust, resolving conflicts, and ensuring that expectations are clear. Whether through verbal discussions, emails, or presentations, knowing how to convey thoughts in an understandable way is key to success in any professional setting.\",\n", + " \"In any workplace, the ability to communicate effectively is crucial for success. When individuals can clearly articulate their ideas and listen actively, it leads to a more productive and harmonious environment. Good communication prevents misunderstandings, aids in team collaboration, and helps in meeting shared objectives. It also plays a vital role in fostering trust among colleagues, resolving disputes, and ensuring transparency. Whether it’s through face-to-face conversations, written messages, or virtual meetings, mastering communication is essential to creating a positive, high-functioning work culture.\",\n", + " \"Effective communication is a cornerstone of a successful work environment. 
When employees communicate clearly and efficiently, it improves the overall flow of work and enhances collaboration. Clear exchanges of ideas help to eliminate confusion, build mutual trust, and ensure that everyone is aligned in their goals. Additionally, strong communication skills are key to managing conflicts and setting clear expectations among teams. Whether in meetings, emails, or other formats, being able to communicate effectively contributes to a thriving and efficient workplace.\",\n", + " \"The role of communication in the workplace cannot be overlooked. It serves as the foundation for successful teamwork and organizational growth. When team members share information clearly, it promotes a collaborative atmosphere and reduces the risk of errors or misinterpretations. Strong communication is also vital in building relationships, resolving issues, and making sure everyone is on the same page. Whether it's verbal exchanges or written correspondence, honing your ability to communicate well is vital for fostering an effective work environment.\",\n", + " \"Communication within the workplace is a vital element for success. Clear and open communication promotes a cooperative and efficient atmosphere, helping team members to better understand each other’s ideas and work toward common goals. It reduces confusion, builds trust, and allows for smoother problem-solving when conflicts arise. By conveying thoughts and expectations effectively, individuals can create stronger working relationships and a productive team dynamic. 
Whether through emails, phone calls, or face-to-face interactions, mastering communication techniques is key for professional achievement.\",\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 -------------------\n", + "\t- This is a sample text with lots of repetition.\n", + "\t- This is a sample text with lots of repetition.\n", + "\t- This is a sample text with lots of repetition.\n", + "1 -------------------\n", + "\t- Why don't scientists trust atoms?Because they make up everything!\n", + "\t- Why don't skeletons fight each other? They don't have the guts.\n", + "\t- Why don't skeletons fight each other? They don't have the guts!\n", + "\t- Why don't skeletons fight each other? They don't have the guts!\n", + "\t- Why don't skeletons fight each other? They don't have the guts.\n", + "\t- Why did the scarecrow win an award? Because he was outstanding in his field!\n", + "\t- Why do cows have hooves instead of feet? Because they lactose!\n", + "\t- Why don't scientists trust atoms?Because they make up everything!\n", + "\t- Why did the scarecrow win an award? Because he was outstanding in his field!\n", + "\t- Why don't scientists trust atoms? Because they make up everything!\n", + "2 -------------------\n", + "\t- Why did the scarecrow win an award? Because he was outstanding in his field! \n", + "\t- Why don’t scientists trust atoms? Because they make up everything! \n", + "\t- What do you call fake spaghetti? An impasta! \n", + "\t- Why was the math book sad? Because it had too many problems. \n", + "\t- How does a penguin build its house? Igloos it together. \n", + "\t- What do you call cheese that isn't yours? Nacho cheese! \n", + "\t- Why did the bicycle fall over? Because it was two-tired! \n", + "\t- How does a cucumber become a pickle? It goes through a jarring experience. \n", + "\t- Why did the golfer bring two pairs of pants? 
In case he got a hole in one! \n", + "\t- What do you get when you cross a snowman and a vampire? Frostbite!\n", + "3 -------------------\n", + "\t- In a vibrant quantum lab, tiny particles danced. Unlike regular bits that were zeros or ones, qubits spun like magical coins, showing both at once. Entwined like celestial partners, these qubits shared secrets through entanglement, defying distances. Scientists orchestrated this cosmic ballet, using algorithms like ancient spells to solve complex puzzles. As qubits calmed into states with precision, their whispers unlocked solutions to problems once deemed impossible. The quantum computer, a symphony of probability and possibility, became a lighthouse of advancement, shining new paths in medicine, cryptography, and beyond, merging the mysteries of the quantum world with human ingenuity.\n", + "\t- In a meadow kissed by sunlight, a group of bunnies joyfully frolicked in the lush, emerald grass. Their soft fur shimmered in the golden rays as they leapt and twirled, ears flopping jauntily. One brave bunny ventured to the center, starting a playful chase that sent them all scampering in mirthful circles. As they paused, catching breath in the shade of a towering oak, a gentle breeze whispered through the trees. The bunnies nuzzled together, sharing the serenity of the moment, before resuming their joyful dance, celebrating the simple delight of life under the endless sky.\n", + "\t- In the lush forest of Viridian, Pikachu trotted joyfully with its trainer, Ash. They stumbled upon a mysterious glowing berry that Pikachu eagerly nibbled. Suddenly, Pikachu was enveloped in a golden aura, granting it the power to understand other Pokémon's thoughts. With this newfound ability, Pikachu led a mission to resolve a dispute between two rival Pokémon clans: the Squirtle Squad and the Charmander League. Through communication and empathy, Pikachu fostered a harmonious agreement, creating a united community. 
Victorious, Pikachu and Ash celebrated under the twinkling stars, their bond stronger than ever, ready for the next adventure.\n", + "\t- Under dim lantern light, Suki's ramen shop thrived quietly in a bustling Tokyo alley. Each bowl crafted was a symphony of flavors, secrets passed from her grandmother. The irresistible aroma lured weary souls seeking solace in slurps. One rainy night, a wandering musician left his tune as thanks. Enchanted, Suki played his melody, infusing it into her broth. Customers, entranced by the unseen harmony, felt warmth beyond the soup. Word spread of this magical amalgamation, and soon, seekers of both food and peace filled the tiny shop. Suki's ramen didn’t just feed bodies; it nourished spirits.\n", + "4 -------------------\n", + "\t- Understanding the importance of effective communication in the workplace cannot be overstated. Clear communication fosters a positive environment where people can express their ideas and work together efficiently. When team members understand one another, they can collaborate seamlessly, avoid misunderstandings, and achieve collective goals. Furthermore, communication skills are essential for building trust, resolving conflicts, and ensuring that expectations are clear. Whether through verbal discussions, emails, or presentations, knowing how to convey thoughts in an understandable way is key to success in any professional setting.\n", + "\t- In any workplace, the ability to communicate effectively is crucial for success. When individuals can clearly articulate their ideas and listen actively, it leads to a more productive and harmonious environment. Good communication prevents misunderstandings, aids in team collaboration, and helps in meeting shared objectives. It also plays a vital role in fostering trust among colleagues, resolving disputes, and ensuring transparency. 
Whether it’s through face-to-face conversations, written messages, or virtual meetings, mastering communication is essential to creating a positive, high-functioning work culture.\n", + "\t- Effective communication is a cornerstone of a successful work environment. When employees communicate clearly and efficiently, it improves the overall flow of work and enhances collaboration. Clear exchanges of ideas help to eliminate confusion, build mutual trust, and ensure that everyone is aligned in their goals. Additionally, strong communication skills are key to managing conflicts and setting clear expectations among teams. Whether in meetings, emails, or other formats, being able to communicate effectively contributes to a thriving and efficient workplace.\n", + "\t- The role of communication in the workplace cannot be overlooked. It serves as the foundation for successful teamwork and organizational growth. When team members share information clearly, it promotes a collaborative atmosphere and reduces the risk of errors or misinterpretations. Strong communication is also vital in building relationships, resolving issues, and making sure everyone is on the same page. Whether it's verbal exchanges or written correspondence, honing your ability to communicate well is vital for fostering an effective work environment.\n", + "\t- Communication within the workplace is a vital element for success. Clear and open communication promotes a cooperative and efficient atmosphere, helping team members to better understand each other’s ideas and work toward common goals. It reduces confusion, builds trust, and allows for smoother problem-solving when conflicts arise. By conveying thoughts and expectations effectively, individuals can create stronger working relationships and a productive team dynamic. 
Whether through emails, phone calls, or face-to-face interactions, mastering communication techniques is key for professional achievement.\n" + ] + } + ], + "source": [ + "for i in range(len(input_strs)):\n", + " print(f\"{i} -------------------\")\n", + " for s in input_strs[i]:\n", + " print(\"\\t- \" + s)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 5/5 [02:12<00:00, 26.55s/it]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAJOCAYAAACqS2TfAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAUZ5JREFUeJzt3QeYVOX5N+CXIk0RVEREUTRqAEFUEIMaK4olxhY1agSNmqIk9gQ0gtiNJZjYG5ZoNLFXjKLYsLdYwNgQLIgt2Kn7Xc/7XbP/XViqe9h239c12Z0zZ868M3Mi+zvPWxqVlZWVJQAAAKDaNa7+QwIAAABB6AYAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgAAACiI0A0AAAAFEboBAACgIEI3AAAAFEToBoA5NGrUKJ144omprrv22mtTly5d0lJLLZXatm1brcc+4IADUufOnVNtNWbMmPw9xs+Gqr6cxwB1ndANwFzeeuut9Otf/zqtueaaqUWLFmnZZZdNm266aTrvvPPSt99+W9PNYyGMHz8+B+Mf/OAH6bLLLkuXXnrpPPeNYBYBrXRr1apVWm211dLOO++cRo4cmaZNm5bqg+uvvz6NGDGi2o9b+twOPvjgKh8//vjjy/f55JNPFvn4Y8eOzd/R//73v2poLQBLWqOysrKyJf6qANRad999d9pzzz1T8+bN04ABA1L37t3T9OnT02OPPZZuvvnmHOTmF+Dqg++++y41bdo03+qqiy++OP32t79Nb7zxRlprrbXmu28EuuHDh6eLLrooLbPMMjlkv//+++m+++7LgW+99dZLd911V+rUqVP5c2bMmJFmz56dz5PaKNoW522zZs1S48b/v8bwk5/8JL3yyitpwoQJ1fpaEabj4lTcPvroo/yaFcXFqw8//DCfVx9//HFq167dIh3/7LPPTscee2x65513Fql3QX04jwHqA/8VBqBc/FH/85//PK2++urpwQcfTCuvvHL5Y4cddlh68803cyivj0ohrRSe6ropU6bkn4vSrfxnP/tZpUA4dOjQdN111+WLL3Eh5sknnyx/LLqs1+R3tCARtJfk97j99tunO+64I917771pl112Kd8eFy3i/1d77LFHvmhVtPp2HgPUB7qXA1Duz3/+c/rqq6/SFVdcUSlwl0TF9PDDDy+/P3PmzHTyySfnLsxR8Ywq3HHHHTdXd+TYHlXGGF/bu3fv1LJly9SjR4/y8ba33HJLvh8hoVevXumFF16o9PyorkcF9u233079+/dPSy+9dOrYsWM66aST0pwdtqIquMkmm6QVVlghv04c76abbqqyOjlo0KAcKtddd93c/lGjRlU5FvbLL79MRxxxRH4fsV/79u3Ttttum55//vlKx/zXv/6VXy9eN8LrL37xi1wxruq9xPZdd901/77iiiumY445Js2aNWu
hvqcLL7ywvM3xOcQFkYpdj6Odw4YNy7/Hsb/P2N799tsvd5t+6qmn0v3331/pfZSqrlH1Xn755dOBBx441/O/+OKL/L3G+yuJ8yPaF+dTvIeooP/hD3+Y67yZ33d0ww035M+6devWefhDnD8x/GFeY7q33HLLfMHo3XffLe/qHe2P8z3Op4rndcl7772XmjRpkk4//fQFfk6rrLJK2nzzzXMX9oqi7dG26DFSlfhcI7C3adMmd+vfYost0uOPP17+eHxvUeUOa6yxRnnbS9X6RTmPQ5x3Bx10UD5vYt84ZvSIiKBe+i6j18Paa6+dv7f4/9Fmm21W6bsHYNGodANQ7s4778xdYSO0LowIY1dffXWukB599NE5QERAGTduXLr11lsr7RtV8n333TePFY8wGuE4xgxHN+gI6oceemjeL56/1157pddff728W3CIQBrh5Ec/+lG+OBDBIoJbBP8I3yURvH7605/msBhBIsJZVGmje/ROO+1UqU1Rzf/nP/+ZQ0uE5Hl13f3Nb36Tg3vs161bt/Tpp5/m7vbxPjfccMO8z1VXXZVD50YbbZTfQ3QzjrZEgIqLCBUrzvFe4uLBxhtvnD+HBx54IJ1zzjn54kUEoIXpCt6vX7+8b3xO0S38mWeeya8VFegYt3zNNdfk76DUZTy6iC+u/fffPw8p+Pe//50vNswpXnO33XbLF08uueSSSt2rb7vtthymowdFqRIb3098fr/61a9S165d08svv5z+8pe/pP/+9795/wV9RxEA99lnn7TNNtukM888M+8X30W8/6rCc2lc9dSpU3OQjtcK8bnELdp+4403pnPPPTeH7JJ//OMf+aJOnEsLI87veP0I8nHcODfjQsxRRx2Vu3rPKd7bDjvskC8exLkc53uMod96663To48+mvr06ZN23333/LlEW6LdpZ4IcTFlfp9RVT744IN8zLhAE599TLIXITzO7W+++SZ/b3F+xfkb/9+OfeOiybPPPpsvMFX13QOwEGJMNwBMnTo1SsZlu+yyy0Lt/+KLL+b9Dz744ErbjznmmLz9wQcfLN+2+uqr521jx44t33bfffflbS1btix79913y7dfcskleftDDz1Uvm3gwIF52+9+97vybbNnzy7baaedypo1a1b28ccfl2//5ptvKrVn+vTpZd27dy/beuutK22P4zVu3Ljs1Vdfneu9xWPDhg0rv9+mTZuyww47bJ6fRbxG+/bt8+t8++235dvvuuuufKyhQ4fO9V5OOumkSsfYYIMNynr16jXP1whTpkzJ73e77bYrmzVrVvn2888/Px/zyiuvLN8W7Y9tFT+beVnQvp9//nl+fLfddqv0PuJ7nfP7vPPOOys9d8cddyxbc801y+9fe+21+XN/9NFHK+138cUX5+c//vjjC/yODj/88LJll122bObMmfN8T3H+zHkexflSsc1ztv3ee++ttH299dYr22KLLeb5GhXbGefHZ599lr+feI/h7rvvLmvUqFHZhAkT5vqM4/xde+21y/r3759/r3j+rrHGGmXbbrtt+bazzjorP/edd96p8rUX9jweMGBA3veZZ56Za99SG3r27Jk/JwCqj+7lAGRR0QrRXXdh3HPPPflnVPEqiop3mHPsd1SI+/btW34/qrwhqnoxU/ac26Mr+ZyikldS6lYb1eyoFJdE1+6Szz//PFc3f/zjH8/VFTxEV95o14JElTqq+FEprEpUAmMMdVTrK46jjcp6VBOrGgcf1fOKoo1VveeK4n3G+42u7hV7ARxyyCG5i3VR4+2jalvqZj8v8T1GlTUqxhU//6hK77333uXbovIb1e34XGIm79Itnh8eeuihBX5H8X18/fXX1dblOXoNRHfr6KJdEhOu/ec//8m9MhbWcsstl3tjRFU6RFfz6DUScyTM6cUXX8yT3EV1PHpOlD6HeF9RwX/kkUdyr4CFsTDncRwrehFE75IY4jGn+P9T6bN99dVXc9sAqB5CNwBZhLYFBau
KYmxsBL85Z8bu0KFD/sM9Hq+oYrAOMYY1VJwRu+L2CGwVxWtF1/eK1llnnfyz4mzU0Y08uqBH+I1xxtENN7pYR/ieU4xnXRjRnT1CWLQ1utxGF9yKAbn0Xn/4wx/O9dwIl3N+FtG2it2DS4Ftzvc8p3m9TnQLjs9mztepLtFdekEXZGKG7Jgs7Pbbby8fmx3dzWOMcMXQHWEuQl28/4q30ndZmgBuft9RXNyI/aNr9qqrrpp++ctflo9jXhxxbkUX8gil0c06RACP7ymGJiyKCNFxMWDixIn5eHG/KqVQO3DgwLk+i8svvzx/hlWds1VZmPM4Zk2PC2vzGlteEkM1ovt5fL4xFj3Gk8fFBwAWn9ANQHnojmpfhMtFUaqQLUjFsbILs31xVrSMcbAxXjjCUkw2FtX4CEARfKo6XsWq+PzEGPMI2X/729/yZ3TWWWflSatipurFMa/3XFuVzokFLT0W47bjok3pc4lxxnHRoWfPnpUqrhHm4nup6lYa2z+/7ygmsotKccwWHt93VMcjgEeAXVwxQ3tcXIigHOdKVKlj8r/SRaCFFe2JCcqiLRGc49ypSqmKHefSvD6LUg+DBVnY83hhxGRwb731VrryyitzQI8LADFvQfwEYPGYSA2AchEyYsKsJ554olJX8KpEl9kIDlGxi+7CJTGBWFTKqupS+33Ea0XwLVVEQ0wwFUoTR8WSTBG4Y33piutHx+RU31fM5h6BMG5RjY0gcuqpp+awV3qvMalZqZt0SWyrrs+i4utUrPpHl/NYliq6SRfh2muvzT9j8rcFBbb4nKKLecx4HRN8xQRmFcVkcS+99FLuQr2wF2yqEtX96Codtzg34nuJSdxOOOGEeV4cmN/rRcDcYIMNcoU7qudRqY6LLIsqAnDMSv/3v/89nxvzWpM7PofSxa4FfW/f53MqiQp6vNbCXFQrzUQft7gQEd9r9O6IydUAWHQq3QCUi2WbYvmk+OM6wvOcogJWWpZpxx13zD9jpuyKYgboMOdM4dXh/PPPL/89qpFxP2bOjgBXqiBHQKm49FZ0PZ9zRuxFEceas5tvVFqj4l3qRh1jZGNbzMRecdmrqPjGrNrV9VlEOIuw+de//rVS5T6WeIs2FvGZR8U3qpxxEab0Oc+vm3bMZB+z4EdQj9m7K3YtD1H5jRmzL7vssrme/+233+YxzQsSY6DnfN3S7OxzLjtWUZzb8+uyHbO0xwztcU7HUlkRmhdHLI8Ws5HHBYB5iRnLI3jH7PWl7vtzdgev2O5QcVm4RRWfUVwMiO8m5iCYU+l8mvOzjWp7XMSY3+cKwPypdANQLkJAhKwISlG9ji63UQGMSurYsWPzJFixPnOILsPRhTYq4xEGYjKnp59+Oi8hFn/cb7XVVtXatqhgx7jdeM2YbC0CbUwcFsuNlcZHR+iM0B+TWUWX8qhIX3DBBTk0LO641OguHZXPCJPxniOExIRmsURXLPMVIvjH0lVRGYzPIZazKi0ZFlX4I488slo+g3ifQ4YMyUuGxXuMrsxR9Y6u9LFU2aJM+lWVWDoq3l983xGMo8dALMMV7zu++4UR505UiCN0Rjfyir0gSsE2up3HRHLRLXzTTTfNFzbGjx+ft8drVjXRV0VxUeizzz7LvQriu4mx7PGa66+//lyvN2fQjSp8TP4Xn1e816iUl8Q5ExeeYqm1WI4tvtfFEZ9XxS718wrBcTEjgn0MVYhzJ9b6js89PpeoSkdALrU7RK+B6MIf7Yp2l8L4wjrttNPyRYU4R0vLtX344Yf5u40l3GIuhpiQLdY0j9eMincE9NJyeQAspmqcCR2AeuK///1v2SGHHFLWuXPnvARS69atyzbddNOyv/3tb2Xfffdd+X4zZswoGz58eF7iaKmllirr1KlT2ZAhQyrtE2KZpqqWISottVRRLIsU22OZpIrLUy299NJlb731Vl4uq1WrVmUrrbRSXg6p4tJZ4YorrshLMTVv3rysS5cuZSNHjixfrml
Br13VUkvTpk0rO/bYY/NSSvE5RDvi9wsvvHCu591444156a947eWXX75sv/32K3vvvfcq7VN6L3Oqqo3zEkuExXuLzzw+h9/+9rd5Wa+qjrcoS4aVbi1atChbddVVy37yk5/kZcjm/D5L76Oq5bdi6ak4D+I4p5xyyjyXWDvzzDPL1l133fxZLbfccnm5tDiXYum6BX1HN910Uz4PYpm2OD9XW221sl//+tdlH3744XyXDPvqq6/K9t1337K2bdvmx6pqfyxxNufydgsyv3NpQd/HCy+8ULb77ruXrbDCCvmziDbttddeZaNHj66038knn1y2yiqr5CW/Ki4ftrDncUkszxdLh6244or59WI5t3h+nOchvrM+ffrkzyiW84vz7NRTT83fGQCLp1H8z+IGdgBYEqK6HtW2qrrhQnXabbfd0ssvv5zefPPNmm4KAPWEMd0AACnlrtYxZCG6wANAdTGmGwBo0GLm9xi7HmOsY7z0r3/965puEgD1iEo3ANCgPfzww7m6HeE7JgLs0KFDTTcJgHrEmG4AAAAoiEo3AAAAFEToBgAAgII0uInUZs+enT744IPUunXr1KhRo5puDgAAAHVQjNT+8ssvU8eOHVPjxvOuZze40B2Bu1OnTjXdDAAAAOqBSZMmpVVXXXWejze40B0V7tIHs+yyy9Z0cwAAAKiDvvjii1zQLWXMeWlwobvUpTwCt9ANAADA97GgYcsmUgMAAICCCN0AAABQEKEbAAAACtLgxnQDwLyWlJw+fXpNN4NabKmllkpNmjSp6WYAUMcI3QA0eBG233nnnRy8YX7atm2bOnTosMBJcwCgROgGoEErKytLH374Ya5gxrIfjRsbeUXV58k333yTpkyZku+vvPLKNd0kAOoIoRuABm3mzJk5THXs2DG1atWqpptDLdayZcv8M4J3+/btdTUHYKG4nA9AgzZr1qz8s1mzZjXdFOqA0oWZGTNm1HRTAKgjhG4ASMkYXRaK8wSARSV0AwAAQEGEbgCgSldddVWerRsAWHwmUgOAKnQefPcSfb0JZ+y0WM+bPHlyOvXUU9Pdd9+d3n///TzB1/rrr5+OOOKItM0223yvNu29995pxx13TEUbM2ZM2mqrrdLnn38u5ANQ7wjdAFBHTZgwIW266aY5qJ511lmpR48eeYKv++67Lx122GFp/Pjx33u27tKM3QDA4tG9HADqqEMPPTRP7PX000+nPfbYI62zzjpp3XXXTUcddVR68skn8z4TJ05Mu+yyS1pmmWXSsssum/baa6/00UcflR/jpZdeylXm1q1b58d79eqVnn322Sq7l5944om5in7ttdemzp07pzZt2qSf//zn6csvvyzfZ/bs2en0009Pa6yxRg7sPXv2TDfddNMiva/S68bFg65du+a2b7/99nk99fDvf/87tWjRIv3vf/+r9LzDDz88bb311ov5aQJAMYRuAKiDPvvsszRq1Khc0V566aXnejxCawTgCNyx78MPP5zuv//+9Pbbb+du4yX77bdfWnXVVdMzzzyTnnvuuTR48OC01FJLzfN133rrrXTbbbelu+66K9/iuGeccUb54xG4r7nmmnTxxRenV199NR155JHpF7/4Rd5vUcTa6WeffXYO+I888ki+eHDMMcfkx6LbfLy/m2++udLSbzfeeGN+PwBQm+heDgB10JtvvpnKyspSly5d5rnP6NGj08svv5zeeeed1KlTp7wtAnFUwyNkb7TRRjnMHnvsseXHWXvttef7uhHkoxIdlfGw//7759eJceXTpk1Lp512WnrggQdS37598+Nrrrlmeuyxx9Ill1yStthii4V+f9FNPoL7D37wg3x/0KBB6aSTTsq/N2nSJFfYr7/++nTQQQeVv9eofEfFHwBqE5VuAKiDInAvyLhx43LYLgXu0K1bt1wljsdCdEU/+OCDU79+/XLFOirZ8xPdykuBO6y88sppypQp5RcCokK97bbb5i7hpVsE/dJxI/CXtu+
www7zfJ1WrVqVB+45XydERTsmYPvggw/y/euuuy7ttNNOJmIDoNap0dAd3cV23nnn1LFjxzwmLbqrLUj8A7vhhhum5s2bp7XWWitfbQeAhiYq0vFv5/edLC3GaUc38AisDz74YA7lt9566zz3n7PrebQhqt/hq6++yj9jJvUXX3yx/Pbaa6+Vj+u+5557yrdffvnli/Q6FS80RJU+QvkNN9yQvv3229xmXcsBqI1qNHR//fXXeYKVCy64YKH2j+5x8UdBTPgS/1jHcihxdT4mWgGAhmT55ZdP/fv3z/+Gxr+nc4qu1jEJ2aRJk/KtJAJwPBbhuiQmYIux1zFB2e67755Gjhy5WG2KY8ZF8eiyHhfGK95K1fbVV1+9fNsqq6ySvo8I2VHhvvPOO1Pjxo3z3wgAUNvU6Jju6FY2v65lc4qxXTEb6jnnnJPvxx8TMU7sL3/5S/7DAwAakgjcsWRYnz598njn9dZbL82cOTNPmHbRRRflgB3LiEU4HTFiRH4sZjyPsdW9e/fOFeIYz/2zn/0s//v63nvv5bHeizsuOrqdx2RnEeCj+r3ZZpulqVOnpscffzzPjD5w4MBqff/xvqJSH+PJ4z1E4AeA2qZOTaT2xBNP5DFnFUXYjor3vMSkLnEr+eKLLwptIwAsKTFJ2fPPP59D59FHH52X1FpxxRXzsl8RuqNL9u23355+97vfpc033zxXg2Pprb/97W/lE5J9+umnacCAAXkZsXbt2uVK9/Dhwxe7TSeffHJuQ8xiHjOlxxjrGBZ23HHHpeoW1fK44BBLpsVFBQCojRqVLcxMLEtA/GEQ47F23XXXee4T3d8OPPDANGTIkPJtMTYsupPFxC2xHuic4gp4VX88xJX3uOpem3UefHeqLSacocseUD999913efhSVHpj7WeYH+cLABULum3atFlgtqz3s5dHQI8PoXSrOK4NAAAAilSnupd36NAhd3+rKO7HVYWqqtwhxncZ4wUAAEBNqFOV7r59+6bRo0dX2haTxcR2AAAAqG1qNHTHep6ltTpDjJGK32OpkVLX8JjcpeQ3v/lNnpTlD3/4Q16X9MILL0z//Oc/8yypAAAAUNvUaOh+9tln0wYbbJBv4aijjsq/Dx06NN+PWVhLATzEpCV33313rm7H+t6xdNjll19uuTAAAABqpRod073lllum+U2eftVVV1X5nBdeeKHglgEAAEADG9MNAAAAdYnQDQAAAAURugEAAKAgQjcAUC907tw5jRgxoqabAQC1ZyI1AKi1TmyzhF9v6mI9bfLkyenUU0/Nq3u8//77qX379mn99ddPRxxxRNpmm21SQ/LMM8+kpZdeuqabAQCVCN0AUEdNmDAhbbrppqlt27bprLPOSj169EgzZsxI9913XzrssMPS+PHjU20SbVtqqaUKO/6KK65Y2LEBYHHpXg4AddShhx6aGjVqlJ5++um0xx57pHXWWSetu+666aijjkpPPvlk3mfixIlpl112Scsss0xadtll01577ZU++uij8mOceOKJuTJ+5ZVXptVWWy3vF8edNWtW+vOf/5w6dOiQq+dRTa8oXveiiy5KO+ywQ2rZsmVac80100033VTpgkDsc+ONN6YtttgitWjRIl133XX5scsvvzx17do1b+vSpUu68MILy583ffr0NGjQoLTyyivnx1dfffV0+umn58dimdFob7SzefPmqWPHjun3v//9PLuXL+x7v/baa/Nz27Rpk37+85+nL7/8spq/KQAaMpVuAKiDPvvsszRq1KgchqvqUh3V79mzZ5eHzocffjjNnDkzV8D33nvvNGbMmPJ933rrrXTvvffm48XvP/vZz9Lbb7+dQ3w8b+zYsemXv/xl6tevX9p4443Ln3fCCSekM844I5133nk5uEZgffnll3OgLhk8eHA655xz0gYbbFAevIcOHZrOP//8vO2FF15IhxxySH4PAwcOTH/961/THXfckf75z3/mcD1p0qR8CzfffHP6y1/
+km644YZ8cSG61r/00ktVfj6L8t5vu+22dNddd6XPP/88B/N4T3NeZACAxSV0A0Ad9Oabb+bKb1SK52X06NE5BL/zzjupU6dOeds111yTA2uMf95oo43KA2pUulu3bp26deuWttpqq/T666+ne+65JzVu3Dj98Ic/TGeeeWZ66KGHKoXuPffcMx188MH595NPPjndf//96W9/+1ulynWMLd99993L7w8bNiyH8NK2NdZYI7322mvpkksuyaE7qtNrr7122myzzXKlPCrdJfFYVN4j/Ec39Qjlffr0+d7v/aqrrsrvPey///75uUI3ANVF93IAqIMicC/IuHHjcuAshc4QoTqq4PFYSXStLoXOsNJKK+X9InBX3DZlypRKx+/bt+9c9yseN/Tu3bv896+//jpXlg866KBcgS7dTjnllLw9HHDAAenFF1/MQT+6jv/73/+uFPK//fbb3JU9quO33nprrmBX53uPbu1zvk8A+D6EbgCog6IaHJXg6pgsbc7JzeK4VW2LqvCiqtj1/auvvso/L7vsshysS7dXXnmlfAz6hhtumKvTUTmPgB3dvaO7e4gAHRX4qKTHOPIYe7755pvnCdoWV3W9TwCYF6EbAOqg5ZdfPvXv3z9dcMEFuYI8p//97395bHXFMdEhunLHY1H1/b5KQbni/YrjuecU1fKY/CzGi6+11lqVbtHNvCQmPYux1xHOYyK2GMsdY9hDhO2dd945j/2OsdlPPPFE7kY+p6LfOwAsLGO6AaCOisAdS4bFuOaTTjoprbfeerm7dYytjpnFI2TGMmL77bdfntU7HovqcMwmXrHb9+L617/+lY8T469jgrSYRf2KK66Y73OGDx+eu43HTOHbb799mjZtWnr22WfzJGYx6/q5556bu3jHJGvRvT1eI8ZxR7fwGHsds6rHuPJWrVqlv//97zmEVxz3XRLjvot87wCwsFS6AaCOirHNzz//fJ747Oijj07du3dP2267bZ4ILEJ3dJW+/fbb03LLLZe7YUcQjedE9bg6RICOmcQj7MckZf/4xz8WWEWOiddiybCRI0fmUBwhOMJ0qdId46tjqbIIxjHZWSw9VprQLYJ3VL/jQkO85gMPPJDuvPPOtMIKK8z1OkW/dwBYWI3KFmYmlnrkiy++yFfXp06dmruv1WadB9+daosJZ+xU000AKMR3332XxxBH6IslrVg4EWpjIrNdd901NSTOFwAWNVvqXg4AACwRiko0RLqXAwAAQEFUugGARdbARqcBwGJT6QYAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgAAACiI0A0AAAAFEboBoIFo1KhRuu222/LvEyZMyPdffPHFmm4WANRr1ukGgCr0uLrHEn29lwe+vEj7H3DAAenqq6+ea3v//v3TqFGjFvj8Tp06pQ8//DC1a9cu3x8zZkzaaqut0ueff57atm27SG0BAOZN6AaAOmr77bdPI0eOrLStefPmC/XcJk2apA4dOhTUMgCgRPdyAKijImBHcK54W2655fJjb7zxRtp8881TixYtUrdu3dL9999f6bkVu5fH71HlDvH82B6VdADg+1PpBoB6Zvbs2Wn33XdPK620UnrqqafS1KlT0xFHHDHfruY333xz2mOPPdLrr7+ell122dSyZcsl2mYAqK+EbgCoo+666660zDLLVNp23HHHpd69e6fx48en++67L3Xs2DFvP+2009IOO+wwz67myy+/fP69ffv2xnQDQDUSugGgjoou4RdddFGlbRGer7322ly9LgXu0Ldv3xpoIQAgdANAHbX00kuntdZaq6abAQDMh4nUAKCe6dq1a5o0aVJeEqzkySefnO9zmjVrln/OmjWr8PYBQEOi0g0AddS0adPS5MmTK21r2rRp6tevX1pnnXXSwIED01lnnZW++OKLdPzxx8/3WKuvvnqetTzGie+44455IrU5x4sDAItOpRsA6qhRo0allVdeudJts802S40bN0633npr+vb
bb1OfPn3SwQcfnE499dT5HmuVVVZJw4cPT4MHD86zng8aNGiJvQ8AqM8alZWVlaUGJK72t2nTJi+fEkui1GadB9+daosJZ+xU000AKMR3332X3nnnnbTGGmvkNa1hfpwv8P34+5aGmC1VugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEBBhG4AAAAoiNANAAAABRG6AQAAoCBCNwCwREyYMCE1atQovfjii/PcZ8yYMXmf//3vf9/rtTp37pxGjBjxvY4BANWhabUcBQDqmXFdui7R1+s6ftwi7X/AAQekq6++eq7t/fv3T6NGjarGlgEA34fQDQB11Pbbb59GjhxZaVvz5s1rrD0AwNx0LweAOioCdocOHSrdlltuufxYdNG+/PLL02677ZZatWqV1l577XTHHXeUP/fzzz9P++23X1pxxRVTy5Yt8+MVA/ykSZPSXnvtldq2bZuWX375tMsuu+Tu4RUr7bvuums67bTT0korrZT3O+mkk9LMmTPTsccem5+z6qqrznVRIIwfPz5tsskmqUWLFql79+7p4Ycfnu/7fOyxx9KPf/zj3M5OnTql3//+9+nrr78uf3zKlClp5513zo+vscYa6brrrvveny0AVBehGwDqqeHDh+fg/J///CftuOOOOWR/9tln+bETTjghvfbaa+nee+9N48aNSxdddFFq165dfmzGjBm5m3rr1q3To48+mh5//PG0zDLL5Mr69OnTy4//4IMPpg8++CA98sgj6dxzz03Dhg1LP/nJT3Lwf+qpp9JvfvOb9Otf/zq99957ldoVofzoo49OL7zwQurbt28OzJ9++mmV7+Gtt97Kr7vHHnvk93HjjTfmED5o0KBKFwDiIsFDDz2UbrrppnThhRfmIA4AtYHQDQB11F133ZXDcMVbVJ4rhtF99tknrbXWWnn7V199lZ5++un82MSJE9MGG2yQevfunScd69evXw6/IYLt7Nmzc6W8R48eqWvXrrliHc+Jic5Kopr917/+Nf3whz9Mv/zlL/PPb775Jh133HG5cj5kyJDUrFmzHJIrisAcITqOG2G/TZs26YorrqjyPZ5++un5YsERRxyRjxkV8njNa665Jn333Xfpv//9b75wcNlll6Uf/ehHqVevXvlY3377bUGfOgAsGmO6AaCO2mqrrXJorSiCcMl6661X/vvSSy+dll122fIK8G9/+9scfJ9//vm03Xbb5a7iEWjDSy+9lN58881c6a4oQm5UnkvWXXfd1Ljx/12/j27m0V28pEmTJmmFFVaYq+oc1e2Spk2b5uAf1faqRFuiwl2xy3hZWVm+KPDOO+/k0B3HiLBd0qVLl9zdHQBqA6EbAOqoCNJRxZ6XpZZaqtL9GOcdYTXssMMO6d1330333HNPuv/++9M222yTDjvssHT22WfniniE2KrGRscY8Pkdf36vuTiiLdFFPcZxz2m11VbLoRsAajPdywGggYoAPXDgwPT3v/89r2l96aWX5u0bbrhheuONN1L79u1zqK94i67g39eTTz5Z/ntMvPbcc8/lruZVibbE2PM52xG36LoeVe3SMUpef/31773ONwBUF6EbAOqoadOmpcmTJ1e6ffLJJwv13KFDh6bbb789dyN/9dVX8/jwUvCNMdQxqVrMWB4TqUU37hjLHdXmOSdFWxwXXHBBuvXWW/Ms5lFdj5nUY0x4Vf74xz+msWPH5nHgL774Yr4YEO0uTaQW48hjorWohsfkbRG+Dz744DyTOQDUBkI3ANRRo0aNSiuvvHKl22abbbZQz40qcUx0FuO+N9988zz++oYbbsiPxRJjMSN5dN/efffdcxg/6KCD8pjuGBf+fZ1xxhn51rNnzzzJWixlVpo5fU7RvlhSLLqRx7JhMflbXDDo2LFj+T4xyVvc32KLLXJ7f/WrX+UqPQDUBo3KYjaSBuSLL77IXeOmTp1aLX84FKnz4LtTbTHhjJ1qugkAhYggGZXcWN851o2
G+XG+wPfj71saYrZU6QYAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgBIKTWweUVZTM4TABaV0A1AgxZLZYXp06fXdFOoA7755pv8c6mllqrppgBQRzSt6QYAQE1q2rRpXpf6448/zkGqcWPXo6m6wh2Be8qUKalt27blF2sAYEGEbgAatEaNGqWVV145r7387rvv1nRzqOUicHfo0KGmmwFAHSJ0A9DgNWvWLK299tq6mDNf0RNChRuARSV0A0BMctK4cWrRokVNNwMAqGcMXAMAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgAAACiI0A0AAAAFEboBAACgIEI3AAAAFEToBgAAgIII3QAAAFAQoRsAAAAKInQDAABAQYRuAAAAKEjTog4MNEydB9+daosJZ+xU000AAKCBU+kGAACAggjdAAAAUBChGwAAAAoidAMAAEB9Dd0XXHBB6ty5c2rRokXaeOON09NPPz3f/UeMGJF++MMfppYtW6ZOnTqlI488Mn333XdLrL0AAACwsGo0dN94443pqKOOSsOGDUvPP/986tmzZ+rfv3+aMmVKlftff/31afDgwXn/cePGpSuuuCIf47jjjlvibQcAAIBaHbrPPffcdMghh6QDDzwwdevWLV188cWpVatW6corr6xy/7Fjx6ZNN9007bvvvrk6vt1226V99tlngdVxAAAAaFChe/r06em5555L/fr1+7/GNG6c7z/xxBNVPmeTTTbJzymF7Lfffjvdc889accdd5zn60ybNi198cUXlW4AAACwJDRNNeSTTz5Js2bNSiuttFKl7XF//PjxVT4nKtzxvM022yyVlZWlmTNnpt/85jfz7V5++umnp+HDh1d7+wEAAKDWT6S2KMaMGZNOO+20dOGFF+Yx4Lfccku6++6708knnzzP5wwZMiRNnTq1/DZp0qQl2mYAAAAarhqrdLdr1y41adIkffTRR5W2x/0OHTpU+ZwTTjgh7b///unggw/O93v06JG+/vrr9Ktf/Sodf/zxuXv6nJo3b55vAAAA0GAq3c2aNUu9evVKo0ePLt82e/bsfL9v375VPuebb76ZK1hHcA/R3RwAAABqkxqrdIdYLmzgwIGpd+/eqU+fPnkN7qhcx2zmYcCAAWmVVVbJ47LDzjvvnGc832CDDfKa3m+++Waufsf2UvgGAACA2qJGQ/fee++dPv744zR06NA0efLktP7666dRo0aVT642ceLESpXtP/3pT6lRo0b55/vvv59WXHHFHLhPPfXUGnwXAAAAUAtDdxg0aFC+zWvitIqaNm2ahg0blm8AAABQ29Wp2csBAACgLhG6AQAAoCBCNwAAABRE6AYAAICCCN0AAABQX2cvBwBg0XUefHeqLSacsVNNNwGg1lLpBgAAgIII3QAAAFAQoRsAAAAKInQDAABAQYRuAAAAKIjQDQAAAAURugEAAKAg1ukGAKDe6HF1j1QbvDzw5ZpuAlBLqHQDAABAQYRuAAAAKIjQDQAAAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEBBhG4AAAAoSNOiDgwAdUXnwXen2mLCGTvVdBMAgGqk0g0AAAAFEboBAACgIEI3AAAAFEToBgAAgIII3QAAAFAQoRsAAAAKInQDAABAQYRuAAAAKIjQDQAAAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAArStKgDAwB1W4+re6Ta4OWBL9d0EwBgsal0AwAAQEFUugEAAGqQnkX1m0o3AAAAFEToBgAAgIII3QAAAFAQoRsAAAAKInQDAABAQYRuAAAAKIjQDQAAAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEBBhG4AAAAoiNANAAAABRG6AQAAoCBCNwAAABRE6AYAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgAAACiI0A0AAAAFEboBAACgIEI
3AAAAFKRpUQcGABbDiW1SrbHGajXdAgCo81S6AQAAoCBCNwAAABRE6AYAAICCCN0AAABQEKEbAAAACiJ0AwAAQG0K3TNnzkwPPPBAuuSSS9KXX36Zt33wwQfpq6++qu72AQAAQMNZp/vdd99N22+/fZo4cWKaNm1a2nbbbVPr1q3TmWeeme9ffPHFxbQUAAAA6nul+/DDD0+9e/dOn3/+eWrZsmX59t122y2NHj26utsHAAAADafS/eijj6axY8emZs2aVdreuXPn9P7771dn2wAAAKBhVbpnz56dZs2aNdf29957L3czBwAAABYzdG+33XZpxIgR5fcbNWqUJ1AbNmxY2nHHHRf1cAAAAFBvLXL38rPPPjtPpNatW7f03XffpX333Te98cYbqV27dukf//hHMa0EAACAhhC6O3XqlF566aV044035p9R5T7ooIPSfvvtV2liNQAAAGjoFil0z5gxI3Xp0iXdddddOWTHDQAAAKiGMd1LLbVU7lIOAAAAFDCR2mGHHZbOPPPMNHPmzEV9KgAAADQoixy6n3nmmXTLLbek1VZbLfXv3z/tvvvulW6L6oILLshrfLdo0SJtvPHG6emnn57v/v/73/9y8F955ZVT8+bN0zrrrJPuueeeRX5dAAAAqHUTqbVt2zbtscce1fLiMRnbUUcdlS6++OIcuGMpsgjyr7/+emrfvv1c+0+fPj1tu+22+bGbbroprbLKKundd9/NbQIAAIA6H7pHjhxZbS9+7rnnpkMOOSQdeOCB+X6E77vvvjtdeeWVafDgwXPtH9s/++yzNHbs2Dy+PESVHAAAAOpF9/KSjz/+OD322GP5Fr8vqqhaP/fcc6lfv37/15jGjfP9J554osrn3HHHHalv3765e/lKK62Uunfvnk477bQ0a9aseb7OtGnT0hdffFHpBgAAALUydH/99dfpl7/8ZR5Tvfnmm+dbx44d81rd33zzzUIf55NPPslhOcJzRXF/8uTJVT7n7bffzt3K43kxjvuEE05I55xzTjrllFPm+Tqnn356atOmTfkt1hkHAACAWhm6Ywz2ww8/nO688848qVncbr/99rzt6KOPTkWaPXt2Hs996aWXpl69eqW99947HX/88blb+rwMGTIkTZ06tfw2adKkQtsIAAAAiz2m++abb87V5i233LJ824477phatmyZ9tprr3TRRRct1HHatWuXmjRpkj766KNK2+N+hw4dqnxOVNdjLHc8r6Rr1665Mh7d1Zs1azbXc2KG87gBAABAra90RxfyObuEh6hAL0r38gjIUa0ePXp0pUp23I9x21XZdNNN05tvvpn3K/nvf/+bw3hVgRsAAADqVOiOQDxs2LD03XfflW/79ttv0/Dhw+cZlufXVf2yyy5LV199dRo3blz67W9/m8eMl2YzHzBgQO4eXhKPx+zlhx9+eA7bMdN5TKQWE6sBAABAne9eft555+W1tFddddXUs2fPvO2ll15KLVq0SPfdd98iHSvGZMfM50OHDs1dxNdff/00atSo8kr6xIkT84zmJTEJWrzGkUcemdZbb728TncE8D/+8Y+L+jYAAACg9oXuWKbrjTfeSNddd10aP3583rbPPvuk/fbbL4/rXlSDBg3Kt6qMGTNmrm1RTX/yyScX+XUAAACg1ofu0KpVq3TIIYdUf2sAAACgIY/pjnWvr7zyyrm2x7YzzzyzutoFAAAADS90X3LJJalLly5zbV933XXnu142AAAANDSLHLpjwrNYomtOK664Yvrwww+rq10AAADQ8EJ3zCD++OOPz7U9tnXs2LG62gUAAAANbyK1mEDtiCOOSDNmzEhbb7113jZ69Oj0hz/8IR199NFFtBEAAAAaRug+9thj06effpoOPfTQNH369Lwt1uiOtbKHDBlSRBsBAACgYYTuRo0a5VnKTzjhhDRu3Li8Nvfaa6+dmjdvXkwLAQAAoKGM6S5ZZpll0kYbbZRat26d3nrrrTR79uzqbRkAAAA
0lNAd63Cfe+65lbb96le/SmuuuWbq0aNH6t69e5o0aVIRbQQAAID6HbovvfTStNxyy5XfHzVqVBo5cmS65ppr0jPPPJPatm2bhg8fXlQ7AQAAoP6O6X7jjTdS7969y+/ffvvtaZdddkn77bdfvn/aaaelAw88sJhWAgAAQH2udH/77bdp2WWXLb8/duzYtPnmm5ffj27mkydPrv4WAgAAQH0P3auvvnp67rnn8u+ffPJJevXVV9Omm25a/ngE7jZt2hTTSgAAAKjP3csHDhyYDjvssBy2H3zwwdSlS5fUq1evSpXvmEwNAAAAWMTQ/Yc//CF988036ZZbbkkdOnRI//rXvyo9/vjjj6d99tlnYQ8HAAAA9d5Ch+7GjRunk046Kd+qMmcIBwAAgIZuocd0AwAAAItG6AYAAICCCN0AAABQEKEbAAAAakvofuihh4ppCQAAADT00L399tunH/zgB+mUU05JkyZNKqZVAAAA0JCWDCt5//3307XXXpuuvvrqNHz48LT11lungw46KO26666pWbNmxbQSAGiwxnXpmmqLruPH1XQTAKjvle527dqlI488Mr344ovpqaeeSuuss0469NBDU8eOHdPvf//79NJLLxXTUgAAAGhIE6ltuOGGaciQIWnQoEHpq6++SldeeWXq1atX+vGPf5xeffXV6mslAAAANJTQPWPGjHTTTTelHXfcMa2++urpvvvuS+eff3766KOP0ptvvpm37bnnntXfWgAAAKjPY7p/97vfpX/84x+prKws7b///unPf/5z6t69e/njSy+9dDr77LNzd3MAAABoyBY5dL/22mvpb3/7W9p9991T8+bN5znu29JiAAAANHSL3L182LBhuev4nIF75syZ6ZFHHsm/N23aNG2xxRbV10oAAABoCKF7q622Sp999tlc26dOnZofAwAAABaze3mM5W7UqNFc2z/99NM8nhuAufW4ukeqDV4e+HJNNwEAoEFZ6NAdY7hDBO4DDjigUvfyWbNmpf/85z9pk002KaaVAAAAUJ9Dd5s2bcor3a1bt04tW7Ysf6xZs2bpRz/6UTrkkEOKaSUAAADU59A9cuTI/LNz587pmGOO0ZUcAAAAqntMd8xeDgAAAFRT6N5www3T6NGj03LLLZc22GCDKidSK3n++ecX5pAAAABQ7y1U6N5ll13KJ07bddddi24TAAAANJzQXbFLue7lAAAAsHAap0U0adKk9N5775Xff/rpp9MRRxyRLr300kU9FAAAANRrixy699133/TQQw/l3ydPnpz69euXg/fxxx+fTjrppCLaCAAAAA0jdL/yyiupT58++fd//vOfqUePHmns2LHpuuuuS1dddVURbQQAAICGEbpnzJhRPqnaAw88kH7605/m37t06ZI+/PDD6m8hAAAANJTQve6666aLL744Pfroo+n+++9P22+/fd7+wQcfpBVWWKGINgIAAEDDCN1nnnlmuuSSS9KWW26Z9tlnn9SzZ8+8/Y477ijvdg4AAAAs5JJhJWVlZWnNNddMEydOTDNnzkzLLbdc+WO/+tWvUqtWrYpoIwAAANT/SneE7rXWWivPWl4xcIfOnTun9u3bV3f7AAAAoGGE7saNG6e11147ffrpp8W1CAAAABrqmO4zzjgjHXvssXnpMAAAAKCaxnSHAQMGpG+++SZPoNasWbPUsmXLSo9/9tlni3pIAAAAqJcWOXSPGDGimJYAAABAQw/dAwcOLKYlAAAA0NDHdIe33nor/elPf8rrdE+ZMiVvu/fee9Orr75a3e0DAACAhhO6H3744dSjR4/01FNPpVtuuSV99dVXeftLL72Uhg0bVkQbAQAAoGF0Lx88eHA65ZRT0lFHHZVat25dvn3rrbdO559/fnW3DwAA6pxxXbqm2qLr+HE13QRo0Ba50v3yyy+n3Xbbba7t7du3T5988kl1tQsAAAAaXuhu27Zt+vDDD+fa/sILL6RVVlmlutoFAAAADS90//znP09
//OMf0+TJk1OjRo3S7Nmz0+OPP56OOeaYvIY3AAAAsJih+7TTTktdunRJnTp1ypOodevWLW2++eZpk002yTOaAwAAAIs5kVqzZs3SZZddlk444YT0yiuv5OC9wQYbpLXXXntRDwUAAAD12iKH7sceeyxtttlmabXVVss3AAAAoJq6l8fSYGussUY67rjj0muvvbaoTwcAAIAGY5FD9wcffJCOPvro9PDDD6fu3bun9ddfP5111lnpvffeK6aFAAAA0FBCd7t27dKgQYPyjOVvvfVW2nPPPdPVV1+dOnfunKvgAAAAwGKG7oqim/ngwYPTGWeckXr06JGr3wAAAMD3DN1R6T700EPTyiuvnPbdd9/c1fzuu+9e3MMBAABAvbPIs5cPGTIk3XDDDXls97bbbpvOO++8tMsuu6RWrVoV00IAAABoKKH7kUceSccee2zaa6+98vhuAAAAoJpCd3QrBwAAAKopdN9xxx1phx12SEsttVT+fX5++tOfLswhAQAAoN5bqNC96667psmTJ6f27dvn3+elUaNGadasWdXZPmqLE9ukWuPEqTXdAgAAgOoL3bNnz67ydwAAAKAax3QDAECt7RG3xmo13QKAxQ/dUeW+6qqr0i233JImTJiQu5OvscYa6Wc/+1naf//9830AAADg/2ucFlJZWVmeJO3ggw9O77//furRo0dad91107vvvpsOOOCAtNtuuy3soQAAAKBBWOhKd1S4Y43u0aNHp6222qrSYw8++GCeYO2aa65JAwYMKKKdAAAAUH8r3f/4xz/ScccdN1fgDltvvXUaPHhwuu6666q7fQAAAFD/Q/d//vOftP3228/z8VjH+6WXXqqudgEAAEDDCd2fffZZWmmlleb5eDz2+eefV1e7AAAAoOGM6Z41a1Zq2nTeuzdp0iTNnDmzutoFAABQHEvdUdtCd8xeHrOUN2/evMrHp02bVp3tAgAAgIYTugcOHLjAfcxcDgAAAIsRukeOHLmwuwIAAACLMpEaAAAAsGiEbgAAAKjPofuCCy5InTt3Ti1atEgbb7xxevrppxfqeTfccENq1KhR2nXXXQtvIwAAANS50H3jjTemo446Kg0bNiw9//zzqWfPnql///5pypQp833ehAkT0jHHHJN+/OMfL7G2AgAAQJ0K3eeee2465JBD0oEHHpi6deuWLr744tSqVat05ZVXznfN8P322y8NHz48rbnmmku0vQAAAFAnQvf06dPTc889l/r16/d/DWrcON9/4okn5vm8k046KbVv3z4ddNBBS6ilAAAAUOCSYUX45JNPctV6pZVWqrQ97o8fP77K5zz22GPpiiuuSC+++OJCvca0adPyreSLL774nq0GAACAOtK9fFF8+eWXaf/990+XXXZZateu3UI95/TTT09t2rQpv3Xq1KnwdgIAAECNV7ojODdp0iR99NFHlbbH/Q4dOsy1/1tvvZUnUNt5553Lt82ePTv/bNq0aXr99dfTD37wg0rPGTJkSJ6orWKlW/AGAACg3ofuZs2apV69eqXRo0eXL/sVITruDxo0aK79u3Tpkl5++eVK2/70pz/lCvh5551XZZhu3rx5vgEAAECDCt0hqtADBw5MvXv3Tn369EkjRoxIX3/9dZ7NPAwYMCCtssoquZt4rOPdvXv3Ss9v27Zt/jnndgAAAEgNPXTvvffe6eOPP05Dhw5NkydPTuuvv34aNWpU+eRqEydOzDOaAwAAQF1T46E7RFfyqrqThzFjxsz3uVdddVVBrQIAAGg4xnXpmmqLruPHpfpCCRkAAAAKInQDAABAQYRuAAAAKIjQDQAAAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEBBmhZ1YIAad2KbVGussVpNtwAAgBqg0g0AAAAFEboBAACgIEI3AAAAFEToBgAAgIKYSA2gARnXpWuqLbqOH1fTTQAAKJxKNwAAABRE6AYAAICCCN0AAABQEKEbAAA
ACiJ0AwAAQEGEbgAAACiI0A0AAAAFEboBAACgIEI3AAAAFEToBgAAgIII3QAAAFAQoRsAAAAKInQDAABAQYRuAAAAKIjQDQAAAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEBBhG4AAAAoiNANAAAABRG6AQAAoCBCNwAAABRE6AYAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgAAACiI0A0AAAAFEboBAACgIEI3AAAAFEToBgAAgIII3QAAAFAQoRsAAAAKInQDAABAQYRuAAAAKIjQDQAAAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEBBhG4AAAAoiNANAAAABRG6AQAAoCBCNwAAABRE6AYAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgAAACiI0A0AAAAFEboBAACgIEI3AAAAFEToBgAAgIII3QAAAFAQoRsAAAAKInQDAABAQYRuAAAAKIjQDQAAAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEB9Dt0XXHBB6ty5c2rRokXaeOON09NPPz3PfS+77LL04x//OC233HL51q9fv/nuDwAAAA02dN94443pqKOOSsOGDUvPP/986tmzZ+rfv3+aMmVKlfuPGTMm7bPPPumhhx5KTzzxROrUqVPabrvt0vvvv7/E2w4AAAC1OnSfe+656ZBDDkkHHnhg6tatW7r44otTq1at0pVXXlnl/tddd1069NBD0/rrr5+6dOmSLr/88jR79uw0evToJd52AAAAqLWhe/r06em5557LXcTLG9S4cb4fVeyF8c0336QZM2ak5ZdfvsCWAgAAwKJrmmrQJ598kmbNmpVWWmmlStvj/vjx4xfqGH/84x9Tx44dKwX3iqZNm5ZvJV988cX3bDUAAADUke7l38cZZ5yRbrjhhnTrrbfmSdiqcvrpp6c2bdqU32IMOAAAANT70N2uXbvUpEmT9NFHH1XaHvc7dOgw3+eeffbZOXT/+9//Tuutt9489xsyZEiaOnVq+W3SpEnV1n4AAACotaG7WbNmqVevXpUmQStNita3b995Pu/Pf/5zOvnkk9OoUaNS79695/sazZs3T8suu2ylGwAAANT7Md0hlgsbOHBgDs99+vRJI0aMSF9//XWezTwMGDAgrbLKKrmbeDjzzDPT0KFD0/XXX5/X9p48eXLevswyy+QbAAAA1BY1Hrr33nvv9PHHH+cgHQE6lgKLCnZpcrWJEyfmGc1LLrroojzr+c9+9rNKx4l1vk888cQl3n4AAACotaE7DBo0KN+qMmbMmEr3J0yYsIRaBQAAAA149nIAAACozYRuAAAAKIjQDQAAAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEBBhG4AAAAoiNANAAAABRG6AQAAoCBCNwAAABRE6AYAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgAAACiI0A0AAAAFaVrUgaG+G9ela6otuo4fV9NNAAAAqqDSDQAAAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEBBhG4AAAAoiNANAAAABRG6AQAAoCBCNwAAABRE6AYAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgAAACiI0A0AAAAFEboBAACgIEI3AAAAFEToBgAAgIII3QAAAFAQoRsAAAAKInQDAABAQYRuAAAAKIjQDQAAAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEBBhG4AAAAoiNANAAAABRG6AQAAoCBCNwAAABRE6AYAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgAAACiI0A0AAAAFEboBAACgIEI3AAAAFEToBgAAgIII3QAAAFAQoRsAAAAKInQDAABAQYRuAAAAKIjQDQA
AAAURugEAAKAgQjcAAAAUROgGAACAggjdAAAAUBChGwAAAAoidAMAAEBBhG4AAAAoiNANAAAABRG6AQAAoCBCNwAAABRE6AYAAICCCN0AAABQEKEbAAAACiJ0AwAAQEGEbgAAACiI0A0AAAD1OXRfcMEFqXPnzqlFixZp4403Tk8//fR89//Xv/6VunTpkvfv0aNHuueee5ZYWwEAAKDOhO4bb7wxHXXUUWnYsGHp+eefTz179kz9+/dPU6ZMqXL/sWPHpn322ScddNBB6YUXXki77rprvr3yyitLvO0AAABQq0P3ueeemw455JB04IEHpm7duqWLL744tWrVKl155ZVV7n/eeeel7bffPh177LGpa9eu6eSTT04bbrhhOv/885d42wEAAKDWhu7p06en5557LvXr1+//GtS4cb7/xBNPVPmc2F5x/xCV8XntDwAAADWlaY29ckrpk08+SbNmzUorrbRSpe1xf/z48VU+Z/LkyVXuH9urMm3atHwrmTp1av75xRdfpNpu9rRvUm3xRaOyVGvUku/uq1mzUm1Rm85n523VZn1bO84X523VnLdVc97OzXlbNeft3Jy3VXPeVs15W7vP2wW1saysrPaG7iXh9NNPT8OHD59re6dOnWqkPXVVm1SLnFGrWlM7tPGZVKV2fSrjUm3QJ9Uiztsq1a5PxXk7F+dtlWrXp+K8nYvztkq161Nx3tbl8/bLL79MbebT3hoN3e3atUtNmjRJH330UaXtcb9Dhw5VPie2L8r+Q4YMyRO1lcyePTt99tlnaYUVVkiNGjWqlvfBkr2aFBdMJk2alJZddtmabg4sFOctdZHzlrrIeUtd5Lytu6LCHYG7Y8eO892vRkN3s2bNUq9evdLo0aPzDOSlUBz3Bw0aVOVz+vbtmx8/4ogjyrfdf//9eXtVmjdvnm8VtW3btlrfB0te/AfJf5Soa5y31EXOW+oi5y11kfO2bppfhbvWdC+PKvTAgQNT7969U58+fdKIESPS119/nWczDwMGDEirrLJK7iYeDj/88LTFFlukc845J+20007phhtuSM8++2y69NJLa/idAAAAQC0L3XvvvXf6+OOP09ChQ/NkaOuvv34aNWpU+WRpEydOzDOal2yyySbp+uuvT3/605/Scccdl9Zee+102223pe7du9fguwAAAIBaGLpDdCWfV3fyMWPGzLVtzz33zDcanhgqMGzYsLmGDEBt5rylLnLeUhc5b6mLnLf1X6OyBc1vDgAAACyW/+u3DQAAAFQroRsAAAAKInQDAABAQYRu6owLLrggde7cObVo0SJtvPHG6emnn67pJsF8PfLII2nnnXdOHTt2TI0aNcorLUBtFstzbrTRRql169apffv2adddd02vv/56TTcL5uuiiy5K6623Xvkax3379k333ntvTTcLFskZZ5yR/1Y44ogjaropFEDopk648cYb85ruMbPj888/n3r27Jn69++fpkyZUtNNg3n6+uuv87kaF4ygLnj44YfTYYcdlp588sl0//33pxkzZqTtttsun8tQW6266qo5sDz33HPp2WefTVtvvXXaZZdd0quvvlrTTYOF8swzz6RLLrkkXzyifjJ7OXVCVLaj+nL++efn+7Nnz06dOnVKv/vd79LgwYNrunmwQHH1+tZbb82VQ6grPv7441zxjjC++eab13RzYKEtv/zy6ayzzkoHHXRQTTcF5uurr75KG264YbrwwgvTKaecktZff/00YsSImm4W1Uylm1pv+vTp+ep1v379yrc1btw433/iiSdqtG0A9dnUqVPLAwzUBbNmzUo33HBD7p0R3cyhtoveRTvttFOlv3Opf5rWdANgQT755JP8j+hKK61UaXvcHz9+fI21C6A+ix5FMbZw0003Td27d6/p5sB8vfzyyzlkf/fdd2mZZZbJPYu6detW082C+YoLRDFsMrqXU78J3QBAldWXV155JT322GM13RRYoB/+8IfpxRdfzL0zbrrppjR
w4MA8LELwpraaNGlSOvzww/P8GTFJMPWb0E2t165du9SkSZP00UcfVdoe9zt06FBj7QKorwYNGpTuuuuuPAN/TFIFtV2zZs3SWmutlX/v1atXrhyed955eXIqqI1i6GRMCBzjuUuiZ2f8dzfmMJo2bVr++5f6wZhu6sQ/pPEP6OjRoyt1e4z7xmsBVJ+YWzUCd3TNffDBB9Maa6xR002CxRJ/J0Rogdpqm222ycMioodG6da7d++033775d8F7vpFpZs6IZYLi65i8R+jPn365FkdY5KUAw88sKabBvOdkfTNN98sv//OO+/kf0hjUqrVVlutRtsG8+pSfv3116fbb789r9U9efLkvL1NmzapZcuWNd08qNKQIUPSDjvskP+7+uWXX+ZzeMyYMem+++6r6abBPMV/Y+ecL2PppZdOK6ywgnk06iGhmzph7733zkvXDB06NP8RGMspjBo1aq7J1aA2ifVit9pqq0oXj0JcQLrqqqtqsGVQtYsuuij/3HLLLSttHzlyZDrggANqqFUwf9FFd8CAAenDDz/MF4hireMI3Ntuu21NNw0gs043AAAAFMSYbgAAACiI0A0AAAAFEboBAACgIEI3AAAAFEToBgAAgIII3QAAAFAQoRsAAAAKInQDAABAQYRuAAAAKIjQDQC11AEHHJAaNWqUb0sttVRaY4010h/+8If03Xff1XTTAICF1HRhdwQAlrztt98+jRw5Ms2YMSM999xzaeDAgTmEn3nmmTXdNABgIah0A0At1rx589ShQ4fUqVOntOuuu6Z+/fql+++/Pz82e/bsdPrpp+cKeMuWLVPPnj3TTTfdVP7czz//PO23335pxRVXzI+vvfbaOcCHCRMm5PB+ww03pE022SS1aNEide/ePT388MOVXj/u9+nTJ7dj5ZVXToMHD04zZ84sf3zLLbdMv//973MFfvnll89tPfHEE8sfLysry/dXW221fIyOHTvm/UumTZuWjjnmmLTKKqukpZdeOm288cZpzJgxhX6mALAkCd0AUEe88soraezYsalZs2b5fgTua665Jl188cXp1VdfTUceeWT6xS9+UR6cTzjhhPTaa6+le++9N40bNy5ddNFFqV27dpWOeeyxx6ajjz46vfDCC6lv375p5513Tp9++ml+7P3330877rhj2mijjdJLL72Un3/FFVekU045pdIxrr766hyYn3rqqfTnP/85nXTSSeUXBm6++eb0l7/8JV1yySXpjTfeSLfddlvq0aNH+XMHDRqUnnjiiRz+//Of/6Q999wzV/djXwCoDxqVxSVoAKBWjun++9//nqvQUV2OqnDjxo3TP//5z/STn/wkV5YfeOCBHJZLDj744PTNN9+k66+/Pv30pz/NIfvKK6+c69hR6Y4K+RlnnJH++Mc/5m3xGrHtd7/7Xa5cH3/88Tk0R2CPqni48MIL8/5Tp07NbYlK96xZs9Kjjz5afuyojG+99db52Oeee24O3HHBIMalVzRx4sS05ppr5p9RAS+Jan4c47TTTivkcwWAJcmYbgCoxbbaaqtcYf76669zxbhp06Zpjz32yJXtCNfbbrttpf2nT5+eNthgg/z7b3/727zv888/n7bbbrvcPT26kldUMbDHsXv37p1Ddoif8XgpcIdNN900ffXVV+m9997LXcbDeuutV+mY0Q19ypQp+feoXI8YMSKH66hgR+U8qunxWi+//HIO7Ouss06l58fFhRVWWKGaPkEAqFlCNwDUYtFte6211sq/R8U6xm1HF+8Yfx3uvvvuPB66ohg7HXbYYYf07rvvpnvuuSd3995mm23SYYcdls4+++xqbeOcFewI6THePMRY9Ndffz1X5KMNhx56aDrrrLNyF/gI702aNMkTxMXPipZZZplqbSMA1BRjugGgjoju3Mcdd1z605/+lLp165bDdXTNjlBe8RZBtyQmUYsZz6ObelScL7300krHfPLJJ8t/j+7lEYC7du2a78fPGG9dcSTa448
/nlq3bp1WXXXVhW53TOIW1e2//vWveZK0OGZUuaMiH5XuqIrP+R5iQjYAqA9UugGgDonu2jH5WYyTjlm/Y/K0qCpvttlmeZx1hOJll102B+2hQ4emXr16pXXXXTd32b7rrrvKA3XJBRdckGc1j+3RfT1mPP/lL3+ZH4uqdAT1GOMdE55FxXrYsGHpqKOOyhcAFsZVV12Vg3XMSt6qVasc/iOEr7766rkLecyuPmDAgHTOOefkEP7xxx+n0aNH5y7rO+20UyGfIQAsSUI3ANQhMRY6AnDMEv7OO+/kSnbMYv7222+ntm3bpg033DBXw0PMcj5kyJA8aVoE3R//+Md5lvCKYrKzuL344ou5wnzHHXeUz3Ae3daja3qE/OjWHhO3HXTQQbnSvrCiTXH8COoRvmPm8jvvvLN8zHYsYRazoccM6jFberz2j370ozxRHADUB2YvB4AGqDR7eSwVtv7669d0cwCg3jKmGwAAAAoidAMAAEBBdC8HAACAgqh0AwAAQEGEbgAAACiI0A0AAAAFEboBAACgIEI3AAAAFEToBgAAgIII3QAAAFAQoRsAAAAKInQDAABAKsb/A1WztvIewC6JAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "scores = [ensemble_diversity(s_arr) for s_arr in tqdm.tqdm(input_strs)]\n", + "labels = [str(number) for number in range(1, len(input_strs) + 1)]\n", + "\n", + "df = pd.DataFrame(\n", + " scores,\n", + " columns=['Cosine-Inv', 'Compression', 'Edit', 'Ensembled']\n", + ")\n", + "\n", + "plt.figure(figsize=(10, 6))\n", + "\n", + "x = range(len(df))\n", + "width = 0.2\n", + "\n", + "plt.bar([i - width*1.5 for i in x], df['Cosine-Inv'], width=width, label='Cosine-Inv')\n", + "plt.bar([i - width*0.5 for i in x], df['Compression'], width=width, label='Compression')\n", + "plt.bar([i + width*0.5 for i in x], df['Edit'], width=width, label='Edit')\n", + "plt.bar([i + width*1.5 for i in x], df['Ensembled'], width=width, label='Ensembled')\n", + "\n", + "plt.xticks(x, df.index if 'Input' not in df.columns else df['Input'])\n", + "\n", + "plt.xlabel(\"Response\")\n", + "plt.ylabel(\"Diversity Score\")\n", + "plt.legend()\n", + "plt.title(\"Comparison of Diversity Metrics\")\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example above:\n", + "- 1. Base case of just the same sentence every time\n", + "- 2. Asking the LLM 10 times for a joke (with the default temp being low, this should output the same joke most of the time)\n", + "- 3. Asking the LLM once for 10 different jokes\n", + "- 4. Asking the LLM 4 different times to create a story about 4 random topics (pre-selected)\n", + "- 5. 
Pre-selected LLM call, where it was told to produce 4, ~500 word paragraphs where it explained the exact same thing in a different way\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "---\n", + "## Potential Exploration\n", + "- work ensembling all \"diversity\" related metrics \n", + " - add more metrics\n", + " - tune added metrics\n", + "- combination of validation/hallucination metric + ensembled diversity metric -> score" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/ember/examples/operators/diversity_operators_example.py b/src/ember/examples/operators/diversity_operators_example.py new file mode 100644 index 00000000..cbf264fa --- /dev/null +++ b/src/ember/examples/operators/diversity_operators_example.py @@ -0,0 +1,107 @@ +import os +import logging + +# Set global logging level to ERROR +logging.basicConfig(level=logging.ERROR) + +os.environ["EMBER_LOGGING_LEVEL"] = "ERROR" + +# from ember.core.registry.model.model_module.lm import LMModule, LMModuleConfig +from ember.core.registry.model.config.settings import initialize_registry +from ember.core.registry.model.base.services.model_service import ModelService +from ember.core.utils.eval.evaluators import (DiversityCosineSimilarityEvaluator, + DiversityEnsembledEvaluator, + DiversityEditDistanceEvaluator, + DiversityNoveltyEvaluator, + DiversityCompressionEvaluator +) + +from ember.core.registry.model.providers.openai.openai_provider import create_openai_embedding_model + + +model_registry = initialize_registry() 
+logging.info(model_registry.list_models()) + +text_embedding_ada_002 = create_openai_embedding_model("text-embedding-ada-002") + +# List of text that represents completely n +very_diverse_text = ["Bananas don't belong in briefcases, but socks and t-shirts do!", + "Abraham Lincoln was the 16th president of the United States of America", + "ERROR 404: Index Not Found"] + +# This group of text all rephrase the same request, except +different_words_not_diverse_strs = ["Could you please lend me a hand with this?", + "Might you assist me with a task?", + "Can you spare a second to help me do something?"] + +repetition_strs = ["This is a sample text with lots of repetition.", + "This is a sample text with lots of repetition.", + "This is a sample text with lots of repetition."] + +# List of sample strings that have varying levels of diversity: +test_strings = [very_diverse_text, different_words_not_diverse_strs, repetition_strs] + + +# Measure Cosine similarity +cosine_similarity_evaluator = DiversityCosineSimilarityEvaluator(text_embedding_ada_002) + +print("\n" + "=" * 50 ) +print("Cosine Similarity Evaluator\n") +for i in range(len(test_strings)): + print(f"Computing cosine-similarity for the following strings: ") + for j in range(len(test_strings[i])): + print(f"String {j+1}: {test_strings[i][j]}") + score: float = cosine_similarity_evaluator.evaluate(system_output=test_strings[i]).score + print(f"Diversity score: {score}\n") + + +# Measure Edit Distance +print("=" * 50 + "\nEdit Distance Evaluator\n") +edit_distance_evaluator = DiversityEditDistanceEvaluator() + +for i in range(len(test_strings)): + print(f"Computing Edit-Distance for the following strings: ") + for j in range(len(test_strings[i])): + print(f"String {j+1}: {test_strings[i][j]}") + score: float = edit_distance_evaluator.evaluate(system_output=test_strings[i]).score + print(f"Edit-Distance score: {score}\n") +print("=" * 50 + "\n") + + +# Measure Novelty +print("=" * 50 + "\nNovelty Evaluator\n") 
+novelty_evaluator = DiversityNoveltyEvaluator() + +for i in range(len(test_strings)): + print(f"Computing Novelty for the following strings: ") + for j in range(len(test_strings[i])): + print(f"String {j+1}: {test_strings[i][j]}") + score: float = novelty_evaluator.evaluate(system_output=test_strings[i]).score + print(f"Novelty score: {score}\n") +print("=" * 50 + "\n") + + +# Measure Compression Ratio +print("=" * 50 + "\nCompression Ratio Evaluator\n") +novelty_evaluator = DiversityCompressionEvaluator() + +for i in range(len(test_strings)): + print(f"Computing Compression Ratio for the following strings: ") + for j in range(len(test_strings[i])): + print(f"String {j+1}: {test_strings[i][j]}") + score: float = novelty_evaluator.evaluate(system_output=test_strings[i]).score + print(f"Compression Ratio: {score}\n") +print("=" * 50 + "\n") + + +# Measure Ensembled Diversity +print("=" * 50 + "\nEnsembled Diversity Evaluator\n") +novelty_evaluator = DiversityCompressionEvaluator() + +for i in range(len(test_strings)): + print(f"Computing Ensembled Diversity Score for the following strings: ") + for j in range(len(test_strings[i])): + print(f"String {j+1}: {test_strings[i][j]}") + score: float = novelty_evaluator.evaluate(system_output=test_strings[i]).score + print(f"Ensembled Diversity Score: {score}\n") +print("=" * 50 + "\n") \ No newline at end of file