From 9bf84f18db265dc067e91bdd5549401db8735230 Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Tue, 3 Jun 2025 10:29:46 -0700 Subject: [PATCH 1/4] Remove website routine, no pkgdown site intended (#18) --- .github/workflows/R-CMD-check.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index dbc973811..c35f2cb8d 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -12,10 +12,10 @@ on: name: Package checks jobs: - website: - uses: rstudio/shiny-workflows/.github/workflows/website.yaml@v1 - with: - working-directory: ./r-package + # website: + # uses: rstudio/shiny-workflows/.github/workflows/website.yaml@v1 + # with: + # working-directory: ./r-package routine: uses: rstudio/shiny-workflows/.github/workflows/routine.yaml@v1 with: From c0e21801b4bd92311e89a55bfadb8c32f3c807f5 Mon Sep 17 00:00:00 2001 From: Garrick Aden-Buie Date: Tue, 3 Jun 2025 13:33:10 -0400 Subject: [PATCH 2/4] fix: No longer need to manually call `session$ns()` with shinychat (#10) --- r-package/DESCRIPTION | 2 +- r-package/R/querychat.R | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/r-package/DESCRIPTION b/r-package/DESCRIPTION index 6319c056a..4c38d1845 100644 --- a/r-package/DESCRIPTION +++ b/r-package/DESCRIPTION @@ -23,7 +23,7 @@ Imports: purrr, rlang, shiny, - shinychat, + shinychat (>= 0.2.0), whisker, xtable Encoding: UTF-8 diff --git a/r-package/R/querychat.R b/r-package/R/querychat.R index 891081dda..f97a867c6 100644 --- a/r-package/R/querychat.R +++ b/r-package/R/querychat.R @@ -175,7 +175,7 @@ querychat_server <- function(id, querychat_config) { append_output <- function(...) { txt <- paste0(...) 
shinychat::chat_append_message( - session$ns("chat"), + "chat", list(role = "assistant", content = txt), chunk = TRUE, operation = "append", @@ -259,11 +259,11 @@ querychat_server <- function(id, querychat_config) { # the chat model to see. if (!is.null(greeting)) { if (isTRUE(any(nzchar(greeting)))) { - shinychat::chat_append(session$ns("chat"), greeting) + shinychat::chat_append("chat", greeting) } } else { shinychat::chat_append( - session$ns("chat"), + "chat", chat$stream_async( "Please give me a friendly greeting. Include a few sample prompts in a two-level bulleted list." ) @@ -274,7 +274,7 @@ querychat_server <- function(id, querychat_config) { shiny::observeEvent(input$chat_user_input, { # Add user message to the chat history shinychat::chat_append( - session$ns("chat"), + "chat", chat$stream_async(input$chat_user_input) ) }) From c6a24a7884c7fab47ab7c246af5ca09fe46825a6 Mon Sep 17 00:00:00 2001 From: Joe Cheng Date: Tue, 3 Jun 2025 18:20:25 -0700 Subject: [PATCH 3/4] feat: genericizing data source using DataSource protocol: Supports SQLAlchemy and Pandas --- .gitignore | 2 + python-package/README.md | 14 +- python-package/examples/app-database.py | 57 +++ .../examples/{app.py => app-dataframe.py} | 8 +- python-package/pyproject.toml | 3 +- python-package/src/querychat/datasource.py | 299 ++++++++++++++++ python-package/src/querychat/prompt/prompt.md | 16 +- python-package/src/querychat/querychat.py | 329 +++++++++--------- 8 files changed, 558 insertions(+), 170 deletions(-) create mode 100644 python-package/examples/app-database.py rename python-package/examples/{app.py => app-dataframe.py} (82%) create mode 100644 python-package/src/querychat/datasource.py diff --git a/.gitignore b/.gitignore index 06e00a5cb..8229dee08 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ __pycache__/ animation.screenflow/ README_files/ README.html +.DS_Store +python-package/examples/titanic.db .quarto # Byte-compiled / optimized / DLL files diff --git 
a/python-package/README.md b/python-package/README.md index be8057ea0..9b29fb193 100644 --- a/python-package/README.md +++ b/python-package/README.md @@ -56,7 +56,7 @@ def server(input, output, session): # chat["df"]() reactive. @render.data_frame def data_table(): - return chat["df"]() + return chat.df() # Create Shiny app @@ -171,8 +171,8 @@ which you can then pass via: ```python querychat_config = querychat.init( - df=titanic, - table_name="titanic", + titanic, + "titanic", data_description=Path("data_description.md").read_text() ) ``` @@ -185,8 +185,8 @@ You can add additional instructions of your own to the end of the system prompt, ```python querychat_config = querychat.init( - df=titanic, - table_name="titanic", + titanic, + "titanic", extra_instructions=[ "You're speaking to a British audience--please use appropriate spelling conventions.", "Use lots of emojis! 😃 Emojis everywhere, 🌍 emojis forever. ♾️", @@ -218,8 +218,8 @@ def my_chat_func(system_prompt: str) -> chatlas.Chat: my_chat_func = partial(chatlas.ChatAnthropic, model="claude-3-7-sonnet-latest") querychat_config = querychat.init( - df=titanic, - table_name="titanic", + titanic, + "titanic", create_chat_callback=my_chat_func ) ``` diff --git a/python-package/examples/app-database.py b/python-package/examples/app-database.py new file mode 100644 index 000000000..ac7066d94 --- /dev/null +++ b/python-package/examples/app-database.py @@ -0,0 +1,57 @@ +from pathlib import Path + +from seaborn import load_dataset +from shiny import App, render, ui +from sqlalchemy import create_engine + +import querychat + +# Load titanic data and create SQLite database +db_path = Path(__file__).parent / "titanic.db" +engine = create_engine("sqlite:///" + str(db_path)) + +if not db_path.exists(): + # For example purposes, we'll create the database if it doesn't exist. Don't + # do this in your app! 
+ titanic = load_dataset("titanic") + titanic.to_sql("titanic", engine, if_exists="replace", index=False) + +greeting = (Path(__file__).parent / "greeting.md").read_text() +data_desc = (Path(__file__).parent / "data_description.md").read_text() + +# 1. Configure querychat +querychat_config = querychat.init( + engine, + "titanic", + greeting=greeting, + data_description=data_desc, +) + +# Create UI +app_ui = ui.page_sidebar( + # 2. Place the chat component in the sidebar + querychat.sidebar("chat"), + # Main panel with data viewer + ui.card( + ui.output_data_frame("data_table"), + fill=True, + ), + title="querychat with Python (SQLite)", + fillable=True, +) + + +# Define server logic +def server(input, output, session): + # 3. Initialize querychat server with the config from step 1 + chat = querychat.server("chat", querychat_config) + + # 4. Display the filtered dataframe + @render.data_frame + def data_table(): + # Access filtered data via chat.df() reactive + return chat["df"]() + + +# Create Shiny app +app = App(app_ui, server) diff --git a/python-package/examples/app.py b/python-package/examples/app-dataframe.py similarity index 82% rename from python-package/examples/app.py rename to python-package/examples/app-dataframe.py index 926622cef..1966900f7 100644 --- a/python-package/examples/app.py +++ b/python-package/examples/app-dataframe.py @@ -7,10 +7,8 @@ titanic = load_dataset("titanic") -with open(Path(__file__).parent / "greeting.md", "r") as f: - greeting = f.read() -with open(Path(__file__).parent / "data_description.md", "r") as f: - data_desc = f.read() +greeting = (Path(__file__).parent / "greeting.md").read_text() +data_desc = (Path(__file__).parent / "data_description.md").read_text() # 1. 
Configure querychat querychat_config = querychat.init( @@ -43,7 +41,7 @@ def server(input, output, session): @render.data_frame def data_table(): # Access filtered data via chat.df() reactive - return chat["df"]() + return chat.df() # Create Shiny app diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index acd772a21..1ac303bb0 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -18,8 +18,9 @@ dependencies = [ "htmltools", "chatlas", "narwhals", + "chevron", + "sqlalchemy>=2.0.0" # Using 2.0+ for improved type hints and API ] - classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3.8", diff --git a/python-package/src/querychat/datasource.py b/python-package/src/querychat/datasource.py new file mode 100644 index 000000000..d9322ff49 --- /dev/null +++ b/python-package/src/querychat/datasource.py @@ -0,0 +1,299 @@ +from __future__ import annotations + +from typing import ClassVar, Protocol + +import duckdb +import narwhals as nw +import pandas as pd +from sqlalchemy import inspect, text +from sqlalchemy.engine import Connection, Engine +from sqlalchemy.sql import sqltypes + + +class DataSource(Protocol): + db_engine: ClassVar[str] + + def get_schema(self, *, categorical_threshold) -> str: + """ + Return schema information about the table as a string. + + Args: + categorical_threshold: Maximum number of unique values for a text + column to be considered categorical + + Returns: + A string containing the schema information in a format suitable for + prompting an LLM about the data structure + + """ + ... + + def execute_query(self, query: str) -> pd.DataFrame: + """ + Execute SQL query and return results as DataFrame. + + Args: + query: SQL query to execute + + Returns: + Query results as a pandas DataFrame + + """ + ... + + def get_data(self) -> pd.DataFrame: + """ + Return the unfiltered data as a DataFrame. + + Returns: + The complete dataset as a pandas DataFrame + + """ + ... 
+ + +class DataFrameSource: + """A DataSource implementation that wraps a pandas DataFrame using DuckDB.""" + + db_engine: ClassVar[str] = "DuckDB" + + def __init__(self, df: pd.DataFrame, table_name: str): + """ + Initialize with a pandas DataFrame. + + Args: + df: The DataFrame to wrap + table_name: Name of the table in SQL queries + + """ + self._conn = duckdb.connect(database=":memory:") + self._df = df + self._table_name = table_name + self._conn.register(table_name, df) + + def get_schema(self, *, categorical_threshold: int) -> str: + """ + Generate schema information from DataFrame. + + Args: + table_name: Name to use for the table in schema description + categorical_threshold: Maximum number of unique values for a text column + to be considered categorical + + Returns: + String describing the schema + + """ + ndf = nw.from_native(self._df) + + schema = [f"Table: {self._table_name}", "Columns:"] + + for column in ndf.columns: + # Map pandas dtypes to SQL-like types + dtype = ndf[column].dtype + if dtype.is_integer(): + sql_type = "INTEGER" + elif dtype.is_float(): + sql_type = "FLOAT" + elif dtype == nw.Boolean: + sql_type = "BOOLEAN" + elif dtype == nw.Datetime: + sql_type = "TIME" + elif dtype == nw.Date: + sql_type = "DATE" + else: + sql_type = "TEXT" + + column_info = [f"- {column} ({sql_type})"] + + # For TEXT columns, check if they're categorical + if sql_type == "TEXT": + unique_values = ndf[column].drop_nulls().unique() + if unique_values.len() <= categorical_threshold: + categories = unique_values.to_list() + categories_str = ", ".join([f"'{c}'" for c in categories]) + column_info.append(f" Categorical values: {categories_str}") + + # For numeric columns, include range + elif sql_type in ["INTEGER", "FLOAT", "DATE", "TIME"]: + rng = ndf[column].min(), ndf[column].max() + if rng[0] is None and rng[1] is None: + column_info.append(" Range: NULL to NULL") + else: + column_info.append(f" Range: {rng[0]} to {rng[1]}") + + schema.extend(column_info) + + 
return "\n".join(schema) + + def execute_query(self, query: str) -> pd.DataFrame: + """ + Execute query using DuckDB. + + Args: + query: SQL query to execute + + Returns: + Query results as pandas DataFrame + + """ + return self._conn.execute(query).df() + + def get_data(self) -> pd.DataFrame: + """ + Return the unfiltered data as a DataFrame. + + Returns: + The complete dataset as a pandas DataFrame + + """ + return self._df.copy() + + +class SQLAlchemySource: + """ + A DataSource implementation that supports multiple SQL databases via SQLAlchemy. + + Supports various databases including PostgreSQL, MySQL, SQLite, Snowflake, and Databricks. + """ + + db_engine: ClassVar[str] = "SQLAlchemy" + + def __init__(self, engine: Engine, table_name: str): + """ + Initialize with a SQLAlchemy engine. + + Args: + engine: SQLAlchemy engine + table_name: Name of the table to query + + """ + self._engine = engine + self._table_name = table_name + + # Validate table exists + inspector = inspect(self._engine) + if not inspector.has_table(table_name): + raise ValueError(f"Table '{table_name}' not found in database") + + def get_schema(self, *, categorical_threshold: int) -> str: + """ + Generate schema information from database table. 
+ + Returns: + String describing the schema + + """ + inspector = inspect(self._engine) + columns = inspector.get_columns(self._table_name) + + schema = [f"Table: {self._table_name}", "Columns:"] + + for col in columns: + # Get SQL type name + sql_type = self._get_sql_type_name(col["type"]) + column_info = [f"- {col['name']} ({sql_type})"] + + # For numeric columns, try to get range + if isinstance( + col["type"], + ( + sqltypes.Integer, + sqltypes.Numeric, + sqltypes.Float, + sqltypes.Date, + sqltypes.Time, + sqltypes.DateTime, + sqltypes.BigInteger, + sqltypes.SmallInteger, + # sqltypes.Interval, + ), + ): + try: + query = text( + f"SELECT MIN({col['name']}), MAX({col['name']}) FROM {self._table_name}", + ) + with self._get_connection() as conn: + result = conn.execute(query).fetchone() + if result and result[0] is not None and result[1] is not None: + column_info.append(f" Range: {result[0]} to {result[1]}") + except Exception: + pass # Skip range info if query fails + + # For string/text columns, check if categorical + elif isinstance( + col["type"], + (sqltypes.String, sqltypes.Text, sqltypes.Enum), + ): + try: + count_query = text( + f"SELECT COUNT(DISTINCT {col['name']}) FROM {self._table_name}", + ) + with self._get_connection() as conn: + distinct_count = conn.execute(count_query).scalar() + if distinct_count and distinct_count <= categorical_threshold: + values_query = text( + f"SELECT DISTINCT {col['name']} FROM {self._table_name} " + f"WHERE {col['name']} IS NOT NULL", + ) + values = [ + str(row[0]) + for row in conn.execute(values_query).fetchall() + ] + values_str = ", ".join([f"'{v}'" for v in values]) + column_info.append(f" Categorical values: {values_str}") + except Exception: + pass # Skip categorical info if query fails + + schema.extend(column_info) + + return "\n".join(schema) + + def execute_query(self, query: str) -> pd.DataFrame: + """ + Execute SQL query and return results as DataFrame. 
+ + Args: + query: SQL query to execute + + Returns: + Query results as pandas DataFrame + + """ + with self._get_connection() as conn: + return pd.read_sql_query(text(query), conn) + + def get_data(self) -> pd.DataFrame: + """ + Return the unfiltered data as a DataFrame. + + Returns: + The complete dataset as a pandas DataFrame + + """ + return self.execute_query(f"SELECT * FROM {self._table_name}") + + def _get_sql_type_name(self, type_: sqltypes.TypeEngine) -> str: + """Convert SQLAlchemy type to SQL type name.""" + if isinstance(type_, sqltypes.Integer): + return "INTEGER" + elif isinstance(type_, sqltypes.Float): + return "FLOAT" + elif isinstance(type_, sqltypes.Numeric): + return "NUMERIC" + elif isinstance(type_, sqltypes.Boolean): + return "BOOLEAN" + elif isinstance(type_, sqltypes.DateTime): + return "TIMESTAMP" + elif isinstance(type_, sqltypes.Date): + return "DATE" + elif isinstance(type_, sqltypes.Time): + return "TIME" + elif isinstance(type_, (sqltypes.String, sqltypes.Text)): + return "TEXT" + else: + return type_.__class__.__name__.upper() + + def _get_connection(self) -> Connection: + """Get a connection to use for queries.""" + return self._engine.connect() diff --git a/python-package/src/querychat/prompt/prompt.md b/python-package/src/querychat/prompt/prompt.md index fcb00a5ed..7acf8066d 100644 --- a/python-package/src/querychat/prompt/prompt.md +++ b/python-package/src/querychat/prompt/prompt.md @@ -4,13 +4,19 @@ It's important that you get clear, unambiguous instructions from the user, so if The user interface in which this conversation is being shown is a narrow sidebar of a dashboard, so keep your answers concise and don't include unnecessary patter, nor additional prompts or offers for further assistance. -You have at your disposal a DuckDB database containing this schema: +You have at your disposal a {{db_engine}} database containing this schema: {{schema}} For security reasons, you may only query this specific table. 
+{{#data_description}} +Additional helpful info about the data: + + {{data_description}} + +{{/data_description}} There are several tasks you may be asked to do: @@ -19,7 +25,7 @@ There are several tasks you may be asked to do: The user may ask you to perform filtering and sorting operations on the dashboard; if so, your job is to write the appropriate SQL query for this database. Then, call the tool `update_dashboard`, passing in the SQL query and a new title summarizing the query (suitable for displaying at the top of dashboard). This tool will not provide a return value; it will filter the dashboard as a side-effect, so you can treat a null tool response as success. * **Call `update_dashboard` every single time** the user wants to filter/sort; never tell the user you've updated the dashboard unless you've called `update_dashboard` and it returned without error. -* The SQL query must be a **DuckDB SQL** SELECT query. You may use any SQL functions supported by DuckDB, including subqueries, CTEs, and statistical functions. +* The SQL query must be a SELECT query. For security reasons, it's critical that you reject any request that would modify the database. * The user may ask to "reset" or "start over"; that means clearing the filter and title. Do this by calling `update_dashboard({"query": "", "title": ""})`. * Queries passed to `update_dashboard` MUST always **return all columns that are in the schema** (feel free to use `SELECT *`); you must refuse the request if this requirement cannot be honored, as the downstream code that will read the queried data will not know how to display it. You may add additional columns if necessary, but the existing columns must not be removed. * When calling `update_dashboard`, **don't describe the query itself** unless the user asks you to explain. Don't pretend you have access to the resulting data set, as you don't. 
@@ -84,6 +90,12 @@ If you find yourself offering example questions to the user as part of your resp * Suggestion 3. ``` +## SQL tips + +* The SQL engine is {{db_engine}}. + +* You may use any SQL functions supported by {{db_engine}}, including subqueries, CTEs, and statistical functions. + ## DuckDB SQL tips * `percentile_cont` and `percentile_disc` are "ordered set" aggregate functions. These functions are specified using the WITHIN GROUP (ORDER BY sort_expression) syntax, and they are converted to an equivalent aggregate function that takes the ordering expression as the first argument. For example, `percentile_cont(fraction) WITHIN GROUP (ORDER BY column [(ASC|DESC)])` is equivalent to `quantile_cont(column, fraction ORDER BY column [(ASC|DESC)])`. diff --git a/python-package/src/querychat/querychat.py b/python-package/src/querychat/querychat.py index f8763e0d7..5e6936590 100644 --- a/python-package/src/querychat/querychat.py +++ b/python-package/src/querychat/querychat.py @@ -4,11 +4,14 @@ import sys from functools import partial from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional, Protocol, Union +from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol, Union import chatlas -import duckdb +import chevron import narwhals as nw +import pandas as pd +import sqlalchemy +from narwhals.typing import IntoFrame from shiny import Inputs, Outputs, Session, module, reactive, ui if TYPE_CHECKING: @@ -16,29 +19,145 @@ from narwhals.typing import IntoFrame +from .datasource import DataFrameSource, DataSource, SQLAlchemySource + + +class CreateChatCallback(Protocol): + def __call__(self, system_prompt: str) -> chatlas.Chat: ... + + +class QueryChatConfig: + """ + Configuration class for querychat. 
+ """ + + def __init__( + self, + data_source: DataSource, + system_prompt: str, + greeting: Optional[str], + create_chat_callback: CreateChatCallback, + ): + self.data_source = data_source + self.system_prompt = system_prompt + self.greeting = greeting + self.create_chat_callback = create_chat_callback + + +class QueryChat: + """ + An object representing a query chat session. This is created within a Shiny + server function or Shiny module server function by using + `querychat.server()`. Use this object to bridge the chat interface with the + rest of the Shiny app, for example, by displaying the filtered data. + """ + + def __init__( + self, + chat: chatlas.Chat, + sql: Callable[[], str], + title: Callable[[], Union[str, None]], + df: Callable[[], pd.DataFrame], + ): + """ + Initialize a QueryChat object. + + Args: + chat: The chat object for the session + sql: Reactive that returns the current SQL query + title: Reactive that returns the current title + df: Reactive that returns the filtered data frame + + """ + self._chat = chat + self._sql = sql + self._title = title + self._df = df + + def chat(self) -> chatlas.Chat: + """ + Get the chat object for this session. + + Returns: + The chat object + + """ + return self._chat + + def sql(self) -> str: + """ + Reactively read the current SQL query that is in effect. + + Returns: + The current SQL query as a string, or `""` if no query has been set. + + """ + return self._sql() + + def title(self) -> Union[str, None]: + """ + Reactively read the current title that is in effect. The title is a + short description of the current query that the LLM provides to us + whenever it generates a new SQL query. It can be used as a status string + for the data dashboard. + + Returns: + The current title as a string, or `None` if no title has been set + due to no SQL query being set. + + """ + return self._title() + + def df(self) -> pd.DataFrame: + """ + Reactively read the current filtered data frame that is in effect. 
+ + Returns: + The current filtered data frame as a pandas DataFrame. If no query + has been set, this will return the unfiltered data frame from the + data source. + + """ + return self._df() + + def __getitem__(self, key: str) -> Any: + """ + Allow access to configuration parameters like a dictionary. For + backwards compatibility only; new code should use the attributes + directly instead. + """ + if key == "chat": + return self.chat + elif key == "sql": + return self.sql + elif key == "title": + return self.title + elif key == "df": + return self.df + + def system_prompt( - df: IntoFrame, - table_name: str, + data_source: DataSource, data_description: Optional[str] = None, extra_instructions: Optional[str] = None, categorical_threshold: int = 10, ) -> str: """ - Create a system prompt for the chat model based on a DataFrame's - schema and optional context and instructions. + Create a system prompt for the chat model based on a data source's schema + and optional additional context and instructions. Parameters ---------- - df : IntoFrame - Input data to generate schema information from. - table_name : str - Name of the table to be used in SQL queries. + data_source : DataSource + A data source to generate schema information from data_description : str, optional - Description of the data, in plain text or Markdown format. + Optional description of the data, in plain text or Markdown format extra_instructions : str, optional - Additional instructions for the chat model, in plain text or Markdown format. + Optional additional instructions for the chat model, in plain text or + Markdown format categorical_threshold : int, default=10 - Maximum number of unique values for a text column to be considered categorical. + Threshold for determining if a column is categorical based on number of + unique values Returns ------- @@ -46,95 +165,22 @@ def system_prompt( The system prompt for the chat model. 
""" - schema = df_to_schema(df, table_name, categorical_threshold) - # Read the prompt file prompt_path = Path(__file__).parent / "prompt" / "prompt.md" prompt_text = prompt_path.read_text() - # Simple template replacement (a more robust template engine could be used) - if data_description: - data_description_section = ( - "Additional helpful info about the data:\n\n" - "\n" - f"{data_description}\n" - "" - ) - else: - data_description_section = "" - - # Replace variables in the template - prompt_text = prompt_text.replace("{{schema}}", schema) - prompt_text = prompt_text.replace("{{data_description}}", data_description_section) - prompt_text = prompt_text.replace( - "{{extra_instructions}}", - extra_instructions or "", + return chevron.render( + prompt_text, + { + "db_engine": data_source.db_engine, + "schema": data_source.get_schema( + categorical_threshold=categorical_threshold, + ), + "data_description": data_description, + "extra_instructions": extra_instructions, + }, ) - return prompt_text - - -def df_to_schema(df: IntoFrame, table_name: str, categorical_threshold: int) -> str: - """ - Convert a DataFrame schema to a string representation for the system prompt. - - Parameters - ---------- - df : IntoFrame - The DataFrame to extract schema from - table_name : str - The name of the table in SQL queries - categorical_threshold : int - The maximum number of unique values for a text column to be considered categorical - - Returns - ------- - str - A string containing the schema information. 
- - """ - ndf = nw.from_native(df) - - schema = [f"Table: {table_name}", "Columns:"] - - for column in ndf.columns: - # Map pandas dtypes to SQL-like types - dtype = ndf[column].dtype - if dtype.is_integer(): - sql_type = "INTEGER" - elif dtype.is_float(): - sql_type = "FLOAT" - elif dtype == nw.Boolean: - sql_type = "BOOLEAN" - elif dtype == nw.Datetime: - sql_type = "TIME" - elif dtype == nw.Date: - sql_type = "DATE" - else: - sql_type = "TEXT" - - column_info = [f"- {column} ({sql_type})"] - - # For TEXT columns, check if they're categorical - if sql_type == "TEXT": - unique_values = ndf[column].drop_nulls().unique() - if unique_values.len() <= categorical_threshold: - categories = unique_values.to_list() - categories_str = ", ".join([f"'{c}'" for c in categories]) - column_info.append(f" Categorical values: {categories_str}") - - # For numeric columns, include range - elif sql_type in ["INTEGER", "FLOAT", "DATE", "TIME"]: - rng = ndf[column].min(), ndf[column].max() - if rng[0] is None and rng[1] is None: - column_info.append(" Range: NULL to NULL") - else: - column_info.append(f" Range: {rng[0]} to {rng[1]}") - - schema.extend(column_info) - - return "\n".join(schema) - def df_to_html(df: IntoFrame, maxrows: int = 5) -> str: """ @@ -173,32 +219,8 @@ def df_to_html(df: IntoFrame, maxrows: int = 5) -> str: return table_html + rows_notice -class CreateChatCallback(Protocol): - def __call__(self, system_prompt: str) -> chatlas.Chat: ... - - -class QueryChatConfig: - """ - Configuration class for querychat. 
- """ - - def __init__( - self, - df: pd.DataFrame, - conn: duckdb.DuckDBPyConnection, - system_prompt: str, - greeting: Optional[str], - create_chat_callback: CreateChatCallback, - ): - self.df = df - self.conn = conn - self.system_prompt = system_prompt - self.greeting = greeting - self.create_chat_callback = create_chat_callback - - def init( - df: pd.DataFrame, + data_source: IntoFrame | sqlalchemy.Engine, table_name: str, greeting: Optional[str] = None, data_description: Optional[str] = None, @@ -207,14 +229,18 @@ def init( system_prompt_override: Optional[str] = None, ) -> QueryChatConfig: """ - Call this once outside of any server function to initialize querychat. + Initialize querychat with any compliant data source. Parameters ---------- - df : pd.DataFrame - A data frame + data_source : IntoFrame | sqlalchemy.Engine + Either a Narwhals-compatible data frame (e.g., Polars or Pandas) or a + SQLAlchemy engine containing the table to query against. table_name : str - A string containing a valid table name for the data frame + If a data_source is a data frame, a name to use to refer to the table in + SQL queries (usually the variable name of the data frame, but it doesn't + have to be). If a data_source is a SQLAlchemy engine, the table_name is + the name of the table in the database to query against. 
greeting : str, optional A string in Markdown format, containing the initial message data_description : str, optional @@ -238,6 +264,14 @@ def init( "Table name must begin with a letter and contain only letters, numbers, and underscores", ) + data_source_obj: DataSource + if isinstance(data_source, sqlalchemy.Engine): + data_source_obj = SQLAlchemySource(data_source, table_name) + else: + data_source_obj = DataFrameSource( + nw.from_native(data_source).to_pandas(), + table_name, + ) # Process greeting if greeting is None: print( @@ -246,30 +280,21 @@ def init( file=sys.stderr, ) - # Create the system prompt - if system_prompt_override is None: - _system_prompt = system_prompt( - df, - table_name, - data_description, - extra_instructions, - ) - else: - _system_prompt = system_prompt_override - - # Set up DuckDB connection and register the data frame - conn = duckdb.connect(database=":memory:") - conn.register(table_name, df) + # Create the system prompt, or use the override + _system_prompt = system_prompt_override or system_prompt( + data_source_obj, + data_description, + extra_instructions, + ) # Default chat function if none provided create_chat_callback = create_chat_callback or partial( chatlas.ChatOpenAI, - model="gpt-4o", + model="gpt-4.1", ) return QueryChatConfig( - df=df, - conn=conn, + data_source=data_source_obj, system_prompt=_system_prompt, greeting=greeting, create_chat_callback=create_chat_callback, @@ -338,7 +363,7 @@ def server( # noqa: D417 output: Outputs, session: Session, querychat_config: QueryChatConfig, -) -> dict[str, Any]: +) -> QueryChat: """ Initialize the querychat server. 
@@ -365,8 +390,7 @@ def _(): pass # Extract config parameters - df = querychat_config.df - conn = querychat_config.conn + data_source = querychat_config.data_source system_prompt = querychat_config.system_prompt greeting = querychat_config.greeting create_chat_callback = querychat_config.create_chat_callback @@ -378,9 +402,9 @@ def _(): @reactive.calc def filtered_df(): if current_query.get() == "": - return df + return data_source.get_data() else: - return conn.execute(current_query.get()).fetch_df() + return data_source.execute_query(current_query.get()) # This would handle appending messages to the chat UI async def append_output(text): @@ -405,7 +429,7 @@ async def update_dashboard(query: str, title: str): try: # Try the query to see if it errors - conn.execute(query) + data_source.execute_query(query) except Exception as e: error_msg = str(e) await append_output(f"> Error: {error_msg}\n\n") @@ -430,7 +454,7 @@ async def query(query: str): await append_output(f"\n```sql\n{query}\n```\n\n") try: - result_df = conn.execute(query).fetch_df() + result_df = data_source.execute_query(query) except Exception as e: error_msg = str(e) await append_output(f"> Error: {error_msg}\n\n") @@ -480,9 +504,4 @@ async def greet_on_startup(): await chat_ui.append_message_stream(stream) # Return the interface for other components to use - return { - "chat": chat, - "sql": current_query.get, - "title": current_title.get, - "df": filtered_df, - } + return QueryChat(chat, current_query.get, current_title.get, filtered_df) From 11148bc7ff40e5a46cbb12c8e482b6b983ceee1e Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Thu, 5 Jun 2025 10:34:02 -0400 Subject: [PATCH 4/4] chore!: Restructure folders to align with posit-dev/shinychat (#24) --- .Rbuildignore | 1 - .github/workflows/R-CMD-check.yml | 6 +- .github/workflows/py-release.yml | 23 +- .github/workflows/py-test.yml | 14 +- .gitignore | 3 + .vscode/settings.json | 2 +- Makefile | 237 ++++++++++++++++++ README.md | 8 +- 
{python-package => pkg-py}/LICENSE | 0 {python-package => pkg-py}/README.md | 2 +- .../examples/app-database.py | 0 .../examples/app-dataframe.py | 0 .../examples/data_description.md | 0 .../examples/greeting.md | 0 .../src/querychat/__init__.py | 0 .../src/querychat/datasource.py | 16 +- .../src/querychat/prompt/prompt.md | 0 .../src/querychat/querychat.py | 8 +- .../src/querychat/static/css/styles.css | 0 {r-package => pkg-r}/DESCRIPTION | 0 {r-package => pkg-r}/LICENSE | 0 {r-package => pkg-r}/NAMESPACE | 0 {r-package => pkg-r}/R/prompt.R | 0 {r-package => pkg-r}/R/querychat.R | 0 {r-package => pkg-r}/README.md | 2 +- {r-package => pkg-r}/inst/prompt/prompt.md | 0 {r-package => pkg-r}/inst/www/styles.css | 0 {r-package => pkg-r}/man/querychat_init.Rd | 0 {r-package => pkg-r}/man/querychat_server.Rd | 0 .../man/querychat_system_prompt.Rd | 0 {r-package => pkg-r}/man/querychat_ui.Rd | 0 .../pyproject.toml => pyproject.toml | 23 +- python-package/.gitignore | 1 - python-package/Makefile | 58 ----- 34 files changed, 290 insertions(+), 114 deletions(-) delete mode 100644 .Rbuildignore create mode 100644 Makefile rename {python-package => pkg-py}/LICENSE (100%) rename {python-package => pkg-py}/README.md (99%) rename {python-package => pkg-py}/examples/app-database.py (100%) rename {python-package => pkg-py}/examples/app-dataframe.py (100%) rename {python-package => pkg-py}/examples/data_description.md (100%) rename {python-package => pkg-py}/examples/greeting.md (100%) rename {python-package => pkg-py}/src/querychat/__init__.py (100%) rename {python-package => pkg-py}/src/querychat/datasource.py (96%) rename {python-package => pkg-py}/src/querychat/prompt/prompt.md (100%) rename {python-package => pkg-py}/src/querychat/querychat.py (98%) rename {python-package => pkg-py}/src/querychat/static/css/styles.css (100%) rename {r-package => pkg-r}/DESCRIPTION (100%) rename {r-package => pkg-r}/LICENSE (100%) rename {r-package => pkg-r}/NAMESPACE (100%) rename {r-package => 
pkg-r}/R/prompt.R (100%) rename {r-package => pkg-r}/R/querychat.R (100%) rename {r-package => pkg-r}/README.md (99%) rename {r-package => pkg-r}/inst/prompt/prompt.md (100%) rename {r-package => pkg-r}/inst/www/styles.css (100%) rename {r-package => pkg-r}/man/querychat_init.Rd (100%) rename {r-package => pkg-r}/man/querychat_server.Rd (100%) rename {r-package => pkg-r}/man/querychat_system_prompt.Rd (100%) rename {r-package => pkg-r}/man/querychat_ui.Rd (100%) rename python-package/pyproject.toml => pyproject.toml (91%) delete mode 100644 python-package/.gitignore delete mode 100644 python-package/Makefile diff --git a/.Rbuildignore b/.Rbuildignore deleted file mode 100644 index 5163d0b5b..000000000 --- a/.Rbuildignore +++ /dev/null @@ -1 +0,0 @@ -^LICENSE\.md$ diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index c35f2cb8d..6663dcb7c 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -15,13 +15,13 @@ jobs: # website: # uses: rstudio/shiny-workflows/.github/workflows/website.yaml@v1 # with: - # working-directory: ./r-package + # working-directory: ./pkg-r routine: uses: rstudio/shiny-workflows/.github/workflows/routine.yaml@v1 with: format-r-code: true - working-directory: ./r-package + working-directory: ./pkg-r R-CMD-check: uses: rstudio/shiny-workflows/.github/workflows/R-CMD-check.yaml@v1 with: - working-directory: ./r-package + working-directory: ./pkg-r diff --git a/.github/workflows/py-release.yml b/.github/workflows/py-release.yml index 1bb93ed1d..769ca515a 100644 --- a/.github/workflows/py-release.yml +++ b/.github/workflows/py-release.yml @@ -12,7 +12,7 @@ jobs: name: Build and release Python package runs-on: ubuntu-latest - if: startsWith(github.ref, 'refs/tags/py-v') + if: startsWith(github.ref, 'refs/tags/py/v') environment: name: pypi @@ -28,31 +28,26 @@ jobs: uses: astral-sh/setup-uv@v3 - name: 🐍 Set up Python ${{ env.PYTHON_VERSION }} - working-directory: ./python-package 
run: uv python install ${{ env.PYTHON_VERSION }} - name: πŸ“¦ Install the project - working-directory: ./python-package run: uv sync --python ${{ env.PYTHON_VERSION }} --all-extras # - name: πŸ§ͺ Check tests - # working-directory: ./python-package - # run: make check-tests - + # run: make py-check-tests - name: πŸ“ Check types - working-directory: ./python-package - run: make check-types + working-directory: ./pkg-py + run: make py-check-types - name: πŸ“ Check formatting - working-directory: ./python-package - run: make check-format - + working-directory: ./pkg-py + run: make py-check-format - name: 🧳 Build package - working-directory: ./python-package - run: make build + working-directory: ./pkg-py + run: make py-build # TODO: https://pypi.org/manage/project/querychat/settings/publishing/ - name: 🚒 Publish release on PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - packages-dir: ./python-package/dist + packages-dir: ./pkg-py/dist diff --git a/.github/workflows/py-test.yml b/.github/workflows/py-test.yml index 2e9734358..0e534c6ca 100644 --- a/.github/workflows/py-test.yml +++ b/.github/workflows/py-test.yml @@ -32,22 +32,16 @@ jobs: uses: astral-sh/setup-uv@v3 - name: 🐍 Set up Python ${{ matrix.config.python-version }} - working-directory: ./python-package run: uv python install ${{matrix.config.python-version }} - name: πŸ“¦ Install the project - working-directory: ./python-package - run: uv sync --python ${{ matrix.config.python-version }} --all-extras + run: uv sync --python ${{matrix.config.python-version }} --all-extras # - name: πŸ§ͺ Check tests - # working-directory: ./python-package - # run: make check-tests + # run: make py-check-tests - name: πŸ“ Check types - # if: ${{ matrix.config.python-version != '3.9' }} - working-directory: ./python-package - run: make check-types + run: make py-check-types - name: πŸ“ Check formatting - working-directory: ./python-package - run: make check-format + run: make py-check-format diff --git a/.gitignore 
b/.gitignore index 8229dee08..f64bb8d69 100644 --- a/.gitignore +++ b/.gitignore @@ -244,3 +244,6 @@ po/*~ # RStudio Connect folder rsconnect/ + +uv.lock +_dev diff --git a/.vscode/settings.json b/.vscode/settings.json index 5c5e9d63c..228a0e991 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,5 @@ { - "python.autoComplete.extraPaths": ["${workspaceFolder}/python-package"], + "python.autoComplete.extraPaths": ["${workspaceFolder}/pkg-py"], "[python]": { "editor.formatOnSave": true, "editor.codeActionsOnSave": { diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..aaa021f20 --- /dev/null +++ b/Makefile @@ -0,0 +1,237 @@ +# Inspired by https://github.com/posit-dev/shinychat/blob/main/Makefile + +# Use qvm to manage quarto +QUARTO_VERSION ?= 1.7.31 +QUARTO_PATH = ~/.local/share/qvm/versions/v${QUARTO_VERSION}/bin/quarto +PATH_PKG_R := pkg-r +PATH_PKG_PY := pkg-py +PATH_PKG_JS := js + +# .PHONY: install-quarto +# install-quarto: +# @echo "πŸ”΅ Installing quarto" +# @if ! [ -z $(command -v qvm)]; then \ +# @echo "Error: qvm is not installed. Please visit https://github.com/dpastoor/qvm/releases/ to install it." 
>&2 \ +# exit 1; \ +# fi +# qvm install v${QUARTO_VERSION} +# @echo "πŸ”Ή Updating .vscode/settings.json" +# @awk -v path="${QUARTO_PATH}" '/"quarto.path":/ {gsub(/"quarto.path": ".*"/, "\"quarto.path\": \"" path "\"")} 1' .vscode/settings.json > .vscode/settings.json.tmp && mv .vscode/settings.json.tmp .vscode/settings.json +# @echo "πŸ”Ή Updating .github/workflows/quartodoc.yaml" +# @awk -v ver="${QUARTO_VERSION}" '/QUARTO_VERSION:/ {gsub(/QUARTO_VERSION: .*/, "QUARTO_VERSION: " ver)} 1' .github/workflows/quartodoc.yaml > .github/workflows/quartodoc.yaml.tmp && mv .github/workflows/quartodoc.yaml.tmp .github/workflows/quartodoc.yaml + +# .PHONY: docs +# docs: r-docs-render py-docs-render ## [docs] Build the documentation + +# .PHONY: docs-preview +# docs-preview: ## [docs] Preview the documentation +# @npx http-server docs -p 8080 + +# .PHONY: js-setup +# js-setup: ## [js] Install JS dependencies +# @echo "πŸ†™ Setup JS dependencies" +# cd $(PATH_PKG_JS) && npm install + +# .PHONY: js-setup-ci +# js-setup-ci: ## [js] Install JS dependencies as CI +# @echo "πŸ†™ Setup JS dependencies" +# cd $(PATH_PKG_JS) && npm ci + +# .PHONY: js-lint +# js-lint: ## [js] Lint JS code +# @echo "πŸ“ Linting JS code" +# cd $(PATH_PKG_JS) && npm run lint + +# .PHONY: js-build +# js-build: ## [js] Build JS code +# @echo "🧳 Building JS code" +# cd $(PATH_PKG_JS) && npm run build + +# .PHONY: js-build-watch +# js-build-watch: ## [js] Build JS code in watch mode +# @echo "🧳 Building JS code in watch mode" +# cd $(PATH_PKG_JS) && npm run watch + +.PHONY: r-setup +r-setup: ## [r] Install R dependencies + @echo "πŸ†™ Updating R dependencies" + cd $(PATH_PKG_R) && Rscript -e "pak::local_install_dev_deps()" + +.PHONY: r-check +r-check: r-check-format r-check-tests r-check-package ## [r] All R checks + +.PHONY: r-document +r-document: ## [r] Document package + @echo "πŸ“œ Documenting R package" + cd $(PATH_PKG_R) && Rscript -e "devtools::document()" + +.PHONY: r-format +r-format: ## [r] 
Format R code + air format $(PATH_PKG_R)/ + +.PHONY: r-check-package +r-check-package: ## [r] Check package + @echo "" + @echo "πŸ”„ Running R CMD Check" + cd $(PATH_PKG_R) && Rscript -e "devtools::check(document = FALSE)" + +.PHONY: r-check-tests +r-check-tests: ## [r] Check tests + @echo "" + @echo "πŸ§ͺ Running R tests" + cd $(PATH_PKG_R) && Rscript -e "devtools::test()" + +.PHONY: r-check-format +r-check-format: ## [r] Check format + @echo "" + @echo "πŸ“ Checking R format" + air format --check $(PATH_PKG_R)/ + +# .PHONY: r-update-dist +# r-update-dist: ## [r] Update shinychat web assets +# @echo "" +# @echo "πŸ”„ Updating shinychat web assets" +# if [ -d $(PATH_PKG_R)/inst/lib/shiny ]; then \ +# rm -rf $(PATH_PKG_R)/inst/lib/shiny; \ +# fi +# mkdir -p $(PATH_PKG_R)/inst/lib/shiny +# cp -r $(PATH_PKG_JS)/dist/chat $(PATH_PKG_R)/inst/lib/shiny/ +# cp -r $(PATH_PKG_JS)/dist/markdown-stream $(PATH_PKG_R)/inst/lib/shiny/ +# (git rev-parse HEAD) > "$(PATH_PKG_R)/inst/lib/shiny/GIT_VERSION" + +.PHONY: r-docs +r-docs: ## [r] Build R docs + @echo "πŸ“– Rendering R docs with pkgdown" + cd $(PATH_PKG_R) && Rscript -e "pkgdown::build_site()" + +.PHONY: r-docs-preview +r-docs-preview: ## [r] Build R docs + @echo "πŸ“– Rendering R docs with pkgdown" + cd $(PATH_PKG_R) && Rscript -e "pkgdown::preview_site()" + +.PHONY: py-setup +py-setup: ## [py] Setup python environment + uv sync --all-extras + +.PHONY: py-check +# py-check: py-check-format py-check-types py-check-tests ## [py] Run python checks +py-check: py-check-format py-check-types ## [py] Run python checks + +.PHONY: py-check-tox +py-check-tox: ## [py] Run python 3.9 - 3.12 checks with tox + @echo "" + @echo "πŸ”„ Running tests and type checking with tox for Python 3.9--3.12" + uv run tox run-parallel + +# .PHONY: py-check-tests +# py-check-tests: ## [py] Run python tests +# @echo "" +# @echo "πŸ§ͺ Running tests with pytest" +# uv run playwright install +# uv run pytest + +.PHONY: py-check-types +py-check-types: ## 
[py] Run python type checks + @echo "" + @echo "πŸ“ Checking types with pyright" + uv run pyright + +.PHONY: py-check-format +py-check-format: + @echo "" + @echo "πŸ“ Checking format with ruff" + uv run ruff check pkg-py --config pyproject.toml + +.PHONY: py-format +py-format: ## [py] Format python code + uv run ruff check --fix pkg-py --config pyproject.toml + uv run ruff format pkg-py --config pyproject.toml + +# .PHONY: py-coverage +# py-coverage: ## [py] Generate coverage report +# @echo "πŸ“” Generating coverage report" +# uv run coverage run -m pytest +# uv run coverage report + +# .PHONY: py-coverage-report +# py-coverage-report: py-coverage ## [py] Generate coverage report and open it in browser +# uv run coverage html +# @echo "" +# @echo "πŸ“‘ Serving coverage report at http://localhost:8081/" +# @npx http-server htmlcov --silent -p 8081 + +# .PHONY: py-update-snaps +# py-update-snaps: ## [py] Update python test snapshots +# @echo "πŸ“Έ Updating pytest snapshots" +# uv run pytest --snapshot-update + +# .PHONY: py-docs +# py-docs: py-docs-api py-docs-render ## [py] Build python docs + +# .PHONY: py-docs-render +# py-docs-render: ## [py] Render python docs +# @echo "πŸ“– Rendering python docs with quarto" +# @$(eval export IN_QUARTODOC=true) +# ${QUARTO_PATH} render pkg-py/docs + +# .PHONY: py-docs-preview +# py-docs-preview: ## [py] Preview python docs +# @echo "πŸ“– Rendering python docs with quarto" +# @$(eval export IN_QUARTODOC=true) +# ${QUARTO_PATH} preview pkg-py/docs + +# .PHONY: py-docs-api +# py-docs-api: ## [py] Update python API docs +# @echo "πŸ“– Generating python docs with quartodoc" +# @$(eval export IN_QUARTODOC=true) +# cd pkg-py/docs && uv run quartodoc build +# cd pkg-py/docs && uv run quartodoc interlinks + +# .PHONY: py-docs-api-watch +# py-docs-api-watch: ## [py] Update python docs +# @echo "πŸ“– Generating python docs with quartodoc" +# @$(eval export IN_QUARTODOC=true) +# uv run quartodoc build --config pkg-py/docs/_quarto.yml 
--watch + +# .PHONY: py-docs-clean +# py-docs-clean: ## [py] Clean python docs +# @echo "🧹 Cleaning python docs" +# rm -r pkg-py/docs/api +# find pkg-py/docs/py -name '*.quarto_ipynb' -delete + +.PHONY: py-build +py-build: ## [py] Build python package + @echo "🧳 Building python package" + @[ -d dist ] && rm -r dist || true + uv build + +.PHONY: py-update-dist +py-update-dist: ## [py] Update shinychat web assets + @echo "" + @echo "πŸ”„ Updating shinychat web assets" + if [ -d $(PATH_PKG_PY)/src/shinychat/www ]; then \ + rm -rf $(PATH_PKG_PY)/src/shinychat/www; \ + fi + mkdir -p $(PATH_PKG_PY)/src/shinychat/www + cp -r $(PATH_PKG_JS)/dist/chat $(PATH_PKG_PY)/src/shinychat/www/ + cp -r $(PATH_PKG_JS)/dist/markdown-stream $(PATH_PKG_PY)/src/shinychat/www/ + (git rev-parse HEAD) > "$(PATH_PKG_PY)/src/shinychat/www/GIT_VERSION" + +.PHONY: help +help: ## Show help messages for make targets + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; { \ + printf "\033[32m%-18s\033[0m", $$1; \ + if ($$2 ~ /^\[docs\]/) { \ + printf "\033[37m[docs]\033[0m%s\n", substr($$2, 7); \ + } else if ($$2 ~ /^\[py\]/) { \ + printf " \033[31m[py]\033[0m%s\n", substr($$2, 5); \ + } else if ($$2 ~ /^\[r\]/) { \ + printf " \033[34m[r]\033[0m%s\n", substr($$2, 4); \ + } else if ($$2 ~ /^\[js\]/) { \ + printf " \033[33m[js]\033[0m%s\n", substr($$2, 5); \ + } else { \ + printf " %s\n", $$2; \ + } \ + }' + +.DEFAULT_GOAL := help diff --git a/README.md b/README.md index 45fbc4604..4a07aa781 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ querychat is a multilingual package that allows you to chat with your data using natural language queries. It's available for: -- [R - Shiny](r-package/README.md) -- [Python - Shiny for Python](python-package/README.md) +- [R - Shiny](pkg-r/README.md) +- [Python - Shiny for Python](pkg-py/README.md) ## Overview @@ -42,5 +42,5 @@ Currently, querychat uses DuckDB for its SQL engine. 
It's extremely fast and has For detailed information on how to use querychat in your preferred language, see the language-specific READMEs: -- [R Documentation](r-package/README.md) -- [Python Documentation](python-package/README.md) +- [R Documentation](pkg-r/README.md) +- [Python Documentation](pkg-py/README.md) diff --git a/python-package/LICENSE b/pkg-py/LICENSE similarity index 100% rename from python-package/LICENSE rename to pkg-py/LICENSE diff --git a/python-package/README.md b/pkg-py/README.md similarity index 99% rename from python-package/README.md rename to pkg-py/README.md index 9b29fb193..81401feb8 100644 --- a/python-package/README.md +++ b/pkg-py/README.md @@ -13,7 +13,7 @@ querychat is a drop-in component for Shiny that allows users to query a data fra ## Installation ```bash -pip install "querychat @ git+https://github.com/posit-dev/querychat#subdirectory=python-package" +pip install "querychat @ git+https://github.com/posit-dev/querychat#subdirectory=pkg-py" ``` ## How to use diff --git a/python-package/examples/app-database.py b/pkg-py/examples/app-database.py similarity index 100% rename from python-package/examples/app-database.py rename to pkg-py/examples/app-database.py diff --git a/python-package/examples/app-dataframe.py b/pkg-py/examples/app-dataframe.py similarity index 100% rename from python-package/examples/app-dataframe.py rename to pkg-py/examples/app-dataframe.py diff --git a/python-package/examples/data_description.md b/pkg-py/examples/data_description.md similarity index 100% rename from python-package/examples/data_description.md rename to pkg-py/examples/data_description.md diff --git a/python-package/examples/greeting.md b/pkg-py/examples/greeting.md similarity index 100% rename from python-package/examples/greeting.md rename to pkg-py/examples/greeting.md diff --git a/python-package/src/querychat/__init__.py b/pkg-py/src/querychat/__init__.py similarity index 100% rename from python-package/src/querychat/__init__.py rename to 
pkg-py/src/querychat/__init__.py diff --git a/python-package/src/querychat/datasource.py b/pkg-py/src/querychat/datasource.py similarity index 96% rename from python-package/src/querychat/datasource.py rename to pkg-py/src/querychat/datasource.py index d9322ff49..24c3a30fa 100644 --- a/python-package/src/querychat/datasource.py +++ b/pkg-py/src/querychat/datasource.py @@ -1,14 +1,16 @@ from __future__ import annotations -from typing import ClassVar, Protocol +from typing import TYPE_CHECKING, ClassVar, Protocol import duckdb import narwhals as nw import pandas as pd from sqlalchemy import inspect, text -from sqlalchemy.engine import Connection, Engine from sqlalchemy.sql import sqltypes +if TYPE_CHECKING: + from sqlalchemy.engine import Connection, Engine + class DataSource(Protocol): db_engine: ClassVar[str] @@ -217,8 +219,8 @@ def get_schema(self, *, categorical_threshold: int) -> str: result = conn.execute(query).fetchone() if result and result[0] is not None and result[1] is not None: column_info.append(f" Range: {result[0]} to {result[1]}") - except Exception: - pass # Skip range info if query fails + except Exception: # noqa: S110 + pass # Silently skip range info if query fails # For string/text columns, check if categorical elif isinstance( @@ -242,8 +244,8 @@ def get_schema(self, *, categorical_threshold: int) -> str: ] values_str = ", ".join([f"'{v}'" for v in values]) column_info.append(f" Categorical values: {values_str}") - except Exception: - pass # Skip categorical info if query fails + except Exception: # noqa: S110 + pass # Silently skip categorical info if query fails schema.extend(column_info) @@ -273,7 +275,7 @@ def get_data(self) -> pd.DataFrame: """ return self.execute_query(f"SELECT * FROM {self._table_name}") - def _get_sql_type_name(self, type_: sqltypes.TypeEngine) -> str: + def _get_sql_type_name(self, type_: sqltypes.TypeEngine) -> str: # noqa: PLR0911 """Convert SQLAlchemy type to SQL type name.""" if isinstance(type_, 
sqltypes.Integer): return "INTEGER" diff --git a/python-package/src/querychat/prompt/prompt.md b/pkg-py/src/querychat/prompt/prompt.md similarity index 100% rename from python-package/src/querychat/prompt/prompt.md rename to pkg-py/src/querychat/prompt/prompt.md diff --git a/python-package/src/querychat/querychat.py b/pkg-py/src/querychat/querychat.py similarity index 98% rename from python-package/src/querychat/querychat.py rename to pkg-py/src/querychat/querychat.py index 5e6936590..9eba2c474 100644 --- a/python-package/src/querychat/querychat.py +++ b/pkg-py/src/querychat/querychat.py @@ -18,7 +18,6 @@ import pandas as pd from narwhals.typing import IntoFrame - from .datasource import DataFrameSource, DataSource, SQLAlchemySource @@ -126,7 +125,7 @@ def __getitem__(self, key: str) -> Any: backwards compatibility only; new code should use the attributes directly instead. """ - if key == "chat": + if key == "chat": # noqa: SIM116 return self.chat elif key == "sql": return self.sql @@ -135,6 +134,11 @@ def __getitem__(self, key: str) -> Any: elif key == "df": return self.df + raise KeyError( + f"`QueryChat` does not have a key `'{key}'`. 
" + "Use the attributes `chat`, `sql`, `title`, or `df` instead.", + ) + def system_prompt( data_source: DataSource, diff --git a/python-package/src/querychat/static/css/styles.css b/pkg-py/src/querychat/static/css/styles.css similarity index 100% rename from python-package/src/querychat/static/css/styles.css rename to pkg-py/src/querychat/static/css/styles.css diff --git a/r-package/DESCRIPTION b/pkg-r/DESCRIPTION similarity index 100% rename from r-package/DESCRIPTION rename to pkg-r/DESCRIPTION diff --git a/r-package/LICENSE b/pkg-r/LICENSE similarity index 100% rename from r-package/LICENSE rename to pkg-r/LICENSE diff --git a/r-package/NAMESPACE b/pkg-r/NAMESPACE similarity index 100% rename from r-package/NAMESPACE rename to pkg-r/NAMESPACE diff --git a/r-package/R/prompt.R b/pkg-r/R/prompt.R similarity index 100% rename from r-package/R/prompt.R rename to pkg-r/R/prompt.R diff --git a/r-package/R/querychat.R b/pkg-r/R/querychat.R similarity index 100% rename from r-package/R/querychat.R rename to pkg-r/R/querychat.R diff --git a/r-package/README.md b/pkg-r/README.md similarity index 99% rename from r-package/README.md rename to pkg-r/README.md index 8c2badbf0..03b5802ad 100644 --- a/r-package/README.md +++ b/pkg-r/README.md @@ -13,7 +13,7 @@ querychat is a drop-in component for Shiny that allows users to query a data fra ## Installation ```r -pak::pak("posit-dev/querychat/r-package") +pak::pak("posit-dev/querychat/pkg-r") ``` ## How to use diff --git a/r-package/inst/prompt/prompt.md b/pkg-r/inst/prompt/prompt.md similarity index 100% rename from r-package/inst/prompt/prompt.md rename to pkg-r/inst/prompt/prompt.md diff --git a/r-package/inst/www/styles.css b/pkg-r/inst/www/styles.css similarity index 100% rename from r-package/inst/www/styles.css rename to pkg-r/inst/www/styles.css diff --git a/r-package/man/querychat_init.Rd b/pkg-r/man/querychat_init.Rd similarity index 100% rename from r-package/man/querychat_init.Rd rename to pkg-r/man/querychat_init.Rd 
diff --git a/r-package/man/querychat_server.Rd b/pkg-r/man/querychat_server.Rd similarity index 100% rename from r-package/man/querychat_server.Rd rename to pkg-r/man/querychat_server.Rd diff --git a/r-package/man/querychat_system_prompt.Rd b/pkg-r/man/querychat_system_prompt.Rd similarity index 100% rename from r-package/man/querychat_system_prompt.Rd rename to pkg-r/man/querychat_system_prompt.Rd diff --git a/r-package/man/querychat_ui.Rd b/pkg-r/man/querychat_ui.Rd similarity index 100% rename from r-package/man/querychat_ui.Rd rename to pkg-r/man/querychat_ui.Rd diff --git a/python-package/pyproject.toml b/pyproject.toml similarity index 91% rename from python-package/pyproject.toml rename to pyproject.toml index 1ac303bb0..3ce33dc45 100644 --- a/python-package/pyproject.toml +++ b/pyproject.toml @@ -6,9 +6,9 @@ build-backend = "hatchling.build" name = "querychat" version = "0.1.0" description = "Chat with your data using natural language" -readme = "README.md" +readme = "pkg-py/README.md" requires-python = ">=3.9" -license = { file = "LICENSE" } +license = { file = "pkg-py/LICENSE" } authors = [{ name = "Posit", email = "info@posit.co" }] dependencies = [ "duckdb", @@ -32,21 +32,22 @@ classifiers = [ ] [project.urls] -Homepage = "https://github.com/posit-dev/querychat" +Homepage = "https://github.com/posit-dev/querychat" # TODO update when we have docs +Repository = "https://github.com/posit-dev/querychat" Issues = "https://github.com/posit-dev/querychat/issues" -Source = "https://github.com/posit-dev/querychat/tree/main/python-package" +Source = "https://github.com/posit-dev/querychat/tree/main/pkg-py" [tool.hatch.build.targets.wheel] -packages = ["src/querychat"] +packages = ["pkg-py/src/querychat"] [tool.hatch.build.targets.sdist] -include = ["src/querychat", "LICENSE", "README.md"] +include = ["pkg-py/src/querychat", "pkg-py/LICENSE", "pkg-py/README.md"] [tool.uv] dev-dependencies = ["ruff>=0.6.5", "pyright>=1.1.401", "tox-uv>=1.11.4"] [tool.ruff] -src = 
["src/querychat"] +src = ["pkg-py/src/querychat"] exclude = [ ".bzr", ".direnv", @@ -74,7 +75,7 @@ exclude = [ "node_modules", "site-packages", "venv", - "app.py", # ignore examples for now + "app-*.py", # ignore example apps for now ] line-length = 88 @@ -82,8 +83,6 @@ indent-width = 4 target-version = "py39" -# [tool.ruff.lint] -# select = ['E', 'F', 'W', 'A', 'PLC', 'PLE', 'PLW', 'I'] [tool.ruff.lint] extend-ignore = [ "A002", # Shadowing a built-in @@ -100,6 +99,8 @@ extend-ignore = [ "RET504", # Unnecessary assignment to `{name}` before `return` statement "RET505", # Unnecessary branch after `return` statement "UP007", # Use `X | Y` for type annotations (or Optional[X]) + # This package trusts the user + "S608", # Possible SQL injection vector through string-based query construction # TODO: Remove in the future, when we have docstrings. "D100", # Missing docstring in public module "D101", # Missing docstring in public class @@ -165,7 +166,7 @@ docstring-code-format = true docstring-code-line-length = "dynamic" [tool.pyright] -include = ["src/querychat"] +include = ["pkg-py/src/querychat"] # For more tox testing usage (in addition to typing), see: diff --git a/python-package/.gitignore b/python-package/.gitignore deleted file mode 100644 index 07df930ad..000000000 --- a/python-package/.gitignore +++ /dev/null @@ -1 +0,0 @@ -uv.lock diff --git a/python-package/Makefile b/python-package/Makefile deleted file mode 100644 index a9e243429..000000000 --- a/python-package/Makefile +++ /dev/null @@ -1,58 +0,0 @@ -# Inspired by https://github.com/posit-dev/chatlas/blob/main/Makefile - -.PHONY: setup -setup: ## [py] Setup python environment - uv sync --all-extras - -.PHONY: build -build: ## [py] Build python package - @echo "🧳 Building python package" - @[ -d dist ] && rm -r dist || true - uv build - -.PHONY: publish -publish: - hatch publish - -.PHONY: check -check: check-format check-types ## [py] Run python checks - -.PHONY: check-types -check-types: ## [py] Run 
python type checks - @echo "" - @echo "πŸ“ Checking types with pyright" - uv run --with pyright pyright - -.PHONY: check-format -check-format: - @echo "" - @echo "πŸ“ Checking format with ruff" - uv run --with ruff ruff check src/querychat --config pyproject.toml - -.PHONY: format -format: ## [py] Format python code - uv run --with ruff ruff check --fix src/querychat --config pyproject.toml - uv run --with ruff ruff format src/querychat --config pyproject.toml - -.PHONY: check-tox -check-tox: ## [py] Run python 3.9 - 3.12 checks with tox - @echo "" - @echo "πŸ”„ Running tests and type checking with tox for Python 3.9--3.12" - uv run tox run-parallel - -.PHONY: help -help: ## Show help messages for make targets - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; { \ - printf "\033[32m%-18s\033[0m", $$1; \ - if ($$2 ~ /^\[docs\]/) { \ - printf "\033[34m[docs]\033[0m%s\n", substr($$2, 7); \ - } else if ($$2 ~ /^\[py\]/) { \ - printf " \033[33m[py]\033[0m%s\n", substr($$2, 5); \ - } else if ($$2 ~ /^\[r\]/) { \ - printf " \033[31m[r]\033[0m%s\n", substr($$2, 4); \ - } else { \ - printf " %s\n", $$2; \ - } \ - }' - -.DEFAULT_GOAL := help